diff --git a/.cargo/cargo.toml b/.cargo/cargo.toml new file mode 100644 index 0000000..0236928 --- /dev/null +++ b/.cargo/cargo.toml @@ -0,0 +1,5 @@ +[source.crates-io] +replace-with = "vendored-sources" + +[source.vendored-sources] +directory = "vendor" diff --git a/vendor/aho-corasick/.cargo-checksum.json b/vendor/aho-corasick/.cargo-checksum.json index 45e3316..16b5b4e 100644 --- a/vendor/aho-corasick/.cargo-checksum.json +++ b/vendor/aho-corasick/.cargo-checksum.json @@ -1 +1 @@ -{"files":{"COPYING":"01c266bced4a434da0051174d6bee16a4c82cf634e2679b6155d40d75012390f","Cargo.toml":"11fd01de244f822ebfc5323bebd362d6183ab31ded726899ea117ade2de96d2e","DESIGN.md":"d336d97838a16dbc2052658c8a361434829944e3d80373572d9e75bb04c24e78","LICENSE-MIT":"0f96a83840e146e43c0ec96a22ec1f392e0680e6c1226e6f3ba87e0740af850f","README.md":"6c36ae81ea9af36e6f964d7045d9719b427fe36f15db99b879feb5453734941e","UNLICENSE":"7e12e5df4bae12cb21581ba157ced20e1986a0508dd10d0e8a4ab9a4cf94e85c","rustfmt.toml":"1ca600239a27401c4a43f363cf3f38183a212affc1f31bff3ae93234bbaec228","src/ahocorasick.rs":"66f3948ee578c0a5a38c61f795547c453bea7cac4d10457af3a23b88e380aa8a","src/automaton.rs":"22258a3e118672413119f8f543a9b912cce954e63524575c0ebfdf9011f9c2dd","src/dfa.rs":"d61f5a33a52d1ee0032782b626ccd066292b0a8827bef63ea3ac02dcc4ec4e47","src/lib.rs":"2a92d5c5e930f2d306508802e8a929135e1f41c9f5f8deda8f7eb98947179dd2","src/macros.rs":"c6c52ae05b24433cffaca7b78b3645d797862c5d5feffddf9f54909095ed6e05","src/nfa/contiguous.rs":"bed6b2f3c37c20baa18d919724984840b76602cc0d461c2684d2ded47673b366","src/nfa/mod.rs":"ee7b3109774d14bbad5239c16bb980dd6b8185ec136d94fbaf2f0dc27d5ffa15","src/nfa/noncontiguous.rs":"3fc777465842825dfca839fe354d88d77a6e94610481ad05497247ebccb0187a","src/packed/api.rs":"37a6580d3578d2244580ce2edc12105933c597f0c0b95d70e53a343e4d9f3582","src/packed/mod.rs":"b2c79103c1ed99b7d4261335909fff41a597a324325d62c3325afd656077bcb9","src/packed/pattern.rs":"dd74572178c20cf651ae272bf2c985fb0b3fadc5140cdcb1bff95a3fbcf
e2ade","src/packed/rabinkarp.rs":"7bdabb91ec8a22a37a12edabf893270a04c57dea9d6714d507ba17f094285e15","src/packed/teddy/README.md":"b4b83fb5afafbbea6cb76fe70f49cc8ced888f682d98abe5ea5773e95d9ec2b0","src/packed/teddy/compile.rs":"6725dc38114953c0429652e2a4d31dcc33e54c0f5a6cee79f282c6a3d7b41683","src/packed/teddy/mod.rs":"0ce9fcba05a68301474fe30a71824650d05dcb8f04405fe9fc6b9326229f7db0","src/packed/teddy/runtime.rs":"2dc68cc08e24169eafcebbab35187d6aab38ef994267d2735b019b43bad0e6f2","src/packed/tests.rs":"f28307860843c36c9998657e3518c250ec2f4eac800cf912cad6d7aaa81bab7c","src/packed/vector.rs":"48909869f0ecf8832d338ed54c48a77d972bbcda57476d3b863078a18e59f709","src/tests.rs":"c68192ab97b6161d0d6ee96fefd80cc7d14e4486ddcd8d1f82b5c92432c24ed5","src/transducer.rs":"02daa33a5d6dac41dcfd67f51df7c0d4a91c5131c781fb54c4de3520c585a6e1","src/util/alphabet.rs":"6dc22658a38deddc0279892035b18870d4585069e35ba7c7e649a24509acfbcc","src/util/buffer.rs":"f9e37f662c46c6ecd734458dedbe76c3bb0e84a93b6b0117c0d4ad3042413891","src/util/byte_frequencies.rs":"2fb85b381c038c1e44ce94294531cdcd339dca48b1e61f41455666e802cbbc9e","src/util/debug.rs":"ab301ad59aa912529cb97233a54a05914dd3cb2ec43e6fec7334170b97ac5998","src/util/error.rs":"ecccd60e7406305023efcc6adcc826eeeb083ab8f7fbfe3d97469438cd4c4e5c","src/util/int.rs":"b735f3ae8d398849fe0ab1575d634df15803b21167945b894205fdde4a1a9e58","src/util/mod.rs":"7ab28d11323ecdbd982087f32eb8bceeee84f1a2583f3aae27039c36d58cf12c","src/util/prefilter.rs":"ef36a945b8f564771a031d98e0dbf85eea93d1fc66a1c60b7baa98fecf66bb69","src/util/primitives.rs":"f89f3fa1d8db4e37de9ca767c6d05e346404837cade6d063bba68972fafa610b","src/util/remapper.rs":"9f12d911583a325c11806eeceb46d0dfec863cfcfa241aed84d31af73da746e5","src/util/search.rs":"6af803e08b8b8c8a33db100623f1621b0d741616524ce40893d8316897f27ffe","src/util/special.rs":"7d2f9cb9dd9771f59816e829b2d96b1239996f32939ba98764e121696c52b146"},"package":"67fc08ce920c31afb70f013dcce1bfc3a3195de6a228474e45e1f145b36f8d04"} \ No newline at end 
of file +{"files":{"COPYING":"01c266bced4a434da0051174d6bee16a4c82cf634e2679b6155d40d75012390f","Cargo.toml":"747d0fcb1257c9b8b013104da3c5a67f5d6cf8a95a2163b13703c01cab2c010a","DESIGN.md":"59c960e1b73b1d7fb41e4df6c0c1b1fcf44dd2ebc8a349597a7d0595f8cb5130","LICENSE-MIT":"0f96a83840e146e43c0ec96a22ec1f392e0680e6c1226e6f3ba87e0740af850f","README.md":"afc4d559a98cf190029af0bf320fc0022725e349cd2a303aac860254e28f3c53","UNLICENSE":"7e12e5df4bae12cb21581ba157ced20e1986a0508dd10d0e8a4ab9a4cf94e85c","rustfmt.toml":"1ca600239a27401c4a43f363cf3f38183a212affc1f31bff3ae93234bbaec228","src/ahocorasick.rs":"c699c07df70be45c666e128509ad571a7649d2073e4ae16ac1efd6793c9c6890","src/automaton.rs":"22258a3e118672413119f8f543a9b912cce954e63524575c0ebfdf9011f9c2dd","src/dfa.rs":"bfef1a94c5e7410584b1beb4e857b40d1ae2031b881cbc06fb1300409bbd555f","src/lib.rs":"2a92d5c5e930f2d306508802e8a929135e1f41c9f5f8deda8f7eb98947179dd2","src/macros.rs":"c6c52ae05b24433cffaca7b78b3645d797862c5d5feffddf9f54909095ed6e05","src/nfa/contiguous.rs":"aeb6ee5fd80eea04decbc4b46aa27d1ab270b78d416a644da25b7934f009ee66","src/nfa/mod.rs":"ee7b3109774d14bbad5239c16bb980dd6b8185ec136d94fbaf2f0dc27d5ffa15","src/nfa/noncontiguous.rs":"de94f02b04efd8744fb096759a8897c22012b0e0ca3ace161fd87c71befefe04","src/packed/api.rs":"160d3b10823316f7b0924e13c3afd222c8a7db5c0a00432401f311ef27d6a1b7","src/packed/ext.rs":"66be06fde8558429da23a290584d4b9fae665bf64c2578db4fe5f5f3ee864869","src/packed/mod.rs":"0020cd6f07ba5c8955923a9516d7f758864260eda53a6b6f629131c45ddeec62","src/packed/pattern.rs":"1e3a289a730c141fc30b295811e372d046c6619c7fd670308299b889a06c7673","src/packed/rabinkarp.rs":"403146eb1d838a84601d171393542340513cd1ee7ff750f2372161dd47746586","src/packed/teddy/README.md":"3a43194b64e221543d885176aba3beb1224a927385a20eca842daf6b0ea2f342","src/packed/teddy/builder.rs":"720735ea6c7ff92b081426513e6e82feed24a922849297bb538d28f7b8129f81","src/packed/teddy/generic.rs":"ea252ab05b32cea7dd9d71e332071d243db7dd0362e049252a27e5881ba2bf39","sr
c/packed/teddy/mod.rs":"17d741f7e2fb9dbac5ba7d1bd4542cf1e35e9f146ace728e23fe6bbed20028b2","src/packed/tests.rs":"8e2f56eb3890ed3876ecb47d3121996e416563127b6430110d7b516df3f83b4b","src/packed/vector.rs":"840065521cbd4701fa5b8b506d1537843d858c903f7cadf3c68749ea1780874b","src/tests.rs":"c68192ab97b6161d0d6ee96fefd80cc7d14e4486ddcd8d1f82b5c92432c24ed5","src/transducer.rs":"02daa33a5d6dac41dcfd67f51df7c0d4a91c5131c781fb54c4de3520c585a6e1","src/util/alphabet.rs":"6dc22658a38deddc0279892035b18870d4585069e35ba7c7e649a24509acfbcc","src/util/buffer.rs":"f9e37f662c46c6ecd734458dedbe76c3bb0e84a93b6b0117c0d4ad3042413891","src/util/byte_frequencies.rs":"2fb85b381c038c1e44ce94294531cdcd339dca48b1e61f41455666e802cbbc9e","src/util/debug.rs":"ab301ad59aa912529cb97233a54a05914dd3cb2ec43e6fec7334170b97ac5998","src/util/error.rs":"ecccd60e7406305023efcc6adcc826eeeb083ab8f7fbfe3d97469438cd4c4e5c","src/util/int.rs":"4ab6dbdba10027ddec2af63a9b28ce4eee30ded0daa5d8eb068b2b55542b6039","src/util/mod.rs":"7ab28d11323ecdbd982087f32eb8bceeee84f1a2583f3aae27039c36d58cf12c","src/util/prefilter.rs":"9fa4498f18bf70478b1996c1a013698b626d15f119aa81dbc536673c9f045718","src/util/primitives.rs":"f89f3fa1d8db4e37de9ca767c6d05e346404837cade6d063bba68972fafa610b","src/util/remapper.rs":"9f12d911583a325c11806eeceb46d0dfec863cfcfa241aed84d31af73da746e5","src/util/search.rs":"6af803e08b8b8c8a33db100623f1621b0d741616524ce40893d8316897f27ffe","src/util/special.rs":"7d2f9cb9dd9771f59816e829b2d96b1239996f32939ba98764e121696c52b146"},"package":"b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0"} \ No newline at end of file diff --git a/vendor/aho-corasick/Cargo.toml b/vendor/aho-corasick/Cargo.toml index 4292610..05e899c 100644 --- a/vendor/aho-corasick/Cargo.toml +++ b/vendor/aho-corasick/Cargo.toml @@ -13,9 +13,13 @@ edition = "2021" rust-version = "1.60.0" name = "aho-corasick" -version = "1.0.1" +version = "1.1.2" authors = ["Andrew Gallant "] -exclude = ["/aho-corasick-debug"] +exclude = [ + 
"/aho-corasick-debug", + "/benchmarks", + "/tmp", +] autotests = false description = "Fast multiple substring searching." homepage = "https://github.com/BurntSushi/aho-corasick" @@ -36,13 +40,14 @@ all-features = true rustdoc-args = [ "--cfg", "docsrs", + "--generate-link-to-definition", ] [profile.bench] -debug = true +debug = 2 [profile.release] -debug = true +debug = 2 [lib] name = "aho_corasick" diff --git a/vendor/aho-corasick/DESIGN.md b/vendor/aho-corasick/DESIGN.md index 4814357..f911f0c 100644 --- a/vendor/aho-corasick/DESIGN.md +++ b/vendor/aho-corasick/DESIGN.md @@ -470,12 +470,12 @@ If all of that fails, then a packed multiple substring algorithm will be attempted. Currently, the only algorithm available for this is Teddy, but more may be added in the future. Teddy is unlike the above prefilters in that it confirms its own matches, so when Teddy is active, it might not be necessary -for Aho-Corasick to run at all. However, the current Teddy implementation only -works in `x86_64` and when SSSE3 or AVX2 are available, and moreover, only -works _well_ when there are a small number of patterns (say, less than 100). -Teddy also requires the haystack to be of a certain length (more than 16-34 -bytes). When the haystack is shorter than that, Rabin-Karp is used instead. -(See `src/packed/rabinkarp.rs`.) +for Aho-Corasick to run at all. However, the current Teddy implementation +only works in `x86_64` when SSSE3 or AVX2 are available or in `aarch64` +(using NEON), and moreover, only works _well_ when there are a small number +of patterns (say, less than 100). Teddy also requires the haystack to be of a +certain length (more than 16-34 bytes). When the haystack is shorter than that, +Rabin-Karp is used instead. (See `src/packed/rabinkarp.rs`.) There is a more thorough description of Teddy at [`src/packed/teddy/README.md`](src/packed/teddy/README.md). 
diff --git a/vendor/aho-corasick/README.md b/vendor/aho-corasick/README.md index 69f39e4..c0f525f 100644 --- a/vendor/aho-corasick/README.md +++ b/vendor/aho-corasick/README.md @@ -170,3 +170,5 @@ supported version of Rust. * [G-Research/ahocorasick_rs](https://github.com/G-Research/ahocorasick_rs/) is a Python wrapper for this library. +* [tmikus/ahocorasick_rs](https://github.com/tmikus/ahocorasick_rs) is a Go + wrapper for this library. diff --git a/vendor/aho-corasick/src/ahocorasick.rs b/vendor/aho-corasick/src/ahocorasick.rs index 88e6536..2947627 100644 --- a/vendor/aho-corasick/src/ahocorasick.rs +++ b/vendor/aho-corasick/src/ahocorasick.rs @@ -1975,6 +1975,7 @@ impl AhoCorasick { /// configurations: /// /// ``` + /// # if !cfg!(target_pointer_width = "64") { return; } /// use aho_corasick::{AhoCorasick, AhoCorasickKind, MatchKind}; /// /// let ac = AhoCorasick::builder() @@ -1995,7 +1996,7 @@ impl AhoCorasick { /// .ascii_case_insensitive(true) /// .build(&["foobar", "bruce", "triskaidekaphobia", "springsteen"]) /// .unwrap(); - /// assert_eq!(9_128, ac.memory_usage()); + /// assert_eq!(10_879, ac.memory_usage()); /// /// let ac = AhoCorasick::builder() /// .kind(Some(AhoCorasickKind::ContiguousNFA)) @@ -2578,6 +2579,7 @@ impl AhoCorasickBuilder { /// More to the point, the memory usage increases superlinearly as this /// number increases. pub fn dense_depth(&mut self, depth: usize) -> &mut AhoCorasickBuilder { + self.nfa_noncontiguous.dense_depth(depth); self.nfa_contiguous.dense_depth(depth); self } diff --git a/vendor/aho-corasick/src/dfa.rs b/vendor/aho-corasick/src/dfa.rs index e6f4314..eabd15b 100644 --- a/vendor/aho-corasick/src/dfa.rs +++ b/vendor/aho-corasick/src/dfa.rs @@ -93,15 +93,9 @@ pub struct DFA { /// instead of the IDs being 0, 1, 2, 3, ..., they are 0*stride, 1*stride, /// 2*stride, 3*stride, ... trans: Vec, - /// The matches for every match state in this DFA. This is indexed by order - /// of match states in the DFA. 
Namely, as constructed, match states are - /// always laid out sequentially and contiguously in memory. Thus, after - /// converting a match state ID to a match state index, the indices are - /// all adjacent. - /// - /// More concretely, when a search enters a match state with id 'sid', then - /// the matching patterns are at 'matches[(sid >> stride2) - 2]'. The '- 2' - /// is to offset the first two states of a DFA: the dead and fail states. + /// The matches for every match state in this DFA. This is first indexed by + /// state index (so that's `sid >> stride2`) and then by order in which the + /// matches are meant to occur. matches: Vec>, /// The amount of heap memory used, in bytes, by the inner Vecs of /// 'matches'. @@ -174,13 +168,19 @@ impl DFA { /// Adds the given pattern IDs as matches to the given state and also /// records the added memory usage. - fn set_matches(&mut self, sid: StateID, pids: &[PatternID]) { - use core::mem::size_of; - - assert!(!pids.is_empty(), "match state must have non-empty pids"); + fn set_matches( + &mut self, + sid: StateID, + pids: impl Iterator, + ) { let index = (sid.as_usize() >> self.stride2).checked_sub(2).unwrap(); - self.matches[index].extend_from_slice(pids); - self.matches_memory_usage += size_of::() * pids.len(); + let mut at_least_one = false; + for pid in pids { + self.matches[index].push(pid); + self.matches_memory_usage += PatternID::SIZE; + at_least_one = true; + } + assert!(at_least_one, "match state must have non-empty pids"); } } @@ -524,6 +524,18 @@ impl Builder { dfa.byte_classes.alphabet_len(), dfa.byte_classes.stride(), ); + // The vectors can grow ~twice as big during construction because a + // Vec amortizes growth. But here, let's shrink things back down to + // what we actually need since we're never going to add more to it. 
+ dfa.trans.shrink_to_fit(); + dfa.pattern_lens.shrink_to_fit(); + dfa.matches.shrink_to_fit(); + // TODO: We might also want to shrink each Vec inside of `dfa.matches`, + // or even better, convert it to one contiguous allocation. But I think + // I went with nested allocs for good reason (can't remember), so this + // may be tricky to do. I decided not to shrink them here because it + // might require a fair bit of work to do. It's unclear whether it's + // worth it. Ok(dfa) } @@ -543,20 +555,33 @@ impl Builder { }; for (oldsid, state) in nnfa.states().iter().with_state_ids() { let newsid = old2new(oldsid); - if !state.matches.is_empty() { - dfa.set_matches(newsid, &state.matches); + if state.is_match() { + dfa.set_matches(newsid, nnfa.iter_matches(oldsid)); } sparse_iter( - state, + nnfa, + oldsid, &dfa.byte_classes, |byte, class, mut oldnextsid| { if oldnextsid == noncontiguous::NFA::FAIL { if anchored.is_anchored() { oldnextsid = noncontiguous::NFA::DEAD; + } else if state.fail() == noncontiguous::NFA::DEAD { + // This is a special case that avoids following + // DEAD transitions in a non-contiguous NFA. + // Following these transitions is pretty slow + // because the non-contiguous NFA will always use + // a sparse representation for it (because the + // DEAD state is usually treated as a sentinel). + // The *vast* majority of failure states are DEAD + // states, so this winds up being pretty slow if + // we go through the non-contiguous NFA state + // transition logic. Instead, just do it ourselves. + oldnextsid = noncontiguous::NFA::DEAD; } else { oldnextsid = nnfa.next_state( Anchored::No, - state.fail, + state.fail(), byte, ); } @@ -569,7 +594,7 @@ impl Builder { // Now that we've remapped all the IDs in our states, all that's left // is remapping the special state IDs. 
let old = nnfa.special(); - let mut new = &mut dfa.special; + let new = &mut dfa.special; new.max_special_id = old2new(old.max_special_id); new.max_match_id = old2new(old.max_match_id); if anchored.is_anchored() { @@ -620,11 +645,12 @@ impl Builder { remap_anchored[oldsid] = newsid; is_anchored[newsid.as_usize() >> stride2] = true; } - if !state.matches.is_empty() { - dfa.set_matches(newsid, &state.matches); + if state.is_match() { + dfa.set_matches(newsid, nnfa.iter_matches(oldsid)); } sparse_iter( - state, + nnfa, + oldsid, &dfa.byte_classes, |_, class, oldnextsid| { let class = usize::from(class); @@ -645,18 +671,28 @@ impl Builder { remap_unanchored[oldsid] = unewsid; remap_anchored[oldsid] = anewsid; is_anchored[anewsid.as_usize() >> stride2] = true; - if !state.matches.is_empty() { - dfa.set_matches(unewsid, &state.matches); - dfa.set_matches(anewsid, &state.matches); + if state.is_match() { + dfa.set_matches(unewsid, nnfa.iter_matches(oldsid)); + dfa.set_matches(anewsid, nnfa.iter_matches(oldsid)); } sparse_iter( - state, + nnfa, + oldsid, &dfa.byte_classes, |byte, class, oldnextsid| { let class = usize::from(class); if oldnextsid == noncontiguous::NFA::FAIL { - dfa.trans[unewsid.as_usize() + class] = nnfa - .next_state(Anchored::No, state.fail, byte); + let oldnextsid = + if state.fail() == noncontiguous::NFA::DEAD { + noncontiguous::NFA::DEAD + } else { + nnfa.next_state( + Anchored::No, + state.fail(), + byte, + ) + }; + dfa.trans[unewsid.as_usize() + class] = oldnextsid; } else { dfa.trans[unewsid.as_usize() + class] = oldnextsid; dfa.trans[anewsid.as_usize() + class] = oldnextsid; @@ -680,7 +716,7 @@ impl Builder { // Now that we've remapped all the IDs in our states, all that's left // is remapping the special state IDs. 
let old = nnfa.special(); - let mut new = &mut dfa.special; + let new = &mut dfa.special; new.max_special_id = remap_anchored[old.max_special_id]; new.max_match_id = remap_anchored[old.max_match_id]; new.start_unanchored_id = remap_unanchored[old.start_unanchored_id]; @@ -763,14 +799,15 @@ impl Builder { /// `byte_classes.alphabet_len()` times, once for every possible class in /// ascending order. fn sparse_iter( - state: &noncontiguous::State, + nnfa: &noncontiguous::NFA, + oldsid: StateID, classes: &ByteClasses, mut f: F, ) { let mut prev_class = None; let mut byte = 0usize; - for &(b, id) in state.trans.iter() { - while byte < usize::from(b) { + for t in nnfa.iter_trans(oldsid) { + while byte < usize::from(t.byte()) { let rep = byte.as_u8(); let class = classes.get(rep); byte += 1; @@ -779,11 +816,11 @@ fn sparse_iter( prev_class = Some(class); } } - let rep = b; + let rep = t.byte(); let class = classes.get(rep); byte += 1; if prev_class != Some(class) { - f(rep, class, id); + f(rep, class, t.next()); prev_class = Some(class); } } diff --git a/vendor/aho-corasick/src/nfa/contiguous.rs b/vendor/aho-corasick/src/nfa/contiguous.rs index f558c1e..29c1621 100644 --- a/vendor/aho-corasick/src/nfa/contiguous.rs +++ b/vendor/aho-corasick/src/nfa/contiguous.rs @@ -684,6 +684,8 @@ impl<'a> State<'a> { /// dense format. Otherwise, the choice between dense and sparse will be /// automatically chosen based on the old state. fn write( + nnfa: &noncontiguous::NFA, + oldsid: StateID, old: &noncontiguous::State, classes: &ByteClasses, dst: &mut Vec, @@ -692,45 +694,46 @@ impl<'a> State<'a> { let sid = StateID::new(dst.len()).map_err(|e| { BuildError::state_id_overflow(StateID::MAX.as_u64(), e.attempted()) })?; + let old_len = nnfa.iter_trans(oldsid).count(); // For states with a lot of transitions, we might as well just make // them dense. These kinds of hot states tend to be very rare, so we're // okay with it. 
This also gives us more sentinels in the state's // 'kind', which lets us create different state kinds to save on // space. - let kind = if force_dense - || old.trans.len() > State::MAX_SPARSE_TRANSITIONS - { + let kind = if force_dense || old_len > State::MAX_SPARSE_TRANSITIONS { State::KIND_DENSE - } else if old.trans.len() == 1 && old.matches.is_empty() { + } else if old_len == 1 && !old.is_match() { State::KIND_ONE } else { // For a sparse state, the kind is just the number of transitions. - u32::try_from(old.trans.len()).unwrap() + u32::try_from(old_len).unwrap() }; if kind == State::KIND_DENSE { dst.push(kind); - dst.push(old.fail.as_u32()); - State::write_dense_trans(old, classes, dst)?; + dst.push(old.fail().as_u32()); + State::write_dense_trans(nnfa, oldsid, classes, dst)?; } else if kind == State::KIND_ONE { - let class = u32::from(classes.get(old.trans[0].0)); + let t = nnfa.iter_trans(oldsid).next().unwrap(); + let class = u32::from(classes.get(t.byte())); dst.push(kind | (class << 8)); - dst.push(old.fail.as_u32()); - dst.push(old.trans[0].1.as_u32()); + dst.push(old.fail().as_u32()); + dst.push(t.next().as_u32()); } else { dst.push(kind); - dst.push(old.fail.as_u32()); - State::write_sparse_trans(old, classes, dst)?; + dst.push(old.fail().as_u32()); + State::write_sparse_trans(nnfa, oldsid, classes, dst)?; } // Now finally write the number of matches and the matches themselves. 
- if !old.matches.is_empty() { - if old.matches.len() == 1 { - let pid = old.matches[0].as_u32(); + if old.is_match() { + let matches_len = nnfa.iter_matches(oldsid).count(); + if matches_len == 1 { + let pid = nnfa.iter_matches(oldsid).next().unwrap().as_u32(); assert_eq!(0, pid & (1 << 31)); dst.push((1 << 31) | pid); } else { - assert_eq!(0, old.matches.len() & (1 << 31)); - dst.push(old.matches.len().as_u32()); - dst.extend(old.matches.iter().map(|pid| pid.as_u32())); + assert_eq!(0, matches_len & (1 << 31)); + dst.push(matches_len.as_u32()); + dst.extend(nnfa.iter_matches(oldsid).map(|pid| pid.as_u32())); } } Ok(sid) @@ -744,13 +747,14 @@ impl<'a> State<'a> { /// This returns an error if `dst` became so big that `StateID`s can no /// longer be created for new states. fn write_sparse_trans( - old: &noncontiguous::State, + nnfa: &noncontiguous::NFA, + oldsid: StateID, classes: &ByteClasses, dst: &mut Vec, ) -> Result<(), BuildError> { let (mut chunk, mut len) = ([0; 4], 0); - for &(byte, _) in old.trans.iter() { - chunk[len] = classes.get(byte); + for t in nnfa.iter_trans(oldsid) { + chunk[len] = classes.get(t.byte()); len += 1; if len == 4 { dst.push(u32::from_ne_bytes(chunk)); @@ -773,8 +777,8 @@ impl<'a> State<'a> { } dst.push(u32::from_ne_bytes(chunk)); } - for &(_, next) in old.trans.iter() { - dst.push(next.as_u32()); + for t in nnfa.iter_trans(oldsid) { + dst.push(t.next().as_u32()); } Ok(()) } @@ -787,7 +791,8 @@ impl<'a> State<'a> { /// This returns an error if `dst` became so big that `StateID`s can no /// longer be created for new states. 
fn write_dense_trans( - old: &noncontiguous::State, + nnfa: &noncontiguous::NFA, + oldsid: StateID, classes: &ByteClasses, dst: &mut Vec, ) -> Result<(), BuildError> { @@ -807,8 +812,9 @@ impl<'a> State<'a> { .take(classes.alphabet_len()), ); assert!(start < dst.len(), "equivalence classes are never empty"); - for &(byte, next) in old.trans.iter() { - dst[start + usize::from(classes.get(byte))] = next.as_u32(); + for t in nnfa.iter_trans(oldsid) { + dst[start + usize::from(classes.get(t.byte()))] = + t.next().as_u32(); } Ok(()) } @@ -960,8 +966,10 @@ impl Builder { index_to_state_id[oldsid] = NFA::FAIL; continue; } - let force_dense = state.depth.as_usize() < self.dense_depth; + let force_dense = state.depth().as_usize() < self.dense_depth; let newsid = State::write( + nnfa, + oldsid, state, &nfa.byte_classes, &mut nfa.repr, @@ -980,7 +988,7 @@ impl Builder { // is remapping the special state IDs. let remap = &index_to_state_id; let old = nnfa.special(); - let mut new = &mut nfa.special; + let new = &mut nfa.special; new.max_special_id = remap[old.max_special_id]; new.max_match_id = remap[old.max_match_id]; new.start_unanchored_id = remap[old.start_unanchored_id]; @@ -992,6 +1000,11 @@ impl Builder { nfa.memory_usage(), nfa.byte_classes.alphabet_len(), ); + // The vectors can grow ~twice as big during construction because a + // Vec amortizes growth. But here, let's shrink things back down to + // what we actually need since we're never going to add more to it. + nfa.repr.shrink_to_fit(); + nfa.pattern_lens.shrink_to_fit(); Ok(nfa) } @@ -1074,8 +1087,6 @@ fn u32_len(ntrans: usize) -> usize { #[cfg(test)] mod tests { - use super::*; - // This test demonstrates a SWAR technique I tried in the sparse transition // code inside of 'next_state'. 
Namely, sparse transitions work by // iterating over u32 chunks, with each chunk containing up to 4 classes @@ -1090,8 +1101,14 @@ mod tests { // Anyway, this code was a little tricky to write, so I converted it to a // test in case someone figures out how to use it more effectively than // I could. + // + // (This also only works on little endian. So big endian would need to be + // accounted for if we ever decided to use this I think.) + #[cfg(target_endian = "little")] #[test] fn swar() { + use super::*; + fn has_zero_byte(x: u32) -> u32 { const LO_U32: u32 = 0x01010101; const HI_U32: u32 = 0x80808080; diff --git a/vendor/aho-corasick/src/nfa/noncontiguous.rs b/vendor/aho-corasick/src/nfa/noncontiguous.rs index 3990489..af32617 100644 --- a/vendor/aho-corasick/src/nfa/noncontiguous.rs +++ b/vendor/aho-corasick/src/nfa/noncontiguous.rs @@ -100,6 +100,38 @@ pub struct NFA { /// The third state (index 2) is generally intended to be the starting or /// "root" state. states: Vec, + /// Transitions stored in a sparse representation via a linked list. + /// + /// Each transition contains three pieces of information: the byte it + /// is defined for, the state it transitions to and a link to the next + /// transition in the same state (or `StateID::ZERO` if it is the last + /// transition). + /// + /// The first transition for each state is determined by `State::sparse`. + /// + /// Note that this contains a complete set of all transitions in this NFA, + /// including states that have a dense representation for transitions. + /// (Adding dense transitions for a state doesn't remove its sparse + /// transitions, since deleting transitions from this particular sparse + /// representation would be fairly expensive.) + sparse: Vec, + /// Transitions stored in a dense representation. + /// + /// A state has a row in this table if and only if `State::dense` is + /// not equal to `StateID::ZERO`. 
When not zero, there are precisely + /// `NFA::byte_classes::alphabet_len()` entries beginning at `State::dense` + /// in this table. + /// + /// Generally a very small minority of states have a dense representation + /// since it uses so much memory. + dense: Vec, + /// Matches stored in linked list for each state. + /// + /// Like sparse transitions, each match has a link to the next match in the + /// state. + /// + /// The first match for each state is determined by `State::matches`. + matches: Vec, /// The length, in bytes, of each pattern in this NFA. This slice is /// indexed by `PatternID`. /// @@ -140,8 +172,6 @@ pub struct NFA { /// states have been added, the states are shuffled such that the above /// predicates hold. special: Special, - /// The number of bytes of heap used by this sparse NFA. - memory_usage: usize, } impl NFA { @@ -229,12 +259,329 @@ impl NFA { /// Re-maps all state IDs in this NFA according to the `map` function /// given. pub(crate) fn remap(&mut self, map: impl Fn(StateID) -> StateID) { + let alphabet_len = self.byte_classes.alphabet_len(); for state in self.states.iter_mut() { state.fail = map(state.fail); - for (_, ref mut sid) in state.trans.iter_mut() { - *sid = map(*sid); + let mut link = state.sparse; + while link != StateID::ZERO { + let t = &mut self.sparse[link]; + t.next = map(t.next); + link = t.link; + } + if state.dense != StateID::ZERO { + let start = state.dense.as_usize(); + for next in self.dense[start..][..alphabet_len].iter_mut() { + *next = map(*next); + } + } + } + } + + /// Iterate over all of the transitions for the given state ID. + pub(crate) fn iter_trans( + &self, + sid: StateID, + ) -> impl Iterator + '_ { + let mut link = self.states[sid].sparse; + core::iter::from_fn(move || { + if link == StateID::ZERO { + return None; + } + let t = self.sparse[link]; + link = t.link; + Some(t) + }) + } + + /// Iterate over all of the matches for the given state ID. 
+ pub(crate) fn iter_matches( + &self, + sid: StateID, + ) -> impl Iterator + '_ { + let mut link = self.states[sid].matches; + core::iter::from_fn(move || { + if link == StateID::ZERO { + return None; + } + let m = self.matches[link]; + link = m.link; + Some(m.pid) + }) + } + + /// Return the link following the one given. If the one given is the last + /// link for the given state, then return `None`. + /// + /// If no previous link is given, then this returns the first link in the + /// state, if one exists. + /// + /// This is useful for manually iterating over the transitions in a single + /// state without borrowing the NFA. This permits mutating other parts of + /// the NFA during iteration. Namely, one can access the transition pointed + /// to by the link via `self.sparse[link]`. + fn next_link( + &self, + sid: StateID, + prev: Option, + ) -> Option { + let link = + prev.map_or(self.states[sid].sparse, |p| self.sparse[p].link); + if link == StateID::ZERO { + None + } else { + Some(link) + } + } + + /// Follow the transition for the given byte in the given state. If no such + /// transition exists, then the FAIL state ID is returned. + #[inline(always)] + fn follow_transition(&self, sid: StateID, byte: u8) -> StateID { + let s = &self.states[sid]; + // This is a special case that targets starting states and states + // near a start state. Namely, after the initial trie is constructed, + // we look for states close to the start state to convert to a dense + // representation for their transitions. This winds up using a lot more + // memory per state in exchange for faster transition lookups. But + // since we only do this for a small number of states (by default), the + // memory usage is usually minimal. + // + // This has *massive* benefit when executing searches because the + // unanchored starting state is by far the hottest state and is + // frequently visited. 
Moreover, the 'for' loop below that works + // decently on an actually sparse state is disastrous on a state that + // is nearly or completely dense. + if s.dense == StateID::ZERO { + self.follow_transition_sparse(sid, byte) + } else { + let class = usize::from(self.byte_classes.get(byte)); + self.dense[s.dense.as_usize() + class] + } + } + + /// Like `follow_transition`, but always uses the sparse representation. + #[inline(always)] + fn follow_transition_sparse(&self, sid: StateID, byte: u8) -> StateID { + for t in self.iter_trans(sid) { + if byte <= t.byte { + if byte == t.byte { + return t.next; + } + break; + } + } + NFA::FAIL + } + + /// Set the transition for the given byte to the state ID given. + /// + /// Note that one should not set transitions to the FAIL state. It is not + /// technically incorrect, but it wastes space. If a transition is not + /// defined, then it is automatically assumed to lead to the FAIL state. + fn add_transition( + &mut self, + prev: StateID, + byte: u8, + next: StateID, + ) -> Result<(), BuildError> { + if self.states[prev].dense != StateID::ZERO { + let dense = self.states[prev].dense; + let class = usize::from(self.byte_classes.get(byte)); + self.dense[dense.as_usize() + class] = next; + } + + let head = self.states[prev].sparse; + if head == StateID::ZERO || byte < self.sparse[head].byte { + let new_link = self.alloc_transition()?; + self.sparse[new_link] = Transition { byte, next, link: head }; + self.states[prev].sparse = new_link; + return Ok(()); + } else if byte == self.sparse[head].byte { + self.sparse[head].next = next; + return Ok(()); + } + + // We handled the only cases where the beginning of the transition + // chain needs to change. At this point, we now know that there is + // at least one entry in the transition chain and the byte for that + // transition is less than the byte for the transition we're adding. 
+ let (mut link_prev, mut link_next) = (head, self.sparse[head].link); + while link_next != StateID::ZERO && byte > self.sparse[link_next].byte + { + link_prev = link_next; + link_next = self.sparse[link_next].link; + } + if link_next == StateID::ZERO || byte < self.sparse[link_next].byte { + let link = self.alloc_transition()?; + self.sparse[link] = Transition { byte, next, link: link_next }; + self.sparse[link_prev].link = link; + } else { + assert_eq!(byte, self.sparse[link_next].byte); + self.sparse[link_next].next = next; + } + Ok(()) + } + + /// This sets every possible transition (all 255 of them) for the given + /// state to the name `next` value. + /// + /// This is useful for efficiently initializing start/dead states. + /// + /// # Panics + /// + /// This requires that the state has no transitions added to it already. + /// If it has any transitions, then this panics. It will also panic if + /// the state has been densified prior to calling this. + fn init_full_state( + &mut self, + prev: StateID, + next: StateID, + ) -> Result<(), BuildError> { + assert_eq!( + StateID::ZERO, + self.states[prev].dense, + "state must not be dense yet" + ); + assert_eq!( + StateID::ZERO, + self.states[prev].sparse, + "state must have zero transitions" + ); + let mut prev_link = StateID::ZERO; + for byte in 0..=255 { + let new_link = self.alloc_transition()?; + self.sparse[new_link] = + Transition { byte, next, link: StateID::ZERO }; + if prev_link == StateID::ZERO { + self.states[prev].sparse = new_link; + } else { + self.sparse[prev_link].link = new_link; + } + prev_link = new_link; + } + Ok(()) + } + + /// Add a match for the given pattern ID to the state for the given ID. 
+ fn add_match( + &mut self, + sid: StateID, + pid: PatternID, + ) -> Result<(), BuildError> { + let head = self.states[sid].matches; + let mut link = head; + while self.matches[link].link != StateID::ZERO { + link = self.matches[link].link; + } + let new_match_link = self.alloc_match()?; + self.matches[new_match_link].pid = pid; + if link == StateID::ZERO { + self.states[sid].matches = new_match_link; + } else { + self.matches[link].link = new_match_link; + } + Ok(()) + } + + /// Copy matches from the `src` state to the `dst` state. This is useful + /// when a match state can be reached via a failure transition. In which + /// case, you'll want to copy the matches (if any) from the state reached + /// by the failure transition to the original state you were at. + fn copy_matches( + &mut self, + src: StateID, + dst: StateID, + ) -> Result<(), BuildError> { + let head_dst = self.states[dst].matches; + let mut link_dst = head_dst; + while self.matches[link_dst].link != StateID::ZERO { + link_dst = self.matches[link_dst].link; + } + let mut link_src = self.states[src].matches; + while link_src != StateID::ZERO { + let new_match_link = + StateID::new(self.matches.len()).map_err(|e| { + BuildError::state_id_overflow( + StateID::MAX.as_u64(), + e.attempted(), + ) + })?; + self.matches.push(Match { + pid: self.matches[link_src].pid, + link: StateID::ZERO, + }); + if link_dst == StateID::ZERO { + self.states[dst].matches = new_match_link; + } else { + self.matches[link_dst].link = new_match_link; } + + link_dst = new_match_link; + link_src = self.matches[link_src].link; } + Ok(()) + } + + /// Create a new entry in `NFA::trans`, if there's room, and return that + /// entry's ID. If there's no room, then an error is returned. 
+ fn alloc_transition(&mut self) -> Result { + let id = StateID::new(self.sparse.len()).map_err(|e| { + BuildError::state_id_overflow(StateID::MAX.as_u64(), e.attempted()) + })?; + self.sparse.push(Transition::default()); + Ok(id) + } + + /// Create a new entry in `NFA::matches`, if there's room, and return that + /// entry's ID. If there's no room, then an error is returned. + fn alloc_match(&mut self) -> Result { + let id = StateID::new(self.matches.len()).map_err(|e| { + BuildError::state_id_overflow(StateID::MAX.as_u64(), e.attempted()) + })?; + self.matches.push(Match::default()); + Ok(id) + } + + /// Create a new set of `N` transitions in this NFA's dense transition + /// table. The ID return corresponds to the index at which the `N` + /// transitions begin. So `id+0` is the first transition and `id+(N-1)` is + /// the last. + /// + /// `N` is determined via `NFA::byte_classes::alphabet_len`. + fn alloc_dense_state(&mut self) -> Result { + let id = StateID::new(self.dense.len()).map_err(|e| { + BuildError::state_id_overflow(StateID::MAX.as_u64(), e.attempted()) + })?; + // We use FAIL because it's the correct default. If a state doesn't + // have a transition defined for every possible byte value, then the + // transition function should return NFA::FAIL. + self.dense.extend( + core::iter::repeat(NFA::FAIL) + .take(self.byte_classes.alphabet_len()), + ); + Ok(id) + } + + /// Allocate and add a fresh state to the underlying NFA and return its + /// ID (guaranteed to be one more than the ID of the previously allocated + /// state). If the ID would overflow `StateID`, then this returns an error. + fn alloc_state(&mut self, depth: usize) -> Result { + // This is OK because we error when building the trie if we see a + // pattern whose length cannot fit into a 'SmallIndex', and the longest + // possible depth corresponds to the length of the longest pattern. 
+ let depth = SmallIndex::new(depth) + .expect("patterns longer than SmallIndex::MAX are not allowed"); + let id = StateID::new(self.states.len()).map_err(|e| { + BuildError::state_id_overflow(StateID::MAX.as_u64(), e.attempted()) + })?; + self.states.push(State { + sparse: StateID::ZERO, + dense: StateID::ZERO, + matches: StateID::ZERO, + fail: self.special.start_unanchored_id, + depth, + }); + Ok(id) } } @@ -263,8 +610,7 @@ unsafe impl Automaton for NFA { // 2. All state.fail values point to a state closer to the start state. // 3. The start state has no transitions to the FAIL state. loop { - let state = &self.states[sid]; - let next = state.next_state(byte); + let next = self.follow_transition(sid, byte); if next != NFA::FAIL { return next; } @@ -275,7 +621,7 @@ unsafe impl Automaton for NFA { if anchored.is_anchored() { return NFA::DEAD; } - sid = state.fail; + sid = self.states[sid].fail(); } } @@ -331,17 +677,21 @@ unsafe impl Automaton for NFA { #[inline(always)] fn match_len(&self, sid: StateID) -> usize { - self.states[sid].matches.len() + self.iter_matches(sid).count() } #[inline(always)] fn match_pattern(&self, sid: StateID, index: usize) -> PatternID { - self.states[sid].matches[index] + self.iter_matches(sid).nth(index).unwrap() } #[inline(always)] fn memory_usage(&self) -> usize { - self.memory_usage + self.states.len() * core::mem::size_of::() + + self.sparse.len() * core::mem::size_of::() + + self.matches.len() * core::mem::size_of::() + + self.dense.len() * StateID::SIZE + + self.pattern_lens.len() * SmallIndex::SIZE + self.prefilter.as_ref().map_or(0, |p| p.memory_usage()) } @@ -356,22 +706,32 @@ unsafe impl Automaton for NFA { /// It contains the transitions to the next state, a failure transition for /// cases where there exists no other transition for the current input byte /// and the matches implied by visiting this state (if any). 
-#[derive(Clone)] +#[derive(Clone, Debug)] pub(crate) struct State { - /// The set of defined transitions for this state sorted by `u8`. In an - /// unanchored search, if a byte is not in this set of transitions, then - /// it should transition to `fail`. In an anchored search, it should - /// transition to the special DEAD state. - pub(crate) trans: Vec<(u8, StateID)>, - /// The patterns that match once this state is entered. Note that order - /// is important in the leftmost case. For example, if one adds 'foo' and - /// 'foo' (duplicate patterns are not disallowed), then in a leftmost-first - /// search, only the first 'foo' will ever match. - pub(crate) matches: Vec, + /// A pointer to `NFA::trans` corresponding to the head of a linked list + /// containing all of the transitions for this state. + /// + /// This is `StateID::ZERO` if and only if this state has zero transitions. + sparse: StateID, + /// A pointer to a row of `N` transitions in `NFA::dense`. These + /// transitions correspond precisely to what is obtained by traversing + /// `sparse`, but permits constant time lookup. + /// + /// When this is zero (which is true for most states in the default + /// configuration), then this state has no dense representation. + /// + /// Note that `N` is equal to `NFA::byte_classes::alphabet_len()`. This is + /// typically much less than 256 (the maximum value). + dense: StateID, + /// A pointer to `NFA::matches` corresponding to the head of a linked list + /// containing all of the matches for this state. + /// + /// This is `StateID::ZERO` if and only if this state is not a match state. + matches: StateID, /// The state that should be transitioned to if the current byte in the /// haystack does not have a corresponding transition defined in this /// state. - pub(crate) fail: StateID, + fail: StateID, /// The depth of this state. Specifically, this is the distance from this /// state to the starting state. 
(For the special sentinel states DEAD and /// FAIL, their depth is always 0.) The depth of a starting state is 0. @@ -384,96 +744,92 @@ pub(crate) struct State { /// a sparse representation for all states unconditionally.) In any case, /// this is really the only convenient place to compute and store this /// information, which we need when building the contiguous NFA. - pub(crate) depth: SmallIndex, + depth: SmallIndex, } impl State { - /// Return the heap memory used by this state. Note that if `State` is - /// itself on the heap, then callers need to call this in addition to - /// `size_of::()` to get the full heap memory used. - fn memory_usage(&self) -> usize { - use core::mem::size_of; + /// Return true if and only if this state is a match state. + pub(crate) fn is_match(&self) -> bool { + self.matches != StateID::ZERO + } - (self.trans.len() * size_of::<(u8, StateID)>()) - + (self.matches.len() * size_of::()) + /// Returns the failure transition for this state. + pub(crate) fn fail(&self) -> StateID { + self.fail } - /// Return true if and only if this state is a match state. - fn is_match(&self) -> bool { - !self.matches.is_empty() + /// Returns the depth of this state. That is, the number of transitions + /// this state is from the start state of the NFA. + pub(crate) fn depth(&self) -> SmallIndex { + self.depth } +} - /// Return the next state by following the transition for the given byte. - /// If no transition for the given byte is defined, then the FAIL state ID - /// is returned. - #[inline(always)] - fn next_state(&self, byte: u8) -> StateID { - // This is a special case that targets the unanchored starting state. - // By construction, the unanchored starting state is actually a dense - // state, because every possible transition is defined on it. Any - // transitions that weren't added as part of initial trie construction - // get explicitly added as a self-transition back to itself. 
Thus, we - // can treat it as if it were dense and do a constant time lookup. - // - // This has *massive* benefit when executing searches because the - // unanchored starting state is by far the hottest state and is - // frequently visited. Moreover, the 'for' loop below that works - // decently on an actually sparse state is disastrous on a state that - // is nearly or completely dense. - // - // This optimization also works in general, including for non-starting - // states that happen to have every transition defined. Namely, it - // is impossible for 'self.trans' to have duplicate transitions (by - // construction) and transitions are always in sorted ascending order. - // So if a state has 256 transitions, it is, by construction, dense and - // amenable to constant time indexing. - if self.trans.len() == 256 { - self.trans[usize::from(byte)].1 - } else { - for &(b, id) in self.trans.iter() { - if b == byte { - return id; - } - } - NFA::FAIL - } +/// A single transition in a non-contiguous NFA. +#[derive(Clone, Copy, Default)] +#[repr(packed)] +pub(crate) struct Transition { + byte: u8, + next: StateID, + link: StateID, +} + +impl Transition { + /// Return the byte for which this transition is defined. + pub(crate) fn byte(&self) -> u8 { + self.byte } - /// Set the transition for the given byte to the state ID given. - /// - /// Note that one should not set transitions to the FAIL state. It is not - /// technically incorrect, but it wastes space. If a transition is not - /// defined, then it is automatically assumed to lead to the FAIL state. - fn set_next_state(&mut self, byte: u8, next: StateID) { - match self.trans.binary_search_by_key(&byte, |&(b, _)| b) { - Ok(i) => self.trans[i] = (byte, next), - Err(i) => self.trans.insert(i, (byte, next)), - } + /// Return the ID of the state that this transition points to. + pub(crate) fn next(&self) -> StateID { + self.next + } + + /// Return the ID of the next transition. 
+ fn link(&self) -> StateID { + self.link } } -impl core::fmt::Debug for State { +impl core::fmt::Debug for Transition { fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { - use crate::{automaton::sparse_transitions, util::debug::DebugByte}; + write!( + f, + "Transition(byte: {:X?}, next: {:?}, link: {:?})", + self.byte, + self.next().as_usize(), + self.link().as_usize() + ) + } +} - let it = sparse_transitions(self.trans.iter().copied()).enumerate(); - for (i, (start, end, sid)) in it { - if i > 0 { - write!(f, ", ")?; - } - if start == end { - write!(f, "{:?} => {:?}", DebugByte(start), sid.as_usize())?; - } else { - write!( - f, - "{:?}-{:?} => {:?}", - DebugByte(start), - DebugByte(end), - sid.as_usize() - )?; - } - } - Ok(()) +/// A single match in a non-contiguous NFA. +#[derive(Clone, Copy, Default)] +struct Match { + pid: PatternID, + link: StateID, +} + +impl Match { + /// Return the pattern ID for this match. + pub(crate) fn pattern(&self) -> PatternID { + self.pid + } + + /// Return the ID of the next match. + fn link(&self) -> StateID { + self.link + } +} + +impl core::fmt::Debug for Match { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + write!( + f, + "Match(pid: {:?}, link: {:?})", + self.pattern().as_usize(), + self.link().as_usize() + ) } } @@ -487,6 +843,7 @@ pub struct Builder { match_kind: MatchKind, prefilter: bool, ascii_case_insensitive: bool, + dense_depth: usize, } impl Default for Builder { @@ -495,6 +852,7 @@ impl Default for Builder { match_kind: MatchKind::default(), prefilter: true, ascii_case_insensitive: false, + dense_depth: 3, } } } @@ -544,6 +902,17 @@ impl Builder { self } + /// Set the limit on how many states use a dense representation for their + /// transitions. Other states will generally use a sparse representation. + /// + /// See + /// [`AhoCorasickBuilder::dense_depth`](crate::AhoCorasickBuilder::dense_depth) + /// for more documentation and examples. 
+ pub fn dense_depth(&mut self, depth: usize) -> &mut Builder { + self.dense_depth = depth; + self + } + /// Enable heuristic prefilter optimizations. /// /// See @@ -577,13 +946,15 @@ impl<'a> Compiler<'a> { nfa: NFA { match_kind: builder.match_kind, states: vec![], + sparse: vec![], + dense: vec![], + matches: vec![], pattern_lens: vec![], prefilter: None, byte_classes: ByteClasses::singletons(), min_pattern_len: usize::MAX, max_pattern_len: 0, special: Special::zero(), - memory_usage: 0, }, byteset: ByteClassSet::empty(), }) @@ -594,39 +965,59 @@ impl<'a> Compiler<'a> { I: IntoIterator, P: AsRef<[u8]>, { + // Add dummy transition/match links, so that no valid link will point + // to another link at index 0. + self.nfa.sparse.push(Transition::default()); + self.nfa.matches.push(Match::default()); + // Add a dummy dense transition so that no states can have dense==0 + // represent a valid pointer to dense transitions. This permits + // dense==0 to be a sentinel indicating "no dense transitions." + self.nfa.dense.push(NFA::DEAD); // the dead state, only used for leftmost and fixed to id==0 - self.add_state(0)?; + self.nfa.alloc_state(0)?; // the fail state, which is never entered and fixed to id==1 - self.add_state(0)?; + self.nfa.alloc_state(0)?; // unanchored start state, initially fixed to id==2 but later shuffled // to appear after all non-start match states. - self.nfa.special.start_unanchored_id = self.add_state(0)?; + self.nfa.special.start_unanchored_id = self.nfa.alloc_state(0)?; // anchored start state, initially fixed to id==3 but later shuffled // to appear after unanchored start state. - self.nfa.special.start_anchored_id = self.add_state(0)?; + self.nfa.special.start_anchored_id = self.nfa.alloc_state(0)?; // Initialize the unanchored starting state in order to make it dense, // and thus make transition lookups on this state faster. 
- self.init_unanchored_start_state(); + self.init_unanchored_start_state()?; + // Set all transitions on the DEAD state to point to itself. This way, + // the DEAD state can never be escaped. It MUST be used as a sentinel + // in any correct search. + self.add_dead_state_loop()?; // Build the base trie from the given patterns. self.build_trie(patterns)?; + self.nfa.states.shrink_to_fit(); + // Turn our set of bytes into equivalent classes. This NFA + // implementation uses byte classes only for states that use a dense + // representation of transitions. (And that's why this comes before + // `self.densify()`, as the byte classes need to be set first.) + self.nfa.byte_classes = self.byteset.byte_classes(); // Add transitions (and maybe matches) to the anchored starting state. // The anchored starting state is used for anchored searches. The only // mechanical difference between it and the unanchored start state is // that missing transitions map to the DEAD state instead of the FAIL // state. - self.set_anchored_start_state(); + self.set_anchored_start_state()?; // Rewrite transitions to the FAIL state on the unanchored start state // as self-transitions. This keeps the start state active at all times. self.add_unanchored_start_state_loop(); - // Set all transitions on the DEAD state to point to itself. This way, - // the DEAD state can never be escaped. It MUST be used as a sentinel - // in any correct search. - self.add_dead_state_loop(); + // Make some (possibly zero) states use a dense representation for + // transitions. It's important to do this right after the states + // and non-failure transitions are solidified. That way, subsequent + // accesses (particularly `fill_failure_transitions`) will benefit from + // the faster transition lookup in densified states. + self.densify()?; // The meat of the Aho-Corasick algorithm: compute and write failure // transitions. i.e., the state to move to when a transition isn't // defined in the current state. 
These are epsilon transitions and thus // make this formulation an NFA. - self.fill_failure_transitions(); + self.fill_failure_transitions()?; // Handle a special case under leftmost semantics when at least one // of the patterns is the empty string. self.close_start_state_loop_for_leftmost(); @@ -634,12 +1025,7 @@ impl<'a> Compiler<'a> { // NON-MATCH, ... This permits us to very quickly query the type of // the state we're currently in during a search. self.shuffle(); - // Turn our set of bytes into equivalent classes. This NFA - // implementation doesn't use byte classes directly, but any - // Aho-Corasick searcher built from this one might. - self.nfa.byte_classes = self.byteset.byte_classes(); self.nfa.prefilter = self.prefilter.build(); - self.calculate_memory_usage(); // Store the maximum ID of all *relevant* special states. Start states // are only relevant when we have a prefilter, otherwise, there is zero // reason to care about whether a state is a start state or not during @@ -657,6 +1043,10 @@ impl<'a> Compiler<'a> { } else { self.nfa.special.max_match_id }; + self.nfa.sparse.shrink_to_fit(); + self.nfa.dense.shrink_to_fit(); + self.nfa.matches.shrink_to_fit(); + self.nfa.pattern_lens.shrink_to_fit(); Ok(self.nfa) } @@ -723,10 +1113,9 @@ impl<'a> Compiler<'a> { continue 'PATTERNS; } - // Add this byte to our equivalence classes. We don't use these - // for NFA construction. These are instead used only if we're - // building a DFA. They would technically be useful for the - // NFA, but it would require a second pass over the patterns. + // Add this byte to our equivalence classes. These don't + // get used while building the trie, but other Aho-Corasick + // implementations may use them. self.byteset.set_range(b, b); if self.builder.ascii_case_insensitive { let b = opposite_ascii_case(b); @@ -740,22 +1129,22 @@ impl<'a> Compiler<'a> { // use a dense representation that uses more memory but is // faster. 
Other states use a sparse representation that uses // less memory but is slower. - let next = self.nfa.states[prev].next_state(b); + let next = self.nfa.follow_transition(prev, b); if next != NFA::FAIL { prev = next; } else { - let next = self.add_state(depth)?; - self.nfa.states[prev].set_next_state(b, next); + let next = self.nfa.alloc_state(depth)?; + self.nfa.add_transition(prev, b, next)?; if self.builder.ascii_case_insensitive { let b = opposite_ascii_case(b); - self.nfa.states[prev].set_next_state(b, next); + self.nfa.add_transition(prev, b, next)?; } prev = next; } } // Once the pattern has been added, log the match in the final // state that it reached. - self.nfa.states[prev].matches.push(pid); + self.nfa.add_match(prev, pid)?; } Ok(()) } @@ -883,7 +1272,7 @@ impl<'a> Compiler<'a> { /// automaton. There are also a couple Java libraries that support leftmost /// longest semantics, but they do it by building a queue of matches at /// search time, which is even worse than what Perl is doing. ---AG - fn fill_failure_transitions(&mut self) { + fn fill_failure_transitions(&mut self) -> Result<(), BuildError> { let is_leftmost = self.builder.match_kind.is_leftmost(); let start_uid = self.nfa.special.start_unanchored_id; // Initialize the queue for breadth first search with all transitions @@ -892,15 +1281,18 @@ impl<'a> Compiler<'a> { // transitions, then this would never terminate. let mut queue = VecDeque::new(); let mut seen = self.queued_set(); - for i in 0..self.nfa.states[start_uid].trans.len() { - let (_, next) = self.nfa.states[start_uid].trans[i]; + let mut prev_link = None; + while let Some(link) = self.nfa.next_link(start_uid, prev_link) { + prev_link = Some(link); + let t = self.nfa.sparse[link]; + // Skip anything we've seen before and any self-transitions on the // start state. 
- if next == start_uid || seen.contains(next) { + if start_uid == t.next() || seen.contains(t.next) { continue; } - queue.push_back(next); - seen.insert(next); + queue.push_back(t.next); + seen.insert(t.next); // Under leftmost semantics, if a state immediately following // the start state is a match state, then we never want to // follow its failure transition since the failure transition @@ -909,14 +1301,17 @@ impl<'a> Compiler<'a> { // found. // // We apply the same logic to non-start states below as well. - if is_leftmost && self.nfa.states[next].is_match() { - self.nfa.states[next].fail = NFA::DEAD; + if is_leftmost && self.nfa.states[t.next].is_match() { + self.nfa.states[t.next].fail = NFA::DEAD; } } while let Some(id) = queue.pop_front() { - for i in 0..self.nfa.states[id].trans.len() { - let (b, next) = self.nfa.states[id].trans[i]; - if seen.contains(next) { + let mut prev_link = None; + while let Some(link) = self.nfa.next_link(id, prev_link) { + prev_link = Some(link); + let t = self.nfa.sparse[link]; + + if seen.contains(t.next) { // The only way to visit a duplicate state in a transition // list is when ASCII case insensitivity is enabled. In // this case, we want to skip it since it's redundant work. @@ -925,8 +1320,8 @@ impl<'a> Compiler<'a> { // See the 'acasei010' regression test. continue; } - queue.push_back(next); - seen.insert(next); + queue.push_back(t.next); + seen.insert(t.next); // As above for start states, under leftmost semantics, once // we see a match all subsequent states should have no failure @@ -949,17 +1344,17 @@ impl<'a> Compiler<'a> { // transition to the dead state on all match states, the dead // state will automatically propagate to all subsequent states // via the failure state computation below. 
- if is_leftmost && self.nfa.states[next].is_match() { - self.nfa.states[next].fail = NFA::DEAD; + if is_leftmost && self.nfa.states[t.next].is_match() { + self.nfa.states[t.next].fail = NFA::DEAD; continue; } let mut fail = self.nfa.states[id].fail; - while self.nfa.states[fail].next_state(b) == NFA::FAIL { + while self.nfa.follow_transition(fail, t.byte) == NFA::FAIL { fail = self.nfa.states[fail].fail; } - fail = self.nfa.states[fail].next_state(b); - self.nfa.states[next].fail = fail; - self.copy_matches(fail, next); + fail = self.nfa.follow_transition(fail, t.byte); + self.nfa.states[t.next].fail = fail; + self.nfa.copy_matches(fail, t.next)?; } // If the start state is a match state, then this automaton can // match the empty string. This implies all states are match states @@ -971,9 +1366,11 @@ impl<'a> Compiler<'a> { // states only report the first match, which is never empty since // it isn't a start state. if !is_leftmost { - self.copy_matches(self.nfa.special.start_unanchored_id, id); + self.nfa + .copy_matches(self.nfa.special.start_unanchored_id, id)?; } } + Ok(()) } /// Shuffle the states so that they appear in this sequence: @@ -1083,6 +1480,51 @@ impl<'a> Compiler<'a> { remapper.remap(&mut self.nfa); } + /// Attempts to convert the transition representation of a subset of states + /// in this NFA from sparse to dense. This can greatly improve search + /// performance since states with a higher number of transitions tend to + /// correlate with very active states. + /// + /// We generally only densify states that are close to the start state. + /// These tend to be the most active states and thus benefit from a dense + /// representation more than other states. + /// + /// This tends to best balance between memory usage and performance. In + /// particular, the *vast majority* of all states in a typical Aho-Corasick + /// automaton have only 1 transition and are usually farther from the start + /// state and thus don't get densified. 
+ /// + /// Note that this doesn't remove the sparse representation of transitions + /// for states that are densified. It could be done, but actually removing + /// entries from `NFA::sparse` is likely more expensive than it's worth. + fn densify(&mut self) -> Result<(), BuildError> { + for i in 0..self.nfa.states.len() { + let sid = StateID::new(i).unwrap(); + // Don't bother densifying states that are only used as sentinels. + if sid == NFA::DEAD || sid == NFA::FAIL { + continue; + } + // Only densify states that are "close enough" to the start state. + if self.nfa.states[sid].depth.as_usize() + >= self.builder.dense_depth + { + continue; + } + let dense = self.nfa.alloc_dense_state()?; + let mut prev_link = None; + while let Some(link) = self.nfa.next_link(sid, prev_link) { + prev_link = Some(link); + let t = self.nfa.sparse[link]; + + let class = usize::from(self.nfa.byte_classes.get(t.byte)); + let index = dense.as_usize() + class; + self.nfa.dense[index] = t.next; + } + self.nfa.states[sid].dense = dense; + } + Ok(()) + } + /// Returns a set that tracked queued states. /// /// This is only necessary when ASCII case insensitivity is enabled, since @@ -1104,23 +1546,35 @@ impl<'a> Compiler<'a> { /// make the unanchored starting state dense, and thus in turn make /// transition lookups on it faster. (Which is worth doing because it's /// the most active state.) 
- fn init_unanchored_start_state(&mut self) { + fn init_unanchored_start_state(&mut self) -> Result<(), BuildError> { let start_uid = self.nfa.special.start_unanchored_id; - for byte in 0..=255 { - self.nfa.states[start_uid].set_next_state(byte, NFA::FAIL); - } + let start_aid = self.nfa.special.start_anchored_id; + self.nfa.init_full_state(start_uid, NFA::FAIL)?; + self.nfa.init_full_state(start_aid, NFA::FAIL)?; + Ok(()) } /// Setup the anchored start state by copying all of the transitions and /// matches from the unanchored starting state with one change: the failure /// transition is changed to the DEAD state, so that for any undefined /// transitions, the search will stop. - fn set_anchored_start_state(&mut self) { + fn set_anchored_start_state(&mut self) -> Result<(), BuildError> { let start_uid = self.nfa.special.start_unanchored_id; let start_aid = self.nfa.special.start_anchored_id; - self.nfa.states[start_aid].trans = - self.nfa.states[start_uid].trans.clone(); - self.copy_matches(start_uid, start_aid); + let (mut uprev_link, mut aprev_link) = (None, None); + loop { + let unext = self.nfa.next_link(start_uid, uprev_link); + let anext = self.nfa.next_link(start_aid, aprev_link); + let (ulink, alink) = match (unext, anext) { + (Some(ulink), Some(alink)) => (ulink, alink), + (None, None) => break, + _ => unreachable!(), + }; + uprev_link = Some(ulink); + aprev_link = Some(alink); + self.nfa.sparse[alink].next = self.nfa.sparse[ulink].next; + } + self.nfa.copy_matches(start_uid, start_aid)?; // This is the main difference between the unanchored and anchored // starting states. If a lookup on an anchored starting state fails, // then the search should stop. @@ -1128,6 +1582,7 @@ impl<'a> Compiler<'a> { // N.B. This assumes that the loop on the unanchored starting state // hasn't been created yet. 
self.nfa.states[start_aid].fail = NFA::DEAD; + Ok(()) } /// Set the failure transitions on the start state to loop back to the @@ -1141,10 +1596,11 @@ impl<'a> Compiler<'a> { /// state already exists or not. fn add_unanchored_start_state_loop(&mut self) { let start_uid = self.nfa.special.start_unanchored_id; - let start = &mut self.nfa.states[start_uid]; - for b in 0..=255 { - if start.next_state(b) == NFA::FAIL { - start.set_next_state(b, start_uid); + let mut prev_link = None; + while let Some(link) = self.nfa.next_link(start_uid, prev_link) { + prev_link = Some(link); + if self.nfa.sparse[link].next() == NFA::FAIL { + self.nfa.sparse[link].next = start_uid; } } } @@ -1164,10 +1620,18 @@ impl<'a> Compiler<'a> { fn close_start_state_loop_for_leftmost(&mut self) { let start_uid = self.nfa.special.start_unanchored_id; let start = &mut self.nfa.states[start_uid]; + let dense = start.dense; if self.builder.match_kind.is_leftmost() && start.is_match() { - for b in 0..=255 { - if start.next_state(b) == start_uid { - start.set_next_state(b, NFA::DEAD); + let mut prev_link = None; + while let Some(link) = self.nfa.next_link(start_uid, prev_link) { + prev_link = Some(link); + if self.nfa.sparse[link].next() == start_uid { + self.nfa.sparse[link].next = NFA::DEAD; + if dense != StateID::ZERO { + let b = self.nfa.sparse[link].byte; + let class = usize::from(self.nfa.byte_classes.get(b)); + self.nfa.dense[dense.as_usize() + class] = NFA::DEAD; + } } } } @@ -1176,51 +1640,9 @@ impl<'a> Compiler<'a> { /// Sets all transitions on the dead state to point back to the dead state. /// Normally, missing transitions map back to the failure state, but the /// point of the dead state is to act as a sink that can never be escaped. - fn add_dead_state_loop(&mut self) { - let dead = &mut self.nfa.states[NFA::DEAD]; - for b in 0..=255 { - dead.set_next_state(b, NFA::DEAD); - } - } - - /// Copy matches from the `src` state to the `dst` state. 
This is useful - /// when a match state can be reached via a failure transition. In which - /// case, you'll want to copy the matches (if any) from the state reached - /// by the failure transition to the original state you were at. - fn copy_matches(&mut self, src: StateID, dst: StateID) { - let (src, dst) = - get_two_mut(&mut self.nfa.states, src.as_usize(), dst.as_usize()); - dst.matches.extend_from_slice(&src.matches); - } - - /// Allocate and add a fresh state to the underlying NFA and return its - /// ID (guaranteed to be one more than the ID of the previously allocated - /// state). If the ID would overflow `StateID`, then this returns an error. - fn add_state(&mut self, depth: usize) -> Result { - // This is OK because we error when building the trie if we see a - // pattern whose length cannot fit into a 'SmallIndex', and the longest - // possible depth corresponds to the length of the longest pattern. - let depth = SmallIndex::new(depth) - .expect("patterns longer than SmallIndex::MAX are not allowed"); - let id = StateID::new(self.nfa.states.len()).map_err(|e| { - BuildError::state_id_overflow(StateID::MAX.as_u64(), e.attempted()) - })?; - self.nfa.states.push(State { - trans: vec![], - matches: vec![], - fail: self.nfa.special.start_unanchored_id, - depth, - }); - Ok(id) - } - - /// Computes the total amount of heap used by this NFA in bytes. 
- fn calculate_memory_usage(&mut self) { - use core::mem::size_of; - - for state in self.nfa.states.iter() { - self.nfa.memory_usage += size_of::() + state.memory_usage(); - } + fn add_dead_state_loop(&mut self) -> Result<(), BuildError> { + self.nfa.init_full_state(NFA::DEAD, NFA::DEAD)?; + Ok(()) } } @@ -1268,7 +1690,10 @@ impl QueuedSet { impl core::fmt::Debug for NFA { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - use crate::automaton::fmt_state_indicator; + use crate::{ + automaton::{fmt_state_indicator, sparse_transitions}, + util::debug::DebugByte, + }; writeln!(f, "noncontiguous::NFA(")?; for (sid, state) in self.states.iter().with_state_ids() { @@ -1285,11 +1710,37 @@ impl core::fmt::Debug for NFA { sid.as_usize(), state.fail.as_usize() )?; - state.fmt(f)?; + + let it = sparse_transitions( + self.iter_trans(sid).map(|t| (t.byte, t.next)), + ) + .enumerate(); + for (i, (start, end, sid)) in it { + if i > 0 { + write!(f, ", ")?; + } + if start == end { + write!( + f, + "{:?} => {:?}", + DebugByte(start), + sid.as_usize() + )?; + } else { + write!( + f, + "{:?}-{:?} => {:?}", + DebugByte(start), + DebugByte(end), + sid.as_usize() + )?; + } + } + write!(f, "\n")?; if self.is_match(sid) { write!(f, " matches: ")?; - for (i, pid) in state.matches.iter().enumerate() { + for (i, pid) in self.iter_matches(sid).enumerate() { if i > 0 { write!(f, ", ")?; } @@ -1309,18 +1760,3 @@ impl core::fmt::Debug for NFA { Ok(()) } } - -/// Safely return two mutable borrows to two different locations in the given -/// slice. -/// -/// This panics if i == j. 
-fn get_two_mut(xs: &mut [T], i: usize, j: usize) -> (&mut T, &mut T) { - assert!(i != j, "{} must not be equal to {}", i, j); - if i < j { - let (before, after) = xs.split_at_mut(j); - (&mut before[i], &mut after[0]) - } else { - let (before, after) = xs.split_at_mut(i); - (&mut after[0], &mut before[j]) - } -} diff --git a/vendor/aho-corasick/src/packed/api.rs b/vendor/aho-corasick/src/packed/api.rs index bc4c60d..44f0bc9 100644 --- a/vendor/aho-corasick/src/packed/api.rs +++ b/vendor/aho-corasick/src/packed/api.rs @@ -1,9 +1,7 @@ +use alloc::sync::Arc; + use crate::{ - packed::{ - pattern::Patterns, - rabinkarp::RabinKarp, - teddy::{self, Teddy}, - }, + packed::{pattern::Patterns, rabinkarp::RabinKarp, teddy}, util::search::{Match, Span}, }; @@ -77,7 +75,9 @@ impl Default for MatchKind { /// .collect(); /// assert_eq!(vec![PatternID::must(1)], matches); /// # Some(()) } -/// # if cfg!(all(feature = "std", target_arch = "x86_64")) { +/// # if cfg!(all(feature = "std", any( +/// # target_arch = "x86_64", target_arch = "aarch64", +/// # ))) { /// # example().unwrap() /// # } else { /// # assert!(example().is_none()); @@ -87,8 +87,9 @@ impl Default for MatchKind { pub struct Config { kind: MatchKind, force: Option, - force_teddy_fat: Option, - force_avx: Option, + only_teddy_fat: Option, + only_teddy_256bit: Option, + heuristic_pattern_limits: bool, } /// An internal option for forcing the use of a particular packed algorithm. @@ -115,8 +116,9 @@ impl Config { Config { kind: MatchKind::LeftmostFirst, force: None, - force_teddy_fat: None, - force_avx: None, + only_teddy_fat: None, + only_teddy_256bit: None, + heuristic_pattern_limits: true, } } @@ -138,7 +140,7 @@ impl Config { /// should not use it as it is not part of the API stability guarantees of /// this crate. 
#[doc(hidden)] - pub fn force_teddy(&mut self, yes: bool) -> &mut Config { + pub fn only_teddy(&mut self, yes: bool) -> &mut Config { if yes { self.force = Some(ForceAlgorithm::Teddy); } else { @@ -153,8 +155,8 @@ impl Config { /// should not use it as it is not part of the API stability guarantees of /// this crate. #[doc(hidden)] - pub fn force_teddy_fat(&mut self, yes: Option) -> &mut Config { - self.force_teddy_fat = yes; + pub fn only_teddy_fat(&mut self, yes: Option) -> &mut Config { + self.only_teddy_fat = yes; self } @@ -165,8 +167,8 @@ impl Config { /// should not use it as it is not part of the API stability guarantees of /// this crate. #[doc(hidden)] - pub fn force_avx(&mut self, yes: Option) -> &mut Config { - self.force_avx = yes; + pub fn only_teddy_256bit(&mut self, yes: Option) -> &mut Config { + self.only_teddy_256bit = yes; self } @@ -176,7 +178,7 @@ impl Config { /// should not use it as it is not part of the API stability guarantees of /// this crate. #[doc(hidden)] - pub fn force_rabin_karp(&mut self, yes: bool) -> &mut Config { + pub fn only_rabin_karp(&mut self, yes: bool) -> &mut Config { if yes { self.force = Some(ForceAlgorithm::RabinKarp); } else { @@ -184,6 +186,17 @@ impl Config { } self } + + /// Request that heuristic limitations on the number of patterns be + /// employed. This useful to disable for benchmarking where one wants to + /// explore how Teddy performs on large number of patterns even if the + /// heuristics would otherwise refuse construction. + /// + /// This is enabled by default. + pub fn heuristic_pattern_limits(&mut self, yes: bool) -> &mut Config { + self.heuristic_pattern_limits = yes; + self + } } /// A builder for constructing a packed searcher from a collection of patterns. 
@@ -207,7 +220,9 @@ impl Config { /// .collect(); /// assert_eq!(vec![PatternID::ZERO], matches); /// # Some(()) } -/// # if cfg!(all(feature = "std", target_arch = "x86_64")) { +/// # if cfg!(all(feature = "std", any( +/// # target_arch = "x86_64", target_arch = "aarch64", +/// # ))) { /// # example().unwrap() /// # } else { /// # assert!(example().is_none()); @@ -241,6 +256,7 @@ impl Builder { } let mut patterns = self.patterns.clone(); patterns.set_match_kind(self.config.kind); + let patterns = Arc::new(patterns); let rabinkarp = RabinKarp::new(&patterns); // Effectively, we only want to return a searcher if we can use Teddy, // since Teddy is our only fast packed searcher at the moment. @@ -250,7 +266,7 @@ impl Builder { let (search_kind, minimum_len) = match self.config.force { None | Some(ForceAlgorithm::Teddy) => { debug!("trying to build Teddy packed matcher"); - let teddy = match self.build_teddy(&patterns) { + let teddy = match self.build_teddy(Arc::clone(&patterns)) { None => return None, Some(teddy) => teddy, }; @@ -265,11 +281,12 @@ impl Builder { Some(Searcher { patterns, rabinkarp, search_kind, minimum_len }) } - fn build_teddy(&self, patterns: &Patterns) -> Option { + fn build_teddy(&self, patterns: Arc) -> Option { teddy::Builder::new() - .avx(self.config.force_avx) - .fat(self.config.force_teddy_fat) - .build(&patterns) + .only_256bit(self.config.only_teddy_256bit) + .only_fat(self.config.only_teddy_fat) + .heuristic_pattern_limits(self.config.heuristic_pattern_limits) + .build(patterns) } /// Add the given pattern to this set to match. @@ -327,6 +344,16 @@ impl Builder { } self } + + /// Returns the number of patterns added to this builder. + pub fn len(&self) -> usize { + self.patterns.len() + } + + /// Returns the length, in bytes, of the shortest pattern added. 
+ pub fn minimum_len(&self) -> usize { + self.patterns.minimum_len() + } } impl Default for Builder { @@ -357,7 +384,9 @@ impl Default for Builder { /// .collect(); /// assert_eq!(vec![PatternID::ZERO], matches); /// # Some(()) } -/// # if cfg!(all(feature = "std", target_arch = "x86_64")) { +/// # if cfg!(all(feature = "std", any( +/// # target_arch = "x86_64", target_arch = "aarch64", +/// # ))) { /// # example().unwrap() /// # } else { /// # assert!(example().is_none()); @@ -365,7 +394,7 @@ impl Default for Builder { /// ``` #[derive(Clone, Debug)] pub struct Searcher { - patterns: Patterns, + patterns: Arc, rabinkarp: RabinKarp, search_kind: SearchKind, minimum_len: usize, @@ -373,7 +402,7 @@ pub struct Searcher { #[derive(Clone, Debug)] enum SearchKind { - Teddy(Teddy), + Teddy(teddy::Searcher), RabinKarp, } @@ -400,7 +429,9 @@ impl Searcher { /// .collect(); /// assert_eq!(vec![PatternID::ZERO], matches); /// # Some(()) } - /// # if cfg!(all(feature = "std", target_arch = "x86_64")) { + /// # if cfg!(all(feature = "std", any( + /// # target_arch = "x86_64", target_arch = "aarch64", + /// # ))) { /// # example().unwrap() /// # } else { /// # assert!(example().is_none()); @@ -448,7 +479,9 @@ impl Searcher { /// assert_eq!(0, mat.start()); /// assert_eq!(6, mat.end()); /// # Some(()) } - /// # if cfg!(all(feature = "std", target_arch = "x86_64")) { + /// # if cfg!(all(feature = "std", any( + /// # target_arch = "x86_64", target_arch = "aarch64", + /// # ))) { /// # example().unwrap() /// # } else { /// # assert!(example().is_none()); @@ -484,7 +517,9 @@ impl Searcher { /// assert_eq!(3, mat.start()); /// assert_eq!(9, mat.end()); /// # Some(()) } - /// # if cfg!(all(feature = "std", target_arch = "x86_64")) { + /// # if cfg!(all(feature = "std", any( + /// # target_arch = "x86_64", target_arch = "aarch64", + /// # ))) { /// # example().unwrap() /// # } else { /// # assert!(example().is_none()); @@ -502,17 +537,11 @@ impl Searcher { if haystack[span].len() < 
teddy.minimum_len() { return self.find_in_slow(haystack, span); } - teddy.find_at( - &self.patterns, - &haystack[..span.end], - span.start, - ) + teddy.find(&haystack[..span.end], span.start) + } + SearchKind::RabinKarp => { + self.rabinkarp.find_at(&haystack[..span.end], span.start) } - SearchKind::RabinKarp => self.rabinkarp.find_at( - &self.patterns, - &haystack[..span.end], - span.start, - ), } } @@ -539,12 +568,15 @@ impl Searcher { /// PatternID::must(1), /// ], matches); /// # Some(()) } - /// # if cfg!(all(feature = "std", target_arch = "x86_64")) { + /// # if cfg!(all(feature = "std", any( + /// # target_arch = "x86_64", target_arch = "aarch64", + /// # ))) { /// # example().unwrap() /// # } else { /// # assert!(example().is_none()); /// # } /// ``` + #[inline] pub fn find_iter<'a, 'b, B: ?Sized + AsRef<[u8]>>( &'a self, haystack: &'b B, @@ -568,12 +600,15 @@ impl Searcher { /// // leftmost-first is the default. /// assert_eq!(&MatchKind::LeftmostFirst, searcher.match_kind()); /// # Some(()) } - /// # if cfg!(all(feature = "std", target_arch = "x86_64")) { + /// # if cfg!(all(feature = "std", any( + /// # target_arch = "x86_64", target_arch = "aarch64", + /// # ))) { /// # example().unwrap() /// # } else { /// # assert!(example().is_none()); /// # } /// ``` + #[inline] pub fn match_kind(&self) -> &MatchKind { self.patterns.match_kind() } @@ -588,12 +623,14 @@ impl Searcher { /// want to avoid ever using the slower variant, which one can do by /// never passing a haystack shorter than the minimum length returned by /// this method. + #[inline] pub fn minimum_len(&self) -> usize { self.minimum_len } /// Returns the approximate total amount of heap used by this searcher, in /// units of bytes. + #[inline] pub fn memory_usage(&self) -> usize { self.patterns.memory_usage() + self.rabinkarp.memory_usage() @@ -607,11 +644,7 @@ impl Searcher { /// built but the haystack is smaller than ~34 bytes, then Teddy might not /// be able to run. 
fn find_in_slow(&self, haystack: &[u8], span: Span) -> Option { - self.rabinkarp.find_at( - &self.patterns, - &haystack[..span.end], - span.start, - ) + self.rabinkarp.find_at(&haystack[..span.end], span.start) } } diff --git a/vendor/aho-corasick/src/packed/ext.rs b/vendor/aho-corasick/src/packed/ext.rs new file mode 100644 index 0000000..b689642 --- /dev/null +++ b/vendor/aho-corasick/src/packed/ext.rs @@ -0,0 +1,39 @@ +/// A trait for adding some helper routines to pointers. +pub(crate) trait Pointer { + /// Returns the distance, in units of `T`, between `self` and `origin`. + /// + /// # Safety + /// + /// Same as `ptr::offset_from` in addition to `self >= origin`. + unsafe fn distance(self, origin: Self) -> usize; + + /// Casts this pointer to `usize`. + /// + /// Callers should not convert the `usize` back to a pointer if at all + /// possible. (And if you believe it's necessary, open an issue to discuss + /// why. Otherwise, it has the potential to violate pointer provenance.) + /// The purpose of this function is just to be able to do arithmetic, i.e., + /// computing offsets or alignments. + fn as_usize(self) -> usize; +} + +impl Pointer for *const T { + unsafe fn distance(self, origin: *const T) -> usize { + // TODO: Replace with `ptr::sub_ptr` once stabilized. 
+ usize::try_from(self.offset_from(origin)).unwrap_unchecked() + } + + fn as_usize(self) -> usize { + self as usize + } +} + +impl Pointer for *mut T { + unsafe fn distance(self, origin: *mut T) -> usize { + (self as *const T).distance(origin as *const T) + } + + fn as_usize(self) -> usize { + (self as *const T).as_usize() + } +} diff --git a/vendor/aho-corasick/src/packed/mod.rs b/vendor/aho-corasick/src/packed/mod.rs index 9235df6..3990bc9 100644 --- a/vendor/aho-corasick/src/packed/mod.rs +++ b/vendor/aho-corasick/src/packed/mod.rs @@ -40,7 +40,9 @@ let matches: Vec = searcher .collect(); assert_eq!(vec![PatternID::ZERO], matches); # Some(()) } -# if cfg!(all(feature = "std", target_arch = "x86_64")) { +# if cfg!(all(feature = "std", any( +# target_arch = "x86_64", target_arch = "aarch64", +# ))) { # example().unwrap() # } else { # assert!(example().is_none()); @@ -66,7 +68,9 @@ let matches: Vec = searcher .collect(); assert_eq!(vec![PatternID::must(1)], matches); # Some(()) } -# if cfg!(all(feature = "std", target_arch = "x86_64")) { +# if cfg!(all(feature = "std", any( +# target_arch = "x86_64", target_arch = "aarch64", +# ))) { # example().unwrap() # } else { # assert!(example().is_none()); @@ -95,8 +99,8 @@ implementation detail, here are some common reasons: so, but this limit may fluctuate based on available CPU features. * The available packed algorithms require CPU features that aren't available. For example, currently, this crate only provides packed algorithms for - `x86_64`. Therefore, constructing a packed searcher on any other target - (e.g., ARM) will always fail. + `x86_64` and `aarch64`. Therefore, constructing a packed searcher on any + other target will always fail. * Zero patterns were given, or one of the patterns given was empty. Packed searchers require at least one pattern and that all patterns are non-empty. 
* Something else about the nature of the patterns (typically based on @@ -107,10 +111,10 @@ implementation detail, here are some common reasons: pub use crate::packed::api::{Builder, Config, FindIter, MatchKind, Searcher}; mod api; +mod ext; mod pattern; mod rabinkarp; mod teddy; #[cfg(all(feature = "std", test))] mod tests; -#[cfg(all(feature = "std", target_arch = "x86_64"))] mod vector; diff --git a/vendor/aho-corasick/src/packed/pattern.rs b/vendor/aho-corasick/src/packed/pattern.rs index a0b371c..95aca4d 100644 --- a/vendor/aho-corasick/src/packed/pattern.rs +++ b/vendor/aho-corasick/src/packed/pattern.rs @@ -1,14 +1,11 @@ use core::{cmp, fmt, mem, u16, usize}; -use alloc::{string::String, vec, vec::Vec}; +use alloc::{boxed::Box, string::String, vec, vec::Vec}; -use crate::packed::api::MatchKind; - -/// The type used for representing a pattern identifier. -/// -/// We don't use `usize` here because our packed searchers don't scale to -/// huge numbers of patterns, so we keep things a bit smaller. -pub type PatternID = u16; +use crate::{ + packed::{api::MatchKind, ext::Pointer}, + PatternID, +}; /// A non-empty collection of non-empty patterns to search for. /// @@ -20,7 +17,7 @@ pub type PatternID = u16; /// Note that this collection is not a set. The same pattern can appear more /// than once. #[derive(Clone, Debug)] -pub struct Patterns { +pub(crate) struct Patterns { /// The match semantics supported by this collection of patterns. /// /// The match semantics determines the order of the iterator over patterns. @@ -38,14 +35,17 @@ pub struct Patterns { order: Vec, /// The length of the smallest pattern, in bytes. minimum_len: usize, - /// The largest pattern identifier. This should always be equivalent to - /// the number of patterns minus one in this collection. - max_pattern_id: PatternID, /// The total number of pattern bytes across the entire collection. This /// is used for reporting total heap usage in constant time. 
total_pattern_bytes: usize, } +// BREADCRUMBS: I think we want to experiment with a different bucket +// representation. Basically, each bucket is just a Range to a single +// contiguous allocation? Maybe length-prefixed patterns or something? The +// idea is to try to get rid of the pointer chasing in verification. I don't +// know that that is the issue, but I suspect it is. + impl Patterns { /// Create a new collection of patterns for the given match semantics. The /// ID of each pattern is the index of the pattern at which it occurs in @@ -54,13 +54,12 @@ impl Patterns { /// If any of the patterns in the slice given are empty, then this panics. /// Similarly, if the number of patterns given is zero, then this also /// panics. - pub fn new() -> Patterns { + pub(crate) fn new() -> Patterns { Patterns { kind: MatchKind::default(), by_id: vec![], order: vec![], minimum_len: usize::MAX, - max_pattern_id: 0, total_pattern_bytes: 0, } } @@ -68,12 +67,11 @@ impl Patterns { /// Add a pattern to this collection. /// /// This panics if the pattern given is empty. - pub fn add(&mut self, bytes: &[u8]) { + pub(crate) fn add(&mut self, bytes: &[u8]) { assert!(!bytes.is_empty()); assert!(self.by_id.len() <= u16::MAX as usize); - let id = self.by_id.len() as u16; - self.max_pattern_id = id; + let id = PatternID::new(self.by_id.len()).unwrap(); self.order.push(id); self.by_id.push(bytes.to_vec()); self.minimum_len = cmp::min(self.minimum_len, bytes.len()); @@ -83,7 +81,7 @@ impl Patterns { /// Set the match kind semantics for this collection of patterns. /// /// If the kind is not set, then the default is leftmost-first. 
- pub fn set_match_kind(&mut self, kind: MatchKind) { + pub(crate) fn set_match_kind(&mut self, kind: MatchKind) { self.kind = kind; match self.kind { MatchKind::LeftmostFirst => { @@ -92,10 +90,7 @@ impl Patterns { MatchKind::LeftmostLongest => { let (order, by_id) = (&mut self.order, &mut self.by_id); order.sort_by(|&id1, &id2| { - by_id[id1 as usize] - .len() - .cmp(&by_id[id2 as usize].len()) - .reverse() + by_id[id1].len().cmp(&by_id[id2].len()).reverse() }); } } @@ -104,18 +99,18 @@ impl Patterns { /// Return the number of patterns in this collection. /// /// This is guaranteed to be greater than zero. - pub fn len(&self) -> usize { + pub(crate) fn len(&self) -> usize { self.by_id.len() } /// Returns true if and only if this collection of patterns is empty. - pub fn is_empty(&self) -> bool { + pub(crate) fn is_empty(&self) -> bool { self.len() == 0 } /// Returns the approximate total amount of heap used by these patterns, in /// units of bytes. - pub fn memory_usage(&self) -> usize { + pub(crate) fn memory_usage(&self) -> usize { self.order.len() * mem::size_of::() + self.by_id.len() * mem::size_of::>() + self.total_pattern_bytes @@ -123,38 +118,29 @@ impl Patterns { /// Clears all heap memory associated with this collection of patterns and /// resets all state such that it is a valid empty collection. - pub fn reset(&mut self) { + pub(crate) fn reset(&mut self) { self.kind = MatchKind::default(); self.by_id.clear(); self.order.clear(); self.minimum_len = usize::MAX; - self.max_pattern_id = 0; - } - - /// Return the maximum pattern identifier in this collection. This can be - /// useful in searchers for ensuring that the collection of patterns they - /// are provided at search time and at build time have the same size. - pub fn max_pattern_id(&self) -> PatternID { - assert_eq!((self.max_pattern_id + 1) as usize, self.len()); - self.max_pattern_id } /// Returns the length, in bytes, of the smallest pattern. /// /// This is guaranteed to be at least one. 
- pub fn minimum_len(&self) -> usize { + pub(crate) fn minimum_len(&self) -> usize { self.minimum_len } /// Returns the match semantics used by these patterns. - pub fn match_kind(&self) -> &MatchKind { + pub(crate) fn match_kind(&self) -> &MatchKind { &self.kind } /// Return the pattern with the given identifier. If such a pattern does /// not exist, then this panics. - pub fn get(&self, id: PatternID) -> Pattern<'_> { - Pattern(&self.by_id[id as usize]) + pub(crate) fn get(&self, id: PatternID) -> Pattern<'_> { + Pattern(&self.by_id[id]) } /// Return the pattern with the given identifier without performing bounds @@ -164,9 +150,8 @@ impl Patterns { /// /// Callers must ensure that a pattern with the given identifier exists /// before using this method. - #[cfg(all(feature = "std", target_arch = "x86_64"))] - pub unsafe fn get_unchecked(&self, id: PatternID) -> Pattern<'_> { - Pattern(self.by_id.get_unchecked(id as usize)) + pub(crate) unsafe fn get_unchecked(&self, id: PatternID) -> Pattern<'_> { + Pattern(self.by_id.get_unchecked(id.as_usize())) } /// Return an iterator over all the patterns in this collection, in the @@ -187,7 +172,7 @@ impl Patterns { /// the order provided by this iterator, then the result is guaranteed /// to satisfy the correct match semantics. (Either leftmost-first or /// leftmost-longest.) - pub fn iter(&self) -> PatternIter<'_> { + pub(crate) fn iter(&self) -> PatternIter<'_> { PatternIter { patterns: self, i: 0 } } } @@ -200,7 +185,7 @@ impl Patterns { /// The lifetime `'p` corresponds to the lifetime of the collection of patterns /// this is iterating over. #[derive(Debug)] -pub struct PatternIter<'p> { +pub(crate) struct PatternIter<'p> { patterns: &'p Patterns, i: usize, } @@ -221,7 +206,7 @@ impl<'p> Iterator for PatternIter<'p> { /// A pattern that is used in packed searching. 
#[derive(Clone)] -pub struct Pattern<'a>(&'a [u8]); +pub(crate) struct Pattern<'a>(&'a [u8]); impl<'a> fmt::Debug for Pattern<'a> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { @@ -233,97 +218,263 @@ impl<'a> fmt::Debug for Pattern<'a> { impl<'p> Pattern<'p> { /// Returns the length of this pattern, in bytes. - pub fn len(&self) -> usize { + pub(crate) fn len(&self) -> usize { self.0.len() } /// Returns the bytes of this pattern. - pub fn bytes(&self) -> &[u8] { + pub(crate) fn bytes(&self) -> &[u8] { &self.0 } /// Returns the first `len` low nybbles from this pattern. If this pattern /// is shorter than `len`, then this panics. - #[cfg(all(feature = "std", target_arch = "x86_64"))] - pub fn low_nybbles(&self, len: usize) -> Vec { - let mut nybs = vec![]; - for &b in self.bytes().iter().take(len) { - nybs.push(b & 0xF); + pub(crate) fn low_nybbles(&self, len: usize) -> Box<[u8]> { + let mut nybs = vec![0; len].into_boxed_slice(); + for (i, byte) in self.bytes().iter().take(len).enumerate() { + nybs[i] = byte & 0xF; } nybs } /// Returns true if this pattern is a prefix of the given bytes. #[inline(always)] - pub fn is_prefix(&self, bytes: &[u8]) -> bool { - self.len() <= bytes.len() && self.equals(&bytes[..self.len()]) + pub(crate) fn is_prefix(&self, bytes: &[u8]) -> bool { + is_prefix(bytes, self.bytes()) } - /// Returns true if and only if this pattern equals the given bytes. + /// Returns true if this pattern is a prefix of the haystack given by the + /// raw `start` and `end` pointers. + /// + /// # Safety + /// + /// * It must be the case that `start < end` and that the distance between + /// them is at least equal to `V::BYTES`. That is, it must always be valid + /// to do at least an unaligned load of `V` at `start`. + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. 
+ /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. #[inline(always)] - pub fn equals(&self, bytes: &[u8]) -> bool { - // Why not just use memcmp for this? Well, memcmp requires calling out - // to libc, and this routine is called in fairly hot code paths. Other - // than just calling out to libc, it also seems to result in worse - // codegen. By rolling our own memcpy in pure Rust, it seems to appear - // more friendly to the optimizer. - // - // This results in an improvement in just about every benchmark. Some - // smaller than others, but in some cases, up to 30% faster. - - let (x, y) = (self.bytes(), bytes); - if x.len() != y.len() { + pub(crate) unsafe fn is_prefix_raw( + &self, + start: *const u8, + end: *const u8, + ) -> bool { + let patlen = self.bytes().len(); + let haylen = end.distance(start); + if patlen > haylen { return false; } - // If we don't have enough bytes to do 4-byte at a time loads, then - // fall back to the naive slow version. - if x.len() < 4 { - for (&b1, &b2) in x.iter().zip(y) { - if b1 != b2 { - return false; - } + // SAFETY: We've checked that the haystack has length at least equal + // to this pattern. All other safety concerns are the responsibility + // of the caller. + is_equal_raw(start, self.bytes().as_ptr(), patlen) + } +} + +/// Returns true if and only if `needle` is a prefix of `haystack`. +/// +/// This uses a latency optimized variant of `memcmp` internally which *might* +/// make this faster for very short strings. +/// +/// # Inlining +/// +/// This routine is marked `inline(always)`. 
If you want to call this function +/// in a way that is not always inlined, you'll need to wrap a call to it in +/// another function that is marked as `inline(never)` or just `inline`. +#[inline(always)] +fn is_prefix(haystack: &[u8], needle: &[u8]) -> bool { + if needle.len() > haystack.len() { + return false; + } + // SAFETY: Our pointers are derived directly from borrowed slices which + // uphold all of our safety guarantees except for length. We account for + // length with the check above. + unsafe { is_equal_raw(haystack.as_ptr(), needle.as_ptr(), needle.len()) } +} + +/// Compare corresponding bytes in `x` and `y` for equality. +/// +/// That is, this returns true if and only if `x.len() == y.len()` and +/// `x[i] == y[i]` for all `0 <= i < x.len()`. +/// +/// Note that this isn't used. We only use it in tests as a convenient way +/// of testing `is_equal_raw`. +/// +/// # Inlining +/// +/// This routine is marked `inline(always)`. If you want to call this function +/// in a way that is not always inlined, you'll need to wrap a call to it in +/// another function that is marked as `inline(never)` or just `inline`. +/// +/// # Motivation +/// +/// Why not use slice equality instead? Well, slice equality usually results in +/// a call out to the current platform's `libc` which might not be inlineable +/// or have other overhead. This routine isn't guaranteed to be a win, but it +/// might be in some cases. +#[cfg(test)] +#[inline(always)] +fn is_equal(x: &[u8], y: &[u8]) -> bool { + if x.len() != y.len() { + return false; + } + // SAFETY: Our pointers are derived directly from borrowed slices which + // uphold all of our safety guarantees except for length. We account for + // length with the check above. + unsafe { is_equal_raw(x.as_ptr(), y.as_ptr(), x.len()) } +} + +/// Compare `n` bytes at the given pointers for equality. +/// +/// This returns true if and only if `*x.add(i) == *y.add(i)` for all +/// `0 <= i < n`. 
+/// +/// # Inlining +/// +/// This routine is marked `inline(always)`. If you want to call this function +/// in a way that is not always inlined, you'll need to wrap a call to it in +/// another function that is marked as `inline(never)` or just `inline`. +/// +/// # Motivation +/// +/// Why not use slice equality instead? Well, slice equality usually results in +/// a call out to the current platform's `libc` which might not be inlineable +/// or have other overhead. This routine isn't guaranteed to be a win, but it +/// might be in some cases. +/// +/// # Safety +/// +/// * Both `x` and `y` must be valid for reads of up to `n` bytes. +/// * Both `x` and `y` must point to an initialized value. +/// * Both `x` and `y` must each point to an allocated object and +/// must either be in bounds or at most one byte past the end of the +/// allocated object. `x` and `y` do not need to point to the same allocated +/// object, but they may. +/// * Both `x` and `y` must be _derived from_ a pointer to their respective +/// allocated objects. +/// * The distance between `x` and `x+n` must not overflow `isize`. Similarly +/// for `y` and `y+n`. +/// * The distance being in bounds must not rely on "wrapping around" the +/// address space. +#[inline(always)] +unsafe fn is_equal_raw(mut x: *const u8, mut y: *const u8, n: usize) -> bool { + // If we don't have enough bytes to do 4-byte at a time loads, then + // handle each possible length specially. Note that I used to have a + // byte-at-a-time loop here and that turned out to be quite a bit slower + // for the memmem/pathological/defeat-simple-vector-alphabet benchmark. + if n < 4 { + return match n { + 0 => true, + 1 => x.read() == y.read(), + 2 => { + x.cast::().read_unaligned() + == y.cast::().read_unaligned() } - return true; + // I also tried copy_nonoverlapping here and it looks like the + // codegen is the same. 
+ 3 => x.cast::<[u8; 3]>().read() == y.cast::<[u8; 3]>().read(), + _ => unreachable!(), + }; + } + // When we have 4 or more bytes to compare, then proceed in chunks of 4 at + // a time using unaligned loads. + // + // Also, why do 4 byte loads instead of, say, 8 byte loads? The reason is + // that this particular version of memcmp is likely to be called with tiny + // needles. That means that if we do 8 byte loads, then a higher proportion + // of memcmp calls will use the slower variant above. With that said, this + // is a hypothesis and is only loosely supported by benchmarks. There's + // likely some improvement that could be made here. The main thing here + // though is to optimize for latency, not throughput. + + // SAFETY: The caller is responsible for ensuring the pointers we get are + // valid and readable for at least `n` bytes. We also do unaligned loads, + // so there's no need to ensure we're aligned. (This is justified by this + // routine being specifically for short strings.) + let xend = x.add(n.wrapping_sub(4)); + let yend = y.add(n.wrapping_sub(4)); + while x < xend { + let vx = x.cast::().read_unaligned(); + let vy = y.cast::().read_unaligned(); + if vx != vy { + return false; } - // When we have 4 or more bytes to compare, then proceed in chunks of 4 - // at a time using unaligned loads. - // - // Also, why do 4 byte loads instead of, say, 8 byte loads? The reason - // is that this particular version of memcmp is likely to be called - // with tiny needles. That means that if we do 8 byte loads, then a - // higher proportion of memcmp calls will use the slower variant above. - // With that said, this is a hypothesis and is only loosely supported - // by benchmarks. There's likely some improvement that could be made - // here. The main thing here though is to optimize for latency, not - // throughput. - - // SAFETY: Via the conditional above, we know that both `px` and `py` - // have the same length, so `px < pxend` implies that `py < pyend`. 
- // Thus, derefencing both `px` and `py` in the loop below is safe. - // - // Moreover, we set `pxend` and `pyend` to be 4 bytes before the actual - // end of of `px` and `py`. Thus, the final dereference outside of the - // loop is guaranteed to be valid. (The final comparison will overlap - // with the last comparison done in the loop for lengths that aren't - // multiples of four.) - // - // Finally, we needn't worry about alignment here, since we do - // unaligned loads. - unsafe { - let (mut px, mut py) = (x.as_ptr(), y.as_ptr()); - let (pxend, pyend) = (px.add(x.len() - 4), py.add(y.len() - 4)); - while px < pxend { - let vx = (px as *const u32).read_unaligned(); - let vy = (py as *const u32).read_unaligned(); - if vx != vy { - return false; - } - px = px.add(4); - py = py.add(4); - } - let vx = (pxend as *const u32).read_unaligned(); - let vy = (pyend as *const u32).read_unaligned(); - vx == vy + x = x.add(4); + y = y.add(4); + } + let vx = xend.cast::().read_unaligned(); + let vy = yend.cast::().read_unaligned(); + vx == vy +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn equals_different_lengths() { + assert!(!is_equal(b"", b"a")); + assert!(!is_equal(b"a", b"")); + assert!(!is_equal(b"ab", b"a")); + assert!(!is_equal(b"a", b"ab")); + } + + #[test] + fn equals_mismatch() { + let one_mismatch = [ + (&b"a"[..], &b"x"[..]), + (&b"ab"[..], &b"ax"[..]), + (&b"abc"[..], &b"abx"[..]), + (&b"abcd"[..], &b"abcx"[..]), + (&b"abcde"[..], &b"abcdx"[..]), + (&b"abcdef"[..], &b"abcdex"[..]), + (&b"abcdefg"[..], &b"abcdefx"[..]), + (&b"abcdefgh"[..], &b"abcdefgx"[..]), + (&b"abcdefghi"[..], &b"abcdefghx"[..]), + (&b"abcdefghij"[..], &b"abcdefghix"[..]), + (&b"abcdefghijk"[..], &b"abcdefghijx"[..]), + (&b"abcdefghijkl"[..], &b"abcdefghijkx"[..]), + (&b"abcdefghijklm"[..], &b"abcdefghijklx"[..]), + (&b"abcdefghijklmn"[..], &b"abcdefghijklmx"[..]), + ]; + for (x, y) in one_mismatch { + assert_eq!(x.len(), y.len(), "lengths should match"); + 
assert!(!is_equal(x, y)); + assert!(!is_equal(y, x)); } } + + #[test] + fn equals_yes() { + assert!(is_equal(b"", b"")); + assert!(is_equal(b"a", b"a")); + assert!(is_equal(b"ab", b"ab")); + assert!(is_equal(b"abc", b"abc")); + assert!(is_equal(b"abcd", b"abcd")); + assert!(is_equal(b"abcde", b"abcde")); + assert!(is_equal(b"abcdef", b"abcdef")); + assert!(is_equal(b"abcdefg", b"abcdefg")); + assert!(is_equal(b"abcdefgh", b"abcdefgh")); + assert!(is_equal(b"abcdefghi", b"abcdefghi")); + } + + #[test] + fn prefix() { + assert!(is_prefix(b"", b"")); + assert!(is_prefix(b"a", b"")); + assert!(is_prefix(b"ab", b"")); + assert!(is_prefix(b"foo", b"foo")); + assert!(is_prefix(b"foobar", b"foo")); + + assert!(!is_prefix(b"foo", b"fob")); + assert!(!is_prefix(b"foobar", b"fob")); + } } diff --git a/vendor/aho-corasick/src/packed/rabinkarp.rs b/vendor/aho-corasick/src/packed/rabinkarp.rs index a30b63c..fdd8a6f 100644 --- a/vendor/aho-corasick/src/packed/rabinkarp.rs +++ b/vendor/aho-corasick/src/packed/rabinkarp.rs @@ -1,9 +1,6 @@ -use alloc::{vec, vec::Vec}; +use alloc::{sync::Arc, vec, vec::Vec}; -use crate::{ - packed::pattern::{PatternID, Patterns}, - util::search::Match, -}; +use crate::{packed::pattern::Patterns, util::search::Match, PatternID}; /// The type of the rolling hash used in the Rabin-Karp algorithm. type Hash = usize; @@ -36,7 +33,9 @@ const NUM_BUCKETS: usize = 64; /// But ESMAJ provides something a bit more concrete: /// https://www-igm.univ-mlv.fr/~lecroq/string/node5.html #[derive(Clone, Debug)] -pub struct RabinKarp { +pub(crate) struct RabinKarp { + /// The patterns we're searching for. + patterns: Arc, /// The order of patterns in each bucket is significant. Namely, they are /// arranged such that the first one to match is the correct match. This /// may not necessarily correspond to the order provided by the caller. @@ -51,16 +50,6 @@ pub struct RabinKarp { /// The factor to subtract out of a hash before updating it with a new /// byte. 
hash_2pow: usize, - /// The maximum identifier of a pattern. This is used as a sanity check - /// to ensure that the patterns provided by the caller are the same as - /// the patterns that were used to compile the matcher. This sanity check - /// possibly permits safely eliminating bounds checks regardless of what - /// patterns are provided by the caller. - /// - /// (Currently, we don't use this to elide bounds checks since it doesn't - /// result in a measurable performance improvement, but we do use it for - /// better failure modes.) - max_pattern_id: PatternID, } impl RabinKarp { @@ -68,7 +57,7 @@ impl RabinKarp { /// /// This panics if any of the patterns in the collection are empty, or if /// the collection is itself empty. - pub fn new(patterns: &Patterns) -> RabinKarp { + pub(crate) fn new(patterns: &Arc) -> RabinKarp { assert!(patterns.len() >= 1); let hash_len = patterns.minimum_len(); assert!(hash_len >= 1); @@ -79,10 +68,10 @@ impl RabinKarp { } let mut rk = RabinKarp { + patterns: Arc::clone(patterns), buckets: vec![vec![]; NUM_BUCKETS], hash_len, hash_2pow, - max_pattern_id: patterns.max_pattern_id(), }; for (id, pat) in patterns.iter() { let hash = rk.hash(&pat.bytes()[..rk.hash_len]); @@ -94,18 +83,12 @@ impl RabinKarp { /// Return the first matching pattern in the given haystack, begining the /// search at `at`. 
- pub fn find_at( + pub(crate) fn find_at( &self, - patterns: &Patterns, haystack: &[u8], mut at: usize, ) -> Option { assert_eq!(NUM_BUCKETS, self.buckets.len()); - assert_eq!( - self.max_pattern_id, - patterns.max_pattern_id(), - "Rabin-Karp must be called with same patterns it was built with", - ); if at + self.hash_len > haystack.len() { return None; @@ -115,7 +98,7 @@ impl RabinKarp { let bucket = &self.buckets[hash % NUM_BUCKETS]; for &(phash, pid) in bucket { if phash == hash { - if let Some(c) = self.verify(patterns, pid, haystack, at) { + if let Some(c) = self.verify(pid, haystack, at) { return Some(c); } } @@ -134,10 +117,9 @@ impl RabinKarp { /// Returns the approximate total amount of heap used by this searcher, in /// units of bytes. - pub fn memory_usage(&self) -> usize { - let num_patterns = self.max_pattern_id as usize + 1; + pub(crate) fn memory_usage(&self) -> usize { self.buckets.len() * core::mem::size_of::>() - + num_patterns * core::mem::size_of::<(Hash, PatternID)>() + + self.patterns.len() * core::mem::size_of::<(Hash, PatternID)>() } /// Verify whether the pattern with the given id matches at @@ -152,14 +134,13 @@ impl RabinKarp { #[cold] fn verify( &self, - patterns: &Patterns, id: PatternID, haystack: &[u8], at: usize, ) -> Option { - let pat = patterns.get(id); + let pat = self.patterns.get(id); if pat.is_prefix(&haystack[at..]) { - Some(Match::must(id as usize, at..at + pat.len())) + Some(Match::new(id, at..at + pat.len())) } else { None } diff --git a/vendor/aho-corasick/src/packed/teddy/README.md b/vendor/aho-corasick/src/packed/teddy/README.md index 51b999b..f0928cb 100644 --- a/vendor/aho-corasick/src/packed/teddy/README.md +++ b/vendor/aho-corasick/src/packed/teddy/README.md @@ -225,14 +225,14 @@ fingerprints in a haystack's 16 byte block, where `i` is the `ith` byte in that block. Once we have that, we can look for the position of the least significant bit -in `C`. 
(Least significant because we only target `x86_64` here, which is -always little endian. Thus, the least significant bytes correspond to bytes -in our haystack at a lower address.) That position, modulo `8`, gives us -the pattern that the fingerprint matches. That position, integer divided by -`8`, also gives us the byte offset that the fingerprint occurs in inside the -16 byte haystack block. Using those two pieces of information, we can run a -verification procedure that tries to match all substrings containing that -fingerprint at that position in the haystack. +in `C`. (Least significant because we only target little endian here. Thus, +the least significant bytes correspond to bytes in our haystack at a lower +address.) That position, modulo `8`, gives us the pattern that the fingerprint +matches. That position, integer divided by `8`, also gives us the byte offset +that the fingerprint occurs in inside the 16 byte haystack block. Using those +two pieces of information, we can run a verification procedure that tries +to match all substrings containing that fingerprint at that position in the +haystack. # Implementation notes diff --git a/vendor/aho-corasick/src/packed/teddy/builder.rs b/vendor/aho-corasick/src/packed/teddy/builder.rs new file mode 100644 index 0000000..be91777 --- /dev/null +++ b/vendor/aho-corasick/src/packed/teddy/builder.rs @@ -0,0 +1,780 @@ +use core::{ + fmt::Debug, + panic::{RefUnwindSafe, UnwindSafe}, +}; + +use alloc::sync::Arc; + +use crate::packed::{ext::Pointer, pattern::Patterns, teddy::generic::Match}; + +/// A builder for constructing a Teddy matcher. +/// +/// The builder primarily permits fine grained configuration of the Teddy +/// matcher. Most options are made only available for testing/benchmarking +/// purposes. In reality, options are automatically determined by the nature +/// and number of patterns given to the builder. 
+#[derive(Clone, Debug)] +pub(crate) struct Builder { + /// When none, this is automatically determined. Otherwise, `false` means + /// slim Teddy is used (8 buckets) and `true` means fat Teddy is used + /// (16 buckets). Fat Teddy requires AVX2, so if that CPU feature isn't + /// available and Fat Teddy was requested, no matcher will be built. + only_fat: Option, + /// When none, this is automatically determined. Otherwise, `false` means + /// that 128-bit vectors will be used (up to SSSE3 instructions) where as + /// `true` means that 256-bit vectors will be used. As with `fat`, if + /// 256-bit vectors are requested and they aren't available, then a + /// searcher will not be built. + only_256bit: Option, + /// When true (the default), the number of patterns will be used as a + /// heuristic for refusing construction of a Teddy searcher. The point here + /// is that too many patterns can overwhelm Teddy. But this can be disabled + /// in cases where the caller knows better. + heuristic_pattern_limits: bool, +} + +impl Default for Builder { + fn default() -> Builder { + Builder::new() + } +} + +impl Builder { + /// Create a new builder for configuring a Teddy matcher. + pub(crate) fn new() -> Builder { + Builder { + only_fat: None, + only_256bit: None, + heuristic_pattern_limits: true, + } + } + + /// Build a matcher for the set of patterns given. If a matcher could not + /// be built, then `None` is returned. + /// + /// Generally, a matcher isn't built if the necessary CPU features aren't + /// available, an unsupported target or if the searcher is believed to be + /// slower than standard techniques (i.e., if there are too many literals). + pub(crate) fn build(&self, patterns: Arc) -> Option { + self.build_imp(patterns) + } + + /// Require the use of Fat (true) or Slim (false) Teddy. Fat Teddy uses + /// 16 buckets where as Slim Teddy uses 8 buckets. More buckets are useful + /// for a larger set of literals. 
+ /// + /// `None` is the default, which results in an automatic selection based + /// on the number of literals and available CPU features. + pub(crate) fn only_fat(&mut self, yes: Option) -> &mut Builder { + self.only_fat = yes; + self + } + + /// Request the use of 256-bit vectors (true) or 128-bit vectors (false). + /// Generally, a larger vector size is better since it either permits + /// matching more patterns or matching more bytes in the haystack at once. + /// + /// `None` is the default, which results in an automatic selection based on + /// the number of literals and available CPU features. + pub(crate) fn only_256bit(&mut self, yes: Option) -> &mut Builder { + self.only_256bit = yes; + self + } + + /// Request that heuristic limitations on the number of patterns be + /// employed. This useful to disable for benchmarking where one wants to + /// explore how Teddy performs on large number of patterns even if the + /// heuristics would otherwise refuse construction. + /// + /// This is enabled by default. + pub(crate) fn heuristic_pattern_limits( + &mut self, + yes: bool, + ) -> &mut Builder { + self.heuristic_pattern_limits = yes; + self + } + + fn build_imp(&self, patterns: Arc) -> Option { + let patlimit = self.heuristic_pattern_limits; + // There's no particular reason why we limit ourselves to little endian + // here, but it seems likely that some parts of Teddy as they are + // currently written (e.g., the uses of `trailing_zeros`) are likely + // wrong on non-little-endian targets. Such things are likely easy to + // fix, but at the time of writing (2023/09/18), I actually do not know + // how to test this code on a big-endian target. So for now, we're + // conservative and just bail out. + if !cfg!(target_endian = "little") { + debug!("skipping Teddy because target isn't little endian"); + return None; + } + // Too many patterns will overwhelm Teddy and likely lead to slow + // downs, typically in the verification step. 
+ if patlimit && patterns.len() > 64 { + debug!("skipping Teddy because of too many patterns"); + return None; + } + + #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] + { + use self::x86_64::{FatAVX2, SlimAVX2, SlimSSSE3}; + + let mask_len = core::cmp::min(4, patterns.minimum_len()); + let beefy = patterns.len() > 32; + let has_avx2 = self::x86_64::is_available_avx2(); + let has_ssse3 = has_avx2 || self::x86_64::is_available_ssse3(); + let use_avx2 = if self.only_256bit == Some(true) { + if !has_avx2 { + debug!( + "skipping Teddy because avx2 was demanded but unavailable" + ); + return None; + } + true + } else if self.only_256bit == Some(false) { + if !has_ssse3 { + debug!( + "skipping Teddy because ssse3 was demanded but unavailable" + ); + return None; + } + false + } else if !has_ssse3 && !has_avx2 { + debug!( + "skipping Teddy because ssse3 and avx2 are unavailable" + ); + return None; + } else { + has_avx2 + }; + let fat = match self.only_fat { + None => use_avx2 && beefy, + Some(false) => false, + Some(true) if !use_avx2 => { + debug!( + "skipping Teddy because fat was demanded, but fat \ + Teddy requires avx2 which is unavailable" + ); + return None; + } + Some(true) => true, + }; + // Just like for aarch64, it's possible that too many patterns will + // overhwelm Teddy. Unlike aarch64 though, we have Fat teddy which + // helps things scale a bit more by spreading patterns over more + // buckets. + // + // These thresholds were determined by looking at the measurements + // for the rust/aho-corasick/packed/leftmost-first and + // rust/aho-corasick/dfa/leftmost-first engines on the `teddy/` + // benchmarks. 
+ if patlimit && mask_len == 1 && patterns.len() > 16 { + debug!( + "skipping Teddy (mask len: 1) because there are \ + too many patterns", + ); + return None; + } + match (mask_len, use_avx2, fat) { + (1, false, _) => { + debug!("Teddy choice: 128-bit slim, 1 byte"); + SlimSSSE3::<1>::new(&patterns) + } + (1, true, false) => { + debug!("Teddy choice: 256-bit slim, 1 byte"); + SlimAVX2::<1>::new(&patterns) + } + (1, true, true) => { + debug!("Teddy choice: 256-bit fat, 1 byte"); + FatAVX2::<1>::new(&patterns) + } + (2, false, _) => { + debug!("Teddy choice: 128-bit slim, 2 bytes"); + SlimSSSE3::<2>::new(&patterns) + } + (2, true, false) => { + debug!("Teddy choice: 256-bit slim, 2 bytes"); + SlimAVX2::<2>::new(&patterns) + } + (2, true, true) => { + debug!("Teddy choice: 256-bit fat, 2 bytes"); + FatAVX2::<2>::new(&patterns) + } + (3, false, _) => { + debug!("Teddy choice: 128-bit slim, 3 bytes"); + SlimSSSE3::<3>::new(&patterns) + } + (3, true, false) => { + debug!("Teddy choice: 256-bit slim, 3 bytes"); + SlimAVX2::<3>::new(&patterns) + } + (3, true, true) => { + debug!("Teddy choice: 256-bit fat, 3 bytes"); + FatAVX2::<3>::new(&patterns) + } + (4, false, _) => { + debug!("Teddy choice: 128-bit slim, 4 bytes"); + SlimSSSE3::<4>::new(&patterns) + } + (4, true, false) => { + debug!("Teddy choice: 256-bit slim, 4 bytes"); + SlimAVX2::<4>::new(&patterns) + } + (4, true, true) => { + debug!("Teddy choice: 256-bit fat, 4 bytes"); + FatAVX2::<4>::new(&patterns) + } + _ => { + debug!("no supported Teddy configuration found"); + None + } + } + } + #[cfg(target_arch = "aarch64")] + { + use self::aarch64::SlimNeon; + + let mask_len = core::cmp::min(4, patterns.minimum_len()); + if self.only_256bit == Some(true) { + debug!( + "skipping Teddy because 256-bits were demanded \ + but unavailable" + ); + return None; + } + if self.only_fat == Some(true) { + debug!( + "skipping Teddy because fat was demanded but unavailable" + ); + } + // Since we don't have Fat teddy in aarch64 
(I think we'd want at + // least 256-bit vectors for that), we need to be careful not to + // allow too many patterns as it might overwhelm Teddy. Generally + // speaking, as the mask length goes up, the more patterns we can + // handle because the mask length results in fewer candidates + // generated. + // + // These thresholds were determined by looking at the measurements + // for the rust/aho-corasick/packed/leftmost-first and + // rust/aho-corasick/dfa/leftmost-first engines on the `teddy/` + // benchmarks. + match mask_len { + 1 => { + if patlimit && patterns.len() > 16 { + debug!( + "skipping Teddy (mask len: 1) because there are \ + too many patterns", + ); + } + debug!("Teddy choice: 128-bit slim, 1 byte"); + SlimNeon::<1>::new(&patterns) + } + 2 => { + if patlimit && patterns.len() > 32 { + debug!( + "skipping Teddy (mask len: 2) because there are \ + too many patterns", + ); + } + debug!("Teddy choice: 128-bit slim, 2 bytes"); + SlimNeon::<2>::new(&patterns) + } + 3 => { + if patlimit && patterns.len() > 48 { + debug!( + "skipping Teddy (mask len: 3) because there are \ + too many patterns", + ); + } + debug!("Teddy choice: 128-bit slim, 3 bytes"); + SlimNeon::<3>::new(&patterns) + } + 4 => { + debug!("Teddy choice: 128-bit slim, 4 bytes"); + SlimNeon::<4>::new(&patterns) + } + _ => { + debug!("no supported Teddy configuration found"); + None + } + } + } + #[cfg(not(any( + all(target_arch = "x86_64", target_feature = "sse2"), + target_arch = "aarch64" + )))] + { + None + } + } +} + +/// A searcher that dispatches to one of several possible Teddy variants. +#[derive(Clone, Debug)] +pub(crate) struct Searcher { + /// The Teddy variant we use. We use dynamic dispatch under the theory that + /// it results in better codegen then a enum, although this is a specious + /// claim. + /// + /// This `Searcher` is essentially a wrapper for a `SearcherT` trait + /// object. 
We just make `memory_usage` and `minimum_len` available without + /// going through dynamic dispatch. + imp: Arc, + /// Total heap memory used by the Teddy variant. + memory_usage: usize, + /// The minimum haystack length this searcher can handle. It is intended + /// for callers to use some other search routine (such as Rabin-Karp) in + /// cases where the haystack (or remainer of the haystack) is too short. + minimum_len: usize, +} + +impl Searcher { + /// Look for the leftmost occurrence of any pattern in this search in the + /// given haystack starting at the given position. + /// + /// # Panics + /// + /// This panics when `haystack[at..].len()` is less than the minimum length + /// for this haystack. + #[inline(always)] + pub(crate) fn find( + &self, + haystack: &[u8], + at: usize, + ) -> Option { + // SAFETY: The Teddy implementations all require a minimum haystack + // length, and this is required for safety. Therefore, we assert it + // here in order to make this method sound. + assert!(haystack[at..].len() >= self.minimum_len); + let hayptr = haystack.as_ptr(); + // SAFETY: Construction of the searcher guarantees that we are able + // to run it in the current environment (i.e., we won't get an AVX2 + // searcher on a x86-64 CPU without AVX2 support). Also, the pointers + // are valid as they are derived directly from a borrowed slice. + let teddym = unsafe { + self.imp.find(hayptr.add(at), hayptr.add(haystack.len()))? + }; + let start = teddym.start().as_usize().wrapping_sub(hayptr.as_usize()); + let end = teddym.end().as_usize().wrapping_sub(hayptr.as_usize()); + let span = crate::Span { start, end }; + // OK because we won't permit the construction of a searcher that + // could report a pattern ID bigger than what can fit in the crate-wide + // PatternID type. 
+ let pid = crate::PatternID::new_unchecked(teddym.pattern().as_usize()); + let m = crate::Match::new(pid, span); + Some(m) + } + + /// Returns the approximate total amount of heap used by this type, in + /// units of bytes. + #[inline(always)] + pub(crate) fn memory_usage(&self) -> usize { + self.memory_usage + } + + /// Returns the minimum length, in bytes, that a haystack must be in order + /// to use it with this searcher. + #[inline(always)] + pub(crate) fn minimum_len(&self) -> usize { + self.minimum_len + } +} + +/// A trait that provides dynamic dispatch over the different possible Teddy +/// variants on the same algorithm. +/// +/// On `x86_64` for example, it isn't known until runtime which of 12 possible +/// variants will be used. One might use one of the four slim 128-bit vector +/// variants, or one of the four 256-bit vector variants or even one of the +/// four fat 256-bit vector variants. +/// +/// Since this choice is generally made when the Teddy searcher is constructed +/// and this choice is based on the patterns given and what the current CPU +/// supports, it follows that there must be some kind of indirection at search +/// time that "selects" the variant chosen at build time. +/// +/// There are a few different ways to go about this. One approach is to use an +/// enum. It works fine, but in my experiments, this generally results in worse +/// codegen. Another approach, which is what we use here, is dynamic dispatch +/// via a trait object. We basically implement this trait for each possible +/// variant, select the variant we want at build time and convert it to a +/// trait object for use at search time. +/// +/// Another approach is to use function pointers and stick each of the possible +/// variants into a union. This is essentially isomorphic to the dynamic +/// dispatch approach, but doesn't require any allocations. Since this crate +/// requires `alloc`, there's no real reason (AFAIK) to go down this path. 
(The +/// `memchr` crate does this.) +trait SearcherT: + Debug + Send + Sync + UnwindSafe + RefUnwindSafe + 'static +{ + /// Execute a search on the given haystack (identified by `start` and `end` + /// raw pointers). + /// + /// # Safety + /// + /// Essentially, the `start` and `end` pointers must be valid and point + /// to a haystack one can read. As long as you derive them from, for + /// example, a `&[u8]`, they should automatically satisfy all of the safety + /// obligations: + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// * It must be the case that `start <= end`. + /// * `end - start` must be greater than the minimum length for this + /// searcher. + /// + /// Also, it is expected that implementations of this trait will tag this + /// method with a `target_feature` attribute. Callers must ensure that + /// they are executing this method in an environment where that attribute + /// is valid. + unsafe fn find(&self, start: *const u8, end: *const u8) -> Option; +} + +#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] +mod x86_64 { + use core::arch::x86_64::{__m128i, __m256i}; + + use alloc::sync::Arc; + + use crate::packed::{ + ext::Pointer, + pattern::Patterns, + teddy::generic::{self, Match}, + }; + + use super::{Searcher, SearcherT}; + + #[derive(Clone, Debug)] + pub(super) struct SlimSSSE3 { + slim128: generic::Slim<__m128i, BYTES>, + } + + // Defines SlimSSSE3 wrapper functions for 1, 2, 3 and 4 bytes. + macro_rules! 
slim_ssse3 { + ($len:expr) => { + impl SlimSSSE3<$len> { + /// Creates a new searcher using "slim" Teddy with 128-bit + /// vectors. If SSSE3 is not available in the current + /// environment, then this returns `None`. + pub(super) fn new( + patterns: &Arc, + ) -> Option { + if !is_available_ssse3() { + return None; + } + Some(unsafe { SlimSSSE3::<$len>::new_unchecked(patterns) }) + } + + /// Creates a new searcher using "slim" Teddy with 256-bit + /// vectors without checking whether SSSE3 is available or not. + /// + /// # Safety + /// + /// Callers must ensure that SSSE3 is available in the current + /// environment. + #[target_feature(enable = "ssse3")] + unsafe fn new_unchecked(patterns: &Arc) -> Searcher { + let slim128 = generic::Slim::<__m128i, $len>::new( + Arc::clone(patterns), + ); + let memory_usage = slim128.memory_usage(); + let minimum_len = slim128.minimum_len(); + let imp = Arc::new(SlimSSSE3 { slim128 }); + Searcher { imp, memory_usage, minimum_len } + } + } + + impl SearcherT for SlimSSSE3<$len> { + #[target_feature(enable = "ssse3")] + #[inline] + unsafe fn find( + &self, + start: *const u8, + end: *const u8, + ) -> Option { + // SAFETY: All obligations except for `target_feature` are + // passed to the caller. Our use of `target_feature` is + // safe because construction of this type requires that the + // requisite target features are available. + self.slim128.find(start, end) + } + } + }; + } + + slim_ssse3!(1); + slim_ssse3!(2); + slim_ssse3!(3); + slim_ssse3!(4); + + #[derive(Clone, Debug)] + pub(super) struct SlimAVX2 { + slim128: generic::Slim<__m128i, BYTES>, + slim256: generic::Slim<__m256i, BYTES>, + } + + // Defines SlimAVX2 wrapper functions for 1, 2, 3 and 4 bytes. + macro_rules! slim_avx2 { + ($len:expr) => { + impl SlimAVX2<$len> { + /// Creates a new searcher using "slim" Teddy with 256-bit + /// vectors. If AVX2 is not available in the current + /// environment, then this returns `None`. 
+ pub(super) fn new( + patterns: &Arc, + ) -> Option { + if !is_available_avx2() { + return None; + } + Some(unsafe { SlimAVX2::<$len>::new_unchecked(patterns) }) + } + + /// Creates a new searcher using "slim" Teddy with 256-bit + /// vectors without checking whether AVX2 is available or not. + /// + /// # Safety + /// + /// Callers must ensure that AVX2 is available in the current + /// environment. + #[target_feature(enable = "avx2")] + unsafe fn new_unchecked(patterns: &Arc) -> Searcher { + let slim128 = generic::Slim::<__m128i, $len>::new( + Arc::clone(&patterns), + ); + let slim256 = generic::Slim::<__m256i, $len>::new( + Arc::clone(&patterns), + ); + let memory_usage = + slim128.memory_usage() + slim256.memory_usage(); + let minimum_len = slim128.minimum_len(); + let imp = Arc::new(SlimAVX2 { slim128, slim256 }); + Searcher { imp, memory_usage, minimum_len } + } + } + + impl SearcherT for SlimAVX2<$len> { + #[target_feature(enable = "avx2")] + #[inline] + unsafe fn find( + &self, + start: *const u8, + end: *const u8, + ) -> Option { + // SAFETY: All obligations except for `target_feature` are + // passed to the caller. Our use of `target_feature` is + // safe because construction of this type requires that the + // requisite target features are available. + let len = end.distance(start); + if len < self.slim256.minimum_len() { + self.slim128.find(start, end) + } else { + self.slim256.find(start, end) + } + } + } + }; + } + + slim_avx2!(1); + slim_avx2!(2); + slim_avx2!(3); + slim_avx2!(4); + + #[derive(Clone, Debug)] + pub(super) struct FatAVX2 { + fat256: generic::Fat<__m256i, BYTES>, + } + + // Defines SlimAVX2 wrapper functions for 1, 2, 3 and 4 bytes. + macro_rules! fat_avx2 { + ($len:expr) => { + impl FatAVX2<$len> { + /// Creates a new searcher using "slim" Teddy with 256-bit + /// vectors. If AVX2 is not available in the current + /// environment, then this returns `None`. 
+ pub(super) fn new( + patterns: &Arc, + ) -> Option { + if !is_available_avx2() { + return None; + } + Some(unsafe { FatAVX2::<$len>::new_unchecked(patterns) }) + } + + /// Creates a new searcher using "slim" Teddy with 256-bit + /// vectors without checking whether AVX2 is available or not. + /// + /// # Safety + /// + /// Callers must ensure that AVX2 is available in the current + /// environment. + #[target_feature(enable = "avx2")] + unsafe fn new_unchecked(patterns: &Arc) -> Searcher { + let fat256 = generic::Fat::<__m256i, $len>::new( + Arc::clone(&patterns), + ); + let memory_usage = fat256.memory_usage(); + let minimum_len = fat256.minimum_len(); + let imp = Arc::new(FatAVX2 { fat256 }); + Searcher { imp, memory_usage, minimum_len } + } + } + + impl SearcherT for FatAVX2<$len> { + #[target_feature(enable = "avx2")] + #[inline] + unsafe fn find( + &self, + start: *const u8, + end: *const u8, + ) -> Option { + // SAFETY: All obligations except for `target_feature` are + // passed to the caller. Our use of `target_feature` is + // safe because construction of this type requires that the + // requisite target features are available. 
+ self.fat256.find(start, end) + } + } + }; + } + + fat_avx2!(1); + fat_avx2!(2); + fat_avx2!(3); + fat_avx2!(4); + + #[inline] + pub(super) fn is_available_ssse3() -> bool { + #[cfg(not(target_feature = "sse2"))] + { + false + } + #[cfg(target_feature = "sse2")] + { + #[cfg(target_feature = "ssse3")] + { + true + } + #[cfg(not(target_feature = "ssse3"))] + { + #[cfg(feature = "std")] + { + std::is_x86_feature_detected!("ssse3") + } + #[cfg(not(feature = "std"))] + { + false + } + } + } + } + + #[inline] + pub(super) fn is_available_avx2() -> bool { + #[cfg(not(target_feature = "sse2"))] + { + false + } + #[cfg(target_feature = "sse2")] + { + #[cfg(target_feature = "avx2")] + { + true + } + #[cfg(not(target_feature = "avx2"))] + { + #[cfg(feature = "std")] + { + std::is_x86_feature_detected!("avx2") + } + #[cfg(not(feature = "std"))] + { + false + } + } + } + } +} + +#[cfg(target_arch = "aarch64")] +mod aarch64 { + use core::arch::aarch64::uint8x16_t; + + use alloc::sync::Arc; + + use crate::packed::{ + pattern::Patterns, + teddy::generic::{self, Match}, + }; + + use super::{Searcher, SearcherT}; + + #[derive(Clone, Debug)] + pub(super) struct SlimNeon { + slim128: generic::Slim, + } + + // Defines SlimSSSE3 wrapper functions for 1, 2, 3 and 4 bytes. + macro_rules! slim_neon { + ($len:expr) => { + impl SlimNeon<$len> { + /// Creates a new searcher using "slim" Teddy with 128-bit + /// vectors. If SSSE3 is not available in the current + /// environment, then this returns `None`. + pub(super) fn new( + patterns: &Arc, + ) -> Option { + Some(unsafe { SlimNeon::<$len>::new_unchecked(patterns) }) + } + + /// Creates a new searcher using "slim" Teddy with 256-bit + /// vectors without checking whether SSSE3 is available or not. + /// + /// # Safety + /// + /// Callers must ensure that SSSE3 is available in the current + /// environment. 
+ #[target_feature(enable = "neon")] + unsafe fn new_unchecked(patterns: &Arc) -> Searcher { + let slim128 = generic::Slim::::new( + Arc::clone(patterns), + ); + let memory_usage = slim128.memory_usage(); + let minimum_len = slim128.minimum_len(); + let imp = Arc::new(SlimNeon { slim128 }); + Searcher { imp, memory_usage, minimum_len } + } + } + + impl SearcherT for SlimNeon<$len> { + #[target_feature(enable = "neon")] + #[inline] + unsafe fn find( + &self, + start: *const u8, + end: *const u8, + ) -> Option { + // SAFETY: All obligations except for `target_feature` are + // passed to the caller. Our use of `target_feature` is + // safe because construction of this type requires that the + // requisite target features are available. + self.slim128.find(start, end) + } + } + }; + } + + slim_neon!(1); + slim_neon!(2); + slim_neon!(3); + slim_neon!(4); +} diff --git a/vendor/aho-corasick/src/packed/teddy/compile.rs b/vendor/aho-corasick/src/packed/teddy/compile.rs deleted file mode 100644 index 2e27e10..0000000 --- a/vendor/aho-corasick/src/packed/teddy/compile.rs +++ /dev/null @@ -1,502 +0,0 @@ -// See the README in this directory for an explanation of the Teddy algorithm. - -use core::{cmp, fmt}; - -use alloc::{collections::BTreeMap, format, vec, vec::Vec}; - -use crate::packed::{ - pattern::{PatternID, Patterns}, - teddy::Teddy, -}; - -/// A builder for constructing a Teddy matcher. -/// -/// The builder primarily permits fine grained configuration of the Teddy -/// matcher. Most options are made only available for testing/benchmarking -/// purposes. In reality, options are automatically determined by the nature -/// and number of patterns given to the builder. -#[derive(Clone, Debug)] -pub struct Builder { - /// When none, this is automatically determined. Otherwise, `false` means - /// slim Teddy is used (8 buckets) and `true` means fat Teddy is used - /// (16 buckets). 
Fat Teddy requires AVX2, so if that CPU feature isn't - /// available and Fat Teddy was requested, no matcher will be built. - fat: Option, - /// When none, this is automatically determined. Otherwise, `false` means - /// that 128-bit vectors will be used (up to SSSE3 instructions) where as - /// `true` means that 256-bit vectors will be used. As with `fat`, if - /// 256-bit vectors are requested and they aren't available, then a - /// searcher will not be built. - avx: Option, -} - -impl Default for Builder { - fn default() -> Builder { - Builder::new() - } -} - -impl Builder { - /// Create a new builder for configuring a Teddy matcher. - pub fn new() -> Builder { - Builder { fat: None, avx: None } - } - - /// Build a matcher for the set of patterns given. If a matcher could not - /// be built, then `None` is returned. - /// - /// Generally, a matcher isn't built if the necessary CPU features aren't - /// available, an unsupported target or if the searcher is believed to be - /// slower than standard techniques (i.e., if there are too many literals). - pub fn build(&self, patterns: &Patterns) -> Option { - self.build_imp(patterns) - } - - /// Require the use of Fat (true) or Slim (false) Teddy. Fat Teddy uses - /// 16 buckets where as Slim Teddy uses 8 buckets. More buckets are useful - /// for a larger set of literals. - /// - /// `None` is the default, which results in an automatic selection based - /// on the number of literals and available CPU features. - pub fn fat(&mut self, yes: Option) -> &mut Builder { - self.fat = yes; - self - } - - /// Request the use of 256-bit vectors (true) or 128-bit vectors (false). - /// Generally, a larger vector size is better since it either permits - /// matching more patterns or matching more bytes in the haystack at once. - /// - /// `None` is the default, which results in an automatic selection based on - /// the number of literals and available CPU features. 
- pub fn avx(&mut self, yes: Option) -> &mut Builder { - self.avx = yes; - self - } - - fn build_imp(&self, patterns: &Patterns) -> Option { - use crate::packed::teddy::runtime; - - // Most of the logic here is just about selecting the optimal settings, - // or perhaps even rejecting construction altogether. The choices - // we have are: fat (avx only) or not, ssse3 or avx2, and how many - // patterns we allow ourselves to search. Additionally, for testing - // and benchmarking, we permit callers to try to "force" a setting, - // and if the setting isn't allowed (e.g., forcing AVX when AVX isn't - // available), then we bail and return nothing. - - if patterns.len() > 64 { - debug!("skipping Teddy because of too many patterns"); - return None; - } - let has_ssse3 = std::is_x86_feature_detected!("ssse3"); - let has_avx = std::is_x86_feature_detected!("avx2"); - let avx = if self.avx == Some(true) { - if !has_avx { - debug!( - "skipping Teddy because avx was demanded but unavailable" - ); - return None; - } - true - } else if self.avx == Some(false) { - if !has_ssse3 { - debug!( - "skipping Teddy because ssse3 was demanded but unavailable" - ); - return None; - } - false - } else if !has_ssse3 && !has_avx { - debug!("skipping Teddy because ssse3 and avx are unavailable"); - return None; - } else { - has_avx - }; - let fat = match self.fat { - None => avx && patterns.len() > 32, - Some(false) => false, - Some(true) if !avx => { - debug!( - "skipping Teddy because it needs to be fat, but fat \ - Teddy requires avx which is unavailable" - ); - return None; - } - Some(true) => true, - }; - - let mut compiler = Compiler::new(patterns, fat); - compiler.compile(); - let Compiler { buckets, masks, .. 
} = compiler; - // SAFETY: It is required that the builder only produce Teddy matchers - // that are allowed to run on the current CPU, since we later assume - // that the presence of (for example) TeddySlim1Mask256 means it is - // safe to call functions marked with the `avx2` target feature. - match (masks.len(), avx, fat) { - (1, false, _) => { - debug!("Teddy choice: 128-bit slim, 1 byte"); - Some(Teddy { - buckets, - max_pattern_id: patterns.max_pattern_id(), - exec: runtime::Exec::TeddySlim1Mask128( - runtime::TeddySlim1Mask128 { - mask1: runtime::Mask128::new(masks[0]), - }, - ), - }) - } - (1, true, false) => { - debug!("Teddy choice: 256-bit slim, 1 byte"); - Some(Teddy { - buckets, - max_pattern_id: patterns.max_pattern_id(), - exec: runtime::Exec::TeddySlim1Mask256( - runtime::TeddySlim1Mask256 { - mask1: runtime::Mask256::new(masks[0]), - }, - ), - }) - } - (1, true, true) => { - debug!("Teddy choice: 256-bit fat, 1 byte"); - Some(Teddy { - buckets, - max_pattern_id: patterns.max_pattern_id(), - exec: runtime::Exec::TeddyFat1Mask256( - runtime::TeddyFat1Mask256 { - mask1: runtime::Mask256::new(masks[0]), - }, - ), - }) - } - (2, false, _) => { - debug!("Teddy choice: 128-bit slim, 2 bytes"); - Some(Teddy { - buckets, - max_pattern_id: patterns.max_pattern_id(), - exec: runtime::Exec::TeddySlim2Mask128( - runtime::TeddySlim2Mask128 { - mask1: runtime::Mask128::new(masks[0]), - mask2: runtime::Mask128::new(masks[1]), - }, - ), - }) - } - (2, true, false) => { - debug!("Teddy choice: 256-bit slim, 2 bytes"); - Some(Teddy { - buckets, - max_pattern_id: patterns.max_pattern_id(), - exec: runtime::Exec::TeddySlim2Mask256( - runtime::TeddySlim2Mask256 { - mask1: runtime::Mask256::new(masks[0]), - mask2: runtime::Mask256::new(masks[1]), - }, - ), - }) - } - (2, true, true) => { - debug!("Teddy choice: 256-bit fat, 2 bytes"); - Some(Teddy { - buckets, - max_pattern_id: patterns.max_pattern_id(), - exec: runtime::Exec::TeddyFat2Mask256( - 
runtime::TeddyFat2Mask256 { - mask1: runtime::Mask256::new(masks[0]), - mask2: runtime::Mask256::new(masks[1]), - }, - ), - }) - } - (3, false, _) => { - debug!("Teddy choice: 128-bit slim, 3 bytes"); - Some(Teddy { - buckets, - max_pattern_id: patterns.max_pattern_id(), - exec: runtime::Exec::TeddySlim3Mask128( - runtime::TeddySlim3Mask128 { - mask1: runtime::Mask128::new(masks[0]), - mask2: runtime::Mask128::new(masks[1]), - mask3: runtime::Mask128::new(masks[2]), - }, - ), - }) - } - (3, true, false) => { - debug!("Teddy choice: 256-bit slim, 3 bytes"); - Some(Teddy { - buckets, - max_pattern_id: patterns.max_pattern_id(), - exec: runtime::Exec::TeddySlim3Mask256( - runtime::TeddySlim3Mask256 { - mask1: runtime::Mask256::new(masks[0]), - mask2: runtime::Mask256::new(masks[1]), - mask3: runtime::Mask256::new(masks[2]), - }, - ), - }) - } - (3, true, true) => { - debug!("Teddy choice: 256-bit fat, 3 bytes"); - Some(Teddy { - buckets, - max_pattern_id: patterns.max_pattern_id(), - exec: runtime::Exec::TeddyFat3Mask256( - runtime::TeddyFat3Mask256 { - mask1: runtime::Mask256::new(masks[0]), - mask2: runtime::Mask256::new(masks[1]), - mask3: runtime::Mask256::new(masks[2]), - }, - ), - }) - } - (4, false, _) => { - debug!("Teddy choice: 128-bit slim, 4 bytes"); - Some(Teddy { - buckets, - max_pattern_id: patterns.max_pattern_id(), - exec: runtime::Exec::TeddySlim4Mask128( - runtime::TeddySlim4Mask128 { - mask1: runtime::Mask128::new(masks[0]), - mask2: runtime::Mask128::new(masks[1]), - mask3: runtime::Mask128::new(masks[2]), - mask4: runtime::Mask128::new(masks[3]), - }, - ), - }) - } - (4, true, false) => { - debug!("Teddy choice: 256-bit slim, 4 bytes"); - Some(Teddy { - buckets, - max_pattern_id: patterns.max_pattern_id(), - exec: runtime::Exec::TeddySlim4Mask256( - runtime::TeddySlim4Mask256 { - mask1: runtime::Mask256::new(masks[0]), - mask2: runtime::Mask256::new(masks[1]), - mask3: runtime::Mask256::new(masks[2]), - mask4: runtime::Mask256::new(masks[3]), - 
}, - ), - }) - } - (4, true, true) => { - debug!("Teddy choice: 256-bit fat, 4 bytes"); - Some(Teddy { - buckets, - max_pattern_id: patterns.max_pattern_id(), - exec: runtime::Exec::TeddyFat4Mask256( - runtime::TeddyFat4Mask256 { - mask1: runtime::Mask256::new(masks[0]), - mask2: runtime::Mask256::new(masks[1]), - mask3: runtime::Mask256::new(masks[2]), - mask4: runtime::Mask256::new(masks[3]), - }, - ), - }) - } - _ => unreachable!(), - } - } -} - -/// A compiler is in charge of allocating patterns into buckets and generating -/// the masks necessary for searching. -#[derive(Clone)] -struct Compiler<'p> { - patterns: &'p Patterns, - buckets: Vec>, - masks: Vec, -} - -impl<'p> Compiler<'p> { - /// Create a new Teddy compiler for the given patterns. If `fat` is true, - /// then 16 buckets will be used instead of 8. - /// - /// This panics if any of the patterns given are empty. - fn new(patterns: &'p Patterns, fat: bool) -> Compiler<'p> { - let mask_len = cmp::min(4, patterns.minimum_len()); - assert!(1 <= mask_len && mask_len <= 4); - - Compiler { - patterns, - buckets: vec![vec![]; if fat { 16 } else { 8 }], - masks: vec![Mask::default(); mask_len], - } - } - - /// Compile the patterns in this compiler into buckets and masks. - fn compile(&mut self) { - let mut lonibble_to_bucket: BTreeMap, usize> = BTreeMap::new(); - for (id, pattern) in self.patterns.iter() { - // We try to be slightly clever in how we assign patterns into - // buckets. Generally speaking, we want patterns with the same - // prefix to be in the same bucket, since it minimizes the amount - // of time we spend churning through buckets in the verification - // step. - // - // So we could assign patterns with the same N-prefix (where N - // is the size of the mask, which is one of {1, 2, 3}) to the - // same bucket. However, case insensitive searches are fairly - // common, so we'd for example, ideally want to treat `abc` and - // `ABC` as if they shared the same prefix. 
ASCII has the nice - // property that the lower 4 bits of A and a are the same, so we - // therefore group patterns with the same low-nybbe-N-prefix into - // the same bucket. - // - // MOREOVER, this is actually necessary for correctness! In - // particular, by grouping patterns with the same prefix into the - // same bucket, we ensure that we preserve correct leftmost-first - // and leftmost-longest match semantics. In addition to the fact - // that `patterns.iter()` iterates in the correct order, this - // guarantees that all possible ambiguous matches will occur in - // the same bucket. The verification routine could be adjusted to - // support correct leftmost match semantics regardless of bucket - // allocation, but that results in a performance hit. It's much - // nicer to be able to just stop as soon as a match is found. - let lonybs = pattern.low_nybbles(self.masks.len()); - if let Some(&bucket) = lonibble_to_bucket.get(&lonybs) { - self.buckets[bucket].push(id); - } else { - // N.B. We assign buckets in reverse because it shouldn't have - // any influence on performance, but it does make it harder to - // get leftmost match semantics accidentally correct. 
- let bucket = (self.buckets.len() - 1) - - (id as usize % self.buckets.len()); - self.buckets[bucket].push(id); - lonibble_to_bucket.insert(lonybs, bucket); - } - } - for (bucket_index, bucket) in self.buckets.iter().enumerate() { - for &pat_id in bucket { - let pat = self.patterns.get(pat_id); - for (i, mask) in self.masks.iter_mut().enumerate() { - if self.buckets.len() == 8 { - mask.add_slim(bucket_index as u8, pat.bytes()[i]); - } else { - mask.add_fat(bucket_index as u8, pat.bytes()[i]); - } - } - } - } - } -} - -impl<'p> fmt::Debug for Compiler<'p> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let mut buckets = vec![vec![]; self.buckets.len()]; - for (i, bucket) in self.buckets.iter().enumerate() { - for &patid in bucket { - buckets[i].push(self.patterns.get(patid)); - } - } - f.debug_struct("Compiler") - .field("buckets", &buckets) - .field("masks", &self.masks) - .finish() - } -} - -/// Mask represents the low and high nybble masks that will be used during -/// search. Each mask is 32 bytes wide, although only the first 16 bytes are -/// used for the SSSE3 runtime. -/// -/// Each byte in the mask corresponds to a 8-bit bitset, where bit `i` is set -/// if and only if the corresponding nybble is in the ith bucket. The index of -/// the byte (0-15, inclusive) corresponds to the nybble. -/// -/// Each mask is used as the target of a shuffle, where the indices for the -/// shuffle are taken from the haystack. AND'ing the shuffles for both the -/// low and high masks together also results in 8-bit bitsets, but where bit -/// `i` is set if and only if the correspond *byte* is in the ith bucket. -/// -/// During compilation, masks are just arrays. But during search, these masks -/// are represented as 128-bit or 256-bit vectors. -/// -/// (See the README is this directory for more details.) 
-#[derive(Clone, Copy, Default)] -pub struct Mask { - lo: [u8; 32], - hi: [u8; 32], -} - -impl Mask { - /// Update this mask by adding the given byte to the given bucket. The - /// given bucket must be in the range 0-7. - /// - /// This is for "slim" Teddy, where there are only 8 buckets. - fn add_slim(&mut self, bucket: u8, byte: u8) { - assert!(bucket < 8); - - let byte_lo = (byte & 0xF) as usize; - let byte_hi = ((byte >> 4) & 0xF) as usize; - // When using 256-bit vectors, we need to set this bucket assignment in - // the low and high 128-bit portions of the mask. This allows us to - // process 32 bytes at a time. Namely, AVX2 shuffles operate on each - // of the 128-bit lanes, rather than the full 256-bit vector at once. - self.lo[byte_lo] |= 1 << bucket; - self.lo[byte_lo + 16] |= 1 << bucket; - self.hi[byte_hi] |= 1 << bucket; - self.hi[byte_hi + 16] |= 1 << bucket; - } - - /// Update this mask by adding the given byte to the given bucket. The - /// given bucket must be in the range 0-15. - /// - /// This is for "fat" Teddy, where there are 16 buckets. - fn add_fat(&mut self, bucket: u8, byte: u8) { - assert!(bucket < 16); - - let byte_lo = (byte & 0xF) as usize; - let byte_hi = ((byte >> 4) & 0xF) as usize; - // Unlike slim teddy, fat teddy only works with AVX2. For fat teddy, - // the high 128 bits of our mask correspond to buckets 8-15, while the - // low 128 bits correspond to buckets 0-7. - if bucket < 8 { - self.lo[byte_lo] |= 1 << bucket; - self.hi[byte_hi] |= 1 << bucket; - } else { - self.lo[byte_lo + 16] |= 1 << (bucket % 8); - self.hi[byte_hi + 16] |= 1 << (bucket % 8); - } - } - - /// Return the low 128 bits of the low-nybble mask. - pub fn lo128(&self) -> [u8; 16] { - let mut tmp = [0; 16]; - tmp.copy_from_slice(&self.lo[..16]); - tmp - } - - /// Return the full low-nybble mask. - pub fn lo256(&self) -> [u8; 32] { - self.lo - } - - /// Return the low 128 bits of the high-nybble mask. 
- pub fn hi128(&self) -> [u8; 16] { - let mut tmp = [0; 16]; - tmp.copy_from_slice(&self.hi[..16]); - tmp - } - - /// Return the full high-nybble mask. - pub fn hi256(&self) -> [u8; 32] { - self.hi - } -} - -impl fmt::Debug for Mask { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let (mut parts_lo, mut parts_hi) = (vec![], vec![]); - for i in 0..32 { - parts_lo.push(format!("{:02}: {:08b}", i, self.lo[i])); - parts_hi.push(format!("{:02}: {:08b}", i, self.hi[i])); - } - f.debug_struct("Mask") - .field("lo", &parts_lo) - .field("hi", &parts_hi) - .finish() - } -} diff --git a/vendor/aho-corasick/src/packed/teddy/generic.rs b/vendor/aho-corasick/src/packed/teddy/generic.rs new file mode 100644 index 0000000..2aacd00 --- /dev/null +++ b/vendor/aho-corasick/src/packed/teddy/generic.rs @@ -0,0 +1,1382 @@ +use core::fmt::Debug; + +use alloc::{ + boxed::Box, collections::BTreeMap, format, sync::Arc, vec, vec::Vec, +}; + +use crate::{ + packed::{ + ext::Pointer, + pattern::Patterns, + vector::{FatVector, Vector}, + }, + util::int::U32, + PatternID, +}; + +/// A match type specialized to the Teddy implementations below. +/// +/// Essentially, instead of representing a match at byte offsets, we use +/// raw pointers. This is because the implementations below operate on raw +/// pointers, and so this is a more natural return type based on how the +/// implementation works. +/// +/// Also, the `PatternID` used here is a `u16`. +#[derive(Clone, Copy, Debug)] +pub(crate) struct Match { + pid: PatternID, + start: *const u8, + end: *const u8, +} + +impl Match { + /// Returns the ID of the pattern that matched. + pub(crate) fn pattern(&self) -> PatternID { + self.pid + } + + /// Returns a pointer into the haystack at which the match starts. + pub(crate) fn start(&self) -> *const u8 { + self.start + } + + /// Returns a pointer into the haystack at which the match ends. 
+ pub(crate) fn end(&self) -> *const u8 { + self.end + } +} + +/// A "slim" Teddy implementation that is generic over both the vector type +/// and the minimum length of the patterns being searched for. +/// +/// Only 1, 2, 3 and 4 bytes are supported as minimum lengths. +#[derive(Clone, Debug)] +pub(crate) struct Slim { + /// A generic data structure for doing "slim" Teddy verification. + teddy: Teddy<8>, + /// The masks used as inputs to the shuffle operation to generate + /// candidates (which are fed into the verification routines). + masks: [Mask; BYTES], +} + +impl Slim { + /// Create a new "slim" Teddy searcher for the given patterns. + /// + /// # Panics + /// + /// This panics when `BYTES` is any value other than 1, 2, 3 or 4. + /// + /// # Safety + /// + /// Callers must ensure that this is okay to call in the current target for + /// the current CPU. + #[inline(always)] + pub(crate) unsafe fn new(patterns: Arc) -> Slim { + assert!( + 1 <= BYTES && BYTES <= 4, + "only 1, 2, 3 or 4 bytes are supported" + ); + let teddy = Teddy::new(patterns); + let masks = SlimMaskBuilder::from_teddy(&teddy); + Slim { teddy, masks } + } + + /// Returns the approximate total amount of heap used by this type, in + /// units of bytes. + #[inline(always)] + pub(crate) fn memory_usage(&self) -> usize { + self.teddy.memory_usage() + } + + /// Returns the minimum length, in bytes, that a haystack must be in order + /// to use it with this searcher. + #[inline(always)] + pub(crate) fn minimum_len(&self) -> usize { + V::BYTES + (BYTES - 1) + } +} + +impl Slim { + /// Look for an occurrences of the patterns in this finder in the haystack + /// given by the `start` and `end` pointers. + /// + /// If no match could be found, then `None` is returned. + /// + /// # Safety + /// + /// The given pointers representing the haystack must be valid to read + /// from. They must also point to a region of memory that is at least the + /// minimum length required by this searcher. 
+ /// + /// Callers must ensure that this is okay to call in the current target for + /// the current CPU. + #[inline(always)] + pub(crate) unsafe fn find( + &self, + start: *const u8, + end: *const u8, + ) -> Option { + let len = end.distance(start); + debug_assert!(len >= self.minimum_len()); + let mut cur = start; + while cur <= end.sub(V::BYTES) { + if let Some(m) = self.find_one(cur, end) { + return Some(m); + } + cur = cur.add(V::BYTES); + } + if cur < end { + cur = end.sub(V::BYTES); + if let Some(m) = self.find_one(cur, end) { + return Some(m); + } + } + None + } + + /// Look for a match starting at the `V::BYTES` at and after `cur`. If + /// there isn't one, then `None` is returned. + /// + /// # Safety + /// + /// The given pointers representing the haystack must be valid to read + /// from. They must also point to a region of memory that is at least the + /// minimum length required by this searcher. + /// + /// Callers must ensure that this is okay to call in the current target for + /// the current CPU. + #[inline(always)] + unsafe fn find_one( + &self, + cur: *const u8, + end: *const u8, + ) -> Option { + let c = self.candidate(cur); + if !c.is_zero() { + if let Some(m) = self.teddy.verify(cur, end, c) { + return Some(m); + } + } + None + } + + /// Look for a candidate match (represented as a vector) starting at the + /// `V::BYTES` at and after `cur`. If there isn't one, then a vector with + /// all bits set to zero is returned. + /// + /// # Safety + /// + /// The given pointer representing the haystack must be valid to read + /// from. + /// + /// Callers must ensure that this is okay to call in the current target for + /// the current CPU. + #[inline(always)] + unsafe fn candidate(&self, cur: *const u8) -> V { + let chunk = V::load_unaligned(cur); + Mask::members1(chunk, self.masks) + } +} + +impl Slim { + /// See Slim::find. 
+ #[inline(always)] + pub(crate) unsafe fn find( + &self, + start: *const u8, + end: *const u8, + ) -> Option { + let len = end.distance(start); + debug_assert!(len >= self.minimum_len()); + let mut cur = start.add(1); + let mut prev0 = V::splat(0xFF); + while cur <= end.sub(V::BYTES) { + if let Some(m) = self.find_one(cur, end, &mut prev0) { + return Some(m); + } + cur = cur.add(V::BYTES); + } + if cur < end { + cur = end.sub(V::BYTES); + prev0 = V::splat(0xFF); + if let Some(m) = self.find_one(cur, end, &mut prev0) { + return Some(m); + } + } + None + } + + /// See Slim::find_one. + #[inline(always)] + unsafe fn find_one( + &self, + cur: *const u8, + end: *const u8, + prev0: &mut V, + ) -> Option { + let c = self.candidate(cur, prev0); + if !c.is_zero() { + if let Some(m) = self.teddy.verify(cur.sub(1), end, c) { + return Some(m); + } + } + None + } + + /// See Slim::candidate. + #[inline(always)] + unsafe fn candidate(&self, cur: *const u8, prev0: &mut V) -> V { + let chunk = V::load_unaligned(cur); + let (res0, res1) = Mask::members2(chunk, self.masks); + let res0prev0 = res0.shift_in_one_byte(*prev0); + let res = res0prev0.and(res1); + *prev0 = res0; + res + } +} + +impl Slim { + /// See Slim::find. + #[inline(always)] + pub(crate) unsafe fn find( + &self, + start: *const u8, + end: *const u8, + ) -> Option { + let len = end.distance(start); + debug_assert!(len >= self.minimum_len()); + let mut cur = start.add(2); + let mut prev0 = V::splat(0xFF); + let mut prev1 = V::splat(0xFF); + while cur <= end.sub(V::BYTES) { + if let Some(m) = self.find_one(cur, end, &mut prev0, &mut prev1) { + return Some(m); + } + cur = cur.add(V::BYTES); + } + if cur < end { + cur = end.sub(V::BYTES); + prev0 = V::splat(0xFF); + prev1 = V::splat(0xFF); + if let Some(m) = self.find_one(cur, end, &mut prev0, &mut prev1) { + return Some(m); + } + } + None + } + + /// See Slim::find_one. 
+ #[inline(always)] + unsafe fn find_one( + &self, + cur: *const u8, + end: *const u8, + prev0: &mut V, + prev1: &mut V, + ) -> Option { + let c = self.candidate(cur, prev0, prev1); + if !c.is_zero() { + if let Some(m) = self.teddy.verify(cur.sub(2), end, c) { + return Some(m); + } + } + None + } + + /// See Slim::candidate. + #[inline(always)] + unsafe fn candidate( + &self, + cur: *const u8, + prev0: &mut V, + prev1: &mut V, + ) -> V { + let chunk = V::load_unaligned(cur); + let (res0, res1, res2) = Mask::members3(chunk, self.masks); + let res0prev0 = res0.shift_in_two_bytes(*prev0); + let res1prev1 = res1.shift_in_one_byte(*prev1); + let res = res0prev0.and(res1prev1).and(res2); + *prev0 = res0; + *prev1 = res1; + res + } +} + +impl Slim { + /// See Slim::find. + #[inline(always)] + pub(crate) unsafe fn find( + &self, + start: *const u8, + end: *const u8, + ) -> Option { + let len = end.distance(start); + debug_assert!(len >= self.minimum_len()); + let mut cur = start.add(3); + let mut prev0 = V::splat(0xFF); + let mut prev1 = V::splat(0xFF); + let mut prev2 = V::splat(0xFF); + while cur <= end.sub(V::BYTES) { + if let Some(m) = + self.find_one(cur, end, &mut prev0, &mut prev1, &mut prev2) + { + return Some(m); + } + cur = cur.add(V::BYTES); + } + if cur < end { + cur = end.sub(V::BYTES); + prev0 = V::splat(0xFF); + prev1 = V::splat(0xFF); + prev2 = V::splat(0xFF); + if let Some(m) = + self.find_one(cur, end, &mut prev0, &mut prev1, &mut prev2) + { + return Some(m); + } + } + None + } + + /// See Slim::find_one. + #[inline(always)] + unsafe fn find_one( + &self, + cur: *const u8, + end: *const u8, + prev0: &mut V, + prev1: &mut V, + prev2: &mut V, + ) -> Option { + let c = self.candidate(cur, prev0, prev1, prev2); + if !c.is_zero() { + if let Some(m) = self.teddy.verify(cur.sub(3), end, c) { + return Some(m); + } + } + None + } + + /// See Slim::candidate. 
+ #[inline(always)] + unsafe fn candidate( + &self, + cur: *const u8, + prev0: &mut V, + prev1: &mut V, + prev2: &mut V, + ) -> V { + let chunk = V::load_unaligned(cur); + let (res0, res1, res2, res3) = Mask::members4(chunk, self.masks); + let res0prev0 = res0.shift_in_three_bytes(*prev0); + let res1prev1 = res1.shift_in_two_bytes(*prev1); + let res2prev2 = res2.shift_in_one_byte(*prev2); + let res = res0prev0.and(res1prev1).and(res2prev2).and(res3); + *prev0 = res0; + *prev1 = res1; + *prev2 = res2; + res + } +} + +/// A "fat" Teddy implementation that is generic over both the vector type +/// and the minimum length of the patterns being searched for. +/// +/// Only 1, 2, 3 and 4 bytes are supported as minimum lengths. +#[derive(Clone, Debug)] +pub(crate) struct Fat { + /// A generic data structure for doing "fat" Teddy verification. + teddy: Teddy<16>, + /// The masks used as inputs to the shuffle operation to generate + /// candidates (which are fed into the verification routines). + masks: [Mask; BYTES], +} + +impl Fat { + /// Create a new "fat" Teddy searcher for the given patterns. + /// + /// # Panics + /// + /// This panics when `BYTES` is any value other than 1, 2, 3 or 4. + /// + /// # Safety + /// + /// Callers must ensure that this is okay to call in the current target for + /// the current CPU. + #[inline(always)] + pub(crate) unsafe fn new(patterns: Arc) -> Fat { + assert!( + 1 <= BYTES && BYTES <= 4, + "only 1, 2, 3 or 4 bytes are supported" + ); + let teddy = Teddy::new(patterns); + let masks = FatMaskBuilder::from_teddy(&teddy); + Fat { teddy, masks } + } + + /// Returns the approximate total amount of heap used by this type, in + /// units of bytes. + #[inline(always)] + pub(crate) fn memory_usage(&self) -> usize { + self.teddy.memory_usage() + } + + /// Returns the minimum length, in bytes, that a haystack must be in order + /// to use it with this searcher. 
+ #[inline(always)] + pub(crate) fn minimum_len(&self) -> usize { + V::Half::BYTES + (BYTES - 1) + } +} + +impl Fat { + /// Look for an occurrences of the patterns in this finder in the haystack + /// given by the `start` and `end` pointers. + /// + /// If no match could be found, then `None` is returned. + /// + /// # Safety + /// + /// The given pointers representing the haystack must be valid to read + /// from. They must also point to a region of memory that is at least the + /// minimum length required by this searcher. + /// + /// Callers must ensure that this is okay to call in the current target for + /// the current CPU. + #[inline(always)] + pub(crate) unsafe fn find( + &self, + start: *const u8, + end: *const u8, + ) -> Option { + let len = end.distance(start); + debug_assert!(len >= self.minimum_len()); + let mut cur = start; + while cur <= end.sub(V::Half::BYTES) { + if let Some(m) = self.find_one(cur, end) { + return Some(m); + } + cur = cur.add(V::Half::BYTES); + } + if cur < end { + cur = end.sub(V::Half::BYTES); + if let Some(m) = self.find_one(cur, end) { + return Some(m); + } + } + None + } + + /// Look for a match starting at the `V::BYTES` at and after `cur`. If + /// there isn't one, then `None` is returned. + /// + /// # Safety + /// + /// The given pointers representing the haystack must be valid to read + /// from. They must also point to a region of memory that is at least the + /// minimum length required by this searcher. + /// + /// Callers must ensure that this is okay to call in the current target for + /// the current CPU. + #[inline(always)] + unsafe fn find_one( + &self, + cur: *const u8, + end: *const u8, + ) -> Option { + let c = self.candidate(cur); + if !c.is_zero() { + if let Some(m) = self.teddy.verify(cur, end, c) { + return Some(m); + } + } + None + } + + /// Look for a candidate match (represented as a vector) starting at the + /// `V::BYTES` at and after `cur`. 
If there isn't one, then a vector with + /// all bits set to zero is returned. + /// + /// # Safety + /// + /// The given pointer representing the haystack must be valid to read + /// from. + /// + /// Callers must ensure that this is okay to call in the current target for + /// the current CPU. + #[inline(always)] + unsafe fn candidate(&self, cur: *const u8) -> V { + let chunk = V::load_half_unaligned(cur); + Mask::members1(chunk, self.masks) + } +} + +impl Fat { + /// See `Fat::find`. + #[inline(always)] + pub(crate) unsafe fn find( + &self, + start: *const u8, + end: *const u8, + ) -> Option { + let len = end.distance(start); + debug_assert!(len >= self.minimum_len()); + let mut cur = start.add(1); + let mut prev0 = V::splat(0xFF); + while cur <= end.sub(V::Half::BYTES) { + if let Some(m) = self.find_one(cur, end, &mut prev0) { + return Some(m); + } + cur = cur.add(V::Half::BYTES); + } + if cur < end { + cur = end.sub(V::Half::BYTES); + prev0 = V::splat(0xFF); + if let Some(m) = self.find_one(cur, end, &mut prev0) { + return Some(m); + } + } + None + } + + /// See `Fat::find_one`. + #[inline(always)] + unsafe fn find_one( + &self, + cur: *const u8, + end: *const u8, + prev0: &mut V, + ) -> Option { + let c = self.candidate(cur, prev0); + if !c.is_zero() { + if let Some(m) = self.teddy.verify(cur.sub(1), end, c) { + return Some(m); + } + } + None + } + + /// See `Fat::candidate`. + #[inline(always)] + unsafe fn candidate(&self, cur: *const u8, prev0: &mut V) -> V { + let chunk = V::load_half_unaligned(cur); + let (res0, res1) = Mask::members2(chunk, self.masks); + let res0prev0 = res0.half_shift_in_one_byte(*prev0); + let res = res0prev0.and(res1); + *prev0 = res0; + res + } +} + +impl Fat { + /// See `Fat::find`. 
+ #[inline(always)] + pub(crate) unsafe fn find( + &self, + start: *const u8, + end: *const u8, + ) -> Option { + let len = end.distance(start); + debug_assert!(len >= self.minimum_len()); + let mut cur = start.add(2); + let mut prev0 = V::splat(0xFF); + let mut prev1 = V::splat(0xFF); + while cur <= end.sub(V::Half::BYTES) { + if let Some(m) = self.find_one(cur, end, &mut prev0, &mut prev1) { + return Some(m); + } + cur = cur.add(V::Half::BYTES); + } + if cur < end { + cur = end.sub(V::Half::BYTES); + prev0 = V::splat(0xFF); + prev1 = V::splat(0xFF); + if let Some(m) = self.find_one(cur, end, &mut prev0, &mut prev1) { + return Some(m); + } + } + None + } + + /// See `Fat::find_one`. + #[inline(always)] + unsafe fn find_one( + &self, + cur: *const u8, + end: *const u8, + prev0: &mut V, + prev1: &mut V, + ) -> Option { + let c = self.candidate(cur, prev0, prev1); + if !c.is_zero() { + if let Some(m) = self.teddy.verify(cur.sub(2), end, c) { + return Some(m); + } + } + None + } + + /// See `Fat::candidate`. + #[inline(always)] + unsafe fn candidate( + &self, + cur: *const u8, + prev0: &mut V, + prev1: &mut V, + ) -> V { + let chunk = V::load_half_unaligned(cur); + let (res0, res1, res2) = Mask::members3(chunk, self.masks); + let res0prev0 = res0.half_shift_in_two_bytes(*prev0); + let res1prev1 = res1.half_shift_in_one_byte(*prev1); + let res = res0prev0.and(res1prev1).and(res2); + *prev0 = res0; + *prev1 = res1; + res + } +} + +impl Fat { + /// See `Fat::find`. 
+ #[inline(always)] + pub(crate) unsafe fn find( + &self, + start: *const u8, + end: *const u8, + ) -> Option { + let len = end.distance(start); + debug_assert!(len >= self.minimum_len()); + let mut cur = start.add(3); + let mut prev0 = V::splat(0xFF); + let mut prev1 = V::splat(0xFF); + let mut prev2 = V::splat(0xFF); + while cur <= end.sub(V::Half::BYTES) { + if let Some(m) = + self.find_one(cur, end, &mut prev0, &mut prev1, &mut prev2) + { + return Some(m); + } + cur = cur.add(V::Half::BYTES); + } + if cur < end { + cur = end.sub(V::Half::BYTES); + prev0 = V::splat(0xFF); + prev1 = V::splat(0xFF); + prev2 = V::splat(0xFF); + if let Some(m) = + self.find_one(cur, end, &mut prev0, &mut prev1, &mut prev2) + { + return Some(m); + } + } + None + } + + /// See `Fat::find_one`. + #[inline(always)] + unsafe fn find_one( + &self, + cur: *const u8, + end: *const u8, + prev0: &mut V, + prev1: &mut V, + prev2: &mut V, + ) -> Option { + let c = self.candidate(cur, prev0, prev1, prev2); + if !c.is_zero() { + if let Some(m) = self.teddy.verify(cur.sub(3), end, c) { + return Some(m); + } + } + None + } + + /// See `Fat::candidate`. + #[inline(always)] + unsafe fn candidate( + &self, + cur: *const u8, + prev0: &mut V, + prev1: &mut V, + prev2: &mut V, + ) -> V { + let chunk = V::load_half_unaligned(cur); + let (res0, res1, res2, res3) = Mask::members4(chunk, self.masks); + let res0prev0 = res0.half_shift_in_three_bytes(*prev0); + let res1prev1 = res1.half_shift_in_two_bytes(*prev1); + let res2prev2 = res2.half_shift_in_one_byte(*prev2); + let res = res0prev0.and(res1prev1).and(res2prev2).and(res3); + *prev0 = res0; + *prev1 = res1; + *prev2 = res2; + res + } +} + +/// The common elements of all "slim" and "fat" Teddy search implementations. +/// +/// Essentially, this contains the patterns and the buckets. Namely, it +/// contains enough to implement the verification step after candidates are +/// identified via the shuffle masks. 
+/// +/// It is generic over the number of buckets used. In general, the number of +/// buckets is either 8 (for "slim" Teddy) or 16 (for "fat" Teddy). The generic +/// parameter isn't really meant to be instantiated for any value other than +/// 8 or 16, although it is technically possible. The main hiccup is that there +/// is some bit-shifting done in the critical part of verification that could +/// be quite expensive if `N` is not a multiple of 2. +#[derive(Clone, Debug)] +struct Teddy { + /// The patterns we are searching for. + /// + /// A pattern string can be found by its `PatternID`. + patterns: Arc, + /// The allocation of patterns in buckets. This only contains the IDs of + /// patterns. In order to do full verification, callers must provide the + /// actual patterns when using Teddy. + buckets: [Vec; BUCKETS], + // N.B. The above representation is very simple, but it definitely results + // in ping-ponging between different allocations during verification. I've + // tried experimenting with other representations that flatten the pattern + // strings into a single allocation, but it doesn't seem to help much. + // Probably everything is small enough to fit into cache anyway, and so the + // pointer chasing isn't a big deal? + // + // One other avenue I haven't explored is some kind of hashing trick + // that let's us do another high-confidence check before launching into + // `memcmp`. +} + +impl Teddy { + /// Create a new generic data structure for Teddy verification. + fn new(patterns: Arc) -> Teddy { + assert_ne!(0, patterns.len(), "Teddy requires at least one pattern"); + assert_ne!( + 0, + patterns.minimum_len(), + "Teddy does not support zero-length patterns" + ); + assert!( + BUCKETS == 8 || BUCKETS == 16, + "Teddy only supports 8 or 16 buckets" + ); + // MSRV(1.63): Use core::array::from_fn below instead of allocating a + // superfluous outer Vec. Not a big deal (especially given the BTreeMap + // allocation below), but nice to not do it. 
+ let buckets = + <[Vec; BUCKETS]>::try_from(vec![vec![]; BUCKETS]) + .unwrap(); + let mut t = Teddy { patterns, buckets }; + + let mut map: BTreeMap, usize> = BTreeMap::new(); + for (id, pattern) in t.patterns.iter() { + // We try to be slightly clever in how we assign patterns into + // buckets. Generally speaking, we want patterns with the same + // prefix to be in the same bucket, since it minimizes the amount + // of time we spend churning through buckets in the verification + // step. + // + // So we could assign patterns with the same N-prefix (where N is + // the size of the mask, which is one of {1, 2, 3}) to the same + // bucket. However, case insensitive searches are fairly common, so + // we'd for example, ideally want to treat `abc` and `ABC` as if + // they shared the same prefix. ASCII has the nice property that + // the lower 4 bits of A and a are the same, so we therefore group + // patterns with the same low-nybble-N-prefix into the same bucket. + // + // MOREOVER, this is actually necessary for correctness! In + // particular, by grouping patterns with the same prefix into the + // same bucket, we ensure that we preserve correct leftmost-first + // and leftmost-longest match semantics. In addition to the fact + // that `patterns.iter()` iterates in the correct order, this + // guarantees that all possible ambiguous matches will occur in + // the same bucket. The verification routine could be adjusted to + // support correct leftmost match semantics regardless of bucket + // allocation, but that results in a performance hit. It's much + // nicer to be able to just stop as soon as a match is found. + let lonybs = pattern.low_nybbles(t.mask_len()); + if let Some(&bucket) = map.get(&lonybs) { + t.buckets[bucket].push(id); + } else { + // N.B. We assign buckets in reverse because it shouldn't have + // any influence on performance, but it does make it harder to + // get leftmost match semantics accidentally correct. 
+ let bucket = (BUCKETS - 1) - (id.as_usize() % BUCKETS); + t.buckets[bucket].push(id); + map.insert(lonybs, bucket); + } + } + t + } + + /// Verify whether there are any matches starting at or after `cur` in the + /// haystack. The candidate chunk given should correspond to 8-bit bitsets + /// for N buckets. + /// + /// # Safety + /// + /// The given pointers representing the haystack must be valid to read + /// from. + #[inline(always)] + unsafe fn verify64( + &self, + cur: *const u8, + end: *const u8, + mut candidate_chunk: u64, + ) -> Option { + while candidate_chunk != 0 { + let bit = candidate_chunk.trailing_zeros().as_usize(); + candidate_chunk &= !(1 << bit); + + let cur = cur.add(bit / BUCKETS); + let bucket = bit % BUCKETS; + if let Some(m) = self.verify_bucket(cur, end, bucket) { + return Some(m); + } + } + None + } + + /// Verify whether there are any matches starting at `at` in the given + /// `haystack` corresponding only to patterns in the given bucket. + /// + /// # Safety + /// + /// The given pointers representing the haystack must be valid to read + /// from. + /// + /// The bucket index must be less than or equal to `self.buckets.len()`. + #[inline(always)] + unsafe fn verify_bucket( + &self, + cur: *const u8, + end: *const u8, + bucket: usize, + ) -> Option { + debug_assert!(bucket < self.buckets.len()); + // SAFETY: The caller must ensure that the bucket index is correct. + for pid in self.buckets.get_unchecked(bucket).iter().copied() { + // SAFETY: This is safe because we are guaranteed that every + // index in a Teddy bucket is a valid index into `pats`, by + // construction. + debug_assert!(pid.as_usize() < self.patterns.len()); + let pat = self.patterns.get_unchecked(pid); + if pat.is_prefix_raw(cur, end) { + let start = cur; + let end = start.add(pat.len()); + return Some(Match { pid, start, end }); + } + } + None + } + + /// Returns the total number of masks required by the patterns in this + /// Teddy searcher. 
+ /// + /// Basically, the mask length corresponds to the type of Teddy searcher + /// to use: a 1-byte, 2-byte, 3-byte or 4-byte searcher. The bigger the + /// better, typically, since searching for longer substrings usually + /// decreases the rate of false positives. Therefore, the number of masks + /// needed is the length of the shortest pattern in this searcher. If the + /// length of the shortest pattern (in bytes) is bigger than 4, then the + /// mask length is 4 since there are no Teddy searchers for more than 4 + /// bytes. + fn mask_len(&self) -> usize { + core::cmp::min(4, self.patterns.minimum_len()) + } + + /// Returns the approximate total amount of heap used by this type, in + /// units of bytes. + fn memory_usage(&self) -> usize { + // This is an upper bound rather than a precise accounting. No + // particular reason, other than it's probably very close to actual + // memory usage in practice. + self.patterns.len() * core::mem::size_of::() + } +} + +impl Teddy<8> { + /// Runs the verification routine for "slim" Teddy. + /// + /// The candidate given should be a collection of 8-bit bitsets (one bitset + /// per lane), where the ith bit is set in the jth lane if and only if the + /// byte occurring at `at + j` in `cur` is in the bucket `i`. + /// + /// # Safety + /// + /// Callers must ensure that this is okay to call in the current target for + /// the current CPU. + /// + /// The given pointers must be valid to read from. + #[inline(always)] + unsafe fn verify( + &self, + mut cur: *const u8, + end: *const u8, + candidate: V, + ) -> Option { + debug_assert!(!candidate.is_zero()); + // Convert the candidate into 64-bit chunks, and then verify each of + // those chunks. + candidate.for_each_64bit_lane( + #[inline(always)] + |_, chunk| { + let result = self.verify64(cur, end, chunk); + cur = cur.add(8); + result + }, + ) + } +} + +impl Teddy<16> { + /// Runs the verification routine for "fat" Teddy. 
+ /// + /// The candidate given should be a collection of 8-bit bitsets (one bitset + /// per lane), where the ith bit is set in the jth lane if and only if the + /// byte occurring at `at + (j < 16 ? j : j - 16)` in `cur` is in the + /// bucket `j < 16 ? i : i + 8`. + /// + /// # Safety + /// + /// Callers must ensure that this is okay to call in the current target for + /// the current CPU. + /// + /// The given pointers must be valid to read from. + #[inline(always)] + unsafe fn verify( + &self, + mut cur: *const u8, + end: *const u8, + candidate: V, + ) -> Option { + // This is a bit tricky, but we basically want to convert our + // candidate, which looks like this (assuming a 256-bit vector): + // + // a31 a30 ... a17 a16 a15 a14 ... a01 a00 + // + // where each a(i) is an 8-bit bitset corresponding to the activated + // buckets, to this + // + // a31 a15 a30 a14 a29 a13 ... a18 a02 a17 a01 a16 a00 + // + // Namely, for Fat Teddy, the high 128-bits of the candidate correspond + // to the same bytes in the haystack in the low 128-bits (so we only + // scan 16 bytes at a time), but are for buckets 8-15 instead of 0-7. + // + // The verification routine wants to look at all potentially matching + // buckets before moving on to the next lane. So for example, both + // a16 and a00 both correspond to the first byte in our window; a00 + // contains buckets 0-7 and a16 contains buckets 8-15. Specifically, + // a16 should be checked before a01. So the transformation shown above + // allows us to use our normal verification procedure with one small + // change: we treat each bitset as 16 bits instead of 8 bits. + debug_assert!(!candidate.is_zero()); + + // Swap the 128-bit lanes in the candidate vector. + let swapped = candidate.swap_halves(); + // Interleave the bytes from the low 128-bit lanes, starting with + // cand first. + let r1 = candidate.interleave_low_8bit_lanes(swapped); + // Interleave the bytes from the high 128-bit lanes, starting with + // cand first. 
+ let r2 = candidate.interleave_high_8bit_lanes(swapped); + // Now just take the 2 low 64-bit integers from both r1 and r2. We + // can drop the high 64-bit integers because they are a mirror image + // of the low 64-bit integers. All we care about are the low 128-bit + // lanes of r1 and r2. Combined, they contain all our 16-bit bitsets + // laid out in the desired order, as described above. + r1.for_each_low_64bit_lane( + r2, + #[inline(always)] + |_, chunk| { + let result = self.verify64(cur, end, chunk); + cur = cur.add(4); + result + }, + ) + } +} + +/// A vector generic mask for the low and high nybbles in a set of patterns. +/// Each 8-bit lane `j` in a vector corresponds to a bitset where the `i`th bit +/// is set if and only if the nybble `j` is in the bucket `i` at a particular +/// position. +/// +/// This is slightly tweaked dependending on whether Slim or Fat Teddy is being +/// used. For Slim Teddy, the bitsets in the lower half are the same as the +/// bitsets in the higher half, so that we can search `V::BYTES` bytes at a +/// time. (Remember, the nybbles in the haystack are used as indices into these +/// masks, and 256-bit shuffles only operate on 128-bit lanes.) +/// +/// For Fat Teddy, the bitsets are not repeated, but instead, the high half +/// bits correspond to an addition 8 buckets. So that a bitset `00100010` has +/// buckets 1 and 5 set if it's in the lower half, but has buckets 9 and 13 set +/// if it's in the higher half. +#[derive(Clone, Copy, Debug)] +struct Mask { + lo: V, + hi: V, +} + +impl Mask { + /// Return a candidate for Teddy (fat or slim) that is searching for 1-byte + /// candidates. + /// + /// If a candidate is returned, it will be a collection of 8-bit bitsets + /// (one bitset per lane), where the ith bit is set in the jth lane if and + /// only if the byte occurring at the jth lane in `chunk` is in the bucket + /// `i`. If no candidate is found, then the vector returned will have all + /// lanes set to zero. 
+ /// + /// `chunk` should correspond to a `V::BYTES` window of the haystack (where + /// the least significant byte corresponds to the start of the window). For + /// fat Teddy, the haystack window length should be `V::BYTES / 2`, with + /// the window repeated in each half of the vector. + /// + /// `mask1` should correspond to a low/high mask for the first byte of all + /// patterns that are being searched. + #[inline(always)] + unsafe fn members1(chunk: V, masks: [Mask; 1]) -> V { + let lomask = V::splat(0xF); + let hlo = chunk.and(lomask); + let hhi = chunk.shift_8bit_lane_right::<4>().and(lomask); + let locand = masks[0].lo.shuffle_bytes(hlo); + let hicand = masks[0].hi.shuffle_bytes(hhi); + locand.and(hicand) + } + + /// Return a candidate for Teddy (fat or slim) that is searching for 2-byte + /// candidates. + /// + /// If candidates are returned, each will be a collection of 8-bit bitsets + /// (one bitset per lane), where the ith bit is set in the jth lane if and + /// only if the byte occurring at the jth lane in `chunk` is in the bucket + /// `i`. Each candidate returned corresponds to the first and second bytes + /// of the patterns being searched. If no candidate is found, then all of + /// the lanes will be set to zero in at least one of the vectors returned. + /// + /// `chunk` should correspond to a `V::BYTES` window of the haystack (where + /// the least significant byte corresponds to the start of the window). For + /// fat Teddy, the haystack window length should be `V::BYTES / 2`, with + /// the window repeated in each half of the vector. + /// + /// The masks should correspond to the masks computed for the first and + /// second bytes of all patterns that are being searched. 
+ #[inline(always)] + unsafe fn members2(chunk: V, masks: [Mask; 2]) -> (V, V) { + let lomask = V::splat(0xF); + let hlo = chunk.and(lomask); + let hhi = chunk.shift_8bit_lane_right::<4>().and(lomask); + + let locand1 = masks[0].lo.shuffle_bytes(hlo); + let hicand1 = masks[0].hi.shuffle_bytes(hhi); + let cand1 = locand1.and(hicand1); + + let locand2 = masks[1].lo.shuffle_bytes(hlo); + let hicand2 = masks[1].hi.shuffle_bytes(hhi); + let cand2 = locand2.and(hicand2); + + (cand1, cand2) + } + + /// Return a candidate for Teddy (fat or slim) that is searching for 3-byte + /// candidates. + /// + /// If candidates are returned, each will be a collection of 8-bit bitsets + /// (one bitset per lane), where the ith bit is set in the jth lane if and + /// only if the byte occurring at the jth lane in `chunk` is in the bucket + /// `i`. Each candidate returned corresponds to the first, second and third + /// bytes of the patterns being searched. If no candidate is found, then + /// all of the lanes will be set to zero in at least one of the vectors + /// returned. + /// + /// `chunk` should correspond to a `V::BYTES` window of the haystack (where + /// the least significant byte corresponds to the start of the window). For + /// fat Teddy, the haystack window length should be `V::BYTES / 2`, with + /// the window repeated in each half of the vector. + /// + /// The masks should correspond to the masks computed for the first, second + /// and third bytes of all patterns that are being searched. 
+ #[inline(always)] + unsafe fn members3(chunk: V, masks: [Mask; 3]) -> (V, V, V) { + let lomask = V::splat(0xF); + let hlo = chunk.and(lomask); + let hhi = chunk.shift_8bit_lane_right::<4>().and(lomask); + + let locand1 = masks[0].lo.shuffle_bytes(hlo); + let hicand1 = masks[0].hi.shuffle_bytes(hhi); + let cand1 = locand1.and(hicand1); + + let locand2 = masks[1].lo.shuffle_bytes(hlo); + let hicand2 = masks[1].hi.shuffle_bytes(hhi); + let cand2 = locand2.and(hicand2); + + let locand3 = masks[2].lo.shuffle_bytes(hlo); + let hicand3 = masks[2].hi.shuffle_bytes(hhi); + let cand3 = locand3.and(hicand3); + + (cand1, cand2, cand3) + } + + /// Return a candidate for Teddy (fat or slim) that is searching for 4-byte + /// candidates. + /// + /// If candidates are returned, each will be a collection of 8-bit bitsets + /// (one bitset per lane), where the ith bit is set in the jth lane if and + /// only if the byte occurring at the jth lane in `chunk` is in the bucket + /// `i`. Each candidate returned corresponds to the first, second, third + /// and fourth bytes of the patterns being searched. If no candidate is + /// found, then all of the lanes will be set to zero in at least one of the + /// vectors returned. + /// + /// `chunk` should correspond to a `V::BYTES` window of the haystack (where + /// the least significant byte corresponds to the start of the window). For + /// fat Teddy, the haystack window length should be `V::BYTES / 2`, with + /// the window repeated in each half of the vector. + /// + /// The masks should correspond to the masks computed for the first, + /// second, third and fourth bytes of all patterns that are being searched. 
+ #[inline(always)] + unsafe fn members4(chunk: V, masks: [Mask; 4]) -> (V, V, V, V) { + let lomask = V::splat(0xF); + let hlo = chunk.and(lomask); + let hhi = chunk.shift_8bit_lane_right::<4>().and(lomask); + + let locand1 = masks[0].lo.shuffle_bytes(hlo); + let hicand1 = masks[0].hi.shuffle_bytes(hhi); + let cand1 = locand1.and(hicand1); + + let locand2 = masks[1].lo.shuffle_bytes(hlo); + let hicand2 = masks[1].hi.shuffle_bytes(hhi); + let cand2 = locand2.and(hicand2); + + let locand3 = masks[2].lo.shuffle_bytes(hlo); + let hicand3 = masks[2].hi.shuffle_bytes(hhi); + let cand3 = locand3.and(hicand3); + + let locand4 = masks[3].lo.shuffle_bytes(hlo); + let hicand4 = masks[3].hi.shuffle_bytes(hhi); + let cand4 = locand4.and(hicand4); + + (cand1, cand2, cand3, cand4) + } +} + +/// Represents the low and high nybble masks that will be used during +/// search. Each mask is 32 bytes wide, although only the first 16 bytes are +/// used for 128-bit vectors. +/// +/// Each byte in the mask corresponds to a 8-bit bitset, where bit `i` is set +/// if and only if the corresponding nybble is in the ith bucket. The index of +/// the byte (0-15, inclusive) corresponds to the nybble. +/// +/// Each mask is used as the target of a shuffle, where the indices for the +/// shuffle are taken from the haystack. AND'ing the shuffles for both the +/// low and high masks together also results in 8-bit bitsets, but where bit +/// `i` is set if and only if the correspond *byte* is in the ith bucket. +#[derive(Clone, Default)] +struct SlimMaskBuilder { + lo: [u8; 32], + hi: [u8; 32], +} + +impl SlimMaskBuilder { + /// Update this mask by adding the given byte to the given bucket. The + /// given bucket must be in the range 0-7. + /// + /// # Panics + /// + /// When `bucket >= 8`. 
+ fn add(&mut self, bucket: usize, byte: u8) { + assert!(bucket < 8); + + let bucket = u8::try_from(bucket).unwrap(); + let byte_lo = usize::from(byte & 0xF); + let byte_hi = usize::from((byte >> 4) & 0xF); + // When using 256-bit vectors, we need to set this bucket assignment in + // the low and high 128-bit portions of the mask. This allows us to + // process 32 bytes at a time. Namely, AVX2 shuffles operate on each + // of the 128-bit lanes, rather than the full 256-bit vector at once. + self.lo[byte_lo] |= 1 << bucket; + self.lo[byte_lo + 16] |= 1 << bucket; + self.hi[byte_hi] |= 1 << bucket; + self.hi[byte_hi + 16] |= 1 << bucket; + } + + /// Turn this builder into a vector mask. + /// + /// # Panics + /// + /// When `V` represents a vector bigger than what `MaskBytes` can contain. + /// + /// # Safety + /// + /// Callers must ensure that this is okay to call in the current target for + /// the current CPU. + #[inline(always)] + unsafe fn build(&self) -> Mask { + assert!(V::BYTES <= self.lo.len()); + assert!(V::BYTES <= self.hi.len()); + Mask { + lo: V::load_unaligned(self.lo[..].as_ptr()), + hi: V::load_unaligned(self.hi[..].as_ptr()), + } + } + + /// A convenience function for building `N` vector masks from a slim + /// `Teddy` value. + /// + /// # Panics + /// + /// When `V` represents a vector bigger than what `MaskBytes` can contain. + /// + /// # Safety + /// + /// Callers must ensure that this is okay to call in the current target for + /// the current CPU. + #[inline(always)] + unsafe fn from_teddy( + teddy: &Teddy<8>, + ) -> [Mask; BYTES] { + // MSRV(1.63): Use core::array::from_fn to just build the array here + // instead of creating a vector and turning it into an array. 
+ let mut mask_builders = vec![SlimMaskBuilder::default(); BYTES]; + for (bucket_index, bucket) in teddy.buckets.iter().enumerate() { + for pid in bucket.iter().copied() { + let pat = teddy.patterns.get(pid); + for (i, builder) in mask_builders.iter_mut().enumerate() { + builder.add(bucket_index, pat.bytes()[i]); + } + } + } + let array = + <[SlimMaskBuilder; BYTES]>::try_from(mask_builders).unwrap(); + array.map(|builder| builder.build()) + } +} + +impl Debug for SlimMaskBuilder { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let (mut parts_lo, mut parts_hi) = (vec![], vec![]); + for i in 0..32 { + parts_lo.push(format!("{:02}: {:08b}", i, self.lo[i])); + parts_hi.push(format!("{:02}: {:08b}", i, self.hi[i])); + } + f.debug_struct("SlimMaskBuilder") + .field("lo", &parts_lo) + .field("hi", &parts_hi) + .finish() + } +} + +/// Represents the low and high nybble masks that will be used during "fat" +/// Teddy search. +/// +/// Each mask is 32 bytes wide, and at the time of writing, only 256-bit vectors +/// support fat Teddy. +/// +/// A fat Teddy mask is like a slim Teddy mask, except that instead of +/// repeating the bitsets in the high and low 128-bits in 256-bit vectors, the +/// high and low 128-bit halves each represent distinct buckets. (Bringing the +/// total to 16 instead of 8.) This permits spreading the patterns out a bit +/// more and thus putting less pressure on verification to be fast. +/// +/// Each byte in the mask corresponds to a 8-bit bitset, where bit `i` is set +/// if and only if the corresponding nybble is in the ith bucket. The index of +/// the byte (0-15, inclusive) corresponds to the nybble. +#[derive(Clone, Copy, Default)] +struct FatMaskBuilder { + lo: [u8; 32], + hi: [u8; 32], +} + +impl FatMaskBuilder { + /// Update this mask by adding the given byte to the given bucket. The + /// given bucket must be in the range 0-15. + /// + /// # Panics + /// + /// When `bucket >= 16`. 
+ fn add(&mut self, bucket: usize, byte: u8) { + assert!(bucket < 16); + + let bucket = u8::try_from(bucket).unwrap(); + let byte_lo = usize::from(byte & 0xF); + let byte_hi = usize::from((byte >> 4) & 0xF); + // Unlike slim teddy, fat teddy only works with AVX2. For fat teddy, + // the high 128 bits of our mask correspond to buckets 8-15, while the + // low 128 bits correspond to buckets 0-7. + if bucket < 8 { + self.lo[byte_lo] |= 1 << bucket; + self.hi[byte_hi] |= 1 << bucket; + } else { + self.lo[byte_lo + 16] |= 1 << (bucket % 8); + self.hi[byte_hi + 16] |= 1 << (bucket % 8); + } + } + + /// Turn this builder into a vector mask. + /// + /// # Panics + /// + /// When `V` represents a vector bigger than what `MaskBytes` can contain. + /// + /// # Safety + /// + /// Callers must ensure that this is okay to call in the current target for + /// the current CPU. + #[inline(always)] + unsafe fn build(&self) -> Mask { + assert!(V::BYTES <= self.lo.len()); + assert!(V::BYTES <= self.hi.len()); + Mask { + lo: V::load_unaligned(self.lo[..].as_ptr()), + hi: V::load_unaligned(self.hi[..].as_ptr()), + } + } + + /// A convenience function for building `N` vector masks from a fat + /// `Teddy` value. + /// + /// # Panics + /// + /// When `V` represents a vector bigger than what `MaskBytes` can contain. + /// + /// # Safety + /// + /// Callers must ensure that this is okay to call in the current target for + /// the current CPU. + #[inline(always)] + unsafe fn from_teddy( + teddy: &Teddy<16>, + ) -> [Mask; BYTES] { + // MSRV(1.63): Use core::array::from_fn to just build the array here + // instead of creating a vector and turning it into an array. 
+ let mut mask_builders = vec![FatMaskBuilder::default(); BYTES]; + for (bucket_index, bucket) in teddy.buckets.iter().enumerate() { + for pid in bucket.iter().copied() { + let pat = teddy.patterns.get(pid); + for (i, builder) in mask_builders.iter_mut().enumerate() { + builder.add(bucket_index, pat.bytes()[i]); + } + } + } + let array = + <[FatMaskBuilder; BYTES]>::try_from(mask_builders).unwrap(); + array.map(|builder| builder.build()) + } +} + +impl Debug for FatMaskBuilder { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let (mut parts_lo, mut parts_hi) = (vec![], vec![]); + for i in 0..32 { + parts_lo.push(format!("{:02}: {:08b}", i, self.lo[i])); + parts_hi.push(format!("{:02}: {:08b}", i, self.hi[i])); + } + f.debug_struct("FatMaskBuilder") + .field("lo", &parts_lo) + .field("hi", &parts_hi) + .finish() + } +} diff --git a/vendor/aho-corasick/src/packed/teddy/mod.rs b/vendor/aho-corasick/src/packed/teddy/mod.rs index fba14d4..26cfcdc 100644 --- a/vendor/aho-corasick/src/packed/teddy/mod.rs +++ b/vendor/aho-corasick/src/packed/teddy/mod.rs @@ -1,57 +1,9 @@ -#[cfg(not(all(feature = "std", target_arch = "x86_64")))] -pub use crate::packed::teddy::fallback::{Builder, Teddy}; -#[cfg(all(feature = "std", target_arch = "x86_64"))] -pub use crate::packed::teddy::{compile::Builder, runtime::Teddy}; +// Regrettable, but Teddy stuff just isn't used on all targets. And for some +// targets, like aarch64, only "slim" Teddy is used and so "fat" Teddy gets a +// bunch of dead-code warnings. Just not worth trying to squash them. Blech. 
+#![allow(dead_code)] -#[cfg(all(feature = "std", target_arch = "x86_64"))] -mod compile; -#[cfg(all(feature = "std", target_arch = "x86_64"))] -mod runtime; +pub(crate) use self::builder::{Builder, Searcher}; -#[cfg(not(all(feature = "std", target_arch = "x86_64")))] -mod fallback { - use crate::{packed::pattern::Patterns, Match}; - - #[derive(Clone, Debug, Default)] - pub struct Builder(()); - - impl Builder { - pub fn new() -> Builder { - Builder(()) - } - - pub fn build(&self, _: &Patterns) -> Option { - None - } - - pub fn fat(&mut self, _: Option) -> &mut Builder { - self - } - - pub fn avx(&mut self, _: Option) -> &mut Builder { - self - } - } - - #[derive(Clone, Debug)] - pub struct Teddy(()); - - impl Teddy { - pub fn find_at( - &self, - _: &Patterns, - _: &[u8], - _: usize, - ) -> Option { - None - } - - pub fn minimum_len(&self) -> usize { - 0 - } - - pub fn memory_usage(&self) -> usize { - 0 - } - } -} +mod builder; +mod generic; diff --git a/vendor/aho-corasick/src/packed/teddy/runtime.rs b/vendor/aho-corasick/src/packed/teddy/runtime.rs deleted file mode 100644 index f29707a..0000000 --- a/vendor/aho-corasick/src/packed/teddy/runtime.rs +++ /dev/null @@ -1,1573 +0,0 @@ -// See the README in this directory for an explanation of the Teddy algorithm. -// It is strongly recommended to peruse the README before trying to grok this -// code, as its use of SIMD is pretty opaque, although I tried to add comments -// where appropriate. -// -// Moreover, while there is a lot of code in this file, most of it is -// repeated variants of the same thing. Specifically, there are three Teddy -// variants: Slim 128-bit Teddy (8 buckets), Slim 256-bit Teddy (8 buckets) -// and Fat 256-bit Teddy (16 buckets). For each variant, there are three -// implementations, corresponding to mask lengths of 1, 2 and 3. Bringing it to -// a total of nine variants. 
Each one is structured roughly the same: -// -// while at <= len(haystack) - CHUNK_SIZE: -// let candidate = find_candidate_in_chunk(haystack, at) -// if not all zeroes(candidate): -// if match = verify(haystack, at, candidate): -// return match -// -// For the most part, this remains unchanged. The parts that vary are the -// verification routine (for slim vs fat Teddy) and the candidate extraction -// (based on the number of masks). -// -// In the code below, a "candidate" corresponds to a single vector with 8-bit -// lanes. Each lane is itself an 8-bit bitset, where the ith bit is set in the -// jth lane if and only if the byte occurring at position `j` is in the -// bucket `i` (where the `j`th position is the position in the current window -// of the haystack, which is always 16 or 32 bytes). Note to be careful here: -// the ith bit and the jth lane correspond to the least significant bits of the -// vector. So when visualizing how the current window of bytes is stored in a -// vector, you often need to flip it around. For example, the text `abcd` in a -// 4-byte vector would look like this: -// -// 01100100 01100011 01100010 01100001 -// d c b a -// -// When the mask length is 1, then finding the candidate is pretty straight -// forward: you just apply the shuffle indices (from the haystack window) to -// the masks, and then AND them together, as described in the README. But for -// masks of length 2 and 3, you need to keep a little state. Specifically, -// you need to store the final 1 (for mask length 2) or 2 (for mask length 3) -// bytes of the candidate for use when searching the next window. This is for -// handling matches that span two windows. -// -// With respect to the repeated code, it would likely be possible to reduce -// the number of copies of code below using polymorphism, but I find this -// formulation clearer instead of needing to reason through generics. However, -// I admit, there may be a simpler generic construction that I'm missing. 
-// -// All variants are fairly heavily tested in src/packed/tests.rs. - -use core::{arch::x86_64::*, mem}; - -use alloc::vec::Vec; - -use crate::{ - packed::{ - pattern::{PatternID, Patterns}, - teddy::compile, - vector, - }, - util::search::Match, -}; - -/// The Teddy runtime. -/// -/// A Teddy runtime can be used to quickly search for occurrences of one or -/// more patterns. While it does not scale to an arbitrary number of patterns -/// like Aho-Corasick, it does find occurrences for a small set of patterns -/// much more quickly than Aho-Corasick. -/// -/// Teddy cannot run on small haystacks below a certain size, which is -/// dependent on the type of matcher used. This size can be queried via the -/// `minimum_len` method. Violating this will result in a panic. -/// -/// Finally, when callers use a Teddy runtime, they must provide precisely the -/// patterns used to construct the Teddy matcher. Violating this will result -/// in either a panic or incorrect results, but will never sacrifice memory -/// safety. -#[derive(Clone, Debug)] -pub struct Teddy { - /// The allocation of patterns in buckets. This only contains the IDs of - /// patterns. In order to do full verification, callers must provide the - /// actual patterns when using Teddy. - pub buckets: Vec>, - /// The maximum identifier of a pattern. This is used as a sanity check to - /// ensure that the patterns provided by the caller are the same as the - /// patterns that were used to compile the matcher. This sanity check - /// permits safely eliminating bounds checks regardless of what patterns - /// are provided by the caller. - /// - /// Note that users of the aho-corasick crate cannot get this wrong. Only - /// code internal to this crate can get it wrong, since neither `Patterns` - /// type nor the Teddy runtime are public API items. - pub max_pattern_id: PatternID, - /// The actual runtime to use. 
- pub exec: Exec, -} - -impl Teddy { - /// Return the first occurrence of a match in the given haystack after or - /// starting at `at`. - /// - /// The patterns provided must be precisely the same patterns given to the - /// Teddy builder, otherwise this may panic or produce incorrect results. - /// - /// All matches are consistent with the match semantics (leftmost-first or - /// leftmost-longest) set on `pats`. - pub fn find_at( - &self, - pats: &Patterns, - haystack: &[u8], - at: usize, - ) -> Option { - // This assert is a bit subtle, but it's an important guarantee. - // Namely, if the maximum pattern ID seen by Teddy is the same as the - // one in the patterns given, then we are guaranteed that every pattern - // ID in all Teddy buckets are valid indices into `pats`. While this - // is nominally true, there is no guarantee that callers provide the - // same `pats` to both the Teddy builder and the searcher, which would - // otherwise make `find_at` unsafe to call. But this assert lets us - // keep this routine safe and eliminate an important bounds check in - // verification. - assert_eq!( - self.max_pattern_id, - pats.max_pattern_id(), - "teddy must be called with same patterns it was built with", - ); - // SAFETY: The haystack must have at least a minimum number of bytes - // for Teddy to be able to work. The minimum number varies depending on - // which matcher is used below. If this is violated, then it's possible - // for searching to do out-of-bounds writes. - assert!(haystack[at..].len() >= self.minimum_len()); - // SAFETY: The various Teddy matchers are always safe to call because - // the Teddy builder guarantees that a particular Exec variant is - // built only when it can be run the current CPU. That is, the Teddy - // builder will not produce a Exec::TeddySlim1Mask256 unless AVX2 is - // enabled. That is, our dynamic CPU feature detection is performed - // once in the builder, and we rely on the type system to avoid needing - // to do it again. 
- unsafe { - match self.exec { - Exec::TeddySlim1Mask128(ref e) => { - e.find_at(pats, self, haystack, at) - } - Exec::TeddySlim1Mask256(ref e) => { - e.find_at(pats, self, haystack, at) - } - Exec::TeddyFat1Mask256(ref e) => { - e.find_at(pats, self, haystack, at) - } - Exec::TeddySlim2Mask128(ref e) => { - e.find_at(pats, self, haystack, at) - } - Exec::TeddySlim2Mask256(ref e) => { - e.find_at(pats, self, haystack, at) - } - Exec::TeddyFat2Mask256(ref e) => { - e.find_at(pats, self, haystack, at) - } - Exec::TeddySlim3Mask128(ref e) => { - e.find_at(pats, self, haystack, at) - } - Exec::TeddySlim3Mask256(ref e) => { - e.find_at(pats, self, haystack, at) - } - Exec::TeddyFat3Mask256(ref e) => { - e.find_at(pats, self, haystack, at) - } - Exec::TeddySlim4Mask128(ref e) => { - e.find_at(pats, self, haystack, at) - } - Exec::TeddySlim4Mask256(ref e) => { - e.find_at(pats, self, haystack, at) - } - Exec::TeddyFat4Mask256(ref e) => { - e.find_at(pats, self, haystack, at) - } - } - } - } - - /// Returns the minimum length of a haystack that must be provided by - /// callers to this Teddy searcher. Providing a haystack shorter than this - /// will result in a panic, but will never violate memory safety. - pub fn minimum_len(&self) -> usize { - // SAFETY: These values must be correct in order to ensure safety. - // The Teddy runtime assumes their haystacks have at least these - // lengths. Violating this will sacrifice memory safety. 
- match self.exec { - Exec::TeddySlim1Mask128(_) => 16, - Exec::TeddySlim1Mask256(_) => 32, - Exec::TeddyFat1Mask256(_) => 16, - Exec::TeddySlim2Mask128(_) => 17, - Exec::TeddySlim2Mask256(_) => 33, - Exec::TeddyFat2Mask256(_) => 17, - Exec::TeddySlim3Mask128(_) => 18, - Exec::TeddySlim3Mask256(_) => 34, - Exec::TeddyFat3Mask256(_) => 18, - Exec::TeddySlim4Mask128(_) => 19, - Exec::TeddySlim4Mask256(_) => 35, - Exec::TeddyFat4Mask256(_) => 19, - } - } - - /// Returns the approximate total amount of heap used by this searcher, in - /// units of bytes. - pub fn memory_usage(&self) -> usize { - let num_patterns = self.max_pattern_id as usize + 1; - self.buckets.len() * mem::size_of::>() - + num_patterns * mem::size_of::() - } - - /// Runs the verification routine for Slim 128-bit Teddy. - /// - /// The candidate given should be a collection of 8-bit bitsets (one bitset - /// per lane), where the ith bit is set in the jth lane if and only if the - /// byte occurring at `at + j` in `haystack` is in the bucket `i`. - /// - /// This is not safe to call unless the SSSE3 target feature is enabled. - /// The `target_feature` attribute is not applied since this function is - /// always forcefully inlined. - #[inline(always)] - unsafe fn verify128( - &self, - pats: &Patterns, - haystack: &[u8], - at: usize, - cand: __m128i, - ) -> Option { - debug_assert!(!vector::is_all_zeroes128(cand)); - debug_assert_eq!(8, self.buckets.len()); - - // Convert the candidate into 64-bit chunks, and then verify each of - // those chunks. - let parts = vector::unpack64x128(cand); - for (i, &part) in parts.iter().enumerate() { - let pos = at + i * 8; - if let Some(m) = self.verify64(pats, 8, haystack, pos, part) { - return Some(m); - } - } - None - } - - /// Runs the verification routine for Slim 256-bit Teddy. 
- /// - /// The candidate given should be a collection of 8-bit bitsets (one bitset - /// per lane), where the ith bit is set in the jth lane if and only if the - /// byte occurring at `at + j` in `haystack` is in the bucket `i`. - /// - /// This is not safe to call unless the AVX2 target feature is enabled. - /// The `target_feature` attribute is not applied since this function is - /// always forcefully inlined. - #[inline(always)] - unsafe fn verify256( - &self, - pats: &Patterns, - haystack: &[u8], - at: usize, - cand: __m256i, - ) -> Option { - debug_assert!(!vector::is_all_zeroes256(cand)); - debug_assert_eq!(8, self.buckets.len()); - - // Convert the candidate into 64-bit chunks, and then verify each of - // those chunks. - let parts = vector::unpack64x256(cand); - let mut pos = at; - if let Some(m) = self.verify64(pats, 8, haystack, pos, parts[0]) { - return Some(m); - } - pos += 8; - if let Some(m) = self.verify64(pats, 8, haystack, pos, parts[1]) { - return Some(m); - } - pos += 8; - if let Some(m) = self.verify64(pats, 8, haystack, pos, parts[2]) { - return Some(m); - } - pos += 8; - if let Some(m) = self.verify64(pats, 8, haystack, pos, parts[3]) { - return Some(m); - } - None - } - - /// Runs the verification routine for Fat 256-bit Teddy. - /// - /// The candidate given should be a collection of 8-bit bitsets (one bitset - /// per lane), where the ith bit is set in the jth lane if and only if the - /// byte occurring at `at + (j < 16 ? j : j - 16)` in `haystack` is in the - /// bucket `j < 16 ? i : i + 8`. - /// - /// This is not safe to call unless the AVX2 target feature is enabled. - /// The `target_feature` attribute is not applied since this function is - /// always forcefully inlined. 
- #[inline(always)] - unsafe fn verify_fat256( - &self, - pats: &Patterns, - haystack: &[u8], - at: usize, - cand: __m256i, - ) -> Option { - debug_assert!(!vector::is_all_zeroes256(cand)); - debug_assert_eq!(16, self.buckets.len()); - - // This is a bit tricky, but we basically want to convert our - // candidate, which looks like this - // - // a31 a30 ... a17 a16 a15 a14 ... a01 a00 - // - // where each a(i) is an 8-bit bitset corresponding to the activated - // buckets, to this - // - // a31 a15 a30 a14 a29 a13 ... a18 a02 a17 a01 a16 a00 - // - // Namely, for Fat Teddy, the high 128-bits of the candidate correspond - // to the same bytes in the haystack in the low 128-bits (so we only - // scan 16 bytes at a time), but are for buckets 8-15 instead of 0-7. - // - // The verification routine wants to look at all potentially matching - // buckets before moving on to the next lane. So for example, both - // a16 and a00 both correspond to the first byte in our window; a00 - // contains buckets 0-7 and a16 contains buckets 8-15. Specifically, - // a16 should be checked before a01. So the transformation shown above - // allows us to use our normal verification procedure with one small - // change: we treat each bitset as 16 bits instead of 8 bits. - - // Swap the 128-bit lanes in the candidate vector. - let swap = _mm256_permute4x64_epi64(cand, 0x4E); - // Interleave the bytes from the low 128-bit lanes, starting with - // cand first. - let r1 = _mm256_unpacklo_epi8(cand, swap); - // Interleave the bytes from the high 128-bit lanes, starting with - // cand first. - let r2 = _mm256_unpackhi_epi8(cand, swap); - // Now just take the 2 low 64-bit integers from both r1 and r2. We - // can drop the high 64-bit integers because they are a mirror image - // of the low 64-bit integers. All we care about are the low 128-bit - // lanes of r1 and r2. Combined, they contain all our 16-bit bitsets - // laid out in the desired order, as described above. 
- let parts = vector::unpacklo64x256(r1, r2); - for (i, &part) in parts.iter().enumerate() { - let pos = at + i * 4; - if let Some(m) = self.verify64(pats, 16, haystack, pos, part) { - return Some(m); - } - } - None - } - - /// Verify whether there are any matches starting at or after `at` in the - /// given `haystack`. The candidate given should correspond to either 8-bit - /// (for 8 buckets) or 16-bit (16 buckets) bitsets. - #[inline(always)] - fn verify64( - &self, - pats: &Patterns, - bucket_count: usize, - haystack: &[u8], - at: usize, - mut cand: u64, - ) -> Option { - // N.B. While the bucket count is known from self.buckets.len(), - // requiring it as a parameter makes it easier for the optimizer to - // know its value, and thus produce more efficient codegen. - debug_assert!(bucket_count == 8 || bucket_count == 16); - while cand != 0 { - let bit = cand.trailing_zeros() as usize; - cand &= !(1 << bit); - - let at = at + (bit / bucket_count); - let bucket = bit % bucket_count; - if let Some(m) = self.verify_bucket(pats, haystack, bucket, at) { - return Some(m); - } - } - None - } - - /// Verify whether there are any matches starting at `at` in the given - /// `haystack` corresponding only to patterns in the given bucket. - #[inline(always)] - fn verify_bucket( - &self, - pats: &Patterns, - haystack: &[u8], - bucket: usize, - at: usize, - ) -> Option { - // Forcing this function to not inline and be "cold" seems to help - // the codegen for Teddy overall. Interestingly, this is good for a - // 16% boost in the sherlock/packed/teddy/name/alt1 benchmark (among - // others). Overall, this seems like a problem with codegen, since - // creating the Match itself is a very small amount of code. - #[cold] - #[inline(never)] - fn match_from_span( - pati: PatternID, - start: usize, - end: usize, - ) -> Match { - Match::must(pati as usize, start..end) - } - - // N.B. 
The bounds check for this bucket lookup *should* be elided - // since we assert the number of buckets in each `find_at` routine, - // and the compiler can prove that the `% 8` (or `% 16`) in callers - // of this routine will always be in bounds. - for &pati in &self.buckets[bucket] { - // SAFETY: This is safe because we are guaranteed that every - // index in a Teddy bucket is a valid index into `pats`. This - // guarantee is upheld by the assert checking `max_pattern_id` in - // the beginning of `find_at` above. - // - // This explicit bounds check elision is (amazingly) good for a - // 25-50% boost in some benchmarks, particularly ones with a lot - // of short literals. - let pat = unsafe { pats.get_unchecked(pati) }; - if pat.is_prefix(&haystack[at..]) { - return Some(match_from_span(pati, at, at + pat.len())); - } - } - None - } -} - -/// Exec represents the different search strategies supported by the Teddy -/// runtime. -/// -/// This enum is an important safety abstraction. Namely, callers should only -/// construct a variant in this enum if it is safe to execute its corresponding -/// target features on the current CPU. The 128-bit searchers require SSSE3, -/// while the 256-bit searchers require AVX2. -#[derive(Clone, Debug)] -pub enum Exec { - TeddySlim1Mask128(TeddySlim1Mask128), - TeddySlim1Mask256(TeddySlim1Mask256), - TeddyFat1Mask256(TeddyFat1Mask256), - TeddySlim2Mask128(TeddySlim2Mask128), - TeddySlim2Mask256(TeddySlim2Mask256), - TeddyFat2Mask256(TeddyFat2Mask256), - TeddySlim3Mask128(TeddySlim3Mask128), - TeddySlim3Mask256(TeddySlim3Mask256), - TeddyFat3Mask256(TeddyFat3Mask256), - TeddySlim4Mask128(TeddySlim4Mask128), - TeddySlim4Mask256(TeddySlim4Mask256), - TeddyFat4Mask256(TeddyFat4Mask256), -} - -// Most of the code below remains undocumented because they are effectively -// repeated versions of themselves. The general structure is described in the -// README and in the comments above. 
- -#[derive(Clone, Debug)] -pub struct TeddySlim1Mask128 { - pub mask1: Mask128, -} - -impl TeddySlim1Mask128 { - #[target_feature(enable = "ssse3")] - unsafe fn find_at( - &self, - pats: &Patterns, - teddy: &Teddy, - haystack: &[u8], - mut at: usize, - ) -> Option { - debug_assert!(haystack[at..].len() >= teddy.minimum_len()); - // This assert helps eliminate bounds checks for bucket lookups in - // Teddy::verify_bucket, which has a small (3-4%) performance boost. - assert_eq!(8, teddy.buckets.len()); - - let len = haystack.len(); - while at <= len - 16 { - let c = self.candidate(haystack, at); - if !vector::is_all_zeroes128(c) { - if let Some(m) = teddy.verify128(pats, haystack, at, c) { - return Some(m); - } - } - at += 16; - } - if at < len { - at = len - 16; - let c = self.candidate(haystack, at); - if !vector::is_all_zeroes128(c) { - if let Some(m) = teddy.verify128(pats, haystack, at, c) { - return Some(m); - } - } - } - None - } - - #[inline(always)] - unsafe fn candidate(&self, haystack: &[u8], at: usize) -> __m128i { - debug_assert!(haystack[at..].len() >= 16); - - let chunk = vector::loadu128(haystack, at); - members1m128(chunk, self.mask1) - } -} - -#[derive(Clone, Debug)] -pub struct TeddySlim1Mask256 { - pub mask1: Mask256, -} - -impl TeddySlim1Mask256 { - #[target_feature(enable = "avx2")] - unsafe fn find_at( - &self, - pats: &Patterns, - teddy: &Teddy, - haystack: &[u8], - mut at: usize, - ) -> Option { - debug_assert!(haystack[at..].len() >= teddy.minimum_len()); - // This assert helps eliminate bounds checks for bucket lookups in - // Teddy::verify_bucket, which has a small (3-4%) performance boost. 
- assert_eq!(8, teddy.buckets.len()); - - let len = haystack.len(); - while at <= len - 32 { - let c = self.candidate(haystack, at); - if !vector::is_all_zeroes256(c) { - if let Some(m) = teddy.verify256(pats, haystack, at, c) { - return Some(m); - } - } - at += 32; - } - if at < len { - at = len - 32; - let c = self.candidate(haystack, at); - if !vector::is_all_zeroes256(c) { - if let Some(m) = teddy.verify256(pats, haystack, at, c) { - return Some(m); - } - } - } - None - } - - #[inline(always)] - unsafe fn candidate(&self, haystack: &[u8], at: usize) -> __m256i { - debug_assert!(haystack[at..].len() >= 32); - - let chunk = vector::loadu256(haystack, at); - members1m256(chunk, self.mask1) - } -} - -#[derive(Clone, Debug)] -pub struct TeddyFat1Mask256 { - pub mask1: Mask256, -} - -impl TeddyFat1Mask256 { - #[target_feature(enable = "avx2")] - unsafe fn find_at( - &self, - pats: &Patterns, - teddy: &Teddy, - haystack: &[u8], - mut at: usize, - ) -> Option { - debug_assert!(haystack[at..].len() >= teddy.minimum_len()); - // This assert helps eliminate bounds checks for bucket lookups in - // Teddy::verify_bucket, which has a small (3-4%) performance boost. 
- assert_eq!(16, teddy.buckets.len()); - - let len = haystack.len(); - while at <= len - 16 { - let c = self.candidate(haystack, at); - if !vector::is_all_zeroes256(c) { - if let Some(m) = teddy.verify_fat256(pats, haystack, at, c) { - return Some(m); - } - } - at += 16; - } - if at < len { - at = len - 16; - let c = self.candidate(haystack, at); - if !vector::is_all_zeroes256(c) { - if let Some(m) = teddy.verify_fat256(pats, haystack, at, c) { - return Some(m); - } - } - } - None - } - - #[inline(always)] - unsafe fn candidate(&self, haystack: &[u8], at: usize) -> __m256i { - debug_assert!(haystack[at..].len() >= 16); - - let chunk = - _mm256_broadcastsi128_si256(vector::loadu128(haystack, at)); - members1m256(chunk, self.mask1) - } -} - -#[derive(Clone, Debug)] -pub struct TeddySlim2Mask128 { - pub mask1: Mask128, - pub mask2: Mask128, -} - -impl TeddySlim2Mask128 { - #[target_feature(enable = "ssse3")] - unsafe fn find_at( - &self, - pats: &Patterns, - teddy: &Teddy, - haystack: &[u8], - mut at: usize, - ) -> Option { - debug_assert!(haystack[at..].len() >= teddy.minimum_len()); - // This assert helps eliminate bounds checks for bucket lookups in - // Teddy::verify_bucket, which has a small (3-4%) performance boost. 
- assert_eq!(8, teddy.buckets.len()); - - at += 1; - let len = haystack.len(); - let mut prev0 = vector::ones128(); - while at <= len - 16 { - let c = self.candidate(haystack, at, &mut prev0); - if !vector::is_all_zeroes128(c) { - if let Some(m) = teddy.verify128(pats, haystack, at - 1, c) { - return Some(m); - } - } - at += 16; - } - if at < len { - at = len - 16; - prev0 = vector::ones128(); - - let c = self.candidate(haystack, at, &mut prev0); - if !vector::is_all_zeroes128(c) { - if let Some(m) = teddy.verify128(pats, haystack, at - 1, c) { - return Some(m); - } - } - } - None - } - - #[inline(always)] - unsafe fn candidate( - &self, - haystack: &[u8], - at: usize, - prev0: &mut __m128i, - ) -> __m128i { - debug_assert!(haystack[at..].len() >= 16); - - let chunk = vector::loadu128(haystack, at); - let (res0, res1) = members2m128(chunk, self.mask1, self.mask2); - let res0prev0 = _mm_alignr_epi8(res0, *prev0, 15); - _mm_and_si128(res0prev0, res1) - } -} - -#[derive(Clone, Debug)] -pub struct TeddySlim2Mask256 { - pub mask1: Mask256, - pub mask2: Mask256, -} - -impl TeddySlim2Mask256 { - #[target_feature(enable = "avx2")] - unsafe fn find_at( - &self, - pats: &Patterns, - teddy: &Teddy, - haystack: &[u8], - mut at: usize, - ) -> Option { - debug_assert!(haystack[at..].len() >= teddy.minimum_len()); - // This assert helps eliminate bounds checks for bucket lookups in - // Teddy::verify_bucket, which has a small (3-4%) performance boost. 
- assert_eq!(8, teddy.buckets.len()); - - at += 1; - let len = haystack.len(); - let mut prev0 = vector::ones256(); - while at <= len - 32 { - let c = self.candidate(haystack, at, &mut prev0); - if !vector::is_all_zeroes256(c) { - if let Some(m) = teddy.verify256(pats, haystack, at - 1, c) { - return Some(m); - } - } - at += 32; - } - if at < len { - at = len - 32; - prev0 = vector::ones256(); - - let c = self.candidate(haystack, at, &mut prev0); - if !vector::is_all_zeroes256(c) { - if let Some(m) = teddy.verify256(pats, haystack, at - 1, c) { - return Some(m); - } - } - } - None - } - - #[inline(always)] - unsafe fn candidate( - &self, - haystack: &[u8], - at: usize, - prev0: &mut __m256i, - ) -> __m256i { - debug_assert!(haystack[at..].len() >= 32); - - let chunk = vector::loadu256(haystack, at); - let (res0, res1) = members2m256(chunk, self.mask1, self.mask2); - let res0prev0 = vector::alignr256_15(res0, *prev0); - let res = _mm256_and_si256(res0prev0, res1); - *prev0 = res0; - res - } -} - -#[derive(Clone, Debug)] -pub struct TeddyFat2Mask256 { - pub mask1: Mask256, - pub mask2: Mask256, -} - -impl TeddyFat2Mask256 { - #[target_feature(enable = "avx2")] - unsafe fn find_at( - &self, - pats: &Patterns, - teddy: &Teddy, - haystack: &[u8], - mut at: usize, - ) -> Option { - debug_assert!(haystack[at..].len() >= teddy.minimum_len()); - // This assert helps eliminate bounds checks for bucket lookups in - // Teddy::verify_bucket, which has a small (3-4%) performance boost. 
- assert_eq!(16, teddy.buckets.len()); - - at += 1; - let len = haystack.len(); - let mut prev0 = vector::ones256(); - while at <= len - 16 { - let c = self.candidate(haystack, at, &mut prev0); - if !vector::is_all_zeroes256(c) { - if let Some(m) = teddy.verify_fat256(pats, haystack, at - 1, c) - { - return Some(m); - } - } - at += 16; - } - if at < len { - at = len - 16; - prev0 = vector::ones256(); - - let c = self.candidate(haystack, at, &mut prev0); - if !vector::is_all_zeroes256(c) { - if let Some(m) = teddy.verify_fat256(pats, haystack, at - 1, c) - { - return Some(m); - } - } - } - None - } - - #[inline(always)] - unsafe fn candidate( - &self, - haystack: &[u8], - at: usize, - prev0: &mut __m256i, - ) -> __m256i { - debug_assert!(haystack[at..].len() >= 16); - - let chunk = - _mm256_broadcastsi128_si256(vector::loadu128(haystack, at)); - let (res0, res1) = members2m256(chunk, self.mask1, self.mask2); - let res0prev0 = _mm256_alignr_epi8(res0, *prev0, 15); - let res = _mm256_and_si256(res0prev0, res1); - *prev0 = res0; - res - } -} - -#[derive(Clone, Debug)] -pub struct TeddySlim3Mask128 { - pub mask1: Mask128, - pub mask2: Mask128, - pub mask3: Mask128, -} - -impl TeddySlim3Mask128 { - #[target_feature(enable = "ssse3")] - unsafe fn find_at( - &self, - pats: &Patterns, - teddy: &Teddy, - haystack: &[u8], - mut at: usize, - ) -> Option { - debug_assert!(haystack[at..].len() >= teddy.minimum_len()); - // This assert helps eliminate bounds checks for bucket lookups in - // Teddy::verify_bucket, which has a small (3-4%) performance boost. 
- assert_eq!(8, teddy.buckets.len()); - - at += 2; - let len = haystack.len(); - let (mut prev0, mut prev1) = (vector::ones128(), vector::ones128()); - while at <= len - 16 { - let c = self.candidate(haystack, at, &mut prev0, &mut prev1); - if !vector::is_all_zeroes128(c) { - if let Some(m) = teddy.verify128(pats, haystack, at - 2, c) { - return Some(m); - } - } - at += 16; - } - if at < len { - at = len - 16; - prev0 = vector::ones128(); - prev1 = vector::ones128(); - - let c = self.candidate(haystack, at, &mut prev0, &mut prev1); - if !vector::is_all_zeroes128(c) { - if let Some(m) = teddy.verify128(pats, haystack, at - 2, c) { - return Some(m); - } - } - } - None - } - - #[inline(always)] - unsafe fn candidate( - &self, - haystack: &[u8], - at: usize, - prev0: &mut __m128i, - prev1: &mut __m128i, - ) -> __m128i { - debug_assert!(haystack[at..].len() >= 16); - - let chunk = vector::loadu128(haystack, at); - let (res0, res1, res2) = - members3m128(chunk, self.mask1, self.mask2, self.mask3); - let res0prev0 = _mm_alignr_epi8(res0, *prev0, 14); - let res1prev1 = _mm_alignr_epi8(res1, *prev1, 15); - let res = _mm_and_si128(_mm_and_si128(res0prev0, res1prev1), res2); - *prev0 = res0; - *prev1 = res1; - res - } -} - -#[derive(Clone, Debug)] -pub struct TeddySlim3Mask256 { - pub mask1: Mask256, - pub mask2: Mask256, - pub mask3: Mask256, -} - -impl TeddySlim3Mask256 { - #[target_feature(enable = "avx2")] - unsafe fn find_at( - &self, - pats: &Patterns, - teddy: &Teddy, - haystack: &[u8], - mut at: usize, - ) -> Option { - debug_assert!(haystack[at..].len() >= teddy.minimum_len()); - // This assert helps eliminate bounds checks for bucket lookups in - // Teddy::verify_bucket, which has a small (3-4%) performance boost. 
- assert_eq!(8, teddy.buckets.len()); - - at += 2; - let len = haystack.len(); - let (mut prev0, mut prev1) = (vector::ones256(), vector::ones256()); - while at <= len - 32 { - let c = self.candidate(haystack, at, &mut prev0, &mut prev1); - if !vector::is_all_zeroes256(c) { - if let Some(m) = teddy.verify256(pats, haystack, at - 2, c) { - return Some(m); - } - } - at += 32; - } - if at < len { - at = len - 32; - prev0 = vector::ones256(); - prev1 = vector::ones256(); - - let c = self.candidate(haystack, at, &mut prev0, &mut prev1); - if !vector::is_all_zeroes256(c) { - if let Some(m) = teddy.verify256(pats, haystack, at - 2, c) { - return Some(m); - } - } - } - None - } - - #[inline(always)] - unsafe fn candidate( - &self, - haystack: &[u8], - at: usize, - prev0: &mut __m256i, - prev1: &mut __m256i, - ) -> __m256i { - debug_assert!(haystack[at..].len() >= 32); - - let chunk = vector::loadu256(haystack, at); - let (res0, res1, res2) = - members3m256(chunk, self.mask1, self.mask2, self.mask3); - let res0prev0 = vector::alignr256_14(res0, *prev0); - let res1prev1 = vector::alignr256_15(res1, *prev1); - let res = - _mm256_and_si256(_mm256_and_si256(res0prev0, res1prev1), res2); - *prev0 = res0; - *prev1 = res1; - res - } -} - -#[derive(Clone, Debug)] -pub struct TeddyFat3Mask256 { - pub mask1: Mask256, - pub mask2: Mask256, - pub mask3: Mask256, -} - -impl TeddyFat3Mask256 { - #[target_feature(enable = "avx2")] - unsafe fn find_at( - &self, - pats: &Patterns, - teddy: &Teddy, - haystack: &[u8], - mut at: usize, - ) -> Option { - debug_assert!(haystack[at..].len() >= teddy.minimum_len()); - // This assert helps eliminate bounds checks for bucket lookups in - // Teddy::verify_bucket, which has a small (3-4%) performance boost. 
- assert_eq!(16, teddy.buckets.len()); - - at += 2; - let len = haystack.len(); - let (mut prev0, mut prev1) = (vector::ones256(), vector::ones256()); - while at <= len - 16 { - let c = self.candidate(haystack, at, &mut prev0, &mut prev1); - if !vector::is_all_zeroes256(c) { - if let Some(m) = teddy.verify_fat256(pats, haystack, at - 2, c) - { - return Some(m); - } - } - at += 16; - } - if at < len { - at = len - 16; - prev0 = vector::ones256(); - prev1 = vector::ones256(); - - let c = self.candidate(haystack, at, &mut prev0, &mut prev1); - if !vector::is_all_zeroes256(c) { - if let Some(m) = teddy.verify_fat256(pats, haystack, at - 2, c) - { - return Some(m); - } - } - } - None - } - - #[inline(always)] - unsafe fn candidate( - &self, - haystack: &[u8], - at: usize, - prev0: &mut __m256i, - prev1: &mut __m256i, - ) -> __m256i { - debug_assert!(haystack[at..].len() >= 16); - - let chunk = - _mm256_broadcastsi128_si256(vector::loadu128(haystack, at)); - let (res0, res1, res2) = - members3m256(chunk, self.mask1, self.mask2, self.mask3); - let res0prev0 = _mm256_alignr_epi8(res0, *prev0, 14); - let res1prev1 = _mm256_alignr_epi8(res1, *prev1, 15); - let res = - _mm256_and_si256(_mm256_and_si256(res0prev0, res1prev1), res2); - *prev0 = res0; - *prev1 = res1; - res - } -} - -#[derive(Clone, Debug)] -pub struct TeddySlim4Mask128 { - pub mask1: Mask128, - pub mask2: Mask128, - pub mask3: Mask128, - pub mask4: Mask128, -} - -impl TeddySlim4Mask128 { - #[target_feature(enable = "ssse3")] - unsafe fn find_at( - &self, - pats: &Patterns, - teddy: &Teddy, - haystack: &[u8], - mut at: usize, - ) -> Option { - debug_assert!(haystack[at..].len() >= teddy.minimum_len()); - // This assert helps eliminate bounds checks for bucket lookups in - // Teddy::verify_bucket, which has a small (3-4%) performance boost. 
- assert_eq!(8, teddy.buckets.len()); - - at += 3; - let len = haystack.len(); - let mut prev0 = vector::ones128(); - let mut prev1 = vector::ones128(); - let mut prev2 = vector::ones128(); - while at <= len - 16 { - let c = self - .candidate(haystack, at, &mut prev0, &mut prev1, &mut prev2); - if !vector::is_all_zeroes128(c) { - if let Some(m) = teddy.verify128(pats, haystack, at - 3, c) { - return Some(m); - } - } - at += 16; - } - if at < len { - at = len - 16; - prev0 = vector::ones128(); - prev1 = vector::ones128(); - prev2 = vector::ones128(); - - let c = self - .candidate(haystack, at, &mut prev0, &mut prev1, &mut prev2); - if !vector::is_all_zeroes128(c) { - if let Some(m) = teddy.verify128(pats, haystack, at - 3, c) { - return Some(m); - } - } - } - None - } - - #[inline(always)] - unsafe fn candidate( - &self, - haystack: &[u8], - at: usize, - prev0: &mut __m128i, - prev1: &mut __m128i, - prev2: &mut __m128i, - ) -> __m128i { - debug_assert!(haystack[at..].len() >= 16); - - let chunk = vector::loadu128(haystack, at); - let (res0, res1, res2, res3) = members4m128( - chunk, self.mask1, self.mask2, self.mask3, self.mask4, - ); - let res0prev0 = _mm_alignr_epi8(res0, *prev0, 13); - let res1prev1 = _mm_alignr_epi8(res1, *prev1, 14); - let res2prev2 = _mm_alignr_epi8(res2, *prev2, 15); - let res = _mm_and_si128( - _mm_and_si128(_mm_and_si128(res0prev0, res1prev1), res2prev2), - res3, - ); - *prev0 = res0; - *prev1 = res1; - *prev2 = res2; - res - } -} - -#[derive(Clone, Debug)] -pub struct TeddySlim4Mask256 { - pub mask1: Mask256, - pub mask2: Mask256, - pub mask3: Mask256, - pub mask4: Mask256, -} - -impl TeddySlim4Mask256 { - #[target_feature(enable = "avx2")] - unsafe fn find_at( - &self, - pats: &Patterns, - teddy: &Teddy, - haystack: &[u8], - mut at: usize, - ) -> Option { - debug_assert!(haystack[at..].len() >= teddy.minimum_len()); - // This assert helps eliminate bounds checks for bucket lookups in - // Teddy::verify_bucket, which has a small (3-4%) 
performance boost. - assert_eq!(8, teddy.buckets.len()); - - at += 3; - let len = haystack.len(); - let mut prev0 = vector::ones256(); - let mut prev1 = vector::ones256(); - let mut prev2 = vector::ones256(); - while at <= len - 32 { - let c = self - .candidate(haystack, at, &mut prev0, &mut prev1, &mut prev2); - if !vector::is_all_zeroes256(c) { - if let Some(m) = teddy.verify256(pats, haystack, at - 3, c) { - return Some(m); - } - } - at += 32; - } - if at < len { - at = len - 32; - prev0 = vector::ones256(); - prev1 = vector::ones256(); - prev2 = vector::ones256(); - - let c = self - .candidate(haystack, at, &mut prev0, &mut prev1, &mut prev2); - if !vector::is_all_zeroes256(c) { - if let Some(m) = teddy.verify256(pats, haystack, at - 3, c) { - return Some(m); - } - } - } - None - } - - #[inline(always)] - unsafe fn candidate( - &self, - haystack: &[u8], - at: usize, - prev0: &mut __m256i, - prev1: &mut __m256i, - prev2: &mut __m256i, - ) -> __m256i { - debug_assert!(haystack[at..].len() >= 32); - - let chunk = vector::loadu256(haystack, at); - let (res0, res1, res2, res3) = members4m256( - chunk, self.mask1, self.mask2, self.mask3, self.mask4, - ); - let res0prev0 = vector::alignr256_13(res0, *prev0); - let res1prev1 = vector::alignr256_14(res1, *prev1); - let res2prev2 = vector::alignr256_15(res2, *prev2); - let res = _mm256_and_si256( - _mm256_and_si256( - _mm256_and_si256(res0prev0, res1prev1), - res2prev2, - ), - res3, - ); - *prev0 = res0; - *prev1 = res1; - *prev2 = res2; - res - } -} - -#[derive(Clone, Debug)] -pub struct TeddyFat4Mask256 { - pub mask1: Mask256, - pub mask2: Mask256, - pub mask3: Mask256, - pub mask4: Mask256, -} - -impl TeddyFat4Mask256 { - #[target_feature(enable = "avx2")] - unsafe fn find_at( - &self, - pats: &Patterns, - teddy: &Teddy, - haystack: &[u8], - mut at: usize, - ) -> Option { - debug_assert!(haystack[at..].len() >= teddy.minimum_len()); - // This assert helps eliminate bounds checks for bucket lookups in - // 
Teddy::verify_bucket, which has a small (3-4%) performance boost. - assert_eq!(16, teddy.buckets.len()); - - at += 3; - let len = haystack.len(); - let mut prev0 = vector::ones256(); - let mut prev1 = vector::ones256(); - let mut prev2 = vector::ones256(); - while at <= len - 16 { - let c = self - .candidate(haystack, at, &mut prev0, &mut prev1, &mut prev2); - if !vector::is_all_zeroes256(c) { - if let Some(m) = teddy.verify_fat256(pats, haystack, at - 3, c) - { - return Some(m); - } - } - at += 16; - } - if at < len { - at = len - 16; - prev0 = vector::ones256(); - prev1 = vector::ones256(); - prev2 = vector::ones256(); - - let c = self - .candidate(haystack, at, &mut prev0, &mut prev1, &mut prev2); - if !vector::is_all_zeroes256(c) { - if let Some(m) = teddy.verify_fat256(pats, haystack, at - 3, c) - { - return Some(m); - } - } - } - None - } - - #[inline(always)] - unsafe fn candidate( - &self, - haystack: &[u8], - at: usize, - prev0: &mut __m256i, - prev1: &mut __m256i, - prev2: &mut __m256i, - ) -> __m256i { - debug_assert!(haystack[at..].len() >= 16); - - let chunk = - _mm256_broadcastsi128_si256(vector::loadu128(haystack, at)); - let (res0, res1, res2, res3) = members4m256( - chunk, self.mask1, self.mask2, self.mask3, self.mask4, - ); - let res0prev0 = _mm256_alignr_epi8(res0, *prev0, 13); - let res1prev1 = _mm256_alignr_epi8(res1, *prev1, 14); - let res2prev2 = _mm256_alignr_epi8(res2, *prev2, 15); - let res = _mm256_and_si256( - _mm256_and_si256( - _mm256_and_si256(res0prev0, res1prev1), - res2prev2, - ), - res3, - ); - *prev0 = res0; - *prev1 = res1; - *prev2 = res2; - res - } -} - -/// A 128-bit mask for the low and high nybbles in a set of patterns. Each -/// lane `j` corresponds to a bitset where the `i`th bit is set if and only if -/// the nybble `j` is in the bucket `i` at a particular position. 
-#[derive(Clone, Copy, Debug)] -pub struct Mask128 { - lo: __m128i, - hi: __m128i, -} - -impl Mask128 { - /// Create a new SIMD mask from the mask produced by the Teddy builder. - pub fn new(mask: compile::Mask) -> Mask128 { - // SAFETY: This is safe since [u8; 16] has the same representation - // as __m128i. - unsafe { - Mask128 { - lo: mem::transmute(mask.lo128()), - hi: mem::transmute(mask.hi128()), - } - } - } -} - -/// A 256-bit mask for the low and high nybbles in a set of patterns. Each -/// lane `j` corresponds to a bitset where the `i`th bit is set if and only if -/// the nybble `j` is in the bucket `i` at a particular position. -/// -/// This is slightly tweaked dependending on whether Slim or Fat Teddy is being -/// used. For Slim Teddy, the bitsets in the lower 128-bits are the same as -/// the bitsets in the higher 128-bits, so that we can search 32 bytes at a -/// time. (Remember, the nybbles in the haystack are used as indices into these -/// masks, and 256-bit shuffles only operate on 128-bit lanes.) -/// -/// For Fat Teddy, the bitsets are not repeated, but instead, the high 128 -/// bits correspond to buckets 8-15. So that a bitset `00100010` has buckets -/// 1 and 5 set if it's in the lower 128 bits, but has buckets 9 and 13 set -/// if it's in the higher 128 bits. -#[derive(Clone, Copy, Debug)] -pub struct Mask256 { - lo: __m256i, - hi: __m256i, -} - -impl Mask256 { - /// Create a new SIMD mask from the mask produced by the Teddy builder. - pub fn new(mask: compile::Mask) -> Mask256 { - // SAFETY: This is safe since [u8; 32] has the same representation - // as __m256i. - unsafe { - Mask256 { - lo: mem::transmute(mask.lo256()), - hi: mem::transmute(mask.hi256()), - } - } - } -} - -// The "members" routines below are responsible for taking a chunk of bytes, -// a number of nybble masks and returning the result of using the masks to -// lookup bytes in the chunk. 
The results of the high and low nybble masks are -// AND'ed together, such that each candidate returned is a vector, with byte -// sized lanes, and where each lane is an 8-bit bitset corresponding to the -// buckets that contain the corresponding byte. -// -// In the case of masks of length greater than 1, callers will need to keep -// the results from the previous haystack's window, and then shift the vectors -// so that they all line up. Then they can be AND'ed together. - -/// Return a candidate for Slim 128-bit Teddy, where `chunk` corresponds to a -/// 16-byte window of the haystack (where the least significant byte -/// corresponds to the start of the window), and `mask1` corresponds to a -/// low/high mask for the first byte of all patterns that are being searched. -#[target_feature(enable = "ssse3")] -unsafe fn members1m128(chunk: __m128i, mask1: Mask128) -> __m128i { - let lomask = _mm_set1_epi8(0xF); - let hlo = _mm_and_si128(chunk, lomask); - let hhi = _mm_and_si128(_mm_srli_epi16(chunk, 4), lomask); - _mm_and_si128( - _mm_shuffle_epi8(mask1.lo, hlo), - _mm_shuffle_epi8(mask1.hi, hhi), - ) -} - -/// Return a candidate for Slim 256-bit Teddy, where `chunk` corresponds to a -/// 32-byte window of the haystack (where the least significant byte -/// corresponds to the start of the window), and `mask1` corresponds to a -/// low/high mask for the first byte of all patterns that are being searched. -/// -/// Note that this can also be used for Fat Teddy, where the high 128 bits in -/// `chunk` is the same as the low 128 bits, which corresponds to a 16 byte -/// window in the haystack. 
-#[target_feature(enable = "avx2")] -unsafe fn members1m256(chunk: __m256i, mask1: Mask256) -> __m256i { - let lomask = _mm256_set1_epi8(0xF); - let hlo = _mm256_and_si256(chunk, lomask); - let hhi = _mm256_and_si256(_mm256_srli_epi16(chunk, 4), lomask); - _mm256_and_si256( - _mm256_shuffle_epi8(mask1.lo, hlo), - _mm256_shuffle_epi8(mask1.hi, hhi), - ) -} - -/// Return candidates for Slim 128-bit Teddy, where `chunk` corresponds -/// to a 16-byte window of the haystack (where the least significant byte -/// corresponds to the start of the window), and the masks correspond to a -/// low/high mask for the first and second bytes of all patterns that are being -/// searched. The vectors returned correspond to candidates for the first and -/// second bytes in the patterns represented by the masks. -#[target_feature(enable = "ssse3")] -unsafe fn members2m128( - chunk: __m128i, - mask1: Mask128, - mask2: Mask128, -) -> (__m128i, __m128i) { - let lomask = _mm_set1_epi8(0xF); - let hlo = _mm_and_si128(chunk, lomask); - let hhi = _mm_and_si128(_mm_srli_epi16(chunk, 4), lomask); - let res0 = _mm_and_si128( - _mm_shuffle_epi8(mask1.lo, hlo), - _mm_shuffle_epi8(mask1.hi, hhi), - ); - let res1 = _mm_and_si128( - _mm_shuffle_epi8(mask2.lo, hlo), - _mm_shuffle_epi8(mask2.hi, hhi), - ); - (res0, res1) -} - -/// Return candidates for Slim 256-bit Teddy, where `chunk` corresponds -/// to a 32-byte window of the haystack (where the least significant byte -/// corresponds to the start of the window), and the masks correspond to a -/// low/high mask for the first and second bytes of all patterns that are being -/// searched. The vectors returned correspond to candidates for the first and -/// second bytes in the patterns represented by the masks. -/// -/// Note that this can also be used for Fat Teddy, where the high 128 bits in -/// `chunk` is the same as the low 128 bits, which corresponds to a 16 byte -/// window in the haystack. 
-#[target_feature(enable = "avx2")] -unsafe fn members2m256( - chunk: __m256i, - mask1: Mask256, - mask2: Mask256, -) -> (__m256i, __m256i) { - let lomask = _mm256_set1_epi8(0xF); - let hlo = _mm256_and_si256(chunk, lomask); - let hhi = _mm256_and_si256(_mm256_srli_epi16(chunk, 4), lomask); - let res0 = _mm256_and_si256( - _mm256_shuffle_epi8(mask1.lo, hlo), - _mm256_shuffle_epi8(mask1.hi, hhi), - ); - let res1 = _mm256_and_si256( - _mm256_shuffle_epi8(mask2.lo, hlo), - _mm256_shuffle_epi8(mask2.hi, hhi), - ); - (res0, res1) -} - -/// Return candidates for Slim 128-bit Teddy, where `chunk` corresponds -/// to a 16-byte window of the haystack (where the least significant byte -/// corresponds to the start of the window), and the masks correspond to a -/// low/high mask for the first, second and third bytes of all patterns that -/// are being searched. The vectors returned correspond to candidates for the -/// first, second and third bytes in the patterns represented by the masks. -#[target_feature(enable = "ssse3")] -unsafe fn members3m128( - chunk: __m128i, - mask1: Mask128, - mask2: Mask128, - mask3: Mask128, -) -> (__m128i, __m128i, __m128i) { - let lomask = _mm_set1_epi8(0xF); - let hlo = _mm_and_si128(chunk, lomask); - let hhi = _mm_and_si128(_mm_srli_epi16(chunk, 4), lomask); - let res0 = _mm_and_si128( - _mm_shuffle_epi8(mask1.lo, hlo), - _mm_shuffle_epi8(mask1.hi, hhi), - ); - let res1 = _mm_and_si128( - _mm_shuffle_epi8(mask2.lo, hlo), - _mm_shuffle_epi8(mask2.hi, hhi), - ); - let res2 = _mm_and_si128( - _mm_shuffle_epi8(mask3.lo, hlo), - _mm_shuffle_epi8(mask3.hi, hhi), - ); - (res0, res1, res2) -} - -/// Return candidates for Slim 256-bit Teddy, where `chunk` corresponds -/// to a 32-byte window of the haystack (where the least significant byte -/// corresponds to the start of the window), and the masks correspond to a -/// low/high mask for the first, second and third bytes of all patterns that -/// are being searched. 
The vectors returned correspond to candidates for the -/// first, second and third bytes in the patterns represented by the masks. -/// -/// Note that this can also be used for Fat Teddy, where the high 128 bits in -/// `chunk` is the same as the low 128 bits, which corresponds to a 16 byte -/// window in the haystack. -#[target_feature(enable = "avx2")] -unsafe fn members3m256( - chunk: __m256i, - mask1: Mask256, - mask2: Mask256, - mask3: Mask256, -) -> (__m256i, __m256i, __m256i) { - let lomask = _mm256_set1_epi8(0xF); - let hlo = _mm256_and_si256(chunk, lomask); - let hhi = _mm256_and_si256(_mm256_srli_epi16(chunk, 4), lomask); - let res0 = _mm256_and_si256( - _mm256_shuffle_epi8(mask1.lo, hlo), - _mm256_shuffle_epi8(mask1.hi, hhi), - ); - let res1 = _mm256_and_si256( - _mm256_shuffle_epi8(mask2.lo, hlo), - _mm256_shuffle_epi8(mask2.hi, hhi), - ); - let res2 = _mm256_and_si256( - _mm256_shuffle_epi8(mask3.lo, hlo), - _mm256_shuffle_epi8(mask3.hi, hhi), - ); - (res0, res1, res2) -} - -/// Return candidates for Slim 128-bit Teddy, where `chunk` corresponds -/// to a 16-byte window of the haystack (where the least significant byte -/// corresponds to the start of the window), and the masks correspond to a -/// low/high mask for the first, second, third and fourth bytes of all patterns -/// that are being searched. The vectors returned correspond to candidates for -/// the first, second, third and fourth bytes in the patterns represented by -/// the masks. 
-#[target_feature(enable = "ssse3")] -unsafe fn members4m128( - chunk: __m128i, - mask1: Mask128, - mask2: Mask128, - mask3: Mask128, - mask4: Mask128, -) -> (__m128i, __m128i, __m128i, __m128i) { - let lomask = _mm_set1_epi8(0xF); - let hlo = _mm_and_si128(chunk, lomask); - let hhi = _mm_and_si128(_mm_srli_epi16(chunk, 4), lomask); - let res0 = _mm_and_si128( - _mm_shuffle_epi8(mask1.lo, hlo), - _mm_shuffle_epi8(mask1.hi, hhi), - ); - let res1 = _mm_and_si128( - _mm_shuffle_epi8(mask2.lo, hlo), - _mm_shuffle_epi8(mask2.hi, hhi), - ); - let res2 = _mm_and_si128( - _mm_shuffle_epi8(mask3.lo, hlo), - _mm_shuffle_epi8(mask3.hi, hhi), - ); - let res3 = _mm_and_si128( - _mm_shuffle_epi8(mask4.lo, hlo), - _mm_shuffle_epi8(mask4.hi, hhi), - ); - (res0, res1, res2, res3) -} - -/// Return candidates for Slim 256-bit Teddy, where `chunk` corresponds -/// to a 32-byte window of the haystack (where the least significant byte -/// corresponds to the start of the window), and the masks correspond to a -/// low/high mask for the first, second, third and fourth bytes of all patterns -/// that are being searched. The vectors returned correspond to candidates for -/// the first, second, third and fourth bytes in the patterns represented by -/// the masks. -/// -/// Note that this can also be used for Fat Teddy, where the high 128 bits in -/// `chunk` is the same as the low 128 bits, which corresponds to a 16 byte -/// window in the haystack. 
-#[target_feature(enable = "avx2")] -unsafe fn members4m256( - chunk: __m256i, - mask1: Mask256, - mask2: Mask256, - mask3: Mask256, - mask4: Mask256, -) -> (__m256i, __m256i, __m256i, __m256i) { - let lomask = _mm256_set1_epi8(0xF); - let hlo = _mm256_and_si256(chunk, lomask); - let hhi = _mm256_and_si256(_mm256_srli_epi16(chunk, 4), lomask); - let res0 = _mm256_and_si256( - _mm256_shuffle_epi8(mask1.lo, hlo), - _mm256_shuffle_epi8(mask1.hi, hhi), - ); - let res1 = _mm256_and_si256( - _mm256_shuffle_epi8(mask2.lo, hlo), - _mm256_shuffle_epi8(mask2.hi, hhi), - ); - let res2 = _mm256_and_si256( - _mm256_shuffle_epi8(mask3.lo, hlo), - _mm256_shuffle_epi8(mask3.hi, hhi), - ); - let res3 = _mm256_and_si256( - _mm256_shuffle_epi8(mask4.lo, hlo), - _mm256_shuffle_epi8(mask4.hi, hhi), - ); - (res0, res1, res2, res3) -} diff --git a/vendor/aho-corasick/src/packed/tests.rs b/vendor/aho-corasick/src/packed/tests.rs index ea76dd0..2b0d44e 100644 --- a/vendor/aho-corasick/src/packed/tests.rs +++ b/vendor/aho-corasick/src/packed/tests.rs @@ -40,8 +40,9 @@ struct SearchTestOwned { impl SearchTest { fn variations(&self) -> Vec { + let count = if cfg!(miri) { 1 } else { 261 }; let mut tests = vec![]; - for i in 0..=260 { + for i in 0..count { tests.push(self.offset_prefix(i)); tests.push(self.offset_suffix(i)); tests.push(self.offset_both(i)); @@ -91,15 +92,6 @@ impl SearchTest { matches: self.matches.to_vec(), } } - - // fn to_owned(&self) -> SearchTestOwned { - // SearchTestOwned { - // name: self.name.to_string(), - // patterns: self.patterns.iter().map(|s| s.to_string()).collect(), - // haystack: self.haystack.to_string(), - // matches: self.matches.iter().cloned().collect(), - // } - // } } /// Short-hand constructor for SearchTest. We use it a lot below. @@ -392,26 +384,34 @@ macro_rules! 
testconfig { run_search_tests($collection, |test| { let mut config = Config::new(); $with(&mut config); - config - .builder() - .extend(test.patterns.iter().map(|p| p.as_bytes())) - .build() - .unwrap() - .find_iter(&test.haystack) - .collect() + let mut builder = config.builder(); + builder.extend(test.patterns.iter().map(|p| p.as_bytes())); + let searcher = match builder.build() { + Some(searcher) => searcher, + None => { + // For x86-64 and aarch64, not building a searcher is + // probably a bug, so be loud. + if cfg!(any( + target_arch = "x86_64", + target_arch = "aarch64" + )) { + panic!("failed to build packed searcher") + } + return None; + } + }; + Some(searcher.find_iter(&test.haystack).collect()) }); } }; } -#[cfg(target_arch = "x86_64")] testconfig!( search_default_leftmost_first, PACKED_LEFTMOST_FIRST, |_: &mut Config| {} ); -#[cfg(target_arch = "x86_64")] testconfig!( search_default_leftmost_longest, PACKED_LEFTMOST_LONGEST, @@ -420,92 +420,90 @@ testconfig!( } ); -#[cfg(target_arch = "x86_64")] testconfig!( search_teddy_leftmost_first, PACKED_LEFTMOST_FIRST, |c: &mut Config| { - c.force_teddy(true); + c.only_teddy(true); } ); -#[cfg(target_arch = "x86_64")] testconfig!( search_teddy_leftmost_longest, PACKED_LEFTMOST_LONGEST, |c: &mut Config| { - c.force_teddy(true).match_kind(MatchKind::LeftmostLongest); + c.only_teddy(true).match_kind(MatchKind::LeftmostLongest); } ); -#[cfg(target_arch = "x86_64")] testconfig!( search_teddy_ssse3_leftmost_first, PACKED_LEFTMOST_FIRST, |c: &mut Config| { - c.force_teddy(true); + c.only_teddy(true); + #[cfg(target_arch = "x86_64")] if std::is_x86_feature_detected!("ssse3") { - c.force_avx(Some(false)); + c.only_teddy_256bit(Some(false)); } } ); -#[cfg(target_arch = "x86_64")] testconfig!( search_teddy_ssse3_leftmost_longest, PACKED_LEFTMOST_LONGEST, |c: &mut Config| { - c.force_teddy(true).match_kind(MatchKind::LeftmostLongest); + c.only_teddy(true).match_kind(MatchKind::LeftmostLongest); + #[cfg(target_arch = 
"x86_64")] if std::is_x86_feature_detected!("ssse3") { - c.force_avx(Some(false)); + c.only_teddy_256bit(Some(false)); } } ); -#[cfg(target_arch = "x86_64")] testconfig!( search_teddy_avx2_leftmost_first, PACKED_LEFTMOST_FIRST, |c: &mut Config| { - c.force_teddy(true); + c.only_teddy(true); + #[cfg(target_arch = "x86_64")] if std::is_x86_feature_detected!("avx2") { - c.force_avx(Some(true)); + c.only_teddy_256bit(Some(true)); } } ); -#[cfg(target_arch = "x86_64")] testconfig!( search_teddy_avx2_leftmost_longest, PACKED_LEFTMOST_LONGEST, |c: &mut Config| { - c.force_teddy(true).match_kind(MatchKind::LeftmostLongest); + c.only_teddy(true).match_kind(MatchKind::LeftmostLongest); + #[cfg(target_arch = "x86_64")] if std::is_x86_feature_detected!("avx2") { - c.force_avx(Some(true)); + c.only_teddy_256bit(Some(true)); } } ); -#[cfg(target_arch = "x86_64")] testconfig!( search_teddy_fat_leftmost_first, PACKED_LEFTMOST_FIRST, |c: &mut Config| { - c.force_teddy(true); + c.only_teddy(true); + #[cfg(target_arch = "x86_64")] if std::is_x86_feature_detected!("avx2") { - c.force_teddy_fat(Some(true)); + c.only_teddy_fat(Some(true)); } } ); -#[cfg(target_arch = "x86_64")] testconfig!( search_teddy_fat_leftmost_longest, PACKED_LEFTMOST_LONGEST, |c: &mut Config| { - c.force_teddy(true).match_kind(MatchKind::LeftmostLongest); + c.only_teddy(true).match_kind(MatchKind::LeftmostLongest); + #[cfg(target_arch = "x86_64")] if std::is_x86_feature_detected!("avx2") { - c.force_teddy_fat(Some(true)); + c.only_teddy_fat(Some(true)); } } ); @@ -514,7 +512,7 @@ testconfig!( search_rabinkarp_leftmost_first, PACKED_LEFTMOST_FIRST, |c: &mut Config| { - c.force_rabin_karp(true); + c.only_rabin_karp(true); } ); @@ -522,7 +520,7 @@ testconfig!( search_rabinkarp_leftmost_longest, PACKED_LEFTMOST_LONGEST, |c: &mut Config| { - c.force_rabin_karp(true).match_kind(MatchKind::LeftmostLongest); + c.only_rabin_karp(true).match_kind(MatchKind::LeftmostLongest); } ); @@ -550,7 +548,7 @@ fn 
search_tests_have_unique_names() { assert("TEDDY", TEDDY); } -fn run_search_tests Vec>( +fn run_search_tests Option>>( which: TestCollection, mut f: F, ) { @@ -564,12 +562,18 @@ fn run_search_tests Vec>( for &tests in which { for spec in tests { for test in spec.variations() { + let results = match f(&test) { + None => continue, + Some(results) => results, + }; assert_eq!( test.matches, - get_match_triples(f(&test)).as_slice(), - "test: {}, patterns: {:?}, haystack: {:?}, offset: {:?}", + get_match_triples(results).as_slice(), + "test: {}, patterns: {:?}, haystack(len={:?}): {:?}, \ + offset: {:?}", test.name, test.patterns, + test.haystack.len(), test.haystack, test.offset, ); diff --git a/vendor/aho-corasick/src/packed/vector.rs b/vendor/aho-corasick/src/packed/vector.rs index 9590a36..ed3f890 100644 --- a/vendor/aho-corasick/src/packed/vector.rs +++ b/vendor/aho-corasick/src/packed/vector.rs @@ -1,190 +1,1752 @@ -// This file contains a set of fairly generic utility functions when working -// with SIMD vectors. -// -// SAFETY: All of the routines below are unsafe to call because they assume -// the necessary CPU target features in order to use particular vendor -// intrinsics. Calling these routines when the underlying CPU does not support -// the appropriate target features is NOT safe. Callers must ensure this -// themselves. -// -// Note that it may not look like this safety invariant is being upheld when -// these routines are called. Namely, the CPU feature check is typically pretty -// far away from when these routines are used. Instead, we rely on the fact -// that certain types serve as a guaranteed receipt that pertinent target -// features are enabled. For example, the only way TeddySlim3Mask256 can be -// constructed is if the AVX2 CPU feature is available. Thus, any code running -// inside of TeddySlim3Mask256 can use any of the functions below without any -// additional checks: its very existence *is* the check. 
- -use core::arch::x86_64::*; - -/// Shift `a` to the left by two bytes (removing its two most significant -/// bytes), and concatenate it with the the two most significant bytes of `b`. -#[target_feature(enable = "avx2")] -pub unsafe fn alignr256_14(a: __m256i, b: __m256i) -> __m256i { - // Credit goes to jneem for figuring this out: - // https://github.com/jneem/teddy/blob/9ab5e899ad6ef6911aecd3cf1033f1abe6e1f66c/src/x86/teddy_simd.rs#L145-L184 - // - // TL;DR avx2's PALIGNR instruction is actually just two 128-bit PALIGNR - // instructions, which is not what we want, so we need to do some extra - // shuffling. - - // This permute gives us the low 16 bytes of a concatenated with the high - // 16 bytes of b, in order of most significant to least significant. So - // `v = a[15:0] b[31:16]`. - let v = _mm256_permute2x128_si256(b, a, 0x21); - // This effectively does this (where we deal in terms of byte-indexing - // and byte-shifting, and use inclusive ranges): - // - // ret[15:0] := ((a[15:0] << 16) | v[15:0]) >> 14 - // = ((a[15:0] << 16) | b[31:16]) >> 14 - // ret[31:16] := ((a[31:16] << 16) | v[31:16]) >> 14 - // = ((a[31:16] << 16) | a[15:0]) >> 14 - // - // Which therefore results in: - // - // ret[31:0] := a[29:16] a[15:14] a[13:0] b[31:30] - // - // The end result is that we've effectively done this: - // - // (a << 2) | (b >> 30) - // - // When `A` and `B` are strings---where the beginning of the string is in - // the least significant bits---we effectively result in the following - // semantic operation: - // - // (A >> 2) | (B << 30) - // - // The reversal being attributed to the fact that we are in little-endian. - _mm256_alignr_epi8(a, v, 14) -} +// NOTE: The descriptions for each of the vector methods on the traits below +// are pretty inscrutable. For this reason, there are tests for every method +// on for every trait impl below. If you're confused about what an op does, +// consult its test. 
(They probably should be doc tests, but I couldn't figure +// out how to write them in a non-annoying way.) -/// Shift `a` to the left by three byte (removing its most significant byte), -/// and concatenate it with the the most significant byte of `b`. -#[target_feature(enable = "avx2")] -pub unsafe fn alignr256_13(a: __m256i, b: __m256i) -> __m256i { - // For explanation, see alignr256_14. - let v = _mm256_permute2x128_si256(b, a, 0x21); - _mm256_alignr_epi8(a, v, 13) -} +use core::{ + fmt::Debug, + panic::{RefUnwindSafe, UnwindSafe}, +}; -/// Shift `a` to the left by one byte (removing its most significant byte), and -/// concatenate it with the the most significant byte of `b`. -#[target_feature(enable = "avx2")] -pub unsafe fn alignr256_15(a: __m256i, b: __m256i) -> __m256i { - // For explanation, see alignr256_14. - let v = _mm256_permute2x128_si256(b, a, 0x21); - _mm256_alignr_epi8(a, v, 15) -} +/// A trait for describing vector operations used by vectorized searchers. +/// +/// The trait is highly constrained to low level vector operations needed for +/// the specific algorithms used in this crate. In general, it was invented +/// mostly to be generic over x86's __m128i and __m256i types. At time of +/// writing, it also supports wasm and aarch64 128-bit vector types as well. +/// +/// # Safety +/// +/// All methods are not safe since they are intended to be implemented using +/// vendor intrinsics, which are also not safe. Callers must ensure that +/// the appropriate target features are enabled in the calling function, +/// and that the current CPU supports them. All implementations should +/// avoid marking the routines with `#[target_feature]` and instead mark +/// them as `#[inline(always)]` to ensure they get appropriately inlined. +/// (`inline(always)` cannot be used with target_feature.) +pub(crate) trait Vector: + Copy + Debug + Send + Sync + UnwindSafe + RefUnwindSafe +{ + /// The number of bits in the vector. 
+ const BITS: usize; + /// The number of bytes in the vector. That is, this is the size of the + /// vector in memory. + const BYTES: usize; -/// Unpack the given 128-bit vector into its 64-bit components. The first -/// element of the array returned corresponds to the least significant 64-bit -/// lane in `a`. -#[target_feature(enable = "ssse3")] -pub unsafe fn unpack64x128(a: __m128i) -> [u64; 2] { - [ - _mm_cvtsi128_si64(a) as u64, - _mm_cvtsi128_si64(_mm_srli_si128(a, 8)) as u64, - ] -} + /// Create a vector with 8-bit lanes with the given byte repeated into each + /// lane. + /// + /// # Safety + /// + /// Callers must ensure that this is okay to call in the current target for + /// the current CPU. + unsafe fn splat(byte: u8) -> Self; + + /// Read a vector-size number of bytes from the given pointer. The pointer + /// does not need to be aligned. + /// + /// # Safety + /// + /// Callers must ensure that this is okay to call in the current target for + /// the current CPU. + /// + /// Callers must guarantee that at least `BYTES` bytes are readable from + /// `data`. + unsafe fn load_unaligned(data: *const u8) -> Self; + + /// Returns true if and only if this vector has zero in all of its lanes. + /// + /// # Safety + /// + /// Callers must ensure that this is okay to call in the current target for + /// the current CPU. + unsafe fn is_zero(self) -> bool; -/// Unpack the given 256-bit vector into its 64-bit components. The first -/// element of the array returned corresponds to the least significant 64-bit -/// lane in `a`. -#[target_feature(enable = "avx2")] -pub unsafe fn unpack64x256(a: __m256i) -> [u64; 4] { - // Using transmute here is precisely equivalent, but actually slower. It's - // not quite clear why. 
- let lo = _mm256_extracti128_si256(a, 0); - let hi = _mm256_extracti128_si256(a, 1); - [ - _mm_cvtsi128_si64(lo) as u64, - _mm_cvtsi128_si64(_mm_srli_si128(lo, 8)) as u64, - _mm_cvtsi128_si64(hi) as u64, - _mm_cvtsi128_si64(_mm_srli_si128(hi, 8)) as u64, - ] + /// Do an 8-bit pairwise equality check. If lane `i` is equal in this + /// vector and the one given, then lane `i` in the resulting vector is set + /// to `0xFF`. Otherwise, it is set to `0x00`. + /// + /// # Safety + /// + /// Callers must ensure that this is okay to call in the current target for + /// the current CPU. + unsafe fn cmpeq(self, vector2: Self) -> Self; + + /// Perform a bitwise 'and' of this vector and the one given and return + /// the result. + /// + /// # Safety + /// + /// Callers must ensure that this is okay to call in the current target for + /// the current CPU. + unsafe fn and(self, vector2: Self) -> Self; + + /// Perform a bitwise 'or' of this vector and the one given and return + /// the result. + /// + /// # Safety + /// + /// Callers must ensure that this is okay to call in the current target for + /// the current CPU. + unsafe fn or(self, vector2: Self) -> Self; + + /// Shift each 8-bit lane in this vector to the right by the number of + /// bits indictated by the `BITS` type parameter. + /// + /// # Safety + /// + /// Callers must ensure that this is okay to call in the current target for + /// the current CPU. + unsafe fn shift_8bit_lane_right(self) -> Self; + + /// Shift this vector to the left by one byte and shift the most + /// significant byte of `vector2` into the least significant position of + /// this vector. + /// + /// Stated differently, this behaves as if `self` and `vector2` were + /// concatenated into a `2 * Self::BITS` temporary buffer and then shifted + /// right by `Self::BYTES - 1` bytes. 
+ /// + /// With respect to the Teddy algorithm, `vector2` is usually a previous + /// `Self::BYTES` chunk from the haystack and `self` is the chunk + /// immediately following it. This permits combining the last two bytes + /// from the previous chunk (`vector2`) with the first `Self::BYTES - 1` + /// bytes from the current chunk. This permits aligning the result of + /// various shuffles so that they can be and-ed together and a possible + /// candidate discovered. + /// + /// # Safety + /// + /// Callers must ensure that this is okay to call in the current target for + /// the current CPU. + unsafe fn shift_in_one_byte(self, vector2: Self) -> Self; + + /// Shift this vector to the left by two bytes and shift the two most + /// significant bytes of `vector2` into the least significant position of + /// this vector. + /// + /// Stated differently, this behaves as if `self` and `vector2` were + /// concatenated into a `2 * Self::BITS` temporary buffer and then shifted + /// right by `Self::BYTES - 2` bytes. + /// + /// With respect to the Teddy algorithm, `vector2` is usually a previous + /// `Self::BYTES` chunk from the haystack and `self` is the chunk + /// immediately following it. This permits combining the last two bytes + /// from the previous chunk (`vector2`) with the first `Self::BYTES - 2` + /// bytes from the current chunk. This permits aligning the result of + /// various shuffles so that they can be and-ed together and a possible + /// candidate discovered. + /// + /// # Safety + /// + /// Callers must ensure that this is okay to call in the current target for + /// the current CPU. + unsafe fn shift_in_two_bytes(self, vector2: Self) -> Self; + + /// Shift this vector to the left by three bytes and shift the three most + /// significant bytes of `vector2` into the least significant position of + /// this vector. 
+ /// + /// Stated differently, this behaves as if `self` and `vector2` were + /// concatenated into a `2 * Self::BITS` temporary buffer and then shifted + /// right by `Self::BYTES - 3` bytes. + /// + /// With respect to the Teddy algorithm, `vector2` is usually a previous + /// `Self::BYTES` chunk from the haystack and `self` is the chunk + /// immediately following it. This permits combining the last three bytes + /// from the previous chunk (`vector2`) with the first `Self::BYTES - 3` + /// bytes from the current chunk. This permits aligning the result of + /// various shuffles so that they can be and-ed together and a possible + /// candidate discovered. + /// + /// # Safety + /// + /// Callers must ensure that this is okay to call in the current target for + /// the current CPU. + unsafe fn shift_in_three_bytes(self, vector2: Self) -> Self; + + /// Shuffles the bytes in this vector according to the indices in each of + /// the corresponding lanes in `indices`. + /// + /// If `i` is the index of corresponding lanes, `A` is this vector, `B` is + /// indices and `C` is the resulting vector, then `C = A[B[i]]`. + /// + /// # Safety + /// + /// Callers must ensure that this is okay to call in the current target for + /// the current CPU. + unsafe fn shuffle_bytes(self, indices: Self) -> Self; + + /// Call the provided function for each 64-bit lane in this vector. The + /// given function is provided the lane index and lane value as a `u64`. + /// + /// If `f` returns `Some`, then iteration over the lanes is stopped and the + /// value is returned. Otherwise, this returns `None`. + /// + /// # Notes + /// + /// Conceptually it would be nice if we could have a + /// `unpack64(self) -> [u64; BITS / 64]` method, but defining that is + /// tricky given Rust's [current support for const generics][support]. + /// And even if we could, it would be tricky to write generic code over + /// it. (Not impossible. 
We could introduce another layer that requires + /// `AsRef<[u64]>` or something.) + /// + /// [support]: https://github.com/rust-lang/rust/issues/60551 + /// + /// # Safety + /// + /// Callers must ensure that this is okay to call in the current target for + /// the current CPU. + unsafe fn for_each_64bit_lane( + self, + f: impl FnMut(usize, u64) -> Option, + ) -> Option; } -/// Unpack the low 128-bits of `a` and `b`, and return them as 4 64-bit -/// integers. +/// This trait extends the `Vector` trait with additional operations to support +/// Fat Teddy. /// -/// More precisely, if a = a4 a3 a2 a1 and b = b4 b3 b2 b1, where each element -/// is a 64-bit integer and a1/b1 correspond to the least significant 64 bits, -/// then the return value is `b2 b1 a2 a1`. -#[target_feature(enable = "avx2")] -pub unsafe fn unpacklo64x256(a: __m256i, b: __m256i) -> [u64; 4] { - let lo = _mm256_castsi256_si128(a); - let hi = _mm256_castsi256_si128(b); - [ - _mm_cvtsi128_si64(lo) as u64, - _mm_cvtsi128_si64(_mm_srli_si128(lo, 8)) as u64, - _mm_cvtsi128_si64(hi) as u64, - _mm_cvtsi128_si64(_mm_srli_si128(hi, 8)) as u64, - ] -} +/// Fat Teddy uses 16 buckets instead of 8, but reads half as many bytes (as +/// the vector size) instead of the full size of a vector per iteration. For +/// example, when using a 256-bit vector, Slim Teddy reads 32 bytes at a timr +/// but Fat Teddy reads 16 bytes at a time. +/// +/// Fat Teddy is useful when searching for a large number of literals. +/// The extra number of buckets spreads the literals out more and reduces +/// verification time. +/// +/// Currently we only implement this for AVX on x86_64. It would be nice to +/// implement this for SSE on x86_64 and NEON on aarch64, with the latter two +/// only reading 8 bytes at a time. It's not clear how well it would work, but +/// there are some tricky things to figure out in terms of implementation. 
The +/// `half_shift_in_{one,two,three}_bytes` methods in particular are probably +/// the trickiest of the bunch. For AVX2, these are implemented by taking +/// advantage of the fact that `_mm256_alignr_epi8` operates on each 128-bit +/// half instead of the full 256-bit vector. (Where as `_mm_alignr_epi8` +/// operates on the full 128-bit vector and not on each 64-bit half.) I didn't +/// do a careful survey of NEON to see if it could easily support these +/// operations. +pub(crate) trait FatVector: Vector { + type Half: Vector; -/// Returns true if and only if all bits in the given 128-bit vector are 0. -#[target_feature(enable = "ssse3")] -pub unsafe fn is_all_zeroes128(a: __m128i) -> bool { - let cmp = _mm_cmpeq_epi8(a, zeroes128()); - _mm_movemask_epi8(cmp) as u32 == 0xFFFF -} + /// Read a half-vector-size number of bytes from the given pointer, and + /// broadcast it across both halfs of a full vector. The pointer does not + /// need to be aligned. + /// + /// # Safety + /// + /// Callers must ensure that this is okay to call in the current target for + /// the current CPU. + /// + /// Callers must guarantee that at least `Self::HALF::BYTES` bytes are + /// readable from `data`. + unsafe fn load_half_unaligned(data: *const u8) -> Self; + + /// Like `Vector::shift_in_one_byte`, except this is done for each half + /// of the vector instead. + /// + /// # Safety + /// + /// Callers must ensure that this is okay to call in the current target for + /// the current CPU. + unsafe fn half_shift_in_one_byte(self, vector2: Self) -> Self; + + /// Like `Vector::shift_in_two_bytes`, except this is done for each half + /// of the vector instead. + /// + /// # Safety + /// + /// Callers must ensure that this is okay to call in the current target for + /// the current CPU. + unsafe fn half_shift_in_two_bytes(self, vector2: Self) -> Self; + + /// Like `Vector::shift_in_two_bytes`, except this is done for each half + /// of the vector instead. 
+ /// + /// # Safety + /// + /// Callers must ensure that this is okay to call in the current target for + /// the current CPU. + unsafe fn half_shift_in_three_bytes(self, vector2: Self) -> Self; + + /// Swap the 128-bit lanes in this vector. + /// + /// # Safety + /// + /// Callers must ensure that this is okay to call in the current target for + /// the current CPU. + unsafe fn swap_halves(self) -> Self; + + /// Unpack and interleave the 8-bit lanes from the low 128 bits of each + /// vector and return the result. + /// + /// # Safety + /// + /// Callers must ensure that this is okay to call in the current target for + /// the current CPU. + unsafe fn interleave_low_8bit_lanes(self, vector2: Self) -> Self; + + /// Unpack and interleave the 8-bit lanes from the high 128 bits of each + /// vector and return the result. + /// + /// # Safety + /// + /// Callers must ensure that this is okay to call in the current target for + /// the current CPU. + unsafe fn interleave_high_8bit_lanes(self, vector2: Self) -> Self; -/// Returns true if and only if all bits in the given 256-bit vector are 0. -#[target_feature(enable = "avx2")] -pub unsafe fn is_all_zeroes256(a: __m256i) -> bool { - let cmp = _mm256_cmpeq_epi8(a, zeroes256()); - _mm256_movemask_epi8(cmp) as u32 == 0xFFFFFFFF + /// Call the provided function for each 64-bit lane in the lower half + /// of this vector and then in the other vector. The given function is + /// provided the lane index and lane value as a `u64`. (The high 128-bits + /// of each vector are ignored.) + /// + /// If `f` returns `Some`, then iteration over the lanes is stopped and the + /// value is returned. Otherwise, this returns `None`. + /// + /// # Safety + /// + /// Callers must ensure that this is okay to call in the current target for + /// the current CPU. + unsafe fn for_each_low_64bit_lane( + self, + vector2: Self, + f: impl FnMut(usize, u64) -> Option, + ) -> Option; } -/// Load a 128-bit vector from slice at the given position. 
The slice does -/// not need to be unaligned. -/// -/// Since this code assumes little-endian (there is no big-endian x86), the -/// bytes starting in `slice[at..]` will be at the least significant bits of -/// the returned vector. This is important for the surrounding code, since for -/// example, shifting the resulting vector right is equivalent to logically -/// shifting the bytes in `slice` left. -#[target_feature(enable = "sse2")] -pub unsafe fn loadu128(slice: &[u8], at: usize) -> __m128i { - let ptr = slice.get_unchecked(at..).as_ptr(); - _mm_loadu_si128(ptr as *const u8 as *const __m128i) +#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] +mod x86_64_ssse3 { + use core::arch::x86_64::*; + + use crate::util::int::{I32, I8}; + + use super::Vector; + + impl Vector for __m128i { + const BITS: usize = 128; + const BYTES: usize = 16; + + #[inline(always)] + unsafe fn splat(byte: u8) -> __m128i { + _mm_set1_epi8(i8::from_bits(byte)) + } + + #[inline(always)] + unsafe fn load_unaligned(data: *const u8) -> __m128i { + _mm_loadu_si128(data.cast::<__m128i>()) + } + + #[inline(always)] + unsafe fn is_zero(self) -> bool { + let cmp = self.cmpeq(Self::splat(0)); + _mm_movemask_epi8(cmp).to_bits() == 0xFFFF + } + + #[inline(always)] + unsafe fn cmpeq(self, vector2: Self) -> __m128i { + _mm_cmpeq_epi8(self, vector2) + } + + #[inline(always)] + unsafe fn and(self, vector2: Self) -> __m128i { + _mm_and_si128(self, vector2) + } + + #[inline(always)] + unsafe fn or(self, vector2: Self) -> __m128i { + _mm_or_si128(self, vector2) + } + + #[inline(always)] + unsafe fn shift_8bit_lane_right(self) -> Self { + // Apparently there is no _mm_srli_epi8, so we emulate it by + // shifting 16-bit integers and masking out the high nybble of each + // 8-bit lane (since that nybble will contain bits from the low + // nybble of the previous lane). 
+ let lomask = Self::splat(0xF); + _mm_srli_epi16(self, BITS).and(lomask) + } + + #[inline(always)] + unsafe fn shift_in_one_byte(self, vector2: Self) -> Self { + _mm_alignr_epi8(self, vector2, 15) + } + + #[inline(always)] + unsafe fn shift_in_two_bytes(self, vector2: Self) -> Self { + _mm_alignr_epi8(self, vector2, 14) + } + + #[inline(always)] + unsafe fn shift_in_three_bytes(self, vector2: Self) -> Self { + _mm_alignr_epi8(self, vector2, 13) + } + + #[inline(always)] + unsafe fn shuffle_bytes(self, indices: Self) -> Self { + _mm_shuffle_epi8(self, indices) + } + + #[inline(always)] + unsafe fn for_each_64bit_lane( + self, + mut f: impl FnMut(usize, u64) -> Option, + ) -> Option { + // We could just use _mm_extract_epi64 here, but that requires + // SSE 4.1. It isn't necessarily a problem to just require SSE 4.1, + // but everything else works with SSSE3 so we stick to that subset. + let lanes: [u64; 2] = core::mem::transmute(self); + if let Some(t) = f(0, lanes[0]) { + return Some(t); + } + if let Some(t) = f(1, lanes[1]) { + return Some(t); + } + None + } + } } -/// Load a 256-bit vector from slice at the given position. The slice does -/// not need to be unaligned. -/// -/// Since this code assumes little-endian (there is no big-endian x86), the -/// bytes starting in `slice[at..]` will be at the least significant bits of -/// the returned vector. This is important for the surrounding code, since for -/// example, shifting the resulting vector right is equivalent to logically -/// shifting the bytes in `slice` left. 
-#[target_feature(enable = "avx2")] -pub unsafe fn loadu256(slice: &[u8], at: usize) -> __m256i { - let ptr = slice.get_unchecked(at..).as_ptr(); - _mm256_loadu_si256(ptr as *const u8 as *const __m256i) +#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] +mod x86_64_avx2 { + use core::arch::x86_64::*; + + use crate::util::int::{I32, I64, I8}; + + use super::{FatVector, Vector}; + + impl Vector for __m256i { + const BITS: usize = 256; + const BYTES: usize = 32; + + #[inline(always)] + unsafe fn splat(byte: u8) -> __m256i { + _mm256_set1_epi8(i8::from_bits(byte)) + } + + #[inline(always)] + unsafe fn load_unaligned(data: *const u8) -> __m256i { + _mm256_loadu_si256(data.cast::<__m256i>()) + } + + #[inline(always)] + unsafe fn is_zero(self) -> bool { + let cmp = self.cmpeq(Self::splat(0)); + _mm256_movemask_epi8(cmp).to_bits() == 0xFFFFFFFF + } + + #[inline(always)] + unsafe fn cmpeq(self, vector2: Self) -> __m256i { + _mm256_cmpeq_epi8(self, vector2) + } + + #[inline(always)] + unsafe fn and(self, vector2: Self) -> __m256i { + _mm256_and_si256(self, vector2) + } + + #[inline(always)] + unsafe fn or(self, vector2: Self) -> __m256i { + _mm256_or_si256(self, vector2) + } + + #[inline(always)] + unsafe fn shift_8bit_lane_right(self) -> Self { + let lomask = Self::splat(0xF); + _mm256_srli_epi16(self, BITS).and(lomask) + } + + #[inline(always)] + unsafe fn shift_in_one_byte(self, vector2: Self) -> Self { + // Credit goes to jneem for figuring this out: + // https://github.com/jneem/teddy/blob/9ab5e899ad6ef6911aecd3cf1033f1abe6e1f66c/src/x86/teddy_simd.rs#L145-L184 + // + // TL;DR avx2's PALIGNR instruction is actually just two 128-bit + // PALIGNR instructions, which is not what we want, so we need to + // do some extra shuffling. 
+ let v = _mm256_permute2x128_si256(vector2, self, 0x21); + _mm256_alignr_epi8(self, v, 15) + } + + #[inline(always)] + unsafe fn shift_in_two_bytes(self, vector2: Self) -> Self { + // Credit goes to jneem for figuring this out: + // https://github.com/jneem/teddy/blob/9ab5e899ad6ef6911aecd3cf1033f1abe6e1f66c/src/x86/teddy_simd.rs#L145-L184 + // + // TL;DR avx2's PALIGNR instruction is actually just two 128-bit + // PALIGNR instructions, which is not what we want, so we need to + // do some extra shuffling. + let v = _mm256_permute2x128_si256(vector2, self, 0x21); + _mm256_alignr_epi8(self, v, 14) + } + + #[inline(always)] + unsafe fn shift_in_three_bytes(self, vector2: Self) -> Self { + // Credit goes to jneem for figuring this out: + // https://github.com/jneem/teddy/blob/9ab5e899ad6ef6911aecd3cf1033f1abe6e1f66c/src/x86/teddy_simd.rs#L145-L184 + // + // TL;DR avx2's PALIGNR instruction is actually just two 128-bit + // PALIGNR instructions, which is not what we want, so we need to + // do some extra shuffling. + let v = _mm256_permute2x128_si256(vector2, self, 0x21); + _mm256_alignr_epi8(self, v, 13) + } + + #[inline(always)] + unsafe fn shuffle_bytes(self, indices: Self) -> Self { + _mm256_shuffle_epi8(self, indices) + } + + #[inline(always)] + unsafe fn for_each_64bit_lane( + self, + mut f: impl FnMut(usize, u64) -> Option, + ) -> Option { + // NOTE: At one point in the past, I used transmute to this to + // get a [u64; 4], but it turned out to lead to worse codegen IIRC. + // I've tried it more recently, and it looks like that's no longer + // the case. But since there's no difference, we stick with the + // slightly more complicated but transmute-free version. 
+ let lane = _mm256_extract_epi64(self, 0).to_bits(); + if let Some(t) = f(0, lane) { + return Some(t); + } + let lane = _mm256_extract_epi64(self, 1).to_bits(); + if let Some(t) = f(1, lane) { + return Some(t); + } + let lane = _mm256_extract_epi64(self, 2).to_bits(); + if let Some(t) = f(2, lane) { + return Some(t); + } + let lane = _mm256_extract_epi64(self, 3).to_bits(); + if let Some(t) = f(3, lane) { + return Some(t); + } + None + } + } + + impl FatVector for __m256i { + type Half = __m128i; + + #[inline(always)] + unsafe fn load_half_unaligned(data: *const u8) -> Self { + let half = Self::Half::load_unaligned(data); + _mm256_broadcastsi128_si256(half) + } + + #[inline(always)] + unsafe fn half_shift_in_one_byte(self, vector2: Self) -> Self { + _mm256_alignr_epi8(self, vector2, 15) + } + + #[inline(always)] + unsafe fn half_shift_in_two_bytes(self, vector2: Self) -> Self { + _mm256_alignr_epi8(self, vector2, 14) + } + + #[inline(always)] + unsafe fn half_shift_in_three_bytes(self, vector2: Self) -> Self { + _mm256_alignr_epi8(self, vector2, 13) + } + + #[inline(always)] + unsafe fn swap_halves(self) -> Self { + _mm256_permute4x64_epi64(self, 0x4E) + } + + #[inline(always)] + unsafe fn interleave_low_8bit_lanes(self, vector2: Self) -> Self { + _mm256_unpacklo_epi8(self, vector2) + } + + #[inline(always)] + unsafe fn interleave_high_8bit_lanes(self, vector2: Self) -> Self { + _mm256_unpackhi_epi8(self, vector2) + } + + #[inline(always)] + unsafe fn for_each_low_64bit_lane( + self, + vector2: Self, + mut f: impl FnMut(usize, u64) -> Option, + ) -> Option { + let lane = _mm256_extract_epi64(self, 0).to_bits(); + if let Some(t) = f(0, lane) { + return Some(t); + } + let lane = _mm256_extract_epi64(self, 1).to_bits(); + if let Some(t) = f(1, lane) { + return Some(t); + } + let lane = _mm256_extract_epi64(vector2, 0).to_bits(); + if let Some(t) = f(2, lane) { + return Some(t); + } + let lane = _mm256_extract_epi64(vector2, 1).to_bits(); + if let Some(t) = f(3, lane) 
{ + return Some(t); + } + None + } + } } -/// Returns a 128-bit vector with all bits set to 0. -#[target_feature(enable = "sse2")] -pub unsafe fn zeroes128() -> __m128i { - _mm_set1_epi8(0) +#[cfg(target_arch = "aarch64")] +mod aarch64_neon { + use core::arch::aarch64::*; + + use super::Vector; + + impl Vector for uint8x16_t { + const BITS: usize = 128; + const BYTES: usize = 16; + + #[inline(always)] + unsafe fn splat(byte: u8) -> uint8x16_t { + vdupq_n_u8(byte) + } + + #[inline(always)] + unsafe fn load_unaligned(data: *const u8) -> uint8x16_t { + vld1q_u8(data) + } + + #[inline(always)] + unsafe fn is_zero(self) -> bool { + // Could also use vmaxvq_u8. + // ... I tried that and couldn't observe any meaningful difference + // in benchmarks. + let maxes = vreinterpretq_u64_u8(vpmaxq_u8(self, self)); + vgetq_lane_u64(maxes, 0) == 0 + } + + #[inline(always)] + unsafe fn cmpeq(self, vector2: Self) -> uint8x16_t { + vceqq_u8(self, vector2) + } + + #[inline(always)] + unsafe fn and(self, vector2: Self) -> uint8x16_t { + vandq_u8(self, vector2) + } + + #[inline(always)] + unsafe fn or(self, vector2: Self) -> uint8x16_t { + vorrq_u8(self, vector2) + } + + #[inline(always)] + unsafe fn shift_8bit_lane_right(self) -> Self { + debug_assert!(BITS <= 7); + vshrq_n_u8(self, BITS) + } + + #[inline(always)] + unsafe fn shift_in_one_byte(self, vector2: Self) -> Self { + vextq_u8(vector2, self, 15) + } + + #[inline(always)] + unsafe fn shift_in_two_bytes(self, vector2: Self) -> Self { + vextq_u8(vector2, self, 14) + } + + #[inline(always)] + unsafe fn shift_in_three_bytes(self, vector2: Self) -> Self { + vextq_u8(vector2, self, 13) + } + + #[inline(always)] + unsafe fn shuffle_bytes(self, indices: Self) -> Self { + vqtbl1q_u8(self, indices) + } + + #[inline(always)] + unsafe fn for_each_64bit_lane( + self, + mut f: impl FnMut(usize, u64) -> Option, + ) -> Option { + let this = vreinterpretq_u64_u8(self); + let lane = vgetq_lane_u64(this, 0); + if let Some(t) = f(0, lane) { + 
return Some(t); + } + let lane = vgetq_lane_u64(this, 1); + if let Some(t) = f(1, lane) { + return Some(t); + } + None + } + } } -/// Returns a 256-bit vector with all bits set to 0. -#[target_feature(enable = "avx2")] -pub unsafe fn zeroes256() -> __m256i { - _mm256_set1_epi8(0) +#[cfg(all(test, target_arch = "x86_64", target_feature = "sse2"))] +mod tests_x86_64_ssse3 { + use core::arch::x86_64::*; + + use crate::util::int::{I32, U32}; + + use super::*; + + fn is_runnable() -> bool { + std::is_x86_feature_detected!("ssse3") + } + + #[target_feature(enable = "ssse3")] + unsafe fn load(lanes: [u8; 16]) -> __m128i { + __m128i::load_unaligned(&lanes as *const u8) + } + + #[target_feature(enable = "ssse3")] + unsafe fn unload(v: __m128i) -> [u8; 16] { + [ + _mm_extract_epi8(v, 0).to_bits().low_u8(), + _mm_extract_epi8(v, 1).to_bits().low_u8(), + _mm_extract_epi8(v, 2).to_bits().low_u8(), + _mm_extract_epi8(v, 3).to_bits().low_u8(), + _mm_extract_epi8(v, 4).to_bits().low_u8(), + _mm_extract_epi8(v, 5).to_bits().low_u8(), + _mm_extract_epi8(v, 6).to_bits().low_u8(), + _mm_extract_epi8(v, 7).to_bits().low_u8(), + _mm_extract_epi8(v, 8).to_bits().low_u8(), + _mm_extract_epi8(v, 9).to_bits().low_u8(), + _mm_extract_epi8(v, 10).to_bits().low_u8(), + _mm_extract_epi8(v, 11).to_bits().low_u8(), + _mm_extract_epi8(v, 12).to_bits().low_u8(), + _mm_extract_epi8(v, 13).to_bits().low_u8(), + _mm_extract_epi8(v, 14).to_bits().low_u8(), + _mm_extract_epi8(v, 15).to_bits().low_u8(), + ] + } + + #[test] + fn vector_splat() { + #[target_feature(enable = "ssse3")] + unsafe fn test() { + let v = __m128i::splat(0xAF); + assert_eq!( + unload(v), + [ + 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, + 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF + ] + ); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn vector_is_zero() { + #[target_feature(enable = "ssse3")] + unsafe fn test() { + let v = load([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); + 
assert!(!v.is_zero()); + let v = load([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); + assert!(v.is_zero()); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn vector_cmpeq() { + #[target_feature(enable = "ssse3")] + unsafe fn test() { + let v1 = + load([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1]); + let v2 = + load([16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1]); + assert_eq!( + unload(v1.cmpeq(v2)), + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xFF] + ); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn vector_and() { + #[target_feature(enable = "ssse3")] + unsafe fn test() { + let v1 = + load([0, 0, 0, 0, 0, 0b1001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); + let v2 = + load([0, 0, 0, 0, 0, 0b1010, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); + assert_eq!( + unload(v1.and(v2)), + [0, 0, 0, 0, 0, 0b1000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn vector_or() { + #[target_feature(enable = "ssse3")] + unsafe fn test() { + let v1 = + load([0, 0, 0, 0, 0, 0b1001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); + let v2 = + load([0, 0, 0, 0, 0, 0b1010, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); + assert_eq!( + unload(v1.or(v2)), + [0, 0, 0, 0, 0, 0b1011, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn vector_shift_8bit_lane_right() { + #[target_feature(enable = "ssse3")] + unsafe fn test() { + let v = load([ + 0, 0, 0, 0, 0b1011, 0b0101, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]); + assert_eq!( + unload(v.shift_8bit_lane_right::<2>()), + [0, 0, 0, 0, 0b0010, 0b0001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn vector_shift_in_one_byte() { + #[target_feature(enable = "ssse3")] + unsafe fn test() { + let v1 = + load([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]); + let v2 = load([ + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 
27, 28, 29, 30, 31, 32, + ]); + assert_eq!( + unload(v1.shift_in_one_byte(v2)), + [32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], + ); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn vector_shift_in_two_bytes() { + #[target_feature(enable = "ssse3")] + unsafe fn test() { + let v1 = + load([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]); + let v2 = load([ + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + ]); + assert_eq!( + unload(v1.shift_in_two_bytes(v2)), + [31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], + ); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn vector_shift_in_three_bytes() { + #[target_feature(enable = "ssse3")] + unsafe fn test() { + let v1 = + load([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]); + let v2 = load([ + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + ]); + assert_eq!( + unload(v1.shift_in_three_bytes(v2)), + [30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], + ); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn vector_shuffle_bytes() { + #[target_feature(enable = "ssse3")] + unsafe fn test() { + let v1 = + load([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]); + let v2 = + load([0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12]); + assert_eq!( + unload(v1.shuffle_bytes(v2)), + [1, 1, 1, 1, 5, 5, 5, 5, 9, 9, 9, 9, 13, 13, 13, 13], + ); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn vector_for_each_64bit_lane() { + #[target_feature(enable = "ssse3")] + unsafe fn test() { + let v = load([ + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, + 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, + ]); + let mut lanes = [0u64; 2]; + v.for_each_64bit_lane(|i, lane| { + lanes[i] = lane; + None::<()> + }); + assert_eq!(lanes, [0x0807060504030201, 0x100F0E0D0C0B0A09],); + } + if !is_runnable() { + return; + } + unsafe { test() } + } } 
-/// Returns a 128-bit vector with all bits set to 1. -#[target_feature(enable = "sse2")] -pub unsafe fn ones128() -> __m128i { - _mm_set1_epi8(0xFF as u8 as i8) +#[cfg(all(test, target_arch = "x86_64", target_feature = "sse2"))] +mod tests_x86_64_avx2 { + use core::arch::x86_64::*; + + use crate::util::int::{I32, U32}; + + use super::*; + + fn is_runnable() -> bool { + std::is_x86_feature_detected!("avx2") + } + + #[target_feature(enable = "avx2")] + unsafe fn load(lanes: [u8; 32]) -> __m256i { + __m256i::load_unaligned(&lanes as *const u8) + } + + #[target_feature(enable = "avx2")] + unsafe fn load_half(lanes: [u8; 16]) -> __m256i { + __m256i::load_half_unaligned(&lanes as *const u8) + } + + #[target_feature(enable = "avx2")] + unsafe fn unload(v: __m256i) -> [u8; 32] { + [ + _mm256_extract_epi8(v, 0).to_bits().low_u8(), + _mm256_extract_epi8(v, 1).to_bits().low_u8(), + _mm256_extract_epi8(v, 2).to_bits().low_u8(), + _mm256_extract_epi8(v, 3).to_bits().low_u8(), + _mm256_extract_epi8(v, 4).to_bits().low_u8(), + _mm256_extract_epi8(v, 5).to_bits().low_u8(), + _mm256_extract_epi8(v, 6).to_bits().low_u8(), + _mm256_extract_epi8(v, 7).to_bits().low_u8(), + _mm256_extract_epi8(v, 8).to_bits().low_u8(), + _mm256_extract_epi8(v, 9).to_bits().low_u8(), + _mm256_extract_epi8(v, 10).to_bits().low_u8(), + _mm256_extract_epi8(v, 11).to_bits().low_u8(), + _mm256_extract_epi8(v, 12).to_bits().low_u8(), + _mm256_extract_epi8(v, 13).to_bits().low_u8(), + _mm256_extract_epi8(v, 14).to_bits().low_u8(), + _mm256_extract_epi8(v, 15).to_bits().low_u8(), + _mm256_extract_epi8(v, 16).to_bits().low_u8(), + _mm256_extract_epi8(v, 17).to_bits().low_u8(), + _mm256_extract_epi8(v, 18).to_bits().low_u8(), + _mm256_extract_epi8(v, 19).to_bits().low_u8(), + _mm256_extract_epi8(v, 20).to_bits().low_u8(), + _mm256_extract_epi8(v, 21).to_bits().low_u8(), + _mm256_extract_epi8(v, 22).to_bits().low_u8(), + _mm256_extract_epi8(v, 23).to_bits().low_u8(), + _mm256_extract_epi8(v, 
24).to_bits().low_u8(), + _mm256_extract_epi8(v, 25).to_bits().low_u8(), + _mm256_extract_epi8(v, 26).to_bits().low_u8(), + _mm256_extract_epi8(v, 27).to_bits().low_u8(), + _mm256_extract_epi8(v, 28).to_bits().low_u8(), + _mm256_extract_epi8(v, 29).to_bits().low_u8(), + _mm256_extract_epi8(v, 30).to_bits().low_u8(), + _mm256_extract_epi8(v, 31).to_bits().low_u8(), + ] + } + + #[test] + fn vector_splat() { + #[target_feature(enable = "avx2")] + unsafe fn test() { + let v = __m256i::splat(0xAF); + assert_eq!( + unload(v), + [ + 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, + 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, + 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, + 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, + ] + ); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn vector_is_zero() { + #[target_feature(enable = "avx2")] + unsafe fn test() { + let v = load([ + 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]); + assert!(!v.is_zero()); + let v = load([ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]); + assert!(v.is_zero()); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn vector_cmpeq() { + #[target_feature(enable = "avx2")] + unsafe fn test() { + let v1 = load([ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 1, + ]); + let v2 = load([ + 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, + 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, + ]); + assert_eq!( + unload(v1.cmpeq(v2)), + [ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xFF + ] + ); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn vector_and() { + #[target_feature(enable = "avx2")] + unsafe fn test() { + let v1 = load([ + 0, 0, 0, 0, 0, 0b1001, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]); + let v2 = load([ + 0, 0, 0, 0, 0, 0b1010, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]); + assert_eq!( + unload(v1.and(v2)), + [ + 0, 0, 0, 0, 0, 0b1000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ] + ); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn vector_or() { + #[target_feature(enable = "avx2")] + unsafe fn test() { + let v1 = load([ + 0, 0, 0, 0, 0, 0b1001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]); + let v2 = load([ + 0, 0, 0, 0, 0, 0b1010, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]); + assert_eq!( + unload(v1.or(v2)), + [ + 0, 0, 0, 0, 0, 0b1011, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ] + ); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn vector_shift_8bit_lane_right() { + #[target_feature(enable = "avx2")] + unsafe fn test() { + let v = load([ + 0, 0, 0, 0, 0b1011, 0b0101, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]); + assert_eq!( + unload(v.shift_8bit_lane_right::<2>()), + [ + 0, 0, 0, 0, 0b0010, 0b0001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ] + ); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn vector_shift_in_one_byte() { + #[target_feature(enable = "avx2")] + unsafe fn test() { + let v1 = load([ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + ]); + let v2 = load([ + 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, + 63, 64, + ]); + assert_eq!( + unload(v1.shift_in_one_byte(v2)), + [ + 64, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, + 31, + ], + ); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn vector_shift_in_two_bytes() { + #[target_feature(enable = "avx2")] + unsafe fn test() { + let v1 = load([ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + ]); + let v2 = load([ + 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, + 63, 64, + ]); + assert_eq!( + unload(v1.shift_in_two_bytes(v2)), + [ + 63, 64, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, + ], + ); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn vector_shift_in_three_bytes() { + #[target_feature(enable = "avx2")] + unsafe fn test() { + let v1 = load([ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + ]); + let v2 = load([ + 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, + 63, 64, + ]); + assert_eq!( + unload(v1.shift_in_three_bytes(v2)), + [ + 62, 63, 64, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, + 29, + ], + ); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn vector_shuffle_bytes() { + #[target_feature(enable = "avx2")] + unsafe fn test() { + let v1 = load([ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + ]); + let v2 = load([ + 0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12, 16, 16, + 16, 16, 20, 20, 20, 20, 24, 24, 24, 24, 28, 28, 28, 28, + ]); + assert_eq!( + unload(v1.shuffle_bytes(v2)), + [ + 1, 1, 1, 1, 5, 5, 5, 5, 9, 9, 9, 9, 13, 13, 13, 13, 17, + 17, 17, 17, 21, 21, 21, 21, 
25, 25, 25, 25, 29, 29, 29, + 29 + ], + ); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn vector_for_each_64bit_lane() { + #[target_feature(enable = "avx2")] + unsafe fn test() { + let v = load([ + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, + 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, + 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, + 0x1F, 0x20, + ]); + let mut lanes = [0u64; 4]; + v.for_each_64bit_lane(|i, lane| { + lanes[i] = lane; + None::<()> + }); + assert_eq!( + lanes, + [ + 0x0807060504030201, + 0x100F0E0D0C0B0A09, + 0x1817161514131211, + 0x201F1E1D1C1B1A19 + ] + ); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn fat_vector_half_shift_in_one_byte() { + #[target_feature(enable = "avx2")] + unsafe fn test() { + let v1 = load_half([ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + ]); + let v2 = load_half([ + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + ]); + assert_eq!( + unload(v1.half_shift_in_one_byte(v2)), + [ + 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 + ], + ); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn fat_vector_half_shift_in_two_bytes() { + #[target_feature(enable = "avx2")] + unsafe fn test() { + let v1 = load_half([ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + ]); + let v2 = load_half([ + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + ]); + assert_eq!( + unload(v1.half_shift_in_two_bytes(v2)), + [ + 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 31, + 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + ], + ); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn fat_vector_half_shift_in_three_bytes() { + #[target_feature(enable = "avx2")] + unsafe fn test() { + let v1 = load_half([ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 
15, 16, + ]); + let v2 = load_half([ + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + ]); + assert_eq!( + unload(v1.half_shift_in_three_bytes(v2)), + [ + 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 30, + 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, + ], + ); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn fat_vector_swap_halves() { + #[target_feature(enable = "avx2")] + unsafe fn test() { + let v = load([ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + ]); + assert_eq!( + unload(v.swap_halves()), + [ + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, + 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, + ], + ); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn fat_vector_interleave_low_8bit_lanes() { + #[target_feature(enable = "avx2")] + unsafe fn test() { + let v1 = load([ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + ]); + let v2 = load([ + 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, + 63, 64, + ]); + assert_eq!( + unload(v1.interleave_low_8bit_lanes(v2)), + [ + 1, 33, 2, 34, 3, 35, 4, 36, 5, 37, 6, 38, 7, 39, 8, 40, + 17, 49, 18, 50, 19, 51, 20, 52, 21, 53, 22, 54, 23, 55, + 24, 56, + ], + ); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn fat_vector_interleave_high_8bit_lanes() { + #[target_feature(enable = "avx2")] + unsafe fn test() { + let v1 = load([ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + ]); + let v2 = load([ + 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, + 63, 64, + ]); + assert_eq!( + 
unload(v1.interleave_high_8bit_lanes(v2)), + [ + 9, 41, 10, 42, 11, 43, 12, 44, 13, 45, 14, 46, 15, 47, 16, + 48, 25, 57, 26, 58, 27, 59, 28, 60, 29, 61, 30, 62, 31, + 63, 32, 64, + ], + ); + } + if !is_runnable() { + return; + } + unsafe { test() } + } + + #[test] + fn fat_vector_for_each_low_64bit_lane() { + #[target_feature(enable = "avx2")] + unsafe fn test() { + let v1 = load([ + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, + 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, + 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, + 0x1F, 0x20, + ]); + let v2 = load([ + 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, + 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 0x30, 0x31, 0x32, 0x33, 0x34, + 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, + 0x3F, 0x40, + ]); + let mut lanes = [0u64; 4]; + v1.for_each_low_64bit_lane(v2, |i, lane| { + lanes[i] = lane; + None::<()> + }); + assert_eq!( + lanes, + [ + 0x0807060504030201, + 0x100F0E0D0C0B0A09, + 0x2827262524232221, + 0x302F2E2D2C2B2A29 + ] + ); + } + if !is_runnable() { + return; + } + unsafe { test() } + } } -/// Returns a 256-bit vector with all bits set to 1. 
-#[target_feature(enable = "avx2")] -pub unsafe fn ones256() -> __m256i { - _mm256_set1_epi8(0xFF as u8 as i8) +#[cfg(all(test, target_arch = "aarch64", target_feature = "neon"))] +mod tests_aarch64_neon { + use core::arch::aarch64::*; + + use super::*; + + #[target_feature(enable = "neon")] + unsafe fn load(lanes: [u8; 16]) -> uint8x16_t { + uint8x16_t::load_unaligned(&lanes as *const u8) + } + + #[target_feature(enable = "neon")] + unsafe fn unload(v: uint8x16_t) -> [u8; 16] { + [ + vgetq_lane_u8(v, 0), + vgetq_lane_u8(v, 1), + vgetq_lane_u8(v, 2), + vgetq_lane_u8(v, 3), + vgetq_lane_u8(v, 4), + vgetq_lane_u8(v, 5), + vgetq_lane_u8(v, 6), + vgetq_lane_u8(v, 7), + vgetq_lane_u8(v, 8), + vgetq_lane_u8(v, 9), + vgetq_lane_u8(v, 10), + vgetq_lane_u8(v, 11), + vgetq_lane_u8(v, 12), + vgetq_lane_u8(v, 13), + vgetq_lane_u8(v, 14), + vgetq_lane_u8(v, 15), + ] + } + + // Example functions. These don't test the Vector traits, but rather, + // specific NEON instructions. They are basically little experiments I + // wrote to figure out what an instruction does since their descriptions + // are so dense. I decided to keep the experiments around as example tests + // in case there' useful. 
+ + #[test] + fn example_vmaxvq_u8_non_zero() { + #[target_feature(enable = "neon")] + unsafe fn example() { + let v = load([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); + assert_eq!(vmaxvq_u8(v), 1); + } + unsafe { example() } + } + + #[test] + fn example_vmaxvq_u8_zero() { + #[target_feature(enable = "neon")] + unsafe fn example() { + let v = load([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); + assert_eq!(vmaxvq_u8(v), 0); + } + unsafe { example() } + } + + #[test] + fn example_vpmaxq_u8_non_zero() { + #[target_feature(enable = "neon")] + unsafe fn example() { + let v = load([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); + let r = vpmaxq_u8(v, v); + assert_eq!( + unload(r), + [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0] + ); + } + unsafe { example() } + } + + #[test] + fn example_vpmaxq_u8_self() { + #[target_feature(enable = "neon")] + unsafe fn example() { + let v = + load([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]); + let r = vpmaxq_u8(v, v); + assert_eq!( + unload(r), + [2, 4, 6, 8, 10, 12, 14, 16, 2, 4, 6, 8, 10, 12, 14, 16] + ); + } + unsafe { example() } + } + + #[test] + fn example_vpmaxq_u8_other() { + #[target_feature(enable = "neon")] + unsafe fn example() { + let v1 = + load([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]); + let v2 = load([ + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + ]); + let r = vpmaxq_u8(v1, v2); + assert_eq!( + unload(r), + [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32] + ); + } + unsafe { example() } + } + + // Now we test the actual methods on the Vector trait. 
+ + #[test] + fn vector_splat() { + #[target_feature(enable = "neon")] + unsafe fn test() { + let v = uint8x16_t::splat(0xAF); + assert_eq!( + unload(v), + [ + 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, + 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF, 0xAF + ] + ); + } + unsafe { test() } + } + + #[test] + fn vector_is_zero() { + #[target_feature(enable = "neon")] + unsafe fn test() { + let v = load([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); + assert!(!v.is_zero()); + let v = load([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); + assert!(v.is_zero()); + } + unsafe { test() } + } + + #[test] + fn vector_cmpeq() { + #[target_feature(enable = "neon")] + unsafe fn test() { + let v1 = + load([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1]); + let v2 = + load([16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1]); + assert_eq!( + unload(v1.cmpeq(v2)), + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xFF] + ); + } + unsafe { test() } + } + + #[test] + fn vector_and() { + #[target_feature(enable = "neon")] + unsafe fn test() { + let v1 = + load([0, 0, 0, 0, 0, 0b1001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); + let v2 = + load([0, 0, 0, 0, 0, 0b1010, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); + assert_eq!( + unload(v1.and(v2)), + [0, 0, 0, 0, 0, 0b1000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ); + } + unsafe { test() } + } + + #[test] + fn vector_or() { + #[target_feature(enable = "neon")] + unsafe fn test() { + let v1 = + load([0, 0, 0, 0, 0, 0b1001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); + let v2 = + load([0, 0, 0, 0, 0, 0b1010, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); + assert_eq!( + unload(v1.or(v2)), + [0, 0, 0, 0, 0, 0b1011, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ); + } + unsafe { test() } + } + + #[test] + fn vector_shift_8bit_lane_right() { + #[target_feature(enable = "neon")] + unsafe fn test() { + let v = load([ + 0, 0, 0, 0, 0b1011, 0b0101, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]); + assert_eq!( + unload(v.shift_8bit_lane_right::<2>()), + [0, 0, 0, 0, 0b0010, 0b0001, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + 
); + } + unsafe { test() } + } + + #[test] + fn vector_shift_in_one_byte() { + #[target_feature(enable = "neon")] + unsafe fn test() { + let v1 = + load([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]); + let v2 = load([ + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + ]); + assert_eq!( + unload(v1.shift_in_one_byte(v2)), + [32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], + ); + } + unsafe { test() } + } + + #[test] + fn vector_shift_in_two_bytes() { + #[target_feature(enable = "neon")] + unsafe fn test() { + let v1 = + load([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]); + let v2 = load([ + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + ]); + assert_eq!( + unload(v1.shift_in_two_bytes(v2)), + [31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], + ); + } + unsafe { test() } + } + + #[test] + fn vector_shift_in_three_bytes() { + #[target_feature(enable = "neon")] + unsafe fn test() { + let v1 = + load([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]); + let v2 = load([ + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + ]); + assert_eq!( + unload(v1.shift_in_three_bytes(v2)), + [30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], + ); + } + unsafe { test() } + } + + #[test] + fn vector_shuffle_bytes() { + #[target_feature(enable = "neon")] + unsafe fn test() { + let v1 = + load([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]); + let v2 = + load([0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12]); + assert_eq!( + unload(v1.shuffle_bytes(v2)), + [1, 1, 1, 1, 5, 5, 5, 5, 9, 9, 9, 9, 13, 13, 13, 13], + ); + } + unsafe { test() } + } + + #[test] + fn vector_for_each_64bit_lane() { + #[target_feature(enable = "neon")] + unsafe fn test() { + let v = load([ + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, + 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, + ]); + let mut lanes = [0u64; 2]; + v.for_each_64bit_lane(|i, lane| { + lanes[i] = lane; + None::<()> + }); + 
assert_eq!(lanes, [0x0807060504030201, 0x100F0E0D0C0B0A09],); + } + unsafe { test() } + } } diff --git a/vendor/aho-corasick/src/util/int.rs b/vendor/aho-corasick/src/util/int.rs index 1412aa5..28ede7a 100644 --- a/vendor/aho-corasick/src/util/int.rs +++ b/vendor/aho-corasick/src/util/int.rs @@ -118,6 +118,33 @@ impl U64 for u64 { } } +pub(crate) trait I8 { + fn as_usize(self) -> usize; + fn to_bits(self) -> u8; + fn from_bits(n: u8) -> i8; +} + +impl I8 for i8 { + fn as_usize(self) -> usize { + #[cfg(debug_assertions)] + { + usize::try_from(self).expect("i8 overflowed usize") + } + #[cfg(not(debug_assertions))] + { + self as usize + } + } + + fn to_bits(self) -> u8 { + self as u8 + } + + fn from_bits(n: u8) -> i8 { + n as i8 + } +} + pub(crate) trait I32 { fn as_usize(self) -> usize; fn to_bits(self) -> u32; @@ -145,6 +172,33 @@ impl I32 for i32 { } } +pub(crate) trait I64 { + fn as_usize(self) -> usize; + fn to_bits(self) -> u64; + fn from_bits(n: u64) -> i64; +} + +impl I64 for i64 { + fn as_usize(self) -> usize { + #[cfg(debug_assertions)] + { + usize::try_from(self).expect("i64 overflowed usize") + } + #[cfg(not(debug_assertions))] + { + self as usize + } + } + + fn to_bits(self) -> u64 { + self as u64 + } + + fn from_bits(n: u64) -> i64 { + n as i64 + } +} + pub(crate) trait Usize { fn as_u8(self) -> u8; fn as_u16(self) -> u16; diff --git a/vendor/aho-corasick/src/util/prefilter.rs b/vendor/aho-corasick/src/util/prefilter.rs index fc63004..f5ddc75 100644 --- a/vendor/aho-corasick/src/util/prefilter.rs +++ b/vendor/aho-corasick/src/util/prefilter.rs @@ -162,6 +162,7 @@ impl Builder { /// builder before attempting to construct the prefilter. pub(crate) fn build(&self) -> Option { if !self.enabled { + debug!("prefilter not enabled, skipping"); return None; } // If we only have one pattern, then deferring to memmem is always @@ -173,15 +174,55 @@ impl Builder { // them. 
if !self.ascii_case_insensitive { if let Some(pre) = self.memmem.build() { + debug!("using memmem prefilter"); return Some(pre); } } + let (packed, patlen, minlen) = if self.ascii_case_insensitive { + (None, usize::MAX, 0) + } else { + let patlen = self.packed.as_ref().map_or(usize::MAX, |p| p.len()); + let minlen = self.packed.as_ref().map_or(0, |p| p.minimum_len()); + let packed = + self.packed.as_ref().and_then(|b| b.build()).map(|s| { + let memory_usage = s.memory_usage(); + debug!( + "built packed prefilter (len: {}, \ + minimum pattern len: {}, memory usage: {}) \ + for consideration", + patlen, minlen, memory_usage, + ); + Prefilter { finder: Arc::new(Packed(s)), memory_usage } + }); + (packed, patlen, minlen) + }; match (self.start_bytes.build(), self.rare_bytes.build()) { // If we could build both start and rare prefilters, then there are // a few cases in which we'd want to use the start-byte prefilter // over the rare-byte prefilter, since the former has lower // overhead. (prestart @ Some(_), prerare @ Some(_)) => { + debug!( + "both start (len={}, rank={}) and \ + rare (len={}, rank={}) byte prefilters \ + are available", + self.start_bytes.count, + self.start_bytes.rank_sum, + self.rare_bytes.count, + self.rare_bytes.rank_sum, + ); + if patlen <= 16 + && minlen >= 2 + && self.start_bytes.count >= 3 + && self.rare_bytes.count >= 3 + { + debug!( + "start and rare byte prefilters available, but \ + they're probably slower than packed so using \ + packed" + ); + return packed; + } // If the start-byte prefilter can scan for a smaller number // of bytes than the rare-byte prefilter, then it's probably // faster. @@ -196,20 +237,69 @@ impl Builder { // prefer the start-byte prefilter when we can. 
let has_rarer_bytes = self.start_bytes.rank_sum <= self.rare_bytes.rank_sum + 50; - if has_fewer_bytes || has_rarer_bytes { + if has_fewer_bytes { + debug!( + "using start byte prefilter because it has fewer + bytes to search for than the rare byte prefilter", + ); + prestart + } else if has_rarer_bytes { + debug!( + "using start byte prefilter because its byte \ + frequency rank was determined to be \ + \"good enough\" relative to the rare byte prefilter \ + byte frequency rank", + ); prestart } else { + debug!("using rare byte prefilter"); prerare } } - (prestart @ Some(_), None) => prestart, - (None, prerare @ Some(_)) => prerare, - (None, None) if self.ascii_case_insensitive => None, + (prestart @ Some(_), None) => { + if patlen <= 16 && minlen >= 2 && self.start_bytes.count >= 3 { + debug!( + "start byte prefilter available, but \ + it's probably slower than packed so using \ + packed" + ); + return packed; + } + debug!( + "have start byte prefilter but not rare byte prefilter, \ + so using start byte prefilter", + ); + prestart + } + (None, prerare @ Some(_)) => { + if patlen <= 16 && minlen >= 2 && self.rare_bytes.count >= 3 { + debug!( + "rare byte prefilter available, but \ + it's probably slower than packed so using \ + packed" + ); + return packed; + } + debug!( + "have rare byte prefilter but not start byte prefilter, \ + so using rare byte prefilter", + ); + prerare + } + (None, None) if self.ascii_case_insensitive => { + debug!( + "no start or rare byte prefilter and ASCII case \ + insensitivity was enabled, so skipping prefilter", + ); + None + } (None, None) => { - self.packed.as_ref().and_then(|b| b.build()).map(|s| { - let memory_usage = s.memory_usage(); - Prefilter { finder: Arc::new(Packed(s)), memory_usage } - }) + if packed.is_some() { + debug!("falling back to packed prefilter"); + } else { + debug!("no prefilter available"); + } + packed } } } diff --git a/vendor/itoa/.cargo-checksum.json b/vendor/itoa/.cargo-checksum.json index 
8f2af5b..efd76a8 100644 --- a/vendor/itoa/.cargo-checksum.json +++ b/vendor/itoa/.cargo-checksum.json @@ -1 +1 @@ -{"files":{"Cargo.toml":"0ab21af5f24b50e34aa9e2cc6ae7339b869271787d4aca54de1f6c36fb20081c","LICENSE-APACHE":"62c7a1e35f56406896d7aa7ca52d0cc0d272ac022b5d2796e7d6905db8a3636a","LICENSE-MIT":"23f18e03dc49df91622fe2a76176497404e46ced8a715d9d2b67a7446571cca3","README.md":"48573443063fa4e0786c3b46f42b6efd1f171c6b73408a64afc1b34de89f31fe","benches/bench.rs":"636f3093bd461210ad3063289d455f90669c4a1be3273bcd30898de39f02c641","src/lib.rs":"9bf12618207e123f879537afa11d74cd03baffe31f8b7ae7fe87f9335b759c80","src/udiv128.rs":"d28c1872c37ee2185931babcb20a221b8706a5aa8abc4963419763888023ff17","tests/test.rs":"f7404fc5f7cd1bdaf74a3b64a70d5b30586241ddc1ce2c82bd1b564999fcce0e"},"package":"453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6"} \ No newline at end of file +{"files":{"Cargo.toml":"fc26fe5442702edfa4cd1a9b0330aa51043d9f7b6a653decddabdf930543a424","LICENSE-APACHE":"62c7a1e35f56406896d7aa7ca52d0cc0d272ac022b5d2796e7d6905db8a3636a","LICENSE-MIT":"23f18e03dc49df91622fe2a76176497404e46ced8a715d9d2b67a7446571cca3","README.md":"48573443063fa4e0786c3b46f42b6efd1f171c6b73408a64afc1b34de89f31fe","benches/bench.rs":"636f3093bd461210ad3063289d455f90669c4a1be3273bcd30898de39f02c641","src/lib.rs":"c4c7f2e0bb9cb5090837629bfa8752e5b6cd537c98947e6ddd9467c81460ea51","src/udiv128.rs":"d28c1872c37ee2185931babcb20a221b8706a5aa8abc4963419763888023ff17","tests/test.rs":"aa1e910573a1d847d39773b4a2e4c597a8d3810070332673df0f6864cab24807"},"package":"b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c"} \ No newline at end of file diff --git a/vendor/itoa/Cargo.toml b/vendor/itoa/Cargo.toml index 6c2b1c6..889aa5d 100644 --- a/vendor/itoa/Cargo.toml +++ b/vendor/itoa/Cargo.toml @@ -13,7 +13,7 @@ edition = "2018" rust-version = "1.36" name = "itoa" -version = "1.0.6" +version = "1.0.10" authors = ["David Tolnay "] exclude = [ "performance.png", @@ -26,11 +26,13 
@@ keywords = ["integer"] categories = [ "value-formatting", "no-std", + "no-std::no-alloc", ] license = "MIT OR Apache-2.0" repository = "https://github.com/dtolnay/itoa" [package.metadata.docs.rs] +rustdoc-args = ["--generate-link-to-definition"] targets = ["x86_64-unknown-linux-gnu"] [lib] diff --git a/vendor/itoa/src/lib.rs b/vendor/itoa/src/lib.rs index fa0d990..e2fad15 100644 --- a/vendor/itoa/src/lib.rs +++ b/vendor/itoa/src/lib.rs @@ -30,12 +30,14 @@ //! //! ![performance](https://raw.githubusercontent.com/dtolnay/itoa/master/performance.png) -#![doc(html_root_url = "https://docs.rs/itoa/1.0.6")] +#![doc(html_root_url = "https://docs.rs/itoa/1.0.10")] #![no_std] #![allow( clippy::cast_lossless, clippy::cast_possible_truncation, + clippy::expl_impl_clone_on_copy, clippy::must_use_candidate, + clippy::needless_doctest_main, clippy::unreadable_literal )] @@ -67,8 +69,11 @@ impl Default for Buffer { } } +impl Copy for Buffer {} + impl Clone for Buffer { #[inline] + #[allow(clippy::non_canonical_clone_impl)] // false positive https://github.com/rust-lang/rust-clippy/issues/11072 fn clone(&self) -> Self { Buffer::new() } diff --git a/vendor/itoa/tests/test.rs b/vendor/itoa/tests/test.rs index 1d7e8cb..f8275d6 100644 --- a/vendor/itoa/tests/test.rs +++ b/vendor/itoa/tests/test.rs @@ -26,4 +26,5 @@ test! 
{ test_u128_0(0u128, "0") test_u128_max(u128::max_value(), "340282366920938463463374607431768211455") test_i128_min(i128::min_value(), "-170141183460469231731687303715884105728") + test_i128_max(i128::max_value(), "170141183460469231731687303715884105727") } diff --git a/vendor/memchr/.cargo-checksum.json b/vendor/memchr/.cargo-checksum.json index f496380..4b915f3 100644 --- a/vendor/memchr/.cargo-checksum.json +++ b/vendor/memchr/.cargo-checksum.json @@ -1 +1 @@ -{"files":{"COPYING":"01c266bced4a434da0051174d6bee16a4c82cf634e2679b6155d40d75012390f","Cargo.toml":"fdeda7d32fa12e4a1589d13c74ae5fd4f1065d0219ba73f8492e28248d84d146","LICENSE-MIT":"0f96a83840e146e43c0ec96a22ec1f392e0680e6c1226e6f3ba87e0740af850f","README.md":"51d941627e004588863b137918e908e34c4d599d12e03afd3e489e2bb61e3704","UNLICENSE":"7e12e5df4bae12cb21581ba157ced20e1986a0508dd10d0e8a4ab9a4cf94e85c","build.rs":"5638d9b60d40f44db96767ce32246de42158571364cce92531a85307ac7eda6c","rustfmt.toml":"1ca600239a27401c4a43f363cf3f38183a212affc1f31bff3ae93234bbaec228","scripts/make-byte-frequency-table":"21d1ded41fe5a780507bb88e1910d471b4081cc626a48891a408712e45b7b2bf","src/cow.rs":"a23c3b009e5215b5c3ac46627a5dd844235bef0136d76b3fc1eeeb744565c125","src/lib.rs":"9430cd37b13399df8f8c27a752ccdf6422a563e24171d1b4802424f9193a8f37","src/memchr/c.rs":"34f7caf79316f4b03908832fdbd4aff367f2bc30eae291478cc5a0a108ce6e76","src/memchr/fallback.rs":"48764f18b7ff1f00a9ac1c4ed8ec96ad11f7b09b2d062a8ed3fe81160add627d","src/memchr/iter.rs":"61463e7fa22ca8f212c2cbfb882af0c87b0fb1bc6b4676678a4822a581ec1037","src/memchr/mod.rs":"d5bfc881c7c089e1a0825209a4d21c3f792f38c6f16f3bc715d0d539477376b6","src/memchr/naive.rs":"c7453bc99cc4e58eb37cf5a50c88688833e50a270ee1849baefddb8acc0ccd94","src/memchr/x86/avx.rs":"3c2750174ce7ff033daa4096e7961bbee9a2da898068266b27dee22ef8cfddad","src/memchr/x86/mod.rs":"a642d5aefdb7452ead4ab7946b5c6cfb6cc6df636dcd0ebbd6f5e6e1ac8305c0","src/memchr/x86/sse2.rs":"79ede1aba71a655e86eb5873d682c5da26933bffa4fffd7042a23
13f18cf4675","src/memchr/x86/sse42.rs":"de4c6f354dbfec170876cddb8d9157b35928f96ed2339a0c5d094cc953a2f52d","src/memmem/byte_frequencies.rs":"2fb85b381c038c1e44ce94294531cdcd339dca48b1e61f41455666e802cbbc9e","src/memmem/genericsimd.rs":"9ce7283db0994438eb6df2bea6ad984e80512b6f643ebae7ae7d82eb5d39fa11","src/memmem/mod.rs":"949fb8e11a23030d59b34fd8c7c196150f133e909a8448705c77a751c436907d","src/memmem/prefilter/fallback.rs":"d32248c41aa09701c2410c52f948bbe009dd1b13a01b444ce0fb8c4b4e404ede","src/memmem/prefilter/genericsimd.rs":"57d5523cf0299b37ef1dd1b351e3d387d5070f2f7ecffc9a9ca66528101ebd3f","src/memmem/prefilter/mod.rs":"ad8b4ac72c025f11d6b641c5fc0888468112758dcdc6bb72b43f932d2005ea4e","src/memmem/prefilter/wasm.rs":"14f684412fca35445a94760a6973d772dfd22d329ebae3b52b525d2a1f3acd63","src/memmem/prefilter/x86/avx.rs":"e344cae36a88b59c07a1c1d395edeb9c636a399e1528ce69b2bc7c94d8d8bb0b","src/memmem/prefilter/x86/mod.rs":"df2d84b23b22574383c281d33671a121b5faf7b1a48dd6f67c3085cd02cd4498","src/memmem/prefilter/x86/sse.rs":"daa648fc2a90d37299803a80d632e8a47a30ce8719d0ac2a2ea2cde3b30b6fef","src/memmem/rabinkarp.rs":"9b44eb092524a51792eba4deaca6c6d3cbc51db98cb548ea4fa7e5d8988cc71a","src/memmem/rarebytes.rs":"571082c71fc3dca5e4304171d41fb3c44e241df6dcd88bac4d7a15b52f9521e0","src/memmem/twoway.rs":"102f8bbb29696d5656cd2f5a1769a3af96d044fb09972881455cfb6424d6b50a","src/memmem/util.rs":"0194d40b912137e2352863af9cc1c0273baf97fdf6b27799628680846c06febd","src/memmem/vector.rs":"96e6f45f8ad11a822c4f18393839225d7f40f898ad657e109ba1b3288af0ef8f","src/memmem/wasm.rs":"87da03c964f054db30cc972d07a74e8902ec1248e2338ecd1dbac430f43fffc2","src/memmem/x86/avx.rs":"de85dbc415603c844baf94fbc92d676a738dd4b99246be468bd5f7be5921b25f","src/memmem/x86/mod.rs":"5012fca41b91caf229278aa221e8dd514ede497fe4938d64562d03fef2fc46e6","src/memmem/x86/sse.rs":"148a40c0952aca8b16d9eb3e724a5b9b60693bc7b2bcc5209bcc43c94faf560a","src/tests/memchr/iter.rs":"b68c7ecdb6222c5dbf61212e6863f78f98ad343868a74cb8612692fc790240b2
","src/tests/memchr/memchr.rs":"09589c5899324c9b26ea4513c80389a2ffdf6ddc460031e2ca8da43bd493ae3f","src/tests/memchr/mod.rs":"29e0855f946c7babf603b3d610a29235a56a26a4c867fef0768542388eac4c95","src/tests/memchr/simple.rs":"b9997903ede972272c01c1750522a20692a28488cc7c5cf745ea83ff96d65fe3","src/tests/memchr/testdata.rs":"3e34377fe60eca3687d1ebc66127bd631af27ceaccc8f08806a293199b69a83f","src/tests/mod.rs":"9054a2a2f9af140f305ca29155d942fafbac9fb0874067611adc8a5990546be4","src/tests/x86_64-soft_float.json":"c0e416487fe9b4809534edb7db2a9eff3453dc40d9f1e23362c37f45a77ec717"},"package":"2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"} \ No newline at end of file +{"files":{"COPYING":"01c266bced4a434da0051174d6bee16a4c82cf634e2679b6155d40d75012390f","Cargo.toml":"8f521805349c8df78da2ccaeec8b48f2f9fa1667887aaf89950ae555cbf6f8b2","LICENSE-MIT":"0f96a83840e146e43c0ec96a22ec1f392e0680e6c1226e6f3ba87e0740af850f","README.md":"92a74aaffe011bdaa06fbc34a01686a6eba58ca1322e976759417a547fddf734","UNLICENSE":"7e12e5df4bae12cb21581ba157ced20e1986a0508dd10d0e8a4ab9a4cf94e85c","rustfmt.toml":"1ca600239a27401c4a43f363cf3f38183a212affc1f31bff3ae93234bbaec228","src/arch/aarch64/memchr.rs":"5bb70f915084e629d940dbc322f5b9096b2e658cf63fea8a2f6e7550412e73a0","src/arch/aarch64/mod.rs":"44cd1a614bd66f1e66fc86c541d3c3b8d3a14a644c13e8bf816df3f555eac2d4","src/arch/aarch64/neon/memchr.rs":"e8c00b8fb2c7e2711832ae3cedefe59f32ebedd7dfa4d0ec6de2a566c979daea","src/arch/aarch64/neon/mod.rs":"eab6d56c2b2354db4ee395f40282cd49f97e2ab853547be5de6e65fbe1b2f634","src/arch/aarch64/neon/packedpair.rs":"fbdfdbfaf7b76b234db261fbe55a55c4479d32cdc65a654d60417c2d1c237849","src/arch/all/memchr.rs":"f6c51e50309f80e749887572297aa5efc19a3c7e5434b4404a101b3e6b152be1","src/arch/all/mod.rs":"05f3fc2b069682eb1545fc6366d167bb620a454365dac8b8dd6cde6cd64de18a","src/arch/all/packedpair/default_rank.rs":"abffd1b5b8b7a3be95c03dd1105b905c246a379854dc56f1e846ea7c4408f2c7","src/arch/all/packedpair/mod.rs":"292b66042c5b5c
78bba33db6526aeae6904db803d601fcdd29032b87b3eb3754","src/arch/all/rabinkarp.rs":"236f69c04b90c14c253ae6c8d9b78150b4a56df75bb50af6d63b15145668b7cc","src/arch/all/shiftor.rs":"0d79117f52a1e4795843603a3bb0b45397df4ad5e4184bbc923658dab9dc3b5f","src/arch/all/twoway.rs":"47c97a265bfbafde90a618946643d3e97dfd9a85f01aa4ac758cd4c1573a450d","src/arch/generic/memchr.rs":"88290761bab740878401e914d71866da6501cdcef53d1249ec6fda4c7f9c12ae","src/arch/generic/mod.rs":"1dd75f61e0ea2563b8205a08aaa7b55500130aa331d18b9e9f995724b66c7a39","src/arch/generic/packedpair.rs":"a4a6efb29877ced9cf4c4e5ae9f36a79f019a16b831f2b9424899a1513d458ad","src/arch/mod.rs":"6dbd9e0b1b89fecb9faac5df6edfc87e24607e9099136aa831f3f056b14e22db","src/arch/wasm32/memchr.rs":"bfaaeca702cc32e605a06d5078d26ac59263d3c4eb04f9756e6be5e2850c3d0d","src/arch/wasm32/mod.rs":"a20377aa8fe07d68594879101dc73061e4f51d9c8d812b593b1f376e3c8add79","src/arch/wasm32/simd128/memchr.rs":"bac2c4c43fe710c83a6f2b1118fede043be89dd821d4b532907f129f09fdb5cf","src/arch/wasm32/simd128/mod.rs":"c157b373faedbfd65323be432e25bc411d97aa1b7bc58e76048614c7b2bf3bf6","src/arch/wasm32/simd128/packedpair.rs":"47e7875f1a0b502f3f30ddfd9257ed7ad4568fb7d968b5e6c01ba9e2aab2a459","src/arch/x86_64/avx2/memchr.rs":"576ec0c30f49874f7fd9f6caeb490d56132c0fbbaa4d877b1aa532cafce19323","src/arch/x86_64/avx2/mod.rs":"0033d1b712d0b10f0f273ef9aa8caa53e05e49f4c56a64f39af0b9df97eec584","src/arch/x86_64/avx2/packedpair.rs":"87b69cb4301815906127db4f6370f572c7c5d5dad35c0946c00ad888dbcaec8c","src/arch/x86_64/memchr.rs":"99a1dbe4156d498e6f910d06d3d3b31e7f6d06dff7d13a4c51b33a02b7e2fba9","src/arch/x86_64/mod.rs":"61b2aa876942fd3e78714c2ae21e356c8634545c06995020f443fa50218df027","src/arch/x86_64/sse2/memchr.rs":"68fc3b8f9eddf82192979c3aa11e5141f085cbb993c49c340558719a904679dc","src/arch/x86_64/sse2/mod.rs":"38b70ae52a64ec974dbb91d04d6ca8013d9e06d1fe4af852206bbc2faf1c59aa","src/arch/x86_64/sse2/packedpair.rs":"241ea981d8eea6024769f1c9375f726a9bb9700160c5857781d4befd9f5ef55d","src/cow
.rs":"34eddd02cb82cc2d5a2c640891d64efe332dabcc1eea5115764200d8f46b66f7","src/ext.rs":"c472bcc41a7ef48980d976e954e87ef9fdfdfd30ac0199b959cc7e5b9d563ab3","src/lib.rs":"22509be5f0c201773ee26bb21bf5c0491e287c4414c02b7faa6ea5177ce0f32f","src/macros.rs":"3e4b39252bfa471fad384160a43f113ebfec7bec46a85d16f006622881dd2081","src/memchr.rs":"36f1c03304261877cd7f75c7ed8f7daff7a5c570cedce375e38e9b1ca44467f7","src/memmem/mod.rs":"1b0a9d6a681fd0887c677c4fc8d4c8f9719ddde250bdd5ea545365c1a7fb9094","src/memmem/searcher.rs":"69c38fb33d8f1a2a26769a81e514428240c8f8f15cea5302873d90b80391dd89","src/tests/memchr/mod.rs":"269f8e4b4f7f5ea458f27a3c174eb1020ffb2484eeba9464170beb51747df69b","src/tests/memchr/naive.rs":"6a0bee033e5edfb5b1d5769a5fa1c78388f7e9ff7bb91cb67f0ad029289e00e7","src/tests/memchr/prop.rs":"7bf7435087fbf08c5014c216b76575349735590d6b1d0e448921a1dc17bc0ea7","src/tests/mod.rs":"7cec8f809e279310a465c6a7725087970f219a676cc76c83de30c695bb490740","src/tests/packedpair.rs":"b02ec4fbb61a8653cb5f2268c31bc9168b8043347f2abdcc74081acf83b98e15","src/tests/substring/mod.rs":"c7660d10749363ac4687e7da2b5fda60768230425df8ba416c0c28b8d56a5c74","src/tests/substring/naive.rs":"df6f55d165382b8a53762ba4c324926cac13ebc62cde1805f4ce08740b326483","src/tests/substring/prop.rs":"38c15992609b5681a95d838ae6f2933e00a1219f2c971bfba245f96e0729fcdc","src/tests/x86_64-soft_float.json":"c0e416487fe9b4809534edb7db2a9eff3453dc40d9f1e23362c37f45a77ec717","src/vector.rs":"ef823ae8c54053780a0e7aeaee14b6c6ac2aea4567bf701ae8be137806c6d293"},"package":"523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149"} \ No newline at end of file diff --git a/vendor/memchr/Cargo.toml b/vendor/memchr/Cargo.toml index 6301952..9e95051 100644 --- a/vendor/memchr/Cargo.toml +++ b/vendor/memchr/Cargo.toml @@ -10,41 +10,50 @@ # See Cargo.toml.orig for the original contents. 
[package] -edition = "2018" +edition = "2021" +rust-version = "1.61" name = "memchr" -version = "2.5.0" +version = "2.7.1" authors = [ "Andrew Gallant ", "bluss", ] exclude = [ - "/bench", "/.github", + "/benchmarks", "/fuzz", + "/scripts", + "/tmp", ] -description = "Safe interface to memchr." +description = """ +Provides extremely fast (uses SIMD on x86_64, aarch64 and wasm32) routines for +1, 2 or 3 byte search and single substring search. +""" homepage = "https://github.com/BurntSushi/memchr" documentation = "https://docs.rs/memchr/" readme = "README.md" keywords = [ "memchr", - "char", - "scan", - "strchr", - "string", + "memmem", + "substring", + "find", + "search", ] -license = "Unlicense/MIT" +license = "Unlicense OR MIT" repository = "https://github.com/BurntSushi/memchr" +[package.metadata.docs.rs] +rustdoc-args = ["--generate-link-to-definition"] + [profile.bench] -debug = true +debug = 2 [profile.release] -debug = true +debug = 2 [profile.test] opt-level = 3 -debug = true +debug = 2 [lib] name = "memchr" @@ -59,20 +68,22 @@ version = "1.0.0" optional = true package = "rustc-std-workspace-core" -[dependencies.libc] -version = "0.2.18" +[dependencies.log] +version = "0.4.20" optional = true -default-features = false [dev-dependencies.quickcheck] version = "1.0.3" default-features = false [features] +alloc = [] default = ["std"] +libc = [] +logging = ["dep:log"] rustc-dep-of-std = [ "core", "compiler_builtins", ] -std = [] +std = ["alloc"] use_std = ["std"] diff --git a/vendor/memchr/README.md b/vendor/memchr/README.md index 77a7a0f..db00ebb 100644 --- a/vendor/memchr/README.md +++ b/vendor/memchr/README.md @@ -35,30 +35,19 @@ memchr links to the standard library by default, but you can disable the memchr = { version = "2", default-features = false } ``` -On x86 platforms, when the `std` feature is disabled, the SSE2 accelerated -implementations will be used. 
When `std` is enabled, AVX accelerated +On `x86_64` platforms, when the `std` feature is disabled, the SSE2 accelerated +implementations will be used. When `std` is enabled, AVX2 accelerated implementations will be used if the CPU is determined to support it at runtime. -### Using libc - -`memchr` is a routine that is part of libc, although this crate does not use -libc by default. Instead, it uses its own routines, which are either vectorized -or generic fallback routines. In general, these should be competitive with -what's in libc, although this has not been tested for all architectures. If -using `memchr` from libc is desirable and a vectorized routine is not otherwise -available in this crate, then enabling the `libc` feature will use libc's -version of `memchr`. - -The rest of the functions in this crate, e.g., `memchr2` or `memrchr3` and the -substring search routines, will always use the implementations in this crate. -One exception to this is `memrchr`, which is an extension in `libc` found on -Linux. On Linux, `memrchr` is used in precisely the same scenario as `memchr`, -as described above. +SIMD accelerated routines are also available on the `wasm32` and `aarch64` +targets. The `std` feature is not required to use them. +When a SIMD version is not available, then this crate falls back to +[SWAR](https://en.wikipedia.org/wiki/SWAR) techniques. ### Minimum Rust version policy -This crate's minimum supported `rustc` version is `1.41.1`. +This crate's minimum supported `rustc` version is `1.61.0`. The current policy is that the minimum Rust version required to use this crate can be increased in minor version updates. For example, if `crate 1.0` requires @@ -105,3 +94,103 @@ has a few different algorithms to choose from depending on the situation. is used. If possible, a prefilter based on the "Generic SIMD" algorithm linked above is used to find candidates quickly. 
A dynamic heuristic is used to detect if the prefilter is ineffective, and if so, disables it. + + +### Why is the standard library's substring search so much slower? + +We'll start by establishing what the difference in performance actually +is. There are two relevant benchmark classes to consider: `prebuilt` and +`oneshot`. The `prebuilt` benchmarks are designed to measure---to the extent +possible---search time only. That is, the benchmark first starts by building a +searcher and then only tracking the time for _using_ the searcher: + +``` +$ rebar rank benchmarks/record/x86_64/2023-08-26.csv --intersection -e memchr/memmem/prebuilt -e std/memmem/prebuilt +Engine Version Geometric mean of speed ratios Benchmark count +------ ------- ------------------------------ --------------- +rust/memchr/memmem/prebuilt 2.5.0 1.03 53 +rust/std/memmem/prebuilt 1.73.0-nightly 180dffba1 6.50 53 +``` + +Conversely, the `oneshot` benchmark class measures the time it takes to both +build the searcher _and_ use it: + +``` +$ rebar rank benchmarks/record/x86_64/2023-08-26.csv --intersection -e memchr/memmem/oneshot -e std/memmem/oneshot +Engine Version Geometric mean of speed ratios Benchmark count +------ ------- ------------------------------ --------------- +rust/memchr/memmem/oneshot 2.5.0 1.04 53 +rust/std/memmem/oneshot 1.73.0-nightly 180dffba1 5.26 53 +``` + +**NOTE:** Replace `rebar rank` with `rebar cmp` in the above commands to +explore the specific benchmarks and their differences. + +So in both cases, this crate is quite a bit faster over a broad sampling of +benchmarks regardless of whether you measure only search time or search time +plus construction time. The difference is a little smaller when you include +construction time in your measurements. + +These two different types of benchmark classes make for a nice segue into +one reason why the standard library's substring search can be slower: API +design. 
In the standard library, the only APIs available to you require +one to re-construct the searcher for every search. While you can benefit +from building a searcher once and iterating over all matches in a single +string, you cannot reuse that searcher to search other strings. This might +come up when, for example, searching a file one line at a time. You'll need +to re-build the searcher for every line searched, and this can [really +matter][burntsushi-bstr-blog]. + +**NOTE:** The `prebuilt` benchmark for the standard library can't actually +avoid measuring searcher construction at some level, because there is no API +for it. Instead, the benchmark consists of building the searcher once and then +finding all matches in a single string via an iterator. This tends to +approximate a benchmark where searcher construction isn't measured, but it +isn't perfect. While this means the comparison is not strictly +apples-to-apples, it does reflect what is maximally possible with the standard +library, and thus reflects the best that one could do in a real world scenario. + +While there is more to the story than just API design here, it's important to +point out that even if the standard library's substring search were a precise +clone of this crate internally, it would still be at a disadvantage in some +workloads because of its API. (The same also applies to C's standard library +`memmem` function. There is no way to amortize construction of the searcher. +You need to pay for it on every call.) + +The other reason for the difference in performance is that +the standard library has trouble using SIMD. In particular, substring search +is implemented in the `core` library, where platform specific code generally +can't exist. That's an issue because in order to utilize SIMD beyond SSE2 +while maintaining portable binaries, one needs to use [dynamic CPU feature +detection][dynamic-cpu], and that in turn requires platform specific code. 
+While there is [an RFC for enabling target feature detection in +`core`][core-feature], it doesn't yet exist. + +The bottom line here is that `core`'s substring search implementation is +limited to making use of SSE2, but not AVX. + +Still though, this crate does accelerate substring search even when only SSE2 +is available. The standard library could therefore adopt the techniques in this +crate just for SSE2. The reason why that hasn't happened yet isn't totally +clear to me. It likely needs a champion to push it through. The standard +library tends to be more conservative in these things. With that said, the +standard library does use some [SSE2 acceleration on `x86-64`][std-sse2] added +in [this PR][std-sse2-pr]. However, at the time of writing, it is only used +for short needles and doesn't use the frequency based heuristics found in this +crate. + +**NOTE:** Another thing worth mentioning is that the standard library's +substring search routine requires that both the needle and haystack have type +`&str`. Unless you can assume that your data is valid UTF-8, building a `&str` +will come with the overhead of UTF-8 validation. This may in turn result in +overall slower searching depending on your workload. In contrast, the `memchr` +crate permits both the needle and the haystack to have type `&[u8]`, where +`&[u8]` can be created from a `&str` with zero cost. Therefore, the substring +search in this crate is strictly more flexible than what the standard library +provides. 
+ +[burntsushi-bstr-blog]: https://blog.burntsushi.net/bstr/#motivation-based-on-performance +[dynamic-cpu]: https://doc.rust-lang.org/std/arch/index.html#dynamic-cpu-feature-detection +[core-feature]: https://github.com/rust-lang/rfcs/pull/3469 +[std-sse2]: https://github.com/rust-lang/rust/blob/bf9229a2e366b4c311f059014a4aa08af16de5d8/library/core/src/str/pattern.rs#L1719-L1857 +[std-sse2-pr]: https://github.com/rust-lang/rust/pull/103779 diff --git a/vendor/memchr/build.rs b/vendor/memchr/build.rs deleted file mode 100644 index 584a608..0000000 --- a/vendor/memchr/build.rs +++ /dev/null @@ -1,88 +0,0 @@ -use std::env; - -fn main() { - enable_simd_optimizations(); - enable_libc(); -} - -// This adds various simd cfgs if this compiler and target support it. -// -// This can be disabled with RUSTFLAGS="--cfg memchr_disable_auto_simd", but -// this is generally only intended for testing. -// -// On targets which don't feature SSE2, this is disabled, as LLVM wouln't know -// how to work with SSE2 operands. Enabling SSE4.2 and AVX on SSE2-only targets -// is not a problem. In that case, the fastest option will be chosen at -// runtime. -fn enable_simd_optimizations() { - if is_env_set("CARGO_CFG_MEMCHR_DISABLE_AUTO_SIMD") { - return; - } - let arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap(); - match &arch[..] { - "x86_64" => { - if !target_has_feature("sse2") { - return; - } - println!("cargo:rustc-cfg=memchr_runtime_simd"); - println!("cargo:rustc-cfg=memchr_runtime_sse2"); - println!("cargo:rustc-cfg=memchr_runtime_sse42"); - println!("cargo:rustc-cfg=memchr_runtime_avx"); - } - "wasm32" | "wasm64" => { - if !target_has_feature("simd128") { - return; - } - println!("cargo:rustc-cfg=memchr_runtime_simd"); - println!("cargo:rustc-cfg=memchr_runtime_wasm128"); - } - _ => {} - } -} - -// This adds a `memchr_libc` cfg if and only if libc can be used, if no other -// better option is available. 
-// -// This could be performed in the source code, but it's simpler to do it once -// here and consolidate it into one cfg knob. -// -// Basically, we use libc only if its enabled and if we aren't targeting a -// known bad platform. For example, wasm32 doesn't have a libc and the -// performance of memchr on Windows is seemingly worse than the fallback -// implementation. -fn enable_libc() { - const NO_ARCH: &'static [&'static str] = &["wasm32", "windows"]; - const NO_ENV: &'static [&'static str] = &["sgx"]; - - if !is_feature_set("LIBC") { - return; - } - - let arch = match env::var("CARGO_CFG_TARGET_ARCH") { - Err(_) => return, - Ok(arch) => arch, - }; - let env = match env::var("CARGO_CFG_TARGET_ENV") { - Err(_) => return, - Ok(env) => env, - }; - if NO_ARCH.contains(&&*arch) || NO_ENV.contains(&&*env) { - return; - } - - println!("cargo:rustc-cfg=memchr_libc"); -} - -fn is_feature_set(name: &str) -> bool { - is_env_set(&format!("CARGO_FEATURE_{}", name)) -} - -fn is_env_set(name: &str) -> bool { - env::var_os(name).is_some() -} - -fn target_has_feature(feature: &str) -> bool { - env::var("CARGO_CFG_TARGET_FEATURE") - .map(|features| features.contains(feature)) - .unwrap_or(false) -} diff --git a/vendor/memchr/scripts/make-byte-frequency-table b/vendor/memchr/scripts/make-byte-frequency-table deleted file mode 100755 index 37eeca7..0000000 --- a/vendor/memchr/scripts/make-byte-frequency-table +++ /dev/null @@ -1,74 +0,0 @@ -#!/usr/bin/env python - -# This does simple normalized frequency analysis on UTF-8 encoded text. The -# result of the analysis is translated to a ranked list, where every byte is -# assigned a rank. This list is written to src/freqs.rs. 
-# -# Currently, the frequencies are generated from the following corpuses: -# -# * The CIA world fact book -# * The source code of rustc -# * Septuaginta - -from __future__ import absolute_import, division, print_function - -import argparse -from collections import Counter -import sys - -preamble = ''' -// NOTE: The following code was generated by "scripts/frequencies.py", do not -// edit directly -'''.lstrip() - - -def eprint(*args, **kwargs): - kwargs['file'] = sys.stderr - print(*args, **kwargs) - - -def main(): - p = argparse.ArgumentParser() - p.add_argument('corpus', metavar='FILE', nargs='+') - args = p.parse_args() - - # Get frequency counts of each byte. - freqs = Counter() - for i in range(0, 256): - freqs[i] = 0 - - eprint('reading entire corpus into memory') - corpus = [] - for fpath in args.corpus: - corpus.append(open(fpath, 'rb').read()) - - eprint('computing byte frequencies') - for c in corpus: - for byte in c: - freqs[byte] += 1.0 / float(len(c)) - - eprint('writing Rust code') - # Get the rank of each byte. A lower rank => lower relative frequency. - rank = [0] * 256 - for i, (byte, _) in enumerate(freqs.most_common()): - # print(byte) - rank[byte] = 255 - i - - # Forcefully set the highest rank possible for bytes that start multi-byte - # UTF-8 sequences. The idea here is that a continuation byte will be more - # discerning in a homogenous haystack. - for byte in range(0xC0, 0xFF + 1): - rank[byte] = 255 - - # Now write Rust. - olines = ['pub const BYTE_FREQUENCIES: [u8; 256] = ['] - for byte in range(256): - olines.append(' %3d, // %r' % (rank[byte], chr(byte))) - olines.append('];') - - print(preamble) - print('\n'.join(olines)) - - -if __name__ == '__main__': - main() diff --git a/vendor/memchr/src/arch/aarch64/memchr.rs b/vendor/memchr/src/arch/aarch64/memchr.rs new file mode 100644 index 0000000..e0053b2 --- /dev/null +++ b/vendor/memchr/src/arch/aarch64/memchr.rs @@ -0,0 +1,137 @@ +/*! +Wrapper routines for `memchr` and friends. 
+ +These routines choose the best implementation at compile time. (This is +different from `x86_64` because it is expected that `neon` is almost always +available for `aarch64` targets.) +*/ + +macro_rules! defraw { + ($ty:ident, $find:ident, $start:ident, $end:ident, $($needles:ident),+) => {{ + #[cfg(target_feature = "neon")] + { + use crate::arch::aarch64::neon::memchr::$ty; + + debug!("chose neon for {}", stringify!($ty)); + debug_assert!($ty::is_available()); + // SAFETY: We know that neon memchr is always available whenever + // code is compiled for `aarch64` with the `neon` target feature + // enabled. + $ty::new_unchecked($($needles),+).$find($start, $end) + } + #[cfg(not(target_feature = "neon"))] + { + use crate::arch::all::memchr::$ty; + + debug!( + "no neon feature available, using fallback for {}", + stringify!($ty), + ); + $ty::new($($needles),+).$find($start, $end) + } + }} +} + +/// memchr, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `One::find_raw`. +#[inline(always)] +pub(crate) unsafe fn memchr_raw( + n1: u8, + start: *const u8, + end: *const u8, +) -> Option<*const u8> { + defraw!(One, find_raw, start, end, n1) +} + +/// memrchr, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `One::rfind_raw`. +#[inline(always)] +pub(crate) unsafe fn memrchr_raw( + n1: u8, + start: *const u8, + end: *const u8, +) -> Option<*const u8> { + defraw!(One, rfind_raw, start, end, n1) +} + +/// memchr2, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `Two::find_raw`. +#[inline(always)] +pub(crate) unsafe fn memchr2_raw( + n1: u8, + n2: u8, + start: *const u8, + end: *const u8, +) -> Option<*const u8> { + defraw!(Two, find_raw, start, end, n1, n2) +} + +/// memrchr2, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `Two::rfind_raw`.
+#[inline(always)] +pub(crate) unsafe fn memrchr2_raw( + n1: u8, + n2: u8, + start: *const u8, + end: *const u8, +) -> Option<*const u8> { + defraw!(Two, rfind_raw, start, end, n1, n2) +} + +/// memchr3, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `Three::find_raw`. +#[inline(always)] +pub(crate) unsafe fn memchr3_raw( + n1: u8, + n2: u8, + n3: u8, + start: *const u8, + end: *const u8, +) -> Option<*const u8> { + defraw!(Three, find_raw, start, end, n1, n2, n3) +} + +/// memrchr3, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `Three::rfind_raw`. +#[inline(always)] +pub(crate) unsafe fn memrchr3_raw( + n1: u8, + n2: u8, + n3: u8, + start: *const u8, + end: *const u8, +) -> Option<*const u8> { + defraw!(Three, rfind_raw, start, end, n1, n2, n3) +} + +/// Count all matching bytes, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `One::count_raw`. +#[inline(always)] +pub(crate) unsafe fn count_raw( + n1: u8, + start: *const u8, + end: *const u8, +) -> usize { + defraw!(One, count_raw, start, end, n1) +} diff --git a/vendor/memchr/src/arch/aarch64/mod.rs b/vendor/memchr/src/arch/aarch64/mod.rs new file mode 100644 index 0000000..7b32912 --- /dev/null +++ b/vendor/memchr/src/arch/aarch64/mod.rs @@ -0,0 +1,7 @@ +/*! +Vector algorithms for the `aarch64` target. +*/ + +pub mod neon; + +pub(crate) mod memchr; diff --git a/vendor/memchr/src/arch/aarch64/neon/memchr.rs b/vendor/memchr/src/arch/aarch64/neon/memchr.rs new file mode 100644 index 0000000..5fcc762 --- /dev/null +++ b/vendor/memchr/src/arch/aarch64/neon/memchr.rs @@ -0,0 +1,1031 @@ +/*! +This module defines 128-bit vector implementations of `memchr` and friends. + +The main types in this module are [`One`], [`Two`] and [`Three`]. They are for +searching for one, two or three distinct bytes, respectively, in a haystack. 
+Each type also has corresponding double ended iterators. These searchers are +typically much faster than scalar routines accomplishing the same task. + +The `One` searcher also provides a [`One::count`] routine for efficiently +counting the number of times a single byte occurs in a haystack. This is +useful, for example, for counting the number of lines in a haystack. This +routine exists because it is usually faster, especially with a high match +count, than using [`One::find`] repeatedly. ([`OneIter`] specializes its +`Iterator::count` implementation to use this routine.) + +Only one, two and three bytes are supported because three bytes is about +the point where one sees diminishing returns. Beyond this point, it's +probably (but not necessarily) better to just use a simple `[bool; 256]` array +or similar. However, it depends mightily on the specific work-load and the +expected match frequency. +*/ + +use core::arch::aarch64::uint8x16_t; + +use crate::{arch::generic::memchr as generic, ext::Pointer, vector::Vector}; + +/// Finds all occurrences of a single byte in a haystack. +#[derive(Clone, Copy, Debug)] +pub struct One(generic::One); + +impl One { + /// Create a new searcher that finds occurrences of the needle byte given. + /// + /// This particular searcher is specialized to use neon vector instructions + /// that typically make it quite fast. + /// + /// If neon is unavailable in the current environment, then `None` is + /// returned. + #[inline] + pub fn new(needle: u8) -> Option { + if One::is_available() { + // SAFETY: we check that neon is available above. + unsafe { Some(One::new_unchecked(needle)) } + } else { + None + } + } + + /// Create a new finder specific to neon vectors and routines without + /// checking that neon is available. + /// + /// # Safety + /// + /// Callers must guarantee that it is safe to execute `neon` instructions + /// in the current environment.
+ /// + /// Note that it is a common misconception that if one compiles for an + /// `aarch64` target, then they therefore automatically have access to neon + /// instructions. While this is almost always the case, it isn't true in + /// 100% of cases. + #[target_feature(enable = "neon")] + #[inline] + pub unsafe fn new_unchecked(needle: u8) -> One { + One(generic::One::new(needle)) + } + + /// Returns true when this implementation is available in the current + /// environment. + /// + /// When this is true, it is guaranteed that [`One::new`] will return + /// a `Some` value. Similarly, when it is false, it is guaranteed that + /// `One::new` will return a `None` value. + /// + /// Note also that for the lifetime of a single program, if this returns + /// true then it will always return true. + #[inline] + pub fn is_available() -> bool { + #[cfg(target_feature = "neon")] + { + true + } + #[cfg(not(target_feature = "neon"))] + { + false + } + } + + /// Return the first occurrence of one of the needle bytes in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value is `haystack.len() - 1`. + #[inline] + pub fn find(&self, haystack: &[u8]) -> Option { + // SAFETY: `find_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. + unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.find_raw(s, e) + }) + } + } + + /// Return the last occurrence of one of the needle bytes in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value is `haystack.len() - 1`. + #[inline] + pub fn rfind(&self, haystack: &[u8]) -> Option { + // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers.
+ unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.rfind_raw(s, e) + }) + } + } + + /// Counts all occurrences of this byte in the given haystack. + #[inline] + pub fn count(&self, haystack: &[u8]) -> usize { + // SAFETY: All of our pointers are derived directly from a borrowed + // slice, which is guaranteed to be valid. + unsafe { + let start = haystack.as_ptr(); + let end = start.add(haystack.len()); + self.count_raw(start, end) + } + } + + /// Like `find`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn find_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + if end.distance(start) < uint8x16_t::BYTES { + // SAFETY: We require the caller to pass valid start/end pointers. + return generic::fwd_byte_by_byte(start, end, |b| { + b == self.0.needle1() + }); + } + // SAFETY: Building a `One` means it's safe to call 'neon' routines. 
+ // Also, we've checked that our haystack is big enough to run on the + // vector routine. Pointer validity is caller's responsibility. + self.find_raw_impl(start, end) + } + + /// Like `rfind`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn rfind_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + if end.distance(start) < uint8x16_t::BYTES { + // SAFETY: We require the caller to pass valid start/end pointers. + return generic::rev_byte_by_byte(start, end, |b| { + b == self.0.needle1() + }); + } + // SAFETY: Building a `One` means it's safe to call 'neon' routines. + // Also, we've checked that our haystack is big enough to run on the + // vector routine. Pointer validity is caller's responsibility. + self.rfind_raw_impl(start, end) + } + + /// Like `count`, but accepts and returns raw pointers. 
+ /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `0` will always be returned. + #[inline] + pub unsafe fn count_raw(&self, start: *const u8, end: *const u8) -> usize { + if start >= end { + return 0; + } + if end.distance(start) < uint8x16_t::BYTES { + // SAFETY: We require the caller to pass valid start/end pointers. + return generic::count_byte_by_byte(start, end, |b| { + b == self.0.needle1() + }); + } + // SAFETY: Building a `One` means it's safe to call 'neon' routines. + // Also, we've checked that our haystack is big enough to run on the + // vector routine. Pointer validity is caller's responsibility. + self.count_raw_impl(start, end) + } + + /// Execute a search using neon vectors and routines. + /// + /// # Safety + /// + /// Same as [`One::find_raw`], except the distance between `start` and + /// `end` must be at least the size of a neon vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `One`, which can only be constructed + /// when it is safe to call `neon` routines.)
+ #[target_feature(enable = "neon")] + #[inline] + unsafe fn find_raw_impl( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.0.find_raw(start, end) + } + + /// Execute a search using neon vectors and routines. + /// + /// # Safety + /// + /// Same as [`One::rfind_raw`], except the distance between `start` and + /// `end` must be at least the size of a neon vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `One`, which can only be constructed + /// when it is safe to call `neon` routines.) + #[target_feature(enable = "neon")] + #[inline] + unsafe fn rfind_raw_impl( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.0.rfind_raw(start, end) + } + + /// Execute a count using neon vectors and routines. + /// + /// # Safety + /// + /// Same as [`One::count_raw`], except the distance between `start` and + /// `end` must be at least the size of a neon vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `One`, which can only be constructed + /// when it is safe to call `neon` routines.) + #[target_feature(enable = "neon")] + #[inline] + unsafe fn count_raw_impl( + &self, + start: *const u8, + end: *const u8, + ) -> usize { + self.0.count_raw(start, end) + } + + /// Returns an iterator over all occurrences of the needle byte in the + /// given haystack. + /// + /// The iterator returned implements `DoubleEndedIterator`. This means it + /// can also be used to find occurrences in reverse order. + #[inline] + pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> OneIter<'a, 'h> { + OneIter { searcher: self, it: generic::Iter::new(haystack) } + } +} + +/// An iterator over all occurrences of a single byte in a haystack. +/// +/// This iterator implements `DoubleEndedIterator`, which means it can also be +/// used to find occurrences in reverse order. 
+/// +/// This iterator is created by the [`One::iter`] method. +/// +/// The lifetime parameters are as follows: +/// +/// * `'a` refers to the lifetime of the underlying [`One`] searcher. +/// * `'h` refers to the lifetime of the haystack being searched. +#[derive(Clone, Debug)] +pub struct OneIter<'a, 'h> { + searcher: &'a One, + it: generic::Iter<'h>, +} + +impl<'a, 'h> Iterator for OneIter<'a, 'h> { + type Item = usize; + + #[inline] + fn next(&mut self) -> Option { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'find_raw' falls within the bounds of the start and end pointer. + unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } + } + + #[inline] + fn count(self) -> usize { + self.it.count(|s, e| { + // SAFETY: We rely on our generic iterator to return valid start + // and end pointers. + unsafe { self.searcher.count_raw(s, e) } + }) + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.it.size_hint() + } +} + +impl<'a, 'h> DoubleEndedIterator for OneIter<'a, 'h> { + #[inline] + fn next_back(&mut self) -> Option { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'rfind_raw' falls within the bounds of the start and end pointer. + unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } + } +} + +impl<'a, 'h> core::iter::FusedIterator for OneIter<'a, 'h> {} + +/// Finds all occurrences of two bytes in a haystack. +/// +/// That is, this reports matches of one of two possible bytes. For example, +/// searching for `a` or `b` in `afoobar` would report matches at offsets `0`, +/// `4` and `5`. +#[derive(Clone, Copy, Debug)] +pub struct Two(generic::Two); + +impl Two { + /// Create a new searcher that finds occurrences of the needle bytes given. 
+ /// + /// This particular searcher is specialized to use neon vector instructions + /// that typically make it quite fast. + /// + /// If neon is unavailable in the current environment, then `None` is + /// returned. + #[inline] + pub fn new(needle1: u8, needle2: u8) -> Option { + if Two::is_available() { + // SAFETY: we check that neon is available above. + unsafe { Some(Two::new_unchecked(needle1, needle2)) } + } else { + None + } + } + + /// Create a new finder specific to neon vectors and routines without + /// checking that neon is available. + /// + /// # Safety + /// + /// Callers must guarantee that it is safe to execute `neon` instructions + /// in the current environment. + /// + /// Note that it is a common misconception that if one compiles for an + /// `aarch64` target, then they therefore automatically have access to neon + /// instructions. While this is almost always the case, it isn't true in + /// 100% of cases. + #[target_feature(enable = "neon")] + #[inline] + pub unsafe fn new_unchecked(needle1: u8, needle2: u8) -> Two { + Two(generic::Two::new(needle1, needle2)) + } + + /// Returns true when this implementation is available in the current + /// environment. + /// + /// When this is true, it is guaranteed that [`Two::new`] will return + /// a `Some` value. Similarly, when it is false, it is guaranteed that + /// `Two::new` will return a `None` value. + /// + /// Note also that for the lifetime of a single program, if this returns + /// true then it will always return true. + #[inline] + pub fn is_available() -> bool { + #[cfg(target_feature = "neon")] + { + true + } + #[cfg(not(target_feature = "neon"))] + { + false + } + } + + /// Return the first occurrence of one of the needle bytes in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value is `haystack.len() - 1`.
+ #[inline] + pub fn find(&self, haystack: &[u8]) -> Option { + // SAFETY: `find_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. + unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.find_raw(s, e) + }) + } + } + + /// Return the last occurrence of one of the needle bytes in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value is `haystack.len() - 1`. + #[inline] + pub fn rfind(&self, haystack: &[u8]) -> Option { + // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. + unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.rfind_raw(s, e) + }) + } + } + + /// Like `find`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. 
+ #[inline] + pub unsafe fn find_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + if end.distance(start) < uint8x16_t::BYTES { + // SAFETY: We require the caller to pass valid start/end pointers. + return generic::fwd_byte_by_byte(start, end, |b| { + b == self.0.needle1() || b == self.0.needle2() + }); + } + // SAFETY: Building a `Two` means it's safe to call 'neon' routines. + // Also, we've checked that our haystack is big enough to run on the + // vector routine. Pointer validity is caller's responsibility. + self.find_raw_impl(start, end) + } + + /// Like `rfind`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn rfind_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + if end.distance(start) < uint8x16_t::BYTES { + // SAFETY: We require the caller to pass valid start/end pointers. 
+ return generic::rev_byte_by_byte(start, end, |b| { + b == self.0.needle1() || b == self.0.needle2() + }); + } + // SAFETY: Building a `Two` means it's safe to call 'neon' routines. + // Also, we've checked that our haystack is big enough to run on the + // vector routine. Pointer validity is caller's responsibility. + self.rfind_raw_impl(start, end) + } + + /// Execute a search using neon vectors and routines. + /// + /// # Safety + /// + /// Same as [`Two::find_raw`], except the distance between `start` and + /// `end` must be at least the size of a neon vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Two`, which can only be constructed + /// when it is safe to call `neon` routines.) + #[target_feature(enable = "neon")] + #[inline] + unsafe fn find_raw_impl( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.0.find_raw(start, end) + } + + /// Execute a search using neon vectors and routines. + /// + /// # Safety + /// + /// Same as [`Two::rfind_raw`], except the distance between `start` and + /// `end` must be at least the size of a neon vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Two`, which can only be constructed + /// when it is safe to call `neon` routines.) + #[target_feature(enable = "neon")] + #[inline] + unsafe fn rfind_raw_impl( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.0.rfind_raw(start, end) + } + + /// Returns an iterator over all occurrences of the needle bytes in the + /// given haystack. + /// + /// The iterator returned implements `DoubleEndedIterator`. This means it + /// can also be used to find occurrences in reverse order. 
+ #[inline] + pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> TwoIter<'a, 'h> { + TwoIter { searcher: self, it: generic::Iter::new(haystack) } + } +} + +/// An iterator over all occurrences of two possible bytes in a haystack. +/// +/// This iterator implements `DoubleEndedIterator`, which means it can also be +/// used to find occurrences in reverse order. +/// +/// This iterator is created by the [`Two::iter`] method. +/// +/// The lifetime parameters are as follows: +/// +/// * `'a` refers to the lifetime of the underlying [`Two`] searcher. +/// * `'h` refers to the lifetime of the haystack being searched. +#[derive(Clone, Debug)] +pub struct TwoIter<'a, 'h> { + searcher: &'a Two, + it: generic::Iter<'h>, +} + +impl<'a, 'h> Iterator for TwoIter<'a, 'h> { + type Item = usize; + + #[inline] + fn next(&mut self) -> Option { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'find_raw' falls within the bounds of the start and end pointer. + unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.it.size_hint() + } +} + +impl<'a, 'h> DoubleEndedIterator for TwoIter<'a, 'h> { + #[inline] + fn next_back(&mut self) -> Option { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'rfind_raw' falls within the bounds of the start and end pointer. + unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } + } +} + +impl<'a, 'h> core::iter::FusedIterator for TwoIter<'a, 'h> {} + +/// Finds all occurrences of three bytes in a haystack. +/// +/// That is, this reports matches of one of three possible bytes. For example, +/// searching for `a`, `b` or `o` in `afoobar` would report matches at offsets +/// `0`, `2`, `3`, `4` and `5`. 
+#[derive(Clone, Copy, Debug)] +pub struct Three(generic::Three); + +impl Three { + /// Create a new searcher that finds occurrences of the needle bytes given. + /// + /// This particular searcher is specialized to use neon vector instructions + /// that typically make it quite fast. + /// + /// If neon is unavailable in the current environment, then `None` is + /// returned. + #[inline] + pub fn new(needle1: u8, needle2: u8, needle3: u8) -> Option { + if Three::is_available() { + // SAFETY: we check that neon is available above. + unsafe { Some(Three::new_unchecked(needle1, needle2, needle3)) } + } else { + None + } + } + + /// Create a new finder specific to neon vectors and routines without + /// checking that neon is available. + /// + /// # Safety + /// + /// Callers must guarantee that it is safe to execute `neon` instructions + /// in the current environment. + /// + /// Note that it is a common misconception that if one compiles for an + /// `aarch64` target, then they therefore automatically have access to neon + /// instructions. While this is almost always the case, it isn't true in + /// 100% of cases. + #[target_feature(enable = "neon")] + #[inline] + pub unsafe fn new_unchecked( + needle1: u8, + needle2: u8, + needle3: u8, + ) -> Three { + Three(generic::Three::new(needle1, needle2, needle3)) + } + + /// Returns true when this implementation is available in the current + /// environment. + /// + /// When this is true, it is guaranteed that [`Three::new`] will return + /// a `Some` value. Similarly, when it is false, it is guaranteed that + /// `Three::new` will return a `None` value. + /// + /// Note also that for the lifetime of a single program, if this returns + /// true then it will always return true. + #[inline] + pub fn is_available() -> bool { + #[cfg(target_feature = "neon")] + { + true + } + #[cfg(not(target_feature = "neon"))] + { + false + } + } + + /// Return the first occurrence of one of the needle bytes in the given + /// haystack.
If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value is `haystack.len() - 1`. + #[inline] + pub fn find(&self, haystack: &[u8]) -> Option { + // SAFETY: `find_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. + unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.find_raw(s, e) + }) + } + } + + /// Return the last occurrence of one of the needle bytes in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value is `haystack.len() - 1`. + #[inline] + pub fn rfind(&self, haystack: &[u8]) -> Option { + // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. + unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.rfind_raw(s, e) + }) + } + } + + /// Like `find`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. 
+ /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn find_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + if end.distance(start) < uint8x16_t::BYTES { + // SAFETY: We require the caller to pass valid start/end pointers. + return generic::fwd_byte_by_byte(start, end, |b| { + b == self.0.needle1() + || b == self.0.needle2() + || b == self.0.needle3() + }); + } + // SAFETY: Building a `Three` means it's safe to call 'neon' routines. + // Also, we've checked that our haystack is big enough to run on the + // vector routine. Pointer validity is caller's responsibility. + self.find_raw_impl(start, end) + } + + /// Like `rfind`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. 
+ #[inline] + pub unsafe fn rfind_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + if end.distance(start) < uint8x16_t::BYTES { + // SAFETY: We require the caller to pass valid start/end pointers. + return generic::rev_byte_by_byte(start, end, |b| { + b == self.0.needle1() + || b == self.0.needle2() + || b == self.0.needle3() + }); + } + // SAFETY: Building a `Three` means it's safe to call 'neon' routines. + // Also, we've checked that our haystack is big enough to run on the + // vector routine. Pointer validity is caller's responsibility. + self.rfind_raw_impl(start, end) + } + + /// Execute a search using neon vectors and routines. + /// + /// # Safety + /// + /// Same as [`Three::find_raw`], except the distance between `start` and + /// `end` must be at least the size of a neon vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Three`, which can only be constructed + /// when it is safe to call `neon` routines.) + #[target_feature(enable = "neon")] + #[inline] + unsafe fn find_raw_impl( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.0.find_raw(start, end) + } + + /// Execute a search using neon vectors and routines. + /// + /// # Safety + /// + /// Same as [`Three::rfind_raw`], except the distance between `start` and + /// `end` must be at least the size of a neon vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Three`, which can only be constructed + /// when it is safe to call `neon` routines.) + #[target_feature(enable = "neon")] + #[inline] + unsafe fn rfind_raw_impl( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.0.rfind_raw(start, end) + } + + /// Returns an iterator over all occurrences of the needle byte in the + /// given haystack. 
+ /// + /// The iterator returned implements `DoubleEndedIterator`. This means it + /// can also be used to find occurrences in reverse order. + #[inline] + pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> ThreeIter<'a, 'h> { + ThreeIter { searcher: self, it: generic::Iter::new(haystack) } + } +} + +/// An iterator over all occurrences of three possible bytes in a haystack. +/// +/// This iterator implements `DoubleEndedIterator`, which means it can also be +/// used to find occurrences in reverse order. +/// +/// This iterator is created by the [`Three::iter`] method. +/// +/// The lifetime parameters are as follows: +/// +/// * `'a` refers to the lifetime of the underlying [`Three`] searcher. +/// * `'h` refers to the lifetime of the haystack being searched. +#[derive(Clone, Debug)] +pub struct ThreeIter<'a, 'h> { + searcher: &'a Three, + it: generic::Iter<'h>, +} + +impl<'a, 'h> Iterator for ThreeIter<'a, 'h> { + type Item = usize; + + #[inline] + fn next(&mut self) -> Option { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'find_raw' falls within the bounds of the start and end pointer. + unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.it.size_hint() + } +} + +impl<'a, 'h> DoubleEndedIterator for ThreeIter<'a, 'h> { + #[inline] + fn next_back(&mut self) -> Option { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'rfind_raw' falls within the bounds of the start and end pointer. 
+ unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } + } +} + +impl<'a, 'h> core::iter::FusedIterator for ThreeIter<'a, 'h> {} + +#[cfg(test)] +mod tests { + use super::*; + + define_memchr_quickcheck!(super); + + #[test] + fn forward_one() { + crate::tests::memchr::Runner::new(1).forward_iter( + |haystack, needles| { + Some(One::new(needles[0])?.iter(haystack).collect()) + }, + ) + } + + #[test] + fn reverse_one() { + crate::tests::memchr::Runner::new(1).reverse_iter( + |haystack, needles| { + Some(One::new(needles[0])?.iter(haystack).rev().collect()) + }, + ) + } + + #[test] + fn count_one() { + crate::tests::memchr::Runner::new(1).count_iter(|haystack, needles| { + Some(One::new(needles[0])?.iter(haystack).count()) + }) + } + + #[test] + fn forward_two() { + crate::tests::memchr::Runner::new(2).forward_iter( + |haystack, needles| { + let n1 = needles.get(0).copied()?; + let n2 = needles.get(1).copied()?; + Some(Two::new(n1, n2)?.iter(haystack).collect()) + }, + ) + } + + #[test] + fn reverse_two() { + crate::tests::memchr::Runner::new(2).reverse_iter( + |haystack, needles| { + let n1 = needles.get(0).copied()?; + let n2 = needles.get(1).copied()?; + Some(Two::new(n1, n2)?.iter(haystack).rev().collect()) + }, + ) + } + + #[test] + fn forward_three() { + crate::tests::memchr::Runner::new(3).forward_iter( + |haystack, needles| { + let n1 = needles.get(0).copied()?; + let n2 = needles.get(1).copied()?; + let n3 = needles.get(2).copied()?; + Some(Three::new(n1, n2, n3)?.iter(haystack).collect()) + }, + ) + } + + #[test] + fn reverse_three() { + crate::tests::memchr::Runner::new(3).reverse_iter( + |haystack, needles| { + let n1 = needles.get(0).copied()?; + let n2 = needles.get(1).copied()?; + let n3 = needles.get(2).copied()?; + Some(Three::new(n1, n2, n3)?.iter(haystack).rev().collect()) + }, + ) + } +} diff --git a/vendor/memchr/src/arch/aarch64/neon/mod.rs b/vendor/memchr/src/arch/aarch64/neon/mod.rs new file mode 100644 index 0000000..ccf9cf8 --- 
/dev/null +++ b/vendor/memchr/src/arch/aarch64/neon/mod.rs @@ -0,0 +1,6 @@ +/*! +Algorithms for the `aarch64` target using 128-bit vectors via NEON. +*/ + +pub mod memchr; +pub mod packedpair; diff --git a/vendor/memchr/src/arch/aarch64/neon/packedpair.rs b/vendor/memchr/src/arch/aarch64/neon/packedpair.rs new file mode 100644 index 0000000..6884882 --- /dev/null +++ b/vendor/memchr/src/arch/aarch64/neon/packedpair.rs @@ -0,0 +1,236 @@ +/*! +A 128-bit vector implementation of the "packed pair" SIMD algorithm. + +The "packed pair" algorithm is based on the [generic SIMD] algorithm. The main +difference is that it (by default) uses a background distribution of byte +frequencies to heuristically select the pair of bytes to search for. + +[generic SIMD]: http://0x80.pl/articles/simd-strfind.html#first-and-last +*/ + +use core::arch::aarch64::uint8x16_t; + +use crate::arch::{all::packedpair::Pair, generic::packedpair}; + +/// A "packed pair" finder that uses 128-bit vector operations. +/// +/// This finder picks two bytes that it believes have high predictive power +/// for indicating an overall match of a needle. Depending on whether +/// `Finder::find` or `Finder::find_prefilter` is used, it reports offsets +/// where the needle matches or could match. In the prefilter case, candidates +/// are reported whenever the [`Pair`] of bytes given matches. +#[derive(Clone, Copy, Debug)] +pub struct Finder(packedpair::Finder); + +/// A "packed pair" finder that uses 128-bit vector operations. +/// +/// This finder picks two bytes that it believes have high predictive power +/// for indicating an overall match of a needle. Depending on whether +/// `Finder::find` or `Finder::find_prefilter` is used, it reports offsets +/// where the needle matches or could match. In the prefilter case, candidates +/// are reported whenever the [`Pair`] of bytes given matches. +impl Finder { + /// Create a new pair searcher. 
The searcher returned can either report + /// exact matches of `needle` or act as a prefilter and report candidate + /// positions of `needle`. + /// + /// If neon is unavailable in the current environment or if a [`Pair`] + /// could not be constructed from the needle given, then `None` is + /// returned. + #[inline] + pub fn new(needle: &[u8]) -> Option { + Finder::with_pair(needle, Pair::new(needle)?) + } + + /// Create a new "packed pair" finder using the pair of bytes given. + /// + /// This constructor permits callers to control precisely which pair of + /// bytes is used as a predicate. + /// + /// If neon is unavailable in the current environment, then `None` is + /// returned. + #[inline] + pub fn with_pair(needle: &[u8], pair: Pair) -> Option { + if Finder::is_available() { + // SAFETY: we check that sse2 is available above. We are also + // guaranteed to have needle.len() > 1 because we have a valid + // Pair. + unsafe { Some(Finder::with_pair_impl(needle, pair)) } + } else { + None + } + } + + /// Create a new `Finder` specific to neon vectors and routines. + /// + /// # Safety + /// + /// Same as the safety for `packedpair::Finder::new`, and callers must also + /// ensure that neon is available. + #[target_feature(enable = "neon")] + #[inline] + unsafe fn with_pair_impl(needle: &[u8], pair: Pair) -> Finder { + let finder = packedpair::Finder::::new(needle, pair); + Finder(finder) + } + + /// Returns true when this implementation is available in the current + /// environment. + /// + /// When this is true, it is guaranteed that [`Finder::with_pair`] will + /// return a `Some` value. Similarly, when it is false, it is guaranteed + /// that `Finder::with_pair` will return a `None` value. Notice that this + /// does not guarantee that [`Finder::new`] will return a `Finder`. Namely, + /// even when `Finder::is_available` is true, it is not guaranteed that a + /// valid [`Pair`] can be found from the needle given. 
+ /// + /// Note also that for the lifetime of a single program, if this returns + /// true then it will always return true. + #[inline] + pub fn is_available() -> bool { + #[cfg(target_feature = "neon")] + { + true + } + #[cfg(not(target_feature = "neon"))] + { + false + } + } + + /// Execute a search using neon vectors and routines. + /// + /// # Panics + /// + /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. + #[inline] + pub fn find(&self, haystack: &[u8], needle: &[u8]) -> Option { + // SAFETY: Building a `Finder` means it's safe to call 'neon' routines. + unsafe { self.find_impl(haystack, needle) } + } + + /// Execute a search using neon vectors and routines. + /// + /// # Panics + /// + /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. + #[inline] + pub fn find_prefilter(&self, haystack: &[u8]) -> Option { + // SAFETY: Building a `Finder` means it's safe to call 'neon' routines. + unsafe { self.find_prefilter_impl(haystack) } + } + + /// Execute a search using neon vectors and routines. + /// + /// # Panics + /// + /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. + /// + /// # Safety + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Finder`, which can only be constructed + /// when it is safe to call `neon` routines.) + #[target_feature(enable = "neon")] + #[inline] + unsafe fn find_impl( + &self, + haystack: &[u8], + needle: &[u8], + ) -> Option { + self.0.find(haystack, needle) + } + + /// Execute a prefilter search using neon vectors and routines. + /// + /// # Panics + /// + /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. + /// + /// # Safety + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Finder`, which can only be constructed + /// when it is safe to call `neon` routines.) 
+ #[target_feature(enable = "neon")] + #[inline] + unsafe fn find_prefilter_impl(&self, haystack: &[u8]) -> Option { + self.0.find_prefilter(haystack) + } + + /// Returns the pair of offsets (into the needle) used to check as a + /// predicate before confirming whether a needle exists at a particular + /// position. + #[inline] + pub fn pair(&self) -> &Pair { + self.0.pair() + } + + /// Returns the minimum haystack length that this `Finder` can search. + /// + /// Using a haystack with length smaller than this in a search will result + /// in a panic. The reason for this restriction is that this finder is + /// meant to be a low-level component that is part of a larger substring + /// strategy. In that sense, it avoids trying to handle all cases and + /// instead only handles the cases that it can handle very well. + #[inline] + pub fn min_haystack_len(&self) -> usize { + self.0.min_haystack_len() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn find(haystack: &[u8], needle: &[u8]) -> Option> { + let f = Finder::new(needle)?; + if haystack.len() < f.min_haystack_len() { + return None; + } + Some(f.find(haystack, needle)) + } + + define_substring_forward_quickcheck!(find); + + #[test] + fn forward_substring() { + crate::tests::substring::Runner::new().fwd(find).run() + } + + #[test] + fn forward_packedpair() { + fn find( + haystack: &[u8], + needle: &[u8], + index1: u8, + index2: u8, + ) -> Option> { + let pair = Pair::with_indices(needle, index1, index2)?; + let f = Finder::with_pair(needle, pair)?; + if haystack.len() < f.min_haystack_len() { + return None; + } + Some(f.find(haystack, needle)) + } + crate::tests::packedpair::Runner::new().fwd(find).run() + } + + #[test] + fn forward_packedpair_prefilter() { + fn find( + haystack: &[u8], + needle: &[u8], + index1: u8, + index2: u8, + ) -> Option> { + let pair = Pair::with_indices(needle, index1, index2)?; + let f = Finder::with_pair(needle, pair)?; + if haystack.len() < f.min_haystack_len() { + return 
None; + } + Some(f.find_prefilter(haystack)) + } + crate::tests::packedpair::Runner::new().fwd(find).run() + } +} diff --git a/vendor/memchr/src/arch/all/memchr.rs b/vendor/memchr/src/arch/all/memchr.rs new file mode 100644 index 0000000..435b1be --- /dev/null +++ b/vendor/memchr/src/arch/all/memchr.rs @@ -0,0 +1,996 @@ +/*! +Provides architecture independent implementations of `memchr` and friends. + +The main types in this module are [`One`], [`Two`] and [`Three`]. They are for +searching for one, two or three distinct bytes, respectively, in a haystack. +Each type also has corresponding double ended iterators. These searchers +are typically slower than hand-coded vector routines accomplishing the same +task, but are also typically faster than naive scalar code. These routines +effectively work by treating a `usize` as a vector of 8-bit lanes, and thus +achieves some level of data parallelism even without explicit vector support. + +The `One` searcher also provides a [`One::count`] routine for efficiently +counting the number of times a single byte occurs in a haystack. This is +useful, for example, for counting the number of lines in a haystack. This +routine exists because it is usually faster, especially with a high match +count, then using [`One::find`] repeatedly. ([`OneIter`] specializes its +`Iterator::count` implementation to use this routine.) + +Only one, two and three bytes are supported because three bytes is about +the point where one sees diminishing returns. Beyond this point and it's +probably (but not necessarily) better to just use a simple `[bool; 256]` array +or similar. However, it depends mightily on the specific work-load and the +expected match frequency. +*/ + +use crate::{arch::generic::memchr as generic, ext::Pointer}; + +/// The number of bytes in a single `usize` value. +const USIZE_BYTES: usize = (usize::BITS / 8) as usize; +/// The bits that must be zero for a `*const usize` to be properly aligned. 
+const USIZE_ALIGN: usize = USIZE_BYTES - 1; + +/// Finds all occurrences of a single byte in a haystack. +#[derive(Clone, Copy, Debug)] +pub struct One { + s1: u8, + v1: usize, +} + +impl One { + /// The number of bytes we examine per each iteration of our search loop. + const LOOP_BYTES: usize = 2 * USIZE_BYTES; + + /// Create a new searcher that finds occurrences of the byte given. + #[inline] + pub fn new(needle: u8) -> One { + One { s1: needle, v1: splat(needle) } + } + + /// A test-only routine so that we can bundle a bunch of quickcheck + /// properties into a single macro. Basically, this provides a constructor + /// that makes it identical to most other memchr implementations, which + /// have fallible constructors. + #[cfg(test)] + pub(crate) fn try_new(needle: u8) -> Option { + Some(One::new(needle)) + } + + /// Return the first occurrence of the needle in the given haystack. If no + /// such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value for a non-empty haystack is `haystack.len() - 1`. + #[inline] + pub fn find(&self, haystack: &[u8]) -> Option { + // SAFETY: `find_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. + unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.find_raw(s, e) + }) + } + } + + /// Return the last occurrence of the needle in the given haystack. If no + /// such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value for a non-empty haystack is `haystack.len() - 1`. + #[inline] + pub fn rfind(&self, haystack: &[u8]) -> Option { + // SAFETY: `find_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. 
+ unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.rfind_raw(s, e) + }) + } + } + + /// Counts all occurrences of this byte in the given haystack. + #[inline] + pub fn count(&self, haystack: &[u8]) -> usize { + // SAFETY: All of our pointers are derived directly from a borrowed + // slice, which is guaranteed to be valid. + unsafe { + let start = haystack.as_ptr(); + let end = start.add(haystack.len()); + self.count_raw(start, end) + } + } + + /// Like `find`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn find_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + let confirm = |b| self.confirm(b); + let len = end.distance(start); + if len < USIZE_BYTES { + return generic::fwd_byte_by_byte(start, end, confirm); + } + + // The start of the search may not be aligned to `*const usize`, + // so we do an unaligned load here. 
+ let chunk = start.cast::().read_unaligned(); + if self.has_needle(chunk) { + return generic::fwd_byte_by_byte(start, end, confirm); + } + + // And now we start our search at a guaranteed aligned position. + // The first iteration of the loop below will overlap with the the + // unaligned chunk above in cases where the search starts at an + // unaligned offset, but that's okay as we're only here if that + // above didn't find a match. + let mut cur = + start.add(USIZE_BYTES - (start.as_usize() & USIZE_ALIGN)); + debug_assert!(cur > start); + if len <= One::LOOP_BYTES { + return generic::fwd_byte_by_byte(cur, end, confirm); + } + debug_assert!(end.sub(One::LOOP_BYTES) >= start); + while cur <= end.sub(One::LOOP_BYTES) { + debug_assert_eq!(0, cur.as_usize() % USIZE_BYTES); + + let a = cur.cast::().read(); + let b = cur.add(USIZE_BYTES).cast::().read(); + if self.has_needle(a) || self.has_needle(b) { + break; + } + cur = cur.add(One::LOOP_BYTES); + } + generic::fwd_byte_by_byte(cur, end, confirm) + } + + /// Like `rfind`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. 
+ /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn rfind_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + let confirm = |b| self.confirm(b); + let len = end.distance(start); + if len < USIZE_BYTES { + return generic::rev_byte_by_byte(start, end, confirm); + } + + let chunk = end.sub(USIZE_BYTES).cast::().read_unaligned(); + if self.has_needle(chunk) { + return generic::rev_byte_by_byte(start, end, confirm); + } + + let mut cur = end.sub(end.as_usize() & USIZE_ALIGN); + debug_assert!(start <= cur && cur <= end); + if len <= One::LOOP_BYTES { + return generic::rev_byte_by_byte(start, cur, confirm); + } + while cur >= start.add(One::LOOP_BYTES) { + debug_assert_eq!(0, cur.as_usize() % USIZE_BYTES); + + let a = cur.sub(2 * USIZE_BYTES).cast::().read(); + let b = cur.sub(1 * USIZE_BYTES).cast::().read(); + if self.has_needle(a) || self.has_needle(b) { + break; + } + cur = cur.sub(One::LOOP_BYTES); + } + generic::rev_byte_by_byte(start, cur, confirm) + } + + /// Counts all occurrences of this byte in the given haystack represented + /// by raw pointers. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. 
+ /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `0` will always be returned. + #[inline] + pub unsafe fn count_raw(&self, start: *const u8, end: *const u8) -> usize { + if start >= end { + return 0; + } + // Sadly I couldn't get the SWAR approach to work here, so we just do + // one byte at a time for now. PRs to improve this are welcome. + let mut ptr = start; + let mut count = 0; + while ptr < end { + count += (ptr.read() == self.s1) as usize; + ptr = ptr.offset(1); + } + count + } + + /// Returns an iterator over all occurrences of the needle byte in the + /// given haystack. + /// + /// The iterator returned implements `DoubleEndedIterator`. This means it + /// can also be used to find occurrences in reverse order. + pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> OneIter<'a, 'h> { + OneIter { searcher: self, it: generic::Iter::new(haystack) } + } + + #[inline(always)] + fn has_needle(&self, chunk: usize) -> bool { + has_zero_byte(self.v1 ^ chunk) + } + + #[inline(always)] + fn confirm(&self, haystack_byte: u8) -> bool { + self.s1 == haystack_byte + } +} + +/// An iterator over all occurrences of a single byte in a haystack. +/// +/// This iterator implements `DoubleEndedIterator`, which means it can also be +/// used to find occurrences in reverse order. +/// +/// This iterator is created by the [`One::iter`] method. +/// +/// The lifetime parameters are as follows: +/// +/// * `'a` refers to the lifetime of the underlying [`One`] searcher. +/// * `'h` refers to the lifetime of the haystack being searched. +#[derive(Clone, Debug)] +pub struct OneIter<'a, 'h> { + /// The underlying memchr searcher. + searcher: &'a One, + /// Generic iterator implementation. 
+ it: generic::Iter<'h>, +} + +impl<'a, 'h> Iterator for OneIter<'a, 'h> { + type Item = usize; + + #[inline] + fn next(&mut self) -> Option { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'find_raw' falls within the bounds of the start and end pointer. + unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } + } + + #[inline] + fn count(self) -> usize { + self.it.count(|s, e| { + // SAFETY: We rely on our generic iterator to return valid start + // and end pointers. + unsafe { self.searcher.count_raw(s, e) } + }) + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.it.size_hint() + } +} + +impl<'a, 'h> DoubleEndedIterator for OneIter<'a, 'h> { + #[inline] + fn next_back(&mut self) -> Option { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'rfind_raw' falls within the bounds of the start and end pointer. + unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } + } +} + +/// Finds all occurrences of two bytes in a haystack. +/// +/// That is, this reports matches of one of two possible bytes. For example, +/// searching for `a` or `b` in `afoobar` would report matches at offsets `0`, +/// `4` and `5`. +#[derive(Clone, Copy, Debug)] +pub struct Two { + s1: u8, + s2: u8, + v1: usize, + v2: usize, +} + +impl Two { + /// Create a new searcher that finds occurrences of the two needle bytes + /// given. + #[inline] + pub fn new(needle1: u8, needle2: u8) -> Two { + Two { + s1: needle1, + s2: needle2, + v1: splat(needle1), + v2: splat(needle2), + } + } + + /// A test-only routine so that we can bundle a bunch of quickcheck + /// properties into a single macro. Basically, this provides a constructor + /// that makes it identical to most other memchr implementations, which + /// have fallible constructors. 
+ #[cfg(test)] + pub(crate) fn try_new(needle1: u8, needle2: u8) -> Option { + Some(Two::new(needle1, needle2)) + } + + /// Return the first occurrence of one of the needle bytes in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value for a non-empty haystack is `haystack.len() - 1`. + #[inline] + pub fn find(&self, haystack: &[u8]) -> Option { + // SAFETY: `find_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. + unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.find_raw(s, e) + }) + } + } + + /// Return the last occurrence of one of the needle bytes in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value for a non-empty haystack is `haystack.len() - 1`. + #[inline] + pub fn rfind(&self, haystack: &[u8]) -> Option { + // SAFETY: `find_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. + unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.rfind_raw(s, e) + }) + } + } + + /// Like `find`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. 
+ /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn find_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + let confirm = |b| self.confirm(b); + let len = end.distance(start); + if len < USIZE_BYTES { + return generic::fwd_byte_by_byte(start, end, confirm); + } + + // The start of the search may not be aligned to `*const usize`, + // so we do an unaligned load here. + let chunk = start.cast::().read_unaligned(); + if self.has_needle(chunk) { + return generic::fwd_byte_by_byte(start, end, confirm); + } + + // And now we start our search at a guaranteed aligned position. + // The first iteration of the loop below will overlap with the the + // unaligned chunk above in cases where the search starts at an + // unaligned offset, but that's okay as we're only here if that + // above didn't find a match. + let mut cur = + start.add(USIZE_BYTES - (start.as_usize() & USIZE_ALIGN)); + debug_assert!(cur > start); + debug_assert!(end.sub(USIZE_BYTES) >= start); + while cur <= end.sub(USIZE_BYTES) { + debug_assert_eq!(0, cur.as_usize() % USIZE_BYTES); + + let chunk = cur.cast::().read(); + if self.has_needle(chunk) { + break; + } + cur = cur.add(USIZE_BYTES); + } + generic::fwd_byte_by_byte(cur, end, confirm) + } + + /// Like `rfind`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. 
+ /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn rfind_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + let confirm = |b| self.confirm(b); + let len = end.distance(start); + if len < USIZE_BYTES { + return generic::rev_byte_by_byte(start, end, confirm); + } + + let chunk = end.sub(USIZE_BYTES).cast::().read_unaligned(); + if self.has_needle(chunk) { + return generic::rev_byte_by_byte(start, end, confirm); + } + + let mut cur = end.sub(end.as_usize() & USIZE_ALIGN); + debug_assert!(start <= cur && cur <= end); + while cur >= start.add(USIZE_BYTES) { + debug_assert_eq!(0, cur.as_usize() % USIZE_BYTES); + + let chunk = cur.sub(USIZE_BYTES).cast::().read(); + if self.has_needle(chunk) { + break; + } + cur = cur.sub(USIZE_BYTES); + } + generic::rev_byte_by_byte(start, cur, confirm) + } + + /// Returns an iterator over all occurrences of one of the needle bytes in + /// the given haystack. + /// + /// The iterator returned implements `DoubleEndedIterator`. This means it + /// can also be used to find occurrences in reverse order. 
+ pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> TwoIter<'a, 'h> { + TwoIter { searcher: self, it: generic::Iter::new(haystack) } + } + + #[inline(always)] + fn has_needle(&self, chunk: usize) -> bool { + has_zero_byte(self.v1 ^ chunk) || has_zero_byte(self.v2 ^ chunk) + } + + #[inline(always)] + fn confirm(&self, haystack_byte: u8) -> bool { + self.s1 == haystack_byte || self.s2 == haystack_byte + } +} + +/// An iterator over all occurrences of two possible bytes in a haystack. +/// +/// This iterator implements `DoubleEndedIterator`, which means it can also be +/// used to find occurrences in reverse order. +/// +/// This iterator is created by the [`Two::iter`] method. +/// +/// The lifetime parameters are as follows: +/// +/// * `'a` refers to the lifetime of the underlying [`Two`] searcher. +/// * `'h` refers to the lifetime of the haystack being searched. +#[derive(Clone, Debug)] +pub struct TwoIter<'a, 'h> { + /// The underlying memchr searcher. + searcher: &'a Two, + /// Generic iterator implementation. + it: generic::Iter<'h>, +} + +impl<'a, 'h> Iterator for TwoIter<'a, 'h> { + type Item = usize; + + #[inline] + fn next(&mut self) -> Option<usize> { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'find_raw' falls within the bounds of the start and end pointer. + unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } + } + + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + self.it.size_hint() + } +} + +impl<'a, 'h> DoubleEndedIterator for TwoIter<'a, 'h> { + #[inline] + fn next_back(&mut self) -> Option<usize> { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'rfind_raw' falls within the bounds of the start and end pointer. + unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } + } +} + +/// Finds all occurrences of three bytes in a haystack. 
+/// +/// That is, this reports matches of one of three possible bytes. For example, +/// searching for `a`, `b` or `o` in `afoobar` would report matches at offsets +/// `0`, `2`, `3`, `4` and `5`. +#[derive(Clone, Copy, Debug)] +pub struct Three { + s1: u8, + s2: u8, + s3: u8, + v1: usize, + v2: usize, + v3: usize, +} + +impl Three { + /// Create a new searcher that finds occurrences of the three needle bytes + /// given. + #[inline] + pub fn new(needle1: u8, needle2: u8, needle3: u8) -> Three { + Three { + s1: needle1, + s2: needle2, + s3: needle3, + v1: splat(needle1), + v2: splat(needle2), + v3: splat(needle3), + } + } + + /// A test-only routine so that we can bundle a bunch of quickcheck + /// properties into a single macro. Basically, this provides a constructor + /// that makes it identical to most other memchr implementations, which + /// have fallible constructors. + #[cfg(test)] + pub(crate) fn try_new( + needle1: u8, + needle2: u8, + needle3: u8, + ) -> Option<Three> { + Some(Three::new(needle1, needle2, needle3)) + } + + /// Return the first occurrence of one of the needle bytes in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value for a non-empty haystack is `haystack.len() - 1`. + #[inline] + pub fn find(&self, haystack: &[u8]) -> Option<usize> { + // SAFETY: `find_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. + unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.find_raw(s, e) + }) + } + } + + /// Return the last occurrence of one of the needle bytes in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value for a non-empty haystack is `haystack.len() - 1`. 
+ #[inline] + pub fn rfind(&self, haystack: &[u8]) -> Option<usize> { + // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. + unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.rfind_raw(s, e) + }) + } + } + + /// Like `find`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn find_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + let confirm = |b| self.confirm(b); + let len = end.distance(start); + if len < USIZE_BYTES { + return generic::fwd_byte_by_byte(start, end, confirm); + } + + // The start of the search may not be aligned to `*const usize`, + // so we do an unaligned load here. + let chunk = start.cast::<usize>().read_unaligned(); + if self.has_needle(chunk) { + return generic::fwd_byte_by_byte(start, end, confirm); + } + + // And now we start our search at a guaranteed aligned position. 
+ // The first iteration of the loop below will overlap with the + // unaligned chunk above in cases where the search starts at an + // unaligned offset, but that's okay as we're only here if that + // above didn't find a match. + let mut cur = + start.add(USIZE_BYTES - (start.as_usize() & USIZE_ALIGN)); + debug_assert!(cur > start); + debug_assert!(end.sub(USIZE_BYTES) >= start); + while cur <= end.sub(USIZE_BYTES) { + debug_assert_eq!(0, cur.as_usize() % USIZE_BYTES); + + let chunk = cur.cast::<usize>().read(); + if self.has_needle(chunk) { + break; + } + cur = cur.add(USIZE_BYTES); + } + generic::fwd_byte_by_byte(cur, end, confirm) + } + + /// Like `rfind`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. 
+ #[inline] + pub unsafe fn rfind_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + let confirm = |b| self.confirm(b); + let len = end.distance(start); + if len < USIZE_BYTES { + return generic::rev_byte_by_byte(start, end, confirm); + } + + let chunk = end.sub(USIZE_BYTES).cast::<usize>().read_unaligned(); + if self.has_needle(chunk) { + return generic::rev_byte_by_byte(start, end, confirm); + } + + let mut cur = end.sub(end.as_usize() & USIZE_ALIGN); + debug_assert!(start <= cur && cur <= end); + while cur >= start.add(USIZE_BYTES) { + debug_assert_eq!(0, cur.as_usize() % USIZE_BYTES); + + let chunk = cur.sub(USIZE_BYTES).cast::<usize>().read(); + if self.has_needle(chunk) { + break; + } + cur = cur.sub(USIZE_BYTES); + } + generic::rev_byte_by_byte(start, cur, confirm) + } + + /// Returns an iterator over all occurrences of one of the needle bytes in + /// the given haystack. + /// + /// The iterator returned implements `DoubleEndedIterator`. This means it + /// can also be used to find occurrences in reverse order. + pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> ThreeIter<'a, 'h> { + ThreeIter { searcher: self, it: generic::Iter::new(haystack) } + } + + #[inline(always)] + fn has_needle(&self, chunk: usize) -> bool { + has_zero_byte(self.v1 ^ chunk) + || has_zero_byte(self.v2 ^ chunk) + || has_zero_byte(self.v3 ^ chunk) + } + + #[inline(always)] + fn confirm(&self, haystack_byte: u8) -> bool { + self.s1 == haystack_byte + || self.s2 == haystack_byte + || self.s3 == haystack_byte + } +} + +/// An iterator over all occurrences of three possible bytes in a haystack. +/// +/// This iterator implements `DoubleEndedIterator`, which means it can also be +/// used to find occurrences in reverse order. +/// +/// This iterator is created by the [`Three::iter`] method. +/// +/// The lifetime parameters are as follows: +/// +/// * `'a` refers to the lifetime of the underlying [`Three`] searcher. 
+/// * `'h` refers to the lifetime of the haystack being searched. +#[derive(Clone, Debug)] +pub struct ThreeIter<'a, 'h> { + /// The underlying memchr searcher. + searcher: &'a Three, + /// Generic iterator implementation. + it: generic::Iter<'h>, +} + +impl<'a, 'h> Iterator for ThreeIter<'a, 'h> { + type Item = usize; + + #[inline] + fn next(&mut self) -> Option<usize> { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'find_raw' falls within the bounds of the start and end pointer. + unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } + } + + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + self.it.size_hint() + } +} + +impl<'a, 'h> DoubleEndedIterator for ThreeIter<'a, 'h> { + #[inline] + fn next_back(&mut self) -> Option<usize> { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'rfind_raw' falls within the bounds of the start and end pointer. + unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } + } +} + +/// Return `true` if `x` contains any zero byte. +/// +/// That is, this routine treats `x` as a register of 8-bit lanes and returns +/// true when any of those lanes is `0`. +/// +/// From "Matters Computational" by J. Arndt. +#[inline(always)] +fn has_zero_byte(x: usize) -> bool { + // "The idea is to subtract one from each of the bytes and then look for + // bytes where the borrow propagated all the way to the most significant + // bit." + const LO: usize = splat(0x01); + const HI: usize = splat(0x80); + + (x.wrapping_sub(LO) & !x & HI) != 0 +} + +/// Repeat the given byte into a word size number. That is, every 8 bits +/// is equivalent to the given byte. For example, if `b` is `\x4E` or +/// `01001110` in binary, then the returned value on a 32-bit system would be: +/// `01001110_01001110_01001110_01001110`. 
+#[inline(always)] +const fn splat(b: u8) -> usize { + // TODO: use `usize::from` once it can be used in const context. + (b as usize) * (usize::MAX / 255) +} + +#[cfg(test)] +mod tests { + use super::*; + + define_memchr_quickcheck!(super, try_new); + + #[test] + fn forward_one() { + crate::tests::memchr::Runner::new(1).forward_iter( + |haystack, needles| { + Some(One::new(needles[0]).iter(haystack).collect()) + }, + ) + } + + #[test] + fn reverse_one() { + crate::tests::memchr::Runner::new(1).reverse_iter( + |haystack, needles| { + Some(One::new(needles[0]).iter(haystack).rev().collect()) + }, + ) + } + + #[test] + fn count_one() { + crate::tests::memchr::Runner::new(1).count_iter(|haystack, needles| { + Some(One::new(needles[0]).iter(haystack).count()) + }) + } + + #[test] + fn forward_two() { + crate::tests::memchr::Runner::new(2).forward_iter( + |haystack, needles| { + let n1 = needles.get(0).copied()?; + let n2 = needles.get(1).copied()?; + Some(Two::new(n1, n2).iter(haystack).collect()) + }, + ) + } + + #[test] + fn reverse_two() { + crate::tests::memchr::Runner::new(2).reverse_iter( + |haystack, needles| { + let n1 = needles.get(0).copied()?; + let n2 = needles.get(1).copied()?; + Some(Two::new(n1, n2).iter(haystack).rev().collect()) + }, + ) + } + + #[test] + fn forward_three() { + crate::tests::memchr::Runner::new(3).forward_iter( + |haystack, needles| { + let n1 = needles.get(0).copied()?; + let n2 = needles.get(1).copied()?; + let n3 = needles.get(2).copied()?; + Some(Three::new(n1, n2, n3).iter(haystack).collect()) + }, + ) + } + + #[test] + fn reverse_three() { + crate::tests::memchr::Runner::new(3).reverse_iter( + |haystack, needles| { + let n1 = needles.get(0).copied()?; + let n2 = needles.get(1).copied()?; + let n3 = needles.get(2).copied()?; + Some(Three::new(n1, n2, n3).iter(haystack).rev().collect()) + }, + ) + } + + // This was found by quickcheck in the course of refactoring this crate + // after memchr 2.5.0. 
+ #[test] + fn regression_double_ended_iterator() { + let finder = One::new(b'a'); + let haystack = "a"; + let mut it = finder.iter(haystack.as_bytes()); + assert_eq!(Some(0), it.next()); + assert_eq!(None, it.next_back()); + } + + // This regression test was caught by ripgrep's test suite on i686 when + // upgrading to memchr 2.6. Namely, something about the \x0B bytes here + // screws with the SWAR counting approach I was using. This regression test + // prompted me to remove the SWAR counting approach and just replace it + // with a byte-at-a-time loop. + #[test] + fn regression_count_new_lines() { + let haystack = "01234567\x0b\n\x0b\n\x0b\n\x0b\nx"; + let count = One::new(b'\n').count(haystack.as_bytes()); + assert_eq!(4, count); + } +} diff --git a/vendor/memchr/src/arch/all/mod.rs b/vendor/memchr/src/arch/all/mod.rs new file mode 100644 index 0000000..559cb75 --- /dev/null +++ b/vendor/memchr/src/arch/all/mod.rs @@ -0,0 +1,234 @@ +/*! +Contains architecture independent routines. + +These routines are often used as a "fallback" implementation when the more +specialized architecture dependent routines are unavailable. +*/ + +pub mod memchr; +pub mod packedpair; +pub mod rabinkarp; +#[cfg(feature = "alloc")] +pub mod shiftor; +pub mod twoway; + +/// Returns true if and only if `needle` is a prefix of `haystack`. +/// +/// This uses a latency optimized variant of `memcmp` internally which *might* +/// make this faster for very short strings. +/// +/// # Inlining +/// +/// This routine is marked `inline(always)`. If you want to call this function +/// in a way that is not always inlined, you'll need to wrap a call to it in +/// another function that is marked as `inline(never)` or just `inline`. +#[inline(always)] +pub fn is_prefix(haystack: &[u8], needle: &[u8]) -> bool { + needle.len() <= haystack.len() + && is_equal(&haystack[..needle.len()], needle) +} + +/// Returns true if and only if `needle` is a suffix of `haystack`. 
+/// +/// This uses a latency optimized variant of `memcmp` internally which *might* +/// make this faster for very short strings. +/// +/// # Inlining +/// +/// This routine is marked `inline(always)`. If you want to call this function +/// in a way that is not always inlined, you'll need to wrap a call to it in +/// another function that is marked as `inline(never)` or just `inline`. +#[inline(always)] +pub fn is_suffix(haystack: &[u8], needle: &[u8]) -> bool { + needle.len() <= haystack.len() + && is_equal(&haystack[haystack.len() - needle.len()..], needle) +} + +/// Compare corresponding bytes in `x` and `y` for equality. +/// +/// That is, this returns true if and only if `x.len() == y.len()` and +/// `x[i] == y[i]` for all `0 <= i < x.len()`. +/// +/// # Inlining +/// +/// This routine is marked `inline(always)`. If you want to call this function +/// in a way that is not always inlined, you'll need to wrap a call to it in +/// another function that is marked as `inline(never)` or just `inline`. +/// +/// # Motivation +/// +/// Why not use slice equality instead? Well, slice equality usually results in +/// a call out to the current platform's `libc` which might not be inlineable +/// or have other overhead. This routine isn't guaranteed to be a win, but it +/// might be in some cases. +#[inline(always)] +pub fn is_equal(x: &[u8], y: &[u8]) -> bool { + if x.len() != y.len() { + return false; + } + // SAFETY: Our pointers are derived directly from borrowed slices which + // uphold all of our safety guarantees except for length. We account for + // length with the check above. + unsafe { is_equal_raw(x.as_ptr(), y.as_ptr(), x.len()) } +} + +/// Compare `n` bytes at the given pointers for equality. +/// +/// This returns true if and only if `*x.add(i) == *y.add(i)` for all +/// `0 <= i < n`. +/// +/// # Inlining +/// +/// This routine is marked `inline(always)`. 
If you want to call this function +/// in a way that is not always inlined, you'll need to wrap a call to it in +/// another function that is marked as `inline(never)` or just `inline`. +/// +/// # Motivation +/// +/// Why not use slice equality instead? Well, slice equality usually results in +/// a call out to the current platform's `libc` which might not be inlineable +/// or have other overhead. This routine isn't guaranteed to be a win, but it +/// might be in some cases. +/// +/// # Safety +/// +/// * Both `x` and `y` must be valid for reads of up to `n` bytes. +/// * Both `x` and `y` must point to an initialized value. +/// * Both `x` and `y` must each point to an allocated object and +/// must either be in bounds or at most one byte past the end of the +/// allocated object. `x` and `y` do not need to point to the same allocated +/// object, but they may. +/// * Both `x` and `y` must be _derived from_ a pointer to their respective +/// allocated objects. +/// * The distance between `x` and `x+n` must not overflow `isize`. Similarly +/// for `y` and `y+n`. +/// * The distance being in bounds must not rely on "wrapping around" the +/// address space. +#[inline(always)] +pub unsafe fn is_equal_raw( + mut x: *const u8, + mut y: *const u8, + mut n: usize, +) -> bool { + // When we have 4 or more bytes to compare, then proceed in chunks of 4 at + // a time using unaligned loads. + // + // Also, why do 4 byte loads instead of, say, 8 byte loads? The reason is + // that this particular version of memcmp is likely to be called with tiny + // needles. That means that if we do 8 byte loads, then a higher proportion + // of memcmp calls will use the slower variant below. With that said, this + // is a hypothesis and is only loosely supported by benchmarks. There's + // likely some improvement that could be made here. The main thing here + // though is to optimize for latency, not throughput. 
+ + // SAFETY: The caller is responsible for ensuring the pointers we get are + // valid and readable for at least `n` bytes. We also do unaligned loads, + // so there's no need to ensure we're aligned. (This is justified by this + // routine being specifically for short strings.) + while n >= 4 { + let vx = x.cast::<u32>().read_unaligned(); + let vy = y.cast::<u32>().read_unaligned(); + if vx != vy { + return false; + } + x = x.add(4); + y = y.add(4); + n -= 4; + } + // If we don't have enough bytes to do 4-byte at a time loads, then + // do partial loads. Note that I used to have a byte-at-a-time + // loop here and that turned out to be quite a bit slower for the + // memmem/pathological/defeat-simple-vector-alphabet benchmark. + if n >= 2 { + let vx = x.cast::<u16>().read_unaligned(); + let vy = y.cast::<u16>().read_unaligned(); + if vx != vy { + return false; + } + x = x.add(2); + y = y.add(2); + n -= 2; + } + if n > 0 { + if x.read() != y.read() { + return false; + } + } + true +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn equals_different_lengths() { + assert!(!is_equal(b"", b"a")); + assert!(!is_equal(b"a", b"")); + assert!(!is_equal(b"ab", b"a")); + assert!(!is_equal(b"a", b"ab")); + } + + #[test] + fn equals_mismatch() { + let one_mismatch = [ + (&b"a"[..], &b"x"[..]), + (&b"ab"[..], &b"ax"[..]), + (&b"abc"[..], &b"abx"[..]), + (&b"abcd"[..], &b"abcx"[..]), + (&b"abcde"[..], &b"abcdx"[..]), + (&b"abcdef"[..], &b"abcdex"[..]), + (&b"abcdefg"[..], &b"abcdefx"[..]), + (&b"abcdefgh"[..], &b"abcdefgx"[..]), + (&b"abcdefghi"[..], &b"abcdefghx"[..]), + (&b"abcdefghij"[..], &b"abcdefghix"[..]), + (&b"abcdefghijk"[..], &b"abcdefghijx"[..]), + (&b"abcdefghijkl"[..], &b"abcdefghijkx"[..]), + (&b"abcdefghijklm"[..], &b"abcdefghijklx"[..]), + (&b"abcdefghijklmn"[..], &b"abcdefghijklmx"[..]), + ]; + for (x, y) in one_mismatch { + assert_eq!(x.len(), y.len(), "lengths should match"); + assert!(!is_equal(x, y)); + assert!(!is_equal(y, x)); + } + } + + #[test] + fn 
equals_yes() { + assert!(is_equal(b"", b"")); + assert!(is_equal(b"a", b"a")); + assert!(is_equal(b"ab", b"ab")); + assert!(is_equal(b"abc", b"abc")); + assert!(is_equal(b"abcd", b"abcd")); + assert!(is_equal(b"abcde", b"abcde")); + assert!(is_equal(b"abcdef", b"abcdef")); + assert!(is_equal(b"abcdefg", b"abcdefg")); + assert!(is_equal(b"abcdefgh", b"abcdefgh")); + assert!(is_equal(b"abcdefghi", b"abcdefghi")); + } + + #[test] + fn prefix() { + assert!(is_prefix(b"", b"")); + assert!(is_prefix(b"a", b"")); + assert!(is_prefix(b"ab", b"")); + assert!(is_prefix(b"foo", b"foo")); + assert!(is_prefix(b"foobar", b"foo")); + + assert!(!is_prefix(b"foo", b"fob")); + assert!(!is_prefix(b"foobar", b"fob")); + } + + #[test] + fn suffix() { + assert!(is_suffix(b"", b"")); + assert!(is_suffix(b"a", b"")); + assert!(is_suffix(b"ab", b"")); + assert!(is_suffix(b"foo", b"foo")); + assert!(is_suffix(b"foobar", b"bar")); + + assert!(!is_suffix(b"foo", b"goo")); + assert!(!is_suffix(b"foobar", b"gar")); + } +} diff --git a/vendor/memchr/src/memmem/byte_frequencies.rs b/vendor/memchr/src/arch/all/packedpair/default_rank.rs similarity index 99% rename from vendor/memchr/src/memmem/byte_frequencies.rs rename to vendor/memchr/src/arch/all/packedpair/default_rank.rs index c313b62..6aa3895 100644 --- a/vendor/memchr/src/memmem/byte_frequencies.rs +++ b/vendor/memchr/src/arch/all/packedpair/default_rank.rs @@ -1,4 +1,4 @@ -pub const BYTE_FREQUENCIES: [u8; 256] = [ +pub(crate) const RANK: [u8; 256] = [ 55, // '\x00' 52, // '\x01' 51, // '\x02' diff --git a/vendor/memchr/src/arch/all/packedpair/mod.rs b/vendor/memchr/src/arch/all/packedpair/mod.rs new file mode 100644 index 0000000..148a985 --- /dev/null +++ b/vendor/memchr/src/arch/all/packedpair/mod.rs @@ -0,0 +1,359 @@ +/*! +Provides an architecture independent implementation of the "packed pair" +algorithm. + +The "packed pair" algorithm is based on the [generic SIMD] algorithm. 
The main +difference is that it (by default) uses a background distribution of byte +frequencies to heuristically select the pair of bytes to search for. Note that +this module provides an architecture independent version that doesn't do as +good of a job keeping the search for candidates inside a SIMD hot path. It +however can be good enough in many circumstances. + +[generic SIMD]: http://0x80.pl/articles/simd-strfind.html#first-and-last +*/ + +use crate::memchr; + +mod default_rank; + +/// An architecture independent "packed pair" finder. +/// +/// This finder picks two bytes that it believes have high predictive power for +/// indicating an overall match of a needle. At search time, it reports offsets +/// where the needle could match based on whether the pair of bytes it chose +/// match. +/// +/// This is architecture independent because it utilizes `memchr` to find the +/// occurrence of one of the bytes in the pair, and then checks whether the +/// second byte matches. If it does, in the case of [`Finder::find_prefilter`], +/// the location at which the needle could match is returned. +/// +/// It is generally preferred to use architecture specific routines for a +/// "packed pair" prefilter, but this can be a useful fallback when the +/// architecture specific routines are unavailable. +#[derive(Clone, Copy, Debug)] +pub struct Finder { + pair: Pair, + byte1: u8, + byte2: u8, +} + +impl Finder { + /// Create a new prefilter that reports possible locations where the given + /// needle matches. + #[inline] + pub fn new(needle: &[u8]) -> Option<Finder> { + Finder::with_pair(needle, Pair::new(needle)?) + } + + /// Create a new prefilter using the pair given. + /// + /// If the prefilter could not be constructed, then `None` is returned. + /// + /// This constructor permits callers to control precisely which pair of + /// bytes is used as a predicate. 
+ #[inline] + pub fn with_pair(needle: &[u8], pair: Pair) -> Option<Finder> { + // Checked lookups: a `Pair` built for a different needle may hold + // offsets that are out of bounds here, so yield `None`, not a panic. + let byte1 = *needle.get(usize::from(pair.index1()))?; + let byte2 = *needle.get(usize::from(pair.index2()))?; + Some(Finder { pair, byte1, byte2 }) + } + + /// Run this finder on the given haystack as a prefilter. + /// + /// If a candidate match is found, then an offset where the needle *could* + /// begin in the haystack is returned. + #[inline] + pub fn find_prefilter(&self, haystack: &[u8]) -> Option<usize> { + let mut i = 0; + let index1 = usize::from(self.pair.index1()); + let index2 = usize::from(self.pair.index2()); + loop { + // Use a fast vectorized implementation to skip to the next + // occurrence of the rarest byte (heuristically chosen) in the + // needle. + i += memchr(self.byte1, &haystack[i..])?; + let found = i; + i += 1; + + // If we can't align our first byte match with the haystack, then a + // match is impossible. + let aligned1 = match found.checked_sub(index1) { + None => continue, + Some(aligned1) => aligned1, + }; + + // Now align the second byte match with the haystack. A mismatch + // means that a match is impossible. + let aligned2 = match aligned1.checked_add(index2) { + None => continue, + Some(aligned_index2) => aligned_index2, + }; + if haystack.get(aligned2).map_or(true, |&b| b != self.byte2) { + continue; + } + + // We've done what we can. There might be a match here. + return Some(aligned1); + } + } + + /// Returns the pair of offsets (into the needle) used to check as a + /// predicate before confirming whether a needle exists at a particular + /// position. + #[inline] + pub fn pair(&self) -> &Pair { + &self.pair + } +} + +/// A pair of byte offsets into a needle to use as a predicate. +/// +/// This pair is used as a predicate to quickly filter out positions in a +/// haystack in which a needle cannot match. 
In some cases, this pair can even +/// be used in vector algorithms such that the vector algorithm only switches +/// over to scalar code once this pair has been found. +/// +/// A pair of offsets can be used in both substring search implementations and +/// in prefilters. The former will report matches of a needle in a haystack +/// whereas the latter will only report possible matches of a needle. +/// +/// The offsets are limited each to a maximum of 255 to keep memory usage low. +/// Moreover, it's rarely advantageous to create a predicate using offsets +/// greater than 255 anyway. +/// +/// The only guarantee enforced on the pair of offsets is that they are not +/// equivalent. It is not necessarily the case that `index1 < index2` for +/// example. By convention, `index1` corresponds to the byte in the needle +/// that is believed to be the most predictive. Note also that because of the +/// requirement that the indices be both valid for the needle used to build +/// the pair and not equal, it follows that a pair can only be constructed for +/// needles with length at least 2. +#[derive(Clone, Copy, Debug)] +pub struct Pair { + index1: u8, + index2: u8, +} + +impl Pair { + /// Create a new pair of offsets from the given needle. + /// + /// If a pair could not be created (for example, if the needle is too + /// short), then `None` is returned. + /// + /// This chooses the pair in the needle that is believed to be as + /// predictive of an overall match of the needle as possible. + #[inline] + pub fn new(needle: &[u8]) -> Option<Pair> { + Pair::with_ranker(needle, DefaultFrequencyRank) + } + + /// Create a new pair of offsets from the given needle and ranker. + /// + /// This permits the caller to choose a background frequency distribution + /// with which bytes are selected. The idea is to select a pair of bytes + /// that is believed to strongly predict a match in the haystack. This + /// usually means selecting bytes that occur rarely in a haystack. 
+ /// + /// If a pair could not be created (for example, if the needle is too + /// short), then `None` is returned. + #[inline] + pub fn with_ranker<R: HeuristicFrequencyRank>( + needle: &[u8], + ranker: R, + ) -> Option<Pair> { + if needle.len() <= 1 { + return None; + } + // Find the rarest two bytes. We make them distinct indices by + // construction. (The actual byte value may be the same in degenerate + // cases, but that's OK.) + let (mut rare1, mut index1) = (needle[0], 0); + let (mut rare2, mut index2) = (needle[1], 1); + if ranker.rank(rare2) < ranker.rank(rare1) { + core::mem::swap(&mut rare1, &mut rare2); + core::mem::swap(&mut index1, &mut index2); + } + let max = usize::from(core::u8::MAX); + for (i, &b) in needle.iter().enumerate().take(max).skip(2) { + if ranker.rank(b) < ranker.rank(rare1) { + rare2 = rare1; + index2 = index1; + rare1 = b; + index1 = u8::try_from(i).unwrap(); + } else if b != rare1 && ranker.rank(b) < ranker.rank(rare2) { + rare2 = b; + index2 = u8::try_from(i).unwrap(); + } + } + // While not strictly required for how a Pair is normally used, we + // really don't want these to be equivalent. If they were, it would + // reduce the effectiveness of candidate searching using these rare + // bytes by increasing the rate of false positives. + assert_ne!(index1, index2); + Some(Pair { index1, index2 }) + } + + /// Create a new pair using the offsets given for the needle given. + /// + /// This bypasses any sort of heuristic process for choosing the offsets + /// and permits the caller to choose the offsets themselves. + /// + /// Indices are limited to valid `u8` values so that a `Pair` uses less + /// memory. It is not possible to create a `Pair` with offsets bigger than + /// `u8::MAX`. It's likely that such a thing is not needed, but if it is, + /// it's suggested to build your own bespoke algorithm because you're + /// likely working on a very niche case. (File an issue if this suggestion + /// does not make sense to you.) 
+ /// + /// If a pair could not be created (for example, if the needle is too + /// short), then `None` is returned. + #[inline] + pub fn with_indices( + needle: &[u8], + index1: u8, + index2: u8, + ) -> Option<Pair> { + // While not strictly required for how a Pair is normally used, we + // really don't want these to be equivalent. If they were, it would + // reduce the effectiveness of candidate searching using these rare + // bytes by increasing the rate of false positives. + if index1 == index2 { + return None; + } + // Similarly, invalid indices means the Pair is invalid too. + if usize::from(index1) >= needle.len() { + return None; + } + if usize::from(index2) >= needle.len() { + return None; + } + Some(Pair { index1, index2 }) + } + + /// Returns the first offset of the pair. + #[inline] + pub fn index1(&self) -> u8 { + self.index1 + } + + /// Returns the second offset of the pair. + #[inline] + pub fn index2(&self) -> u8 { + self.index2 + } +} + +/// This trait allows the user to customize the heuristic used to determine the +/// relative frequency of a given byte in the dataset being searched. +/// +/// The use of this trait can have a dramatic impact on performance depending +/// on the type of data being searched. The details of why are explained in the +/// docs of [`crate::memmem::Prefilter`]. To summarize, the core algorithm uses +/// a prefilter to quickly identify candidate matches that are later verified +/// more slowly. This prefilter is implemented in terms of trying to find +/// `rare` bytes at specific offsets that will occur less frequently in the +/// dataset. While the concept of a `rare` byte is similar for most datasets, +/// there are some specific datasets (like binary executables) that have +/// dramatically different byte distributions. For these datasets customizing +/// the byte frequency heuristic can have a massive impact on performance, and +/// might even need to be done at runtime. 
+/// +/// The default implementation of `HeuristicFrequencyRank` reads from the +/// static frequency table defined in `src/memmem/byte_frequencies.rs`. This +/// is optimal for most inputs, so if you are unsure of the impact of using a +/// custom `HeuristicFrequencyRank` you should probably just use the default. +/// +/// # Example +/// +/// ``` +/// use memchr::{ +/// arch::all::packedpair::HeuristicFrequencyRank, +/// memmem::FinderBuilder, +/// }; +/// +/// /// A byte-frequency table that is good for scanning binary executables. +/// struct Binary; +/// +/// impl HeuristicFrequencyRank for Binary { +/// fn rank(&self, byte: u8) -> u8 { +/// const TABLE: [u8; 256] = [ +/// 255, 128, 61, 43, 50, 41, 27, 28, 57, 15, 21, 13, 24, 17, 17, +/// 89, 58, 16, 11, 7, 14, 23, 7, 6, 24, 9, 6, 5, 9, 4, 7, 16, +/// 68, 11, 9, 6, 88, 7, 4, 4, 23, 9, 4, 8, 8, 5, 10, 4, 30, 11, +/// 9, 24, 11, 5, 5, 5, 19, 11, 6, 17, 9, 9, 6, 8, +/// 48, 58, 11, 14, 53, 40, 9, 9, 254, 35, 3, 6, 52, 23, 6, 6, 27, +/// 4, 7, 11, 14, 13, 10, 11, 11, 5, 2, 10, 16, 12, 6, 19, +/// 19, 20, 5, 14, 16, 31, 19, 7, 14, 20, 4, 4, 19, 8, 18, 20, 24, +/// 1, 25, 19, 58, 29, 10, 5, 15, 20, 2, 2, 9, 4, 3, 5, +/// 51, 11, 4, 53, 23, 39, 6, 4, 13, 81, 4, 186, 5, 67, 3, 2, 15, +/// 0, 0, 1, 3, 2, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0, +/// 12, 2, 1, 1, 3, 1, 1, 1, 6, 1, 2, 1, 3, 1, 1, 2, 9, 1, 1, 0, +/// 2, 2, 4, 4, 11, 6, 7, 3, 6, 9, 4, 5, +/// 46, 18, 8, 18, 17, 3, 8, 20, 16, 10, 3, 7, 175, 4, 6, 7, 13, +/// 3, 7, 3, 3, 1, 3, 3, 10, 3, 1, 5, 2, 0, 1, 2, +/// 16, 3, 5, 1, 6, 1, 1, 2, 58, 20, 3, 14, 12, 2, 1, 3, 16, 3, 5, +/// 8, 3, 1, 8, 6, 17, 6, 5, 3, 8, 6, 13, 175, +/// ]; +/// TABLE[byte as usize] +/// } +/// } +/// // Create a new finder with the custom heuristic. +/// let finder = FinderBuilder::new() +/// .build_forward_with_ranker(Binary, b"\x00\x00\xdd\xdd"); +/// // Find needle with custom heuristic. 
+/// assert!(finder.find(b"\x00\x00\x00\xdd\xdd").is_some()); +/// ``` +pub trait HeuristicFrequencyRank { + /// Return the heuristic frequency rank of the given byte. A lower rank + /// means the byte is believed to occur less frequently in the haystack. + /// + /// Some uses of this heuristic may treat arbitrary absolute rank values as + /// significant. For example, an implementation detail in this crate may + /// determine that heuristic prefilters are inappropriate if every byte in + /// the needle has a "high" rank. + fn rank(&self, byte: u8) -> u8; +} + +/// The default byte frequency heuristic that is good for most haystacks. +pub(crate) struct DefaultFrequencyRank; + +impl HeuristicFrequencyRank for DefaultFrequencyRank { + fn rank(&self, byte: u8) -> u8 { + self::default_rank::RANK[usize::from(byte)] + } +} + +/// This permits passing any implementation of `HeuristicFrequencyRank` as a +/// borrowed version of itself. +impl<'a, R> HeuristicFrequencyRank for &'a R +where + R: HeuristicFrequencyRank, +{ + fn rank(&self, byte: u8) -> u8 { + (**self).rank(byte) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn forward_packedpair() { + fn find( + haystack: &[u8], + needle: &[u8], + _index1: u8, + _index2: u8, + ) -> Option> { + // We ignore the index positions requested since it winds up making + // this test too slow overall. + let f = Finder::new(needle)?; + Some(f.find_prefilter(haystack)) + } + crate::tests::packedpair::Runner::new().fwd(find).run() + } +} diff --git a/vendor/memchr/src/arch/all/rabinkarp.rs b/vendor/memchr/src/arch/all/rabinkarp.rs new file mode 100644 index 0000000..e0bafba --- /dev/null +++ b/vendor/memchr/src/arch/all/rabinkarp.rs @@ -0,0 +1,390 @@ +/*! +An implementation of the [Rabin-Karp substring search algorithm][rabinkarp]. + +Rabin-Karp works by creating a hash of the needle provided and then computing +a rolling hash for each needle sized window in the haystack. 
When the rolling +hash matches the hash of the needle, a byte-wise comparison is done to check +if a match exists. The worst case time complexity of Rabin-Karp is `O(m * +n)` where `m ~ len(needle)` and `n ~ len(haystack)`. Its worst case space +complexity is constant. + +The main utility of Rabin-Karp is that the searcher can be constructed very +quickly with very little memory. This makes it especially useful when searching +for small needles in small haystacks, as it might finish its search before a +beefier algorithm (like Two-Way) even starts. + +[rabinkarp]: https://en.wikipedia.org/wiki/Rabin%E2%80%93Karp_algorithm +*/ + +/* +(This was the comment I wrote for this module originally when it was not +exposed. The comment still looks useful, but it's a bit in the weeds, so it's +not public itself.) + +This module implements the classical Rabin-Karp substring search algorithm, +with no extra frills. While its use would seem to break our time complexity +guarantee of O(m+n) (RK's time complexity is O(mn)), we are careful to only +ever use RK on a constant subset of haystacks. The main point here is that +RK has good latency properties for small needles/haystacks. It's very quick +to compute a needle hash and zip through the haystack when compared to +initializing Two-Way, for example. And this is especially useful for cases +where the haystack is just too short for vector instructions to do much good. + +The hashing function used here is the same one recommended by ESMAJ. + +Another choice instead of Rabin-Karp would be Shift-Or. But its latency +isn't quite as good since its preprocessing time is a bit more expensive +(both in practice and in theory). However, perhaps Shift-Or has a place +somewhere else for short patterns. I think the main problem is that it +requires space proportional to the alphabet and the needle. If we, for +example, supported needles up to length 16, then the total table size would be +len(alphabet)*size_of::()==512 bytes. 
Which isn't exactly small, and it's +probably bad to put that on the stack. So ideally, we'd throw it on the heap, +but we'd really like to write as much code without using alloc/std as possible. +But maybe it's worth the special casing. It's a TODO to benchmark. + +Wikipedia has a decent explanation, if a bit heavy on the theory: +https://en.wikipedia.org/wiki/Rabin%E2%80%93Karp_algorithm + +But ESMAJ provides something a bit more concrete: +http://www-igm.univ-mlv.fr/~lecroq/string/node5.html + +Finally, aho-corasick uses Rabin-Karp for multiple pattern match in some cases: +https://github.com/BurntSushi/aho-corasick/blob/3852632f10587db0ff72ef29e88d58bf305a0946/src/packed/rabinkarp.rs +*/ + +use crate::ext::Pointer; + +/// A forward substring searcher using the Rabin-Karp algorithm. +/// +/// Note that, as a lower level API, a `Finder` does not have access to the +/// needle it was constructed with. For this reason, executing a search +/// with a `Finder` requires passing both the needle and the haystack, +/// where the needle is exactly equivalent to the one given to the `Finder` +/// at construction time. This design was chosen so that callers can have +/// more precise control over where and how many times a needle is stored. +/// For example, in cases where Rabin-Karp is just one of several possible +/// substring search algorithms. +#[derive(Clone, Debug)] +pub struct Finder { + /// The actual hash. + hash: Hash, + /// The factor needed to multiply a byte by in order to subtract it from + /// the hash. It is defined to be 2^(n-1) (using wrapping exponentiation), + /// where n is the length of the needle. This is how we "remove" a byte + /// from the hash once the hash window rolls past it. + hash_2pow: u32, +} + +impl Finder { + /// Create a new Rabin-Karp forward searcher for the given `needle`. + /// + /// The needle may be empty. The empty needle matches at every byte offset. 
+ /// + /// Note that callers must pass the same needle to all search calls using + /// this `Finder`. + #[inline] + pub fn new(needle: &[u8]) -> Finder { + let mut s = Finder { hash: Hash::new(), hash_2pow: 1 }; + let first_byte = match needle.get(0) { + None => return s, + Some(&first_byte) => first_byte, + }; + s.hash.add(first_byte); + for b in needle.iter().copied().skip(1) { + s.hash.add(b); + s.hash_2pow = s.hash_2pow.wrapping_shl(1); + } + s + } + + /// Return the first occurrence of the `needle` in the `haystack` + /// given. If no such occurrence exists, then `None` is returned. + /// + /// The `needle` provided must match the needle given to this finder at + /// construction time. + /// + /// The maximum value this can return is `haystack.len()`, which can only + /// occur when the needle and haystack both have length zero. Otherwise, + /// for non-empty haystacks, the maximum value is `haystack.len() - 1`. + #[inline] + pub fn find(&self, haystack: &[u8], needle: &[u8]) -> Option { + unsafe { + let hstart = haystack.as_ptr(); + let hend = hstart.add(haystack.len()); + let nstart = needle.as_ptr(); + let nend = nstart.add(needle.len()); + let found = self.find_raw(hstart, hend, nstart, nend)?; + Some(found.distance(hstart)) + } + } + + /// Like `find`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `<= end`. The pointer returned is only ever equivalent + /// to `end` when both the needle and haystack are empty. (That is, the + /// empty string matches the empty string.) + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// Note that `start` and `end` below refer to both pairs of pointers given + /// to this routine. That is, the conditions apply to both `hstart`/`hend` + /// and `nstart`/`nend`. 
+ /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// * It must be the case that `start <= end`. + #[inline] + pub unsafe fn find_raw( + &self, + hstart: *const u8, + hend: *const u8, + nstart: *const u8, + nend: *const u8, + ) -> Option<*const u8> { + let hlen = hend.distance(hstart); + let nlen = nend.distance(nstart); + if nlen > hlen { + return None; + } + let mut cur = hstart; + let end = hend.sub(nlen); + let mut hash = Hash::forward(cur, cur.add(nlen)); + loop { + if self.hash == hash && is_equal_raw(cur, nstart, nlen) { + return Some(cur); + } + if cur >= end { + return None; + } + hash.roll(self, cur.read(), cur.add(nlen).read()); + cur = cur.add(1); + } + } +} + +/// A reverse substring searcher using the Rabin-Karp algorithm. +#[derive(Clone, Debug)] +pub struct FinderRev(Finder); + +impl FinderRev { + /// Create a new Rabin-Karp reverse searcher for the given `needle`. + #[inline] + pub fn new(needle: &[u8]) -> FinderRev { + let mut s = FinderRev(Finder { hash: Hash::new(), hash_2pow: 1 }); + let last_byte = match needle.last() { + None => return s, + Some(&last_byte) => last_byte, + }; + s.0.hash.add(last_byte); + for b in needle.iter().rev().copied().skip(1) { + s.0.hash.add(b); + s.0.hash_2pow = s.0.hash_2pow.wrapping_shl(1); + } + s + } + + /// Return the last occurrence of the `needle` in the `haystack` + /// given. If no such occurrence exists, then `None` is returned. 
+ /// + /// The `needle` provided must match the needle given to this finder at + /// construction time. + /// + /// The maximum value this can return is `haystack.len()`, which can only + /// occur when the needle and haystack both have length zero. Otherwise, + /// for non-empty haystacks, the maximum value is `haystack.len() - 1`. + #[inline] + pub fn rfind(&self, haystack: &[u8], needle: &[u8]) -> Option { + unsafe { + let hstart = haystack.as_ptr(); + let hend = hstart.add(haystack.len()); + let nstart = needle.as_ptr(); + let nend = nstart.add(needle.len()); + let found = self.rfind_raw(hstart, hend, nstart, nend)?; + Some(found.distance(hstart)) + } + } + + /// Like `rfind`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `<= end`. The pointer returned is only ever equivalent + /// to `end` when both the needle and haystack are empty. (That is, the + /// empty string matches the empty string.) + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// Note that `start` and `end` below refer to both pairs of pointers given + /// to this routine. That is, the conditions apply to both `hstart`/`hend` + /// and `nstart`/`nend`. + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// * It must be the case that `start <= end`. 
+ #[inline] + pub unsafe fn rfind_raw( + &self, + hstart: *const u8, + hend: *const u8, + nstart: *const u8, + nend: *const u8, + ) -> Option<*const u8> { + let hlen = hend.distance(hstart); + let nlen = nend.distance(nstart); + if nlen > hlen { + return None; + } + let mut cur = hend.sub(nlen); + let start = hstart; + let mut hash = Hash::reverse(cur, cur.add(nlen)); + loop { + if self.0.hash == hash && is_equal_raw(cur, nstart, nlen) { + return Some(cur); + } + if cur <= start { + return None; + } + cur = cur.sub(1); + hash.roll(&self.0, cur.add(nlen).read(), cur.read()); + } + } +} + +/// Whether RK is believed to be very fast for the given needle/haystack. +#[inline] +pub(crate) fn is_fast(haystack: &[u8], _needle: &[u8]) -> bool { + haystack.len() < 16 +} + +/// A Rabin-Karp hash. This might represent the hash of a needle, or the hash +/// of a rolling window in the haystack. +#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] +struct Hash(u32); + +impl Hash { + /// Create a new hash that represents the empty string. + #[inline(always)] + fn new() -> Hash { + Hash(0) + } + + /// Create a new hash from the bytes given for use in forward searches. + /// + /// # Safety + /// + /// The given pointers must be valid to read from within their range. + #[inline(always)] + unsafe fn forward(mut start: *const u8, end: *const u8) -> Hash { + let mut hash = Hash::new(); + while start < end { + hash.add(start.read()); + start = start.add(1); + } + hash + } + + /// Create a new hash from the bytes given for use in reverse searches. + /// + /// # Safety + /// + /// The given pointers must be valid to read from within their range. + #[inline(always)] + unsafe fn reverse(start: *const u8, mut end: *const u8) -> Hash { + let mut hash = Hash::new(); + while start < end { + end = end.sub(1); + hash.add(end.read()); + } + hash + } + + /// Add 'new' and remove 'old' from this hash. 
The given needle hash should + /// correspond to the hash computed for the needle being searched for. + /// + /// This is meant to be used when the rolling window of the haystack is + /// advanced. + #[inline(always)] + fn roll(&mut self, finder: &Finder, old: u8, new: u8) { + self.del(finder, old); + self.add(new); + } + + /// Add a byte to this hash. + #[inline(always)] + fn add(&mut self, byte: u8) { + self.0 = self.0.wrapping_shl(1).wrapping_add(u32::from(byte)); + } + + /// Remove a byte from this hash. The given needle hash should correspond + /// to the hash computed for the needle being searched for. + #[inline(always)] + fn del(&mut self, finder: &Finder, byte: u8) { + let factor = finder.hash_2pow; + self.0 = self.0.wrapping_sub(u32::from(byte).wrapping_mul(factor)); + } +} + +/// Returns true when `x[i] == y[i]` for all `0 <= i < n`. +/// +/// We forcefully don't inline this to hint at the compiler that it is unlikely +/// to be called. This causes the inner rabinkarp loop above to be a bit +/// tighter and leads to some performance improvement. See the +/// memmem/krate/prebuilt/sliceslice-words/words benchmark. +/// +/// # Safety +/// +/// Same as `crate::arch::all::is_equal_raw`. 
+#[cold] +#[inline(never)] +unsafe fn is_equal_raw(x: *const u8, y: *const u8, n: usize) -> bool { + crate::arch::all::is_equal_raw(x, y, n) +} + +#[cfg(test)] +mod tests { + use super::*; + + define_substring_forward_quickcheck!(|h, n| Some( + Finder::new(n).find(h, n) + )); + define_substring_reverse_quickcheck!(|h, n| Some( + FinderRev::new(n).rfind(h, n) + )); + + #[test] + fn forward() { + crate::tests::substring::Runner::new() + .fwd(|h, n| Some(Finder::new(n).find(h, n))) + .run(); + } + + #[test] + fn reverse() { + crate::tests::substring::Runner::new() + .rev(|h, n| Some(FinderRev::new(n).rfind(h, n))) + .run(); + } +} diff --git a/vendor/memchr/src/arch/all/shiftor.rs b/vendor/memchr/src/arch/all/shiftor.rs new file mode 100644 index 0000000..b690564 --- /dev/null +++ b/vendor/memchr/src/arch/all/shiftor.rs @@ -0,0 +1,89 @@ +/*! +An implementation of the [Shift-Or substring search algorithm][shiftor]. + +[shiftor]: https://en.wikipedia.org/wiki/Bitap_algorithm +*/ + +use alloc::boxed::Box; + +/// The type of our mask. +/// +/// While we don't expose anyway to configure this in the public API, if one +/// really needs less memory usage or support for longer needles, then it is +/// suggested to copy the code from this module and modify it to fit your +/// needs. The code below is written to be correct regardless of whether Mask +/// is a u8, u16, u32, u64 or u128. +type Mask = u16; + +/// A forward substring searcher using the Shift-Or algorithm. +#[derive(Debug)] +pub struct Finder { + masks: Box<[Mask; 256]>, + needle_len: usize, +} + +impl Finder { + const MAX_NEEDLE_LEN: usize = (Mask::BITS - 1) as usize; + + /// Create a new Shift-Or forward searcher for the given `needle`. + /// + /// The needle may be empty. The empty needle matches at every byte offset. 
+ #[inline] + pub fn new(needle: &[u8]) -> Option { + let needle_len = needle.len(); + if needle_len > Finder::MAX_NEEDLE_LEN { + // A match is found when bit 7 is set in 'result' in the search + // routine below. So our needle can't be bigger than 7. We could + // permit bigger needles by using u16, u32 or u64 for our mask + // entries. But this is all we need for this example. + return None; + } + let mut searcher = Finder { masks: Box::from([!0; 256]), needle_len }; + for (i, &byte) in needle.iter().enumerate() { + searcher.masks[usize::from(byte)] &= !(1 << i); + } + Some(searcher) + } + + /// Return the first occurrence of the needle given to `Finder::new` in + /// the `haystack` given. If no such occurrence exists, then `None` is + /// returned. + /// + /// Unlike most other substring search implementations in this crate, this + /// finder does not require passing the needle at search time. A match can + /// be determined without the needle at all since the required information + /// is already encoded into this finder at construction time. + /// + /// The maximum value this can return is `haystack.len()`, which can only + /// occur when the needle and haystack both have length zero. Otherwise, + /// for non-empty haystacks, the maximum value is `haystack.len() - 1`. 
+ #[inline] + pub fn find(&self, haystack: &[u8]) -> Option { + if self.needle_len == 0 { + return Some(0); + } + let mut result = !1; + for (i, &byte) in haystack.iter().enumerate() { + result |= self.masks[usize::from(byte)]; + result <<= 1; + if result & (1 << self.needle_len) == 0 { + return Some(i + 1 - self.needle_len); + } + } + None + } +} + +#[cfg(test)] +mod tests { + use super::*; + + define_substring_forward_quickcheck!(|h, n| Some(Finder::new(n)?.find(h))); + + #[test] + fn forward() { + crate::tests::substring::Runner::new() + .fwd(|h, n| Some(Finder::new(n)?.find(h))) + .run(); + } +} diff --git a/vendor/memchr/src/memmem/twoway.rs b/vendor/memchr/src/arch/all/twoway.rs similarity index 79% rename from vendor/memchr/src/memmem/twoway.rs rename to vendor/memchr/src/arch/all/twoway.rs index 7f82ed1..0df3b4a 100644 --- a/vendor/memchr/src/memmem/twoway.rs +++ b/vendor/memchr/src/arch/all/twoway.rs @@ -1,31 +1,62 @@ +/*! +An implementation of the [Two-Way substring search algorithm][two-way]. + +[`Finder`] can be built for forward searches, while [`FinderRev`] can be built +for reverse searches. + +Two-Way makes for a nice general purpose substring search algorithm because of +its time and space complexity properties. It also performs well in practice. +Namely, with `m = len(needle)` and `n = len(haystack)`, Two-Way takes `O(m)` +time to create a finder, `O(1)` space and `O(n)` search time. In other words, +the preprocessing step is quick, doesn't require any heap memory and the worst +case search time is guaranteed to be linear in the haystack regardless of the +size of the needle. + +While vector algorithms will usually beat Two-Way handedly, vector algorithms +also usually have pathological or edge cases that are better handled by Two-Way. +Moreover, not all targets support vector algorithms or implementations for them +simply may not exist yet. + +Two-Way can be found in the `memmem` implementations in at least [GNU libc] and +[musl]. 
+ +[two-way]: https://en.wikipedia.org/wiki/Two-way_string-matching_algorithm +[GNU libc]: https://www.gnu.org/software/libc/ +[musl]: https://www.musl-libc.org/ +*/ + use core::cmp; -use crate::memmem::{prefilter::Pre, util}; +use crate::{ + arch::all::{is_prefix, is_suffix}, + memmem::Pre, +}; -/// Two-Way search in the forward direction. +/// A forward substring searcher that uses the Two-Way algorithm. #[derive(Clone, Copy, Debug)] -pub(crate) struct Forward(TwoWay); +pub struct Finder(TwoWay); -/// Two-Way search in the reverse direction. +/// A reverse substring searcher that uses the Two-Way algorithm. #[derive(Clone, Copy, Debug)] -pub(crate) struct Reverse(TwoWay); +pub struct FinderRev(TwoWay); -/// An implementation of the TwoWay substring search algorithm, with heuristics -/// for accelerating search based on frequency analysis. +/// An implementation of the TwoWay substring search algorithm. /// /// This searcher supports forward and reverse search, although not -/// simultaneously. It runs in O(n + m) time and O(1) space, where +/// simultaneously. It runs in `O(n + m)` time and `O(1)` space, where /// `n ~ len(needle)` and `m ~ len(haystack)`. /// /// The implementation here roughly matches that which was developed by /// Crochemore and Perrin in their 1991 paper "Two-way string-matching." The /// changes in this implementation are 1) the use of zero-based indices, 2) a /// heuristic skip table based on the last byte (borrowed from Rust's standard -/// library) and 3) the addition of heuristics for a fast skip loop. That is, -/// (3) this will detect bytes that are believed to be rare in the needle and -/// use fast vectorized instructions to find their occurrences quickly. The -/// Two-Way algorithm is then used to confirm whether a match at that location -/// occurred. +/// library) and 3) the addition of heuristics for a fast skip loop. 
For (3), +/// callers can pass any kind of prefilter they want, but usually it's one +/// based on a heuristic that uses an approximate background frequency of bytes +/// to choose rare bytes to quickly look for candidate match positions. Note +/// though that currently, this prefilter functionality is not exposed directly +/// in the public API. (File an issue if you want it and provide a use case +/// please.) /// /// The heuristic for fast skipping is automatically shut off if it's /// detected to be ineffective at search time. Generally, this only occurs in @@ -36,20 +67,20 @@ pub(crate) struct Reverse(TwoWay); /// likely necessary to read the Two-Way paper cited above in order to fully /// grok this code. The essence of it is: /// -/// 1) Do something to detect a "critical" position in the needle. -/// 2) For the current position in the haystack, look if needle[critical..] -/// matches at that position. -/// 3) If so, look if needle[..critical] matches. -/// 4) If a mismatch occurs, shift the search by some amount based on the -/// critical position and a pre-computed shift. +/// 1. Do something to detect a "critical" position in the needle. +/// 2. For the current position in the haystack, look if `needle[critical..]` +/// matches at that position. +/// 3. If so, look if `needle[..critical]` matches. +/// 4. If a mismatch occurs, shift the search by some amount based on the +/// critical position and a pre-computed shift. /// -/// This type is wrapped in Forward and Reverse types that expose consistent -/// forward or reverse APIs. +/// This type is wrapped in the forward and reverse finders that expose +/// consistent forward or reverse APIs. #[derive(Clone, Copy, Debug)] struct TwoWay { - /// A small bitset used as a quick prefilter (in addition to the faster - /// SIMD based prefilter). Namely, a bit 'i' is set if and only if b%64==i - /// for any b in the needle. 
+ /// A small bitset used as a quick prefilter (in addition to any prefilter + /// given by the caller). Namely, a bit `i` is set if and only if `b%64==i` + /// for any `b == needle[i]`. /// /// When used as a prefilter, if the last byte at the current candidate /// position is NOT in this set, then we can skip that entire candidate @@ -74,14 +105,13 @@ struct TwoWay { shift: Shift, } -impl Forward { - /// Create a searcher that uses the Two-Way algorithm by searching forwards - /// through any haystack. - pub(crate) fn new(needle: &[u8]) -> Forward { - if needle.is_empty() { - return Forward(TwoWay::empty()); - } - +impl Finder { + /// Create a searcher that finds occurrences of the given `needle`. + /// + /// An empty `needle` results in a match at every position in a haystack, + /// including at `haystack.len()`. + #[inline] + pub fn new(needle: &[u8]) -> Finder { let byteset = ApproximateByteSet::new(needle); let min_suffix = Suffix::forward(needle, SuffixKind::Minimal); let max_suffix = Suffix::forward(needle, SuffixKind::Maximal); @@ -92,27 +122,38 @@ impl Forward { (max_suffix.period, max_suffix.pos) }; let shift = Shift::forward(needle, period_lower_bound, critical_pos); - Forward(TwoWay { byteset, critical_pos, shift }) + Finder(TwoWay { byteset, critical_pos, shift }) } - /// Find the position of the first occurrence of this searcher's needle in - /// the given haystack. If one does not exist, then return None. + /// Returns the first occurrence of `needle` in the given `haystack`, or + /// `None` if no such occurrence could be found. + /// + /// The `needle` given must be the same as the `needle` provided to + /// [`Finder::new`]. /// - /// This accepts prefilter state that is useful when using the same - /// searcher multiple times, such as in an iterator. + /// An empty `needle` results in a match at every position in a haystack, + /// including at `haystack.len()`. 
+ #[inline] + pub fn find(&self, haystack: &[u8], needle: &[u8]) -> Option { + self.find_with_prefilter(None, haystack, needle) + } + + /// This is like [`Finder::find`], but it accepts a prefilter for + /// accelerating searches. /// - /// Callers must guarantee that the needle is non-empty and its length is - /// <= the haystack's length. + /// Currently this is not exposed in the public API because, at the time + /// of writing, I didn't want to spend time thinking about how to expose + /// the prefilter infrastructure (if at all). If you have a compelling use + /// case for exposing this routine, please create an issue. Do *not* open + /// a PR that just exposes `Pre` and friends. Exporting this routine will + /// require API design. #[inline(always)] - pub(crate) fn find( + pub(crate) fn find_with_prefilter( &self, - pre: Option<&mut Pre<'_>>, + pre: Option>, haystack: &[u8], needle: &[u8], ) -> Option { - debug_assert!(!needle.is_empty(), "needle should not be empty"); - debug_assert!(needle.len() <= haystack.len(), "haystack too short"); - match self.0.shift { Shift::Small { period } => { self.find_small_imp(pre, haystack, needle, period) @@ -123,25 +164,6 @@ impl Forward { } } - /// Like find, but handles the degenerate substring test cases. This is - /// only useful for conveniently testing this substring implementation in - /// isolation. - #[cfg(test)] - fn find_general( - &self, - pre: Option<&mut Pre<'_>>, - haystack: &[u8], - needle: &[u8], - ) -> Option { - if needle.is_empty() { - Some(0) - } else if haystack.len() < needle.len() { - None - } else { - self.find(pre, haystack, needle) - } - } - // Each of the two search implementations below can be accelerated by a // prefilter, but it is not always enabled. 
To avoid its overhead when // its disabled, we explicitly inline each search implementation based on @@ -151,19 +173,22 @@ impl Forward { #[inline(always)] fn find_small_imp( &self, - mut pre: Option<&mut Pre<'_>>, + mut pre: Option>, haystack: &[u8], needle: &[u8], period: usize, ) -> Option { - let last_byte = needle.len() - 1; let mut pos = 0; let mut shift = 0; + let last_byte_pos = match needle.len().checked_sub(1) { + None => return Some(pos), + Some(last_byte) => last_byte, + }; while pos + needle.len() <= haystack.len() { let mut i = cmp::max(self.0.critical_pos, shift); if let Some(pre) = pre.as_mut() { - if pre.should_call() { - pos += pre.call(&haystack[pos..], needle)?; + if pre.is_effective() { + pos += pre.find(&haystack[pos..])?; shift = 0; i = self.0.critical_pos; if pos + needle.len() > haystack.len() { @@ -171,7 +196,7 @@ impl Forward { } } } - if !self.0.byteset.contains(haystack[pos + last_byte]) { + if !self.0.byteset.contains(haystack[pos + last_byte_pos]) { pos += needle.len(); shift = 0; continue; @@ -200,24 +225,27 @@ impl Forward { #[inline(always)] fn find_large_imp( &self, - mut pre: Option<&mut Pre<'_>>, + mut pre: Option>, haystack: &[u8], needle: &[u8], shift: usize, ) -> Option { - let last_byte = needle.len() - 1; let mut pos = 0; + let last_byte_pos = match needle.len().checked_sub(1) { + None => return Some(pos), + Some(last_byte) => last_byte, + }; 'outer: while pos + needle.len() <= haystack.len() { if let Some(pre) = pre.as_mut() { - if pre.should_call() { - pos += pre.call(&haystack[pos..], needle)?; + if pre.is_effective() { + pos += pre.find(&haystack[pos..])?; if pos + needle.len() > haystack.len() { return None; } } } - if !self.0.byteset.contains(haystack[pos + last_byte]) { + if !self.0.byteset.contains(haystack[pos + last_byte_pos]) { pos += needle.len(); continue; } @@ -241,14 +269,13 @@ impl Forward { } } -impl Reverse { - /// Create a searcher that uses the Two-Way algorithm by searching in - /// reverse through any 
haystack. - pub(crate) fn new(needle: &[u8]) -> Reverse { - if needle.is_empty() { - return Reverse(TwoWay::empty()); - } - +impl FinderRev { + /// Create a searcher that finds occurrences of the given `needle`. + /// + /// An empty `needle` results in a match at every position in a haystack, + /// including at `haystack.len()`. + #[inline] + pub fn new(needle: &[u8]) -> FinderRev { let byteset = ApproximateByteSet::new(needle); let min_suffix = Suffix::reverse(needle, SuffixKind::Minimal); let max_suffix = Suffix::reverse(needle, SuffixKind::Maximal); @@ -258,27 +285,20 @@ impl Reverse { } else { (max_suffix.period, max_suffix.pos) }; - // let critical_pos = needle.len() - critical_pos; let shift = Shift::reverse(needle, period_lower_bound, critical_pos); - Reverse(TwoWay { byteset, critical_pos, shift }) + FinderRev(TwoWay { byteset, critical_pos, shift }) } - /// Find the position of the last occurrence of this searcher's needle - /// in the given haystack. If one does not exist, then return None. + /// Returns the last occurrence of `needle` in the given `haystack`, or + /// `None` if no such occurrence could be found. /// - /// This will automatically initialize prefilter state. This should only - /// be used for one-off searches. + /// The `needle` given must be the same as the `needle` provided to + /// [`FinderRev::new`]. /// - /// Callers must guarantee that the needle is non-empty and its length is - /// <= the haystack's length. - #[inline(always)] - pub(crate) fn rfind( - &self, - haystack: &[u8], - needle: &[u8], - ) -> Option { - debug_assert!(!needle.is_empty(), "needle should not be empty"); - debug_assert!(needle.len() <= haystack.len(), "haystack too short"); + /// An empty `needle` results in a match at every position in a haystack, + /// including at `haystack.len()`. + #[inline] + pub fn rfind(&self, haystack: &[u8], needle: &[u8]) -> Option { // For the reverse case, we don't use a prefilter. 
It's plausible that // perhaps we should, but it's a lot of additional code to do it, and // it's not clear that it's actually worth it. If you have a really @@ -293,20 +313,6 @@ impl Reverse { } } - /// Like rfind, but handles the degenerate substring test cases. This is - /// only useful for conveniently testing this substring implementation in - /// isolation. - #[cfg(test)] - fn rfind_general(&self, haystack: &[u8], needle: &[u8]) -> Option { - if needle.is_empty() { - Some(haystack.len()) - } else if haystack.len() < needle.len() { - None - } else { - self.rfind(haystack, needle) - } - } - #[inline(always)] fn rfind_small_imp( &self, @@ -317,6 +323,10 @@ impl Reverse { let nlen = needle.len(); let mut pos = haystack.len(); let mut shift = nlen; + let first_byte = match needle.get(0) { + None => return Some(pos), + Some(&first_byte) => first_byte, + }; while pos >= nlen { if !self.0.byteset.contains(haystack[pos - nlen]) { pos -= nlen; @@ -327,7 +337,7 @@ impl Reverse { while i > 0 && needle[i - 1] == haystack[pos - nlen + i - 1] { i -= 1; } - if i > 0 || needle[0] != haystack[pos - nlen] { + if i > 0 || first_byte != haystack[pos - nlen] { pos -= self.0.critical_pos - i + 1; shift = nlen; } else { @@ -354,6 +364,10 @@ impl Reverse { ) -> Option { let nlen = needle.len(); let mut pos = haystack.len(); + let first_byte = match needle.get(0) { + None => return Some(pos), + Some(&first_byte) => first_byte, + }; while pos >= nlen { if !self.0.byteset.contains(haystack[pos - nlen]) { pos -= nlen; @@ -363,7 +377,7 @@ impl Reverse { while i > 0 && needle[i - 1] == haystack[pos - nlen + i - 1] { i -= 1; } - if i > 0 || needle[0] != haystack[pos - nlen] { + if i > 0 || first_byte != haystack[pos - nlen] { pos -= self.0.critical_pos - i + 1; } else { let mut j = self.0.critical_pos; @@ -380,16 +394,6 @@ impl Reverse { } } -impl TwoWay { - fn empty() -> TwoWay { - TwoWay { - byteset: ApproximateByteSet::new(b""), - critical_pos: 0, - shift: Shift::Large { shift: 0 }, - } 
- } -} - /// A representation of the amount we're allowed to shift by during Two-Way /// search. /// @@ -444,7 +448,7 @@ impl Shift { } let (u, v) = needle.split_at(critical_pos); - if !util::is_suffix(&v[..period_lower_bound], u) { + if !is_suffix(&v[..period_lower_bound], u) { return Shift::Large { shift: large }; } Shift::Small { period: period_lower_bound } @@ -467,7 +471,7 @@ impl Shift { } let (v, u) = needle.split_at(critical_pos); - if !util::is_prefix(&v[v.len() - period_lower_bound..], u) { + if !is_prefix(&v[v.len() - period_lower_bound..], u) { return Shift::Large { shift: large }; } Shift::Small { period: period_lower_bound } @@ -494,8 +498,6 @@ struct Suffix { impl Suffix { fn forward(needle: &[u8], kind: SuffixKind) -> Suffix { - debug_assert!(!needle.is_empty()); - // suffix represents our maximal (or minimal) suffix, along with // its period. let mut suffix = Suffix { pos: 0, period: 1 }; @@ -544,14 +546,15 @@ impl Suffix { } fn reverse(needle: &[u8], kind: SuffixKind) -> Suffix { - debug_assert!(!needle.is_empty()); - // See the comments in `forward` for how this works. let mut suffix = Suffix { pos: needle.len(), period: 1 }; if needle.len() == 1 { return suffix; } - let mut candidate_start = needle.len() - 1; + let mut candidate_start = match needle.len().checked_sub(1) { + None => return suffix, + Some(candidate_start) => candidate_start, + }; let mut offset = 0; while offset < candidate_start { @@ -665,17 +668,12 @@ impl ApproximateByteSet { } } -#[cfg(all(test, feature = "std", not(miri)))] +#[cfg(test)] mod tests { - use quickcheck::quickcheck; + use alloc::vec::Vec; use super::*; - define_memmem_quickcheck_tests!( - super::simpletests::twoway_find, - super::simpletests::twoway_rfind - ); - /// Convenience wrapper for computing the suffix as a byte string. 
fn get_suffix_forward(needle: &[u8], kind: SuffixKind) -> (&[u8], usize) { let s = Suffix::forward(needle, kind); @@ -710,13 +708,34 @@ mod tests { got } + define_substring_forward_quickcheck!(|h, n| Some( + Finder::new(n).find(h, n) + )); + define_substring_reverse_quickcheck!(|h, n| Some( + FinderRev::new(n).rfind(h, n) + )); + + #[test] + fn forward() { + crate::tests::substring::Runner::new() + .fwd(|h, n| Some(Finder::new(n).find(h, n))) + .run(); + } + + #[test] + fn reverse() { + crate::tests::substring::Runner::new() + .rev(|h, n| Some(FinderRev::new(n).rfind(h, n))) + .run(); + } + #[test] fn suffix_forward() { macro_rules! assert_suffix_min { ($given:expr, $expected:expr, $period:expr) => { let (got_suffix, got_period) = get_suffix_forward($given.as_bytes(), SuffixKind::Minimal); - let got_suffix = std::str::from_utf8(got_suffix).unwrap(); + let got_suffix = core::str::from_utf8(got_suffix).unwrap(); assert_eq!(($expected, $period), (got_suffix, got_period)); }; } @@ -725,7 +744,7 @@ mod tests { ($given:expr, $expected:expr, $period:expr) => { let (got_suffix, got_period) = get_suffix_forward($given.as_bytes(), SuffixKind::Maximal); - let got_suffix = std::str::from_utf8(got_suffix).unwrap(); + let got_suffix = core::str::from_utf8(got_suffix).unwrap(); assert_eq!(($expected, $period), (got_suffix, got_period)); }; } @@ -773,7 +792,7 @@ mod tests { ($given:expr, $expected:expr, $period:expr) => { let (got_suffix, got_period) = get_suffix_reverse($given.as_bytes(), SuffixKind::Minimal); - let got_suffix = std::str::from_utf8(got_suffix).unwrap(); + let got_suffix = core::str::from_utf8(got_suffix).unwrap(); assert_eq!(($expected, $period), (got_suffix, got_period)); }; } @@ -782,7 +801,7 @@ mod tests { ($given:expr, $expected:expr, $period:expr) => { let (got_suffix, got_period) = get_suffix_reverse($given.as_bytes(), SuffixKind::Maximal); - let got_suffix = std::str::from_utf8(got_suffix).unwrap(); + let got_suffix = 
core::str::from_utf8(got_suffix).unwrap(); assert_eq!(($expected, $period), (got_suffix, got_period)); }; } @@ -821,7 +840,8 @@ mod tests { assert_suffix_max!("aaa", "aaa", 1); } - quickcheck! { + #[cfg(not(miri))] + quickcheck::quickcheck! { fn qc_suffix_forward_maximal(bytes: Vec) -> bool { if bytes.is_empty() { return true; @@ -842,27 +862,6 @@ mod tests { expected == got } } -} - -#[cfg(test)] -mod simpletests { - use super::*; - - pub(crate) fn twoway_find( - haystack: &[u8], - needle: &[u8], - ) -> Option { - Forward::new(needle).find_general(None, haystack, needle) - } - - pub(crate) fn twoway_rfind( - haystack: &[u8], - needle: &[u8], - ) -> Option { - Reverse::new(needle).rfind_general(haystack, needle) - } - - define_memmem_simple_tests!(twoway_find, twoway_rfind); // This is a regression test caught by quickcheck that exercised a bug in // the reverse small period handling. The bug was that we were using 'if j @@ -870,7 +869,7 @@ mod simpletests { // j >= shift', which matches the corresponding guard in the forward impl. #[test] fn regression_rev_small_period() { - let rfind = super::simpletests::twoway_rfind; + let rfind = |h, n| FinderRev::new(n).rfind(h, n); let haystack = "ababaz"; let needle = "abab"; assert_eq!(Some(0), rfind(haystack.as_bytes(), needle.as_bytes())); diff --git a/vendor/memchr/src/arch/generic/memchr.rs b/vendor/memchr/src/arch/generic/memchr.rs new file mode 100644 index 0000000..580b3cc --- /dev/null +++ b/vendor/memchr/src/arch/generic/memchr.rs @@ -0,0 +1,1214 @@ +/*! +Generic crate-internal routines for the `memchr` family of functions. +*/ + +// What follows is a vector algorithm generic over the specific vector +// type to detect the position of one, two or three needles in a haystack. +// From what I know, this is a "classic" algorithm, although I don't +// believe it has been published in any peer reviewed journal. I believe +// it can be found in places like glibc and Go's standard library. 
It +// appears to be well known and is elaborated on in more detail here: +// https://gms.tf/stdfind-and-memchr-optimizations.html +// +// While the routine below is fairly long and perhaps intimidating, the basic +// idea is actually very simple and can be expressed straight-forwardly in +// pseudo code. The psuedo code below is written for 128 bit vectors, but the +// actual code below works for anything that implements the Vector trait. +// +// needle = (n1 << 15) | (n1 << 14) | ... | (n1 << 1) | n1 +// // Note: shift amount is in bytes +// +// while i <= haystack.len() - 16: +// // A 16 byte vector. Each byte in chunk corresponds to a byte in +// // the haystack. +// chunk = haystack[i:i+16] +// // Compare bytes in needle with bytes in chunk. The result is a 16 +// // byte chunk where each byte is 0xFF if the corresponding bytes +// // in needle and chunk were equal, or 0x00 otherwise. +// eqs = cmpeq(needle, chunk) +// // Return a 32 bit integer where the most significant 16 bits +// // are always 0 and the lower 16 bits correspond to whether the +// // most significant bit in the correspond byte in `eqs` is set. +// // In other words, `mask as u16` has bit i set if and only if +// // needle[i] == chunk[i]. +// mask = movemask(eqs) +// +// // Mask is 0 if there is no match, and non-zero otherwise. +// if mask != 0: +// // trailing_zeros tells us the position of the least significant +// // bit that is set. +// return i + trailing_zeros(mask) +// +// // haystack length may not be a multiple of 16, so search the rest. +// while i < haystack.len(): +// if haystack[i] == n1: +// return i +// +// // No match found. +// return NULL +// +// In fact, we could loosely translate the above code to Rust line-for-line +// and it would be a pretty fast algorithm. But, we pull out all the stops +// to go as fast as possible: +// +// 1. We use aligned loads. 
That is, we do some finagling to make sure our +// primary loop not only proceeds in increments of 16 bytes, but that +// the address of haystack's pointer that we dereference is aligned to +// 16 bytes. 16 is a magic number here because it is the size of SSE2 +// 128-bit vector. (For the AVX2 algorithm, 32 is the magic number.) +// Therefore, to get aligned loads, our pointer's address must be evenly +// divisible by 16. +// 2. Our primary loop proceeds 64 bytes at a time instead of 16. It's +// kind of like loop unrolling, but we combine the equality comparisons +// using a vector OR such that we only need to extract a single mask to +// determine whether a match exists or not. If so, then we do some +// book-keeping to determine the precise location but otherwise mush on. +// 3. We use our "chunk" comparison routine in as many places as possible, +// even if it means using unaligned loads. In particular, if haystack +// starts with an unaligned address, then we do an unaligned load to +// search the first 16 bytes. We then start our primary loop at the +// smallest subsequent aligned address, which will actually overlap with +// previously searched bytes. But we're OK with that. We do a similar +// dance at the end of our primary loop. Finally, to avoid a +// byte-at-a-time loop at the end, we do a final 16 byte unaligned load +// that may overlap with a previous load. This is OK because it converts +// a loop into a small number of very fast vector instructions. The overlap +// is OK because we know the place where the overlap occurs does not +// contain a match. +// +// And that's pretty all there is to it. Note that since the below is +// generic and since it's meant to be inlined into routines with a +// `#[target_feature(enable = "...")]` annotation, we must mark all routines as +// both unsafe and `#[inline(always)]`. +// +// The fact that the code below is generic does somewhat inhibit us. 
For +// example, I've noticed that introducing an unlineable `#[cold]` function to +// handle the match case in the loop generates tighter assembly, but there is +// no way to do this in the generic code below because the generic code doesn't +// know what `target_feature` annotation to apply to the unlineable function. +// We could make such functions part of the `Vector` trait, but we instead live +// with the slightly sub-optimal codegen for now since it doesn't seem to have +// a noticeable perf difference. + +use crate::{ + ext::Pointer, + vector::{MoveMask, Vector}, +}; + +/// Finds all occurrences of a single byte in a haystack. +#[derive(Clone, Copy, Debug)] +pub(crate) struct One { + s1: u8, + v1: V, +} + +impl One { + /// The number of bytes we examine per each iteration of our search loop. + const LOOP_SIZE: usize = 4 * V::BYTES; + + /// Create a new searcher that finds occurrences of the byte given. + #[inline(always)] + pub(crate) unsafe fn new(needle: u8) -> One { + One { s1: needle, v1: V::splat(needle) } + } + + /// Returns the needle given to `One::new`. + #[inline(always)] + pub(crate) fn needle1(&self) -> u8 { + self.s1 + } + + /// Return a pointer to the first occurrence of the needle in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// # Safety + /// + /// * It must be the case that `start < end` and that the distance between + /// them is at least equal to `V::BYTES`. That is, it must always be valid + /// to do at least an unaligned load of `V` at `start`. + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. 
+ /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + #[inline(always)] + pub(crate) unsafe fn find_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + // If we want to support vectors bigger than 256 bits, we probably + // need to move up to using a u64 for the masks used below. Currently + // they are 32 bits, which means we're SOL for vectors that need masks + // bigger than 32 bits. Overall unclear until there's a use case. + debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes"); + + let topos = V::Mask::first_offset; + let len = end.distance(start); + debug_assert!( + len >= V::BYTES, + "haystack has length {}, but must be at least {}", + len, + V::BYTES + ); + + // Search a possibly unaligned chunk at `start`. This covers any part + // of the haystack prior to where aligned loads can start. + if let Some(cur) = self.search_chunk(start, topos) { + return Some(cur); + } + // Set `cur` to the first V-aligned pointer greater than `start`. 
+ let mut cur = start.add(V::BYTES - (start.as_usize() & V::ALIGN)); + debug_assert!(cur > start && end.sub(V::BYTES) >= start); + if len >= Self::LOOP_SIZE { + while cur <= end.sub(Self::LOOP_SIZE) { + debug_assert_eq!(0, cur.as_usize() % V::BYTES); + + let a = V::load_aligned(cur); + let b = V::load_aligned(cur.add(1 * V::BYTES)); + let c = V::load_aligned(cur.add(2 * V::BYTES)); + let d = V::load_aligned(cur.add(3 * V::BYTES)); + let eqa = self.v1.cmpeq(a); + let eqb = self.v1.cmpeq(b); + let eqc = self.v1.cmpeq(c); + let eqd = self.v1.cmpeq(d); + let or1 = eqa.or(eqb); + let or2 = eqc.or(eqd); + let or3 = or1.or(or2); + if or3.movemask_will_have_non_zero() { + let mask = eqa.movemask(); + if mask.has_non_zero() { + return Some(cur.add(topos(mask))); + } + + let mask = eqb.movemask(); + if mask.has_non_zero() { + return Some(cur.add(1 * V::BYTES).add(topos(mask))); + } + + let mask = eqc.movemask(); + if mask.has_non_zero() { + return Some(cur.add(2 * V::BYTES).add(topos(mask))); + } + + let mask = eqd.movemask(); + debug_assert!(mask.has_non_zero()); + return Some(cur.add(3 * V::BYTES).add(topos(mask))); + } + cur = cur.add(Self::LOOP_SIZE); + } + } + // Handle any leftovers after the aligned loop above. We use unaligned + // loads here, but I believe we are guaranteed that they are aligned + // since `cur` is aligned. + while cur <= end.sub(V::BYTES) { + debug_assert!(end.distance(cur) >= V::BYTES); + if let Some(cur) = self.search_chunk(cur, topos) { + return Some(cur); + } + cur = cur.add(V::BYTES); + } + // Finally handle any remaining bytes less than the size of V. In this + // case, our pointer may indeed be unaligned and the load may overlap + // with the previous one. But that's okay since we know the previous + // load didn't lead to a match (otherwise we wouldn't be here). 
+ if cur < end { + debug_assert!(end.distance(cur) < V::BYTES); + cur = cur.sub(V::BYTES - end.distance(cur)); + debug_assert_eq!(end.distance(cur), V::BYTES); + return self.search_chunk(cur, topos); + } + None + } + + /// Return a pointer to the last occurrence of the needle in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// # Safety + /// + /// * It must be the case that `start < end` and that the distance between + /// them is at least equal to `V::BYTES`. That is, it must always be valid + /// to do at least an unaligned load of `V` at `start`. + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + #[inline(always)] + pub(crate) unsafe fn rfind_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + // If we want to support vectors bigger than 256 bits, we probably + // need to move up to using a u64 for the masks used below. Currently + // they are 32 bits, which means we're SOL for vectors that need masks + // bigger than 32 bits. Overall unclear until there's a use case. 
+ debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes"); + + let topos = V::Mask::last_offset; + let len = end.distance(start); + debug_assert!( + len >= V::BYTES, + "haystack has length {}, but must be at least {}", + len, + V::BYTES + ); + + if let Some(cur) = self.search_chunk(end.sub(V::BYTES), topos) { + return Some(cur); + } + let mut cur = end.sub(end.as_usize() & V::ALIGN); + debug_assert!(start <= cur && cur <= end); + if len >= Self::LOOP_SIZE { + while cur >= start.add(Self::LOOP_SIZE) { + debug_assert_eq!(0, cur.as_usize() % V::BYTES); + + cur = cur.sub(Self::LOOP_SIZE); + let a = V::load_aligned(cur); + let b = V::load_aligned(cur.add(1 * V::BYTES)); + let c = V::load_aligned(cur.add(2 * V::BYTES)); + let d = V::load_aligned(cur.add(3 * V::BYTES)); + let eqa = self.v1.cmpeq(a); + let eqb = self.v1.cmpeq(b); + let eqc = self.v1.cmpeq(c); + let eqd = self.v1.cmpeq(d); + let or1 = eqa.or(eqb); + let or2 = eqc.or(eqd); + let or3 = or1.or(or2); + if or3.movemask_will_have_non_zero() { + let mask = eqd.movemask(); + if mask.has_non_zero() { + return Some(cur.add(3 * V::BYTES).add(topos(mask))); + } + + let mask = eqc.movemask(); + if mask.has_non_zero() { + return Some(cur.add(2 * V::BYTES).add(topos(mask))); + } + + let mask = eqb.movemask(); + if mask.has_non_zero() { + return Some(cur.add(1 * V::BYTES).add(topos(mask))); + } + + let mask = eqa.movemask(); + debug_assert!(mask.has_non_zero()); + return Some(cur.add(topos(mask))); + } + } + } + while cur >= start.add(V::BYTES) { + debug_assert!(cur.distance(start) >= V::BYTES); + cur = cur.sub(V::BYTES); + if let Some(cur) = self.search_chunk(cur, topos) { + return Some(cur); + } + } + if cur > start { + debug_assert!(cur.distance(start) < V::BYTES); + return self.search_chunk(start, topos); + } + None + } + + /// Return a count of all matching bytes in the given haystack. 
+ /// + /// # Safety + /// + /// * It must be the case that `start < end` and that the distance between + /// them is at least equal to `V::BYTES`. That is, it must always be valid + /// to do at least an unaligned load of `V` at `start`. + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + #[inline(always)] + pub(crate) unsafe fn count_raw( + &self, + start: *const u8, + end: *const u8, + ) -> usize { + debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes"); + + let confirm = |b| b == self.needle1(); + let len = end.distance(start); + debug_assert!( + len >= V::BYTES, + "haystack has length {}, but must be at least {}", + len, + V::BYTES + ); + + // Set `cur` to the first V-aligned pointer greater than `start`. + let mut cur = start.add(V::BYTES - (start.as_usize() & V::ALIGN)); + // Count any matching bytes before we start our aligned loop. 
+ let mut count = count_byte_by_byte(start, cur, confirm); + debug_assert!(cur > start && end.sub(V::BYTES) >= start); + if len >= Self::LOOP_SIZE { + while cur <= end.sub(Self::LOOP_SIZE) { + debug_assert_eq!(0, cur.as_usize() % V::BYTES); + + let a = V::load_aligned(cur); + let b = V::load_aligned(cur.add(1 * V::BYTES)); + let c = V::load_aligned(cur.add(2 * V::BYTES)); + let d = V::load_aligned(cur.add(3 * V::BYTES)); + let eqa = self.v1.cmpeq(a); + let eqb = self.v1.cmpeq(b); + let eqc = self.v1.cmpeq(c); + let eqd = self.v1.cmpeq(d); + count += eqa.movemask().count_ones(); + count += eqb.movemask().count_ones(); + count += eqc.movemask().count_ones(); + count += eqd.movemask().count_ones(); + cur = cur.add(Self::LOOP_SIZE); + } + } + // Handle any leftovers after the aligned loop above. We use unaligned + // loads here, but I believe we are guaranteed that they are aligned + // since `cur` is aligned. + while cur <= end.sub(V::BYTES) { + debug_assert!(end.distance(cur) >= V::BYTES); + let chunk = V::load_unaligned(cur); + count += self.v1.cmpeq(chunk).movemask().count_ones(); + cur = cur.add(V::BYTES); + } + // And finally count any leftovers that weren't caught above. + count += count_byte_by_byte(cur, end, confirm); + count + } + + /// Search `V::BYTES` starting at `cur` via an unaligned load. + /// + /// `mask_to_offset` should be a function that converts a `movemask` to + /// an offset such that `cur.add(offset)` corresponds to a pointer to the + /// match location if one is found. Generally it is expected to use either + /// `mask_to_first_offset` or `mask_to_last_offset`, depending on whether + /// one is implementing a forward or reverse search, respectively. + /// + /// # Safety + /// + /// `cur` must be a valid pointer and it must be valid to do an unaligned + /// load of size `V::BYTES` at `cur`. 
+ #[inline(always)] + unsafe fn search_chunk( + &self, + cur: *const u8, + mask_to_offset: impl Fn(V::Mask) -> usize, + ) -> Option<*const u8> { + let chunk = V::load_unaligned(cur); + let mask = self.v1.cmpeq(chunk).movemask(); + if mask.has_non_zero() { + Some(cur.add(mask_to_offset(mask))) + } else { + None + } + } +} + +/// Finds all occurrences of two bytes in a haystack. +/// +/// That is, this reports matches of one of two possible bytes. For example, +/// searching for `a` or `b` in `afoobar` would report matches at offsets `0`, +/// `4` and `5`. +#[derive(Clone, Copy, Debug)] +pub(crate) struct Two { + s1: u8, + s2: u8, + v1: V, + v2: V, +} + +impl Two { + /// The number of bytes we examine per each iteration of our search loop. + const LOOP_SIZE: usize = 2 * V::BYTES; + + /// Create a new searcher that finds occurrences of the byte given. + #[inline(always)] + pub(crate) unsafe fn new(needle1: u8, needle2: u8) -> Two { + Two { + s1: needle1, + s2: needle2, + v1: V::splat(needle1), + v2: V::splat(needle2), + } + } + + /// Returns the first needle given to `Two::new`. + #[inline(always)] + pub(crate) fn needle1(&self) -> u8 { + self.s1 + } + + /// Returns the second needle given to `Two::new`. + #[inline(always)] + pub(crate) fn needle2(&self) -> u8 { + self.s2 + } + + /// Return a pointer to the first occurrence of one of the needles in the + /// given haystack. If no such occurrence exists, then `None` is returned. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// # Safety + /// + /// * It must be the case that `start < end` and that the distance between + /// them is at least equal to `V::BYTES`. That is, it must always be valid + /// to do at least an unaligned load of `V` at `start`. + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. 
+ /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + #[inline(always)] + pub(crate) unsafe fn find_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + // If we want to support vectors bigger than 256 bits, we probably + // need to move up to using a u64 for the masks used below. Currently + // they are 32 bits, which means we're SOL for vectors that need masks + // bigger than 32 bits. Overall unclear until there's a use case. + debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes"); + + let topos = V::Mask::first_offset; + let len = end.distance(start); + debug_assert!( + len >= V::BYTES, + "haystack has length {}, but must be at least {}", + len, + V::BYTES + ); + + // Search a possibly unaligned chunk at `start`. This covers any part + // of the haystack prior to where aligned loads can start. + if let Some(cur) = self.search_chunk(start, topos) { + return Some(cur); + } + // Set `cur` to the first V-aligned pointer greater than `start`. 
+ let mut cur = start.add(V::BYTES - (start.as_usize() & V::ALIGN)); + debug_assert!(cur > start && end.sub(V::BYTES) >= start); + if len >= Self::LOOP_SIZE { + while cur <= end.sub(Self::LOOP_SIZE) { + debug_assert_eq!(0, cur.as_usize() % V::BYTES); + + let a = V::load_aligned(cur); + let b = V::load_aligned(cur.add(V::BYTES)); + let eqa1 = self.v1.cmpeq(a); + let eqb1 = self.v1.cmpeq(b); + let eqa2 = self.v2.cmpeq(a); + let eqb2 = self.v2.cmpeq(b); + let or1 = eqa1.or(eqb1); + let or2 = eqa2.or(eqb2); + let or3 = or1.or(or2); + if or3.movemask_will_have_non_zero() { + let mask = eqa1.movemask().or(eqa2.movemask()); + if mask.has_non_zero() { + return Some(cur.add(topos(mask))); + } + + let mask = eqb1.movemask().or(eqb2.movemask()); + debug_assert!(mask.has_non_zero()); + return Some(cur.add(V::BYTES).add(topos(mask))); + } + cur = cur.add(Self::LOOP_SIZE); + } + } + // Handle any leftovers after the aligned loop above. We use unaligned + // loads here, but I believe we are guaranteed that they are aligned + // since `cur` is aligned. + while cur <= end.sub(V::BYTES) { + debug_assert!(end.distance(cur) >= V::BYTES); + if let Some(cur) = self.search_chunk(cur, topos) { + return Some(cur); + } + cur = cur.add(V::BYTES); + } + // Finally handle any remaining bytes less than the size of V. In this + // case, our pointer may indeed be unaligned and the load may overlap + // with the previous one. But that's okay since we know the previous + // load didn't lead to a match (otherwise we wouldn't be here). + if cur < end { + debug_assert!(end.distance(cur) < V::BYTES); + cur = cur.sub(V::BYTES - end.distance(cur)); + debug_assert_eq!(end.distance(cur), V::BYTES); + return self.search_chunk(cur, topos); + } + None + } + + /// Return a pointer to the last occurrence of the needle in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. 
+ /// + /// # Safety + /// + /// * It must be the case that `start < end` and that the distance between + /// them is at least equal to `V::BYTES`. That is, it must always be valid + /// to do at least an unaligned load of `V` at `start`. + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + #[inline(always)] + pub(crate) unsafe fn rfind_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + // If we want to support vectors bigger than 256 bits, we probably + // need to move up to using a u64 for the masks used below. Currently + // they are 32 bits, which means we're SOL for vectors that need masks + // bigger than 32 bits. Overall unclear until there's a use case. 
+ debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes"); + + let topos = V::Mask::last_offset; + let len = end.distance(start); + debug_assert!( + len >= V::BYTES, + "haystack has length {}, but must be at least {}", + len, + V::BYTES + ); + + if let Some(cur) = self.search_chunk(end.sub(V::BYTES), topos) { + return Some(cur); + } + let mut cur = end.sub(end.as_usize() & V::ALIGN); + debug_assert!(start <= cur && cur <= end); + if len >= Self::LOOP_SIZE { + while cur >= start.add(Self::LOOP_SIZE) { + debug_assert_eq!(0, cur.as_usize() % V::BYTES); + + cur = cur.sub(Self::LOOP_SIZE); + let a = V::load_aligned(cur); + let b = V::load_aligned(cur.add(V::BYTES)); + let eqa1 = self.v1.cmpeq(a); + let eqb1 = self.v1.cmpeq(b); + let eqa2 = self.v2.cmpeq(a); + let eqb2 = self.v2.cmpeq(b); + let or1 = eqa1.or(eqb1); + let or2 = eqa2.or(eqb2); + let or3 = or1.or(or2); + if or3.movemask_will_have_non_zero() { + let mask = eqb1.movemask().or(eqb2.movemask()); + if mask.has_non_zero() { + return Some(cur.add(V::BYTES).add(topos(mask))); + } + + let mask = eqa1.movemask().or(eqa2.movemask()); + debug_assert!(mask.has_non_zero()); + return Some(cur.add(topos(mask))); + } + } + } + while cur >= start.add(V::BYTES) { + debug_assert!(cur.distance(start) >= V::BYTES); + cur = cur.sub(V::BYTES); + if let Some(cur) = self.search_chunk(cur, topos) { + return Some(cur); + } + } + if cur > start { + debug_assert!(cur.distance(start) < V::BYTES); + return self.search_chunk(start, topos); + } + None + } + + /// Search `V::BYTES` starting at `cur` via an unaligned load. + /// + /// `mask_to_offset` should be a function that converts a `movemask` to + /// an offset such that `cur.add(offset)` corresponds to a pointer to the + /// match location if one is found. Generally it is expected to use either + /// `mask_to_first_offset` or `mask_to_last_offset`, depending on whether + /// one is implementing a forward or reverse search, respectively. 
+ /// + /// # Safety + /// + /// `cur` must be a valid pointer and it must be valid to do an unaligned + /// load of size `V::BYTES` at `cur`. + #[inline(always)] + unsafe fn search_chunk( + &self, + cur: *const u8, + mask_to_offset: impl Fn(V::Mask) -> usize, + ) -> Option<*const u8> { + let chunk = V::load_unaligned(cur); + let eq1 = self.v1.cmpeq(chunk); + let eq2 = self.v2.cmpeq(chunk); + let mask = eq1.or(eq2).movemask(); + if mask.has_non_zero() { + let mask1 = eq1.movemask(); + let mask2 = eq2.movemask(); + Some(cur.add(mask_to_offset(mask1.or(mask2)))) + } else { + None + } + } +} + +/// Finds all occurrences of two bytes in a haystack. +/// +/// That is, this reports matches of one of two possible bytes. For example, +/// searching for `a` or `b` in `afoobar` would report matches at offsets `0`, +/// `4` and `5`. +#[derive(Clone, Copy, Debug)] +pub(crate) struct Three { + s1: u8, + s2: u8, + s3: u8, + v1: V, + v2: V, + v3: V, +} + +impl Three { + /// The number of bytes we examine per each iteration of our search loop. + const LOOP_SIZE: usize = 2 * V::BYTES; + + /// Create a new searcher that finds occurrences of the byte given. + #[inline(always)] + pub(crate) unsafe fn new( + needle1: u8, + needle2: u8, + needle3: u8, + ) -> Three { + Three { + s1: needle1, + s2: needle2, + s3: needle3, + v1: V::splat(needle1), + v2: V::splat(needle2), + v3: V::splat(needle3), + } + } + + /// Returns the first needle given to `Three::new`. + #[inline(always)] + pub(crate) fn needle1(&self) -> u8 { + self.s1 + } + + /// Returns the second needle given to `Three::new`. + #[inline(always)] + pub(crate) fn needle2(&self) -> u8 { + self.s2 + } + + /// Returns the third needle given to `Three::new`. + #[inline(always)] + pub(crate) fn needle3(&self) -> u8 { + self.s3 + } + + /// Return a pointer to the first occurrence of one of the needles in the + /// given haystack. If no such occurrence exists, then `None` is returned. 
+ /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// # Safety + /// + /// * It must be the case that `start < end` and that the distance between + /// them is at least equal to `V::BYTES`. That is, it must always be valid + /// to do at least an unaligned load of `V` at `start`. + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + #[inline(always)] + pub(crate) unsafe fn find_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + // If we want to support vectors bigger than 256 bits, we probably + // need to move up to using a u64 for the masks used below. Currently + // they are 32 bits, which means we're SOL for vectors that need masks + // bigger than 32 bits. Overall unclear until there's a use case. + debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes"); + + let topos = V::Mask::first_offset; + let len = end.distance(start); + debug_assert!( + len >= V::BYTES, + "haystack has length {}, but must be at least {}", + len, + V::BYTES + ); + + // Search a possibly unaligned chunk at `start`. This covers any part + // of the haystack prior to where aligned loads can start. + if let Some(cur) = self.search_chunk(start, topos) { + return Some(cur); + } + // Set `cur` to the first V-aligned pointer greater than `start`. 
+ let mut cur = start.add(V::BYTES - (start.as_usize() & V::ALIGN)); + debug_assert!(cur > start && end.sub(V::BYTES) >= start); + if len >= Self::LOOP_SIZE { + while cur <= end.sub(Self::LOOP_SIZE) { + debug_assert_eq!(0, cur.as_usize() % V::BYTES); + + let a = V::load_aligned(cur); + let b = V::load_aligned(cur.add(V::BYTES)); + let eqa1 = self.v1.cmpeq(a); + let eqb1 = self.v1.cmpeq(b); + let eqa2 = self.v2.cmpeq(a); + let eqb2 = self.v2.cmpeq(b); + let eqa3 = self.v3.cmpeq(a); + let eqb3 = self.v3.cmpeq(b); + let or1 = eqa1.or(eqb1); + let or2 = eqa2.or(eqb2); + let or3 = eqa3.or(eqb3); + let or4 = or1.or(or2); + let or5 = or3.or(or4); + if or5.movemask_will_have_non_zero() { + let mask = eqa1 + .movemask() + .or(eqa2.movemask()) + .or(eqa3.movemask()); + if mask.has_non_zero() { + return Some(cur.add(topos(mask))); + } + + let mask = eqb1 + .movemask() + .or(eqb2.movemask()) + .or(eqb3.movemask()); + debug_assert!(mask.has_non_zero()); + return Some(cur.add(V::BYTES).add(topos(mask))); + } + cur = cur.add(Self::LOOP_SIZE); + } + } + // Handle any leftovers after the aligned loop above. We use unaligned + // loads here, but I believe we are guaranteed that they are aligned + // since `cur` is aligned. + while cur <= end.sub(V::BYTES) { + debug_assert!(end.distance(cur) >= V::BYTES); + if let Some(cur) = self.search_chunk(cur, topos) { + return Some(cur); + } + cur = cur.add(V::BYTES); + } + // Finally handle any remaining bytes less than the size of V. In this + // case, our pointer may indeed be unaligned and the load may overlap + // with the previous one. But that's okay since we know the previous + // load didn't lead to a match (otherwise we wouldn't be here). 
+ if cur < end { + debug_assert!(end.distance(cur) < V::BYTES); + cur = cur.sub(V::BYTES - end.distance(cur)); + debug_assert_eq!(end.distance(cur), V::BYTES); + return self.search_chunk(cur, topos); + } + None + } + + /// Return a pointer to the last occurrence of one of the needles in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// # Safety + /// + /// * It must be the case that `start < end` and that the distance between + /// them is at least equal to `V::BYTES`. That is, it must always be valid + /// to do at least an unaligned load of `V` at `start`. + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + #[inline(always)] + pub(crate) unsafe fn rfind_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + // If we want to support vectors bigger than 256 bits, we probably + // need to move up to using a u64 for the masks used below. Currently + // they are 32 bits, which means we're SOL for vectors that need masks + // bigger than 32 bits. Overall unclear until there's a use case.
+ debug_assert!(V::BYTES <= 32, "vector cannot be bigger than 32 bytes"); + + let topos = V::Mask::last_offset; + let len = end.distance(start); + debug_assert!( + len >= V::BYTES, + "haystack has length {}, but must be at least {}", + len, + V::BYTES + ); + + if let Some(cur) = self.search_chunk(end.sub(V::BYTES), topos) { + return Some(cur); + } + let mut cur = end.sub(end.as_usize() & V::ALIGN); + debug_assert!(start <= cur && cur <= end); + if len >= Self::LOOP_SIZE { + while cur >= start.add(Self::LOOP_SIZE) { + debug_assert_eq!(0, cur.as_usize() % V::BYTES); + + cur = cur.sub(Self::LOOP_SIZE); + let a = V::load_aligned(cur); + let b = V::load_aligned(cur.add(V::BYTES)); + let eqa1 = self.v1.cmpeq(a); + let eqb1 = self.v1.cmpeq(b); + let eqa2 = self.v2.cmpeq(a); + let eqb2 = self.v2.cmpeq(b); + let eqa3 = self.v3.cmpeq(a); + let eqb3 = self.v3.cmpeq(b); + let or1 = eqa1.or(eqb1); + let or2 = eqa2.or(eqb2); + let or3 = eqa3.or(eqb3); + let or4 = or1.or(or2); + let or5 = or3.or(or4); + if or5.movemask_will_have_non_zero() { + let mask = eqb1 + .movemask() + .or(eqb2.movemask()) + .or(eqb3.movemask()); + if mask.has_non_zero() { + return Some(cur.add(V::BYTES).add(topos(mask))); + } + + let mask = eqa1 + .movemask() + .or(eqa2.movemask()) + .or(eqa3.movemask()); + debug_assert!(mask.has_non_zero()); + return Some(cur.add(topos(mask))); + } + } + } + while cur >= start.add(V::BYTES) { + debug_assert!(cur.distance(start) >= V::BYTES); + cur = cur.sub(V::BYTES); + if let Some(cur) = self.search_chunk(cur, topos) { + return Some(cur); + } + } + if cur > start { + debug_assert!(cur.distance(start) < V::BYTES); + return self.search_chunk(start, topos); + } + None + } + + /// Search `V::BYTES` starting at `cur` via an unaligned load. + /// + /// `mask_to_offset` should be a function that converts a `movemask` to + /// an offset such that `cur.add(offset)` corresponds to a pointer to the + /// match location if one is found. 
Generally it is expected to use either + /// `mask_to_first_offset` or `mask_to_last_offset`, depending on whether + /// one is implementing a forward or reverse search, respectively. + /// + /// # Safety + /// + /// `cur` must be a valid pointer and it must be valid to do an unaligned + /// load of size `V::BYTES` at `cur`. + #[inline(always)] + unsafe fn search_chunk( + &self, + cur: *const u8, + mask_to_offset: impl Fn(V::Mask) -> usize, + ) -> Option<*const u8> { + let chunk = V::load_unaligned(cur); + let eq1 = self.v1.cmpeq(chunk); + let eq2 = self.v2.cmpeq(chunk); + let eq3 = self.v3.cmpeq(chunk); + let mask = eq1.or(eq2).or(eq3).movemask(); + if mask.has_non_zero() { + let mask1 = eq1.movemask(); + let mask2 = eq2.movemask(); + let mask3 = eq3.movemask(); + Some(cur.add(mask_to_offset(mask1.or(mask2).or(mask3)))) + } else { + None + } + } +} + +/// An iterator over all occurrences of a set of bytes in a haystack. +/// +/// This iterator implements the routines necessary to provide a +/// `DoubleEndedIterator` impl, which means it can also be used to find +/// occurrences in reverse order. +/// +/// The lifetime parameters are as follows: +/// +/// * `'h` refers to the lifetime of the haystack being searched. +/// +/// This type is intended to be used to implement all iterators for the +/// `memchr` family of functions. It handles a tiny bit of marginally tricky +/// raw pointer math, but otherwise expects the caller to provide `find_raw` +/// and `rfind_raw` routines for each call of `next` and `next_back`, +/// respectively. +#[derive(Clone, Debug)] +pub(crate) struct Iter<'h> { + /// The original starting point into the haystack. We use this to convert + /// pointers to offsets. + original_start: *const u8, + /// The current starting point into the haystack. That is, where the next + /// search will begin. + start: *const u8, + /// The current ending point into the haystack. That is, where the next + /// reverse search will begin. 
+ end: *const u8, + /// A marker for tracking the lifetime of the original_start/start/end + /// pointers above, which all point into the haystack. + haystack: core::marker::PhantomData<&'h [u8]>, +} + +// SAFETY: Iter contains no shared references to anything that performs any +// interior mutations. Also, the lifetime guarantees that Iter will not outlive +// the haystack. +unsafe impl<'h> Send for Iter<'h> {} + +// SAFETY: Iter performs no interior mutations, therefore no explicit +// synchronization is necessary. Also, the lifetime guarantees that Iter will +// not outlive the haystack. +unsafe impl<'h> Sync for Iter<'h> {} + +impl<'h> Iter<'h> { + /// Create a new generic memchr iterator. + #[inline(always)] + pub(crate) fn new(haystack: &'h [u8]) -> Iter<'h> { + Iter { + original_start: haystack.as_ptr(), + start: haystack.as_ptr(), + end: haystack.as_ptr().wrapping_add(haystack.len()), + haystack: core::marker::PhantomData, + } + } + + /// Returns the next occurrence in the forward direction. + /// + /// # Safety + /// + /// Callers must ensure that if a pointer is returned from the closure + /// provided, then it must be greater than or equal to the start pointer + /// and less than the end pointer. + #[inline(always)] + pub(crate) unsafe fn next( + &mut self, + mut find_raw: impl FnMut(*const u8, *const u8) -> Option<*const u8>, + ) -> Option { + // SAFETY: Pointers are derived directly from the same &[u8] haystack. + // We only ever modify start/end corresponding to a matching offset + // found between start and end. Thus all changes to start/end maintain + // our safety requirements. + // + // The only other assumption we rely on is that the pointer returned + // by `find_raw` satisfies `self.start <= found < self.end`, and that + // safety contract is forwarded to the caller.
+ let found = find_raw(self.start, self.end)?; + let result = found.distance(self.original_start); + self.start = found.add(1); + Some(result) + } + + /// Returns the number of remaining elements in this iterator. + #[inline(always)] + pub(crate) fn count( + self, + mut count_raw: impl FnMut(*const u8, *const u8) -> usize, + ) -> usize { + // SAFETY: Pointers are derived directly from the same &[u8] haystack. + // We only ever modify start/end corresponding to a matching offset + // found between start and end. Thus all changes to start/end maintain + // our safety requirements. + count_raw(self.start, self.end) + } + + /// Returns the next occurrence in reverse. + /// + /// # Safety + /// + /// Callers must ensure that if a pointer is returned from the closure + /// provided, then it must be greater than or equal to the start pointer + /// and less than the end pointer. + #[inline(always)] + pub(crate) unsafe fn next_back( + &mut self, + mut rfind_raw: impl FnMut(*const u8, *const u8) -> Option<*const u8>, + ) -> Option { + // SAFETY: Pointers are derived directly from the same &[u8] haystack. + // We only ever modify start/end corresponding to a matching offset + // found between start and end. Thus all changes to start/end maintain + // our safety requirements. + // + // The only other assumption we rely on is that the pointer returned + // by `rfind_raw` satisfies `self.start <= found < self.end`, and that + // safety contract is forwarded to the caller. + let found = rfind_raw(self.start, self.end)?; + let result = found.distance(self.original_start); + self.end = found; + Some(result) + } + + /// Provides an implementation of `Iterator::size_hint`. + #[inline(always)] + pub(crate) fn size_hint(&self) -> (usize, Option) { + (0, Some(self.end.as_usize().saturating_sub(self.start.as_usize()))) + } +} + +/// Search a slice using a function that operates on raw pointers. 
+/// +/// Given a function to search a contiguous sequence of memory for the location +/// of a non-empty set of bytes, this will execute that search on a slice of +/// bytes. The pointer returned by the given function will be converted to an +/// offset relative to the starting point of the given slice. That is, if a +/// match is found, the offset returned by this routine is guaranteed to be a +/// valid index into `haystack`. +/// +/// Callers may use this for a forward or reverse search. +/// +/// # Safety +/// +/// Callers must ensure that if a pointer is returned by `find_raw`, then the +/// pointer must be greater than or equal to the starting pointer and less than +/// the end pointer. +#[inline(always)] +pub(crate) unsafe fn search_slice_with_raw( + haystack: &[u8], + mut find_raw: impl FnMut(*const u8, *const u8) -> Option<*const u8>, +) -> Option { + // SAFETY: We rely on `find_raw` to return a correct and valid pointer, but + // otherwise, `start` and `end` are valid due to the guarantees provided by + // a &[u8]. + let start = haystack.as_ptr(); + let end = start.add(haystack.len()); + let found = find_raw(start, end)?; + Some(found.distance(start)) +} + +/// Performs a forward byte-at-a-time loop until either `ptr >= end_ptr` or +/// until `confirm(*ptr)` returns `true`. If the former occurs, then `None` is +/// returned. If the latter occurs, then the pointer at which `confirm` returns +/// `true` is returned. +/// +/// # Safety +/// +/// Callers must provide valid pointers and they must satisfy `start_ptr <= +/// ptr` and `ptr <= end_ptr`. 
+#[inline(always)] +pub(crate) unsafe fn fwd_byte_by_byte bool>( + start: *const u8, + end: *const u8, + confirm: F, +) -> Option<*const u8> { + debug_assert!(start <= end); + let mut ptr = start; + while ptr < end { + if confirm(*ptr) { + return Some(ptr); + } + ptr = ptr.offset(1); + } + None +} + +/// Performs a reverse byte-at-a-time loop until either `ptr < start_ptr` or +/// until `confirm(*ptr)` returns `true`. If the former occurs, then `None` is +/// returned. If the latter occurs, then the pointer at which `confirm` returns +/// `true` is returned. +/// +/// # Safety +/// +/// Callers must provide valid pointers and they must satisfy `start_ptr <= +/// ptr` and `ptr <= end_ptr`. +#[inline(always)] +pub(crate) unsafe fn rev_byte_by_byte bool>( + start: *const u8, + end: *const u8, + confirm: F, +) -> Option<*const u8> { + debug_assert!(start <= end); + + let mut ptr = end; + while ptr > start { + ptr = ptr.offset(-1); + if confirm(*ptr) { + return Some(ptr); + } + } + None +} + +/// Performs a forward byte-at-a-time loop until `ptr >= end_ptr` and returns +/// the number of times `confirm(*ptr)` returns `true`. +/// +/// # Safety +/// +/// Callers must provide valid pointers and they must satisfy `start_ptr <= +/// ptr` and `ptr <= end_ptr`. +#[inline(always)] +pub(crate) unsafe fn count_byte_by_byte bool>( + start: *const u8, + end: *const u8, + confirm: F, +) -> usize { + debug_assert!(start <= end); + let mut ptr = start; + let mut count = 0; + while ptr < end { + if confirm(*ptr) { + count += 1; + } + ptr = ptr.offset(1); + } + count +} diff --git a/vendor/memchr/src/arch/generic/mod.rs b/vendor/memchr/src/arch/generic/mod.rs new file mode 100644 index 0000000..63ee3f0 --- /dev/null +++ b/vendor/memchr/src/arch/generic/mod.rs @@ -0,0 +1,14 @@ +/*! +This module defines "generic" routines that can be specialized to specific +architectures. 
+ +We don't expose this module primarily because it would require exposing all +of the internal infrastructure required to write these generic routines. +That infrastructure should be treated as an implementation detail so that +it is allowed to evolve. Instead, what we expose are architecture specific +instantiations of these generic implementations. The generic code just lets us +write the code once (usually). +*/ + +pub(crate) mod memchr; +pub(crate) mod packedpair; diff --git a/vendor/memchr/src/arch/generic/packedpair.rs b/vendor/memchr/src/arch/generic/packedpair.rs new file mode 100644 index 0000000..8d97cf2 --- /dev/null +++ b/vendor/memchr/src/arch/generic/packedpair.rs @@ -0,0 +1,317 @@ +/*! +Generic crate-internal routines for the "packed pair" SIMD algorithm. + +The "packed pair" algorithm is based on the [generic SIMD] algorithm. The main +difference is that it (by default) uses a background distribution of byte +frequencies to heuristically select the pair of bytes to search for. + +[generic SIMD]: http://0x80.pl/articles/simd-strfind.html#first-and-last +*/ + +use crate::{ + arch::all::{is_equal_raw, packedpair::Pair}, + ext::Pointer, + vector::{MoveMask, Vector}, +}; + +/// A generic architecture dependent "packed pair" finder. +/// +/// This finder picks two bytes that it believes have high predictive power +/// for indicating an overall match of a needle. Depending on whether +/// `Finder::find` or `Finder::find_prefilter` is used, it reports offsets +/// where the needle matches or could match. In the prefilter case, candidates +/// are reported whenever the [`Pair`] of bytes given matches. +/// +/// This is architecture dependent because it uses specific vector operations +/// to look for occurrences of the pair of bytes. +/// +/// This type is not meant to be exported and is instead meant to be used as +/// the implementation for architecture specific facades. Why? 
Because it's a +/// bit of a quirky API that requires `inline(always)` annotations. And pretty +/// much everything has safety obligations due (at least) to the caller needing +/// to inline calls into routines marked with +/// `#[target_feature(enable = "...")]`. +#[derive(Clone, Copy, Debug)] +pub(crate) struct Finder { + pair: Pair, + v1: V, + v2: V, + min_haystack_len: usize, +} + +impl Finder { + /// Create a new pair searcher. The searcher returned can either report + /// exact matches of `needle` or act as a prefilter and report candidate + /// positions of `needle`. + /// + /// # Safety + /// + /// Callers must ensure that whatever vector type this routine is called + /// with is supported by the current environment. + /// + /// Callers must also ensure that `needle.len() >= 2`. + #[inline(always)] + pub(crate) unsafe fn new(needle: &[u8], pair: Pair) -> Finder { + let max_index = pair.index1().max(pair.index2()); + let min_haystack_len = + core::cmp::max(needle.len(), usize::from(max_index) + V::BYTES); + let v1 = V::splat(needle[usize::from(pair.index1())]); + let v2 = V::splat(needle[usize::from(pair.index2())]); + Finder { pair, v1, v2, min_haystack_len } + } + + /// Searches the given haystack for the given needle. The needle given + /// should be the same as the needle that this finder was initialized + /// with. + /// + /// # Panics + /// + /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. + /// + /// # Safety + /// + /// Since this is meant to be used with vector functions, callers need to + /// specialize this inside of a function with a `target_feature` attribute. + /// Therefore, callers must ensure that whatever target feature is being + /// used supports the vector functions that this function is specialized + /// for. (For the specific vector functions used, see the Vector trait + /// implementations.) 
+ #[inline(always)] + pub(crate) unsafe fn find( + &self, + haystack: &[u8], + needle: &[u8], + ) -> Option { + assert!( + haystack.len() >= self.min_haystack_len, + "haystack too small, should be at least {} but got {}", + self.min_haystack_len, + haystack.len(), + ); + + let all = V::Mask::all_zeros_except_least_significant(0); + let start = haystack.as_ptr(); + let end = start.add(haystack.len()); + let max = end.sub(self.min_haystack_len); + let mut cur = start; + + // N.B. I did experiment with unrolling the loop to deal with size(V) + // bytes at a time and 2*size(V) bytes at a time. The double unroll + // was marginally faster while the quadruple unroll was unambiguously + // slower. In the end, I decided the complexity from unrolling wasn't + // worth it. I used the memmem/krate/prebuilt/huge-en/ benchmarks to + // compare. + while cur <= max { + if let Some(chunki) = self.find_in_chunk(needle, cur, end, all) { + return Some(matched(start, cur, chunki)); + } + cur = cur.add(V::BYTES); + } + if cur < end { + let remaining = end.distance(cur); + debug_assert!( + remaining < self.min_haystack_len, + "remaining bytes should be smaller than the minimum haystack \ + length of {}, but there are {} bytes remaining", + self.min_haystack_len, + remaining, + ); + if remaining < needle.len() { + return None; + } + debug_assert!( + max < cur, + "after main loop, cur should have exceeded max", + ); + let overlap = cur.distance(max); + debug_assert!( + overlap > 0, + "overlap ({}) must always be non-zero", + overlap, + ); + debug_assert!( + overlap < V::BYTES, + "overlap ({}) cannot possibly be >= than a vector ({})", + overlap, + V::BYTES, + ); + // The mask has all of its bits set except for the first N least + // significant bits, where N=overlap. This way, any matches that + // occur in find_in_chunk within the overlap are automatically + // ignored. 
+ let mask = V::Mask::all_zeros_except_least_significant(overlap); + cur = max; + let m = self.find_in_chunk(needle, cur, end, mask); + if let Some(chunki) = m { + return Some(matched(start, cur, chunki)); + } + } + None + } + + /// Searches the given haystack for offsets that represent candidate + /// matches of the `needle` given to this finder's constructor. The offsets + /// returned, if they are a match, correspond to the starting offset of + /// `needle` in the given `haystack`. + /// + /// # Panics + /// + /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. + /// + /// # Safety + /// + /// Since this is meant to be used with vector functions, callers need to + /// specialize this inside of a function with a `target_feature` attribute. + /// Therefore, callers must ensure that whatever target feature is being + /// used supports the vector functions that this function is specialized + /// for. (For the specific vector functions used, see the Vector trait + /// implementations.) + #[inline(always)] + pub(crate) unsafe fn find_prefilter( + &self, + haystack: &[u8], + ) -> Option { + assert!( + haystack.len() >= self.min_haystack_len, + "haystack too small, should be at least {} but got {}", + self.min_haystack_len, + haystack.len(), + ); + + let start = haystack.as_ptr(); + let end = start.add(haystack.len()); + let max = end.sub(self.min_haystack_len); + let mut cur = start; + + // N.B. I did experiment with unrolling the loop to deal with size(V) + // bytes at a time and 2*size(V) bytes at a time. The double unroll + // was marginally faster while the quadruple unroll was unambiguously + // slower. In the end, I decided the complexity from unrolling wasn't + // worth it. I used the memmem/krate/prebuilt/huge-en/ benchmarks to + // compare. 
+ while cur <= max { + if let Some(chunki) = self.find_prefilter_in_chunk(cur) { + return Some(matched(start, cur, chunki)); + } + cur = cur.add(V::BYTES); + } + if cur < end { + // This routine immediately quits if a candidate match is found. + // That means that if we're here, no candidate matches have been + // found at or before 'ptr'. Thus, we don't need to mask anything + // out even though we might technically search part of the haystack + // that we've already searched (because we know it can't match). + cur = max; + if let Some(chunki) = self.find_prefilter_in_chunk(cur) { + return Some(matched(start, cur, chunki)); + } + } + None + } + + /// Search for an occurrence of our byte pair from the needle in the chunk + /// pointed to by cur, with the end of the haystack pointed to by end. + /// When an occurrence is found, memcmp is run to check if a match occurs + /// at the corresponding position. + /// + /// `mask` should have bits set corresponding the positions in the chunk + /// in which matches are considered. This is only used for the last vector + /// load where the beginning of the vector might have overlapped with the + /// last load in the main loop. The mask lets us avoid visiting positions + /// that have already been discarded as matches. + /// + /// # Safety + /// + /// It must be safe to do an unaligned read of size(V) bytes starting at + /// both (cur + self.index1) and (cur + self.index2). It must also be safe + /// to do unaligned loads on cur up to (end - needle.len()). 
+ #[inline(always)] + unsafe fn find_in_chunk( + &self, + needle: &[u8], + cur: *const u8, + end: *const u8, + mask: V::Mask, + ) -> Option { + let index1 = usize::from(self.pair.index1()); + let index2 = usize::from(self.pair.index2()); + let chunk1 = V::load_unaligned(cur.add(index1)); + let chunk2 = V::load_unaligned(cur.add(index2)); + let eq1 = chunk1.cmpeq(self.v1); + let eq2 = chunk2.cmpeq(self.v2); + + let mut offsets = eq1.and(eq2).movemask().and(mask); + while offsets.has_non_zero() { + let offset = offsets.first_offset(); + let cur = cur.add(offset); + if end.sub(needle.len()) < cur { + return None; + } + if is_equal_raw(needle.as_ptr(), cur, needle.len()) { + return Some(offset); + } + offsets = offsets.clear_least_significant_bit(); + } + None + } + + /// Search for an occurrence of our byte pair from the needle in the chunk + /// pointed to by cur. When an occurrence is found, its chunk-relative + /// offset is returned as a candidate; unlike `find_in_chunk`, the full + /// needle is not compared at that position. + /// + /// # Safety + /// + /// It must be safe to do an unaligned read of size(V) bytes starting at + /// both (cur + self.index1) and (cur + self.index2). It must also be safe + /// to do unaligned reads on cur up to (end - needle.len()). + #[inline(always)] + unsafe fn find_prefilter_in_chunk(&self, cur: *const u8) -> Option { + let index1 = usize::from(self.pair.index1()); + let index2 = usize::from(self.pair.index2()); + let chunk1 = V::load_unaligned(cur.add(index1)); + let chunk2 = V::load_unaligned(cur.add(index2)); + let eq1 = chunk1.cmpeq(self.v1); + let eq2 = chunk2.cmpeq(self.v2); + + let offsets = eq1.and(eq2).movemask(); + if !offsets.has_non_zero() { + return None; + } + Some(offsets.first_offset()) + } + + /// Returns the pair of offsets (into the needle) used to check as a + /// predicate before confirming whether a needle exists at a particular + /// position.
+ #[inline] + pub(crate) fn pair(&self) -> &Pair { + &self.pair + } + + /// Returns the minimum haystack length that this `Finder` can search. + /// + /// Providing a haystack to this `Finder` shorter than this length is + /// guaranteed to result in a panic. + #[inline(always)] + pub(crate) fn min_haystack_len(&self) -> usize { + self.min_haystack_len + } +} + +/// Accepts a chunk-relative offset and returns a haystack relative offset. +/// +/// This used to be marked `#[cold]` and `#[inline(never)]`, but I couldn't +/// observe a consistent measurable difference between that and just inlining +/// it. So we go with inlining it. +/// +/// # Safety +/// +/// Same as `ptr::offset_from` in addition to `cur >= start`. +#[inline(always)] +unsafe fn matched(start: *const u8, cur: *const u8, chunki: usize) -> usize { + cur.distance(start) + chunki +} + +// If you're looking for tests, those are run for each instantiation of the +// above code. So for example, see arch::x86_64::sse2::packedpair. diff --git a/vendor/memchr/src/arch/mod.rs b/vendor/memchr/src/arch/mod.rs new file mode 100644 index 0000000..2f63a1a --- /dev/null +++ b/vendor/memchr/src/arch/mod.rs @@ -0,0 +1,16 @@ +/*! +A module with low-level architecture dependent routines. + +These routines are useful as primitives for tasks not covered by the higher +level crate API. +*/ + +pub mod all; +pub(crate) mod generic; + +#[cfg(target_arch = "aarch64")] +pub mod aarch64; +#[cfg(target_arch = "wasm32")] +pub mod wasm32; +#[cfg(target_arch = "x86_64")] +pub mod x86_64; diff --git a/vendor/memchr/src/arch/wasm32/memchr.rs b/vendor/memchr/src/arch/wasm32/memchr.rs new file mode 100644 index 0000000..b0bbd1c --- /dev/null +++ b/vendor/memchr/src/arch/wasm32/memchr.rs @@ -0,0 +1,137 @@ +/*! +Wrapper routines for `memchr` and friends. + +These routines choose the best implementation at compile time.
(This is +different from `x86_64` because it is expected that `simd128` is almost always +available for `wasm32` targets.) +*/ + +macro_rules! defraw { + ($ty:ident, $find:ident, $start:ident, $end:ident, $($needles:ident),+) => {{ + #[cfg(target_feature = "simd128")] + { + use crate::arch::wasm32::simd128::memchr::$ty; + + debug!("chose simd128 for {}", stringify!($ty)); + debug_assert!($ty::is_available()); + // SAFETY: We know that wasm memchr is always available whenever + // code is compiled for `wasm32` with the `simd128` target feature + // enabled. + $ty::new_unchecked($($needles),+).$find($start, $end) + } + #[cfg(not(target_feature = "simd128"))] + { + use crate::arch::all::memchr::$ty; + + debug!( + "no simd128 feature available, using fallback for {}", + stringify!($ty), + ); + $ty::new($($needles),+).$find($start, $end) + } + }} +} + +/// memchr, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `One::find_raw`. +#[inline(always)] +pub(crate) unsafe fn memchr_raw( + n1: u8, + start: *const u8, + end: *const u8, +) -> Option<*const u8> { + defraw!(One, find_raw, start, end, n1) +} + +/// memrchr, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `One::rfind_raw`. +#[inline(always)] +pub(crate) unsafe fn memrchr_raw( + n1: u8, + start: *const u8, + end: *const u8, +) -> Option<*const u8> { + defraw!(One, rfind_raw, start, end, n1) +} + +/// memchr2, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `Two::find_raw`. +#[inline(always)] +pub(crate) unsafe fn memchr2_raw( + n1: u8, + n2: u8, + start: *const u8, + end: *const u8, +) -> Option<*const u8> { + defraw!(Two, find_raw, start, end, n1, n2) +} + +/// memrchr2, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `Two::rfind_raw`. 
+#[inline(always)] +pub(crate) unsafe fn memrchr2_raw( + n1: u8, + n2: u8, + start: *const u8, + end: *const u8, +) -> Option<*const u8> { + defraw!(Two, rfind_raw, start, end, n1, n2) +} + +/// memchr3, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `Three::find_raw`. +#[inline(always)] +pub(crate) unsafe fn memchr3_raw( + n1: u8, + n2: u8, + n3: u8, + start: *const u8, + end: *const u8, +) -> Option<*const u8> { + defraw!(Three, find_raw, start, end, n1, n2, n3) +} + +/// memrchr3, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `Three::rfind_raw`. +#[inline(always)] +pub(crate) unsafe fn memrchr3_raw( + n1: u8, + n2: u8, + n3: u8, + start: *const u8, + end: *const u8, +) -> Option<*const u8> { + defraw!(Three, rfind_raw, start, end, n1, n2, n3) +} + +/// Count all matching bytes, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `One::count_raw`. +#[inline(always)] +pub(crate) unsafe fn count_raw( + n1: u8, + start: *const u8, + end: *const u8, +) -> usize { + defraw!(One, count_raw, start, end, n1) +} diff --git a/vendor/memchr/src/arch/wasm32/mod.rs b/vendor/memchr/src/arch/wasm32/mod.rs new file mode 100644 index 0000000..209f876 --- /dev/null +++ b/vendor/memchr/src/arch/wasm32/mod.rs @@ -0,0 +1,7 @@ +/*! +Vector algorithms for the `wasm32` target. +*/ + +pub mod simd128; + +pub(crate) mod memchr; diff --git a/vendor/memchr/src/arch/wasm32/simd128/memchr.rs b/vendor/memchr/src/arch/wasm32/simd128/memchr.rs new file mode 100644 index 0000000..fa314c9 --- /dev/null +++ b/vendor/memchr/src/arch/wasm32/simd128/memchr.rs @@ -0,0 +1,1020 @@ +/*! +This module defines 128-bit vector implementations of `memchr` and friends. + +The main types in this module are [`One`], [`Two`] and [`Three`]. They are for +searching for one, two or three distinct bytes, respectively, in a haystack. 
+Each type also has corresponding double ended iterators. These searchers are +typically much faster than scalar routines accomplishing the same task. + +The `One` searcher also provides a [`One::count`] routine for efficiently +counting the number of times a single byte occurs in a haystack. This is +useful, for example, for counting the number of lines in a haystack. This +routine exists because it is usually faster, especially with a high match +count, then using [`One::find`] repeatedly. ([`OneIter`] specializes its +`Iterator::count` implementation to use this routine.) + +Only one, two and three bytes are supported because three bytes is about +the point where one sees diminishing returns. Beyond this point and it's +probably (but not necessarily) better to just use a simple `[bool; 256]` array +or similar. However, it depends mightily on the specific work-load and the +expected match frequency. +*/ + +use core::arch::wasm32::v128; + +use crate::{arch::generic::memchr as generic, ext::Pointer, vector::Vector}; + +/// Finds all occurrences of a single byte in a haystack. +#[derive(Clone, Copy, Debug)] +pub struct One(generic::One); + +impl One { + /// Create a new searcher that finds occurrences of the needle byte given. + /// + /// This particular searcher is specialized to use simd128 vector + /// instructions that typically make it quite fast. + /// + /// If simd128 is unavailable in the current environment, then `None` is + /// returned. + #[inline] + pub fn new(needle: u8) -> Option { + if One::is_available() { + // SAFETY: we check that simd128 is available above. + unsafe { Some(One::new_unchecked(needle)) } + } else { + None + } + } + + /// Create a new finder specific to simd128 vectors and routines without + /// checking that simd128 is available. + /// + /// # Safety + /// + /// Callers must guarantee that it is safe to execute `simd128` + /// instructions in the current environment. 
+ #[target_feature(enable = "simd128")] + #[inline] + pub unsafe fn new_unchecked(needle: u8) -> One { + One(generic::One::new(needle)) + } + + /// Returns true when this implementation is available in the current + /// environment. + /// + /// When this is true, it is guaranteed that [`One::new`] will return + /// a `Some` value. Similarly, when it is false, it is guaranteed that + /// `One::new` will return a `None` value. + /// + /// Note also that for the lifetime of a single program, if this returns + /// true then it will always return true. + #[inline] + pub fn is_available() -> bool { + #[cfg(target_feature = "simd128")] + { + true + } + #[cfg(not(target_feature = "simd128"))] + { + false + } + } + + /// Return the first occurrence of one of the needle bytes in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value is `haystack.len() - 1`. + #[inline] + pub fn find(&self, haystack: &[u8]) -> Option { + // SAFETY: `find_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. + unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.find_raw(s, e) + }) + } + } + + /// Return the last occurrence of one of the needle bytes in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value is `haystack.len() - 1`. + #[inline] + pub fn rfind(&self, haystack: &[u8]) -> Option { + // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. + unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.rfind_raw(s, e) + }) + } + } + + /// Counts all occurrences of this byte in the given haystack. 
+ #[inline] + pub fn count(&self, haystack: &[u8]) -> usize { + // SAFETY: All of our pointers are derived directly from a borrowed + // slice, which is guaranteed to be valid. + unsafe { + let start = haystack.as_ptr(); + let end = start.add(haystack.len()); + self.count_raw(start, end) + } + } + + /// Like `find`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn find_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + if end.distance(start) < v128::BYTES { + // SAFETY: We require the caller to pass valid start/end pointers. + return generic::fwd_byte_by_byte(start, end, |b| { + b == self.0.needle1() + }); + } + // SAFETY: Building a `One` means it's safe to call 'simd128' routines. + // Also, we've checked that our haystack is big enough to run on the + // vector routine. Pointer validity is caller's responsibility. + self.find_raw_impl(start, end) + } + + /// Like `rfind`, but accepts and returns raw pointers. 
+ /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn rfind_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + if end.distance(start) < v128::BYTES { + // SAFETY: We require the caller to pass valid start/end pointers. + return generic::rev_byte_by_byte(start, end, |b| { + b == self.0.needle1() + }); + } + // SAFETY: Building a `One` means it's safe to call 'simd128' routines. + // Also, we've checked that our haystack is big enough to run on the + // vector routine. Pointer validity is caller's responsibility. + self.rfind_raw_impl(start, end) + } + + /// Counts all occurrences of this byte in the given haystack represented + /// by raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. 
+ /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn count_raw(&self, start: *const u8, end: *const u8) -> usize { + if start >= end { + return 0; + } + if end.distance(start) < v128::BYTES { + // SAFETY: We require the caller to pass valid start/end pointers. + return generic::count_byte_by_byte(start, end, |b| { + b == self.0.needle1() + }); + } + // SAFETY: Building a `One` means it's safe to call 'simd128' routines. + // Also, we've checked that our haystack is big enough to run on the + // vector routine. Pointer validity is caller's responsibility. + self.count_raw_impl(start, end) + } + + /// Execute a search using simd128 vectors and routines. + /// + /// # Safety + /// + /// Same as [`One::find_raw`], except the distance between `start` and + /// `end` must be at least the size of a simd128 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `One`, which can only be constructed + /// when it is safe to call `simd128` routines.) + #[target_feature(enable = "simd128")] + #[inline] + unsafe fn find_raw_impl( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.0.find_raw(start, end) + } + + /// Execute a search using simd128 vectors and routines. 
+ /// + /// # Safety + /// + /// Same as [`One::rfind_raw`], except the distance between `start` and + /// `end` must be at least the size of a simd128 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `One`, which can only be constructed + /// when it is safe to call `simd128` routines.) + #[target_feature(enable = "simd128")] + #[inline] + unsafe fn rfind_raw_impl( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.0.rfind_raw(start, end) + } + + /// Execute a count using simd128 vectors and routines. + /// + /// # Safety + /// + /// Same as [`One::count_raw`], except the distance between `start` and + /// `end` must be at least the size of a simd128 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `One`, which can only be constructed + /// when it is safe to call `simd128` routines.) + #[target_feature(enable = "simd128")] + #[inline] + unsafe fn count_raw_impl( + &self, + start: *const u8, + end: *const u8, + ) -> usize { + self.0.count_raw(start, end) + } + + /// Returns an iterator over all occurrences of the needle byte in the + /// given haystack. + /// + /// The iterator returned implements `DoubleEndedIterator`. This means it + /// can also be used to find occurrences in reverse order. + #[inline] + pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> OneIter<'a, 'h> { + OneIter { searcher: self, it: generic::Iter::new(haystack) } + } +} + +/// An iterator over all occurrences of a single byte in a haystack. +/// +/// This iterator implements `DoubleEndedIterator`, which means it can also be +/// used to find occurrences in reverse order. +/// +/// This iterator is created by the [`One::iter`] method. +/// +/// The lifetime parameters are as follows: +/// +/// * `'a` refers to the lifetime of the underlying [`One`] searcher. 
+/// * `'h` refers to the lifetime of the haystack being searched. +#[derive(Clone, Debug)] +pub struct OneIter<'a, 'h> { + searcher: &'a One, + it: generic::Iter<'h>, +} + +impl<'a, 'h> Iterator for OneIter<'a, 'h> { + type Item = usize; + + #[inline] + fn next(&mut self) -> Option { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'find_raw' falls within the bounds of the start and end pointer. + unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } + } + + #[inline] + fn count(self) -> usize { + self.it.count(|s, e| { + // SAFETY: We rely on our generic iterator to return valid start + // and end pointers. + unsafe { self.searcher.count_raw(s, e) } + }) + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.it.size_hint() + } +} + +impl<'a, 'h> DoubleEndedIterator for OneIter<'a, 'h> { + #[inline] + fn next_back(&mut self) -> Option { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'rfind_raw' falls within the bounds of the start and end pointer. + unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } + } +} + +impl<'a, 'h> core::iter::FusedIterator for OneIter<'a, 'h> {} + +/// Finds all occurrences of two bytes in a haystack. +/// +/// That is, this reports matches of one of two possible bytes. For example, +/// searching for `a` or `b` in `afoobar` would report matches at offsets `0`, +/// `4` and `5`. +#[derive(Clone, Copy, Debug)] +pub struct Two(generic::Two); + +impl Two { + /// Create a new searcher that finds occurrences of the needle bytes given. + /// + /// This particular searcher is specialized to use simd128 vector + /// instructions that typically make it quite fast. + /// + /// If simd128 is unavailable in the current environment, then `None` is + /// returned. 
+ #[inline] + pub fn new(needle1: u8, needle2: u8) -> Option { + if Two::is_available() { + // SAFETY: we check that simd128 is available above. + unsafe { Some(Two::new_unchecked(needle1, needle2)) } + } else { + None + } + } + + /// Create a new finder specific to simd128 vectors and routines without + /// checking that simd128 is available. + /// + /// # Safety + /// + /// Callers must guarantee that it is safe to execute `simd128` + /// instructions in the current environment. + #[target_feature(enable = "simd128")] + #[inline] + pub unsafe fn new_unchecked(needle1: u8, needle2: u8) -> Two { + Two(generic::Two::new(needle1, needle2)) + } + + /// Returns true when this implementation is available in the current + /// environment. + /// + /// When this is true, it is guaranteed that [`Two::new`] will return + /// a `Some` value. Similarly, when it is false, it is guaranteed that + /// `Two::new` will return a `None` value. + /// + /// Note also that for the lifetime of a single program, if this returns + /// true then it will always return true. + #[inline] + pub fn is_available() -> bool { + #[cfg(target_feature = "simd128")] + { + true + } + #[cfg(not(target_feature = "simd128"))] + { + false + } + } + + /// Return the first occurrence of one of the needle bytes in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value is `haystack.len() - 1`. + #[inline] + pub fn find(&self, haystack: &[u8]) -> Option { + // SAFETY: `find_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. + unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.find_raw(s, e) + }) + } + } + + /// Return the last occurrence of one of the needle bytes in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. 
Its maximum + /// value is `haystack.len() - 1`. + #[inline] + pub fn rfind(&self, haystack: &[u8]) -> Option { + // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. + unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.rfind_raw(s, e) + }) + } + } + + /// Like `find`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn find_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + if end.distance(start) < v128::BYTES { + // SAFETY: We require the caller to pass valid start/end pointers. + return generic::fwd_byte_by_byte(start, end, |b| { + b == self.0.needle1() || b == self.0.needle2() + }); + } + // SAFETY: Building a `Two` means it's safe to call 'simd128' routines. + // Also, we've checked that our haystack is big enough to run on the + // vector routine. Pointer validity is caller's responsibility. 
+ self.find_raw_impl(start, end) + } + + /// Like `rfind`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn rfind_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + if end.distance(start) < v128::BYTES { + // SAFETY: We require the caller to pass valid start/end pointers. + return generic::rev_byte_by_byte(start, end, |b| { + b == self.0.needle1() || b == self.0.needle2() + }); + } + // SAFETY: Building a `Two` means it's safe to call 'simd128' routines. + // Also, we've checked that our haystack is big enough to run on the + // vector routine. Pointer validity is caller's responsibility. + self.rfind_raw_impl(start, end) + } + + /// Execute a search using simd128 vectors and routines. + /// + /// # Safety + /// + /// Same as [`Two::find_raw`], except the distance between `start` and + /// `end` must be at least the size of a simd128 vector (in bytes). 
+ /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Two`, which can only be constructed + /// when it is safe to call `simd128` routines.) + #[target_feature(enable = "simd128")] + #[inline] + unsafe fn find_raw_impl( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.0.find_raw(start, end) + } + + /// Execute a search using simd128 vectors and routines. + /// + /// # Safety + /// + /// Same as [`Two::rfind_raw`], except the distance between `start` and + /// `end` must be at least the size of a simd128 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Two`, which can only be constructed + /// when it is safe to call `simd128` routines.) + #[target_feature(enable = "simd128")] + #[inline] + unsafe fn rfind_raw_impl( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.0.rfind_raw(start, end) + } + + /// Returns an iterator over all occurrences of the needle bytes in the + /// given haystack. + /// + /// The iterator returned implements `DoubleEndedIterator`. This means it + /// can also be used to find occurrences in reverse order. + #[inline] + pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> TwoIter<'a, 'h> { + TwoIter { searcher: self, it: generic::Iter::new(haystack) } + } +} + +/// An iterator over all occurrences of two possible bytes in a haystack. +/// +/// This iterator implements `DoubleEndedIterator`, which means it can also be +/// used to find occurrences in reverse order. +/// +/// This iterator is created by the [`Two::iter`] method. +/// +/// The lifetime parameters are as follows: +/// +/// * `'a` refers to the lifetime of the underlying [`Two`] searcher. +/// * `'h` refers to the lifetime of the haystack being searched. 
+#[derive(Clone, Debug)] +pub struct TwoIter<'a, 'h> { + searcher: &'a Two, + it: generic::Iter<'h>, +} + +impl<'a, 'h> Iterator for TwoIter<'a, 'h> { + type Item = usize; + + #[inline] + fn next(&mut self) -> Option { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'find_raw' falls within the bounds of the start and end pointer. + unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.it.size_hint() + } +} + +impl<'a, 'h> DoubleEndedIterator for TwoIter<'a, 'h> { + #[inline] + fn next_back(&mut self) -> Option { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'rfind_raw' falls within the bounds of the start and end pointer. + unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } + } +} + +impl<'a, 'h> core::iter::FusedIterator for TwoIter<'a, 'h> {} + +/// Finds all occurrences of three bytes in a haystack. +/// +/// That is, this reports matches of one of three possible bytes. For example, +/// searching for `a`, `b` or `o` in `afoobar` would report matches at offsets +/// `0`, `2`, `3`, `4` and `5`. +#[derive(Clone, Copy, Debug)] +pub struct Three(generic::Three); + +impl Three { + /// Create a new searcher that finds occurrences of the needle bytes given. + /// + /// This particular searcher is specialized to use simd128 vector + /// instructions that typically make it quite fast. + /// + /// If simd128 is unavailable in the current environment, then `None` is + /// returned. + #[inline] + pub fn new(needle1: u8, needle2: u8, needle3: u8) -> Option { + if Three::is_available() { + // SAFETY: we check that simd128 is available above. 
+ unsafe { Some(Three::new_unchecked(needle1, needle2, needle3)) } + } else { + None + } + } + + /// Create a new finder specific to simd128 vectors and routines without + /// checking that simd128 is available. + /// + /// # Safety + /// + /// Callers must guarantee that it is safe to execute `simd128` + /// instructions in the current environment. + #[target_feature(enable = "simd128")] + #[inline] + pub unsafe fn new_unchecked( + needle1: u8, + needle2: u8, + needle3: u8, + ) -> Three { + Three(generic::Three::new(needle1, needle2, needle3)) + } + + /// Returns true when this implementation is available in the current + /// environment. + /// + /// When this is true, it is guaranteed that [`Three::new`] will return + /// a `Some` value. Similarly, when it is false, it is guaranteed that + /// `Three::new` will return a `None` value. + /// + /// Note also that for the lifetime of a single program, if this returns + /// true then it will always return true. + #[inline] + pub fn is_available() -> bool { + #[cfg(target_feature = "simd128")] + { + true + } + #[cfg(not(target_feature = "simd128"))] + { + false + } + } + + /// Return the first occurrence of one of the needle bytes in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value is `haystack.len() - 1`. + #[inline] + pub fn find(&self, haystack: &[u8]) -> Option { + // SAFETY: `find_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. + unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.find_raw(s, e) + }) + } + } + + /// Return the last occurrence of one of the needle bytes in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value is `haystack.len() - 1`. 
+ #[inline] + pub fn rfind(&self, haystack: &[u8]) -> Option { + // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. + unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.rfind_raw(s, e) + }) + } + } + + /// Like `find`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn find_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + if end.distance(start) < v128::BYTES { + // SAFETY: We require the caller to pass valid start/end pointers. + return generic::fwd_byte_by_byte(start, end, |b| { + b == self.0.needle1() + || b == self.0.needle2() + || b == self.0.needle3() + }); + } + // SAFETY: Building a `Three` means it's safe to call 'simd128' + // routines. Also, we've checked that our haystack is big enough to run + // on the vector routine. Pointer validity is caller's responsibility. 
+ self.find_raw_impl(start, end) + } + + /// Like `rfind`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn rfind_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + if end.distance(start) < v128::BYTES { + // SAFETY: We require the caller to pass valid start/end pointers. + return generic::rev_byte_by_byte(start, end, |b| { + b == self.0.needle1() + || b == self.0.needle2() + || b == self.0.needle3() + }); + } + // SAFETY: Building a `Three` means it's safe to call 'simd128' + // routines. Also, we've checked that our haystack is big enough to run + // on the vector routine. Pointer validity is caller's responsibility. + self.rfind_raw_impl(start, end) + } + + /// Execute a search using simd128 vectors and routines. + /// + /// # Safety + /// + /// Same as [`Three::find_raw`], except the distance between `start` and + /// `end` must be at least the size of a simd128 vector (in bytes). 
+ /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Three`, which can only be constructed + /// when it is safe to call `simd128` routines.) + #[target_feature(enable = "simd128")] + #[inline] + unsafe fn find_raw_impl( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.0.find_raw(start, end) + } + + /// Execute a search using simd128 vectors and routines. + /// + /// # Safety + /// + /// Same as [`Three::rfind_raw`], except the distance between `start` and + /// `end` must be at least the size of a simd128 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Three`, which can only be constructed + /// when it is safe to call `simd128` routines.) + #[target_feature(enable = "simd128")] + #[inline] + unsafe fn rfind_raw_impl( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.0.rfind_raw(start, end) + } + + /// Returns an iterator over all occurrences of the needle byte in the + /// given haystack. + /// + /// The iterator returned implements `DoubleEndedIterator`. This means it + /// can also be used to find occurrences in reverse order. + #[inline] + pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> ThreeIter<'a, 'h> { + ThreeIter { searcher: self, it: generic::Iter::new(haystack) } + } +} + +/// An iterator over all occurrences of three possible bytes in a haystack. +/// +/// This iterator implements `DoubleEndedIterator`, which means it can also be +/// used to find occurrences in reverse order. +/// +/// This iterator is created by the [`Three::iter`] method. +/// +/// The lifetime parameters are as follows: +/// +/// * `'a` refers to the lifetime of the underlying [`Three`] searcher. +/// * `'h` refers to the lifetime of the haystack being searched. 
+#[derive(Clone, Debug)] +pub struct ThreeIter<'a, 'h> { + searcher: &'a Three, + it: generic::Iter<'h>, +} + +impl<'a, 'h> Iterator for ThreeIter<'a, 'h> { + type Item = usize; + + #[inline] + fn next(&mut self) -> Option { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'find_raw' falls within the bounds of the start and end pointer. + unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.it.size_hint() + } +} + +impl<'a, 'h> DoubleEndedIterator for ThreeIter<'a, 'h> { + #[inline] + fn next_back(&mut self) -> Option { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'rfind_raw' falls within the bounds of the start and end pointer. + unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } + } +} + +impl<'a, 'h> core::iter::FusedIterator for ThreeIter<'a, 'h> {} + +#[cfg(test)] +mod tests { + use super::*; + + define_memchr_quickcheck!(super); + + #[test] + fn forward_one() { + crate::tests::memchr::Runner::new(1).forward_iter( + |haystack, needles| { + Some(One::new(needles[0])?.iter(haystack).collect()) + }, + ) + } + + #[test] + fn reverse_one() { + crate::tests::memchr::Runner::new(1).reverse_iter( + |haystack, needles| { + Some(One::new(needles[0])?.iter(haystack).rev().collect()) + }, + ) + } + + #[test] + fn count_one() { + crate::tests::memchr::Runner::new(1).count_iter(|haystack, needles| { + Some(One::new(needles[0])?.iter(haystack).count()) + }) + } + + #[test] + fn forward_two() { + crate::tests::memchr::Runner::new(2).forward_iter( + |haystack, needles| { + let n1 = needles.get(0).copied()?; + let n2 = needles.get(1).copied()?; + Some(Two::new(n1, n2)?.iter(haystack).collect()) + }, + ) + } + + #[test] + fn reverse_two() { + crate::tests::memchr::Runner::new(2).reverse_iter( + 
|haystack, needles| { + let n1 = needles.get(0).copied()?; + let n2 = needles.get(1).copied()?; + Some(Two::new(n1, n2)?.iter(haystack).rev().collect()) + }, + ) + } + + #[test] + fn forward_three() { + crate::tests::memchr::Runner::new(3).forward_iter( + |haystack, needles| { + let n1 = needles.get(0).copied()?; + let n2 = needles.get(1).copied()?; + let n3 = needles.get(2).copied()?; + Some(Three::new(n1, n2, n3)?.iter(haystack).collect()) + }, + ) + } + + #[test] + fn reverse_three() { + crate::tests::memchr::Runner::new(3).reverse_iter( + |haystack, needles| { + let n1 = needles.get(0).copied()?; + let n2 = needles.get(1).copied()?; + let n3 = needles.get(2).copied()?; + Some(Three::new(n1, n2, n3)?.iter(haystack).rev().collect()) + }, + ) + } +} diff --git a/vendor/memchr/src/arch/wasm32/simd128/mod.rs b/vendor/memchr/src/arch/wasm32/simd128/mod.rs new file mode 100644 index 0000000..b55d1f0 --- /dev/null +++ b/vendor/memchr/src/arch/wasm32/simd128/mod.rs @@ -0,0 +1,6 @@ +/*! +Algorithms for the `wasm32` target using 128-bit vectors via simd128. +*/ + +pub mod memchr; +pub mod packedpair; diff --git a/vendor/memchr/src/arch/wasm32/simd128/packedpair.rs b/vendor/memchr/src/arch/wasm32/simd128/packedpair.rs new file mode 100644 index 0000000..b629377 --- /dev/null +++ b/vendor/memchr/src/arch/wasm32/simd128/packedpair.rs @@ -0,0 +1,229 @@ +/*! +A 128-bit vector implementation of the "packed pair" SIMD algorithm. + +The "packed pair" algorithm is based on the [generic SIMD] algorithm. The main +difference is that it (by default) uses a background distribution of byte +frequencies to heuristically select the pair of bytes to search for. + +[generic SIMD]: http://0x80.pl/articles/simd-strfind.html#first-and-last +*/ + +use core::arch::wasm32::v128; + +use crate::arch::{all::packedpair::Pair, generic::packedpair}; + +/// A "packed pair" finder that uses 128-bit vector operations. 
+/// +/// This finder picks two bytes that it believes have high predictive power +/// for indicating an overall match of a needle. Depending on whether +/// `Finder::find` or `Finder::find_prefilter` is used, it reports offsets +/// where the needle matches or could match. In the prefilter case, candidates +/// are reported whenever the [`Pair`] of bytes given matches. +#[derive(Clone, Copy, Debug)] +pub struct Finder(packedpair::Finder); + +impl Finder { + /// Create a new pair searcher. The searcher returned can either report + /// exact matches of `needle` or act as a prefilter and report candidate + /// positions of `needle`. + /// + /// If simd128 is unavailable in the current environment or if a [`Pair`] + /// could not be constructed from the needle given, then `None` is + /// returned. + #[inline] + pub fn new(needle: &[u8]) -> Option { + Finder::with_pair(needle, Pair::new(needle)?) + } + + /// Create a new "packed pair" finder using the pair of bytes given. + /// + /// This constructor permits callers to control precisely which pair of + /// bytes is used as a predicate. + /// + /// If simd128 is unavailable in the current environment, then `None` is + /// returned. + #[inline] + pub fn with_pair(needle: &[u8], pair: Pair) -> Option { + if Finder::is_available() { + // SAFETY: we check that simd128 is available above. We are also + // guaranteed to have needle.len() > 1 because we have a valid + // Pair. + unsafe { Some(Finder::with_pair_impl(needle, pair)) } + } else { + None + } + } + + /// Create a new `Finder` specific to simd128 vectors and routines. + /// + /// # Safety + /// + /// Same as the safety for `packedpair::Finder::new`, and callers must also + /// ensure that simd128 is available. 
+ #[target_feature(enable = "simd128")] + #[inline] + unsafe fn with_pair_impl(needle: &[u8], pair: Pair) -> Finder { + let finder = packedpair::Finder::::new(needle, pair); + Finder(finder) + } + + /// Returns true when this implementation is available in the current + /// environment. + /// + /// When this is true, it is guaranteed that [`Finder::with_pair`] will + /// return a `Some` value. Similarly, when it is false, it is guaranteed + /// that `Finder::with_pair` will return a `None` value. Notice that this + /// does not guarantee that [`Finder::new`] will return a `Finder`. Namely, + /// even when `Finder::is_available` is true, it is not guaranteed that a + /// valid [`Pair`] can be found from the needle given. + /// + /// Note also that for the lifetime of a single program, if this returns + /// true then it will always return true. + #[inline] + pub fn is_available() -> bool { + #[cfg(target_feature = "simd128")] + { + true + } + #[cfg(not(target_feature = "simd128"))] + { + false + } + } + + /// Execute a search using wasm32 v128 vectors and routines. + /// + /// # Panics + /// + /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. + #[inline] + pub fn find(&self, haystack: &[u8], needle: &[u8]) -> Option { + self.find_impl(haystack, needle) + } + + /// Execute a search using wasm32 v128 vectors and routines. + /// + /// # Panics + /// + /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. + #[inline] + pub fn find_prefilter(&self, haystack: &[u8]) -> Option { + self.find_prefilter_impl(haystack) + } + + /// Execute a search using wasm32 v128 vectors and routines. + /// + /// # Panics + /// + /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. + /// + /// # Safety + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Finder`, which can only be constructed + /// when it is safe to call `simd128` routines.) 
+ #[target_feature(enable = "simd128")] + #[inline] + fn find_impl(&self, haystack: &[u8], needle: &[u8]) -> Option { + // SAFETY: The target feature safety obligation is automatically + // fulfilled by virtue of being a method on `Finder`, which can only be + // constructed when it is safe to call `simd128` routines. + unsafe { self.0.find(haystack, needle) } + } + + /// Execute a prefilter search using wasm32 v128 vectors and routines. + /// + /// # Panics + /// + /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. + /// + /// # Safety + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Finder`, which can only be constructed + /// when it is safe to call `simd128` routines.) + #[target_feature(enable = "simd128")] + #[inline] + fn find_prefilter_impl(&self, haystack: &[u8]) -> Option { + // SAFETY: The target feature safety obligation is automatically + // fulfilled by virtue of being a method on `Finder`, which can only be + // constructed when it is safe to call `simd128` routines. + unsafe { self.0.find_prefilter(haystack) } + } + + /// Returns the pair of offsets (into the needle) used to check as a + /// predicate before confirming whether a needle exists at a particular + /// position. + #[inline] + pub fn pair(&self) -> &Pair { + self.0.pair() + } + + /// Returns the minimum haystack length that this `Finder` can search. + /// + /// Using a haystack with length smaller than this in a search will result + /// in a panic. The reason for this restriction is that this finder is + /// meant to be a low-level component that is part of a larger substring + /// strategy. In that sense, it avoids trying to handle all cases and + /// instead only handles the cases that it can handle very well. 
+ #[inline] + pub fn min_haystack_len(&self) -> usize { + self.0.min_haystack_len() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn find(haystack: &[u8], needle: &[u8]) -> Option> { + let f = Finder::new(needle)?; + if haystack.len() < f.min_haystack_len() { + return None; + } + Some(f.find(haystack, needle)) + } + + define_substring_forward_quickcheck!(find); + + #[test] + fn forward_substring() { + crate::tests::substring::Runner::new().fwd(find).run() + } + + #[test] + fn forward_packedpair() { + fn find( + haystack: &[u8], + needle: &[u8], + index1: u8, + index2: u8, + ) -> Option> { + let pair = Pair::with_indices(needle, index1, index2)?; + let f = Finder::with_pair(needle, pair)?; + if haystack.len() < f.min_haystack_len() { + return None; + } + Some(f.find(haystack, needle)) + } + crate::tests::packedpair::Runner::new().fwd(find).run() + } + + #[test] + fn forward_packedpair_prefilter() { + fn find( + haystack: &[u8], + needle: &[u8], + index1: u8, + index2: u8, + ) -> Option> { + let pair = Pair::with_indices(needle, index1, index2)?; + let f = Finder::with_pair(needle, pair)?; + if haystack.len() < f.min_haystack_len() { + return None; + } + Some(f.find_prefilter(haystack)) + } + crate::tests::packedpair::Runner::new().fwd(find).run() + } +} diff --git a/vendor/memchr/src/arch/x86_64/avx2/memchr.rs b/vendor/memchr/src/arch/x86_64/avx2/memchr.rs new file mode 100644 index 0000000..59f8c7f --- /dev/null +++ b/vendor/memchr/src/arch/x86_64/avx2/memchr.rs @@ -0,0 +1,1352 @@ +/*! +This module defines 256-bit vector implementations of `memchr` and friends. + +The main types in this module are [`One`], [`Two`] and [`Three`]. They are for +searching for one, two or three distinct bytes, respectively, in a haystack. +Each type also has corresponding double ended iterators. These searchers are +typically much faster than scalar routines accomplishing the same task. 
+ +The `One` searcher also provides a [`One::count`] routine for efficiently +counting the number of times a single byte occurs in a haystack. This is +useful, for example, for counting the number of lines in a haystack. This +routine exists because it is usually faster, especially with a high match +count, then using [`One::find`] repeatedly. ([`OneIter`] specializes its +`Iterator::count` implementation to use this routine.) + +Only one, two and three bytes are supported because three bytes is about +the point where one sees diminishing returns. Beyond this point and it's +probably (but not necessarily) better to just use a simple `[bool; 256]` array +or similar. However, it depends mightily on the specific work-load and the +expected match frequency. +*/ + +use core::arch::x86_64::{__m128i, __m256i}; + +use crate::{arch::generic::memchr as generic, ext::Pointer, vector::Vector}; + +/// Finds all occurrences of a single byte in a haystack. +#[derive(Clone, Copy, Debug)] +pub struct One { + /// Used for haystacks less than 32 bytes. + sse2: generic::One<__m128i>, + /// Used for haystacks bigger than 32 bytes. + avx2: generic::One<__m256i>, +} + +impl One { + /// Create a new searcher that finds occurrences of the needle byte given. + /// + /// This particular searcher is specialized to use AVX2 vector instructions + /// that typically make it quite fast. (SSE2 is used for haystacks that + /// are too short to accommodate an AVX2 vector.) + /// + /// If either SSE2 or AVX2 is unavailable in the current environment, then + /// `None` is returned. + #[inline] + pub fn new(needle: u8) -> Option { + if One::is_available() { + // SAFETY: we check that sse2 and avx2 are available above. + unsafe { Some(One::new_unchecked(needle)) } + } else { + None + } + } + + /// Create a new finder specific to AVX2 vectors and routines without + /// checking that either SSE2 or AVX2 is available. 
+ /// + /// # Safety + /// + /// Callers must guarantee that it is safe to execute both `sse2` and + /// `avx2` instructions in the current environment. + /// + /// Note that it is a common misconception that if one compiles for an + /// `x86_64` target, then they therefore automatically have access to SSE2 + /// instructions. While this is almost always the case, it isn't true in + /// 100% of cases. + #[target_feature(enable = "sse2", enable = "avx2")] + #[inline] + pub unsafe fn new_unchecked(needle: u8) -> One { + One { + sse2: generic::One::new(needle), + avx2: generic::One::new(needle), + } + } + + /// Returns true when this implementation is available in the current + /// environment. + /// + /// When this is true, it is guaranteed that [`One::new`] will return + /// a `Some` value. Similarly, when it is false, it is guaranteed that + /// `One::new` will return a `None` value. + /// + /// Note also that for the lifetime of a single program, if this returns + /// true then it will always return true. + #[inline] + pub fn is_available() -> bool { + #[cfg(not(target_feature = "sse2"))] + { + false + } + #[cfg(target_feature = "sse2")] + { + #[cfg(target_feature = "avx2")] + { + true + } + #[cfg(not(target_feature = "avx2"))] + { + #[cfg(feature = "std")] + { + std::is_x86_feature_detected!("avx2") + } + #[cfg(not(feature = "std"))] + { + false + } + } + } + } + + /// Return the first occurrence of one of the needle bytes in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value is `haystack.len() - 1`. + #[inline] + pub fn find(&self, haystack: &[u8]) -> Option { + // SAFETY: `find_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. 
+ unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.find_raw(s, e) + }) + } + } + + /// Return the last occurrence of one of the needle bytes in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value is `haystack.len() - 1`. + #[inline] + pub fn rfind(&self, haystack: &[u8]) -> Option { + // SAFETY: `find_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. + unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.rfind_raw(s, e) + }) + } + } + + /// Counts all occurrences of this byte in the given haystack. + #[inline] + pub fn count(&self, haystack: &[u8]) -> usize { + // SAFETY: All of our pointers are derived directly from a borrowed + // slice, which is guaranteed to be valid. + unsafe { + let start = haystack.as_ptr(); + let end = start.add(haystack.len()); + self.count_raw(start, end) + } + } + + /// Like `find`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. 
+ /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn find_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + let len = end.distance(start); + if len < __m256i::BYTES { + return if len < __m128i::BYTES { + // SAFETY: We require the caller to pass valid start/end + // pointers. + generic::fwd_byte_by_byte(start, end, |b| { + b == self.sse2.needle1() + }) + } else { + // SAFETY: We require the caller to pass valid start/end + // pointers. + self.find_raw_sse2(start, end) + }; + } + // SAFETY: Building a `One` means it's safe to call both 'sse2' and + // 'avx2' routines. Also, we've checked that our haystack is big + // enough to run on the vector routine. Pointer validity is caller's + // responsibility. + // + // Note that we could call `self.avx2.find_raw` directly here. But that + // means we'd have to annotate this routine with `target_feature`. + // Which is fine, because this routine is `unsafe` anyway and the + // `target_feature` obligation is met by virtue of building a `One`. + // The real problem is that a routine with a `target_feature` + // annotation generally can't be inlined into caller code unless + // the caller code has the same target feature annotations. Namely, + // the common case (at time of writing) is for calling code to not + // have the `avx2` target feature enabled *at compile time*. Without + // `target_feature` on this routine, it can be inlined which will + // handle some of the short-haystack cases above without touching the + // architecture specific code. + self.find_raw_avx2(start, end) + } + + /// Like `rfind`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. 
+ /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn rfind_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + let len = end.distance(start); + if len < __m256i::BYTES { + return if len < __m128i::BYTES { + // SAFETY: We require the caller to pass valid start/end + // pointers. + generic::rev_byte_by_byte(start, end, |b| { + b == self.sse2.needle1() + }) + } else { + // SAFETY: We require the caller to pass valid start/end + // pointers. + self.rfind_raw_sse2(start, end) + }; + } + // SAFETY: Building a `One` means it's safe to call both 'sse2' and + // 'avx2' routines. Also, we've checked that our haystack is big + // enough to run on the vector routine. Pointer validity is caller's + // responsibility. + // + // See note in forward routine above for why we don't just call + // `self.avx2.rfind_raw` directly here. + self.rfind_raw_avx2(start, end) + } + + /// Counts all occurrences of this byte in the given haystack represented + /// by raw pointers. 
+ /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `0` will always be returned. + #[inline] + pub unsafe fn count_raw(&self, start: *const u8, end: *const u8) -> usize { + if start >= end { + return 0; + } + let len = end.distance(start); + if len < __m256i::BYTES { + return if len < __m128i::BYTES { + // SAFETY: We require the caller to pass valid start/end + // pointers. + generic::count_byte_by_byte(start, end, |b| { + b == self.sse2.needle1() + }) + } else { + // SAFETY: We require the caller to pass valid start/end + // pointers. + self.count_raw_sse2(start, end) + }; + } + // SAFETY: Building a `One` means it's safe to call both 'sse2' and + // 'avx2' routines. Also, we've checked that our haystack is big + // enough to run on the vector routine. Pointer validity is caller's + // responsibility. + self.count_raw_avx2(start, end) + } + + /// Execute a search using SSE2 vectors and routines. + /// + /// # Safety + /// + /// Same as [`One::find_raw`], except the distance between `start` and + /// `end` must be at least the size of an SSE2 vector (in bytes). 
+ /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `One`, which can only be constructed + /// when it is safe to call `sse2`/`avx2` routines.) + #[target_feature(enable = "sse2")] + #[inline] + unsafe fn find_raw_sse2( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.sse2.find_raw(start, end) + } + + /// Execute a search using SSE2 vectors and routines. + /// + /// # Safety + /// + /// Same as [`One::rfind_raw`], except the distance between `start` and + /// `end` must be at least the size of an SSE2 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `One`, which can only be constructed + /// when it is safe to call `sse2`/`avx2` routines.) + #[target_feature(enable = "sse2")] + #[inline] + unsafe fn rfind_raw_sse2( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.sse2.rfind_raw(start, end) + } + + /// Execute a count using SSE2 vectors and routines. + /// + /// # Safety + /// + /// Same as [`One::count_raw`], except the distance between `start` and + /// `end` must be at least the size of an SSE2 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `One`, which can only be constructed + /// when it is safe to call `sse2`/`avx2` routines.) + #[target_feature(enable = "sse2")] + #[inline] + unsafe fn count_raw_sse2( + &self, + start: *const u8, + end: *const u8, + ) -> usize { + self.sse2.count_raw(start, end) + } + + /// Execute a search using AVX2 vectors and routines. + /// + /// # Safety + /// + /// Same as [`One::find_raw`], except the distance between `start` and + /// `end` must be at least the size of an AVX2 vector (in bytes). 
+ /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `One`, which can only be constructed + /// when it is safe to call `sse2`/`avx2` routines.) + #[target_feature(enable = "avx2")] + #[inline] + unsafe fn find_raw_avx2( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.avx2.find_raw(start, end) + } + + /// Execute a search using AVX2 vectors and routines. + /// + /// # Safety + /// + /// Same as [`One::rfind_raw`], except the distance between `start` and + /// `end` must be at least the size of an AVX2 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `One`, which can only be constructed + /// when it is safe to call `sse2`/`avx2` routines.) + #[target_feature(enable = "avx2")] + #[inline] + unsafe fn rfind_raw_avx2( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.avx2.rfind_raw(start, end) + } + + /// Execute a count using AVX2 vectors and routines. + /// + /// # Safety + /// + /// Same as [`One::count_raw`], except the distance between `start` and + /// `end` must be at least the size of an AVX2 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `One`, which can only be constructed + /// when it is safe to call `sse2`/`avx2` routines.) + #[target_feature(enable = "avx2")] + #[inline] + unsafe fn count_raw_avx2( + &self, + start: *const u8, + end: *const u8, + ) -> usize { + self.avx2.count_raw(start, end) + } + + /// Returns an iterator over all occurrences of the needle byte in the + /// given haystack. + /// + /// The iterator returned implements `DoubleEndedIterator`. This means it + /// can also be used to find occurrences in reverse order. 
+ #[inline] + pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> OneIter<'a, 'h> { + OneIter { searcher: self, it: generic::Iter::new(haystack) } + } +} + +/// An iterator over all occurrences of a single byte in a haystack. +/// +/// This iterator implements `DoubleEndedIterator`, which means it can also be +/// used to find occurrences in reverse order. +/// +/// This iterator is created by the [`One::iter`] method. +/// +/// The lifetime parameters are as follows: +/// +/// * `'a` refers to the lifetime of the underlying [`One`] searcher. +/// * `'h` refers to the lifetime of the haystack being searched. +#[derive(Clone, Debug)] +pub struct OneIter<'a, 'h> { + searcher: &'a One, + it: generic::Iter<'h>, +} + +impl<'a, 'h> Iterator for OneIter<'a, 'h> { + type Item = usize; + + #[inline] + fn next(&mut self) -> Option { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'find_raw' falls within the bounds of the start and end pointer. + unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } + } + + #[inline] + fn count(self) -> usize { + self.it.count(|s, e| { + // SAFETY: We rely on our generic iterator to return valid start + // and end pointers. + unsafe { self.searcher.count_raw(s, e) } + }) + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.it.size_hint() + } +} + +impl<'a, 'h> DoubleEndedIterator for OneIter<'a, 'h> { + #[inline] + fn next_back(&mut self) -> Option { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'rfind_raw' falls within the bounds of the start and end pointer. + unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } + } +} + +impl<'a, 'h> core::iter::FusedIterator for OneIter<'a, 'h> {} + +/// Finds all occurrences of two bytes in a haystack. +/// +/// That is, this reports matches of one of two possible bytes. 
For example, +/// searching for `a` or `b` in `afoobar` would report matches at offsets `0`, +/// `4` and `5`. +#[derive(Clone, Copy, Debug)] +pub struct Two { + /// Used for haystacks less than 32 bytes. + sse2: generic::Two<__m128i>, + /// Used for haystacks bigger than 32 bytes. + avx2: generic::Two<__m256i>, +} + +impl Two { + /// Create a new searcher that finds occurrences of the needle bytes given. + /// + /// This particular searcher is specialized to use AVX2 vector instructions + /// that typically make it quite fast. (SSE2 is used for haystacks that + /// are too short to accommodate an AVX2 vector.) + /// + /// If either SSE2 or AVX2 is unavailable in the current environment, then + /// `None` is returned. + #[inline] + pub fn new(needle1: u8, needle2: u8) -> Option { + if Two::is_available() { + // SAFETY: we check that sse2 and avx2 are available above. + unsafe { Some(Two::new_unchecked(needle1, needle2)) } + } else { + None + } + } + + /// Create a new finder specific to AVX2 vectors and routines without + /// checking that either SSE2 or AVX2 is available. + /// + /// # Safety + /// + /// Callers must guarantee that it is safe to execute both `sse2` and + /// `avx2` instructions in the current environment. + /// + /// Note that it is a common misconception that if one compiles for an + /// `x86_64` target, then they therefore automatically have access to SSE2 + /// instructions. While this is almost always the case, it isn't true in + /// 100% of cases. + #[target_feature(enable = "sse2", enable = "avx2")] + #[inline] + pub unsafe fn new_unchecked(needle1: u8, needle2: u8) -> Two { + Two { + sse2: generic::Two::new(needle1, needle2), + avx2: generic::Two::new(needle1, needle2), + } + } + + /// Returns true when this implementation is available in the current + /// environment. + /// + /// When this is true, it is guaranteed that [`Two::new`] will return + /// a `Some` value. 
Similarly, when it is false, it is guaranteed that + /// `Two::new` will return a `None` value. + /// + /// Note also that for the lifetime of a single program, if this returns + /// true then it will always return true. + #[inline] + pub fn is_available() -> bool { + #[cfg(not(target_feature = "sse2"))] + { + false + } + #[cfg(target_feature = "sse2")] + { + #[cfg(target_feature = "avx2")] + { + true + } + #[cfg(not(target_feature = "avx2"))] + { + #[cfg(feature = "std")] + { + std::is_x86_feature_detected!("avx2") + } + #[cfg(not(feature = "std"))] + { + false + } + } + } + } + + /// Return the first occurrence of one of the needle bytes in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value is `haystack.len() - 1`. + #[inline] + pub fn find(&self, haystack: &[u8]) -> Option { + // SAFETY: `find_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. + unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.find_raw(s, e) + }) + } + } + + /// Return the last occurrence of one of the needle bytes in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value is `haystack.len() - 1`. + #[inline] + pub fn rfind(&self, haystack: &[u8]) -> Option { + // SAFETY: `find_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. + unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.rfind_raw(s, e) + }) + } + } + + /// Like `find`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. 
+ /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn find_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + let len = end.distance(start); + if len < __m256i::BYTES { + return if len < __m128i::BYTES { + // SAFETY: We require the caller to pass valid start/end + // pointers. + generic::fwd_byte_by_byte(start, end, |b| { + b == self.sse2.needle1() || b == self.sse2.needle2() + }) + } else { + // SAFETY: We require the caller to pass valid start/end + // pointers. + self.find_raw_sse2(start, end) + }; + } + // SAFETY: Building a `Two` means it's safe to call both 'sse2' and + // 'avx2' routines. Also, we've checked that our haystack is big + // enough to run on the vector routine. Pointer validity is caller's + // responsibility. + // + // Note that we could call `self.avx2.find_raw` directly here. But that + // means we'd have to annotate this routine with `target_feature`. + // Which is fine, because this routine is `unsafe` anyway and the + // `target_feature` obligation is met by virtue of building a `Two`. 
+ // The real problem is that a routine with a `target_feature` + // annotation generally can't be inlined into caller code unless + // the caller code has the same target feature annotations. Namely, + // the common case (at time of writing) is for calling code to not + // have the `avx2` target feature enabled *at compile time*. Without + // `target_feature` on this routine, it can be inlined which will + // handle some of the short-haystack cases above without touching the + // architecture specific code. + self.find_raw_avx2(start, end) + } + + /// Like `rfind`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn rfind_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + let len = end.distance(start); + if len < __m256i::BYTES { + return if len < __m128i::BYTES { + // SAFETY: We require the caller to pass valid start/end + // pointers. 
+ generic::rev_byte_by_byte(start, end, |b| { + b == self.sse2.needle1() || b == self.sse2.needle2() + }) + } else { + // SAFETY: We require the caller to pass valid start/end + // pointers. + self.rfind_raw_sse2(start, end) + }; + } + // SAFETY: Building a `Two` means it's safe to call both 'sse2' and + // 'avx2' routines. Also, we've checked that our haystack is big + // enough to run on the vector routine. Pointer validity is caller's + // responsibility. + // + // See note in forward routine above for why we don't just call + // `self.avx2.rfind_raw` directly here. + self.rfind_raw_avx2(start, end) + } + + /// Execute a search using SSE2 vectors and routines. + /// + /// # Safety + /// + /// Same as [`Two::find_raw`], except the distance between `start` and + /// `end` must be at least the size of an SSE2 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Two`, which can only be constructed + /// when it is safe to call `sse2`/`avx2` routines.) + #[target_feature(enable = "sse2")] + #[inline] + unsafe fn find_raw_sse2( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.sse2.find_raw(start, end) + } + + /// Execute a search using SSE2 vectors and routines. + /// + /// # Safety + /// + /// Same as [`Two::rfind_raw`], except the distance between `start` and + /// `end` must be at least the size of an SSE2 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Two`, which can only be constructed + /// when it is safe to call `sse2`/`avx2` routines.) + #[target_feature(enable = "sse2")] + #[inline] + unsafe fn rfind_raw_sse2( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.sse2.rfind_raw(start, end) + } + + /// Execute a search using AVX2 vectors and routines. 
+ /// + /// # Safety + /// + /// Same as [`Two::find_raw`], except the distance between `start` and + /// `end` must be at least the size of an AVX2 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Two`, which can only be constructed + /// when it is safe to call `sse2`/`avx2` routines.) + #[target_feature(enable = "avx2")] + #[inline] + unsafe fn find_raw_avx2( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.avx2.find_raw(start, end) + } + + /// Execute a search using AVX2 vectors and routines. + /// + /// # Safety + /// + /// Same as [`Two::rfind_raw`], except the distance between `start` and + /// `end` must be at least the size of an AVX2 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Two`, which can only be constructed + /// when it is safe to call `sse2`/`avx2` routines.) + #[target_feature(enable = "avx2")] + #[inline] + unsafe fn rfind_raw_avx2( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.avx2.rfind_raw(start, end) + } + + /// Returns an iterator over all occurrences of the needle bytes in the + /// given haystack. + /// + /// The iterator returned implements `DoubleEndedIterator`. This means it + /// can also be used to find occurrences in reverse order. + #[inline] + pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> TwoIter<'a, 'h> { + TwoIter { searcher: self, it: generic::Iter::new(haystack) } + } +} + +/// An iterator over all occurrences of two possible bytes in a haystack. +/// +/// This iterator implements `DoubleEndedIterator`, which means it can also be +/// used to find occurrences in reverse order. +/// +/// This iterator is created by the [`Two::iter`] method. +/// +/// The lifetime parameters are as follows: +/// +/// * `'a` refers to the lifetime of the underlying [`Two`] searcher. 
+/// * `'h` refers to the lifetime of the haystack being searched. +#[derive(Clone, Debug)] +pub struct TwoIter<'a, 'h> { + searcher: &'a Two, + it: generic::Iter<'h>, +} + +impl<'a, 'h> Iterator for TwoIter<'a, 'h> { + type Item = usize; + + #[inline] + fn next(&mut self) -> Option { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'find_raw' falls within the bounds of the start and end pointer. + unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.it.size_hint() + } +} + +impl<'a, 'h> DoubleEndedIterator for TwoIter<'a, 'h> { + #[inline] + fn next_back(&mut self) -> Option { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'rfind_raw' falls within the bounds of the start and end pointer. + unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } + } +} + +impl<'a, 'h> core::iter::FusedIterator for TwoIter<'a, 'h> {} + +/// Finds all occurrences of three bytes in a haystack. +/// +/// That is, this reports matches of one of three possible bytes. For example, +/// searching for `a`, `b` or `o` in `afoobar` would report matches at offsets +/// `0`, `2`, `3`, `4` and `5`. +#[derive(Clone, Copy, Debug)] +pub struct Three { + /// Used for haystacks less than 32 bytes. + sse2: generic::Three<__m128i>, + /// Used for haystacks bigger than 32 bytes. + avx2: generic::Three<__m256i>, +} + +impl Three { + /// Create a new searcher that finds occurrences of the needle bytes given. + /// + /// This particular searcher is specialized to use AVX2 vector instructions + /// that typically make it quite fast. (SSE2 is used for haystacks that + /// are too short to accommodate an AVX2 vector.) + /// + /// If either SSE2 or AVX2 is unavailable in the current environment, then + /// `None` is returned. 
+ #[inline] + pub fn new(needle1: u8, needle2: u8, needle3: u8) -> Option { + if Three::is_available() { + // SAFETY: we check that sse2 and avx2 are available above. + unsafe { Some(Three::new_unchecked(needle1, needle2, needle3)) } + } else { + None + } + } + + /// Create a new finder specific to AVX2 vectors and routines without + /// checking that either SSE2 or AVX2 is available. + /// + /// # Safety + /// + /// Callers must guarantee that it is safe to execute both `sse2` and + /// `avx2` instructions in the current environment. + /// + /// Note that it is a common misconception that if one compiles for an + /// `x86_64` target, then they therefore automatically have access to SSE2 + /// instructions. While this is almost always the case, it isn't true in + /// 100% of cases. + #[target_feature(enable = "sse2", enable = "avx2")] + #[inline] + pub unsafe fn new_unchecked( + needle1: u8, + needle2: u8, + needle3: u8, + ) -> Three { + Three { + sse2: generic::Three::new(needle1, needle2, needle3), + avx2: generic::Three::new(needle1, needle2, needle3), + } + } + + /// Returns true when this implementation is available in the current + /// environment. + /// + /// When this is true, it is guaranteed that [`Three::new`] will return + /// a `Some` value. Similarly, when it is false, it is guaranteed that + /// `Three::new` will return a `None` value. + /// + /// Note also that for the lifetime of a single program, if this returns + /// true then it will always return true. + #[inline] + pub fn is_available() -> bool { + #[cfg(not(target_feature = "sse2"))] + { + false + } + #[cfg(target_feature = "sse2")] + { + #[cfg(target_feature = "avx2")] + { + true + } + #[cfg(not(target_feature = "avx2"))] + { + #[cfg(feature = "std")] + { + std::is_x86_feature_detected!("avx2") + } + #[cfg(not(feature = "std"))] + { + false + } + } + } + } + + /// Return the first occurrence of one of the needle bytes in the given + /// haystack. 
If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value is `haystack.len() - 1`. + #[inline] + pub fn find(&self, haystack: &[u8]) -> Option { + // SAFETY: `find_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. + unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.find_raw(s, e) + }) + } + } + + /// Return the last occurrence of one of the needle bytes in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value is `haystack.len() - 1`. + #[inline] + pub fn rfind(&self, haystack: &[u8]) -> Option { + // SAFETY: `find_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. + unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.rfind_raw(s, e) + }) + } + } + + /// Like `find`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. 
+ /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn find_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + let len = end.distance(start); + if len < __m256i::BYTES { + return if len < __m128i::BYTES { + // SAFETY: We require the caller to pass valid start/end + // pointers. + generic::fwd_byte_by_byte(start, end, |b| { + b == self.sse2.needle1() + || b == self.sse2.needle2() + || b == self.sse2.needle3() + }) + } else { + // SAFETY: We require the caller to pass valid start/end + // pointers. + self.find_raw_sse2(start, end) + }; + } + // SAFETY: Building a `Three` means it's safe to call both 'sse2' and + // 'avx2' routines. Also, we've checked that our haystack is big + // enough to run on the vector routine. Pointer validity is caller's + // responsibility. + // + // Note that we could call `self.avx2.find_raw` directly here. But that + // means we'd have to annotate this routine with `target_feature`. + // Which is fine, because this routine is `unsafe` anyway and the + // `target_feature` obligation is met by virtue of building a `Three`. + // The real problem is that a routine with a `target_feature` + // annotation generally can't be inlined into caller code unless + // the caller code has the same target feature annotations. Namely, + // the common case (at time of writing) is for calling code to not + // have the `avx2` target feature enabled *at compile time*. Without + // `target_feature` on this routine, it can be inlined which will + // handle some of the short-haystack cases above without touching the + // architecture specific code. + self.find_raw_avx2(start, end) + } + + /// Like `rfind`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. 
+ /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn rfind_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + let len = end.distance(start); + if len < __m256i::BYTES { + return if len < __m128i::BYTES { + // SAFETY: We require the caller to pass valid start/end + // pointers. + generic::rev_byte_by_byte(start, end, |b| { + b == self.sse2.needle1() + || b == self.sse2.needle2() + || b == self.sse2.needle3() + }) + } else { + // SAFETY: We require the caller to pass valid start/end + // pointers. + self.rfind_raw_sse2(start, end) + }; + } + // SAFETY: Building a `Three` means it's safe to call both 'sse2' and + // 'avx2' routines. Also, we've checked that our haystack is big + // enough to run on the vector routine. Pointer validity is caller's + // responsibility. + // + // See note in forward routine above for why we don't just call + // `self.avx2.rfind_raw` directly here. + self.rfind_raw_avx2(start, end) + } + + /// Execute a search using SSE2 vectors and routines. 
+ /// + /// # Safety + /// + /// Same as [`Three::find_raw`], except the distance between `start` and + /// `end` must be at least the size of an SSE2 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Three`, which can only be constructed + /// when it is safe to call `sse2`/`avx2` routines.) + #[target_feature(enable = "sse2")] + #[inline] + unsafe fn find_raw_sse2( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.sse2.find_raw(start, end) + } + + /// Execute a search using SSE2 vectors and routines. + /// + /// # Safety + /// + /// Same as [`Three::rfind_raw`], except the distance between `start` and + /// `end` must be at least the size of an SSE2 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Three`, which can only be constructed + /// when it is safe to call `sse2`/`avx2` routines.) + #[target_feature(enable = "sse2")] + #[inline] + unsafe fn rfind_raw_sse2( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.sse2.rfind_raw(start, end) + } + + /// Execute a search using AVX2 vectors and routines. + /// + /// # Safety + /// + /// Same as [`Three::find_raw`], except the distance between `start` and + /// `end` must be at least the size of an AVX2 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Three`, which can only be constructed + /// when it is safe to call `sse2`/`avx2` routines.) + #[target_feature(enable = "avx2")] + #[inline] + unsafe fn find_raw_avx2( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.avx2.find_raw(start, end) + } + + /// Execute a search using AVX2 vectors and routines. 
+ /// + /// # Safety + /// + /// Same as [`Three::rfind_raw`], except the distance between `start` and + /// `end` must be at least the size of an AVX2 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Three`, which can only be constructed + /// when it is safe to call `sse2`/`avx2` routines.) + #[target_feature(enable = "avx2")] + #[inline] + unsafe fn rfind_raw_avx2( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.avx2.rfind_raw(start, end) + } + + /// Returns an iterator over all occurrences of the needle bytes in the + /// given haystack. + /// + /// The iterator returned implements `DoubleEndedIterator`. This means it + /// can also be used to find occurrences in reverse order. + #[inline] + pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> ThreeIter<'a, 'h> { + ThreeIter { searcher: self, it: generic::Iter::new(haystack) } + } +} + +/// An iterator over all occurrences of three possible bytes in a haystack. +/// +/// This iterator implements `DoubleEndedIterator`, which means it can also be +/// used to find occurrences in reverse order. +/// +/// This iterator is created by the [`Three::iter`] method. +/// +/// The lifetime parameters are as follows: +/// +/// * `'a` refers to the lifetime of the underlying [`Three`] searcher. +/// * `'h` refers to the lifetime of the haystack being searched. +#[derive(Clone, Debug)] +pub struct ThreeIter<'a, 'h> { + searcher: &'a Three, + it: generic::Iter<'h>, +} + +impl<'a, 'h> Iterator for ThreeIter<'a, 'h> { + type Item = usize; + + #[inline] + fn next(&mut self) -> Option { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'find_raw' falls within the bounds of the start and end pointer. 
+ unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.it.size_hint() + } +} + +impl<'a, 'h> DoubleEndedIterator for ThreeIter<'a, 'h> { + #[inline] + fn next_back(&mut self) -> Option { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'rfind_raw' falls within the bounds of the start and end pointer. + unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } + } +} + +impl<'a, 'h> core::iter::FusedIterator for ThreeIter<'a, 'h> {} + +#[cfg(test)] +mod tests { + use super::*; + + define_memchr_quickcheck!(super); + + #[test] + fn forward_one() { + crate::tests::memchr::Runner::new(1).forward_iter( + |haystack, needles| { + Some(One::new(needles[0])?.iter(haystack).collect()) + }, + ) + } + + #[test] + fn reverse_one() { + crate::tests::memchr::Runner::new(1).reverse_iter( + |haystack, needles| { + Some(One::new(needles[0])?.iter(haystack).rev().collect()) + }, + ) + } + + #[test] + fn count_one() { + crate::tests::memchr::Runner::new(1).count_iter(|haystack, needles| { + Some(One::new(needles[0])?.iter(haystack).count()) + }) + } + + #[test] + fn forward_two() { + crate::tests::memchr::Runner::new(2).forward_iter( + |haystack, needles| { + let n1 = needles.get(0).copied()?; + let n2 = needles.get(1).copied()?; + Some(Two::new(n1, n2)?.iter(haystack).collect()) + }, + ) + } + + #[test] + fn reverse_two() { + crate::tests::memchr::Runner::new(2).reverse_iter( + |haystack, needles| { + let n1 = needles.get(0).copied()?; + let n2 = needles.get(1).copied()?; + Some(Two::new(n1, n2)?.iter(haystack).rev().collect()) + }, + ) + } + + #[test] + fn forward_three() { + crate::tests::memchr::Runner::new(3).forward_iter( + |haystack, needles| { + let n1 = needles.get(0).copied()?; + let n2 = needles.get(1).copied()?; + let n3 = needles.get(2).copied()?; + Some(Three::new(n1, n2, 
n3)?.iter(haystack).collect()) + }, + ) + } + + #[test] + fn reverse_three() { + crate::tests::memchr::Runner::new(3).reverse_iter( + |haystack, needles| { + let n1 = needles.get(0).copied()?; + let n2 = needles.get(1).copied()?; + let n3 = needles.get(2).copied()?; + Some(Three::new(n1, n2, n3)?.iter(haystack).rev().collect()) + }, + ) + } +} diff --git a/vendor/memchr/src/arch/x86_64/avx2/mod.rs b/vendor/memchr/src/arch/x86_64/avx2/mod.rs new file mode 100644 index 0000000..ee4097d --- /dev/null +++ b/vendor/memchr/src/arch/x86_64/avx2/mod.rs @@ -0,0 +1,6 @@ +/*! +Algorithms for the `x86_64` target using 256-bit vectors via AVX2. +*/ + +pub mod memchr; +pub mod packedpair; diff --git a/vendor/memchr/src/arch/x86_64/avx2/packedpair.rs b/vendor/memchr/src/arch/x86_64/avx2/packedpair.rs new file mode 100644 index 0000000..efae7b6 --- /dev/null +++ b/vendor/memchr/src/arch/x86_64/avx2/packedpair.rs @@ -0,0 +1,272 @@ +/*! +A 256-bit vector implementation of the "packed pair" SIMD algorithm. + +The "packed pair" algorithm is based on the [generic SIMD] algorithm. The main +difference is that it (by default) uses a background distribution of byte +frequencies to heuristically select the pair of bytes to search for. + +[generic SIMD]: http://0x80.pl/articles/simd-strfind.html#first-and-last +*/ + +use core::arch::x86_64::{__m128i, __m256i}; + +use crate::arch::{all::packedpair::Pair, generic::packedpair}; + +/// A "packed pair" finder that uses 256-bit vector operations. +/// +/// This finder picks two bytes that it believes have high predictive power +/// for indicating an overall match of a needle. Depending on whether +/// `Finder::find` or `Finder::find_prefilter` is used, it reports offsets +/// where the needle matches or could match. In the prefilter case, candidates +/// are reported whenever the [`Pair`] of bytes given matches. 
+#[derive(Clone, Copy, Debug)] +pub struct Finder { + sse2: packedpair::Finder<__m128i>, + avx2: packedpair::Finder<__m256i>, +} + +impl Finder { + /// Create a new pair searcher. The searcher returned can either report + /// exact matches of `needle` or act as a prefilter and report candidate + /// positions of `needle`. + /// + /// If AVX2 is unavailable in the current environment or if a [`Pair`] + /// could not be constructed from the needle given, then `None` is + /// returned. + #[inline] + pub fn new(needle: &[u8]) -> Option { + Finder::with_pair(needle, Pair::new(needle)?) + } + + /// Create a new "packed pair" finder using the pair of bytes given. + /// + /// This constructor permits callers to control precisely which pair of + /// bytes is used as a predicate. + /// + /// If AVX2 is unavailable in the current environment, then `None` is + /// returned. + #[inline] + pub fn with_pair(needle: &[u8], pair: Pair) -> Option { + if Finder::is_available() { + // SAFETY: we check that sse2/avx2 is available above. We are also + // guaranteed to have needle.len() > 1 because we have a valid + // Pair. + unsafe { Some(Finder::with_pair_impl(needle, pair)) } + } else { + None + } + } + + /// Create a new `Finder` specific to SSE2 vectors and routines. + /// + /// # Safety + /// + /// Same as the safety for `packedpair::Finder::new`, and callers must also + /// ensure that both SSE2 and AVX2 are available. + #[target_feature(enable = "sse2", enable = "avx2")] + #[inline] + unsafe fn with_pair_impl(needle: &[u8], pair: Pair) -> Finder { + let sse2 = packedpair::Finder::<__m128i>::new(needle, pair); + let avx2 = packedpair::Finder::<__m256i>::new(needle, pair); + Finder { sse2, avx2 } + } + + /// Returns true when this implementation is available in the current + /// environment. + /// + /// When this is true, it is guaranteed that [`Finder::with_pair`] will + /// return a `Some` value. 
Similarly, when it is false, it is guaranteed + /// that `Finder::with_pair` will return a `None` value. Notice that this + /// does not guarantee that [`Finder::new`] will return a `Finder`. Namely, + /// even when `Finder::is_available` is true, it is not guaranteed that a + /// valid [`Pair`] can be found from the needle given. + /// + /// Note also that for the lifetime of a single program, if this returns + /// true then it will always return true. + #[inline] + pub fn is_available() -> bool { + #[cfg(not(target_feature = "sse2"))] + { + false + } + #[cfg(target_feature = "sse2")] + { + #[cfg(target_feature = "avx2")] + { + true + } + #[cfg(not(target_feature = "avx2"))] + { + #[cfg(feature = "std")] + { + std::is_x86_feature_detected!("avx2") + } + #[cfg(not(feature = "std"))] + { + false + } + } + } + } + + /// Execute a search using AVX2 vectors and routines. + /// + /// # Panics + /// + /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. + #[inline] + pub fn find(&self, haystack: &[u8], needle: &[u8]) -> Option { + // SAFETY: Building a `Finder` means it's safe to call 'sse2' routines. + unsafe { self.find_impl(haystack, needle) } + } + + /// Run this finder on the given haystack as a prefilter. + /// + /// If a candidate match is found, then an offset where the needle *could* + /// begin in the haystack is returned. + /// + /// # Panics + /// + /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. + #[inline] + pub fn find_prefilter(&self, haystack: &[u8]) -> Option { + // SAFETY: Building a `Finder` means it's safe to call 'sse2' routines. + unsafe { self.find_prefilter_impl(haystack) } + } + + /// Execute a search using AVX2 vectors and routines. + /// + /// # Panics + /// + /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. 
+ /// + /// # Safety + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Finder`, which can only be constructed + /// when it is safe to call `sse2` and `avx2` routines.) + #[target_feature(enable = "sse2", enable = "avx2")] + #[inline] + unsafe fn find_impl( + &self, + haystack: &[u8], + needle: &[u8], + ) -> Option { + if haystack.len() < self.avx2.min_haystack_len() { + self.sse2.find(haystack, needle) + } else { + self.avx2.find(haystack, needle) + } + } + + /// Execute a prefilter search using AVX2 vectors and routines. + /// + /// # Panics + /// + /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. + /// + /// # Safety + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Finder`, which can only be constructed + /// when it is safe to call `sse2` and `avx2` routines.) + #[target_feature(enable = "sse2", enable = "avx2")] + #[inline] + unsafe fn find_prefilter_impl(&self, haystack: &[u8]) -> Option { + if haystack.len() < self.avx2.min_haystack_len() { + self.sse2.find_prefilter(haystack) + } else { + self.avx2.find_prefilter(haystack) + } + } + + /// Returns the pair of offsets (into the needle) used to check as a + /// predicate before confirming whether a needle exists at a particular + /// position. + #[inline] + pub fn pair(&self) -> &Pair { + self.avx2.pair() + } + + /// Returns the minimum haystack length that this `Finder` can search. + /// + /// Using a haystack with length smaller than this in a search will result + /// in a panic. The reason for this restriction is that this finder is + /// meant to be a low-level component that is part of a larger substring + /// strategy. In that sense, it avoids trying to handle all cases and + /// instead only handles the cases that it can handle very well. 
+ #[inline] + pub fn min_haystack_len(&self) -> usize { + // The caller doesn't need to care about AVX2's min_haystack_len + // since this implementation will automatically switch to the SSE2 + // implementation if the haystack is too short for AVX2. Therefore, the + // caller only needs to care about SSE2's min_haystack_len. + // + // This does assume that SSE2's min_haystack_len is less than or + // equal to AVX2's min_haystack_len. In practice, this is true and + // there is no way it could be false based on how this Finder is + // implemented. Namely, both SSE2 and AVX2 use the same `Pair`. If + // they used different pairs, then it's possible (although perhaps + // pathological) for SSE2's min_haystack_len to be bigger than AVX2's. + self.sse2.min_haystack_len() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn find(haystack: &[u8], needle: &[u8]) -> Option> { + let f = Finder::new(needle)?; + if haystack.len() < f.min_haystack_len() { + return None; + } + Some(f.find(haystack, needle)) + } + + define_substring_forward_quickcheck!(find); + + #[test] + fn forward_substring() { + crate::tests::substring::Runner::new().fwd(find).run() + } + + #[test] + fn forward_packedpair() { + fn find( + haystack: &[u8], + needle: &[u8], + index1: u8, + index2: u8, + ) -> Option> { + let pair = Pair::with_indices(needle, index1, index2)?; + let f = Finder::with_pair(needle, pair)?; + if haystack.len() < f.min_haystack_len() { + return None; + } + Some(f.find(haystack, needle)) + } + crate::tests::packedpair::Runner::new().fwd(find).run() + } + + #[test] + fn forward_packedpair_prefilter() { + fn find( + haystack: &[u8], + needle: &[u8], + index1: u8, + index2: u8, + ) -> Option> { + if !cfg!(target_feature = "sse2") { + return None; + } + let pair = Pair::with_indices(needle, index1, index2)?; + let f = Finder::with_pair(needle, pair)?; + if haystack.len() < f.min_haystack_len() { + return None; + } + Some(f.find_prefilter(haystack)) + } + 
crate::tests::packedpair::Runner::new().fwd(find).run() + } +} diff --git a/vendor/memchr/src/arch/x86_64/memchr.rs b/vendor/memchr/src/arch/x86_64/memchr.rs new file mode 100644 index 0000000..fcb1399 --- /dev/null +++ b/vendor/memchr/src/arch/x86_64/memchr.rs @@ -0,0 +1,335 @@ +/*! +Wrapper routines for `memchr` and friends. + +These routines efficiently dispatch to the best implementation based on what +the CPU supports. +*/ + +/// Provides a way to run a memchr-like function while amortizing the cost of +/// runtime CPU feature detection. +/// +/// This works by loading a function pointer from an atomic global. Initially, +/// this global is set to a function that does CPU feature detection. For +/// example, if AVX2 is enabled, then the AVX2 implementation is used. +/// Otherwise, at least on x86_64, the SSE2 implementation is used. (And +/// in some niche cases, if SSE2 isn't available, then the architecture +/// independent fallback implementation is used.) +/// +/// After the first call to this function, the atomic global is replaced with +/// the specific AVX2, SSE2 or fallback routine chosen. Subsequent calls then +/// will directly call the chosen routine instead of needing to go through the +/// CPU feature detection branching again. +/// +/// This particular macro is specifically written to provide the implementation +/// of functions with the following signature: +/// +/// ```ignore +/// fn memchr(needle1: u8, start: *const u8, end: *const u8) -> Option; +/// ``` +/// +/// Where you can also have `memchr2` and `memchr3`, but with `needle2` and +/// `needle3`, respectively. The `start` and `end` parameters correspond to the +/// start and end of the haystack, respectively. +/// +/// We use raw pointers here instead of the more obvious `haystack: &[u8]` so +/// that the function is compatible with our lower level iterator logic that +/// operates on raw pointers. 
We use this macro to implement "raw" memchr +/// routines with the signature above, and then define memchr routines using +/// regular slices on top of them. +/// +/// Note that we use `#[cfg(target_feature = "sse2")]` below even though +/// it shouldn't be strictly necessary because without it, it seems to +/// cause the compiler to blow up. I guess it can't handle a function +/// pointer being created with a sse target feature? Dunno. See the +/// `build-for-x86-64-but-non-sse-target` CI job if you want to experiment with +/// this. +/// +/// # Safety +/// +/// Primarily callers must ensure that `$fnty` is a correct function pointer +/// type and not something else. +/// +/// Callers must also ensure that `$memchrty::$memchrfind` corresponds to a +/// routine that returns a valid pointer when a match is found. That +/// is, a pointer that is `>= start` and `< end`. +/// +/// Callers must also ensure that the `$hay_start` and `$hay_end` identifiers +/// correspond to valid pointers. +macro_rules! 
unsafe_ifunc { + ( + $memchrty:ident, + $memchrfind:ident, + $fnty:ty, + $retty:ty, + $hay_start:ident, + $hay_end:ident, + $($needle:ident),+ + ) => {{ + #![allow(unused_unsafe)] + + use core::sync::atomic::{AtomicPtr, Ordering}; + + type Fn = *mut (); + type RealFn = $fnty; + static FN: AtomicPtr<()> = AtomicPtr::new(detect as Fn); + + #[cfg(target_feature = "sse2")] + #[target_feature(enable = "sse2", enable = "avx2")] + unsafe fn find_avx2( + $($needle: u8),+, + $hay_start: *const u8, + $hay_end: *const u8, + ) -> $retty { + use crate::arch::x86_64::avx2::memchr::$memchrty; + $memchrty::new_unchecked($($needle),+) + .$memchrfind($hay_start, $hay_end) + } + + #[cfg(target_feature = "sse2")] + #[target_feature(enable = "sse2")] + unsafe fn find_sse2( + $($needle: u8),+, + $hay_start: *const u8, + $hay_end: *const u8, + ) -> $retty { + use crate::arch::x86_64::sse2::memchr::$memchrty; + $memchrty::new_unchecked($($needle),+) + .$memchrfind($hay_start, $hay_end) + } + + unsafe fn find_fallback( + $($needle: u8),+, + $hay_start: *const u8, + $hay_end: *const u8, + ) -> $retty { + use crate::arch::all::memchr::$memchrty; + $memchrty::new($($needle),+).$memchrfind($hay_start, $hay_end) + } + + unsafe fn detect( + $($needle: u8),+, + $hay_start: *const u8, + $hay_end: *const u8, + ) -> $retty { + let fun = { + #[cfg(not(target_feature = "sse2"))] + { + debug!( + "no sse2 feature available, using fallback for {}", + stringify!($memchrty), + ); + find_fallback as RealFn + } + #[cfg(target_feature = "sse2")] + { + use crate::arch::x86_64::{sse2, avx2}; + if avx2::memchr::$memchrty::is_available() { + debug!("chose AVX2 for {}", stringify!($memchrty)); + find_avx2 as RealFn + } else if sse2::memchr::$memchrty::is_available() { + debug!("chose SSE2 for {}", stringify!($memchrty)); + find_sse2 as RealFn + } else { + debug!("chose fallback for {}", stringify!($memchrty)); + find_fallback as RealFn + } + } + }; + FN.store(fun as Fn, Ordering::Relaxed); + // SAFETY: The only 
thing we need to uphold here is the + // `#[target_feature]` requirements. Since we check is_available + // above before using the corresponding implementation, we are + // guaranteed to only call code that is supported on the current + // CPU. + fun($($needle),+, $hay_start, $hay_end) + } + + // SAFETY: By virtue of the caller contract, RealFn is a function + // pointer, which is always safe to transmute with a *mut (). Also, + // since we use $memchrty::is_available, it is guaranteed to be safe + // to call $memchrty::$memchrfind. + unsafe { + let fun = FN.load(Ordering::Relaxed); + core::mem::transmute::(fun)( + $($needle),+, + $hay_start, + $hay_end, + ) + } + }}; +} + +// The routines below dispatch to AVX2, SSE2 or a fallback routine based on +// what's available in the current environment. The secret sauce here is that +// we only check for which one to use approximately once, and then "cache" that +// choice into a global function pointer. Subsequent invocations then just call +// the appropriate function directly. + +/// memchr, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `One::find_raw`. +#[inline(always)] +pub(crate) fn memchr_raw( + n1: u8, + start: *const u8, + end: *const u8, +) -> Option<*const u8> { + // SAFETY: We provide a valid function pointer type. + unsafe_ifunc!( + One, + find_raw, + unsafe fn(u8, *const u8, *const u8) -> Option<*const u8>, + Option<*const u8>, + start, + end, + n1 + ) +} + +/// memrchr, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `One::rfind_raw`. +#[inline(always)] +pub(crate) fn memrchr_raw( + n1: u8, + start: *const u8, + end: *const u8, +) -> Option<*const u8> { + // SAFETY: We provide a valid function pointer type. 
+ unsafe_ifunc!( + One, + rfind_raw, + unsafe fn(u8, *const u8, *const u8) -> Option<*const u8>, + Option<*const u8>, + start, + end, + n1 + ) +} + +/// memchr2, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `Two::find_raw`. +#[inline(always)] +pub(crate) fn memchr2_raw( + n1: u8, + n2: u8, + start: *const u8, + end: *const u8, +) -> Option<*const u8> { + // SAFETY: We provide a valid function pointer type. + unsafe_ifunc!( + Two, + find_raw, + unsafe fn(u8, u8, *const u8, *const u8) -> Option<*const u8>, + Option<*const u8>, + start, + end, + n1, + n2 + ) +} + +/// memrchr2, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `Two::rfind_raw`. +#[inline(always)] +pub(crate) fn memrchr2_raw( + n1: u8, + n2: u8, + start: *const u8, + end: *const u8, +) -> Option<*const u8> { + // SAFETY: We provide a valid function pointer type. + unsafe_ifunc!( + Two, + rfind_raw, + unsafe fn(u8, u8, *const u8, *const u8) -> Option<*const u8>, + Option<*const u8>, + start, + end, + n1, + n2 + ) +} + +/// memchr3, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `Three::find_raw`. +#[inline(always)] +pub(crate) fn memchr3_raw( + n1: u8, + n2: u8, + n3: u8, + start: *const u8, + end: *const u8, +) -> Option<*const u8> { + // SAFETY: We provide a valid function pointer type. + unsafe_ifunc!( + Three, + find_raw, + unsafe fn(u8, u8, u8, *const u8, *const u8) -> Option<*const u8>, + Option<*const u8>, + start, + end, + n1, + n2, + n3 + ) +} + +/// memrchr3, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `Three::rfind_raw`. +#[inline(always)] +pub(crate) fn memrchr3_raw( + n1: u8, + n2: u8, + n3: u8, + start: *const u8, + end: *const u8, +) -> Option<*const u8> { + // SAFETY: We provide a valid function pointer type. 
+ unsafe_ifunc!( + Three, + rfind_raw, + unsafe fn(u8, u8, u8, *const u8, *const u8) -> Option<*const u8>, + Option<*const u8>, + start, + end, + n1, + n2, + n3 + ) +} + +/// Count all matching bytes, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `One::count_raw`. +#[inline(always)] +pub(crate) fn count_raw(n1: u8, start: *const u8, end: *const u8) -> usize { + // SAFETY: We provide a valid function pointer type. + unsafe_ifunc!( + One, + count_raw, + unsafe fn(u8, *const u8, *const u8) -> usize, + usize, + start, + end, + n1 + ) +} diff --git a/vendor/memchr/src/arch/x86_64/mod.rs b/vendor/memchr/src/arch/x86_64/mod.rs new file mode 100644 index 0000000..5dad721 --- /dev/null +++ b/vendor/memchr/src/arch/x86_64/mod.rs @@ -0,0 +1,8 @@ +/*! +Vector algorithms for the `x86_64` target. +*/ + +pub mod avx2; +pub mod sse2; + +pub(crate) mod memchr; diff --git a/vendor/memchr/src/arch/x86_64/sse2/memchr.rs b/vendor/memchr/src/arch/x86_64/sse2/memchr.rs new file mode 100644 index 0000000..c6f75df --- /dev/null +++ b/vendor/memchr/src/arch/x86_64/sse2/memchr.rs @@ -0,0 +1,1077 @@ +/*! +This module defines 128-bit vector implementations of `memchr` and friends. + +The main types in this module are [`One`], [`Two`] and [`Three`]. They are for +searching for one, two or three distinct bytes, respectively, in a haystack. +Each type also has corresponding double ended iterators. These searchers are +typically much faster than scalar routines accomplishing the same task. + +The `One` searcher also provides a [`One::count`] routine for efficiently +counting the number of times a single byte occurs in a haystack. This is +useful, for example, for counting the number of lines in a haystack. This +routine exists because it is usually faster, especially with a high match +count, then using [`One::find`] repeatedly. ([`OneIter`] specializes its +`Iterator::count` implementation to use this routine.) 
+ +Only one, two and three bytes are supported because three bytes is about +the point where one sees diminishing returns. Beyond this point and it's +probably (but not necessarily) better to just use a simple `[bool; 256]` array +or similar. However, it depends mightily on the specific work-load and the +expected match frequency. +*/ + +use core::arch::x86_64::__m128i; + +use crate::{arch::generic::memchr as generic, ext::Pointer, vector::Vector}; + +/// Finds all occurrences of a single byte in a haystack. +#[derive(Clone, Copy, Debug)] +pub struct One(generic::One<__m128i>); + +impl One { + /// Create a new searcher that finds occurrences of the needle byte given. + /// + /// This particular searcher is specialized to use SSE2 vector instructions + /// that typically make it quite fast. + /// + /// If SSE2 is unavailable in the current environment, then `None` is + /// returned. + #[inline] + pub fn new(needle: u8) -> Option { + if One::is_available() { + // SAFETY: we check that sse2 is available above. + unsafe { Some(One::new_unchecked(needle)) } + } else { + None + } + } + + /// Create a new finder specific to SSE2 vectors and routines without + /// checking that SSE2 is available. + /// + /// # Safety + /// + /// Callers must guarantee that it is safe to execute `sse2` instructions + /// in the current environment. + /// + /// Note that it is a common misconception that if one compiles for an + /// `x86_64` target, then they therefore automatically have access to SSE2 + /// instructions. While this is almost always the case, it isn't true in + /// 100% of cases. + #[target_feature(enable = "sse2")] + #[inline] + pub unsafe fn new_unchecked(needle: u8) -> One { + One(generic::One::new(needle)) + } + + /// Returns true when this implementation is available in the current + /// environment. + /// + /// When this is true, it is guaranteed that [`One::new`] will return + /// a `Some` value. 
Similarly, when it is false, it is guaranteed that + /// `One::new` will return a `None` value. + /// + /// Note also that for the lifetime of a single program, if this returns + /// true then it will always return true. + #[inline] + pub fn is_available() -> bool { + #[cfg(target_feature = "sse2")] + { + true + } + #[cfg(not(target_feature = "sse2"))] + { + false + } + } + + /// Return the first occurrence of one of the needle bytes in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value is `haystack.len() - 1`. + #[inline] + pub fn find(&self, haystack: &[u8]) -> Option { + // SAFETY: `find_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. + unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.find_raw(s, e) + }) + } + } + + /// Return the last occurrence of one of the needle bytes in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value is `haystack.len() - 1`. + #[inline] + pub fn rfind(&self, haystack: &[u8]) -> Option { + // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. + unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.rfind_raw(s, e) + }) + } + } + + /// Counts all occurrences of this byte in the given haystack. + #[inline] + pub fn count(&self, haystack: &[u8]) -> usize { + // SAFETY: All of our pointers are derived directly from a borrowed + // slice, which is guaranteed to be valid. + unsafe { + let start = haystack.as_ptr(); + let end = start.add(haystack.len()); + self.count_raw(start, end) + } + } + + /// Like `find`, but accepts and returns raw pointers. 
+ /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn find_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + if end.distance(start) < __m128i::BYTES { + // SAFETY: We require the caller to pass valid start/end pointers. + return generic::fwd_byte_by_byte(start, end, |b| { + b == self.0.needle1() + }); + } + // SAFETY: Building a `One` means it's safe to call 'sse2' routines. + // Also, we've checked that our haystack is big enough to run on the + // vector routine. Pointer validity is caller's responsibility. + // + // Note that we could call `self.0.find_raw` directly here. But that + // means we'd have to annotate this routine with `target_feature`. + // Which is fine, because this routine is `unsafe` anyway and the + // `target_feature` obligation is met by virtue of building a `One`. 
+ // The real problem is that a routine with a `target_feature` + // annotation generally can't be inlined into caller code unless the + // caller code has the same target feature annotations. Which is maybe + // okay for SSE2, but we do the same thing for AVX2 where caller code + // probably usually doesn't have AVX2 enabled. That means that this + // routine can be inlined which will handle some of the short-haystack + // cases above without touching the architecture specific code. + self.find_raw_impl(start, end) + } + + /// Like `rfind`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn rfind_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + if end.distance(start) < __m128i::BYTES { + // SAFETY: We require the caller to pass valid start/end pointers. + return generic::rev_byte_by_byte(start, end, |b| { + b == self.0.needle1() + }); + } + // SAFETY: Building a `One` means it's safe to call 'sse2' routines. 
+ // Also, we've checked that our haystack is big enough to run on the + // vector routine. Pointer validity is caller's responsibility. + // + // See note in forward routine above for why we don't just call + // `self.0.rfind_raw` directly here. + self.rfind_raw_impl(start, end) + } + + /// Counts all occurrences of this byte in the given haystack represented + /// by raw pointers. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `0` will always be returned. + #[inline] + pub unsafe fn count_raw(&self, start: *const u8, end: *const u8) -> usize { + if start >= end { + return 0; + } + if end.distance(start) < __m128i::BYTES { + // SAFETY: We require the caller to pass valid start/end pointers. + return generic::count_byte_by_byte(start, end, |b| { + b == self.0.needle1() + }); + } + // SAFETY: Building a `One` means it's safe to call 'sse2' routines. + // Also, we've checked that our haystack is big enough to run on the + // vector routine. Pointer validity is caller's responsibility. + self.count_raw_impl(start, end) + } + + /// Execute a search using SSE2 vectors and routines. 
+ /// + /// # Safety + /// + /// Same as [`One::find_raw`], except the distance between `start` and + /// `end` must be at least the size of an SSE2 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `One`, which can only be constructed + /// when it is safe to call `sse2` routines.) + #[target_feature(enable = "sse2")] + #[inline] + unsafe fn find_raw_impl( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.0.find_raw(start, end) + } + + /// Execute a search using SSE2 vectors and routines. + /// + /// # Safety + /// + /// Same as [`One::rfind_raw`], except the distance between `start` and + /// `end` must be at least the size of an SSE2 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `One`, which can only be constructed + /// when it is safe to call `sse2` routines.) + #[target_feature(enable = "sse2")] + #[inline] + unsafe fn rfind_raw_impl( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.0.rfind_raw(start, end) + } + + /// Execute a count using SSE2 vectors and routines. + /// + /// # Safety + /// + /// Same as [`One::count_raw`], except the distance between `start` and + /// `end` must be at least the size of an SSE2 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `One`, which can only be constructed + /// when it is safe to call `sse2` routines.) + #[target_feature(enable = "sse2")] + #[inline] + unsafe fn count_raw_impl( + &self, + start: *const u8, + end: *const u8, + ) -> usize { + self.0.count_raw(start, end) + } + + /// Returns an iterator over all occurrences of the needle byte in the + /// given haystack. + /// + /// The iterator returned implements `DoubleEndedIterator`. 
This means it + /// can also be used to find occurrences in reverse order. + #[inline] + pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> OneIter<'a, 'h> { + OneIter { searcher: self, it: generic::Iter::new(haystack) } + } +} + +/// An iterator over all occurrences of a single byte in a haystack. +/// +/// This iterator implements `DoubleEndedIterator`, which means it can also be +/// used to find occurrences in reverse order. +/// +/// This iterator is created by the [`One::iter`] method. +/// +/// The lifetime parameters are as follows: +/// +/// * `'a` refers to the lifetime of the underlying [`One`] searcher. +/// * `'h` refers to the lifetime of the haystack being searched. +#[derive(Clone, Debug)] +pub struct OneIter<'a, 'h> { + searcher: &'a One, + it: generic::Iter<'h>, +} + +impl<'a, 'h> Iterator for OneIter<'a, 'h> { + type Item = usize; + + #[inline] + fn next(&mut self) -> Option { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'find_raw' falls within the bounds of the start and end pointer. + unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } + } + + #[inline] + fn count(self) -> usize { + self.it.count(|s, e| { + // SAFETY: We rely on our generic iterator to return valid start + // and end pointers. + unsafe { self.searcher.count_raw(s, e) } + }) + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.it.size_hint() + } +} + +impl<'a, 'h> DoubleEndedIterator for OneIter<'a, 'h> { + #[inline] + fn next_back(&mut self) -> Option { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'rfind_raw' falls within the bounds of the start and end pointer. + unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } + } +} + +impl<'a, 'h> core::iter::FusedIterator for OneIter<'a, 'h> {} + +/// Finds all occurrences of two bytes in a haystack. 
+/// +/// That is, this reports matches of one of two possible bytes. For example, +/// searching for `a` or `b` in `afoobar` would report matches at offsets `0`, +/// `4` and `5`. +#[derive(Clone, Copy, Debug)] +pub struct Two(generic::Two<__m128i>); + +impl Two { + /// Create a new searcher that finds occurrences of the needle bytes given. + /// + /// This particular searcher is specialized to use SSE2 vector instructions + /// that typically make it quite fast. + /// + /// If SSE2 is unavailable in the current environment, then `None` is + /// returned. + #[inline] + pub fn new(needle1: u8, needle2: u8) -> Option { + if Two::is_available() { + // SAFETY: we check that sse2 is available above. + unsafe { Some(Two::new_unchecked(needle1, needle2)) } + } else { + None + } + } + + /// Create a new finder specific to SSE2 vectors and routines without + /// checking that SSE2 is available. + /// + /// # Safety + /// + /// Callers must guarantee that it is safe to execute `sse2` instructions + /// in the current environment. + /// + /// Note that it is a common misconception that if one compiles for an + /// `x86_64` target, then they therefore automatically have access to SSE2 + /// instructions. While this is almost always the case, it isn't true in + /// 100% of cases. + #[target_feature(enable = "sse2")] + #[inline] + pub unsafe fn new_unchecked(needle1: u8, needle2: u8) -> Two { + Two(generic::Two::new(needle1, needle2)) + } + + /// Returns true when this implementation is available in the current + /// environment. + /// + /// When this is true, it is guaranteed that [`Two::new`] will return + /// a `Some` value. Similarly, when it is false, it is guaranteed that + /// `Two::new` will return a `None` value. + /// + /// Note also that for the lifetime of a single program, if this returns + /// true then it will always return true. 
+ #[inline] + pub fn is_available() -> bool { + #[cfg(target_feature = "sse2")] + { + true + } + #[cfg(not(target_feature = "sse2"))] + { + false + } + } + + /// Return the first occurrence of one of the needle bytes in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value is `haystack.len() - 1`. + #[inline] + pub fn find(&self, haystack: &[u8]) -> Option { + // SAFETY: `find_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. + unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.find_raw(s, e) + }) + } + } + + /// Return the last occurrence of one of the needle bytes in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value is `haystack.len() - 1`. + #[inline] + pub fn rfind(&self, haystack: &[u8]) -> Option { + // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. + unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.rfind_raw(s, e) + }) + } + } + + /// Like `find`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. 
+ /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn find_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + if end.distance(start) < __m128i::BYTES { + // SAFETY: We require the caller to pass valid start/end pointers. + return generic::fwd_byte_by_byte(start, end, |b| { + b == self.0.needle1() || b == self.0.needle2() + }); + } + // SAFETY: Building a `Two` means it's safe to call 'sse2' routines. + // Also, we've checked that our haystack is big enough to run on the + // vector routine. Pointer validity is caller's responsibility. + // + // Note that we could call `self.0.find_raw` directly here. But that + // means we'd have to annotate this routine with `target_feature`. + // Which is fine, because this routine is `unsafe` anyway and the + // `target_feature` obligation is met by virtue of building a `Two`. + // The real problem is that a routine with a `target_feature` + // annotation generally can't be inlined into caller code unless the + // caller code has the same target feature annotations. Which is maybe + // okay for SSE2, but we do the same thing for AVX2 where caller code + // probably usually doesn't have AVX2 enabled. That means that this + // routine can be inlined which will handle some of the short-haystack + // cases above without touching the architecture specific code. + self.find_raw_impl(start, end) + } + + /// Like `rfind`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. 
+ /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn rfind_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + if end.distance(start) < __m128i::BYTES { + // SAFETY: We require the caller to pass valid start/end pointers. + return generic::rev_byte_by_byte(start, end, |b| { + b == self.0.needle1() || b == self.0.needle2() + }); + } + // SAFETY: Building a `Two` means it's safe to call 'sse2' routines. + // Also, we've checked that our haystack is big enough to run on the + // vector routine. Pointer validity is caller's responsibility. + // + // See note in forward routine above for why we don't just call + // `self.0.rfind_raw` directly here. + self.rfind_raw_impl(start, end) + } + + /// Execute a search using SSE2 vectors and routines. + /// + /// # Safety + /// + /// Same as [`Two::find_raw`], except the distance between `start` and + /// `end` must be at least the size of an SSE2 vector (in bytes). 
+ /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Two`, which can only be constructed + /// when it is safe to call `sse2` routines.) + #[target_feature(enable = "sse2")] + #[inline] + unsafe fn find_raw_impl( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.0.find_raw(start, end) + } + + /// Execute a search using SSE2 vectors and routines. + /// + /// # Safety + /// + /// Same as [`Two::rfind_raw`], except the distance between `start` and + /// `end` must be at least the size of an SSE2 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Two`, which can only be constructed + /// when it is safe to call `sse2` routines.) + #[target_feature(enable = "sse2")] + #[inline] + unsafe fn rfind_raw_impl( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.0.rfind_raw(start, end) + } + + /// Returns an iterator over all occurrences of the needle bytes in the + /// given haystack. + /// + /// The iterator returned implements `DoubleEndedIterator`. This means it + /// can also be used to find occurrences in reverse order. + #[inline] + pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> TwoIter<'a, 'h> { + TwoIter { searcher: self, it: generic::Iter::new(haystack) } + } +} + +/// An iterator over all occurrences of two possible bytes in a haystack. +/// +/// This iterator implements `DoubleEndedIterator`, which means it can also be +/// used to find occurrences in reverse order. +/// +/// This iterator is created by the [`Two::iter`] method. +/// +/// The lifetime parameters are as follows: +/// +/// * `'a` refers to the lifetime of the underlying [`Two`] searcher. +/// * `'h` refers to the lifetime of the haystack being searched. 
+#[derive(Clone, Debug)] +pub struct TwoIter<'a, 'h> { + searcher: &'a Two, + it: generic::Iter<'h>, +} + +impl<'a, 'h> Iterator for TwoIter<'a, 'h> { + type Item = usize; + + #[inline] + fn next(&mut self) -> Option { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'find_raw' falls within the bounds of the start and end pointer. + unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.it.size_hint() + } +} + +impl<'a, 'h> DoubleEndedIterator for TwoIter<'a, 'h> { + #[inline] + fn next_back(&mut self) -> Option { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'rfind_raw' falls within the bounds of the start and end pointer. + unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } + } +} + +impl<'a, 'h> core::iter::FusedIterator for TwoIter<'a, 'h> {} + +/// Finds all occurrences of three bytes in a haystack. +/// +/// That is, this reports matches of one of three possible bytes. For example, +/// searching for `a`, `b` or `o` in `afoobar` would report matches at offsets +/// `0`, `2`, `3`, `4` and `5`. +#[derive(Clone, Copy, Debug)] +pub struct Three(generic::Three<__m128i>); + +impl Three { + /// Create a new searcher that finds occurrences of the needle bytes given. + /// + /// This particular searcher is specialized to use SSE2 vector instructions + /// that typically make it quite fast. + /// + /// If SSE2 is unavailable in the current environment, then `None` is + /// returned. + #[inline] + pub fn new(needle1: u8, needle2: u8, needle3: u8) -> Option { + if Three::is_available() { + // SAFETY: we check that sse2 is available above. 
+ unsafe { Some(Three::new_unchecked(needle1, needle2, needle3)) } + } else { + None + } + } + + /// Create a new finder specific to SSE2 vectors and routines without + /// checking that SSE2 is available. + /// + /// # Safety + /// + /// Callers must guarantee that it is safe to execute `sse2` instructions + /// in the current environment. + /// + /// Note that it is a common misconception that if one compiles for an + /// `x86_64` target, then they therefore automatically have access to SSE2 + /// instructions. While this is almost always the case, it isn't true in + /// 100% of cases. + #[target_feature(enable = "sse2")] + #[inline] + pub unsafe fn new_unchecked( + needle1: u8, + needle2: u8, + needle3: u8, + ) -> Three { + Three(generic::Three::new(needle1, needle2, needle3)) + } + + /// Returns true when this implementation is available in the current + /// environment. + /// + /// When this is true, it is guaranteed that [`Three::new`] will return + /// a `Some` value. Similarly, when it is false, it is guaranteed that + /// `Three::new` will return a `None` value. + /// + /// Note also that for the lifetime of a single program, if this returns + /// true then it will always return true. + #[inline] + pub fn is_available() -> bool { + #[cfg(target_feature = "sse2")] + { + true + } + #[cfg(not(target_feature = "sse2"))] + { + false + } + } + + /// Return the first occurrence of one of the needle bytes in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value is `haystack.len() - 1`. + #[inline] + pub fn find(&self, haystack: &[u8]) -> Option { + // SAFETY: `find_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. 
+ unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.find_raw(s, e) + }) + } + } + + /// Return the last occurrence of one of the needle bytes in the given + /// haystack. If no such occurrence exists, then `None` is returned. + /// + /// The occurrence is reported as an offset into `haystack`. Its maximum + /// value is `haystack.len() - 1`. + #[inline] + pub fn rfind(&self, haystack: &[u8]) -> Option { + // SAFETY: `rfind_raw` guarantees that if a pointer is returned, it + // falls within the bounds of the start and end pointers. + unsafe { + generic::search_slice_with_raw(haystack, |s, e| { + self.rfind_raw(s, e) + }) + } + } + + /// Like `find`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. + /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn find_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + if end.distance(start) < __m128i::BYTES { + // SAFETY: We require the caller to pass valid start/end pointers. 
+ return generic::fwd_byte_by_byte(start, end, |b| { + b == self.0.needle1() + || b == self.0.needle2() + || b == self.0.needle3() + }); + } + // SAFETY: Building a `Three` means it's safe to call 'sse2' routines. + // Also, we've checked that our haystack is big enough to run on the + // vector routine. Pointer validity is caller's responsibility. + // + // Note that we could call `self.0.find_raw` directly here. But that + // means we'd have to annotate this routine with `target_feature`. + // Which is fine, because this routine is `unsafe` anyway and the + // `target_feature` obligation is met by virtue of building a `Three`. + // The real problem is that a routine with a `target_feature` + // annotation generally can't be inlined into caller code unless the + // caller code has the same target feature annotations. Which is maybe + // okay for SSE2, but we do the same thing for AVX2 where caller code + // probably usually doesn't have AVX2 enabled. That means that this + // routine can be inlined which will handle some of the short-haystack + // cases above without touching the architecture specific code. + self.find_raw_impl(start, end) + } + + /// Like `rfind`, but accepts and returns raw pointers. + /// + /// When a match is found, the pointer returned is guaranteed to be + /// `>= start` and `< end`. + /// + /// This routine is useful if you're already using raw pointers and would + /// like to avoid converting back to a slice before executing a search. + /// + /// # Safety + /// + /// * Both `start` and `end` must be valid for reads. + /// * Both `start` and `end` must point to an initialized value. + /// * Both `start` and `end` must point to the same allocated object and + /// must either be in bounds or at most one byte past the end of the + /// allocated object. + /// * Both `start` and `end` must be _derived from_ a pointer to the same + /// object. + /// * The distance between `start` and `end` must not overflow `isize`. 
+ /// * The distance being in bounds must not rely on "wrapping around" the + /// address space. + /// + /// Note that callers may pass a pair of pointers such that `start >= end`. + /// In that case, `None` will always be returned. + #[inline] + pub unsafe fn rfind_raw( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + if start >= end { + return None; + } + if end.distance(start) < __m128i::BYTES { + // SAFETY: We require the caller to pass valid start/end pointers. + return generic::rev_byte_by_byte(start, end, |b| { + b == self.0.needle1() + || b == self.0.needle2() + || b == self.0.needle3() + }); + } + // SAFETY: Building a `Three` means it's safe to call 'sse2' routines. + // Also, we've checked that our haystack is big enough to run on the + // vector routine. Pointer validity is caller's responsibility. + // + // See note in forward routine above for why we don't just call + // `self.0.rfind_raw` directly here. + self.rfind_raw_impl(start, end) + } + + /// Execute a search using SSE2 vectors and routines. + /// + /// # Safety + /// + /// Same as [`Three::find_raw`], except the distance between `start` and + /// `end` must be at least the size of an SSE2 vector (in bytes). + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Three`, which can only be constructed + /// when it is safe to call `sse2` routines.) + #[target_feature(enable = "sse2")] + #[inline] + unsafe fn find_raw_impl( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.0.find_raw(start, end) + } + + /// Execute a search using SSE2 vectors and routines. + /// + /// # Safety + /// + /// Same as [`Three::rfind_raw`], except the distance between `start` and + /// `end` must be at least the size of an SSE2 vector (in bytes). 
+ /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Three`, which can only be constructed + /// when it is safe to call `sse2` routines.) + #[target_feature(enable = "sse2")] + #[inline] + unsafe fn rfind_raw_impl( + &self, + start: *const u8, + end: *const u8, + ) -> Option<*const u8> { + self.0.rfind_raw(start, end) + } + + /// Returns an iterator over all occurrences of the needle byte in the + /// given haystack. + /// + /// The iterator returned implements `DoubleEndedIterator`. This means it + /// can also be used to find occurrences in reverse order. + #[inline] + pub fn iter<'a, 'h>(&'a self, haystack: &'h [u8]) -> ThreeIter<'a, 'h> { + ThreeIter { searcher: self, it: generic::Iter::new(haystack) } + } +} + +/// An iterator over all occurrences of three possible bytes in a haystack. +/// +/// This iterator implements `DoubleEndedIterator`, which means it can also be +/// used to find occurrences in reverse order. +/// +/// This iterator is created by the [`Three::iter`] method. +/// +/// The lifetime parameters are as follows: +/// +/// * `'a` refers to the lifetime of the underlying [`Three`] searcher. +/// * `'h` refers to the lifetime of the haystack being searched. +#[derive(Clone, Debug)] +pub struct ThreeIter<'a, 'h> { + searcher: &'a Three, + it: generic::Iter<'h>, +} + +impl<'a, 'h> Iterator for ThreeIter<'a, 'h> { + type Item = usize; + + #[inline] + fn next(&mut self) -> Option { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'find_raw' falls within the bounds of the start and end pointer. 
+ unsafe { self.it.next(|s, e| self.searcher.find_raw(s, e)) } + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.it.size_hint() + } +} + +impl<'a, 'h> DoubleEndedIterator for ThreeIter<'a, 'h> { + #[inline] + fn next_back(&mut self) -> Option { + // SAFETY: We rely on the generic iterator to provide valid start + // and end pointers, but we guarantee that any pointer returned by + // 'rfind_raw' falls within the bounds of the start and end pointer. + unsafe { self.it.next_back(|s, e| self.searcher.rfind_raw(s, e)) } + } +} + +impl<'a, 'h> core::iter::FusedIterator for ThreeIter<'a, 'h> {} + +#[cfg(test)] +mod tests { + use super::*; + + define_memchr_quickcheck!(super); + + #[test] + fn forward_one() { + crate::tests::memchr::Runner::new(1).forward_iter( + |haystack, needles| { + Some(One::new(needles[0])?.iter(haystack).collect()) + }, + ) + } + + #[test] + fn reverse_one() { + crate::tests::memchr::Runner::new(1).reverse_iter( + |haystack, needles| { + Some(One::new(needles[0])?.iter(haystack).rev().collect()) + }, + ) + } + + #[test] + fn count_one() { + crate::tests::memchr::Runner::new(1).count_iter(|haystack, needles| { + Some(One::new(needles[0])?.iter(haystack).count()) + }) + } + + #[test] + fn forward_two() { + crate::tests::memchr::Runner::new(2).forward_iter( + |haystack, needles| { + let n1 = needles.get(0).copied()?; + let n2 = needles.get(1).copied()?; + Some(Two::new(n1, n2)?.iter(haystack).collect()) + }, + ) + } + + #[test] + fn reverse_two() { + crate::tests::memchr::Runner::new(2).reverse_iter( + |haystack, needles| { + let n1 = needles.get(0).copied()?; + let n2 = needles.get(1).copied()?; + Some(Two::new(n1, n2)?.iter(haystack).rev().collect()) + }, + ) + } + + #[test] + fn forward_three() { + crate::tests::memchr::Runner::new(3).forward_iter( + |haystack, needles| { + let n1 = needles.get(0).copied()?; + let n2 = needles.get(1).copied()?; + let n3 = needles.get(2).copied()?; + Some(Three::new(n1, n2, 
n3)?.iter(haystack).collect()) + }, + ) + } + + #[test] + fn reverse_three() { + crate::tests::memchr::Runner::new(3).reverse_iter( + |haystack, needles| { + let n1 = needles.get(0).copied()?; + let n2 = needles.get(1).copied()?; + let n3 = needles.get(2).copied()?; + Some(Three::new(n1, n2, n3)?.iter(haystack).rev().collect()) + }, + ) + } +} diff --git a/vendor/memchr/src/arch/x86_64/sse2/mod.rs b/vendor/memchr/src/arch/x86_64/sse2/mod.rs new file mode 100644 index 0000000..bcb8307 --- /dev/null +++ b/vendor/memchr/src/arch/x86_64/sse2/mod.rs @@ -0,0 +1,6 @@ +/*! +Algorithms for the `x86_64` target using 128-bit vectors via SSE2. +*/ + +pub mod memchr; +pub mod packedpair; diff --git a/vendor/memchr/src/arch/x86_64/sse2/packedpair.rs b/vendor/memchr/src/arch/x86_64/sse2/packedpair.rs new file mode 100644 index 0000000..c8b5b99 --- /dev/null +++ b/vendor/memchr/src/arch/x86_64/sse2/packedpair.rs @@ -0,0 +1,232 @@ +/*! +A 128-bit vector implementation of the "packed pair" SIMD algorithm. + +The "packed pair" algorithm is based on the [generic SIMD] algorithm. The main +difference is that it (by default) uses a background distribution of byte +frequencies to heuristically select the pair of bytes to search for. + +[generic SIMD]: http://0x80.pl/articles/simd-strfind.html#first-and-last +*/ + +use core::arch::x86_64::__m128i; + +use crate::arch::{all::packedpair::Pair, generic::packedpair}; + +/// A "packed pair" finder that uses 128-bit vector operations. +/// +/// This finder picks two bytes that it believes have high predictive power +/// for indicating an overall match of a needle. Depending on whether +/// `Finder::find` or `Finder::find_prefilter` is used, it reports offsets +/// where the needle matches or could match. In the prefilter case, candidates +/// are reported whenever the [`Pair`] of bytes given matches. +#[derive(Clone, Copy, Debug)] +pub struct Finder(packedpair::Finder<__m128i>); + +impl Finder { + /// Create a new pair searcher. 
The searcher returned can either report + /// exact matches of `needle` or act as a prefilter and report candidate + /// positions of `needle`. + /// + /// If SSE2 is unavailable in the current environment or if a [`Pair`] + /// could not be constructed from the needle given, then `None` is + /// returned. + #[inline] + pub fn new(needle: &[u8]) -> Option { + Finder::with_pair(needle, Pair::new(needle)?) + } + + /// Create a new "packed pair" finder using the pair of bytes given. + /// + /// This constructor permits callers to control precisely which pair of + /// bytes is used as a predicate. + /// + /// If SSE2 is unavailable in the current environment, then `None` is + /// returned. + #[inline] + pub fn with_pair(needle: &[u8], pair: Pair) -> Option { + if Finder::is_available() { + // SAFETY: we check that sse2 is available above. We are also + // guaranteed to have needle.len() > 1 because we have a valid + // Pair. + unsafe { Some(Finder::with_pair_impl(needle, pair)) } + } else { + None + } + } + + /// Create a new `Finder` specific to SSE2 vectors and routines. + /// + /// # Safety + /// + /// Same as the safety for `packedpair::Finder::new`, and callers must also + /// ensure that SSE2 is available. + #[target_feature(enable = "sse2")] + #[inline] + unsafe fn with_pair_impl(needle: &[u8], pair: Pair) -> Finder { + let finder = packedpair::Finder::<__m128i>::new(needle, pair); + Finder(finder) + } + + /// Returns true when this implementation is available in the current + /// environment. + /// + /// When this is true, it is guaranteed that [`Finder::with_pair`] will + /// return a `Some` value. Similarly, when it is false, it is guaranteed + /// that `Finder::with_pair` will return a `None` value. Notice that this + /// does not guarantee that [`Finder::new`] will return a `Finder`. Namely, + /// even when `Finder::is_available` is true, it is not guaranteed that a + /// valid [`Pair`] can be found from the needle given. 
+ /// + /// Note also that for the lifetime of a single program, if this returns + /// true then it will always return true. + #[inline] + pub fn is_available() -> bool { + #[cfg(not(target_feature = "sse2"))] + { + false + } + #[cfg(target_feature = "sse2")] + { + true + } + } + + /// Execute a search using SSE2 vectors and routines. + /// + /// # Panics + /// + /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. + #[inline] + pub fn find(&self, haystack: &[u8], needle: &[u8]) -> Option { + // SAFETY: Building a `Finder` means it's safe to call 'sse2' routines. + unsafe { self.find_impl(haystack, needle) } + } + + /// Run this finder on the given haystack as a prefilter. + /// + /// If a candidate match is found, then an offset where the needle *could* + /// begin in the haystack is returned. + /// + /// # Panics + /// + /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. + #[inline] + pub fn find_prefilter(&self, haystack: &[u8]) -> Option { + // SAFETY: Building a `Finder` means it's safe to call 'sse2' routines. + unsafe { self.find_prefilter_impl(haystack) } + } + + /// Execute a search using SSE2 vectors and routines. + /// + /// # Panics + /// + /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. + /// + /// # Safety + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Finder`, which can only be constructed + /// when it is safe to call `sse2` routines.) + #[target_feature(enable = "sse2")] + #[inline] + unsafe fn find_impl( + &self, + haystack: &[u8], + needle: &[u8], + ) -> Option { + self.0.find(haystack, needle) + } + + /// Execute a prefilter search using SSE2 vectors and routines. + /// + /// # Panics + /// + /// When `haystack.len()` is less than [`Finder::min_haystack_len`]. 
+ /// + /// # Safety + /// + /// (The target feature safety obligation is automatically fulfilled by + /// virtue of being a method on `Finder`, which can only be constructed + /// when it is safe to call `sse2` routines.) + #[target_feature(enable = "sse2")] + #[inline] + unsafe fn find_prefilter_impl(&self, haystack: &[u8]) -> Option { + self.0.find_prefilter(haystack) + } + + /// Returns the pair of offsets (into the needle) used to check as a + /// predicate before confirming whether a needle exists at a particular + /// position. + #[inline] + pub fn pair(&self) -> &Pair { + self.0.pair() + } + + /// Returns the minimum haystack length that this `Finder` can search. + /// + /// Using a haystack with length smaller than this in a search will result + /// in a panic. The reason for this restriction is that this finder is + /// meant to be a low-level component that is part of a larger substring + /// strategy. In that sense, it avoids trying to handle all cases and + /// instead only handles the cases that it can handle very well. 
+ #[inline] + pub fn min_haystack_len(&self) -> usize { + self.0.min_haystack_len() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn find(haystack: &[u8], needle: &[u8]) -> Option> { + let f = Finder::new(needle)?; + if haystack.len() < f.min_haystack_len() { + return None; + } + Some(f.find(haystack, needle)) + } + + define_substring_forward_quickcheck!(find); + + #[test] + fn forward_substring() { + crate::tests::substring::Runner::new().fwd(find).run() + } + + #[test] + fn forward_packedpair() { + fn find( + haystack: &[u8], + needle: &[u8], + index1: u8, + index2: u8, + ) -> Option> { + let pair = Pair::with_indices(needle, index1, index2)?; + let f = Finder::with_pair(needle, pair)?; + if haystack.len() < f.min_haystack_len() { + return None; + } + Some(f.find(haystack, needle)) + } + crate::tests::packedpair::Runner::new().fwd(find).run() + } + + #[test] + fn forward_packedpair_prefilter() { + fn find( + haystack: &[u8], + needle: &[u8], + index1: u8, + index2: u8, + ) -> Option> { + let pair = Pair::with_indices(needle, index1, index2)?; + let f = Finder::with_pair(needle, pair)?; + if haystack.len() < f.min_haystack_len() { + return None; + } + Some(f.find_prefilter(haystack)) + } + crate::tests::packedpair::Runner::new().fwd(find).run() + } +} diff --git a/vendor/memchr/src/cow.rs b/vendor/memchr/src/cow.rs index 0b7d0da..f291645 100644 --- a/vendor/memchr/src/cow.rs +++ b/vendor/memchr/src/cow.rs @@ -4,22 +4,23 @@ use core::ops; /// /// The purpose of this type is to permit usage of a "borrowed or owned /// byte string" in a way that keeps std/no-std compatibility. That is, in -/// no-std mode, this type devolves into a simple &[u8] with no owned variant -/// available. We can't just use a plain Cow because Cow is not in core. +/// no-std/alloc mode, this type devolves into a simple &[u8] with no owned +/// variant available. We can't just use a plain Cow because Cow is not in +/// core. 
#[derive(Clone, Debug)] pub struct CowBytes<'a>(Imp<'a>); -// N.B. We don't use std::borrow::Cow here since we can get away with a +// N.B. We don't use alloc::borrow::Cow here since we can get away with a // Box<[u8]> for our use case, which is 1/3 smaller than the Vec that // a Cow<[u8]> would use. -#[cfg(feature = "std")] +#[cfg(feature = "alloc")] #[derive(Clone, Debug)] enum Imp<'a> { Borrowed(&'a [u8]), - Owned(Box<[u8]>), + Owned(alloc::boxed::Box<[u8]>), } -#[cfg(not(feature = "std"))] +#[cfg(not(feature = "alloc"))] #[derive(Clone, Debug)] struct Imp<'a>(&'a [u8]); @@ -35,21 +36,21 @@ impl<'a> ops::Deref for CowBytes<'a> { impl<'a> CowBytes<'a> { /// Create a new borrowed CowBytes. #[inline(always)] - pub fn new>(bytes: &'a B) -> CowBytes<'a> { + pub(crate) fn new>(bytes: &'a B) -> CowBytes<'a> { CowBytes(Imp::new(bytes.as_ref())) } /// Create a new owned CowBytes. - #[cfg(feature = "std")] + #[cfg(feature = "alloc")] #[inline(always)] - pub fn new_owned(bytes: Box<[u8]>) -> CowBytes<'static> { + fn new_owned(bytes: alloc::boxed::Box<[u8]>) -> CowBytes<'static> { CowBytes(Imp::Owned(bytes)) } /// Return a borrowed byte string, regardless of whether this is an owned /// or borrowed byte string internally. #[inline(always)] - pub fn as_slice(&self) -> &[u8] { + pub(crate) fn as_slice(&self) -> &[u8] { self.0.as_slice() } @@ -57,39 +58,48 @@ impl<'a> CowBytes<'a> { /// /// If this is already an owned byte string internally, then this is a /// no-op. Otherwise, the internal byte string is copied. 
- #[cfg(feature = "std")] + #[cfg(feature = "alloc")] #[inline(always)] - pub fn into_owned(self) -> CowBytes<'static> { + pub(crate) fn into_owned(self) -> CowBytes<'static> { match self.0 { - Imp::Borrowed(b) => CowBytes::new_owned(Box::from(b)), + Imp::Borrowed(b) => { + CowBytes::new_owned(alloc::boxed::Box::from(b)) + } Imp::Owned(b) => CowBytes::new_owned(b), } } } impl<'a> Imp<'a> { - #[cfg(feature = "std")] - #[inline(always)] - pub fn new(bytes: &'a [u8]) -> Imp<'a> { - Imp::Borrowed(bytes) - } - - #[cfg(not(feature = "std"))] #[inline(always)] pub fn new(bytes: &'a [u8]) -> Imp<'a> { - Imp(bytes) + #[cfg(feature = "alloc")] + { + Imp::Borrowed(bytes) + } + #[cfg(not(feature = "alloc"))] + { + Imp(bytes) + } } - #[cfg(feature = "std")] + #[cfg(feature = "alloc")] #[inline(always)] pub fn as_slice(&self) -> &[u8] { - match self { - Imp::Owned(ref x) => x, - Imp::Borrowed(x) => x, + #[cfg(feature = "alloc")] + { + match self { + Imp::Owned(ref x) => x, + Imp::Borrowed(x) => x, + } + } + #[cfg(not(feature = "alloc"))] + { + self.0 } } - #[cfg(not(feature = "std"))] + #[cfg(not(feature = "alloc"))] #[inline(always)] pub fn as_slice(&self) -> &[u8] { self.0 diff --git a/vendor/memchr/src/ext.rs b/vendor/memchr/src/ext.rs new file mode 100644 index 0000000..1bb21dd --- /dev/null +++ b/vendor/memchr/src/ext.rs @@ -0,0 +1,52 @@ +/// A trait for adding some helper routines to pointers. +pub(crate) trait Pointer { + /// Returns the distance, in units of `T`, between `self` and `origin`. + /// + /// # Safety + /// + /// Same as `ptr::offset_from` in addition to `self >= origin`. + unsafe fn distance(self, origin: Self) -> usize; + + /// Casts this pointer to `usize`. + /// + /// Callers should not convert the `usize` back to a pointer if at all + /// possible. (And if you believe it's necessary, open an issue to discuss + /// why. Otherwise, it has the potential to violate pointer provenance.) 
+ /// The purpose of this function is just to be able to do arithmetic, i.e., + /// computing offsets or alignments. + fn as_usize(self) -> usize; +} + +impl Pointer for *const T { + unsafe fn distance(self, origin: *const T) -> usize { + // TODO: Replace with `ptr::sub_ptr` once stabilized. + usize::try_from(self.offset_from(origin)).unwrap_unchecked() + } + + fn as_usize(self) -> usize { + self as usize + } +} + +impl Pointer for *mut T { + unsafe fn distance(self, origin: *mut T) -> usize { + (self as *const T).distance(origin as *const T) + } + + fn as_usize(self) -> usize { + (self as *const T).as_usize() + } +} + +/// A trait for adding some helper routines to raw bytes. +pub(crate) trait Byte { + /// Converts this byte to a `char` if it's ASCII. Otherwise panics. + fn to_char(self) -> char; +} + +impl Byte for u8 { + fn to_char(self) -> char { + assert!(self.is_ascii()); + char::from(self) + } +} diff --git a/vendor/memchr/src/lib.rs b/vendor/memchr/src/lib.rs index e0b4ce3..de366fb 100644 --- a/vendor/memchr/src/lib.rs +++ b/vendor/memchr/src/lib.rs @@ -113,9 +113,9 @@ solution presented above, however, its throughput can easily be over an order of magnitude faster. This is a good general purpose trade off to make. You rarely lose, but often gain big. -**NOTE:** The name `memchr` comes from the corresponding routine in libc. A key -advantage of using this library is that its performance is not tied to its -quality of implementation in the libc you happen to be using, which can vary +**NOTE:** The name `memchr` comes from the corresponding routine in `libc`. A +key advantage of using this library is that its performance is not tied to its +quality of implementation in the `libc` you happen to be using, which can vary greatly from platform to platform. But what about substring search? This one is a bit more complicated. The @@ -131,32 +131,58 @@ implementation in the standard library, even if only for searching on UTF-8? 
The reason is that the implementation details for using SIMD in the standard library haven't quite been worked out yet. -**NOTE:** Currently, only `x86_64` targets have highly accelerated -implementations of substring search. For `memchr`, all targets have -somewhat-accelerated implementations, while only `x86_64` targets have highly -accelerated implementations. This limitation is expected to be lifted once the -standard library exposes a platform independent SIMD API. +**NOTE:** Currently, only `x86_64`, `wasm32` and `aarch64` targets have vector +accelerated implementations of `memchr` (and friends) and `memmem`. # Crate features -* **std** - When enabled (the default), this will permit this crate to use - features specific to the standard library. Currently, the only thing used - from the standard library is runtime SIMD CPU feature detection. This means - that this feature must be enabled to get AVX accelerated routines. When - `std` is not enabled, this crate will still attempt to use SSE2 accelerated - routines on `x86_64`. -* **libc** - When enabled (**not** the default), this library will use your - platform's libc implementation of `memchr` (and `memrchr` on Linux). This - can be useful on non-`x86_64` targets where the fallback implementation in - this crate is not as good as the one found in your libc. All other routines - (e.g., `memchr[23]` and substring search) unconditionally use the - implementation in this crate. +* **std** - When enabled (the default), this will permit features specific to +the standard library. Currently, the only thing used from the standard library +is runtime SIMD CPU feature detection. This means that this feature must be +enabled to get AVX2 accelerated routines on `x86_64` targets without enabling +the `avx2` feature at compile time, for example. When `std` is not enabled, +this crate will still attempt to use SSE2 accelerated routines on `x86_64`. 
It +will also use AVX2 accelerated routines when the `avx2` feature is enabled at +compile time. In general, enable this feature if you can. +* **alloc** - When enabled (the default), APIs in this crate requiring some +kind of allocation will become available. For example, the +[`memmem::Finder::into_owned`](crate::memmem::Finder::into_owned) API and the +[`arch::all::shiftor`](crate::arch::all::shiftor) substring search +implementation. Otherwise, this crate is designed from the ground up to be +usable in core-only contexts, so the `alloc` feature doesn't add much +currently. Notably, disabling `std` but enabling `alloc` will **not** result +in the use of AVX2 on `x86_64` targets unless the `avx2` feature is enabled +at compile time. (With `std` enabled, AVX2 can be used even without the `avx2` +feature enabled at compile time by way of runtime CPU feature detection.) +* **logging** - When enabled (disabled by default), the `log` crate is used +to emit log messages about what kinds of `memchr` and `memmem` algorithms +are used. Namely, both `memchr` and `memmem` have a number of different +implementation choices depending on the target and CPU, and the log messages +can help show what specific implementations are being used. Generally, this is +useful for debugging performance issues. +* **libc** - **DEPRECATED**. Previously, this enabled the use of the target's +`memchr` function from whatever `libc` was linked into the program. This +feature is now a no-op because this crate's implementation of `memchr` should +now be sufficiently fast on a number of platforms that `libc` should no longer +be needed. (This feature is somewhat of a holdover from this crate's origins. +Originally, this crate was literally just a safe wrapper function around the +`memchr` function from `libc`.) */ #![deny(missing_docs)] -#![cfg_attr(not(feature = "std"), no_std)] -// It's not worth trying to gate all code on just miri, so turn off relevant -// dead code warnings. 
+#![no_std] +// It's just not worth trying to squash all dead code warnings. Pretty +// unfortunate IMO. Not really sure how to fix this other than to either +// live with it or sprinkle a whole mess of `cfg` annotations everywhere. +#![cfg_attr( + not(any( + all(target_arch = "x86_64", target_feature = "sse2"), + target_arch = "wasm32", + target_arch = "aarch64", + )), + allow(dead_code) +)] +// Same deal for miri. #![cfg_attr(miri, allow(dead_code, unused_macros))] // Supporting 8-bit (or others) would be fine. If you need it, please submit a @@ -168,14 +194,28 @@ standard library exposes a platform independent SIMD API. )))] compile_error!("memchr currently not supported on non-{16,32,64}"); +#[cfg(any(test, feature = "std"))] +extern crate std; + +#[cfg(any(test, feature = "alloc"))] +extern crate alloc; + pub use crate::memchr::{ memchr, memchr2, memchr2_iter, memchr3, memchr3_iter, memchr_iter, memrchr, memrchr2, memrchr2_iter, memrchr3, memrchr3_iter, memrchr_iter, Memchr, Memchr2, Memchr3, }; +#[macro_use] +mod macros; + +#[cfg(test)] +#[macro_use] +mod tests; + +pub mod arch; mod cow; +mod ext; mod memchr; pub mod memmem; -#[cfg(test)] -mod tests; +mod vector; diff --git a/vendor/memchr/src/macros.rs b/vendor/memchr/src/macros.rs new file mode 100644 index 0000000..31b4ca3 --- /dev/null +++ b/vendor/memchr/src/macros.rs @@ -0,0 +1,20 @@ +// Some feature combinations result in some of these macros never being used. +// Which is fine. Just squash the warnings. +#![allow(unused_macros)] + +macro_rules! log { + ($($tt:tt)*) => { + #[cfg(feature = "logging")] + { + $($tt)* + } + } +} + +macro_rules! debug { + ($($tt:tt)*) => { log!(log::debug!($($tt)*)) } +} + +macro_rules! 
trace { + ($($tt:tt)*) => { log!(log::trace!($($tt)*)) } +} diff --git a/vendor/memchr/src/memchr.rs b/vendor/memchr/src/memchr.rs new file mode 100644 index 0000000..68adb9a --- /dev/null +++ b/vendor/memchr/src/memchr.rs @@ -0,0 +1,903 @@ +use core::iter::Rev; + +use crate::arch::generic::memchr as generic; + +/// Search for the first occurrence of a byte in a slice. +/// +/// This returns the index corresponding to the first occurrence of `needle` in +/// `haystack`, or `None` if one is not found. If an index is returned, it is +/// guaranteed to be less than `haystack.len()`. +/// +/// While this is semantically the same as something like +/// `haystack.iter().position(|&b| b == needle)`, this routine will attempt to +/// use highly optimized vector operations that can be an order of magnitude +/// faster (or more). +/// +/// # Example +/// +/// This shows how to find the first position of a byte in a byte string. +/// +/// ``` +/// use memchr::memchr; +/// +/// let haystack = b"the quick brown fox"; +/// assert_eq!(memchr(b'k', haystack), Some(8)); +/// ``` +#[inline] +pub fn memchr(needle: u8, haystack: &[u8]) -> Option { + // SAFETY: memchr_raw, when a match is found, always returns a valid + // pointer between start and end. + unsafe { + generic::search_slice_with_raw(haystack, |start, end| { + memchr_raw(needle, start, end) + }) + } +} + +/// Search for the last occurrence of a byte in a slice. +/// +/// This returns the index corresponding to the last occurrence of `needle` in +/// `haystack`, or `None` if one is not found. If an index is returned, it is +/// guaranteed to be less than `haystack.len()`. +/// +/// While this is semantically the same as something like +/// `haystack.iter().rposition(|&b| b == needle)`, this routine will attempt to +/// use highly optimized vector operations that can be an order of magnitude +/// faster (or more). +/// +/// # Example +/// +/// This shows how to find the last position of a byte in a byte string. 
+/// +/// ``` +/// use memchr::memrchr; +/// +/// let haystack = b"the quick brown fox"; +/// assert_eq!(memrchr(b'o', haystack), Some(17)); +/// ``` +#[inline] +pub fn memrchr(needle: u8, haystack: &[u8]) -> Option { + // SAFETY: memrchr_raw, when a match is found, always returns a valid + // pointer between start and end. + unsafe { + generic::search_slice_with_raw(haystack, |start, end| { + memrchr_raw(needle, start, end) + }) + } +} + +/// Search for the first occurrence of two possible bytes in a haystack. +/// +/// This returns the index corresponding to the first occurrence of one of the +/// needle bytes in `haystack`, or `None` if one is not found. If an index is +/// returned, it is guaranteed to be less than `haystack.len()`. +/// +/// While this is semantically the same as something like +/// `haystack.iter().position(|&b| b == needle1 || b == needle2)`, this routine +/// will attempt to use highly optimized vector operations that can be an order +/// of magnitude faster (or more). +/// +/// # Example +/// +/// This shows how to find the first position of one of two possible bytes in a +/// haystack. +/// +/// ``` +/// use memchr::memchr2; +/// +/// let haystack = b"the quick brown fox"; +/// assert_eq!(memchr2(b'k', b'q', haystack), Some(4)); +/// ``` +#[inline] +pub fn memchr2(needle1: u8, needle2: u8, haystack: &[u8]) -> Option { + // SAFETY: memchr2_raw, when a match is found, always returns a valid + // pointer between start and end. + unsafe { + generic::search_slice_with_raw(haystack, |start, end| { + memchr2_raw(needle1, needle2, start, end) + }) + } +} + +/// Search for the last occurrence of two possible bytes in a haystack. +/// +/// This returns the index corresponding to the last occurrence of one of the +/// needle bytes in `haystack`, or `None` if one is not found. If an index is +/// returned, it is guaranteed to be less than `haystack.len()`. 
+/// +/// While this is semantically the same as something like +/// `haystack.iter().rposition(|&b| b == needle1 || b == needle2)`, this +/// routine will attempt to use highly optimized vector operations that can be +/// an order of magnitude faster (or more). +/// +/// # Example +/// +/// This shows how to find the last position of one of two possible bytes in a +/// haystack. +/// +/// ``` +/// use memchr::memrchr2; +/// +/// let haystack = b"the quick brown fox"; +/// assert_eq!(memrchr2(b'k', b'o', haystack), Some(17)); +/// ``` +#[inline] +pub fn memrchr2(needle1: u8, needle2: u8, haystack: &[u8]) -> Option { + // SAFETY: memrchr2_raw, when a match is found, always returns a valid + // pointer between start and end. + unsafe { + generic::search_slice_with_raw(haystack, |start, end| { + memrchr2_raw(needle1, needle2, start, end) + }) + } +} + +/// Search for the first occurrence of three possible bytes in a haystack. +/// +/// This returns the index corresponding to the first occurrence of one of the +/// needle bytes in `haystack`, or `None` if one is not found. If an index is +/// returned, it is guaranteed to be less than `haystack.len()`. +/// +/// While this is semantically the same as something like +/// `haystack.iter().position(|&b| b == needle1 || b == needle2 || b == needle3)`, +/// this routine will attempt to use highly optimized vector operations that +/// can be an order of magnitude faster (or more). +/// +/// # Example +/// +/// This shows how to find the first position of one of three possible bytes in +/// a haystack. +/// +/// ``` +/// use memchr::memchr3; +/// +/// let haystack = b"the quick brown fox"; +/// assert_eq!(memchr3(b'k', b'q', b'u', haystack), Some(4)); +/// ``` +#[inline] +pub fn memchr3( + needle1: u8, + needle2: u8, + needle3: u8, + haystack: &[u8], +) -> Option { + // SAFETY: memchr3_raw, when a match is found, always returns a valid + // pointer between start and end. 
+ unsafe { + generic::search_slice_with_raw(haystack, |start, end| { + memchr3_raw(needle1, needle2, needle3, start, end) + }) + } +} + +/// Search for the last occurrence of three possible bytes in a haystack. +/// +/// This returns the index corresponding to the last occurrence of one of the +/// needle bytes in `haystack`, or `None` if one is not found. If an index is +/// returned, it is guaranteed to be less than `haystack.len()`. +/// +/// While this is semantically the same as something like +/// `haystack.iter().rposition(|&b| b == needle1 || b == needle2 || b == needle3)`, +/// this routine will attempt to use highly optimized vector operations that +/// can be an order of magnitude faster (or more). +/// +/// # Example +/// +/// This shows how to find the last position of one of three possible bytes in +/// a haystack. +/// +/// ``` +/// use memchr::memrchr3; +/// +/// let haystack = b"the quick brown fox"; +/// assert_eq!(memrchr3(b'k', b'o', b'n', haystack), Some(17)); +/// ``` +#[inline] +pub fn memrchr3( + needle1: u8, + needle2: u8, + needle3: u8, + haystack: &[u8], +) -> Option { + // SAFETY: memrchr3_raw, when a match is found, always returns a valid + // pointer between start and end. + unsafe { + generic::search_slice_with_raw(haystack, |start, end| { + memrchr3_raw(needle1, needle2, needle3, start, end) + }) + } +} + +/// Returns an iterator over all occurrences of the needle in a haystack. +/// +/// The iterator returned implements `DoubleEndedIterator`. This means it +/// can also be used to find occurrences in reverse order. +#[inline] +pub fn memchr_iter<'h>(needle: u8, haystack: &'h [u8]) -> Memchr<'h> { + Memchr::new(needle, haystack) +} + +/// Returns an iterator over all occurrences of the needle in a haystack, in +/// reverse. +#[inline] +pub fn memrchr_iter(needle: u8, haystack: &[u8]) -> Rev> { + Memchr::new(needle, haystack).rev() +} + +/// Returns an iterator over all occurrences of the needles in a haystack. 
+/// +/// The iterator returned implements `DoubleEndedIterator`. This means it +/// can also be used to find occurrences in reverse order. +#[inline] +pub fn memchr2_iter<'h>( + needle1: u8, + needle2: u8, + haystack: &'h [u8], +) -> Memchr2<'h> { + Memchr2::new(needle1, needle2, haystack) +} + +/// Returns an iterator over all occurrences of the needles in a haystack, in +/// reverse. +#[inline] +pub fn memrchr2_iter( + needle1: u8, + needle2: u8, + haystack: &[u8], +) -> Rev> { + Memchr2::new(needle1, needle2, haystack).rev() +} + +/// Returns an iterator over all occurrences of the needles in a haystack. +/// +/// The iterator returned implements `DoubleEndedIterator`. This means it +/// can also be used to find occurrences in reverse order. +#[inline] +pub fn memchr3_iter<'h>( + needle1: u8, + needle2: u8, + needle3: u8, + haystack: &'h [u8], +) -> Memchr3<'h> { + Memchr3::new(needle1, needle2, needle3, haystack) +} + +/// Returns an iterator over all occurrences of the needles in a haystack, in +/// reverse. +#[inline] +pub fn memrchr3_iter( + needle1: u8, + needle2: u8, + needle3: u8, + haystack: &[u8], +) -> Rev> { + Memchr3::new(needle1, needle2, needle3, haystack).rev() +} + +/// An iterator over all occurrences of a single byte in a haystack. +/// +/// This iterator implements `DoubleEndedIterator`, which means it can also be +/// used to find occurrences in reverse order. +/// +/// This iterator is created by the [`memchr_iter`] or `[memrchr_iter`] +/// functions. It can also be created with the [`Memchr::new`] method. +/// +/// The lifetime parameter `'h` refers to the lifetime of the haystack being +/// searched. +#[derive(Clone, Debug)] +pub struct Memchr<'h> { + needle1: u8, + it: crate::arch::generic::memchr::Iter<'h>, +} + +impl<'h> Memchr<'h> { + /// Returns an iterator over all occurrences of the needle byte in the + /// given haystack. + /// + /// The iterator returned implements `DoubleEndedIterator`. 
This means it + /// can also be used to find occurrences in reverse order. + #[inline] + pub fn new(needle1: u8, haystack: &'h [u8]) -> Memchr<'h> { + Memchr { + needle1, + it: crate::arch::generic::memchr::Iter::new(haystack), + } + } +} + +impl<'h> Iterator for Memchr<'h> { + type Item = usize; + + #[inline] + fn next(&mut self) -> Option { + // SAFETY: All of our implementations of memchr ensure that any + // pointers returns will fall within the start and end bounds, and this + // upholds the safety contract of `self.it.next`. + unsafe { + // NOTE: I attempted to define an enum of previously created + // searchers and then switch on those here instead of just + // calling `memchr_raw` (or `One::new(..).find_raw(..)`). But + // that turned out to have a fair bit of extra overhead when + // searching very small haystacks. + self.it.next(|s, e| memchr_raw(self.needle1, s, e)) + } + } + + #[inline] + fn count(self) -> usize { + self.it.count(|s, e| { + // SAFETY: We rely on our generic iterator to return valid start + // and end pointers. + unsafe { count_raw(self.needle1, s, e) } + }) + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.it.size_hint() + } +} + +impl<'h> DoubleEndedIterator for Memchr<'h> { + #[inline] + fn next_back(&mut self) -> Option { + // SAFETY: All of our implementations of memchr ensure that any + // pointers returns will fall within the start and end bounds, and this + // upholds the safety contract of `self.it.next_back`. + unsafe { self.it.next_back(|s, e| memrchr_raw(self.needle1, s, e)) } + } +} + +impl<'h> core::iter::FusedIterator for Memchr<'h> {} + +/// An iterator over all occurrences of two possible bytes in a haystack. +/// +/// This iterator implements `DoubleEndedIterator`, which means it can also be +/// used to find occurrences in reverse order. +/// +/// This iterator is created by the [`memchr2_iter`] or `[memrchr2_iter`] +/// functions. It can also be created with the [`Memchr2::new`] method. 
+/// +/// The lifetime parameter `'h` refers to the lifetime of the haystack being +/// searched. +#[derive(Clone, Debug)] +pub struct Memchr2<'h> { + needle1: u8, + needle2: u8, + it: crate::arch::generic::memchr::Iter<'h>, +} + +impl<'h> Memchr2<'h> { + /// Returns an iterator over all occurrences of the needle bytes in the + /// given haystack. + /// + /// The iterator returned implements `DoubleEndedIterator`. This means it + /// can also be used to find occurrences in reverse order. + #[inline] + pub fn new(needle1: u8, needle2: u8, haystack: &'h [u8]) -> Memchr2<'h> { + Memchr2 { + needle1, + needle2, + it: crate::arch::generic::memchr::Iter::new(haystack), + } + } +} + +impl<'h> Iterator for Memchr2<'h> { + type Item = usize; + + #[inline] + fn next(&mut self) -> Option { + // SAFETY: All of our implementations of memchr ensure that any + // pointers returns will fall within the start and end bounds, and this + // upholds the safety contract of `self.it.next`. + unsafe { + self.it.next(|s, e| memchr2_raw(self.needle1, self.needle2, s, e)) + } + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.it.size_hint() + } +} + +impl<'h> DoubleEndedIterator for Memchr2<'h> { + #[inline] + fn next_back(&mut self) -> Option { + // SAFETY: All of our implementations of memchr ensure that any + // pointers returns will fall within the start and end bounds, and this + // upholds the safety contract of `self.it.next_back`. + unsafe { + self.it.next_back(|s, e| { + memrchr2_raw(self.needle1, self.needle2, s, e) + }) + } + } +} + +impl<'h> core::iter::FusedIterator for Memchr2<'h> {} + +/// An iterator over all occurrences of three possible bytes in a haystack. +/// +/// This iterator implements `DoubleEndedIterator`, which means it can also be +/// used to find occurrences in reverse order. +/// +/// This iterator is created by the [`memchr2_iter`] or `[memrchr2_iter`] +/// functions. It can also be created with the [`Memchr3::new`] method. 
+/// +/// The lifetime parameter `'h` refers to the lifetime of the haystack being +/// searched. +#[derive(Clone, Debug)] +pub struct Memchr3<'h> { + needle1: u8, + needle2: u8, + needle3: u8, + it: crate::arch::generic::memchr::Iter<'h>, +} + +impl<'h> Memchr3<'h> { + /// Returns an iterator over all occurrences of the needle bytes in the + /// given haystack. + /// + /// The iterator returned implements `DoubleEndedIterator`. This means it + /// can also be used to find occurrences in reverse order. + #[inline] + pub fn new( + needle1: u8, + needle2: u8, + needle3: u8, + haystack: &'h [u8], + ) -> Memchr3<'h> { + Memchr3 { + needle1, + needle2, + needle3, + it: crate::arch::generic::memchr::Iter::new(haystack), + } + } +} + +impl<'h> Iterator for Memchr3<'h> { + type Item = usize; + + #[inline] + fn next(&mut self) -> Option { + // SAFETY: All of our implementations of memchr ensure that any + // pointers returns will fall within the start and end bounds, and this + // upholds the safety contract of `self.it.next`. + unsafe { + self.it.next(|s, e| { + memchr3_raw(self.needle1, self.needle2, self.needle3, s, e) + }) + } + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.it.size_hint() + } +} + +impl<'h> DoubleEndedIterator for Memchr3<'h> { + #[inline] + fn next_back(&mut self) -> Option { + // SAFETY: All of our implementations of memchr ensure that any + // pointers returns will fall within the start and end bounds, and this + // upholds the safety contract of `self.it.next_back`. + unsafe { + self.it.next_back(|s, e| { + memrchr3_raw(self.needle1, self.needle2, self.needle3, s, e) + }) + } + } +} + +impl<'h> core::iter::FusedIterator for Memchr3<'h> {} + +/// memchr, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `One::find_raw`. 
+#[inline] +unsafe fn memchr_raw( + needle: u8, + start: *const u8, + end: *const u8, +) -> Option<*const u8> { + #[cfg(target_arch = "x86_64")] + { + // x86_64 does CPU feature detection at runtime in order to use AVX2 + // instructions even when the `avx2` feature isn't enabled at compile + // time. This function also handles using a fallback if neither AVX2 + // nor SSE2 (unusual) are available. + crate::arch::x86_64::memchr::memchr_raw(needle, start, end) + } + #[cfg(target_arch = "wasm32")] + { + crate::arch::wasm32::memchr::memchr_raw(needle, start, end) + } + #[cfg(target_arch = "aarch64")] + { + crate::arch::aarch64::memchr::memchr_raw(needle, start, end) + } + #[cfg(not(any( + target_arch = "x86_64", + target_arch = "wasm32", + target_arch = "aarch64" + )))] + { + crate::arch::all::memchr::One::new(needle).find_raw(start, end) + } +} + +/// memrchr, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `One::rfind_raw`. +#[inline] +unsafe fn memrchr_raw( + needle: u8, + start: *const u8, + end: *const u8, +) -> Option<*const u8> { + #[cfg(target_arch = "x86_64")] + { + crate::arch::x86_64::memchr::memrchr_raw(needle, start, end) + } + #[cfg(target_arch = "wasm32")] + { + crate::arch::wasm32::memchr::memrchr_raw(needle, start, end) + } + #[cfg(target_arch = "aarch64")] + { + crate::arch::aarch64::memchr::memrchr_raw(needle, start, end) + } + #[cfg(not(any( + target_arch = "x86_64", + target_arch = "wasm32", + target_arch = "aarch64" + )))] + { + crate::arch::all::memchr::One::new(needle).rfind_raw(start, end) + } +} + +/// memchr2, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `Two::find_raw`. 
+#[inline] +unsafe fn memchr2_raw( + needle1: u8, + needle2: u8, + start: *const u8, + end: *const u8, +) -> Option<*const u8> { + #[cfg(target_arch = "x86_64")] + { + crate::arch::x86_64::memchr::memchr2_raw(needle1, needle2, start, end) + } + #[cfg(target_arch = "wasm32")] + { + crate::arch::wasm32::memchr::memchr2_raw(needle1, needle2, start, end) + } + #[cfg(target_arch = "aarch64")] + { + crate::arch::aarch64::memchr::memchr2_raw(needle1, needle2, start, end) + } + #[cfg(not(any( + target_arch = "x86_64", + target_arch = "wasm32", + target_arch = "aarch64" + )))] + { + crate::arch::all::memchr::Two::new(needle1, needle2) + .find_raw(start, end) + } +} + +/// memrchr2, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `Two::rfind_raw`. +#[inline] +unsafe fn memrchr2_raw( + needle1: u8, + needle2: u8, + start: *const u8, + end: *const u8, +) -> Option<*const u8> { + #[cfg(target_arch = "x86_64")] + { + crate::arch::x86_64::memchr::memrchr2_raw(needle1, needle2, start, end) + } + #[cfg(target_arch = "wasm32")] + { + crate::arch::wasm32::memchr::memrchr2_raw(needle1, needle2, start, end) + } + #[cfg(target_arch = "aarch64")] + { + crate::arch::aarch64::memchr::memrchr2_raw( + needle1, needle2, start, end, + ) + } + #[cfg(not(any( + target_arch = "x86_64", + target_arch = "wasm32", + target_arch = "aarch64" + )))] + { + crate::arch::all::memchr::Two::new(needle1, needle2) + .rfind_raw(start, end) + } +} + +/// memchr3, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `Three::find_raw`. 
+#[inline] +unsafe fn memchr3_raw( + needle1: u8, + needle2: u8, + needle3: u8, + start: *const u8, + end: *const u8, +) -> Option<*const u8> { + #[cfg(target_arch = "x86_64")] + { + crate::arch::x86_64::memchr::memchr3_raw( + needle1, needle2, needle3, start, end, + ) + } + #[cfg(target_arch = "wasm32")] + { + crate::arch::wasm32::memchr::memchr3_raw( + needle1, needle2, needle3, start, end, + ) + } + #[cfg(target_arch = "aarch64")] + { + crate::arch::aarch64::memchr::memchr3_raw( + needle1, needle2, needle3, start, end, + ) + } + #[cfg(not(any( + target_arch = "x86_64", + target_arch = "wasm32", + target_arch = "aarch64" + )))] + { + crate::arch::all::memchr::Three::new(needle1, needle2, needle3) + .find_raw(start, end) + } +} + +/// memrchr3, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `Three::rfind_raw`. +#[inline] +unsafe fn memrchr3_raw( + needle1: u8, + needle2: u8, + needle3: u8, + start: *const u8, + end: *const u8, +) -> Option<*const u8> { + #[cfg(target_arch = "x86_64")] + { + crate::arch::x86_64::memchr::memrchr3_raw( + needle1, needle2, needle3, start, end, + ) + } + #[cfg(target_arch = "wasm32")] + { + crate::arch::wasm32::memchr::memrchr3_raw( + needle1, needle2, needle3, start, end, + ) + } + #[cfg(target_arch = "aarch64")] + { + crate::arch::aarch64::memchr::memrchr3_raw( + needle1, needle2, needle3, start, end, + ) + } + #[cfg(not(any( + target_arch = "x86_64", + target_arch = "wasm32", + target_arch = "aarch64" + )))] + { + crate::arch::all::memchr::Three::new(needle1, needle2, needle3) + .rfind_raw(start, end) + } +} + +/// Count all matching bytes, but using raw pointers to represent the haystack. +/// +/// # Safety +/// +/// Pointers must be valid. See `One::count_raw`. 
+#[inline] +unsafe fn count_raw(needle: u8, start: *const u8, end: *const u8) -> usize { + #[cfg(target_arch = "x86_64")] + { + crate::arch::x86_64::memchr::count_raw(needle, start, end) + } + #[cfg(target_arch = "wasm32")] + { + crate::arch::wasm32::memchr::count_raw(needle, start, end) + } + #[cfg(target_arch = "aarch64")] + { + crate::arch::aarch64::memchr::count_raw(needle, start, end) + } + #[cfg(not(any( + target_arch = "x86_64", + target_arch = "wasm32", + target_arch = "aarch64" + )))] + { + crate::arch::all::memchr::One::new(needle).count_raw(start, end) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn forward1_iter() { + crate::tests::memchr::Runner::new(1).forward_iter( + |haystack, needles| { + Some(memchr_iter(needles[0], haystack).collect()) + }, + ) + } + + #[test] + fn forward1_oneshot() { + crate::tests::memchr::Runner::new(1).forward_oneshot( + |haystack, needles| Some(memchr(needles[0], haystack)), + ) + } + + #[test] + fn reverse1_iter() { + crate::tests::memchr::Runner::new(1).reverse_iter( + |haystack, needles| { + Some(memrchr_iter(needles[0], haystack).collect()) + }, + ) + } + + #[test] + fn reverse1_oneshot() { + crate::tests::memchr::Runner::new(1).reverse_oneshot( + |haystack, needles| Some(memrchr(needles[0], haystack)), + ) + } + + #[test] + fn count1_iter() { + crate::tests::memchr::Runner::new(1).count_iter(|haystack, needles| { + Some(memchr_iter(needles[0], haystack).count()) + }) + } + + #[test] + fn forward2_iter() { + crate::tests::memchr::Runner::new(2).forward_iter( + |haystack, needles| { + let n1 = needles.get(0).copied()?; + let n2 = needles.get(1).copied()?; + Some(memchr2_iter(n1, n2, haystack).collect()) + }, + ) + } + + #[test] + fn forward2_oneshot() { + crate::tests::memchr::Runner::new(2).forward_oneshot( + |haystack, needles| { + let n1 = needles.get(0).copied()?; + let n2 = needles.get(1).copied()?; + Some(memchr2(n1, n2, haystack)) + }, + ) + } + + #[test] + fn reverse2_iter() { + 
crate::tests::memchr::Runner::new(2).reverse_iter( + |haystack, needles| { + let n1 = needles.get(0).copied()?; + let n2 = needles.get(1).copied()?; + Some(memrchr2_iter(n1, n2, haystack).collect()) + }, + ) + } + + #[test] + fn reverse2_oneshot() { + crate::tests::memchr::Runner::new(2).reverse_oneshot( + |haystack, needles| { + let n1 = needles.get(0).copied()?; + let n2 = needles.get(1).copied()?; + Some(memrchr2(n1, n2, haystack)) + }, + ) + } + + #[test] + fn forward3_iter() { + crate::tests::memchr::Runner::new(3).forward_iter( + |haystack, needles| { + let n1 = needles.get(0).copied()?; + let n2 = needles.get(1).copied()?; + let n3 = needles.get(2).copied()?; + Some(memchr3_iter(n1, n2, n3, haystack).collect()) + }, + ) + } + + #[test] + fn forward3_oneshot() { + crate::tests::memchr::Runner::new(3).forward_oneshot( + |haystack, needles| { + let n1 = needles.get(0).copied()?; + let n2 = needles.get(1).copied()?; + let n3 = needles.get(2).copied()?; + Some(memchr3(n1, n2, n3, haystack)) + }, + ) + } + + #[test] + fn reverse3_iter() { + crate::tests::memchr::Runner::new(3).reverse_iter( + |haystack, needles| { + let n1 = needles.get(0).copied()?; + let n2 = needles.get(1).copied()?; + let n3 = needles.get(2).copied()?; + Some(memrchr3_iter(n1, n2, n3, haystack).collect()) + }, + ) + } + + #[test] + fn reverse3_oneshot() { + crate::tests::memchr::Runner::new(3).reverse_oneshot( + |haystack, needles| { + let n1 = needles.get(0).copied()?; + let n2 = needles.get(1).copied()?; + let n3 = needles.get(2).copied()?; + Some(memrchr3(n1, n2, n3, haystack)) + }, + ) + } + + // Prior to memchr 2.6, the memchr iterators both implemented Send and + // Sync. But in memchr 2.6, the iterator changed to use raw pointers + // internally and I didn't add explicit Send/Sync impls. This ended up + // regressing the API. This test ensures we don't do that again. 
+ // + // See: https://github.com/BurntSushi/memchr/issues/133 + #[test] + fn sync_regression() { + use core::panic::{RefUnwindSafe, UnwindSafe}; + + fn assert_send_sync() {} + assert_send_sync::(); + assert_send_sync::(); + assert_send_sync::() + } +} diff --git a/vendor/memchr/src/memchr/c.rs b/vendor/memchr/src/memchr/c.rs deleted file mode 100644 index 608aabc..0000000 --- a/vendor/memchr/src/memchr/c.rs +++ /dev/null @@ -1,44 +0,0 @@ -// This module defines safe wrappers around memchr (POSIX) and memrchr (GNU -// extension). - -#![allow(dead_code)] - -use libc::{c_int, c_void, size_t}; - -pub fn memchr(needle: u8, haystack: &[u8]) -> Option { - // SAFETY: This is safe to call since all pointers are valid. - let p = unsafe { - libc::memchr( - haystack.as_ptr() as *const c_void, - needle as c_int, - haystack.len() as size_t, - ) - }; - if p.is_null() { - None - } else { - Some(p as usize - (haystack.as_ptr() as usize)) - } -} - -// memrchr is a GNU extension. We know it's available on Linux at least. -#[cfg(target_os = "linux")] -pub fn memrchr(needle: u8, haystack: &[u8]) -> Option { - // GNU's memrchr() will - unlike memchr() - error if haystack is empty. - if haystack.is_empty() { - return None; - } - // SAFETY: This is safe to call since all pointers are valid. - let p = unsafe { - libc::memrchr( - haystack.as_ptr() as *const c_void, - needle as c_int, - haystack.len() as size_t, - ) - }; - if p.is_null() { - None - } else { - Some(p as usize - (haystack.as_ptr() as usize)) - } -} diff --git a/vendor/memchr/src/memchr/fallback.rs b/vendor/memchr/src/memchr/fallback.rs deleted file mode 100644 index b01f224..0000000 --- a/vendor/memchr/src/memchr/fallback.rs +++ /dev/null @@ -1,329 +0,0 @@ -// This module defines pure Rust platform independent implementations of all -// the memchr routines. We do our best to make them fast. Some of them may even -// get auto-vectorized. 
- -use core::{cmp, usize}; - -#[cfg(target_pointer_width = "16")] -const USIZE_BYTES: usize = 2; - -#[cfg(target_pointer_width = "32")] -const USIZE_BYTES: usize = 4; - -#[cfg(target_pointer_width = "64")] -const USIZE_BYTES: usize = 8; - -// The number of bytes to loop at in one iteration of memchr/memrchr. -const LOOP_SIZE: usize = 2 * USIZE_BYTES; - -/// Return `true` if `x` contains any zero byte. -/// -/// From *Matters Computational*, J. Arndt -/// -/// "The idea is to subtract one from each of the bytes and then look for -/// bytes where the borrow propagated all the way to the most significant -/// bit." -#[inline(always)] -fn contains_zero_byte(x: usize) -> bool { - const LO_U64: u64 = 0x0101010101010101; - const HI_U64: u64 = 0x8080808080808080; - - const LO_USIZE: usize = LO_U64 as usize; - const HI_USIZE: usize = HI_U64 as usize; - - x.wrapping_sub(LO_USIZE) & !x & HI_USIZE != 0 -} - -/// Repeat the given byte into a word size number. That is, every 8 bits -/// is equivalent to the given byte. For example, if `b` is `\x4E` or -/// `01001110` in binary, then the returned value on a 32-bit system would be: -/// `01001110_01001110_01001110_01001110`. 
-#[inline(always)] -fn repeat_byte(b: u8) -> usize { - (b as usize) * (usize::MAX / 255) -} - -pub fn memchr(n1: u8, haystack: &[u8]) -> Option { - let vn1 = repeat_byte(n1); - let confirm = |byte| byte == n1; - let loop_size = cmp::min(LOOP_SIZE, haystack.len()); - let align = USIZE_BYTES - 1; - let start_ptr = haystack.as_ptr(); - let mut ptr = start_ptr; - - unsafe { - let end_ptr = start_ptr.add(haystack.len()); - if haystack.len() < USIZE_BYTES { - return forward_search(start_ptr, end_ptr, ptr, confirm); - } - - let chunk = (ptr as *const usize).read_unaligned(); - if contains_zero_byte(chunk ^ vn1) { - return forward_search(start_ptr, end_ptr, ptr, confirm); - } - - ptr = ptr.add(USIZE_BYTES - (start_ptr as usize & align)); - debug_assert!(ptr > start_ptr); - debug_assert!(end_ptr.sub(USIZE_BYTES) >= start_ptr); - while loop_size == LOOP_SIZE && ptr <= end_ptr.sub(loop_size) { - debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES); - - let a = *(ptr as *const usize); - let b = *(ptr.add(USIZE_BYTES) as *const usize); - let eqa = contains_zero_byte(a ^ vn1); - let eqb = contains_zero_byte(b ^ vn1); - if eqa || eqb { - break; - } - ptr = ptr.add(LOOP_SIZE); - } - forward_search(start_ptr, end_ptr, ptr, confirm) - } -} - -/// Like `memchr`, but searches for two bytes instead of one. 
-pub fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option { - let vn1 = repeat_byte(n1); - let vn2 = repeat_byte(n2); - let confirm = |byte| byte == n1 || byte == n2; - let align = USIZE_BYTES - 1; - let start_ptr = haystack.as_ptr(); - let mut ptr = start_ptr; - - unsafe { - let end_ptr = start_ptr.add(haystack.len()); - if haystack.len() < USIZE_BYTES { - return forward_search(start_ptr, end_ptr, ptr, confirm); - } - - let chunk = (ptr as *const usize).read_unaligned(); - let eq1 = contains_zero_byte(chunk ^ vn1); - let eq2 = contains_zero_byte(chunk ^ vn2); - if eq1 || eq2 { - return forward_search(start_ptr, end_ptr, ptr, confirm); - } - - ptr = ptr.add(USIZE_BYTES - (start_ptr as usize & align)); - debug_assert!(ptr > start_ptr); - debug_assert!(end_ptr.sub(USIZE_BYTES) >= start_ptr); - while ptr <= end_ptr.sub(USIZE_BYTES) { - debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES); - - let chunk = *(ptr as *const usize); - let eq1 = contains_zero_byte(chunk ^ vn1); - let eq2 = contains_zero_byte(chunk ^ vn2); - if eq1 || eq2 { - break; - } - ptr = ptr.add(USIZE_BYTES); - } - forward_search(start_ptr, end_ptr, ptr, confirm) - } -} - -/// Like `memchr`, but searches for three bytes instead of one. 
-pub fn memchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option { - let vn1 = repeat_byte(n1); - let vn2 = repeat_byte(n2); - let vn3 = repeat_byte(n3); - let confirm = |byte| byte == n1 || byte == n2 || byte == n3; - let align = USIZE_BYTES - 1; - let start_ptr = haystack.as_ptr(); - let mut ptr = start_ptr; - - unsafe { - let end_ptr = start_ptr.add(haystack.len()); - if haystack.len() < USIZE_BYTES { - return forward_search(start_ptr, end_ptr, ptr, confirm); - } - - let chunk = (ptr as *const usize).read_unaligned(); - let eq1 = contains_zero_byte(chunk ^ vn1); - let eq2 = contains_zero_byte(chunk ^ vn2); - let eq3 = contains_zero_byte(chunk ^ vn3); - if eq1 || eq2 || eq3 { - return forward_search(start_ptr, end_ptr, ptr, confirm); - } - - ptr = ptr.add(USIZE_BYTES - (start_ptr as usize & align)); - debug_assert!(ptr > start_ptr); - debug_assert!(end_ptr.sub(USIZE_BYTES) >= start_ptr); - while ptr <= end_ptr.sub(USIZE_BYTES) { - debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES); - - let chunk = *(ptr as *const usize); - let eq1 = contains_zero_byte(chunk ^ vn1); - let eq2 = contains_zero_byte(chunk ^ vn2); - let eq3 = contains_zero_byte(chunk ^ vn3); - if eq1 || eq2 || eq3 { - break; - } - ptr = ptr.add(USIZE_BYTES); - } - forward_search(start_ptr, end_ptr, ptr, confirm) - } -} - -/// Return the last index matching the byte `x` in `text`. 
-pub fn memrchr(n1: u8, haystack: &[u8]) -> Option { - let vn1 = repeat_byte(n1); - let confirm = |byte| byte == n1; - let loop_size = cmp::min(LOOP_SIZE, haystack.len()); - let align = USIZE_BYTES - 1; - let start_ptr = haystack.as_ptr(); - - unsafe { - let end_ptr = start_ptr.add(haystack.len()); - let mut ptr = end_ptr; - if haystack.len() < USIZE_BYTES { - return reverse_search(start_ptr, end_ptr, ptr, confirm); - } - - let chunk = (ptr.sub(USIZE_BYTES) as *const usize).read_unaligned(); - if contains_zero_byte(chunk ^ vn1) { - return reverse_search(start_ptr, end_ptr, ptr, confirm); - } - - ptr = (end_ptr as usize & !align) as *const u8; - debug_assert!(start_ptr <= ptr && ptr <= end_ptr); - while loop_size == LOOP_SIZE && ptr >= start_ptr.add(loop_size) { - debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES); - - let a = *(ptr.sub(2 * USIZE_BYTES) as *const usize); - let b = *(ptr.sub(1 * USIZE_BYTES) as *const usize); - let eqa = contains_zero_byte(a ^ vn1); - let eqb = contains_zero_byte(b ^ vn1); - if eqa || eqb { - break; - } - ptr = ptr.sub(loop_size); - } - reverse_search(start_ptr, end_ptr, ptr, confirm) - } -} - -/// Like `memrchr`, but searches for two bytes instead of one. 
-pub fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option { - let vn1 = repeat_byte(n1); - let vn2 = repeat_byte(n2); - let confirm = |byte| byte == n1 || byte == n2; - let align = USIZE_BYTES - 1; - let start_ptr = haystack.as_ptr(); - - unsafe { - let end_ptr = start_ptr.add(haystack.len()); - let mut ptr = end_ptr; - if haystack.len() < USIZE_BYTES { - return reverse_search(start_ptr, end_ptr, ptr, confirm); - } - - let chunk = (ptr.sub(USIZE_BYTES) as *const usize).read_unaligned(); - let eq1 = contains_zero_byte(chunk ^ vn1); - let eq2 = contains_zero_byte(chunk ^ vn2); - if eq1 || eq2 { - return reverse_search(start_ptr, end_ptr, ptr, confirm); - } - - ptr = (end_ptr as usize & !align) as *const u8; - debug_assert!(start_ptr <= ptr && ptr <= end_ptr); - while ptr >= start_ptr.add(USIZE_BYTES) { - debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES); - - let chunk = *(ptr.sub(USIZE_BYTES) as *const usize); - let eq1 = contains_zero_byte(chunk ^ vn1); - let eq2 = contains_zero_byte(chunk ^ vn2); - if eq1 || eq2 { - break; - } - ptr = ptr.sub(USIZE_BYTES); - } - reverse_search(start_ptr, end_ptr, ptr, confirm) - } -} - -/// Like `memrchr`, but searches for three bytes instead of one. 
-pub fn memrchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option { - let vn1 = repeat_byte(n1); - let vn2 = repeat_byte(n2); - let vn3 = repeat_byte(n3); - let confirm = |byte| byte == n1 || byte == n2 || byte == n3; - let align = USIZE_BYTES - 1; - let start_ptr = haystack.as_ptr(); - - unsafe { - let end_ptr = start_ptr.add(haystack.len()); - let mut ptr = end_ptr; - if haystack.len() < USIZE_BYTES { - return reverse_search(start_ptr, end_ptr, ptr, confirm); - } - - let chunk = (ptr.sub(USIZE_BYTES) as *const usize).read_unaligned(); - let eq1 = contains_zero_byte(chunk ^ vn1); - let eq2 = contains_zero_byte(chunk ^ vn2); - let eq3 = contains_zero_byte(chunk ^ vn3); - if eq1 || eq2 || eq3 { - return reverse_search(start_ptr, end_ptr, ptr, confirm); - } - - ptr = (end_ptr as usize & !align) as *const u8; - debug_assert!(start_ptr <= ptr && ptr <= end_ptr); - while ptr >= start_ptr.add(USIZE_BYTES) { - debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES); - - let chunk = *(ptr.sub(USIZE_BYTES) as *const usize); - let eq1 = contains_zero_byte(chunk ^ vn1); - let eq2 = contains_zero_byte(chunk ^ vn2); - let eq3 = contains_zero_byte(chunk ^ vn3); - if eq1 || eq2 || eq3 { - break; - } - ptr = ptr.sub(USIZE_BYTES); - } - reverse_search(start_ptr, end_ptr, ptr, confirm) - } -} - -#[inline(always)] -unsafe fn forward_search bool>( - start_ptr: *const u8, - end_ptr: *const u8, - mut ptr: *const u8, - confirm: F, -) -> Option { - debug_assert!(start_ptr <= ptr); - debug_assert!(ptr <= end_ptr); - - while ptr < end_ptr { - if confirm(*ptr) { - return Some(sub(ptr, start_ptr)); - } - ptr = ptr.offset(1); - } - None -} - -#[inline(always)] -unsafe fn reverse_search bool>( - start_ptr: *const u8, - end_ptr: *const u8, - mut ptr: *const u8, - confirm: F, -) -> Option { - debug_assert!(start_ptr <= ptr); - debug_assert!(ptr <= end_ptr); - - while ptr > start_ptr { - ptr = ptr.offset(-1); - if confirm(*ptr) { - return Some(sub(ptr, start_ptr)); - } - } - None -} - -/// Subtract 
`b` from `a` and return the difference. `a` should be greater than -/// or equal to `b`. -fn sub(a: *const u8, b: *const u8) -> usize { - debug_assert!(a >= b); - (a as usize) - (b as usize) -} diff --git a/vendor/memchr/src/memchr/iter.rs b/vendor/memchr/src/memchr/iter.rs deleted file mode 100644 index 16e203f..0000000 --- a/vendor/memchr/src/memchr/iter.rs +++ /dev/null @@ -1,173 +0,0 @@ -use crate::{memchr, memchr2, memchr3, memrchr, memrchr2, memrchr3}; - -macro_rules! iter_next { - // Common code for the memchr iterators: - // update haystack and position and produce the index - // - // self: &mut Self where Self is the iterator - // search_result: Option which is the result of the corresponding - // memchr function. - // - // Returns Option (the next iterator element) - ($self_:expr, $search_result:expr) => { - $search_result.map(move |index| { - // split and take the remaining back half - $self_.haystack = $self_.haystack.split_at(index + 1).1; - let found_position = $self_.position + index; - $self_.position = found_position + 1; - found_position - }) - }; -} - -macro_rules! iter_next_back { - ($self_:expr, $search_result:expr) => { - $search_result.map(move |index| { - // split and take the remaining front half - $self_.haystack = $self_.haystack.split_at(index).0; - $self_.position + index - }) - }; -} - -/// An iterator for `memchr`. -pub struct Memchr<'a> { - needle: u8, - // The haystack to iterate over - haystack: &'a [u8], - // The index - position: usize, -} - -impl<'a> Memchr<'a> { - /// Creates a new iterator that yields all positions of needle in haystack. 
- #[inline] - pub fn new(needle: u8, haystack: &[u8]) -> Memchr<'_> { - Memchr { needle: needle, haystack: haystack, position: 0 } - } -} - -impl<'a> Iterator for Memchr<'a> { - type Item = usize; - - #[inline] - fn next(&mut self) -> Option { - iter_next!(self, memchr(self.needle, self.haystack)) - } - - #[inline] - fn size_hint(&self) -> (usize, Option) { - (0, Some(self.haystack.len())) - } -} - -impl<'a> DoubleEndedIterator for Memchr<'a> { - #[inline] - fn next_back(&mut self) -> Option { - iter_next_back!(self, memrchr(self.needle, self.haystack)) - } -} - -/// An iterator for `memchr2`. -pub struct Memchr2<'a> { - needle1: u8, - needle2: u8, - // The haystack to iterate over - haystack: &'a [u8], - // The index - position: usize, -} - -impl<'a> Memchr2<'a> { - /// Creates a new iterator that yields all positions of needle in haystack. - #[inline] - pub fn new(needle1: u8, needle2: u8, haystack: &[u8]) -> Memchr2<'_> { - Memchr2 { - needle1: needle1, - needle2: needle2, - haystack: haystack, - position: 0, - } - } -} - -impl<'a> Iterator for Memchr2<'a> { - type Item = usize; - - #[inline] - fn next(&mut self) -> Option { - iter_next!(self, memchr2(self.needle1, self.needle2, self.haystack)) - } - - #[inline] - fn size_hint(&self) -> (usize, Option) { - (0, Some(self.haystack.len())) - } -} - -impl<'a> DoubleEndedIterator for Memchr2<'a> { - #[inline] - fn next_back(&mut self) -> Option { - iter_next_back!( - self, - memrchr2(self.needle1, self.needle2, self.haystack) - ) - } -} - -/// An iterator for `memchr3`. 
-pub struct Memchr3<'a> { - needle1: u8, - needle2: u8, - needle3: u8, - // The haystack to iterate over - haystack: &'a [u8], - // The index - position: usize, -} - -impl<'a> Memchr3<'a> { - /// Create a new `Memchr3` that's initialized to zero with a haystack - #[inline] - pub fn new( - needle1: u8, - needle2: u8, - needle3: u8, - haystack: &[u8], - ) -> Memchr3<'_> { - Memchr3 { - needle1: needle1, - needle2: needle2, - needle3: needle3, - haystack: haystack, - position: 0, - } - } -} - -impl<'a> Iterator for Memchr3<'a> { - type Item = usize; - - #[inline] - fn next(&mut self) -> Option { - iter_next!( - self, - memchr3(self.needle1, self.needle2, self.needle3, self.haystack) - ) - } - - #[inline] - fn size_hint(&self) -> (usize, Option) { - (0, Some(self.haystack.len())) - } -} - -impl<'a> DoubleEndedIterator for Memchr3<'a> { - #[inline] - fn next_back(&mut self) -> Option { - iter_next_back!( - self, - memrchr3(self.needle1, self.needle2, self.needle3, self.haystack) - ) - } -} diff --git a/vendor/memchr/src/memchr/mod.rs b/vendor/memchr/src/memchr/mod.rs deleted file mode 100644 index 09ce6ef..0000000 --- a/vendor/memchr/src/memchr/mod.rs +++ /dev/null @@ -1,410 +0,0 @@ -use core::iter::Rev; - -pub use self::iter::{Memchr, Memchr2, Memchr3}; - -// N.B. If you're looking for the cfg knobs for libc, see build.rs. -#[cfg(memchr_libc)] -mod c; -#[allow(dead_code)] -pub mod fallback; -mod iter; -pub mod naive; -#[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))] -mod x86; - -/// An iterator over all occurrences of the needle in a haystack. -#[inline] -pub fn memchr_iter(needle: u8, haystack: &[u8]) -> Memchr<'_> { - Memchr::new(needle, haystack) -} - -/// An iterator over all occurrences of the needles in a haystack. -#[inline] -pub fn memchr2_iter(needle1: u8, needle2: u8, haystack: &[u8]) -> Memchr2<'_> { - Memchr2::new(needle1, needle2, haystack) -} - -/// An iterator over all occurrences of the needles in a haystack. 
-#[inline] -pub fn memchr3_iter( - needle1: u8, - needle2: u8, - needle3: u8, - haystack: &[u8], -) -> Memchr3<'_> { - Memchr3::new(needle1, needle2, needle3, haystack) -} - -/// An iterator over all occurrences of the needle in a haystack, in reverse. -#[inline] -pub fn memrchr_iter(needle: u8, haystack: &[u8]) -> Rev> { - Memchr::new(needle, haystack).rev() -} - -/// An iterator over all occurrences of the needles in a haystack, in reverse. -#[inline] -pub fn memrchr2_iter( - needle1: u8, - needle2: u8, - haystack: &[u8], -) -> Rev> { - Memchr2::new(needle1, needle2, haystack).rev() -} - -/// An iterator over all occurrences of the needles in a haystack, in reverse. -#[inline] -pub fn memrchr3_iter( - needle1: u8, - needle2: u8, - needle3: u8, - haystack: &[u8], -) -> Rev> { - Memchr3::new(needle1, needle2, needle3, haystack).rev() -} - -/// Search for the first occurrence of a byte in a slice. -/// -/// This returns the index corresponding to the first occurrence of `needle` in -/// `haystack`, or `None` if one is not found. If an index is returned, it is -/// guaranteed to be less than `usize::MAX`. -/// -/// While this is operationally the same as something like -/// `haystack.iter().position(|&b| b == needle)`, `memchr` will use a highly -/// optimized routine that can be up to an order of magnitude faster in some -/// cases. -/// -/// # Example -/// -/// This shows how to find the first position of a byte in a byte string. 
-/// -/// ``` -/// use memchr::memchr; -/// -/// let haystack = b"the quick brown fox"; -/// assert_eq!(memchr(b'k', haystack), Some(8)); -/// ``` -#[inline] -pub fn memchr(needle: u8, haystack: &[u8]) -> Option { - #[cfg(miri)] - #[inline(always)] - fn imp(n1: u8, haystack: &[u8]) -> Option { - naive::memchr(n1, haystack) - } - - #[cfg(all(target_arch = "x86_64", memchr_runtime_simd, not(miri)))] - #[inline(always)] - fn imp(n1: u8, haystack: &[u8]) -> Option { - x86::memchr(n1, haystack) - } - - #[cfg(all( - memchr_libc, - not(all(target_arch = "x86_64", memchr_runtime_simd)), - not(miri), - ))] - #[inline(always)] - fn imp(n1: u8, haystack: &[u8]) -> Option { - c::memchr(n1, haystack) - } - - #[cfg(all( - not(memchr_libc), - not(all(target_arch = "x86_64", memchr_runtime_simd)), - not(miri), - ))] - #[inline(always)] - fn imp(n1: u8, haystack: &[u8]) -> Option { - fallback::memchr(n1, haystack) - } - - if haystack.is_empty() { - None - } else { - imp(needle, haystack) - } -} - -/// Like `memchr`, but searches for either of two bytes instead of just one. -/// -/// This returns the index corresponding to the first occurrence of `needle1` -/// or the first occurrence of `needle2` in `haystack` (whichever occurs -/// earlier), or `None` if neither one is found. If an index is returned, it is -/// guaranteed to be less than `usize::MAX`. -/// -/// While this is operationally the same as something like -/// `haystack.iter().position(|&b| b == needle1 || b == needle2)`, `memchr2` -/// will use a highly optimized routine that can be up to an order of magnitude -/// faster in some cases. -/// -/// # Example -/// -/// This shows how to find the first position of either of two bytes in a byte -/// string. 
-/// -/// ``` -/// use memchr::memchr2; -/// -/// let haystack = b"the quick brown fox"; -/// assert_eq!(memchr2(b'k', b'q', haystack), Some(4)); -/// ``` -#[inline] -pub fn memchr2(needle1: u8, needle2: u8, haystack: &[u8]) -> Option { - #[cfg(miri)] - #[inline(always)] - fn imp(n1: u8, n2: u8, haystack: &[u8]) -> Option { - naive::memchr2(n1, n2, haystack) - } - - #[cfg(all(target_arch = "x86_64", memchr_runtime_simd, not(miri)))] - #[inline(always)] - fn imp(n1: u8, n2: u8, haystack: &[u8]) -> Option { - x86::memchr2(n1, n2, haystack) - } - - #[cfg(all( - not(all(target_arch = "x86_64", memchr_runtime_simd)), - not(miri), - ))] - #[inline(always)] - fn imp(n1: u8, n2: u8, haystack: &[u8]) -> Option { - fallback::memchr2(n1, n2, haystack) - } - - if haystack.is_empty() { - None - } else { - imp(needle1, needle2, haystack) - } -} - -/// Like `memchr`, but searches for any of three bytes instead of just one. -/// -/// This returns the index corresponding to the first occurrence of `needle1`, -/// the first occurrence of `needle2`, or the first occurrence of `needle3` in -/// `haystack` (whichever occurs earliest), or `None` if none are found. If an -/// index is returned, it is guaranteed to be less than `usize::MAX`. -/// -/// While this is operationally the same as something like -/// `haystack.iter().position(|&b| b == needle1 || b == needle2 || -/// b == needle3)`, `memchr3` will use a highly optimized routine that can be -/// up to an order of magnitude faster in some cases. -/// -/// # Example -/// -/// This shows how to find the first position of any of three bytes in a byte -/// string. 
-/// -/// ``` -/// use memchr::memchr3; -/// -/// let haystack = b"the quick brown fox"; -/// assert_eq!(memchr3(b'k', b'q', b'e', haystack), Some(2)); -/// ``` -#[inline] -pub fn memchr3( - needle1: u8, - needle2: u8, - needle3: u8, - haystack: &[u8], -) -> Option { - #[cfg(miri)] - #[inline(always)] - fn imp(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option { - naive::memchr3(n1, n2, n3, haystack) - } - - #[cfg(all(target_arch = "x86_64", memchr_runtime_simd, not(miri)))] - #[inline(always)] - fn imp(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option { - x86::memchr3(n1, n2, n3, haystack) - } - - #[cfg(all( - not(all(target_arch = "x86_64", memchr_runtime_simd)), - not(miri), - ))] - #[inline(always)] - fn imp(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option { - fallback::memchr3(n1, n2, n3, haystack) - } - - if haystack.is_empty() { - None - } else { - imp(needle1, needle2, needle3, haystack) - } -} - -/// Search for the last occurrence of a byte in a slice. -/// -/// This returns the index corresponding to the last occurrence of `needle` in -/// `haystack`, or `None` if one is not found. If an index is returned, it is -/// guaranteed to be less than `usize::MAX`. -/// -/// While this is operationally the same as something like -/// `haystack.iter().rposition(|&b| b == needle)`, `memrchr` will use a highly -/// optimized routine that can be up to an order of magnitude faster in some -/// cases. -/// -/// # Example -/// -/// This shows how to find the last position of a byte in a byte string. 
-/// -/// ``` -/// use memchr::memrchr; -/// -/// let haystack = b"the quick brown fox"; -/// assert_eq!(memrchr(b'o', haystack), Some(17)); -/// ``` -#[inline] -pub fn memrchr(needle: u8, haystack: &[u8]) -> Option { - #[cfg(miri)] - #[inline(always)] - fn imp(n1: u8, haystack: &[u8]) -> Option { - naive::memrchr(n1, haystack) - } - - #[cfg(all(target_arch = "x86_64", memchr_runtime_simd, not(miri)))] - #[inline(always)] - fn imp(n1: u8, haystack: &[u8]) -> Option { - x86::memrchr(n1, haystack) - } - - #[cfg(all( - memchr_libc, - target_os = "linux", - not(all(target_arch = "x86_64", memchr_runtime_simd)), - not(miri) - ))] - #[inline(always)] - fn imp(n1: u8, haystack: &[u8]) -> Option { - c::memrchr(n1, haystack) - } - - #[cfg(all( - not(all(memchr_libc, target_os = "linux")), - not(all(target_arch = "x86_64", memchr_runtime_simd)), - not(miri), - ))] - #[inline(always)] - fn imp(n1: u8, haystack: &[u8]) -> Option { - fallback::memrchr(n1, haystack) - } - - if haystack.is_empty() { - None - } else { - imp(needle, haystack) - } -} - -/// Like `memrchr`, but searches for either of two bytes instead of just one. -/// -/// This returns the index corresponding to the last occurrence of `needle1` or -/// the last occurrence of `needle2` in `haystack` (whichever occurs later), or -/// `None` if neither one is found. If an index is returned, it is guaranteed -/// to be less than `usize::MAX`. -/// -/// While this is operationally the same as something like -/// `haystack.iter().rposition(|&b| b == needle1 || b == needle2)`, `memrchr2` -/// will use a highly optimized routine that can be up to an order of magnitude -/// faster in some cases. -/// -/// # Example -/// -/// This shows how to find the last position of either of two bytes in a byte -/// string. 
-/// -/// ``` -/// use memchr::memrchr2; -/// -/// let haystack = b"the quick brown fox"; -/// assert_eq!(memrchr2(b'k', b'q', haystack), Some(8)); -/// ``` -#[inline] -pub fn memrchr2(needle1: u8, needle2: u8, haystack: &[u8]) -> Option { - #[cfg(miri)] - #[inline(always)] - fn imp(n1: u8, n2: u8, haystack: &[u8]) -> Option { - naive::memrchr2(n1, n2, haystack) - } - - #[cfg(all(target_arch = "x86_64", memchr_runtime_simd, not(miri)))] - #[inline(always)] - fn imp(n1: u8, n2: u8, haystack: &[u8]) -> Option { - x86::memrchr2(n1, n2, haystack) - } - - #[cfg(all( - not(all(target_arch = "x86_64", memchr_runtime_simd)), - not(miri), - ))] - #[inline(always)] - fn imp(n1: u8, n2: u8, haystack: &[u8]) -> Option { - fallback::memrchr2(n1, n2, haystack) - } - - if haystack.is_empty() { - None - } else { - imp(needle1, needle2, haystack) - } -} - -/// Like `memrchr`, but searches for any of three bytes instead of just one. -/// -/// This returns the index corresponding to the last occurrence of `needle1`, -/// the last occurrence of `needle2`, or the last occurrence of `needle3` in -/// `haystack` (whichever occurs later), or `None` if none are found. If an -/// index is returned, it is guaranteed to be less than `usize::MAX`. -/// -/// While this is operationally the same as something like -/// `haystack.iter().rposition(|&b| b == needle1 || b == needle2 || -/// b == needle3)`, `memrchr3` will use a highly optimized routine that can be -/// up to an order of magnitude faster in some cases. -/// -/// # Example -/// -/// This shows how to find the last position of any of three bytes in a byte -/// string. 
-/// -/// ``` -/// use memchr::memrchr3; -/// -/// let haystack = b"the quick brown fox"; -/// assert_eq!(memrchr3(b'k', b'q', b'e', haystack), Some(8)); -/// ``` -#[inline] -pub fn memrchr3( - needle1: u8, - needle2: u8, - needle3: u8, - haystack: &[u8], -) -> Option { - #[cfg(miri)] - #[inline(always)] - fn imp(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option { - naive::memrchr3(n1, n2, n3, haystack) - } - - #[cfg(all(target_arch = "x86_64", memchr_runtime_simd, not(miri)))] - #[inline(always)] - fn imp(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option { - x86::memrchr3(n1, n2, n3, haystack) - } - - #[cfg(all( - not(all(target_arch = "x86_64", memchr_runtime_simd)), - not(miri), - ))] - #[inline(always)] - fn imp(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option { - fallback::memrchr3(n1, n2, n3, haystack) - } - - if haystack.is_empty() { - None - } else { - imp(needle1, needle2, needle3, haystack) - } -} diff --git a/vendor/memchr/src/memchr/naive.rs b/vendor/memchr/src/memchr/naive.rs deleted file mode 100644 index 3f3053d..0000000 --- a/vendor/memchr/src/memchr/naive.rs +++ /dev/null @@ -1,25 +0,0 @@ -#![allow(dead_code)] - -pub fn memchr(n1: u8, haystack: &[u8]) -> Option { - haystack.iter().position(|&b| b == n1) -} - -pub fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option { - haystack.iter().position(|&b| b == n1 || b == n2) -} - -pub fn memchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option { - haystack.iter().position(|&b| b == n1 || b == n2 || b == n3) -} - -pub fn memrchr(n1: u8, haystack: &[u8]) -> Option { - haystack.iter().rposition(|&b| b == n1) -} - -pub fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option { - haystack.iter().rposition(|&b| b == n1 || b == n2) -} - -pub fn memrchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option { - haystack.iter().rposition(|&b| b == n1 || b == n2 || b == n3) -} diff --git a/vendor/memchr/src/memchr/x86/avx.rs b/vendor/memchr/src/memchr/x86/avx.rs deleted file mode 100644 index 5351230..0000000 --- 
a/vendor/memchr/src/memchr/x86/avx.rs +++ /dev/null @@ -1,755 +0,0 @@ -use core::{arch::x86_64::*, cmp, mem::size_of}; - -use super::sse2; - -const VECTOR_SIZE: usize = size_of::<__m256i>(); -const VECTOR_ALIGN: usize = VECTOR_SIZE - 1; - -// The number of bytes to loop at in one iteration of memchr/memrchr. -const LOOP_SIZE: usize = 4 * VECTOR_SIZE; - -// The number of bytes to loop at in one iteration of memchr2/memrchr2 and -// memchr3/memrchr3. There was no observable difference between 128 and 64 -// bytes in benchmarks. memchr3 in particular only gets a very slight speed up -// from the loop unrolling. -const LOOP_SIZE2: usize = 2 * VECTOR_SIZE; - -#[target_feature(enable = "avx2")] -pub unsafe fn memchr(n1: u8, haystack: &[u8]) -> Option { - // For a high level explanation for how this algorithm works, see the - // sse2 implementation. The avx implementation here is the same, but with - // 256-bit vectors instead of 128-bit vectors. - - // This routine is called whenever a match is detected. It is specifically - // marked as unlineable because it improves the codegen of the unrolled - // loop below. Inlining this seems to cause codegen with some extra adds - // and a load that aren't necessary. This seems to result in about a 10% - // improvement for the memchr1/crate/huge/never benchmark. - // - // Interestingly, I couldn't observe a similar improvement for memrchr. 
- #[cold] - #[inline(never)] - #[target_feature(enable = "avx2")] - unsafe fn matched( - start_ptr: *const u8, - ptr: *const u8, - eqa: __m256i, - eqb: __m256i, - eqc: __m256i, - eqd: __m256i, - ) -> usize { - let mut at = sub(ptr, start_ptr); - let mask = _mm256_movemask_epi8(eqa); - if mask != 0 { - return at + forward_pos(mask); - } - - at += VECTOR_SIZE; - let mask = _mm256_movemask_epi8(eqb); - if mask != 0 { - return at + forward_pos(mask); - } - - at += VECTOR_SIZE; - let mask = _mm256_movemask_epi8(eqc); - if mask != 0 { - return at + forward_pos(mask); - } - - at += VECTOR_SIZE; - let mask = _mm256_movemask_epi8(eqd); - debug_assert!(mask != 0); - at + forward_pos(mask) - } - - let start_ptr = haystack.as_ptr(); - let end_ptr = start_ptr.add(haystack.len()); - let mut ptr = start_ptr; - - if haystack.len() < VECTOR_SIZE { - // For small haystacks, defer to the SSE2 implementation. Codegen - // suggests this completely avoids touching the AVX vectors. - return sse2::memchr(n1, haystack); - } - - let vn1 = _mm256_set1_epi8(n1 as i8); - let loop_size = cmp::min(LOOP_SIZE, haystack.len()); - if let Some(i) = forward_search1(start_ptr, end_ptr, ptr, vn1) { - return Some(i); - } - - ptr = ptr.add(VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN)); - debug_assert!(ptr > start_ptr && end_ptr.sub(VECTOR_SIZE) >= start_ptr); - while loop_size == LOOP_SIZE && ptr <= end_ptr.sub(loop_size) { - debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE); - - let a = _mm256_load_si256(ptr as *const __m256i); - let b = _mm256_load_si256(ptr.add(VECTOR_SIZE) as *const __m256i); - let c = _mm256_load_si256(ptr.add(2 * VECTOR_SIZE) as *const __m256i); - let d = _mm256_load_si256(ptr.add(3 * VECTOR_SIZE) as *const __m256i); - let eqa = _mm256_cmpeq_epi8(vn1, a); - let eqb = _mm256_cmpeq_epi8(vn1, b); - let eqc = _mm256_cmpeq_epi8(vn1, c); - let eqd = _mm256_cmpeq_epi8(vn1, d); - let or1 = _mm256_or_si256(eqa, eqb); - let or2 = _mm256_or_si256(eqc, eqd); - let or3 = 
_mm256_or_si256(or1, or2); - - if _mm256_movemask_epi8(or3) != 0 { - return Some(matched(start_ptr, ptr, eqa, eqb, eqc, eqd)); - } - ptr = ptr.add(loop_size); - } - while ptr <= end_ptr.sub(VECTOR_SIZE) { - debug_assert!(sub(end_ptr, ptr) >= VECTOR_SIZE); - - if let Some(i) = forward_search1(start_ptr, end_ptr, ptr, vn1) { - return Some(i); - } - ptr = ptr.add(VECTOR_SIZE); - } - if ptr < end_ptr { - debug_assert!(sub(end_ptr, ptr) < VECTOR_SIZE); - ptr = ptr.sub(VECTOR_SIZE - sub(end_ptr, ptr)); - debug_assert_eq!(sub(end_ptr, ptr), VECTOR_SIZE); - - return forward_search1(start_ptr, end_ptr, ptr, vn1); - } - None -} - -#[target_feature(enable = "avx2")] -pub unsafe fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option { - #[cold] - #[inline(never)] - #[target_feature(enable = "avx2")] - unsafe fn matched( - start_ptr: *const u8, - ptr: *const u8, - eqa1: __m256i, - eqa2: __m256i, - eqb1: __m256i, - eqb2: __m256i, - ) -> usize { - let mut at = sub(ptr, start_ptr); - let mask1 = _mm256_movemask_epi8(eqa1); - let mask2 = _mm256_movemask_epi8(eqa2); - if mask1 != 0 || mask2 != 0 { - return at + forward_pos2(mask1, mask2); - } - - at += VECTOR_SIZE; - let mask1 = _mm256_movemask_epi8(eqb1); - let mask2 = _mm256_movemask_epi8(eqb2); - at + forward_pos2(mask1, mask2) - } - - let vn1 = _mm256_set1_epi8(n1 as i8); - let vn2 = _mm256_set1_epi8(n2 as i8); - let len = haystack.len(); - let loop_size = cmp::min(LOOP_SIZE2, len); - let start_ptr = haystack.as_ptr(); - let end_ptr = start_ptr.add(haystack.len()); - let mut ptr = start_ptr; - - if haystack.len() < VECTOR_SIZE { - while ptr < end_ptr { - if *ptr == n1 || *ptr == n2 { - return Some(sub(ptr, start_ptr)); - } - ptr = ptr.offset(1); - } - return None; - } - - if let Some(i) = forward_search2(start_ptr, end_ptr, ptr, vn1, vn2) { - return Some(i); - } - - ptr = ptr.add(VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN)); - debug_assert!(ptr > start_ptr && end_ptr.sub(VECTOR_SIZE) >= start_ptr); - while loop_size == 
LOOP_SIZE2 && ptr <= end_ptr.sub(loop_size) { - debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE); - - let a = _mm256_load_si256(ptr as *const __m256i); - let b = _mm256_load_si256(ptr.add(VECTOR_SIZE) as *const __m256i); - let eqa1 = _mm256_cmpeq_epi8(vn1, a); - let eqb1 = _mm256_cmpeq_epi8(vn1, b); - let eqa2 = _mm256_cmpeq_epi8(vn2, a); - let eqb2 = _mm256_cmpeq_epi8(vn2, b); - let or1 = _mm256_or_si256(eqa1, eqb1); - let or2 = _mm256_or_si256(eqa2, eqb2); - let or3 = _mm256_or_si256(or1, or2); - if _mm256_movemask_epi8(or3) != 0 { - return Some(matched(start_ptr, ptr, eqa1, eqa2, eqb1, eqb2)); - } - ptr = ptr.add(loop_size); - } - while ptr <= end_ptr.sub(VECTOR_SIZE) { - if let Some(i) = forward_search2(start_ptr, end_ptr, ptr, vn1, vn2) { - return Some(i); - } - ptr = ptr.add(VECTOR_SIZE); - } - if ptr < end_ptr { - debug_assert!(sub(end_ptr, ptr) < VECTOR_SIZE); - ptr = ptr.sub(VECTOR_SIZE - sub(end_ptr, ptr)); - debug_assert_eq!(sub(end_ptr, ptr), VECTOR_SIZE); - - return forward_search2(start_ptr, end_ptr, ptr, vn1, vn2); - } - None -} - -#[target_feature(enable = "avx2")] -pub unsafe fn memchr3( - n1: u8, - n2: u8, - n3: u8, - haystack: &[u8], -) -> Option { - #[cold] - #[inline(never)] - #[target_feature(enable = "avx2")] - unsafe fn matched( - start_ptr: *const u8, - ptr: *const u8, - eqa1: __m256i, - eqa2: __m256i, - eqa3: __m256i, - eqb1: __m256i, - eqb2: __m256i, - eqb3: __m256i, - ) -> usize { - let mut at = sub(ptr, start_ptr); - let mask1 = _mm256_movemask_epi8(eqa1); - let mask2 = _mm256_movemask_epi8(eqa2); - let mask3 = _mm256_movemask_epi8(eqa3); - if mask1 != 0 || mask2 != 0 || mask3 != 0 { - return at + forward_pos3(mask1, mask2, mask3); - } - - at += VECTOR_SIZE; - let mask1 = _mm256_movemask_epi8(eqb1); - let mask2 = _mm256_movemask_epi8(eqb2); - let mask3 = _mm256_movemask_epi8(eqb3); - at + forward_pos3(mask1, mask2, mask3) - } - - let vn1 = _mm256_set1_epi8(n1 as i8); - let vn2 = _mm256_set1_epi8(n2 as i8); - let vn3 = 
_mm256_set1_epi8(n3 as i8); - let len = haystack.len(); - let loop_size = cmp::min(LOOP_SIZE2, len); - let start_ptr = haystack.as_ptr(); - let end_ptr = start_ptr.add(haystack.len()); - let mut ptr = start_ptr; - - if haystack.len() < VECTOR_SIZE { - while ptr < end_ptr { - if *ptr == n1 || *ptr == n2 || *ptr == n3 { - return Some(sub(ptr, start_ptr)); - } - ptr = ptr.offset(1); - } - return None; - } - - if let Some(i) = forward_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3) { - return Some(i); - } - - ptr = ptr.add(VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN)); - debug_assert!(ptr > start_ptr && end_ptr.sub(VECTOR_SIZE) >= start_ptr); - while loop_size == LOOP_SIZE2 && ptr <= end_ptr.sub(loop_size) { - debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE); - - let a = _mm256_load_si256(ptr as *const __m256i); - let b = _mm256_load_si256(ptr.add(VECTOR_SIZE) as *const __m256i); - let eqa1 = _mm256_cmpeq_epi8(vn1, a); - let eqb1 = _mm256_cmpeq_epi8(vn1, b); - let eqa2 = _mm256_cmpeq_epi8(vn2, a); - let eqb2 = _mm256_cmpeq_epi8(vn2, b); - let eqa3 = _mm256_cmpeq_epi8(vn3, a); - let eqb3 = _mm256_cmpeq_epi8(vn3, b); - let or1 = _mm256_or_si256(eqa1, eqb1); - let or2 = _mm256_or_si256(eqa2, eqb2); - let or3 = _mm256_or_si256(eqa3, eqb3); - let or4 = _mm256_or_si256(or1, or2); - let or5 = _mm256_or_si256(or3, or4); - if _mm256_movemask_epi8(or5) != 0 { - return Some(matched( - start_ptr, ptr, eqa1, eqa2, eqa3, eqb1, eqb2, eqb3, - )); - } - ptr = ptr.add(loop_size); - } - while ptr <= end_ptr.sub(VECTOR_SIZE) { - if let Some(i) = - forward_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3) - { - return Some(i); - } - ptr = ptr.add(VECTOR_SIZE); - } - if ptr < end_ptr { - debug_assert!(sub(end_ptr, ptr) < VECTOR_SIZE); - ptr = ptr.sub(VECTOR_SIZE - sub(end_ptr, ptr)); - debug_assert_eq!(sub(end_ptr, ptr), VECTOR_SIZE); - - return forward_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3); - } - None -} - -#[target_feature(enable = "avx2")] -pub unsafe fn memrchr(n1: u8, 
haystack: &[u8]) -> Option { - let vn1 = _mm256_set1_epi8(n1 as i8); - let len = haystack.len(); - let loop_size = cmp::min(LOOP_SIZE, len); - let start_ptr = haystack.as_ptr(); - let end_ptr = start_ptr.add(haystack.len()); - let mut ptr = end_ptr; - - if haystack.len() < VECTOR_SIZE { - while ptr > start_ptr { - ptr = ptr.offset(-1); - if *ptr == n1 { - return Some(sub(ptr, start_ptr)); - } - } - return None; - } - - ptr = ptr.sub(VECTOR_SIZE); - if let Some(i) = reverse_search1(start_ptr, end_ptr, ptr, vn1) { - return Some(i); - } - - ptr = (end_ptr as usize & !VECTOR_ALIGN) as *const u8; - debug_assert!(start_ptr <= ptr && ptr <= end_ptr); - while loop_size == LOOP_SIZE && ptr >= start_ptr.add(loop_size) { - debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE); - - ptr = ptr.sub(loop_size); - let a = _mm256_load_si256(ptr as *const __m256i); - let b = _mm256_load_si256(ptr.add(VECTOR_SIZE) as *const __m256i); - let c = _mm256_load_si256(ptr.add(2 * VECTOR_SIZE) as *const __m256i); - let d = _mm256_load_si256(ptr.add(3 * VECTOR_SIZE) as *const __m256i); - let eqa = _mm256_cmpeq_epi8(vn1, a); - let eqb = _mm256_cmpeq_epi8(vn1, b); - let eqc = _mm256_cmpeq_epi8(vn1, c); - let eqd = _mm256_cmpeq_epi8(vn1, d); - let or1 = _mm256_or_si256(eqa, eqb); - let or2 = _mm256_or_si256(eqc, eqd); - let or3 = _mm256_or_si256(or1, or2); - if _mm256_movemask_epi8(or3) != 0 { - let mut at = sub(ptr.add(3 * VECTOR_SIZE), start_ptr); - let mask = _mm256_movemask_epi8(eqd); - if mask != 0 { - return Some(at + reverse_pos(mask)); - } - - at -= VECTOR_SIZE; - let mask = _mm256_movemask_epi8(eqc); - if mask != 0 { - return Some(at + reverse_pos(mask)); - } - - at -= VECTOR_SIZE; - let mask = _mm256_movemask_epi8(eqb); - if mask != 0 { - return Some(at + reverse_pos(mask)); - } - - at -= VECTOR_SIZE; - let mask = _mm256_movemask_epi8(eqa); - debug_assert!(mask != 0); - return Some(at + reverse_pos(mask)); - } - } - while ptr >= start_ptr.add(VECTOR_SIZE) { - ptr = ptr.sub(VECTOR_SIZE); - 
if let Some(i) = reverse_search1(start_ptr, end_ptr, ptr, vn1) { - return Some(i); - } - } - if ptr > start_ptr { - debug_assert!(sub(ptr, start_ptr) < VECTOR_SIZE); - return reverse_search1(start_ptr, end_ptr, start_ptr, vn1); - } - None -} - -#[target_feature(enable = "avx2")] -pub unsafe fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option { - let vn1 = _mm256_set1_epi8(n1 as i8); - let vn2 = _mm256_set1_epi8(n2 as i8); - let len = haystack.len(); - let loop_size = cmp::min(LOOP_SIZE2, len); - let start_ptr = haystack.as_ptr(); - let end_ptr = start_ptr.add(haystack.len()); - let mut ptr = end_ptr; - - if haystack.len() < VECTOR_SIZE { - while ptr > start_ptr { - ptr = ptr.offset(-1); - if *ptr == n1 || *ptr == n2 { - return Some(sub(ptr, start_ptr)); - } - } - return None; - } - - ptr = ptr.sub(VECTOR_SIZE); - if let Some(i) = reverse_search2(start_ptr, end_ptr, ptr, vn1, vn2) { - return Some(i); - } - - ptr = (end_ptr as usize & !VECTOR_ALIGN) as *const u8; - debug_assert!(start_ptr <= ptr && ptr <= end_ptr); - while loop_size == LOOP_SIZE2 && ptr >= start_ptr.add(loop_size) { - debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE); - - ptr = ptr.sub(loop_size); - let a = _mm256_load_si256(ptr as *const __m256i); - let b = _mm256_load_si256(ptr.add(VECTOR_SIZE) as *const __m256i); - let eqa1 = _mm256_cmpeq_epi8(vn1, a); - let eqb1 = _mm256_cmpeq_epi8(vn1, b); - let eqa2 = _mm256_cmpeq_epi8(vn2, a); - let eqb2 = _mm256_cmpeq_epi8(vn2, b); - let or1 = _mm256_or_si256(eqa1, eqb1); - let or2 = _mm256_or_si256(eqa2, eqb2); - let or3 = _mm256_or_si256(or1, or2); - if _mm256_movemask_epi8(or3) != 0 { - let mut at = sub(ptr.add(VECTOR_SIZE), start_ptr); - let mask1 = _mm256_movemask_epi8(eqb1); - let mask2 = _mm256_movemask_epi8(eqb2); - if mask1 != 0 || mask2 != 0 { - return Some(at + reverse_pos2(mask1, mask2)); - } - - at -= VECTOR_SIZE; - let mask1 = _mm256_movemask_epi8(eqa1); - let mask2 = _mm256_movemask_epi8(eqa2); - return Some(at + reverse_pos2(mask1, mask2)); 
- } - } - while ptr >= start_ptr.add(VECTOR_SIZE) { - ptr = ptr.sub(VECTOR_SIZE); - if let Some(i) = reverse_search2(start_ptr, end_ptr, ptr, vn1, vn2) { - return Some(i); - } - } - if ptr > start_ptr { - debug_assert!(sub(ptr, start_ptr) < VECTOR_SIZE); - return reverse_search2(start_ptr, end_ptr, start_ptr, vn1, vn2); - } - None -} - -#[target_feature(enable = "avx2")] -pub unsafe fn memrchr3( - n1: u8, - n2: u8, - n3: u8, - haystack: &[u8], -) -> Option { - let vn1 = _mm256_set1_epi8(n1 as i8); - let vn2 = _mm256_set1_epi8(n2 as i8); - let vn3 = _mm256_set1_epi8(n3 as i8); - let len = haystack.len(); - let loop_size = cmp::min(LOOP_SIZE2, len); - let start_ptr = haystack.as_ptr(); - let end_ptr = start_ptr.add(haystack.len()); - let mut ptr = end_ptr; - - if haystack.len() < VECTOR_SIZE { - while ptr > start_ptr { - ptr = ptr.offset(-1); - if *ptr == n1 || *ptr == n2 || *ptr == n3 { - return Some(sub(ptr, start_ptr)); - } - } - return None; - } - - ptr = ptr.sub(VECTOR_SIZE); - if let Some(i) = reverse_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3) { - return Some(i); - } - - ptr = (end_ptr as usize & !VECTOR_ALIGN) as *const u8; - debug_assert!(start_ptr <= ptr && ptr <= end_ptr); - while loop_size == LOOP_SIZE2 && ptr >= start_ptr.add(loop_size) { - debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE); - - ptr = ptr.sub(loop_size); - let a = _mm256_load_si256(ptr as *const __m256i); - let b = _mm256_load_si256(ptr.add(VECTOR_SIZE) as *const __m256i); - let eqa1 = _mm256_cmpeq_epi8(vn1, a); - let eqb1 = _mm256_cmpeq_epi8(vn1, b); - let eqa2 = _mm256_cmpeq_epi8(vn2, a); - let eqb2 = _mm256_cmpeq_epi8(vn2, b); - let eqa3 = _mm256_cmpeq_epi8(vn3, a); - let eqb3 = _mm256_cmpeq_epi8(vn3, b); - let or1 = _mm256_or_si256(eqa1, eqb1); - let or2 = _mm256_or_si256(eqa2, eqb2); - let or3 = _mm256_or_si256(eqa3, eqb3); - let or4 = _mm256_or_si256(or1, or2); - let or5 = _mm256_or_si256(or3, or4); - if _mm256_movemask_epi8(or5) != 0 { - let mut at = sub(ptr.add(VECTOR_SIZE), 
start_ptr); - let mask1 = _mm256_movemask_epi8(eqb1); - let mask2 = _mm256_movemask_epi8(eqb2); - let mask3 = _mm256_movemask_epi8(eqb3); - if mask1 != 0 || mask2 != 0 || mask3 != 0 { - return Some(at + reverse_pos3(mask1, mask2, mask3)); - } - - at -= VECTOR_SIZE; - let mask1 = _mm256_movemask_epi8(eqa1); - let mask2 = _mm256_movemask_epi8(eqa2); - let mask3 = _mm256_movemask_epi8(eqa3); - return Some(at + reverse_pos3(mask1, mask2, mask3)); - } - } - while ptr >= start_ptr.add(VECTOR_SIZE) { - ptr = ptr.sub(VECTOR_SIZE); - if let Some(i) = - reverse_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3) - { - return Some(i); - } - } - if ptr > start_ptr { - debug_assert!(sub(ptr, start_ptr) < VECTOR_SIZE); - return reverse_search3(start_ptr, end_ptr, start_ptr, vn1, vn2, vn3); - } - None -} - -#[target_feature(enable = "avx2")] -unsafe fn forward_search1( - start_ptr: *const u8, - end_ptr: *const u8, - ptr: *const u8, - vn1: __m256i, -) -> Option { - debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE); - debug_assert!(start_ptr <= ptr); - debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE)); - - let chunk = _mm256_loadu_si256(ptr as *const __m256i); - let mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(chunk, vn1)); - if mask != 0 { - Some(sub(ptr, start_ptr) + forward_pos(mask)) - } else { - None - } -} - -#[target_feature(enable = "avx2")] -unsafe fn forward_search2( - start_ptr: *const u8, - end_ptr: *const u8, - ptr: *const u8, - vn1: __m256i, - vn2: __m256i, -) -> Option { - debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE); - debug_assert!(start_ptr <= ptr); - debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE)); - - let chunk = _mm256_loadu_si256(ptr as *const __m256i); - let eq1 = _mm256_cmpeq_epi8(chunk, vn1); - let eq2 = _mm256_cmpeq_epi8(chunk, vn2); - if _mm256_movemask_epi8(_mm256_or_si256(eq1, eq2)) != 0 { - let mask1 = _mm256_movemask_epi8(eq1); - let mask2 = _mm256_movemask_epi8(eq2); - Some(sub(ptr, start_ptr) + forward_pos2(mask1, mask2)) - } else { - None - } 
-} - -#[target_feature(enable = "avx2")] -unsafe fn forward_search3( - start_ptr: *const u8, - end_ptr: *const u8, - ptr: *const u8, - vn1: __m256i, - vn2: __m256i, - vn3: __m256i, -) -> Option { - debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE); - debug_assert!(start_ptr <= ptr); - debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE)); - - let chunk = _mm256_loadu_si256(ptr as *const __m256i); - let eq1 = _mm256_cmpeq_epi8(chunk, vn1); - let eq2 = _mm256_cmpeq_epi8(chunk, vn2); - let eq3 = _mm256_cmpeq_epi8(chunk, vn3); - let or = _mm256_or_si256(eq1, eq2); - if _mm256_movemask_epi8(_mm256_or_si256(or, eq3)) != 0 { - let mask1 = _mm256_movemask_epi8(eq1); - let mask2 = _mm256_movemask_epi8(eq2); - let mask3 = _mm256_movemask_epi8(eq3); - Some(sub(ptr, start_ptr) + forward_pos3(mask1, mask2, mask3)) - } else { - None - } -} - -#[target_feature(enable = "avx2")] -unsafe fn reverse_search1( - start_ptr: *const u8, - end_ptr: *const u8, - ptr: *const u8, - vn1: __m256i, -) -> Option { - debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE); - debug_assert!(start_ptr <= ptr); - debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE)); - - let chunk = _mm256_loadu_si256(ptr as *const __m256i); - let mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(vn1, chunk)); - if mask != 0 { - Some(sub(ptr, start_ptr) + reverse_pos(mask)) - } else { - None - } -} - -#[target_feature(enable = "avx2")] -unsafe fn reverse_search2( - start_ptr: *const u8, - end_ptr: *const u8, - ptr: *const u8, - vn1: __m256i, - vn2: __m256i, -) -> Option { - debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE); - debug_assert!(start_ptr <= ptr); - debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE)); - - let chunk = _mm256_loadu_si256(ptr as *const __m256i); - let eq1 = _mm256_cmpeq_epi8(chunk, vn1); - let eq2 = _mm256_cmpeq_epi8(chunk, vn2); - if _mm256_movemask_epi8(_mm256_or_si256(eq1, eq2)) != 0 { - let mask1 = _mm256_movemask_epi8(eq1); - let mask2 = _mm256_movemask_epi8(eq2); - Some(sub(ptr, start_ptr) + 
reverse_pos2(mask1, mask2)) - } else { - None - } -} - -#[target_feature(enable = "avx2")] -unsafe fn reverse_search3( - start_ptr: *const u8, - end_ptr: *const u8, - ptr: *const u8, - vn1: __m256i, - vn2: __m256i, - vn3: __m256i, -) -> Option { - debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE); - debug_assert!(start_ptr <= ptr); - debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE)); - - let chunk = _mm256_loadu_si256(ptr as *const __m256i); - let eq1 = _mm256_cmpeq_epi8(chunk, vn1); - let eq2 = _mm256_cmpeq_epi8(chunk, vn2); - let eq3 = _mm256_cmpeq_epi8(chunk, vn3); - let or = _mm256_or_si256(eq1, eq2); - if _mm256_movemask_epi8(_mm256_or_si256(or, eq3)) != 0 { - let mask1 = _mm256_movemask_epi8(eq1); - let mask2 = _mm256_movemask_epi8(eq2); - let mask3 = _mm256_movemask_epi8(eq3); - Some(sub(ptr, start_ptr) + reverse_pos3(mask1, mask2, mask3)) - } else { - None - } -} - -/// Compute the position of the first matching byte from the given mask. The -/// position returned is always in the range [0, 31]. -/// -/// The mask given is expected to be the result of _mm256_movemask_epi8. -fn forward_pos(mask: i32) -> usize { - // We are dealing with little endian here, where the most significant byte - // is at a higher address. That means the least significant bit that is set - // corresponds to the position of our first matching byte. That position - // corresponds to the number of zeros after the least significant bit. - mask.trailing_zeros() as usize -} - -/// Compute the position of the first matching byte from the given masks. The -/// position returned is always in the range [0, 31]. Each mask corresponds to -/// the equality comparison of a single byte. -/// -/// The masks given are expected to be the result of _mm256_movemask_epi8, -/// where at least one of the masks is non-zero (i.e., indicates a match). 
-fn forward_pos2(mask1: i32, mask2: i32) -> usize { - debug_assert!(mask1 != 0 || mask2 != 0); - - forward_pos(mask1 | mask2) -} - -/// Compute the position of the first matching byte from the given masks. The -/// position returned is always in the range [0, 31]. Each mask corresponds to -/// the equality comparison of a single byte. -/// -/// The masks given are expected to be the result of _mm256_movemask_epi8, -/// where at least one of the masks is non-zero (i.e., indicates a match). -fn forward_pos3(mask1: i32, mask2: i32, mask3: i32) -> usize { - debug_assert!(mask1 != 0 || mask2 != 0 || mask3 != 0); - - forward_pos(mask1 | mask2 | mask3) -} - -/// Compute the position of the last matching byte from the given mask. The -/// position returned is always in the range [0, 31]. -/// -/// The mask given is expected to be the result of _mm256_movemask_epi8. -fn reverse_pos(mask: i32) -> usize { - // We are dealing with little endian here, where the most significant byte - // is at a higher address. That means the most significant bit that is set - // corresponds to the position of our last matching byte. The position from - // the end of the mask is therefore the number of leading zeros in a 32 - // bit integer, and the position from the start of the mask is therefore - // 32 - (leading zeros) - 1. - VECTOR_SIZE - (mask as u32).leading_zeros() as usize - 1 -} - -/// Compute the position of the last matching byte from the given masks. The -/// position returned is always in the range [0, 31]. Each mask corresponds to -/// the equality comparison of a single byte. -/// -/// The masks given are expected to be the result of _mm256_movemask_epi8, -/// where at least one of the masks is non-zero (i.e., indicates a match). -fn reverse_pos2(mask1: i32, mask2: i32) -> usize { - debug_assert!(mask1 != 0 || mask2 != 0); - - reverse_pos(mask1 | mask2) -} - -/// Compute the position of the last matching byte from the given masks. 
The -/// position returned is always in the range [0, 31]. Each mask corresponds to -/// the equality comparison of a single byte. -/// -/// The masks given are expected to be the result of _mm256_movemask_epi8, -/// where at least one of the masks is non-zero (i.e., indicates a match). -fn reverse_pos3(mask1: i32, mask2: i32, mask3: i32) -> usize { - debug_assert!(mask1 != 0 || mask2 != 0 || mask3 != 0); - - reverse_pos(mask1 | mask2 | mask3) -} - -/// Subtract `b` from `a` and return the difference. `a` should be greater than -/// or equal to `b`. -fn sub(a: *const u8, b: *const u8) -> usize { - debug_assert!(a >= b); - (a as usize) - (b as usize) -} diff --git a/vendor/memchr/src/memchr/x86/mod.rs b/vendor/memchr/src/memchr/x86/mod.rs deleted file mode 100644 index aec35db..0000000 --- a/vendor/memchr/src/memchr/x86/mod.rs +++ /dev/null @@ -1,148 +0,0 @@ -use super::fallback; - -// We only use AVX when we can detect at runtime whether it's available, which -// requires std. -#[cfg(feature = "std")] -mod avx; -mod sse2; - -/// This macro employs a gcc-like "ifunc" trick where by upon first calling -/// `memchr` (for example), CPU feature detection will be performed at runtime -/// to determine the best implementation to use. After CPU feature detection -/// is done, we replace `memchr`'s function pointer with the selection. Upon -/// subsequent invocations, the CPU-specific routine is invoked directly, which -/// skips the CPU feature detection and subsequent branch that's required. -/// -/// While this typically doesn't matter for rare occurrences or when used on -/// larger haystacks, `memchr` can be called in tight loops where the overhead -/// of this branch can actually add up *and is measurable*. This trick was -/// necessary to bring this implementation up to glibc's speeds for the 'tiny' -/// benchmarks, for example. 
-/// -/// At some point, I expect the Rust ecosystem will get a nice macro for doing -/// exactly this, at which point, we can replace our hand-jammed version of it. -/// -/// N.B. The ifunc strategy does prevent function inlining of course, but -/// on modern CPUs, you'll probably end up with the AVX2 implementation, -/// which probably can't be inlined anyway---unless you've compiled your -/// entire program with AVX2 enabled. However, even then, the various memchr -/// implementations aren't exactly small, so inlining might not help anyway! -/// -/// # Safety -/// -/// Callers must ensure that fnty is function pointer type. -#[cfg(feature = "std")] -macro_rules! unsafe_ifunc { - ($fnty:ty, $name:ident, $haystack:ident, $($needle:ident),+) => {{ - use std::{mem, sync::atomic::{AtomicPtr, Ordering}}; - - type FnRaw = *mut (); - - static FN: AtomicPtr<()> = AtomicPtr::new(detect as FnRaw); - - fn detect($($needle: u8),+, haystack: &[u8]) -> Option { - let fun = - if cfg!(memchr_runtime_avx) && is_x86_feature_detected!("avx2") { - avx::$name as FnRaw - } else if cfg!(memchr_runtime_sse2) { - sse2::$name as FnRaw - } else { - fallback::$name as FnRaw - }; - FN.store(fun as FnRaw, Ordering::Relaxed); - // SAFETY: By virtue of the caller contract, $fnty is a function - // pointer, which is always safe to transmute with a *mut (). - // Also, if 'fun is the AVX routine, then it is guaranteed to be - // supported since we checked the avx2 feature. - unsafe { - mem::transmute::(fun)($($needle),+, haystack) - } - } - - // SAFETY: By virtue of the caller contract, $fnty is a function - // pointer, which is always safe to transmute with a *mut (). Also, if - // 'fun is the AVX routine, then it is guaranteed to be supported since - // we checked the avx2 feature. 
- unsafe { - let fun = FN.load(Ordering::Relaxed); - mem::transmute::(fun)($($needle),+, $haystack) - } - }} -} - -/// When std isn't available to provide runtime CPU feature detection, or if -/// runtime CPU feature detection has been explicitly disabled, then just -/// call our optimized SSE2 routine directly. SSE2 is avalbale on all x86_64 -/// targets, so no CPU feature detection is necessary. -/// -/// # Safety -/// -/// There are no safety requirements for this definition of the macro. It is -/// safe for all inputs since it is restricted to either the fallback routine -/// or the SSE routine, which is always safe to call on x86_64. -#[cfg(not(feature = "std"))] -macro_rules! unsafe_ifunc { - ($fnty:ty, $name:ident, $haystack:ident, $($needle:ident),+) => {{ - if cfg!(memchr_runtime_sse2) { - unsafe { sse2::$name($($needle),+, $haystack) } - } else { - fallback::$name($($needle),+, $haystack) - } - }} -} - -#[inline(always)] -pub fn memchr(n1: u8, haystack: &[u8]) -> Option { - unsafe_ifunc!(fn(u8, &[u8]) -> Option, memchr, haystack, n1) -} - -#[inline(always)] -pub fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option { - unsafe_ifunc!( - fn(u8, u8, &[u8]) -> Option, - memchr2, - haystack, - n1, - n2 - ) -} - -#[inline(always)] -pub fn memchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option { - unsafe_ifunc!( - fn(u8, u8, u8, &[u8]) -> Option, - memchr3, - haystack, - n1, - n2, - n3 - ) -} - -#[inline(always)] -pub fn memrchr(n1: u8, haystack: &[u8]) -> Option { - unsafe_ifunc!(fn(u8, &[u8]) -> Option, memrchr, haystack, n1) -} - -#[inline(always)] -pub fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option { - unsafe_ifunc!( - fn(u8, u8, &[u8]) -> Option, - memrchr2, - haystack, - n1, - n2 - ) -} - -#[inline(always)] -pub fn memrchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option { - unsafe_ifunc!( - fn(u8, u8, u8, &[u8]) -> Option, - memrchr3, - haystack, - n1, - n2, - n3 - ) -} diff --git a/vendor/memchr/src/memchr/x86/sse2.rs 
b/vendor/memchr/src/memchr/x86/sse2.rs deleted file mode 100644 index b7b3a93..0000000 --- a/vendor/memchr/src/memchr/x86/sse2.rs +++ /dev/null @@ -1,791 +0,0 @@ -use core::{arch::x86_64::*, cmp, mem::size_of}; - -const VECTOR_SIZE: usize = size_of::<__m128i>(); -const VECTOR_ALIGN: usize = VECTOR_SIZE - 1; - -// The number of bytes to loop at in one iteration of memchr/memrchr. -const LOOP_SIZE: usize = 4 * VECTOR_SIZE; - -// The number of bytes to loop at in one iteration of memchr2/memrchr2 and -// memchr3/memrchr3. There was no observable difference between 64 and 32 bytes -// in benchmarks. memchr3 in particular only gets a very slight speed up from -// the loop unrolling. -const LOOP_SIZE2: usize = 2 * VECTOR_SIZE; - -#[target_feature(enable = "sse2")] -pub unsafe fn memchr(n1: u8, haystack: &[u8]) -> Option { - // What follows is a fast SSE2-only algorithm to detect the position of - // `n1` in `haystack` if it exists. From what I know, this is the "classic" - // algorithm. I believe it can be found in places like glibc and Go's - // standard library. It appears to be well known and is elaborated on in - // more detail here: https://gms.tf/stdfind-and-memchr-optimizations.html - // - // While this routine is very long, the basic idea is actually very simple - // and can be expressed straight-forwardly in pseudo code: - // - // needle = (n1 << 15) | (n1 << 14) | ... | (n1 << 1) | n1 - // // Note: shift amount in bytes - // - // while i <= haystack.len() - 16: - // // A 16 byte vector. Each byte in chunk corresponds to a byte in - // // the haystack. - // chunk = haystack[i:i+16] - // // Compare bytes in needle with bytes in chunk. The result is a 16 - // // byte chunk where each byte is 0xFF if the corresponding bytes - // // in needle and chunk were equal, or 0x00 otherwise. 
- // eqs = cmpeq(needle, chunk) - // // Return a 32 bit integer where the most significant 16 bits - // // are always 0 and the lower 16 bits correspond to whether the - // // most significant bit in the correspond byte in `eqs` is set. - // // In other words, `mask as u16` has bit i set if and only if - // // needle[i] == chunk[i]. - // mask = movemask(eqs) - // - // // Mask is 0 if there is no match, and non-zero otherwise. - // if mask != 0: - // // trailing_zeros tells us the position of the least significant - // // bit that is set. - // return i + trailing_zeros(mask) - // - // // haystack length may not be a multiple of 16, so search the rest. - // while i < haystack.len(): - // if haystack[i] == n1: - // return i - // - // // No match found. - // return NULL - // - // In fact, we could loosely translate the above code to Rust line-for-line - // and it would be a pretty fast algorithm. But, we pull out all the stops - // to go as fast as possible: - // - // 1. We use aligned loads. That is, we do some finagling to make sure our - // primary loop not only proceeds in increments of 16 bytes, but that - // the address of haystack's pointer that we dereference is aligned to - // 16 bytes. 16 is a magic number here because it is the size of SSE2 - // 128-bit vector. (For the AVX2 algorithm, 32 is the magic number.) - // Therefore, to get aligned loads, our pointer's address must be evenly - // divisible by 16. - // 2. Our primary loop proceeds 64 bytes at a time instead of 16. It's - // kind of like loop unrolling, but we combine the equality comparisons - // using a vector OR such that we only need to extract a single mask to - // determine whether a match exists or not. If so, then we do some - // book-keeping to determine the precise location but otherwise mush on. - // 3. We use our "chunk" comparison routine in as many places as possible, - // even if it means using unaligned loads. 
In particular, if haystack - // starts with an unaligned address, then we do an unaligned load to - // search the first 16 bytes. We then start our primary loop at the - // smallest subsequent aligned address, which will actually overlap with - // previously searched bytes. But we're OK with that. We do a similar - // dance at the end of our primary loop. Finally, to avoid a - // byte-at-a-time loop at the end, we do a final 16 byte unaligned load - // that may overlap with a previous load. This is OK because it converts - // a loop into a small number of very fast vector instructions. - // - // The primary downside of this algorithm is that it's effectively - // completely unsafe. Therefore, we have to be super careful to avoid - // undefined behavior: - // - // 1. We use raw pointers everywhere. Not only does dereferencing a pointer - // require the pointer to be valid, but we actually can't even store the - // address of an invalid pointer (unless it's 1 past the end of - // haystack) without sacrificing performance. - // 2. _mm_loadu_si128 is used when you don't care about alignment, and - // _mm_load_si128 is used when you do care. You cannot use the latter - // on unaligned pointers. - // 3. We make liberal use of debug_assert! to check assumptions. - // 4. We make a concerted effort to stick with pointers instead of indices. - // Indices are nicer because there's less to worry about with them (see - // above about pointer offsets), but I could not get the compiler to - // produce as good of code as what the below produces. In any case, - // pointers are what we really care about here, and alignment is - // expressed a bit more naturally with them. - // - // In general, most of the algorithms in this crate have a similar - // structure to what you see below, so this comment applies fairly well to - // all of them. 
- - let vn1 = _mm_set1_epi8(n1 as i8); - let len = haystack.len(); - let loop_size = cmp::min(LOOP_SIZE, len); - let start_ptr = haystack.as_ptr(); - let end_ptr = start_ptr.add(haystack.len()); - let mut ptr = start_ptr; - - if haystack.len() < VECTOR_SIZE { - while ptr < end_ptr { - if *ptr == n1 { - return Some(sub(ptr, start_ptr)); - } - ptr = ptr.offset(1); - } - return None; - } - - if let Some(i) = forward_search1(start_ptr, end_ptr, ptr, vn1) { - return Some(i); - } - - ptr = ptr.add(VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN)); - debug_assert!(ptr > start_ptr && end_ptr.sub(VECTOR_SIZE) >= start_ptr); - while loop_size == LOOP_SIZE && ptr <= end_ptr.sub(loop_size) { - debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE); - - let a = _mm_load_si128(ptr as *const __m128i); - let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i); - let c = _mm_load_si128(ptr.add(2 * VECTOR_SIZE) as *const __m128i); - let d = _mm_load_si128(ptr.add(3 * VECTOR_SIZE) as *const __m128i); - let eqa = _mm_cmpeq_epi8(vn1, a); - let eqb = _mm_cmpeq_epi8(vn1, b); - let eqc = _mm_cmpeq_epi8(vn1, c); - let eqd = _mm_cmpeq_epi8(vn1, d); - let or1 = _mm_or_si128(eqa, eqb); - let or2 = _mm_or_si128(eqc, eqd); - let or3 = _mm_or_si128(or1, or2); - if _mm_movemask_epi8(or3) != 0 { - let mut at = sub(ptr, start_ptr); - let mask = _mm_movemask_epi8(eqa); - if mask != 0 { - return Some(at + forward_pos(mask)); - } - - at += VECTOR_SIZE; - let mask = _mm_movemask_epi8(eqb); - if mask != 0 { - return Some(at + forward_pos(mask)); - } - - at += VECTOR_SIZE; - let mask = _mm_movemask_epi8(eqc); - if mask != 0 { - return Some(at + forward_pos(mask)); - } - - at += VECTOR_SIZE; - let mask = _mm_movemask_epi8(eqd); - debug_assert!(mask != 0); - return Some(at + forward_pos(mask)); - } - ptr = ptr.add(loop_size); - } - while ptr <= end_ptr.sub(VECTOR_SIZE) { - debug_assert!(sub(end_ptr, ptr) >= VECTOR_SIZE); - - if let Some(i) = forward_search1(start_ptr, end_ptr, ptr, vn1) { - return 
Some(i); - } - ptr = ptr.add(VECTOR_SIZE); - } - if ptr < end_ptr { - debug_assert!(sub(end_ptr, ptr) < VECTOR_SIZE); - ptr = ptr.sub(VECTOR_SIZE - sub(end_ptr, ptr)); - debug_assert_eq!(sub(end_ptr, ptr), VECTOR_SIZE); - - return forward_search1(start_ptr, end_ptr, ptr, vn1); - } - None -} - -#[target_feature(enable = "sse2")] -pub unsafe fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option { - let vn1 = _mm_set1_epi8(n1 as i8); - let vn2 = _mm_set1_epi8(n2 as i8); - let len = haystack.len(); - let loop_size = cmp::min(LOOP_SIZE2, len); - let start_ptr = haystack.as_ptr(); - let end_ptr = start_ptr.add(haystack.len()); - let mut ptr = start_ptr; - - if haystack.len() < VECTOR_SIZE { - while ptr < end_ptr { - if *ptr == n1 || *ptr == n2 { - return Some(sub(ptr, start_ptr)); - } - ptr = ptr.offset(1); - } - return None; - } - - if let Some(i) = forward_search2(start_ptr, end_ptr, ptr, vn1, vn2) { - return Some(i); - } - - ptr = ptr.add(VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN)); - debug_assert!(ptr > start_ptr && end_ptr.sub(VECTOR_SIZE) >= start_ptr); - while loop_size == LOOP_SIZE2 && ptr <= end_ptr.sub(loop_size) { - debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE); - - let a = _mm_load_si128(ptr as *const __m128i); - let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i); - let eqa1 = _mm_cmpeq_epi8(vn1, a); - let eqb1 = _mm_cmpeq_epi8(vn1, b); - let eqa2 = _mm_cmpeq_epi8(vn2, a); - let eqb2 = _mm_cmpeq_epi8(vn2, b); - let or1 = _mm_or_si128(eqa1, eqb1); - let or2 = _mm_or_si128(eqa2, eqb2); - let or3 = _mm_or_si128(or1, or2); - if _mm_movemask_epi8(or3) != 0 { - let mut at = sub(ptr, start_ptr); - let mask1 = _mm_movemask_epi8(eqa1); - let mask2 = _mm_movemask_epi8(eqa2); - if mask1 != 0 || mask2 != 0 { - return Some(at + forward_pos2(mask1, mask2)); - } - - at += VECTOR_SIZE; - let mask1 = _mm_movemask_epi8(eqb1); - let mask2 = _mm_movemask_epi8(eqb2); - return Some(at + forward_pos2(mask1, mask2)); - } - ptr = ptr.add(loop_size); - } - while 
ptr <= end_ptr.sub(VECTOR_SIZE) { - if let Some(i) = forward_search2(start_ptr, end_ptr, ptr, vn1, vn2) { - return Some(i); - } - ptr = ptr.add(VECTOR_SIZE); - } - if ptr < end_ptr { - debug_assert!(sub(end_ptr, ptr) < VECTOR_SIZE); - ptr = ptr.sub(VECTOR_SIZE - sub(end_ptr, ptr)); - debug_assert_eq!(sub(end_ptr, ptr), VECTOR_SIZE); - - return forward_search2(start_ptr, end_ptr, ptr, vn1, vn2); - } - None -} - -#[target_feature(enable = "sse2")] -pub unsafe fn memchr3( - n1: u8, - n2: u8, - n3: u8, - haystack: &[u8], -) -> Option { - let vn1 = _mm_set1_epi8(n1 as i8); - let vn2 = _mm_set1_epi8(n2 as i8); - let vn3 = _mm_set1_epi8(n3 as i8); - let len = haystack.len(); - let loop_size = cmp::min(LOOP_SIZE2, len); - let start_ptr = haystack.as_ptr(); - let end_ptr = start_ptr.add(haystack.len()); - let mut ptr = start_ptr; - - if haystack.len() < VECTOR_SIZE { - while ptr < end_ptr { - if *ptr == n1 || *ptr == n2 || *ptr == n3 { - return Some(sub(ptr, start_ptr)); - } - ptr = ptr.offset(1); - } - return None; - } - - if let Some(i) = forward_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3) { - return Some(i); - } - - ptr = ptr.add(VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN)); - debug_assert!(ptr > start_ptr && end_ptr.sub(VECTOR_SIZE) >= start_ptr); - while loop_size == LOOP_SIZE2 && ptr <= end_ptr.sub(loop_size) { - debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE); - - let a = _mm_load_si128(ptr as *const __m128i); - let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i); - let eqa1 = _mm_cmpeq_epi8(vn1, a); - let eqb1 = _mm_cmpeq_epi8(vn1, b); - let eqa2 = _mm_cmpeq_epi8(vn2, a); - let eqb2 = _mm_cmpeq_epi8(vn2, b); - let eqa3 = _mm_cmpeq_epi8(vn3, a); - let eqb3 = _mm_cmpeq_epi8(vn3, b); - let or1 = _mm_or_si128(eqa1, eqb1); - let or2 = _mm_or_si128(eqa2, eqb2); - let or3 = _mm_or_si128(eqa3, eqb3); - let or4 = _mm_or_si128(or1, or2); - let or5 = _mm_or_si128(or3, or4); - if _mm_movemask_epi8(or5) != 0 { - let mut at = sub(ptr, start_ptr); - let 
mask1 = _mm_movemask_epi8(eqa1); - let mask2 = _mm_movemask_epi8(eqa2); - let mask3 = _mm_movemask_epi8(eqa3); - if mask1 != 0 || mask2 != 0 || mask3 != 0 { - return Some(at + forward_pos3(mask1, mask2, mask3)); - } - - at += VECTOR_SIZE; - let mask1 = _mm_movemask_epi8(eqb1); - let mask2 = _mm_movemask_epi8(eqb2); - let mask3 = _mm_movemask_epi8(eqb3); - return Some(at + forward_pos3(mask1, mask2, mask3)); - } - ptr = ptr.add(loop_size); - } - while ptr <= end_ptr.sub(VECTOR_SIZE) { - if let Some(i) = - forward_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3) - { - return Some(i); - } - ptr = ptr.add(VECTOR_SIZE); - } - if ptr < end_ptr { - debug_assert!(sub(end_ptr, ptr) < VECTOR_SIZE); - ptr = ptr.sub(VECTOR_SIZE - sub(end_ptr, ptr)); - debug_assert_eq!(sub(end_ptr, ptr), VECTOR_SIZE); - - return forward_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3); - } - None -} - -#[target_feature(enable = "sse2")] -pub unsafe fn memrchr(n1: u8, haystack: &[u8]) -> Option { - let vn1 = _mm_set1_epi8(n1 as i8); - let len = haystack.len(); - let loop_size = cmp::min(LOOP_SIZE, len); - let start_ptr = haystack.as_ptr(); - let end_ptr = start_ptr.add(haystack.len()); - let mut ptr = end_ptr; - - if haystack.len() < VECTOR_SIZE { - while ptr > start_ptr { - ptr = ptr.offset(-1); - if *ptr == n1 { - return Some(sub(ptr, start_ptr)); - } - } - return None; - } - - ptr = ptr.sub(VECTOR_SIZE); - if let Some(i) = reverse_search1(start_ptr, end_ptr, ptr, vn1) { - return Some(i); - } - - ptr = (end_ptr as usize & !VECTOR_ALIGN) as *const u8; - debug_assert!(start_ptr <= ptr && ptr <= end_ptr); - while loop_size == LOOP_SIZE && ptr >= start_ptr.add(loop_size) { - debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE); - - ptr = ptr.sub(loop_size); - let a = _mm_load_si128(ptr as *const __m128i); - let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i); - let c = _mm_load_si128(ptr.add(2 * VECTOR_SIZE) as *const __m128i); - let d = _mm_load_si128(ptr.add(3 * VECTOR_SIZE) as *const 
__m128i); - let eqa = _mm_cmpeq_epi8(vn1, a); - let eqb = _mm_cmpeq_epi8(vn1, b); - let eqc = _mm_cmpeq_epi8(vn1, c); - let eqd = _mm_cmpeq_epi8(vn1, d); - let or1 = _mm_or_si128(eqa, eqb); - let or2 = _mm_or_si128(eqc, eqd); - let or3 = _mm_or_si128(or1, or2); - if _mm_movemask_epi8(or3) != 0 { - let mut at = sub(ptr.add(3 * VECTOR_SIZE), start_ptr); - let mask = _mm_movemask_epi8(eqd); - if mask != 0 { - return Some(at + reverse_pos(mask)); - } - - at -= VECTOR_SIZE; - let mask = _mm_movemask_epi8(eqc); - if mask != 0 { - return Some(at + reverse_pos(mask)); - } - - at -= VECTOR_SIZE; - let mask = _mm_movemask_epi8(eqb); - if mask != 0 { - return Some(at + reverse_pos(mask)); - } - - at -= VECTOR_SIZE; - let mask = _mm_movemask_epi8(eqa); - debug_assert!(mask != 0); - return Some(at + reverse_pos(mask)); - } - } - while ptr >= start_ptr.add(VECTOR_SIZE) { - ptr = ptr.sub(VECTOR_SIZE); - if let Some(i) = reverse_search1(start_ptr, end_ptr, ptr, vn1) { - return Some(i); - } - } - if ptr > start_ptr { - debug_assert!(sub(ptr, start_ptr) < VECTOR_SIZE); - return reverse_search1(start_ptr, end_ptr, start_ptr, vn1); - } - None -} - -#[target_feature(enable = "sse2")] -pub unsafe fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option { - let vn1 = _mm_set1_epi8(n1 as i8); - let vn2 = _mm_set1_epi8(n2 as i8); - let len = haystack.len(); - let loop_size = cmp::min(LOOP_SIZE2, len); - let start_ptr = haystack.as_ptr(); - let end_ptr = start_ptr.add(haystack.len()); - let mut ptr = end_ptr; - - if haystack.len() < VECTOR_SIZE { - while ptr > start_ptr { - ptr = ptr.offset(-1); - if *ptr == n1 || *ptr == n2 { - return Some(sub(ptr, start_ptr)); - } - } - return None; - } - - ptr = ptr.sub(VECTOR_SIZE); - if let Some(i) = reverse_search2(start_ptr, end_ptr, ptr, vn1, vn2) { - return Some(i); - } - - ptr = (end_ptr as usize & !VECTOR_ALIGN) as *const u8; - debug_assert!(start_ptr <= ptr && ptr <= end_ptr); - while loop_size == LOOP_SIZE2 && ptr >= start_ptr.add(loop_size) { - 
debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE); - - ptr = ptr.sub(loop_size); - let a = _mm_load_si128(ptr as *const __m128i); - let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i); - let eqa1 = _mm_cmpeq_epi8(vn1, a); - let eqb1 = _mm_cmpeq_epi8(vn1, b); - let eqa2 = _mm_cmpeq_epi8(vn2, a); - let eqb2 = _mm_cmpeq_epi8(vn2, b); - let or1 = _mm_or_si128(eqa1, eqb1); - let or2 = _mm_or_si128(eqa2, eqb2); - let or3 = _mm_or_si128(or1, or2); - if _mm_movemask_epi8(or3) != 0 { - let mut at = sub(ptr.add(VECTOR_SIZE), start_ptr); - let mask1 = _mm_movemask_epi8(eqb1); - let mask2 = _mm_movemask_epi8(eqb2); - if mask1 != 0 || mask2 != 0 { - return Some(at + reverse_pos2(mask1, mask2)); - } - - at -= VECTOR_SIZE; - let mask1 = _mm_movemask_epi8(eqa1); - let mask2 = _mm_movemask_epi8(eqa2); - return Some(at + reverse_pos2(mask1, mask2)); - } - } - while ptr >= start_ptr.add(VECTOR_SIZE) { - ptr = ptr.sub(VECTOR_SIZE); - if let Some(i) = reverse_search2(start_ptr, end_ptr, ptr, vn1, vn2) { - return Some(i); - } - } - if ptr > start_ptr { - debug_assert!(sub(ptr, start_ptr) < VECTOR_SIZE); - return reverse_search2(start_ptr, end_ptr, start_ptr, vn1, vn2); - } - None -} - -#[target_feature(enable = "sse2")] -pub unsafe fn memrchr3( - n1: u8, - n2: u8, - n3: u8, - haystack: &[u8], -) -> Option { - let vn1 = _mm_set1_epi8(n1 as i8); - let vn2 = _mm_set1_epi8(n2 as i8); - let vn3 = _mm_set1_epi8(n3 as i8); - let len = haystack.len(); - let loop_size = cmp::min(LOOP_SIZE2, len); - let start_ptr = haystack.as_ptr(); - let end_ptr = start_ptr.add(haystack.len()); - let mut ptr = end_ptr; - - if haystack.len() < VECTOR_SIZE { - while ptr > start_ptr { - ptr = ptr.offset(-1); - if *ptr == n1 || *ptr == n2 || *ptr == n3 { - return Some(sub(ptr, start_ptr)); - } - } - return None; - } - - ptr = ptr.sub(VECTOR_SIZE); - if let Some(i) = reverse_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3) { - return Some(i); - } - - ptr = (end_ptr as usize & !VECTOR_ALIGN) as *const u8; - 
debug_assert!(start_ptr <= ptr && ptr <= end_ptr); - while loop_size == LOOP_SIZE2 && ptr >= start_ptr.add(loop_size) { - debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE); - - ptr = ptr.sub(loop_size); - let a = _mm_load_si128(ptr as *const __m128i); - let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i); - let eqa1 = _mm_cmpeq_epi8(vn1, a); - let eqb1 = _mm_cmpeq_epi8(vn1, b); - let eqa2 = _mm_cmpeq_epi8(vn2, a); - let eqb2 = _mm_cmpeq_epi8(vn2, b); - let eqa3 = _mm_cmpeq_epi8(vn3, a); - let eqb3 = _mm_cmpeq_epi8(vn3, b); - let or1 = _mm_or_si128(eqa1, eqb1); - let or2 = _mm_or_si128(eqa2, eqb2); - let or3 = _mm_or_si128(eqa3, eqb3); - let or4 = _mm_or_si128(or1, or2); - let or5 = _mm_or_si128(or3, or4); - if _mm_movemask_epi8(or5) != 0 { - let mut at = sub(ptr.add(VECTOR_SIZE), start_ptr); - let mask1 = _mm_movemask_epi8(eqb1); - let mask2 = _mm_movemask_epi8(eqb2); - let mask3 = _mm_movemask_epi8(eqb3); - if mask1 != 0 || mask2 != 0 || mask3 != 0 { - return Some(at + reverse_pos3(mask1, mask2, mask3)); - } - - at -= VECTOR_SIZE; - let mask1 = _mm_movemask_epi8(eqa1); - let mask2 = _mm_movemask_epi8(eqa2); - let mask3 = _mm_movemask_epi8(eqa3); - return Some(at + reverse_pos3(mask1, mask2, mask3)); - } - } - while ptr >= start_ptr.add(VECTOR_SIZE) { - ptr = ptr.sub(VECTOR_SIZE); - if let Some(i) = - reverse_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3) - { - return Some(i); - } - } - if ptr > start_ptr { - debug_assert!(sub(ptr, start_ptr) < VECTOR_SIZE); - return reverse_search3(start_ptr, end_ptr, start_ptr, vn1, vn2, vn3); - } - None -} - -#[target_feature(enable = "sse2")] -pub unsafe fn forward_search1( - start_ptr: *const u8, - end_ptr: *const u8, - ptr: *const u8, - vn1: __m128i, -) -> Option { - debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE); - debug_assert!(start_ptr <= ptr); - debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE)); - - let chunk = _mm_loadu_si128(ptr as *const __m128i); - let mask = _mm_movemask_epi8(_mm_cmpeq_epi8(chunk, 
vn1)); - if mask != 0 { - Some(sub(ptr, start_ptr) + forward_pos(mask)) - } else { - None - } -} - -#[target_feature(enable = "sse2")] -unsafe fn forward_search2( - start_ptr: *const u8, - end_ptr: *const u8, - ptr: *const u8, - vn1: __m128i, - vn2: __m128i, -) -> Option { - debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE); - debug_assert!(start_ptr <= ptr); - debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE)); - - let chunk = _mm_loadu_si128(ptr as *const __m128i); - let eq1 = _mm_cmpeq_epi8(chunk, vn1); - let eq2 = _mm_cmpeq_epi8(chunk, vn2); - if _mm_movemask_epi8(_mm_or_si128(eq1, eq2)) != 0 { - let mask1 = _mm_movemask_epi8(eq1); - let mask2 = _mm_movemask_epi8(eq2); - Some(sub(ptr, start_ptr) + forward_pos2(mask1, mask2)) - } else { - None - } -} - -#[target_feature(enable = "sse2")] -pub unsafe fn forward_search3( - start_ptr: *const u8, - end_ptr: *const u8, - ptr: *const u8, - vn1: __m128i, - vn2: __m128i, - vn3: __m128i, -) -> Option { - debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE); - debug_assert!(start_ptr <= ptr); - debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE)); - - let chunk = _mm_loadu_si128(ptr as *const __m128i); - let eq1 = _mm_cmpeq_epi8(chunk, vn1); - let eq2 = _mm_cmpeq_epi8(chunk, vn2); - let eq3 = _mm_cmpeq_epi8(chunk, vn3); - let or = _mm_or_si128(eq1, eq2); - if _mm_movemask_epi8(_mm_or_si128(or, eq3)) != 0 { - let mask1 = _mm_movemask_epi8(eq1); - let mask2 = _mm_movemask_epi8(eq2); - let mask3 = _mm_movemask_epi8(eq3); - Some(sub(ptr, start_ptr) + forward_pos3(mask1, mask2, mask3)) - } else { - None - } -} - -#[target_feature(enable = "sse2")] -unsafe fn reverse_search1( - start_ptr: *const u8, - end_ptr: *const u8, - ptr: *const u8, - vn1: __m128i, -) -> Option { - debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE); - debug_assert!(start_ptr <= ptr); - debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE)); - - let chunk = _mm_loadu_si128(ptr as *const __m128i); - let mask = _mm_movemask_epi8(_mm_cmpeq_epi8(vn1, chunk)); - if mask 
!= 0 { - Some(sub(ptr, start_ptr) + reverse_pos(mask)) - } else { - None - } -} - -#[target_feature(enable = "sse2")] -unsafe fn reverse_search2( - start_ptr: *const u8, - end_ptr: *const u8, - ptr: *const u8, - vn1: __m128i, - vn2: __m128i, -) -> Option { - debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE); - debug_assert!(start_ptr <= ptr); - debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE)); - - let chunk = _mm_loadu_si128(ptr as *const __m128i); - let eq1 = _mm_cmpeq_epi8(chunk, vn1); - let eq2 = _mm_cmpeq_epi8(chunk, vn2); - if _mm_movemask_epi8(_mm_or_si128(eq1, eq2)) != 0 { - let mask1 = _mm_movemask_epi8(eq1); - let mask2 = _mm_movemask_epi8(eq2); - Some(sub(ptr, start_ptr) + reverse_pos2(mask1, mask2)) - } else { - None - } -} - -#[target_feature(enable = "sse2")] -unsafe fn reverse_search3( - start_ptr: *const u8, - end_ptr: *const u8, - ptr: *const u8, - vn1: __m128i, - vn2: __m128i, - vn3: __m128i, -) -> Option { - debug_assert!(sub(end_ptr, start_ptr) >= VECTOR_SIZE); - debug_assert!(start_ptr <= ptr); - debug_assert!(ptr <= end_ptr.sub(VECTOR_SIZE)); - - let chunk = _mm_loadu_si128(ptr as *const __m128i); - let eq1 = _mm_cmpeq_epi8(chunk, vn1); - let eq2 = _mm_cmpeq_epi8(chunk, vn2); - let eq3 = _mm_cmpeq_epi8(chunk, vn3); - let or = _mm_or_si128(eq1, eq2); - if _mm_movemask_epi8(_mm_or_si128(or, eq3)) != 0 { - let mask1 = _mm_movemask_epi8(eq1); - let mask2 = _mm_movemask_epi8(eq2); - let mask3 = _mm_movemask_epi8(eq3); - Some(sub(ptr, start_ptr) + reverse_pos3(mask1, mask2, mask3)) - } else { - None - } -} - -/// Compute the position of the first matching byte from the given mask. The -/// position returned is always in the range [0, 15]. -/// -/// The mask given is expected to be the result of _mm_movemask_epi8. -fn forward_pos(mask: i32) -> usize { - // We are dealing with little endian here, where the most significant byte - // is at a higher address. 
That means the least significant bit that is set - // corresponds to the position of our first matching byte. That position - // corresponds to the number of zeros after the least significant bit. - mask.trailing_zeros() as usize -} - -/// Compute the position of the first matching byte from the given masks. The -/// position returned is always in the range [0, 15]. Each mask corresponds to -/// the equality comparison of a single byte. -/// -/// The masks given are expected to be the result of _mm_movemask_epi8, where -/// at least one of the masks is non-zero (i.e., indicates a match). -fn forward_pos2(mask1: i32, mask2: i32) -> usize { - debug_assert!(mask1 != 0 || mask2 != 0); - - forward_pos(mask1 | mask2) -} - -/// Compute the position of the first matching byte from the given masks. The -/// position returned is always in the range [0, 15]. Each mask corresponds to -/// the equality comparison of a single byte. -/// -/// The masks given are expected to be the result of _mm_movemask_epi8, where -/// at least one of the masks is non-zero (i.e., indicates a match). -fn forward_pos3(mask1: i32, mask2: i32, mask3: i32) -> usize { - debug_assert!(mask1 != 0 || mask2 != 0 || mask3 != 0); - - forward_pos(mask1 | mask2 | mask3) -} - -/// Compute the position of the last matching byte from the given mask. The -/// position returned is always in the range [0, 15]. -/// -/// The mask given is expected to be the result of _mm_movemask_epi8. -fn reverse_pos(mask: i32) -> usize { - // We are dealing with little endian here, where the most significant byte - // is at a higher address. That means the most significant bit that is set - // corresponds to the position of our last matching byte. The position from - // the end of the mask is therefore the number of leading zeros in a 16 - // bit integer, and the position from the start of the mask is therefore - // 16 - (leading zeros) - 1. 
- VECTOR_SIZE - (mask as u16).leading_zeros() as usize - 1 -} - -/// Compute the position of the last matching byte from the given masks. The -/// position returned is always in the range [0, 15]. Each mask corresponds to -/// the equality comparison of a single byte. -/// -/// The masks given are expected to be the result of _mm_movemask_epi8, where -/// at least one of the masks is non-zero (i.e., indicates a match). -fn reverse_pos2(mask1: i32, mask2: i32) -> usize { - debug_assert!(mask1 != 0 || mask2 != 0); - - reverse_pos(mask1 | mask2) -} - -/// Compute the position of the last matching byte from the given masks. The -/// position returned is always in the range [0, 15]. Each mask corresponds to -/// the equality comparison of a single byte. -/// -/// The masks given are expected to be the result of _mm_movemask_epi8, where -/// at least one of the masks is non-zero (i.e., indicates a match). -fn reverse_pos3(mask1: i32, mask2: i32, mask3: i32) -> usize { - debug_assert!(mask1 != 0 || mask2 != 0 || mask3 != 0); - - reverse_pos(mask1 | mask2 | mask3) -} - -/// Subtract `b` from `a` and return the difference. `a` should be greater than -/// or equal to `b`. -fn sub(a: *const u8, b: *const u8) -> usize { - debug_assert!(a >= b); - (a as usize) - (b as usize) -} diff --git a/vendor/memchr/src/memchr/x86/sse42.rs b/vendor/memchr/src/memchr/x86/sse42.rs deleted file mode 100644 index da38e50..0000000 --- a/vendor/memchr/src/memchr/x86/sse42.rs +++ /dev/null @@ -1,72 +0,0 @@ -// This code is unused. PCMPESTRI is gratuitously slow. I imagine it might -// start winning with a hypothetical memchr4 (or greater). This technique might -// also be good for exposing searches over ranges of bytes, but that departs -// from the standard memchr API, so it's not clear whether we actually want -// that or not. -// -// N.B. PCMPISTRI appears to be about twice as fast as PCMPESTRI, which is kind -// of neat. 
Unfortunately, UTF-8 strings can contain NUL bytes, which means -// I don't see a way of effectively using PCMPISTRI unless there's some fast -// way to replace zero bytes with a byte that is not not a needle byte. - -use core::{arch::x86_64::*, mem::size_of}; - -use x86::sse2; - -const VECTOR_SIZE: usize = size_of::<__m128i>(); -const CONTROL_ANY: i32 = _SIDD_UBYTE_OPS - | _SIDD_CMP_EQUAL_ANY - | _SIDD_POSITIVE_POLARITY - | _SIDD_LEAST_SIGNIFICANT; - -#[target_feature(enable = "sse4.2")] -pub unsafe fn memchr3( - n1: u8, - n2: u8, - n3: u8, - haystack: &[u8], -) -> Option { - let vn1 = _mm_set1_epi8(n1 as i8); - let vn2 = _mm_set1_epi8(n2 as i8); - let vn3 = _mm_set1_epi8(n3 as i8); - let vn = _mm_setr_epi8( - n1 as i8, n2 as i8, n3 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ); - let len = haystack.len(); - let start_ptr = haystack.as_ptr(); - let end_ptr = haystack[haystack.len()..].as_ptr(); - let mut ptr = start_ptr; - - if haystack.len() < VECTOR_SIZE { - while ptr < end_ptr { - if *ptr == n1 || *ptr == n2 || *ptr == n3 { - return Some(sub(ptr, start_ptr)); - } - ptr = ptr.offset(1); - } - return None; - } - while ptr <= end_ptr.sub(VECTOR_SIZE) { - let chunk = _mm_loadu_si128(ptr as *const __m128i); - let res = _mm_cmpestri(vn, 3, chunk, 16, CONTROL_ANY); - if res < 16 { - return Some(sub(ptr, start_ptr) + res as usize); - } - ptr = ptr.add(VECTOR_SIZE); - } - if ptr < end_ptr { - debug_assert!(sub(end_ptr, ptr) < VECTOR_SIZE); - ptr = ptr.sub(VECTOR_SIZE - sub(end_ptr, ptr)); - debug_assert_eq!(sub(end_ptr, ptr), VECTOR_SIZE); - - return sse2::forward_search3(start_ptr, end_ptr, ptr, vn1, vn2, vn3); - } - None -} - -/// Subtract `b` from `a` and return the difference. `a` should be greater than -/// or equal to `b`. 
-fn sub(a: *const u8, b: *const u8) -> usize { - debug_assert!(a >= b); - (a as usize) - (b as usize) -} diff --git a/vendor/memchr/src/memmem/genericsimd.rs b/vendor/memchr/src/memmem/genericsimd.rs deleted file mode 100644 index 28bfdab..0000000 --- a/vendor/memchr/src/memmem/genericsimd.rs +++ /dev/null @@ -1,266 +0,0 @@ -use core::mem::size_of; - -use crate::memmem::{util::memcmp, vector::Vector, NeedleInfo}; - -/// The minimum length of a needle required for this algorithm. The minimum -/// is 2 since a length of 1 should just use memchr and a length of 0 isn't -/// a case handled by this searcher. -pub(crate) const MIN_NEEDLE_LEN: usize = 2; - -/// The maximum length of a needle required for this algorithm. -/// -/// In reality, there is no hard max here. The code below can handle any -/// length needle. (Perhaps that suggests there are missing optimizations.) -/// Instead, this is a heuristic and a bound guaranteeing our linear time -/// complexity. -/// -/// It is a heuristic because when a candidate match is found, memcmp is run. -/// For very large needles with lots of false positives, memcmp can make the -/// code run quite slow. -/// -/// It is a bound because the worst case behavior with memcmp is multiplicative -/// in the size of the needle and haystack, and we want to keep that additive. -/// This bound ensures we still meet that bound theoretically, since it's just -/// a constant. We aren't acting in bad faith here, memcmp on tiny needles -/// is so fast that even in pathological cases (see pathological vector -/// benchmarks), this is still just as fast or faster in practice. -/// -/// This specific number was chosen by tweaking a bit and running benchmarks. -/// The rare-medium-needle, for example, gets about 5% faster by using this -/// algorithm instead of a prefilter-accelerated Two-Way. There's also a -/// theoretical desire to keep this number reasonably low, to mitigate the -/// impact of pathological cases. 
I did try 64, and some benchmarks got a -/// little better, and others (particularly the pathological ones), got a lot -/// worse. So... 32 it is? -pub(crate) const MAX_NEEDLE_LEN: usize = 32; - -/// The implementation of the forward vector accelerated substring search. -/// -/// This is extremely similar to the prefilter vector module by the same name. -/// The key difference is that this is not a prefilter. Instead, it handles -/// confirming its own matches. The trade off is that this only works with -/// smaller needles. The speed up here is that an inlined memcmp on a tiny -/// needle is very quick, even on pathological inputs. This is much better than -/// combining a prefilter with Two-Way, where using Two-Way to confirm the -/// match has higher latency. -/// -/// So why not use this for all needles? We could, and it would probably work -/// really well on most inputs. But its worst case is multiplicative and we -/// want to guarantee worst case additive time. Some of the benchmarks try to -/// justify this (see the pathological ones). -/// -/// The prefilter variant of this has more comments. Also note that we only -/// implement this for forward searches for now. If you have a compelling use -/// case for accelerated reverse search, please file an issue. -#[derive(Clone, Copy, Debug)] -pub(crate) struct Forward { - rare1i: u8, - rare2i: u8, -} - -impl Forward { - /// Create a new "generic simd" forward searcher. If one could not be - /// created from the given inputs, then None is returned. - pub(crate) fn new(ninfo: &NeedleInfo, needle: &[u8]) -> Option { - let (rare1i, rare2i) = ninfo.rarebytes.as_rare_ordered_u8(); - // If the needle is too short or too long, give up. Also, give up - // if the rare bytes detected are at the same position. (It likely - // suggests a degenerate case, although it should technically not be - // possible.) 
- if needle.len() < MIN_NEEDLE_LEN - || needle.len() > MAX_NEEDLE_LEN - || rare1i == rare2i - { - return None; - } - Some(Forward { rare1i, rare2i }) - } - - /// Returns the minimum length of haystack that is needed for this searcher - /// to work for a particular vector. Passing a haystack with a length - /// smaller than this will cause `fwd_find` to panic. - #[inline(always)] - pub(crate) fn min_haystack_len(&self) -> usize { - self.rare2i as usize + size_of::() - } -} - -/// Searches the given haystack for the given needle. The needle given should -/// be the same as the needle that this searcher was initialized with. -/// -/// # Panics -/// -/// When the given haystack has a length smaller than `min_haystack_len`. -/// -/// # Safety -/// -/// Since this is meant to be used with vector functions, callers need to -/// specialize this inside of a function with a `target_feature` attribute. -/// Therefore, callers must ensure that whatever target feature is being used -/// supports the vector functions that this function is specialized for. (For -/// the specific vector functions used, see the Vector trait implementations.) -#[inline(always)] -pub(crate) unsafe fn fwd_find( - fwd: &Forward, - haystack: &[u8], - needle: &[u8], -) -> Option { - // It would be nice if we didn't have this check here, since the meta - // searcher should handle it for us. But without this, I don't think we - // guarantee that end_ptr.sub(needle.len()) won't result in UB. We could - // put it as part of the safety contract, but it makes it more complicated - // than necessary. 
- if haystack.len() < needle.len() { - return None; - } - let min_haystack_len = fwd.min_haystack_len::(); - assert!(haystack.len() >= min_haystack_len, "haystack too small"); - debug_assert!(needle.len() <= haystack.len()); - debug_assert!( - needle.len() >= MIN_NEEDLE_LEN, - "needle must be at least {} bytes", - MIN_NEEDLE_LEN, - ); - debug_assert!( - needle.len() <= MAX_NEEDLE_LEN, - "needle must be at most {} bytes", - MAX_NEEDLE_LEN, - ); - - let (rare1i, rare2i) = (fwd.rare1i as usize, fwd.rare2i as usize); - let rare1chunk = V::splat(needle[rare1i]); - let rare2chunk = V::splat(needle[rare2i]); - - let start_ptr = haystack.as_ptr(); - let end_ptr = start_ptr.add(haystack.len()); - let max_ptr = end_ptr.sub(min_haystack_len); - let mut ptr = start_ptr; - - // N.B. I did experiment with unrolling the loop to deal with size(V) - // bytes at a time and 2*size(V) bytes at a time. The double unroll was - // marginally faster while the quadruple unroll was unambiguously slower. - // In the end, I decided the complexity from unrolling wasn't worth it. I - // used the memmem/krate/prebuilt/huge-en/ benchmarks to compare. 
- while ptr <= max_ptr { - let m = fwd_find_in_chunk( - fwd, needle, ptr, end_ptr, rare1chunk, rare2chunk, !0, - ); - if let Some(chunki) = m { - return Some(matched(start_ptr, ptr, chunki)); - } - ptr = ptr.add(size_of::()); - } - if ptr < end_ptr { - let remaining = diff(end_ptr, ptr); - debug_assert!( - remaining < min_haystack_len, - "remaining bytes should be smaller than the minimum haystack \ - length of {}, but there are {} bytes remaining", - min_haystack_len, - remaining, - ); - if remaining < needle.len() { - return None; - } - debug_assert!( - max_ptr < ptr, - "after main loop, ptr should have exceeded max_ptr", - ); - let overlap = diff(ptr, max_ptr); - debug_assert!( - overlap > 0, - "overlap ({}) must always be non-zero", - overlap, - ); - debug_assert!( - overlap < size_of::(), - "overlap ({}) cannot possibly be >= than a vector ({})", - overlap, - size_of::(), - ); - // The mask has all of its bits set except for the first N least - // significant bits, where N=overlap. This way, any matches that - // occur in find_in_chunk within the overlap are automatically - // ignored. - let mask = !((1 << overlap) - 1); - ptr = max_ptr; - let m = fwd_find_in_chunk( - fwd, needle, ptr, end_ptr, rare1chunk, rare2chunk, mask, - ); - if let Some(chunki) = m { - return Some(matched(start_ptr, ptr, chunki)); - } - } - None -} - -/// Search for an occurrence of two rare bytes from the needle in the chunk -/// pointed to by ptr, with the end of the haystack pointed to by end_ptr. When -/// an occurrence is found, memcmp is run to check if a match occurs at the -/// corresponding position. -/// -/// rare1chunk and rare2chunk correspond to vectors with the rare1 and rare2 -/// bytes repeated in each 8-bit lane, respectively. -/// -/// mask should have bits set corresponding the positions in the chunk in which -/// matches are considered. 
This is only used for the last vector load where -/// the beginning of the vector might have overlapped with the last load in -/// the main loop. The mask lets us avoid visiting positions that have already -/// been discarded as matches. -/// -/// # Safety -/// -/// It must be safe to do an unaligned read of size(V) bytes starting at both -/// (ptr + rare1i) and (ptr + rare2i). It must also be safe to do unaligned -/// loads on ptr up to (end_ptr - needle.len()). -#[inline(always)] -unsafe fn fwd_find_in_chunk( - fwd: &Forward, - needle: &[u8], - ptr: *const u8, - end_ptr: *const u8, - rare1chunk: V, - rare2chunk: V, - mask: u32, -) -> Option { - let chunk0 = V::load_unaligned(ptr.add(fwd.rare1i as usize)); - let chunk1 = V::load_unaligned(ptr.add(fwd.rare2i as usize)); - - let eq0 = chunk0.cmpeq(rare1chunk); - let eq1 = chunk1.cmpeq(rare2chunk); - - let mut match_offsets = eq0.and(eq1).movemask() & mask; - while match_offsets != 0 { - let offset = match_offsets.trailing_zeros() as usize; - let ptr = ptr.add(offset); - if end_ptr.sub(needle.len()) < ptr { - return None; - } - let chunk = core::slice::from_raw_parts(ptr, needle.len()); - if memcmp(needle, chunk) { - return Some(offset); - } - match_offsets &= match_offsets - 1; - } - None -} - -/// Accepts a chunk-relative offset and returns a haystack relative offset -/// after updating the prefilter state. -/// -/// See the same function with the same name in the prefilter variant of this -/// algorithm to learned why it's tagged with inline(never). Even here, where -/// the function is simpler, inlining it leads to poorer codegen. (Although -/// it does improve some benchmarks, like prebuiltiter/huge-en/common-you.) -#[cold] -#[inline(never)] -fn matched(start_ptr: *const u8, ptr: *const u8, chunki: usize) -> usize { - diff(ptr, start_ptr) + chunki -} - -/// Subtract `b` from `a` and return the difference. `a` must be greater than -/// or equal to `b`. 
-fn diff(a: *const u8, b: *const u8) -> usize { - debug_assert!(a >= b); - (a as usize) - (b as usize) -} diff --git a/vendor/memchr/src/memmem/mod.rs b/vendor/memchr/src/memmem/mod.rs index e1cd1ae..4f04943 100644 --- a/vendor/memchr/src/memmem/mod.rs +++ b/vendor/memchr/src/memmem/mod.rs @@ -66,99 +66,25 @@ assert_eq!(None, finder.find(b"quux baz bar")); ``` */ -pub use self::prefilter::Prefilter; +pub use crate::memmem::searcher::PrefilterConfig as Prefilter; + +// This is exported here for use in the crate::arch::all::twoway +// implementation. This is essentially an abstraction breaker. Namely, the +// public API of twoway doesn't support providing a prefilter, but its crate +// internal API does. The main reason for this is that I didn't want to do the +// API design required to support it without a concrete use case. +pub(crate) use crate::memmem::searcher::Pre; use crate::{ - cow::CowBytes, - memmem::{ - prefilter::{Pre, PrefilterFn, PrefilterState}, - rabinkarp::NeedleHash, - rarebytes::RareNeedleBytes, + arch::all::{ + packedpair::{DefaultFrequencyRank, HeuristicFrequencyRank}, + rabinkarp, }, + cow::CowBytes, + memmem::searcher::{PrefilterState, Searcher, SearcherRev}, }; -/// Defines a suite of quickcheck properties for forward and reverse -/// substring searching. -/// -/// This is defined in this specific spot so that it can be used freely among -/// the different substring search implementations. I couldn't be bothered to -/// fight with the macro-visibility rules enough to figure out how to stuff it -/// somewhere more convenient. -#[cfg(all(test, feature = "std"))] -macro_rules! define_memmem_quickcheck_tests { - ($fwd:expr, $rev:expr) => { - use crate::memmem::proptests; - - quickcheck::quickcheck! 
{ - fn qc_fwd_prefix_is_substring(bs: Vec) -> bool { - proptests::prefix_is_substring(false, &bs, $fwd) - } - - fn qc_fwd_suffix_is_substring(bs: Vec) -> bool { - proptests::suffix_is_substring(false, &bs, $fwd) - } - - fn qc_fwd_matches_naive( - haystack: Vec, - needle: Vec - ) -> bool { - proptests::matches_naive(false, &haystack, &needle, $fwd) - } - - fn qc_rev_prefix_is_substring(bs: Vec) -> bool { - proptests::prefix_is_substring(true, &bs, $rev) - } - - fn qc_rev_suffix_is_substring(bs: Vec) -> bool { - proptests::suffix_is_substring(true, &bs, $rev) - } - - fn qc_rev_matches_naive( - haystack: Vec, - needle: Vec - ) -> bool { - proptests::matches_naive(true, &haystack, &needle, $rev) - } - } - }; -} - -/// Defines a suite of "simple" hand-written tests for a substring -/// implementation. -/// -/// This is defined here for the same reason that -/// define_memmem_quickcheck_tests is defined here. -#[cfg(test)] -macro_rules! define_memmem_simple_tests { - ($fwd:expr, $rev:expr) => { - use crate::memmem::testsimples; - - #[test] - fn simple_forward() { - testsimples::run_search_tests_fwd($fwd); - } - - #[test] - fn simple_reverse() { - testsimples::run_search_tests_rev($rev); - } - }; -} - -mod byte_frequencies; -#[cfg(memchr_runtime_simd)] -mod genericsimd; -mod prefilter; -mod rabinkarp; -mod rarebytes; -mod twoway; -mod util; -#[cfg(memchr_runtime_simd)] -mod vector; -#[cfg(all(memchr_runtime_wasm128))] -mod wasm; -#[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))] -mod x86; +mod searcher; /// Returns an iterator over all non-overlapping occurrences of a substring in /// a haystack. 
@@ -258,7 +184,7 @@ pub fn rfind_iter<'h, 'n, N: 'n + ?Sized + AsRef<[u8]>>( #[inline] pub fn find(haystack: &[u8], needle: &[u8]) -> Option { if haystack.len() < 64 { - rabinkarp::find(haystack, needle) + rabinkarp::Finder::new(needle).find(haystack, needle) } else { Finder::new(needle).find(haystack) } @@ -295,7 +221,7 @@ pub fn find(haystack: &[u8], needle: &[u8]) -> Option { #[inline] pub fn rfind(haystack: &[u8], needle: &[u8]) -> Option { if haystack.len() < 64 { - rabinkarp::rfind(haystack, needle) + rabinkarp::FinderRev::new(needle).rfind(haystack, needle) } else { FinderRev::new(needle).rfind(haystack) } @@ -307,7 +233,7 @@ pub fn rfind(haystack: &[u8], needle: &[u8]) -> Option { /// /// `'h` is the lifetime of the haystack while `'n` is the lifetime of the /// needle. -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct FindIter<'h, 'n> { haystack: &'h [u8], prestate: PrefilterState, @@ -321,7 +247,7 @@ impl<'h, 'n> FindIter<'h, 'n> { haystack: &'h [u8], finder: Finder<'n>, ) -> FindIter<'h, 'n> { - let prestate = finder.searcher.prefilter_state(); + let prestate = PrefilterState::new(); FindIter { haystack, prestate, finder, pos: 0 } } @@ -331,8 +257,8 @@ impl<'h, 'n> FindIter<'h, 'n> { /// If this is already an owned iterator, then this is a no-op. Otherwise, /// this copies the needle. /// - /// This is only available when the `std` feature is enabled. - #[cfg(feature = "std")] + /// This is only available when the `alloc` feature is enabled. 
+ #[cfg(feature = "alloc")] #[inline] pub fn into_owned(self) -> FindIter<'h, 'static> { FindIter { @@ -348,20 +274,32 @@ impl<'h, 'n> Iterator for FindIter<'h, 'n> { type Item = usize; fn next(&mut self) -> Option { - if self.pos > self.haystack.len() { - return None; - } - let result = self - .finder - .searcher - .find(&mut self.prestate, &self.haystack[self.pos..]); - match result { - None => None, - Some(i) => { - let pos = self.pos + i; - self.pos = pos + core::cmp::max(1, self.finder.needle().len()); - Some(pos) - } + let needle = self.finder.needle(); + let haystack = self.haystack.get(self.pos..)?; + let idx = + self.finder.searcher.find(&mut self.prestate, haystack, needle)?; + + let pos = self.pos + idx; + self.pos = pos + needle.len().max(1); + + Some(pos) + } + + fn size_hint(&self) -> (usize, Option) { + // The largest possible number of non-overlapping matches is the + // quotient of the haystack and the needle (or the length of the + // haystack, if the needle is empty) + match self.haystack.len().checked_sub(self.pos) { + None => (0, Some(0)), + Some(haystack_len) => match self.finder.needle().len() { + // Empty needles always succeed and match at every point + // (including the very end) + 0 => ( + haystack_len.saturating_add(1), + haystack_len.checked_add(1), + ), + needle_len => (0, Some(haystack_len / needle_len)), + }, } } } @@ -372,7 +310,7 @@ impl<'h, 'n> Iterator for FindIter<'h, 'n> { /// /// `'h` is the lifetime of the haystack while `'n` is the lifetime of the /// needle. -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct FindRevIter<'h, 'n> { haystack: &'h [u8], finder: FinderRev<'n>, @@ -398,7 +336,7 @@ impl<'h, 'n> FindRevIter<'h, 'n> { /// this copies the needle. /// /// This is only available when the `std` feature is enabled. 
- #[cfg(feature = "std")] + #[cfg(feature = "alloc")] #[inline] pub fn into_owned(self) -> FindRevIter<'h, 'static> { FindRevIter { @@ -447,7 +385,8 @@ impl<'h, 'n> Iterator for FindRevIter<'h, 'n> { /// the lifetime of its needle. #[derive(Clone, Debug)] pub struct Finder<'n> { - searcher: Searcher<'n>, + needle: CowBytes<'n>, + searcher: Searcher, } impl<'n> Finder<'n> { @@ -481,8 +420,11 @@ impl<'n> Finder<'n> { /// assert_eq!(Some(4), Finder::new("bar").find(haystack)); /// assert_eq!(None, Finder::new("quux").find(haystack)); /// ``` + #[inline] pub fn find(&self, haystack: &[u8]) -> Option { - self.searcher.find(&mut self.searcher.prefilter_state(), haystack) + let mut prestate = PrefilterState::new(); + let needle = self.needle.as_slice(); + self.searcher.find(&mut prestate, haystack, needle) } /// Returns an iterator over all occurrences of a substring in a haystack. @@ -525,11 +467,14 @@ impl<'n> Finder<'n> { /// If this is already an owned finder, then this is a no-op. Otherwise, /// this copies the needle. /// - /// This is only available when the `std` feature is enabled. - #[cfg(feature = "std")] + /// This is only available when the `alloc` feature is enabled. + #[cfg(feature = "alloc")] #[inline] pub fn into_owned(self) -> Finder<'static> { - Finder { searcher: self.searcher.into_owned() } + Finder { + needle: self.needle.into_owned(), + searcher: self.searcher.clone(), + } } /// Convert this finder into its borrowed variant. @@ -544,7 +489,10 @@ impl<'n> Finder<'n> { /// shorter of the two. #[inline] pub fn as_ref(&self) -> Finder<'_> { - Finder { searcher: self.searcher.as_ref() } + Finder { + needle: CowBytes::new(self.needle()), + searcher: self.searcher.clone(), + } } /// Returns the needle that this finder searches for. @@ -555,7 +503,7 @@ impl<'n> Finder<'n> { /// needle returned must necessarily be the shorter of the two. 
#[inline] pub fn needle(&self) -> &[u8] { - self.searcher.needle() + self.needle.as_slice() } } @@ -574,7 +522,8 @@ impl<'n> Finder<'n> { /// the lifetime of its needle. #[derive(Clone, Debug)] pub struct FinderRev<'n> { - searcher: SearcherRev<'n>, + needle: CowBytes<'n>, + searcher: SearcherRev, } impl<'n> FinderRev<'n> { @@ -612,7 +561,7 @@ impl<'n> FinderRev<'n> { /// assert_eq!(None, FinderRev::new("quux").rfind(haystack)); /// ``` pub fn rfind>(&self, haystack: B) -> Option { - self.searcher.rfind(haystack.as_ref()) + self.searcher.rfind(haystack.as_ref(), self.needle.as_slice()) } /// Returns a reverse iterator over all occurrences of a substring in a @@ -657,10 +606,13 @@ impl<'n> FinderRev<'n> { /// this copies the needle. /// /// This is only available when the `std` feature is enabled. - #[cfg(feature = "std")] + #[cfg(feature = "alloc")] #[inline] pub fn into_owned(self) -> FinderRev<'static> { - FinderRev { searcher: self.searcher.into_owned() } + FinderRev { + needle: self.needle.into_owned(), + searcher: self.searcher.clone(), + } } /// Convert this finder into its borrowed variant. @@ -675,7 +627,10 @@ impl<'n> FinderRev<'n> { /// shorter of the two. #[inline] pub fn as_ref(&self) -> FinderRev<'_> { - FinderRev { searcher: self.searcher.as_ref() } + FinderRev { + needle: CowBytes::new(self.needle()), + searcher: self.searcher.clone(), + } } /// Returns the needle that this finder searches for. @@ -686,7 +641,7 @@ impl<'n> FinderRev<'n> { /// needle returned must necessarily be the shorter of the two. #[inline] pub fn needle(&self) -> &[u8] { - self.searcher.needle() + self.needle.as_slice() } } @@ -697,7 +652,7 @@ impl<'n> FinderRev<'n> { /// heuristic prefilters used to speed up certain searches. 
#[derive(Clone, Debug, Default)] pub struct FinderBuilder { - config: SearcherConfig, + prefilter: Prefilter, } impl FinderBuilder { @@ -712,7 +667,26 @@ impl FinderBuilder { &self, needle: &'n B, ) -> Finder<'n> { - Finder { searcher: Searcher::new(self.config, needle.as_ref()) } + self.build_forward_with_ranker(DefaultFrequencyRank, needle) + } + + /// Build a forward finder using the given needle and a custom heuristic for + /// determining the frequency of a given byte in the dataset. + /// See [`HeuristicFrequencyRank`] for more details. + pub fn build_forward_with_ranker< + 'n, + R: HeuristicFrequencyRank, + B: ?Sized + AsRef<[u8]>, + >( + &self, + ranker: R, + needle: &'n B, + ) -> Finder<'n> { + let needle = needle.as_ref(); + Finder { + needle: CowBytes::new(needle), + searcher: Searcher::new(self.prefilter, ranker, needle), + } } /// Build a reverse finder using the given needle from the current @@ -721,7 +695,11 @@ impl FinderBuilder { &self, needle: &'n B, ) -> FinderRev<'n> { - FinderRev { searcher: SearcherRev::new(needle.as_ref()) } + let needle = needle.as_ref(); + FinderRev { + needle: CowBytes::new(needle), + searcher: SearcherRev::new(needle), + } } /// Configure the prefilter setting for the finder. @@ -729,593 +707,31 @@ impl FinderBuilder { /// See the documentation for [`Prefilter`] for more discussion on why /// you might want to configure this. pub fn prefilter(&mut self, prefilter: Prefilter) -> &mut FinderBuilder { - self.config.prefilter = prefilter; + self.prefilter = prefilter; self } } -/// The internal implementation of a forward substring searcher. -/// -/// The reality is that this is a "meta" searcher. Namely, depending on a -/// variety of parameters (CPU support, target, needle size, haystack size and -/// even dynamic properties such as prefilter effectiveness), the actual -/// algorithm employed to do substring search may change. -#[derive(Clone, Debug)] -struct Searcher<'n> { - /// The actual needle we're searching for. 
- /// - /// A CowBytes is like a Cow<[u8]>, except in no_std environments, it is - /// specialized to a single variant (the borrowed form). - needle: CowBytes<'n>, - /// A collection of facts computed on the needle that are useful for more - /// than one substring search algorithm. - ninfo: NeedleInfo, - /// A prefilter function, if it was deemed appropriate. - /// - /// Some substring search implementations (like Two-Way) benefit greatly - /// if we can quickly find candidate starting positions for a match. - prefn: Option, - /// The actual substring implementation in use. - kind: SearcherKind, -} - -/// A collection of facts computed about a search needle. -/// -/// We group these things together because it's useful to be able to hand them -/// to prefilters or substring algorithms that want them. -#[derive(Clone, Copy, Debug)] -pub(crate) struct NeedleInfo { - /// The offsets of "rare" bytes detected in the needle. - /// - /// This is meant to be a heuristic in order to maximize the effectiveness - /// of vectorized code. Namely, vectorized code tends to focus on only - /// one or two bytes. If we pick bytes from the needle that occur - /// infrequently, then more time will be spent in the vectorized code and - /// will likely make the overall search (much) faster. - /// - /// Of course, this is only a heuristic based on a background frequency - /// distribution of bytes. But it tends to work very well in practice. - pub(crate) rarebytes: RareNeedleBytes, - /// A Rabin-Karp hash of the needle. - /// - /// This is store here instead of in a more specific Rabin-Karp search - /// since Rabin-Karp may be used even if another SearchKind corresponds - /// to some other search implementation. e.g., If measurements suggest RK - /// is faster in some cases or if a search implementation can't handle - /// particularly small haystack. (Moreover, we cannot use RK *generally*, - /// since its worst case time is multiplicative. 
Instead, we only use it - /// some small haystacks, where "small" is a constant.) - pub(crate) nhash: NeedleHash, -} - -/// Configuration for substring search. -#[derive(Clone, Copy, Debug, Default)] -struct SearcherConfig { - /// This permits changing the behavior of the prefilter, since it can have - /// a variable impact on performance. - prefilter: Prefilter, -} - -#[derive(Clone, Debug)] -enum SearcherKind { - /// A special case for empty needles. An empty needle always matches, even - /// in an empty haystack. - Empty, - /// This is used whenever the needle is a single byte. In this case, we - /// always use memchr. - OneByte(u8), - /// Two-Way is the generic work horse and is what provides our additive - /// linear time guarantee. In general, it's used when the needle is bigger - /// than 8 bytes or so. - TwoWay(twoway::Forward), - #[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))] - GenericSIMD128(x86::sse::Forward), - #[cfg(memchr_runtime_wasm128)] - GenericSIMD128(wasm::Forward), - #[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))] - GenericSIMD256(x86::avx::Forward), -} - -impl<'n> Searcher<'n> { - fn new(config: SearcherConfig, needle: &'n [u8]) -> Searcher<'n> { - use self::SearcherKind::*; - - let ninfo = NeedleInfo::new(needle); - let mk = |kind: SearcherKind| { - let prefn = prefilter::forward( - &config.prefilter, - &ninfo.rarebytes, - needle, - ); - Searcher { needle: CowBytes::new(needle), ninfo, prefn, kind } - }; - if needle.len() == 0 { - return mk(Empty); - } - if needle.len() == 1 { - return mk(OneByte(needle[0])); - } - #[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))] - { - if let Some(fwd) = x86::avx::Forward::new(&ninfo, needle) { - return mk(GenericSIMD256(fwd)); - } else if let Some(fwd) = x86::sse::Forward::new(&ninfo, needle) { - return mk(GenericSIMD128(fwd)); - } - } - #[cfg(all(target_arch = "wasm32", memchr_runtime_simd))] - { - if let Some(fwd) = wasm::Forward::new(&ninfo, 
needle) { - return mk(GenericSIMD128(fwd)); - } - } - - mk(TwoWay(twoway::Forward::new(needle))) - } - - /// Return a fresh prefilter state that can be used with this searcher. - /// A prefilter state is used to track the effectiveness of a searcher's - /// prefilter for speeding up searches. Therefore, the prefilter state - /// should generally be reused on subsequent searches (such as in an - /// iterator). For searches on a different haystack, then a new prefilter - /// state should be used. - /// - /// This always initializes a valid (but possibly inert) prefilter state - /// even if this searcher does not have a prefilter enabled. - fn prefilter_state(&self) -> PrefilterState { - if self.prefn.is_none() { - PrefilterState::inert() - } else { - PrefilterState::new() - } - } - - fn needle(&self) -> &[u8] { - self.needle.as_slice() - } - - fn as_ref(&self) -> Searcher<'_> { - use self::SearcherKind::*; - - let kind = match self.kind { - Empty => Empty, - OneByte(b) => OneByte(b), - TwoWay(tw) => TwoWay(tw), - #[cfg(all(not(miri), memchr_runtime_simd))] - GenericSIMD128(gs) => GenericSIMD128(gs), - #[cfg(all( - not(miri), - target_arch = "x86_64", - memchr_runtime_simd - ))] - GenericSIMD256(gs) => GenericSIMD256(gs), - }; - Searcher { - needle: CowBytes::new(self.needle()), - ninfo: self.ninfo, - prefn: self.prefn, - kind, - } - } - - #[cfg(feature = "std")] - fn into_owned(self) -> Searcher<'static> { - use self::SearcherKind::*; - - let kind = match self.kind { - Empty => Empty, - OneByte(b) => OneByte(b), - TwoWay(tw) => TwoWay(tw), - #[cfg(all(not(miri), memchr_runtime_simd))] - GenericSIMD128(gs) => GenericSIMD128(gs), - #[cfg(all( - not(miri), - target_arch = "x86_64", - memchr_runtime_simd - ))] - GenericSIMD256(gs) => GenericSIMD256(gs), - }; - Searcher { - needle: self.needle.into_owned(), - ninfo: self.ninfo, - prefn: self.prefn, - kind, - } - } - - /// Implements forward substring search by selecting the implementation - /// chosen at construction and 
executing it on the given haystack with the - /// prefilter's current state of effectiveness. - #[inline(always)] - fn find( - &self, - state: &mut PrefilterState, - haystack: &[u8], - ) -> Option { - use self::SearcherKind::*; - - let needle = self.needle(); - if haystack.len() < needle.len() { - return None; - } - match self.kind { - Empty => Some(0), - OneByte(b) => crate::memchr(b, haystack), - TwoWay(ref tw) => { - // For very short haystacks (e.g., where the prefilter probably - // can't run), it's faster to just run RK. - if rabinkarp::is_fast(haystack, needle) { - rabinkarp::find_with(&self.ninfo.nhash, haystack, needle) - } else { - self.find_tw(tw, state, haystack, needle) - } - } - #[cfg(all(not(miri), memchr_runtime_simd))] - GenericSIMD128(ref gs) => { - // The SIMD matcher can't handle particularly short haystacks, - // so we fall back to RK in these cases. - if haystack.len() < gs.min_haystack_len() { - rabinkarp::find_with(&self.ninfo.nhash, haystack, needle) - } else { - gs.find(haystack, needle) - } - } - #[cfg(all( - not(miri), - target_arch = "x86_64", - memchr_runtime_simd - ))] - GenericSIMD256(ref gs) => { - // The SIMD matcher can't handle particularly short haystacks, - // so we fall back to RK in these cases. - if haystack.len() < gs.min_haystack_len() { - rabinkarp::find_with(&self.ninfo.nhash, haystack, needle) - } else { - gs.find(haystack, needle) - } - } - } - } - - /// Calls Two-Way on the given haystack/needle. - /// - /// This is marked as unlineable since it seems to have a better overall - /// effect on benchmarks. However, this is one of those cases where - /// inlining it results an improvement in other benchmarks too, so I - /// suspect we just don't have enough data yet to make the right call here. - /// - /// I suspect the main problem is that this function contains two different - /// inlined copies of Two-Way: one with and one without prefilters enabled. 
- #[inline(never)] - fn find_tw( - &self, - tw: &twoway::Forward, - state: &mut PrefilterState, - haystack: &[u8], - needle: &[u8], - ) -> Option { - if let Some(prefn) = self.prefn { - // We used to look at the length of a haystack here. That is, if - // it was too small, then don't bother with the prefilter. But two - // things changed: the prefilter falls back to memchr for small - // haystacks, and, above, Rabin-Karp is employed for tiny haystacks - // anyway. - if state.is_effective() { - let mut pre = Pre { state, prefn, ninfo: &self.ninfo }; - return tw.find(Some(&mut pre), haystack, needle); - } - } - tw.find(None, haystack, needle) - } -} - -impl NeedleInfo { - pub(crate) fn new(needle: &[u8]) -> NeedleInfo { - NeedleInfo { - rarebytes: RareNeedleBytes::forward(needle), - nhash: NeedleHash::forward(needle), - } - } -} - -/// The internal implementation of a reverse substring searcher. -/// -/// See the forward searcher docs for more details. Currently, the reverse -/// searcher is considerably simpler since it lacks prefilter support. This -/// was done because it adds a lot of code, and more surface area to test. And -/// in particular, it's not clear whether a prefilter on reverse searching is -/// worth it. (If you have a compelling use case, please file an issue!) -#[derive(Clone, Debug)] -struct SearcherRev<'n> { - /// The actual needle we're searching for. - needle: CowBytes<'n>, - /// A Rabin-Karp hash of the needle. - nhash: NeedleHash, - /// The actual substring implementation in use. - kind: SearcherRevKind, -} - -#[derive(Clone, Debug)] -enum SearcherRevKind { - /// A special case for empty needles. An empty needle always matches, even - /// in an empty haystack. - Empty, - /// This is used whenever the needle is a single byte. In this case, we - /// always use memchr. - OneByte(u8), - /// Two-Way is the generic work horse and is what provides our additive - /// linear time guarantee. 
In general, it's used when the needle is bigger - /// than 8 bytes or so. - TwoWay(twoway::Reverse), -} - -impl<'n> SearcherRev<'n> { - fn new(needle: &'n [u8]) -> SearcherRev<'n> { - use self::SearcherRevKind::*; - - let kind = if needle.len() == 0 { - Empty - } else if needle.len() == 1 { - OneByte(needle[0]) - } else { - TwoWay(twoway::Reverse::new(needle)) - }; - SearcherRev { - needle: CowBytes::new(needle), - nhash: NeedleHash::reverse(needle), - kind, - } - } - - fn needle(&self) -> &[u8] { - self.needle.as_slice() - } - - fn as_ref(&self) -> SearcherRev<'_> { - use self::SearcherRevKind::*; - - let kind = match self.kind { - Empty => Empty, - OneByte(b) => OneByte(b), - TwoWay(tw) => TwoWay(tw), - }; - SearcherRev { - needle: CowBytes::new(self.needle()), - nhash: self.nhash, - kind, - } - } - - #[cfg(feature = "std")] - fn into_owned(self) -> SearcherRev<'static> { - use self::SearcherRevKind::*; - - let kind = match self.kind { - Empty => Empty, - OneByte(b) => OneByte(b), - TwoWay(tw) => TwoWay(tw), - }; - SearcherRev { - needle: self.needle.into_owned(), - nhash: self.nhash, - kind, - } - } - - /// Implements reverse substring search by selecting the implementation - /// chosen at construction and executing it on the given haystack with the - /// prefilter's current state of effectiveness. - #[inline(always)] - fn rfind(&self, haystack: &[u8]) -> Option { - use self::SearcherRevKind::*; - - let needle = self.needle(); - if haystack.len() < needle.len() { - return None; - } - match self.kind { - Empty => Some(haystack.len()), - OneByte(b) => crate::memrchr(b, haystack), - TwoWay(ref tw) => { - // For very short haystacks (e.g., where the prefilter probably - // can't run), it's faster to just run RK. 
- if rabinkarp::is_fast(haystack, needle) { - rabinkarp::rfind_with(&self.nhash, haystack, needle) - } else { - tw.rfind(haystack, needle) - } - } - } - } -} - -/// This module defines some generic quickcheck properties useful for testing -/// any substring search algorithm. It also runs those properties for the -/// top-level public API memmem routines. (The properties are also used to -/// test various substring search implementations more granularly elsewhere as -/// well.) -#[cfg(all(test, feature = "std", not(miri)))] -mod proptests { - // N.B. This defines the quickcheck tests using the properties defined - // below. Because of macro-visibility weirdness, the actual macro is - // defined at the top of this file. - define_memmem_quickcheck_tests!(super::find, super::rfind); - - /// Check that every prefix of the given byte string is a substring. - pub(crate) fn prefix_is_substring( - reverse: bool, - bs: &[u8], - mut search: impl FnMut(&[u8], &[u8]) -> Option, - ) -> bool { - if bs.is_empty() { - return true; - } - for i in 0..(bs.len() - 1) { - let prefix = &bs[..i]; - if reverse { - assert_eq!(naive_rfind(bs, prefix), search(bs, prefix)); - } else { - assert_eq!(naive_find(bs, prefix), search(bs, prefix)); - } - } - true - } - - /// Check that every suffix of the given byte string is a substring. - pub(crate) fn suffix_is_substring( - reverse: bool, - bs: &[u8], - mut search: impl FnMut(&[u8], &[u8]) -> Option, - ) -> bool { - if bs.is_empty() { - return true; - } - for i in 0..(bs.len() - 1) { - let suffix = &bs[i..]; - if reverse { - assert_eq!(naive_rfind(bs, suffix), search(bs, suffix)); - } else { - assert_eq!(naive_find(bs, suffix), search(bs, suffix)); - } - } - true - } - - /// Check that naive substring search matches the result of the given search - /// algorithm. 
- pub(crate) fn matches_naive( - reverse: bool, - haystack: &[u8], - needle: &[u8], - mut search: impl FnMut(&[u8], &[u8]) -> Option, - ) -> bool { - if reverse { - naive_rfind(haystack, needle) == search(haystack, needle) - } else { - naive_find(haystack, needle) == search(haystack, needle) - } - } - - /// Naively search forwards for the given needle in the given haystack. - fn naive_find(haystack: &[u8], needle: &[u8]) -> Option { - if needle.is_empty() { - return Some(0); - } else if haystack.len() < needle.len() { - return None; - } - for i in 0..(haystack.len() - needle.len() + 1) { - if needle == &haystack[i..i + needle.len()] { - return Some(i); - } - } - None - } - - /// Naively search in reverse for the given needle in the given haystack. - fn naive_rfind(haystack: &[u8], needle: &[u8]) -> Option { - if needle.is_empty() { - return Some(haystack.len()); - } else if haystack.len() < needle.len() { - return None; - } - for i in (0..(haystack.len() - needle.len() + 1)).rev() { - if needle == &haystack[i..i + needle.len()] { - return Some(i); - } - } - None - } -} - -/// This module defines some hand-written "simple" substring tests. It -/// also provides routines for easily running them on any substring search -/// implementation. #[cfg(test)] -mod testsimples { - define_memmem_simple_tests!(super::find, super::rfind); - - /// Each test is a (needle, haystack, expected_fwd, expected_rev) tuple. 
- type SearchTest = - (&'static str, &'static str, Option, Option); - - const SEARCH_TESTS: &'static [SearchTest] = &[ - ("", "", Some(0), Some(0)), - ("", "a", Some(0), Some(1)), - ("", "ab", Some(0), Some(2)), - ("", "abc", Some(0), Some(3)), - ("a", "", None, None), - ("a", "a", Some(0), Some(0)), - ("a", "aa", Some(0), Some(1)), - ("a", "ba", Some(1), Some(1)), - ("a", "bba", Some(2), Some(2)), - ("a", "bbba", Some(3), Some(3)), - ("a", "bbbab", Some(3), Some(3)), - ("a", "bbbabb", Some(3), Some(3)), - ("a", "bbbabbb", Some(3), Some(3)), - ("a", "bbbbbb", None, None), - ("ab", "", None, None), - ("ab", "a", None, None), - ("ab", "b", None, None), - ("ab", "ab", Some(0), Some(0)), - ("ab", "aab", Some(1), Some(1)), - ("ab", "aaab", Some(2), Some(2)), - ("ab", "abaab", Some(0), Some(3)), - ("ab", "baaab", Some(3), Some(3)), - ("ab", "acb", None, None), - ("ab", "abba", Some(0), Some(0)), - ("abc", "ab", None, None), - ("abc", "abc", Some(0), Some(0)), - ("abc", "abcz", Some(0), Some(0)), - ("abc", "abczz", Some(0), Some(0)), - ("abc", "zabc", Some(1), Some(1)), - ("abc", "zzabc", Some(2), Some(2)), - ("abc", "azbc", None, None), - ("abc", "abzc", None, None), - ("abczdef", "abczdefzzzzzzzzzzzzzzzzzzzz", Some(0), Some(0)), - ("abczdef", "zzzzzzzzzzzzzzzzzzzzabczdef", Some(20), Some(20)), - ("xyz", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaxyz", Some(32), Some(32)), - // Failures caught by quickcheck. - ("\u{0}\u{15}", "\u{0}\u{15}\u{15}\u{0}", Some(0), Some(0)), - ("\u{0}\u{1e}", "\u{1e}\u{0}", None, None), - ]; - - /// Run the substring search tests. `search` should be a closure that - /// accepts a haystack and a needle and returns the starting position - /// of the first occurrence of needle in the haystack, or `None` if one - /// doesn't exist. 
- pub(crate) fn run_search_tests_fwd( - mut search: impl FnMut(&[u8], &[u8]) -> Option, - ) { - for &(needle, haystack, expected_fwd, _) in SEARCH_TESTS { - let (n, h) = (needle.as_bytes(), haystack.as_bytes()); - assert_eq!( - expected_fwd, - search(h, n), - "needle: {:?}, haystack: {:?}, expected: {:?}", - n, - h, - expected_fwd - ); - } - } - - /// Run the substring search tests. `search` should be a closure that - /// accepts a haystack and a needle and returns the starting position of - /// the last occurrence of needle in the haystack, or `None` if one doesn't - /// exist. - pub(crate) fn run_search_tests_rev( - mut search: impl FnMut(&[u8], &[u8]) -> Option, - ) { - for &(needle, haystack, _, expected_rev) in SEARCH_TESTS { - let (n, h) = (needle.as_bytes(), haystack.as_bytes()); - assert_eq!( - expected_rev, - search(h, n), - "needle: {:?}, haystack: {:?}, expected: {:?}", - n, - h, - expected_rev - ); - } +mod tests { + use super::*; + + define_substring_forward_quickcheck!(|h, n| Some(Finder::new(n).find(h))); + define_substring_reverse_quickcheck!(|h, n| Some( + FinderRev::new(n).rfind(h) + )); + + #[test] + fn forward() { + crate::tests::substring::Runner::new() + .fwd(|h, n| Some(Finder::new(n).find(h))) + .run(); + } + + #[test] + fn reverse() { + crate::tests::substring::Runner::new() + .rev(|h, n| Some(FinderRev::new(n).rfind(h))) + .run(); } } diff --git a/vendor/memchr/src/memmem/prefilter/fallback.rs b/vendor/memchr/src/memmem/prefilter/fallback.rs deleted file mode 100644 index ae1bbcc..0000000 --- a/vendor/memchr/src/memmem/prefilter/fallback.rs +++ /dev/null @@ -1,122 +0,0 @@ -/* -This module implements a "fallback" prefilter that only relies on memchr to -function. While memchr works best when it's explicitly vectorized, its -fallback implementations are fast enough to make a prefilter like this -worthwhile. - -The essence of this implementation is to identify two rare bytes in a needle -based on a background frequency distribution of bytes. 
We then run memchr on the -rarer byte. For each match, we use the second rare byte as a guard to quickly -check if a match is possible. If the position passes the guard test, then we do -a naive memcmp to confirm the match. - -In practice, this formulation works amazingly well, primarily because of the -heuristic use of a background frequency distribution. However, it does have a -number of weaknesses where it can get quite slow when its background frequency -distribution doesn't line up with the haystack being searched. This is why we -have specialized vector routines that essentially take this idea and move the -guard check into vectorized code. (Those specialized vector routines do still -make use of the background frequency distribution of bytes though.) - -This fallback implementation was originally formulated in regex many moons ago: -https://github.com/rust-lang/regex/blob/3db8722d0b204a85380fe2a65e13d7065d7dd968/src/literal/imp.rs#L370-L501 -Prior to that, I'm not aware of anyone using this technique in any prominent -substring search implementation. Although, I'm sure folks have had this same -insight long before me. - -Another version of this also appeared in bstr: -https://github.com/BurntSushi/bstr/blob/a444256ca7407fe180ee32534688549655b7a38e/src/search/prefilter.rs#L83-L340 -*/ - -use crate::memmem::{ - prefilter::{PrefilterFnTy, PrefilterState}, - NeedleInfo, -}; - -// Check that the functions below satisfy the Prefilter function type. -const _: PrefilterFnTy = find; - -/// Look for a possible occurrence of needle. The position returned -/// corresponds to the beginning of the occurrence, if one exists. -/// -/// Callers may assume that this never returns false negatives (i.e., it -/// never misses an actual occurrence), but must check that the returned -/// position corresponds to a match. That is, it can return false -/// positives. -/// -/// This should only be used when Freqy is constructed for forward -/// searching. 
-pub(crate) fn find( - prestate: &mut PrefilterState, - ninfo: &NeedleInfo, - haystack: &[u8], - needle: &[u8], -) -> Option { - let mut i = 0; - let (rare1i, rare2i) = ninfo.rarebytes.as_rare_usize(); - let (rare1, rare2) = ninfo.rarebytes.as_rare_bytes(needle); - while prestate.is_effective() { - // Use a fast vectorized implementation to skip to the next - // occurrence of the rarest byte (heuristically chosen) in the - // needle. - let found = crate::memchr(rare1, &haystack[i..])?; - prestate.update(found); - i += found; - - // If we can't align our first match with the haystack, then a - // match is impossible. - if i < rare1i { - i += 1; - continue; - } - - // Align our rare2 byte with the haystack. A mismatch means that - // a match is impossible. - let aligned_rare2i = i - rare1i + rare2i; - if haystack.get(aligned_rare2i) != Some(&rare2) { - i += 1; - continue; - } - - // We've done what we can. There might be a match here. - return Some(i - rare1i); - } - // The only way we get here is if we believe our skipping heuristic - // has become ineffective. We're allowed to return false positives, - // so return the position at which we advanced to, aligned to the - // haystack. 
- Some(i.saturating_sub(rare1i)) -} - -#[cfg(all(test, feature = "std"))] -mod tests { - use super::*; - - fn freqy_find(haystack: &[u8], needle: &[u8]) -> Option { - let ninfo = NeedleInfo::new(needle); - let mut prestate = PrefilterState::new(); - find(&mut prestate, &ninfo, haystack, needle) - } - - #[test] - fn freqy_forward() { - assert_eq!(Some(0), freqy_find(b"BARFOO", b"BAR")); - assert_eq!(Some(3), freqy_find(b"FOOBAR", b"BAR")); - assert_eq!(Some(0), freqy_find(b"zyzz", b"zyzy")); - assert_eq!(Some(2), freqy_find(b"zzzy", b"zyzy")); - assert_eq!(None, freqy_find(b"zazb", b"zyzy")); - assert_eq!(Some(0), freqy_find(b"yzyy", b"yzyz")); - assert_eq!(Some(2), freqy_find(b"yyyz", b"yzyz")); - assert_eq!(None, freqy_find(b"yayb", b"yzyz")); - } - - #[test] - #[cfg(not(miri))] - fn prefilter_permutations() { - use crate::memmem::prefilter::tests::PrefilterTest; - - // SAFETY: super::find is safe to call for all inputs and on all - // platforms. - unsafe { PrefilterTest::run_all_tests(super::find) }; - } -} diff --git a/vendor/memchr/src/memmem/prefilter/genericsimd.rs b/vendor/memchr/src/memmem/prefilter/genericsimd.rs deleted file mode 100644 index 1a6e387..0000000 --- a/vendor/memchr/src/memmem/prefilter/genericsimd.rs +++ /dev/null @@ -1,207 +0,0 @@ -use core::mem::size_of; - -use crate::memmem::{ - prefilter::{PrefilterFnTy, PrefilterState}, - vector::Vector, - NeedleInfo, -}; - -/// The implementation of the forward vector accelerated candidate finder. -/// -/// This is inspired by the "generic SIMD" algorithm described here: -/// http://0x80.pl/articles/simd-strfind.html#algorithm-1-generic-simd -/// -/// The main difference is that this is just a prefilter. That is, it reports -/// candidates once they are seen and doesn't attempt to confirm them. Also, -/// the bytes this routine uses to check for candidates are selected based on -/// an a priori background frequency distribution. 
This means that on most -/// haystacks, this will on average spend more time in vectorized code than you -/// would if you just selected the first and last bytes of the needle. -/// -/// Note that a non-prefilter variant of this algorithm can be found in the -/// parent module, but it only works on smaller needles. -/// -/// `prestate`, `ninfo`, `haystack` and `needle` are the four prefilter -/// function parameters. `fallback` is a prefilter that is used if the haystack -/// is too small to be handled with the given vector size. -/// -/// This routine is not safe because it is intended for callers to specialize -/// this with a particular vector (e.g., __m256i) and then call it with the -/// relevant target feature (e.g., avx2) enabled. -/// -/// # Panics -/// -/// If `needle.len() <= 1`, then this panics. -/// -/// # Safety -/// -/// Since this is meant to be used with vector functions, callers need to -/// specialize this inside of a function with a `target_feature` attribute. -/// Therefore, callers must ensure that whatever target feature is being used -/// supports the vector functions that this function is specialized for. (For -/// the specific vector functions used, see the Vector trait implementations.) -#[inline(always)] -pub(crate) unsafe fn find( - prestate: &mut PrefilterState, - ninfo: &NeedleInfo, - haystack: &[u8], - needle: &[u8], - fallback: PrefilterFnTy, -) -> Option { - assert!(needle.len() >= 2, "needle must be at least 2 bytes"); - let (rare1i, rare2i) = ninfo.rarebytes.as_rare_ordered_usize(); - let min_haystack_len = rare2i + size_of::(); - if haystack.len() < min_haystack_len { - return fallback(prestate, ninfo, haystack, needle); - } - - let start_ptr = haystack.as_ptr(); - let end_ptr = start_ptr.add(haystack.len()); - let max_ptr = end_ptr.sub(min_haystack_len); - let mut ptr = start_ptr; - - let rare1chunk = V::splat(needle[rare1i]); - let rare2chunk = V::splat(needle[rare2i]); - - // N.B. 
I did experiment with unrolling the loop to deal with size(V) - // bytes at a time and 2*size(V) bytes at a time. The double unroll - // was marginally faster while the quadruple unroll was unambiguously - // slower. In the end, I decided the complexity from unrolling wasn't - // worth it. I used the memmem/krate/prebuilt/huge-en/ benchmarks to - // compare. - while ptr <= max_ptr { - let m = find_in_chunk2(ptr, rare1i, rare2i, rare1chunk, rare2chunk); - if let Some(chunki) = m { - return Some(matched(prestate, start_ptr, ptr, chunki)); - } - ptr = ptr.add(size_of::()); - } - if ptr < end_ptr { - // This routine immediately quits if a candidate match is found. - // That means that if we're here, no candidate matches have been - // found at or before 'ptr'. Thus, we don't need to mask anything - // out even though we might technically search part of the haystack - // that we've already searched (because we know it can't match). - ptr = max_ptr; - let m = find_in_chunk2(ptr, rare1i, rare2i, rare1chunk, rare2chunk); - if let Some(chunki) = m { - return Some(matched(prestate, start_ptr, ptr, chunki)); - } - } - prestate.update(haystack.len()); - None -} - -// Below are two different techniques for checking whether a candidate -// match exists in a given chunk or not. find_in_chunk2 checks two bytes -// where as find_in_chunk3 checks three bytes. The idea behind checking -// three bytes is that while we do a bit more work per iteration, we -// decrease the chances of a false positive match being reported and thus -// make the search faster overall. This actually works out for the -// memmem/krate/prebuilt/huge-en/never-all-common-bytes benchmark, where -// using find_in_chunk3 is about 25% faster than find_in_chunk2. However, -// it turns out that find_in_chunk2 is faster for all other benchmarks, so -// perhaps the extra check isn't worth it in practice. 
-// -// For now, we go with find_in_chunk2, but we leave find_in_chunk3 around -// to make it easy to switch to and benchmark when possible. - -/// Search for an occurrence of two rare bytes from the needle in the current -/// chunk pointed to by ptr. -/// -/// rare1chunk and rare2chunk correspond to vectors with the rare1 and rare2 -/// bytes repeated in each 8-bit lane, respectively. -/// -/// # Safety -/// -/// It must be safe to do an unaligned read of size(V) bytes starting at both -/// (ptr + rare1i) and (ptr + rare2i). -#[inline(always)] -unsafe fn find_in_chunk2( - ptr: *const u8, - rare1i: usize, - rare2i: usize, - rare1chunk: V, - rare2chunk: V, -) -> Option { - let chunk0 = V::load_unaligned(ptr.add(rare1i)); - let chunk1 = V::load_unaligned(ptr.add(rare2i)); - - let eq0 = chunk0.cmpeq(rare1chunk); - let eq1 = chunk1.cmpeq(rare2chunk); - - let match_offsets = eq0.and(eq1).movemask(); - if match_offsets == 0 { - return None; - } - Some(match_offsets.trailing_zeros() as usize) -} - -/// Search for an occurrence of two rare bytes and the first byte (even if one -/// of the rare bytes is equivalent to the first byte) from the needle in the -/// current chunk pointed to by ptr. -/// -/// firstchunk, rare1chunk and rare2chunk correspond to vectors with the first, -/// rare1 and rare2 bytes repeated in each 8-bit lane, respectively. -/// -/// # Safety -/// -/// It must be safe to do an unaligned read of size(V) bytes starting at ptr, -/// (ptr + rare1i) and (ptr + rare2i). 
-#[allow(dead_code)] -#[inline(always)] -unsafe fn find_in_chunk3( - ptr: *const u8, - rare1i: usize, - rare2i: usize, - firstchunk: V, - rare1chunk: V, - rare2chunk: V, -) -> Option { - let chunk0 = V::load_unaligned(ptr); - let chunk1 = V::load_unaligned(ptr.add(rare1i)); - let chunk2 = V::load_unaligned(ptr.add(rare2i)); - - let eq0 = chunk0.cmpeq(firstchunk); - let eq1 = chunk1.cmpeq(rare1chunk); - let eq2 = chunk2.cmpeq(rare2chunk); - - let match_offsets = eq0.and(eq1).and(eq2).movemask(); - if match_offsets == 0 { - return None; - } - Some(match_offsets.trailing_zeros() as usize) -} - -/// Accepts a chunk-relative offset and returns a haystack relative offset -/// after updating the prefilter state. -/// -/// Why do we use this unlineable function when a search completes? Well, -/// I don't know. Really. Obviously this function was not here initially. -/// When doing profiling, the codegen for the inner loop here looked bad and -/// I didn't know why. There were a couple extra 'add' instructions and an -/// extra 'lea' instruction that I couldn't explain. I hypothesized that the -/// optimizer was having trouble untangling the hot code in the loop from the -/// code that deals with a candidate match. By putting the latter into an -/// unlineable function, it kind of forces the issue and it had the intended -/// effect: codegen improved measurably. It's good for a ~10% improvement -/// across the board on the memmem/krate/prebuilt/huge-en/ benchmarks. -#[cold] -#[inline(never)] -fn matched( - prestate: &mut PrefilterState, - start_ptr: *const u8, - ptr: *const u8, - chunki: usize, -) -> usize { - let found = diff(ptr, start_ptr) + chunki; - prestate.update(found); - found -} - -/// Subtract `b` from `a` and return the difference. `a` must be greater than -/// or equal to `b`. 
-fn diff(a: *const u8, b: *const u8) -> usize { - debug_assert!(a >= b); - (a as usize) - (b as usize) -} diff --git a/vendor/memchr/src/memmem/prefilter/mod.rs b/vendor/memchr/src/memmem/prefilter/mod.rs deleted file mode 100644 index 015d3b2..0000000 --- a/vendor/memchr/src/memmem/prefilter/mod.rs +++ /dev/null @@ -1,570 +0,0 @@ -use crate::memmem::{rarebytes::RareNeedleBytes, NeedleInfo}; - -mod fallback; -#[cfg(memchr_runtime_simd)] -mod genericsimd; -#[cfg(all(not(miri), target_arch = "wasm32", memchr_runtime_simd))] -mod wasm; -#[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))] -mod x86; - -/// The maximum frequency rank permitted for the fallback prefilter. If the -/// rarest byte in the needle has a frequency rank above this value, then no -/// prefilter is used if the fallback prefilter would otherwise be selected. -const MAX_FALLBACK_RANK: usize = 250; - -/// A combination of prefilter effectiveness state, the prefilter function and -/// the needle info required to run a prefilter. -/// -/// For the most part, these are grouped into a single type for convenience, -/// instead of needing to pass around all three as distinct function -/// parameters. -pub(crate) struct Pre<'a> { - /// State that tracks the effectiveness of a prefilter. - pub(crate) state: &'a mut PrefilterState, - /// The actual prefilter function. - pub(crate) prefn: PrefilterFn, - /// Information about a needle, such as its RK hash and rare byte offsets. - pub(crate) ninfo: &'a NeedleInfo, -} - -impl<'a> Pre<'a> { - /// Call this prefilter on the given haystack with the given needle. - #[inline(always)] - pub(crate) fn call( - &mut self, - haystack: &[u8], - needle: &[u8], - ) -> Option { - self.prefn.call(self.state, self.ninfo, haystack, needle) - } - - /// Return true if and only if this prefilter should be used. - #[inline(always)] - pub(crate) fn should_call(&mut self) -> bool { - self.state.is_effective() - } -} - -/// A prefilter function. 
-/// -/// A prefilter function describes both forward and reverse searches. -/// (Although, we don't currently implement prefilters for reverse searching.) -/// In the case of a forward search, the position returned corresponds to -/// the starting offset of a match (confirmed or possible). Its minimum -/// value is `0`, and its maximum value is `haystack.len() - 1`. In the case -/// of a reverse search, the position returned corresponds to the position -/// immediately after a match (confirmed or possible). Its minimum value is `1` -/// and its maximum value is `haystack.len()`. -/// -/// In both cases, the position returned is the starting (or ending) point of a -/// _possible_ match. That is, returning a false positive is okay. A prefilter, -/// however, must never return any false negatives. That is, if a match exists -/// at a particular position `i`, then a prefilter _must_ return that position. -/// It cannot skip past it. -/// -/// # Safety -/// -/// A prefilter function is not safe to create, since not all prefilters are -/// safe to call in all contexts. (e.g., A prefilter that uses AVX instructions -/// may only be called on x86_64 CPUs with the relevant AVX feature enabled.) -/// Thus, callers must ensure that when a prefilter function is created that it -/// is safe to call for the current environment. -#[derive(Clone, Copy)] -pub(crate) struct PrefilterFn(PrefilterFnTy); - -/// The type of a prefilter function. All prefilters must satisfy this -/// signature. -/// -/// Using a function pointer like this does inhibit inlining, but it does -/// eliminate branching and the extra costs associated with copying a larger -/// enum. Note also, that using Box can't really work -/// here, since we want to work in contexts that don't have dynamic memory -/// allocation. 
Moreover, in the default configuration of this crate on x86_64 -/// CPUs released in the past ~decade, we will use an AVX2-optimized prefilter, -/// which generally won't be inlineable into the surrounding code anyway. -/// (Unless AVX2 is enabled at compile time, but this is typically rare, since -/// it produces a non-portable binary.) -pub(crate) type PrefilterFnTy = unsafe fn( - prestate: &mut PrefilterState, - ninfo: &NeedleInfo, - haystack: &[u8], - needle: &[u8], -) -> Option; - -// If the haystack is too small for SSE2, then just run memchr on the -// rarest byte and be done with it. (It is likely that this code path is -// rarely exercised, since a higher level routine will probably dispatch to -// Rabin-Karp for such a small haystack.) -#[cfg(memchr_runtime_simd)] -fn simple_memchr_fallback( - _prestate: &mut PrefilterState, - ninfo: &NeedleInfo, - haystack: &[u8], - needle: &[u8], -) -> Option { - let (rare, _) = ninfo.rarebytes.as_rare_ordered_usize(); - crate::memchr(needle[rare], haystack).map(|i| i.saturating_sub(rare)) -} - -impl PrefilterFn { - /// Create a new prefilter function from the function pointer given. - /// - /// # Safety - /// - /// Callers must ensure that the given prefilter function is safe to call - /// for all inputs in the current environment. For example, if the given - /// prefilter function uses AVX instructions, then the caller must ensure - /// that the appropriate AVX CPU features are enabled. - pub(crate) unsafe fn new(prefn: PrefilterFnTy) -> PrefilterFn { - PrefilterFn(prefn) - } - - /// Call the underlying prefilter function with the given arguments. - pub fn call( - self, - prestate: &mut PrefilterState, - ninfo: &NeedleInfo, - haystack: &[u8], - needle: &[u8], - ) -> Option { - // SAFETY: Callers have the burden of ensuring that a prefilter - // function is safe to call for all inputs in the current environment. 
- unsafe { (self.0)(prestate, ninfo, haystack, needle) } - } -} - -impl core::fmt::Debug for PrefilterFn { - fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { - "".fmt(f) - } -} - -/// Prefilter controls whether heuristics are used to accelerate searching. -/// -/// A prefilter refers to the idea of detecting candidate matches very quickly, -/// and then confirming whether those candidates are full matches. This -/// idea can be quite effective since it's often the case that looking for -/// candidates can be a lot faster than running a complete substring search -/// over the entire input. Namely, looking for candidates can be done with -/// extremely fast vectorized code. -/// -/// The downside of a prefilter is that it assumes false positives (which are -/// candidates generated by a prefilter that aren't matches) are somewhat rare -/// relative to the frequency of full matches. That is, if a lot of false -/// positives are generated, then it's possible for search time to be worse -/// than if the prefilter wasn't enabled in the first place. -/// -/// Another downside of a prefilter is that it can result in highly variable -/// performance, where some cases are extraordinarily fast and others aren't. -/// Typically, variable performance isn't a problem, but it may be for your use -/// case. -/// -/// The use of prefilters in this implementation does use a heuristic to detect -/// when a prefilter might not be carrying its weight, and will dynamically -/// disable its use. Nevertheless, this configuration option gives callers -/// the ability to disable prefilters if you have knowledge that they won't be -/// useful. -#[derive(Clone, Copy, Debug)] -#[non_exhaustive] -pub enum Prefilter { - /// Never used a prefilter in substring search. - None, - /// Automatically detect whether a heuristic prefilter should be used. 
If - /// it is used, then heuristics will be used to dynamically disable the - /// prefilter if it is believed to not be carrying its weight. - Auto, -} - -impl Default for Prefilter { - fn default() -> Prefilter { - Prefilter::Auto - } -} - -impl Prefilter { - pub(crate) fn is_none(&self) -> bool { - match *self { - Prefilter::None => true, - _ => false, - } - } -} - -/// PrefilterState tracks state associated with the effectiveness of a -/// prefilter. It is used to track how many bytes, on average, are skipped by -/// the prefilter. If this average dips below a certain threshold over time, -/// then the state renders the prefilter inert and stops using it. -/// -/// A prefilter state should be created for each search. (Where creating an -/// iterator is treated as a single search.) A prefilter state should only be -/// created from a `Freqy`. e.g., An inert `Freqy` will produce an inert -/// `PrefilterState`. -#[derive(Clone, Debug)] -pub(crate) struct PrefilterState { - /// The number of skips that has been executed. This is always 1 greater - /// than the actual number of skips. The special sentinel value of 0 - /// indicates that the prefilter is inert. This is useful to avoid - /// additional checks to determine whether the prefilter is still - /// "effective." Once a prefilter becomes inert, it should no longer be - /// used (according to our heuristics). - skips: u32, - /// The total number of bytes that have been skipped. - skipped: u32, -} - -impl PrefilterState { - /// The minimum number of skip attempts to try before considering whether - /// a prefilter is effective or not. - const MIN_SKIPS: u32 = 50; - - /// The minimum amount of bytes that skipping must average. - /// - /// This value was chosen based on varying it and checking - /// the microbenchmarks. In particular, this can impact the - /// pathological/repeated-{huge,small} benchmarks quite a bit if it's set - /// too low. 
- const MIN_SKIP_BYTES: u32 = 8; - - /// Create a fresh prefilter state. - pub(crate) fn new() -> PrefilterState { - PrefilterState { skips: 1, skipped: 0 } - } - - /// Create a fresh prefilter state that is always inert. - pub(crate) fn inert() -> PrefilterState { - PrefilterState { skips: 0, skipped: 0 } - } - - /// Update this state with the number of bytes skipped on the last - /// invocation of the prefilter. - #[inline] - pub(crate) fn update(&mut self, skipped: usize) { - self.skips = self.skips.saturating_add(1); - // We need to do this dance since it's technically possible for - // `skipped` to overflow a `u32`. (And we use a `u32` to reduce the - // size of a prefilter state.) - if skipped > core::u32::MAX as usize { - self.skipped = core::u32::MAX; - } else { - self.skipped = self.skipped.saturating_add(skipped as u32); - } - } - - /// Return true if and only if this state indicates that a prefilter is - /// still effective. - #[inline] - pub(crate) fn is_effective(&mut self) -> bool { - if self.is_inert() { - return false; - } - if self.skips() < PrefilterState::MIN_SKIPS { - return true; - } - if self.skipped >= PrefilterState::MIN_SKIP_BYTES * self.skips() { - return true; - } - - // We're inert. - self.skips = 0; - false - } - - #[inline] - fn is_inert(&self) -> bool { - self.skips == 0 - } - - #[inline] - fn skips(&self) -> u32 { - self.skips.saturating_sub(1) - } -} - -/// Determine which prefilter function, if any, to use. -/// -/// This only applies to x86_64 when runtime SIMD detection is enabled (which -/// is the default). In general, we try to use an AVX prefilter, followed by -/// SSE and then followed by a generic one based on memchr. 
-#[inline(always)] -pub(crate) fn forward( - config: &Prefilter, - rare: &RareNeedleBytes, - needle: &[u8], -) -> Option { - if config.is_none() || needle.len() <= 1 { - return None; - } - - #[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))] - { - #[cfg(feature = "std")] - { - if cfg!(memchr_runtime_avx) { - if is_x86_feature_detected!("avx2") { - // SAFETY: x86::avx::find only requires the avx2 feature, - // which we've just checked above. - return unsafe { Some(PrefilterFn::new(x86::avx::find)) }; - } - } - } - if cfg!(memchr_runtime_sse2) { - // SAFETY: x86::sse::find only requires the sse2 feature, which is - // guaranteed to be available on x86_64. - return unsafe { Some(PrefilterFn::new(x86::sse::find)) }; - } - } - #[cfg(all(not(miri), target_arch = "wasm32", memchr_runtime_simd))] - { - // SAFETY: `wasm::find` is actually a safe function - // - // Also note that the `if true` is here to prevent, on wasm with simd, - // rustc warning about the code below being dead code. - if true { - return unsafe { Some(PrefilterFn::new(wasm::find)) }; - } - } - // Check that our rarest byte has a reasonably low rank. The main issue - // here is that the fallback prefilter can perform pretty poorly if it's - // given common bytes. So we try to avoid the worst cases here. - let (rare1_rank, _) = rare.as_ranks(needle); - if rare1_rank <= MAX_FALLBACK_RANK { - // SAFETY: fallback::find is safe to call in all environments. - return unsafe { Some(PrefilterFn::new(fallback::find)) }; - } - None -} - -/// Return the minimum length of the haystack in which a prefilter should be -/// used. If the haystack is below this length, then it's probably not worth -/// the overhead of running the prefilter. -/// -/// We used to look at the length of a haystack here. That is, if it was too -/// small, then don't bother with the prefilter. 
But two things changed: -/// the prefilter falls back to memchr for small haystacks, and, at the -/// meta-searcher level, Rabin-Karp is employed for tiny haystacks anyway. -/// -/// We keep it around for now in case we want to bring it back. -#[allow(dead_code)] -pub(crate) fn minimum_len(_haystack: &[u8], needle: &[u8]) -> usize { - // If the haystack length isn't greater than needle.len() * FACTOR, then - // no prefilter will be used. The presumption here is that since there - // are so few bytes to check, it's not worth running the prefilter since - // there will need to be a validation step anyway. Thus, the prefilter is - // largely redundant work. - // - // Increase the factor noticeably hurts the - // memmem/krate/prebuilt/teeny-*/never-john-watson benchmarks. - const PREFILTER_LENGTH_FACTOR: usize = 2; - const VECTOR_MIN_LENGTH: usize = 16; - let min = core::cmp::max( - VECTOR_MIN_LENGTH, - PREFILTER_LENGTH_FACTOR * needle.len(), - ); - // For haystacks with length==min, we still want to avoid the prefilter, - // so add 1. - min + 1 -} - -#[cfg(all(test, feature = "std", not(miri)))] -pub(crate) mod tests { - use std::convert::{TryFrom, TryInto}; - - use super::*; - use crate::memmem::{ - prefilter::PrefilterFnTy, rabinkarp, rarebytes::RareNeedleBytes, - }; - - // Below is a small jig that generates prefilter tests. The main purpose - // of this jig is to generate tests of varying needle/haystack lengths - // in order to try and exercise all code paths in our prefilters. And in - // particular, this is especially important for vectorized prefilters where - // certain code paths might only be exercised at certain lengths. - - /// A test that represents the input and expected output to a prefilter - /// function. The test should be able to run with any prefilter function - /// and get the expected output. - pub(crate) struct PrefilterTest { - // These fields represent the inputs and expected output of a forwards - // prefilter function. 
- pub(crate) ninfo: NeedleInfo, - pub(crate) haystack: Vec, - pub(crate) needle: Vec, - pub(crate) output: Option, - } - - impl PrefilterTest { - /// Run all generated forward prefilter tests on the given prefn. - /// - /// # Safety - /// - /// Callers must ensure that the given prefilter function pointer is - /// safe to call for all inputs in the current environment. - pub(crate) unsafe fn run_all_tests(prefn: PrefilterFnTy) { - PrefilterTest::run_all_tests_filter(prefn, |_| true) - } - - /// Run all generated forward prefilter tests that pass the given - /// predicate on the given prefn. - /// - /// # Safety - /// - /// Callers must ensure that the given prefilter function pointer is - /// safe to call for all inputs in the current environment. - pub(crate) unsafe fn run_all_tests_filter( - prefn: PrefilterFnTy, - mut predicate: impl FnMut(&PrefilterTest) -> bool, - ) { - for seed in PREFILTER_TEST_SEEDS { - for test in seed.generate() { - if predicate(&test) { - test.run(prefn); - } - } - } - } - - /// Create a new prefilter test from a seed and some chose offsets to - /// rare bytes in the seed's needle. - /// - /// If a valid test could not be constructed, then None is returned. - /// (Currently, we take the approach of massaging tests to be valid - /// instead of rejecting them outright.) - fn new( - seed: PrefilterTestSeed, - rare1i: usize, - rare2i: usize, - haystack_len: usize, - needle_len: usize, - output: Option, - ) -> Option { - let mut rare1i: u8 = rare1i.try_into().unwrap(); - let mut rare2i: u8 = rare2i.try_into().unwrap(); - // The '#' byte is never used in a haystack (unless we're expecting - // a match), while the '@' byte is never used in a needle. 
- let mut haystack = vec![b'@'; haystack_len]; - let mut needle = vec![b'#'; needle_len]; - needle[0] = seed.first; - needle[rare1i as usize] = seed.rare1; - needle[rare2i as usize] = seed.rare2; - // If we're expecting a match, then make sure the needle occurs - // in the haystack at the expected position. - if let Some(i) = output { - haystack[i..i + needle.len()].copy_from_slice(&needle); - } - // If the operations above lead to rare offsets pointing to the - // non-first occurrence of a byte, then adjust it. This might lead - // to redundant tests, but it's simpler than trying to change the - // generation process I think. - if let Some(i) = crate::memchr(seed.rare1, &needle) { - rare1i = u8::try_from(i).unwrap(); - } - if let Some(i) = crate::memchr(seed.rare2, &needle) { - rare2i = u8::try_from(i).unwrap(); - } - let ninfo = NeedleInfo { - rarebytes: RareNeedleBytes::new(rare1i, rare2i), - nhash: rabinkarp::NeedleHash::forward(&needle), - }; - Some(PrefilterTest { ninfo, haystack, needle, output }) - } - - /// Run this specific test on the given prefilter function. If the - /// outputs do no match, then this routine panics with a failure - /// message. - /// - /// # Safety - /// - /// Callers must ensure that the given prefilter function pointer is - /// safe to call for all inputs in the current environment. - unsafe fn run(&self, prefn: PrefilterFnTy) { - let mut prestate = PrefilterState::new(); - assert_eq!( - self.output, - prefn( - &mut prestate, - &self.ninfo, - &self.haystack, - &self.needle - ), - "ninfo: {:?}, haystack(len={}): {:?}, needle(len={}): {:?}", - self.ninfo, - self.haystack.len(), - std::str::from_utf8(&self.haystack).unwrap(), - self.needle.len(), - std::str::from_utf8(&self.needle).unwrap(), - ); - } - } - - /// A set of prefilter test seeds. Each seed serves as the base for the - /// generation of many other tests. In essence, the seed captures the - /// "rare" and first bytes among our needle. 
The tests generated from each - /// seed essentially vary the length of the needle and haystack, while - /// using the rare/first byte configuration from the seed. - /// - /// The purpose of this is to test many different needle/haystack lengths. - /// In particular, some of the vector optimizations might only have bugs - /// in haystacks of a certain size. - const PREFILTER_TEST_SEEDS: &[PrefilterTestSeed] = &[ - PrefilterTestSeed { first: b'x', rare1: b'y', rare2: b'z' }, - PrefilterTestSeed { first: b'x', rare1: b'x', rare2: b'z' }, - PrefilterTestSeed { first: b'x', rare1: b'y', rare2: b'x' }, - PrefilterTestSeed { first: b'x', rare1: b'x', rare2: b'x' }, - PrefilterTestSeed { first: b'x', rare1: b'y', rare2: b'y' }, - ]; - - /// Data that describes a single prefilter test seed. - #[derive(Clone, Copy)] - struct PrefilterTestSeed { - first: u8, - rare1: u8, - rare2: u8, - } - - impl PrefilterTestSeed { - /// Generate a series of prefilter tests from this seed. - fn generate(self) -> impl Iterator { - let len_start = 2; - // The iterator below generates *a lot* of tests. The number of - // tests was chosen somewhat empirically to be "bearable" when - // running the test suite. - // - // We use an iterator here because the collective haystacks of all - // these test cases add up to enough memory to OOM a conservative - // sandbox or a small laptop. 
- (len_start..=40).flat_map(move |needle_len| { - let rare_start = len_start - 1; - (rare_start..needle_len).flat_map(move |rare1i| { - (rare1i..needle_len).flat_map(move |rare2i| { - (needle_len..=66).flat_map(move |haystack_len| { - PrefilterTest::new( - self, - rare1i, - rare2i, - haystack_len, - needle_len, - None, - ) - .into_iter() - .chain( - (0..=(haystack_len - needle_len)).flat_map( - move |output| { - PrefilterTest::new( - self, - rare1i, - rare2i, - haystack_len, - needle_len, - Some(output), - ) - }, - ), - ) - }) - }) - }) - }) - } - } -} diff --git a/vendor/memchr/src/memmem/prefilter/wasm.rs b/vendor/memchr/src/memmem/prefilter/wasm.rs deleted file mode 100644 index 5470c92..0000000 --- a/vendor/memchr/src/memmem/prefilter/wasm.rs +++ /dev/null @@ -1,39 +0,0 @@ -use core::arch::wasm32::v128; - -use crate::memmem::{ - prefilter::{PrefilterFnTy, PrefilterState}, - NeedleInfo, -}; - -// Check that the functions below satisfy the Prefilter function type. -const _: PrefilterFnTy = find; - -/// A `v128`-accelerated candidate finder for single-substring search. -#[target_feature(enable = "simd128")] -pub(crate) fn find( - prestate: &mut PrefilterState, - ninfo: &NeedleInfo, - haystack: &[u8], - needle: &[u8], -) -> Option { - unsafe { - super::genericsimd::find::( - prestate, - ninfo, - haystack, - needle, - super::simple_memchr_fallback, - ) - } -} - -#[cfg(all(test, feature = "std"))] -mod tests { - #[test] - #[cfg(not(miri))] - fn prefilter_permutations() { - use crate::memmem::prefilter::tests::PrefilterTest; - // SAFETY: super::find is safe to call for all inputs on x86. 
- unsafe { PrefilterTest::run_all_tests(super::find) }; - } -} diff --git a/vendor/memchr/src/memmem/prefilter/x86/avx.rs b/vendor/memchr/src/memmem/prefilter/x86/avx.rs deleted file mode 100644 index fb11f33..0000000 --- a/vendor/memchr/src/memmem/prefilter/x86/avx.rs +++ /dev/null @@ -1,46 +0,0 @@ -use core::arch::x86_64::__m256i; - -use crate::memmem::{ - prefilter::{PrefilterFnTy, PrefilterState}, - NeedleInfo, -}; - -// Check that the functions below satisfy the Prefilter function type. -const _: PrefilterFnTy = find; - -/// An AVX2 accelerated candidate finder for single-substring search. -/// -/// # Safety -/// -/// Callers must ensure that the avx2 CPU feature is enabled in the current -/// environment. -#[target_feature(enable = "avx2")] -pub(crate) unsafe fn find( - prestate: &mut PrefilterState, - ninfo: &NeedleInfo, - haystack: &[u8], - needle: &[u8], -) -> Option { - super::super::genericsimd::find::<__m256i>( - prestate, - ninfo, - haystack, - needle, - super::sse::find, - ) -} - -#[cfg(test)] -mod tests { - #[test] - #[cfg(not(miri))] - fn prefilter_permutations() { - use crate::memmem::prefilter::tests::PrefilterTest; - if !is_x86_feature_detected!("avx2") { - return; - } - // SAFETY: The safety of super::find only requires that the current - // CPU support AVX2, which we checked above. - unsafe { PrefilterTest::run_all_tests(super::find) }; - } -} diff --git a/vendor/memchr/src/memmem/prefilter/x86/mod.rs b/vendor/memchr/src/memmem/prefilter/x86/mod.rs deleted file mode 100644 index 91381e5..0000000 --- a/vendor/memchr/src/memmem/prefilter/x86/mod.rs +++ /dev/null @@ -1,5 +0,0 @@ -// We only use AVX when we can detect at runtime whether it's available, which -// requires std. 
-#[cfg(feature = "std")] -pub(crate) mod avx; -pub(crate) mod sse; diff --git a/vendor/memchr/src/memmem/prefilter/x86/sse.rs b/vendor/memchr/src/memmem/prefilter/x86/sse.rs deleted file mode 100644 index b1c48e1..0000000 --- a/vendor/memchr/src/memmem/prefilter/x86/sse.rs +++ /dev/null @@ -1,42 +0,0 @@ -use core::arch::x86_64::__m128i; - -use crate::memmem::{ - prefilter::{PrefilterFnTy, PrefilterState}, - NeedleInfo, -}; - -// Check that the functions below satisfy the Prefilter function type. -const _: PrefilterFnTy = find; - -/// An SSE2 accelerated candidate finder for single-substring search. -/// -/// # Safety -/// -/// Callers must ensure that the sse2 CPU feature is enabled in the current -/// environment. This feature should be enabled in all x86_64 targets. -#[target_feature(enable = "sse2")] -pub(crate) unsafe fn find( - prestate: &mut PrefilterState, - ninfo: &NeedleInfo, - haystack: &[u8], - needle: &[u8], -) -> Option { - super::super::genericsimd::find::<__m128i>( - prestate, - ninfo, - haystack, - needle, - super::super::simple_memchr_fallback, - ) -} - -#[cfg(all(test, feature = "std"))] -mod tests { - #[test] - #[cfg(not(miri))] - fn prefilter_permutations() { - use crate::memmem::prefilter::tests::PrefilterTest; - // SAFETY: super::find is safe to call for all inputs on x86. - unsafe { PrefilterTest::run_all_tests(super::find) }; - } -} diff --git a/vendor/memchr/src/memmem/rabinkarp.rs b/vendor/memchr/src/memmem/rabinkarp.rs deleted file mode 100644 index daa4015..0000000 --- a/vendor/memchr/src/memmem/rabinkarp.rs +++ /dev/null @@ -1,233 +0,0 @@ -/* -This module implements the classical Rabin-Karp substring search algorithm, -with no extra frills. While its use would seem to break our time complexity -guarantee of O(m+n) (RK's time complexity is O(mn)), we are careful to only -ever use RK on a constant subset of haystacks. The main point here is that -RK has good latency properties for small needles/haystacks. 
It's very quick -to compute a needle hash and zip through the haystack when compared to -initializing Two-Way, for example. And this is especially useful for cases -where the haystack is just too short for vector instructions to do much good. - -The hashing function used here is the same one recommended by ESMAJ. - -Another choice instead of Rabin-Karp would be Shift-Or. But its latency -isn't quite as good since its preprocessing time is a bit more expensive -(both in practice and in theory). However, perhaps Shift-Or has a place -somewhere else for short patterns. I think the main problem is that it -requires space proportional to the alphabet and the needle. If we, for -example, supported needles up to length 16, then the total table size would be -len(alphabet)*size_of::()==512 bytes. Which isn't exactly small, and it's -probably bad to put that on the stack. So ideally, we'd throw it on the heap, -but we'd really like to write as much code without using alloc/std as possible. -But maybe it's worth the special casing. It's a TODO to benchmark. - -Wikipedia has a decent explanation, if a bit heavy on the theory: -https://en.wikipedia.org/wiki/Rabin%E2%80%93Karp_algorithm - -But ESMAJ provides something a bit more concrete: -http://www-igm.univ-mlv.fr/~lecroq/string/node5.html - -Finally, aho-corasick uses Rabin-Karp for multiple pattern match in some cases: -https://github.com/BurntSushi/aho-corasick/blob/3852632f10587db0ff72ef29e88d58bf305a0946/src/packed/rabinkarp.rs -*/ - -/// Whether RK is believed to be very fast for the given needle/haystack. -pub(crate) fn is_fast(haystack: &[u8], _needle: &[u8]) -> bool { - haystack.len() < 16 -} - -/// Search for the first occurrence of needle in haystack using Rabin-Karp. -pub(crate) fn find(haystack: &[u8], needle: &[u8]) -> Option { - find_with(&NeedleHash::forward(needle), haystack, needle) -} - -/// Search for the first occurrence of needle in haystack using Rabin-Karp with -/// a pre-computed needle hash. 
-pub(crate) fn find_with( - nhash: &NeedleHash, - mut haystack: &[u8], - needle: &[u8], -) -> Option { - if haystack.len() < needle.len() { - return None; - } - let start = haystack.as_ptr() as usize; - let mut hash = Hash::from_bytes_fwd(&haystack[..needle.len()]); - // N.B. I've experimented with unrolling this loop, but couldn't realize - // any obvious gains. - loop { - if nhash.eq(hash) && is_prefix(haystack, needle) { - return Some(haystack.as_ptr() as usize - start); - } - if needle.len() >= haystack.len() { - return None; - } - hash.roll(&nhash, haystack[0], haystack[needle.len()]); - haystack = &haystack[1..]; - } -} - -/// Search for the last occurrence of needle in haystack using Rabin-Karp. -pub(crate) fn rfind(haystack: &[u8], needle: &[u8]) -> Option { - rfind_with(&NeedleHash::reverse(needle), haystack, needle) -} - -/// Search for the last occurrence of needle in haystack using Rabin-Karp with -/// a pre-computed needle hash. -pub(crate) fn rfind_with( - nhash: &NeedleHash, - mut haystack: &[u8], - needle: &[u8], -) -> Option { - if haystack.len() < needle.len() { - return None; - } - let mut hash = - Hash::from_bytes_rev(&haystack[haystack.len() - needle.len()..]); - loop { - if nhash.eq(hash) && is_suffix(haystack, needle) { - return Some(haystack.len() - needle.len()); - } - if needle.len() >= haystack.len() { - return None; - } - hash.roll( - &nhash, - haystack[haystack.len() - 1], - haystack[haystack.len() - needle.len() - 1], - ); - haystack = &haystack[..haystack.len() - 1]; - } -} - -/// A hash derived from a needle. -#[derive(Clone, Copy, Debug, Default)] -pub(crate) struct NeedleHash { - /// The actual hash. - hash: Hash, - /// The factor needed to multiply a byte by in order to subtract it from - /// the hash. It is defined to be 2^(n-1) (using wrapping exponentiation), - /// where n is the length of the needle. This is how we "remove" a byte - /// from the hash once the hash window rolls past it. 
- hash_2pow: u32, -} - -impl NeedleHash { - /// Create a new Rabin-Karp hash for the given needle for use in forward - /// searching. - pub(crate) fn forward(needle: &[u8]) -> NeedleHash { - let mut nh = NeedleHash { hash: Hash::new(), hash_2pow: 1 }; - if needle.is_empty() { - return nh; - } - nh.hash.add(needle[0]); - for &b in needle.iter().skip(1) { - nh.hash.add(b); - nh.hash_2pow = nh.hash_2pow.wrapping_shl(1); - } - nh - } - - /// Create a new Rabin-Karp hash for the given needle for use in reverse - /// searching. - pub(crate) fn reverse(needle: &[u8]) -> NeedleHash { - let mut nh = NeedleHash { hash: Hash::new(), hash_2pow: 1 }; - if needle.is_empty() { - return nh; - } - nh.hash.add(needle[needle.len() - 1]); - for &b in needle.iter().rev().skip(1) { - nh.hash.add(b); - nh.hash_2pow = nh.hash_2pow.wrapping_shl(1); - } - nh - } - - /// Return true if the hashes are equivalent. - fn eq(&self, hash: Hash) -> bool { - self.hash == hash - } -} - -/// A Rabin-Karp hash. This might represent the hash of a needle, or the hash -/// of a rolling window in the haystack. -#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] -pub(crate) struct Hash(u32); - -impl Hash { - /// Create a new hash that represents the empty string. - pub(crate) fn new() -> Hash { - Hash(0) - } - - /// Create a new hash from the bytes given for use in forward searches. - pub(crate) fn from_bytes_fwd(bytes: &[u8]) -> Hash { - let mut hash = Hash::new(); - for &b in bytes { - hash.add(b); - } - hash - } - - /// Create a new hash from the bytes given for use in reverse searches. - fn from_bytes_rev(bytes: &[u8]) -> Hash { - let mut hash = Hash::new(); - for &b in bytes.iter().rev() { - hash.add(b); - } - hash - } - - /// Add 'new' and remove 'old' from this hash. The given needle hash should - /// correspond to the hash computed for the needle being searched for. - /// - /// This is meant to be used when the rolling window of the haystack is - /// advanced. 
- fn roll(&mut self, nhash: &NeedleHash, old: u8, new: u8) { - self.del(nhash, old); - self.add(new); - } - - /// Add a byte to this hash. - fn add(&mut self, byte: u8) { - self.0 = self.0.wrapping_shl(1).wrapping_add(byte as u32); - } - - /// Remove a byte from this hash. The given needle hash should correspond - /// to the hash computed for the needle being searched for. - fn del(&mut self, nhash: &NeedleHash, byte: u8) { - let factor = nhash.hash_2pow; - self.0 = self.0.wrapping_sub((byte as u32).wrapping_mul(factor)); - } -} - -/// Returns true if the given needle is a prefix of the given haystack. -/// -/// We forcefully don't inline the is_prefix call and hint at the compiler that -/// it is unlikely to be called. This causes the inner rabinkarp loop above -/// to be a bit tighter and leads to some performance improvement. See the -/// memmem/krate/prebuilt/sliceslice-words/words benchmark. -#[cold] -#[inline(never)] -fn is_prefix(haystack: &[u8], needle: &[u8]) -> bool { - crate::memmem::util::is_prefix(haystack, needle) -} - -/// Returns true if the given needle is a suffix of the given haystack. -/// -/// See is_prefix for why this is forcefully not inlined. -#[cold] -#[inline(never)] -fn is_suffix(haystack: &[u8], needle: &[u8]) -> bool { - crate::memmem::util::is_suffix(haystack, needle) -} - -#[cfg(test)] -mod simpletests { - define_memmem_simple_tests!(super::find, super::rfind); -} - -#[cfg(all(test, feature = "std", not(miri)))] -mod proptests { - define_memmem_quickcheck_tests!(super::find, super::rfind); -} diff --git a/vendor/memchr/src/memmem/rarebytes.rs b/vendor/memchr/src/memmem/rarebytes.rs deleted file mode 100644 index fb33f68..0000000 --- a/vendor/memchr/src/memmem/rarebytes.rs +++ /dev/null @@ -1,136 +0,0 @@ -/// A heuristic frequency based detection of rare bytes for substring search. -/// -/// This detector attempts to pick out two bytes in a needle that are predicted -/// to occur least frequently. 
The purpose is to use these bytes to implement -/// fast candidate search using vectorized code. -/// -/// A set of offsets is only computed for needles of length 2 or greater. -/// Smaller needles should be special cased by the substring search algorithm -/// in use. (e.g., Use memchr for single byte needles.) -/// -/// Note that we use `u8` to represent the offsets of the rare bytes in a -/// needle to reduce space usage. This means that rare byte occurring after the -/// first 255 bytes in a needle will never be used. -#[derive(Clone, Copy, Debug, Default)] -pub(crate) struct RareNeedleBytes { - /// The leftmost offset of the rarest byte in the needle, according to - /// pre-computed frequency analysis. The "leftmost offset" means that - /// rare1i <= i for all i where needle[i] == needle[rare1i]. - rare1i: u8, - /// The leftmost offset of the second rarest byte in the needle, according - /// to pre-computed frequency analysis. The "leftmost offset" means that - /// rare2i <= i for all i where needle[i] == needle[rare2i]. - /// - /// The second rarest byte is used as a type of guard for quickly detecting - /// a mismatch if the first byte matches. This is a hedge against - /// pathological cases where the pre-computed frequency analysis may be - /// off. (But of course, does not prevent *all* pathological cases.) - /// - /// In general, rare1i != rare2i by construction, although there is no hard - /// requirement that they be different. However, since the case of a single - /// byte needle is handled specially by memchr itself, rare2i generally - /// always should be different from rare1i since it would otherwise be - /// ineffective as a guard. - rare2i: u8, -} - -impl RareNeedleBytes { - /// Create a new pair of rare needle bytes with the given offsets. This is - /// only used in tests for generating input data. 
- #[cfg(all(test, feature = "std"))] - pub(crate) fn new(rare1i: u8, rare2i: u8) -> RareNeedleBytes { - RareNeedleBytes { rare1i, rare2i } - } - - /// Detect the leftmost offsets of the two rarest bytes in the given - /// needle. - pub(crate) fn forward(needle: &[u8]) -> RareNeedleBytes { - if needle.len() <= 1 || needle.len() > core::u8::MAX as usize { - // For needles bigger than u8::MAX, our offsets aren't big enough. - // (We make our offsets small to reduce stack copying.) - // If you have a use case for it, please file an issue. In that - // case, we should probably just adjust the routine below to pick - // some rare bytes from the first 255 bytes of the needle. - // - // Also note that for needles of size 0 or 1, they are special - // cased in Two-Way. - // - // TODO: Benchmar this. - return RareNeedleBytes { rare1i: 0, rare2i: 0 }; - } - - // Find the rarest two bytes. We make them distinct by construction. - let (mut rare1, mut rare1i) = (needle[0], 0); - let (mut rare2, mut rare2i) = (needle[1], 1); - if rank(rare2) < rank(rare1) { - core::mem::swap(&mut rare1, &mut rare2); - core::mem::swap(&mut rare1i, &mut rare2i); - } - for (i, &b) in needle.iter().enumerate().skip(2) { - if rank(b) < rank(rare1) { - rare2 = rare1; - rare2i = rare1i; - rare1 = b; - rare1i = i as u8; - } else if b != rare1 && rank(b) < rank(rare2) { - rare2 = b; - rare2i = i as u8; - } - } - // While not strictly required, we really don't want these to be - // equivalent. If they were, it would reduce the effectiveness of - // candidate searching using these rare bytes by increasing the rate of - // false positives. - assert_ne!(rare1i, rare2i); - RareNeedleBytes { rare1i, rare2i } - } - - /// Return the rare bytes in the given needle in the forward direction. - /// The needle given must be the same one given to the RareNeedleBytes - /// constructor. 
- pub(crate) fn as_rare_bytes(&self, needle: &[u8]) -> (u8, u8) { - (needle[self.rare1i as usize], needle[self.rare2i as usize]) - } - - /// Return the rare offsets such that the first offset is always <= to the - /// second offset. This is useful when the caller doesn't care whether - /// rare1 is rarer than rare2, but just wants to ensure that they are - /// ordered with respect to one another. - #[cfg(memchr_runtime_simd)] - pub(crate) fn as_rare_ordered_usize(&self) -> (usize, usize) { - let (rare1i, rare2i) = self.as_rare_ordered_u8(); - (rare1i as usize, rare2i as usize) - } - - /// Like as_rare_ordered_usize, but returns the offsets as their native - /// u8 values. - #[cfg(memchr_runtime_simd)] - pub(crate) fn as_rare_ordered_u8(&self) -> (u8, u8) { - if self.rare1i <= self.rare2i { - (self.rare1i, self.rare2i) - } else { - (self.rare2i, self.rare1i) - } - } - - /// Return the rare offsets as usize values in the order in which they were - /// constructed. rare1, for example, is constructed as the "rarer" byte, - /// and thus, callers may want to treat it differently from rare2. - pub(crate) fn as_rare_usize(&self) -> (usize, usize) { - (self.rare1i as usize, self.rare2i as usize) - } - - /// Return the byte frequency rank of each byte. The higher the rank, the - /// more frequency the byte is predicted to be. The needle given must be - /// the same one given to the RareNeedleBytes constructor. - pub(crate) fn as_ranks(&self, needle: &[u8]) -> (usize, usize) { - let (b1, b2) = self.as_rare_bytes(needle); - (rank(b1), rank(b2)) - } -} - -/// Return the heuristical frequency rank of the given byte. A lower rank -/// means the byte is believed to occur less frequently. 
-fn rank(b: u8) -> usize { - crate::memmem::byte_frequencies::BYTE_FREQUENCIES[b as usize] as usize -} diff --git a/vendor/memchr/src/memmem/searcher.rs b/vendor/memchr/src/memmem/searcher.rs new file mode 100644 index 0000000..98b9bd6 --- /dev/null +++ b/vendor/memchr/src/memmem/searcher.rs @@ -0,0 +1,1030 @@ +use crate::arch::all::{ + packedpair::{HeuristicFrequencyRank, Pair}, + rabinkarp, twoway, +}; + +#[cfg(target_arch = "aarch64")] +use crate::arch::aarch64::neon::packedpair as neon; +#[cfg(target_arch = "wasm32")] +use crate::arch::wasm32::simd128::packedpair as simd128; +#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] +use crate::arch::x86_64::{ + avx2::packedpair as avx2, sse2::packedpair as sse2, +}; + +/// A "meta" substring searcher. +/// +/// To a first approximation, this chooses what it believes to be the "best" +/// substring search implemnetation based on the needle at construction time. +/// Then, every call to `find` will execute that particular implementation. To +/// a second approximation, multiple substring search algorithms may be used, +/// depending on the haystack. For example, for supremely short haystacks, +/// Rabin-Karp is typically used. +/// +/// See the documentation on `Prefilter` for an explanation of the dispatching +/// mechanism. The quick summary is that an enum has too much overhead and +/// we can't use dynamic dispatch via traits because we need to work in a +/// core-only environment. (Dynamic dispatch works in core-only, but you +/// need `&dyn Trait` and we really need a `Box` here. The latter +/// requires `alloc`.) So instead, we use a union and an appropriately paired +/// free function to read from the correct field on the union and execute the +/// chosen substring search implementation. 
+#[derive(Clone)] +pub(crate) struct Searcher { + call: SearcherKindFn, + kind: SearcherKind, + rabinkarp: rabinkarp::Finder, +} + +impl Searcher { + /// Creates a new "meta" substring searcher that attempts to choose the + /// best algorithm based on the needle, heuristics and what the current + /// target supports. + #[inline] + pub(crate) fn new( + prefilter: PrefilterConfig, + ranker: R, + needle: &[u8], + ) -> Searcher { + let rabinkarp = rabinkarp::Finder::new(needle); + if needle.len() <= 1 { + return if needle.is_empty() { + trace!("building empty substring searcher"); + Searcher { + call: searcher_kind_empty, + kind: SearcherKind { empty: () }, + rabinkarp, + } + } else { + trace!("building one-byte substring searcher"); + debug_assert_eq!(1, needle.len()); + Searcher { + call: searcher_kind_one_byte, + kind: SearcherKind { one_byte: needle[0] }, + rabinkarp, + } + }; + } + let pair = match Pair::with_ranker(needle, &ranker) { + Some(pair) => pair, + None => return Searcher::twoway(needle, rabinkarp, None), + }; + debug_assert_ne!( + pair.index1(), + pair.index2(), + "pair offsets should not be equivalent" + ); + #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] + { + if let Some(pp) = avx2::Finder::with_pair(needle, pair) { + if do_packed_search(needle) { + trace!("building x86_64 AVX2 substring searcher"); + let kind = SearcherKind { avx2: pp }; + Searcher { call: searcher_kind_avx2, kind, rabinkarp } + } else if prefilter.is_none() { + Searcher::twoway(needle, rabinkarp, None) + } else { + let prestrat = Prefilter::avx2(pp, needle); + Searcher::twoway(needle, rabinkarp, Some(prestrat)) + } + } else if let Some(pp) = sse2::Finder::with_pair(needle, pair) { + if do_packed_search(needle) { + trace!("building x86_64 SSE2 substring searcher"); + let kind = SearcherKind { sse2: pp }; + Searcher { call: searcher_kind_sse2, kind, rabinkarp } + } else if prefilter.is_none() { + Searcher::twoway(needle, rabinkarp, None) + } else { + let prestrat = 
Prefilter::sse2(pp, needle); + Searcher::twoway(needle, rabinkarp, Some(prestrat)) + } + } else if prefilter.is_none() { + Searcher::twoway(needle, rabinkarp, None) + } else { + // We're pretty unlikely to get to this point, but it is + // possible to be running on x86_64 without SSE2. Namely, it's + // really up to the OS whether it wants to support vector + // registers or not. + let prestrat = Prefilter::fallback(ranker, pair, needle); + Searcher::twoway(needle, rabinkarp, prestrat) + } + } + #[cfg(target_arch = "wasm32")] + { + if let Some(pp) = simd128::Finder::with_pair(needle, pair) { + if do_packed_search(needle) { + trace!("building wasm32 simd128 substring searcher"); + let kind = SearcherKind { simd128: pp }; + Searcher { call: searcher_kind_simd128, kind, rabinkarp } + } else if prefilter.is_none() { + Searcher::twoway(needle, rabinkarp, None) + } else { + let prestrat = Prefilter::simd128(pp, needle); + Searcher::twoway(needle, rabinkarp, Some(prestrat)) + } + } else if prefilter.is_none() { + Searcher::twoway(needle, rabinkarp, None) + } else { + let prestrat = Prefilter::fallback(ranker, pair, needle); + Searcher::twoway(needle, rabinkarp, prestrat) + } + } + #[cfg(target_arch = "aarch64")] + { + if let Some(pp) = neon::Finder::with_pair(needle, pair) { + if do_packed_search(needle) { + trace!("building aarch64 neon substring searcher"); + let kind = SearcherKind { neon: pp }; + Searcher { call: searcher_kind_neon, kind, rabinkarp } + } else if prefilter.is_none() { + Searcher::twoway(needle, rabinkarp, None) + } else { + let prestrat = Prefilter::neon(pp, needle); + Searcher::twoway(needle, rabinkarp, Some(prestrat)) + } + } else if prefilter.is_none() { + Searcher::twoway(needle, rabinkarp, None) + } else { + let prestrat = Prefilter::fallback(ranker, pair, needle); + Searcher::twoway(needle, rabinkarp, prestrat) + } + } + #[cfg(not(any( + all(target_arch = "x86_64", target_feature = "sse2"), + target_arch = "wasm32", + target_arch = "aarch64" + 
)))] + { + if prefilter.is_none() { + Searcher::twoway(needle, rabinkarp, None) + } else { + let prestrat = Prefilter::fallback(ranker, pair, needle); + Searcher::twoway(needle, rabinkarp, prestrat) + } + } + } + + /// Creates a new searcher that always uses the Two-Way algorithm. This is + /// typically used when vector algorithms are unavailable or inappropriate. + /// (For example, when the needle is "too long.") + /// + /// If a prefilter is given, then the searcher returned will be accelerated + /// by the prefilter. + #[inline] + fn twoway( + needle: &[u8], + rabinkarp: rabinkarp::Finder, + prestrat: Option, + ) -> Searcher { + let finder = twoway::Finder::new(needle); + match prestrat { + None => { + trace!("building scalar two-way substring searcher"); + let kind = SearcherKind { two_way: finder }; + Searcher { call: searcher_kind_two_way, kind, rabinkarp } + } + Some(prestrat) => { + trace!( + "building scalar two-way \ + substring searcher with a prefilter" + ); + let two_way_with_prefilter = + TwoWayWithPrefilter { finder, prestrat }; + let kind = SearcherKind { two_way_with_prefilter }; + Searcher { + call: searcher_kind_two_way_with_prefilter, + kind, + rabinkarp, + } + } + } + } + + /// Searches the given haystack for the given needle. The needle given + /// should be the same as the needle that this finder was initialized + /// with. + /// + /// Inlining this can lead to big wins for latency, and #[inline] doesn't + /// seem to be enough in some cases. + #[inline(always)] + pub(crate) fn find( + &self, + prestate: &mut PrefilterState, + haystack: &[u8], + needle: &[u8], + ) -> Option { + if haystack.len() < needle.len() { + None + } else { + // SAFETY: By construction, we've ensured that the function + // in `self.call` is properly paired with the union used in + // `self.kind`. 
+ unsafe { (self.call)(self, prestate, haystack, needle) } + } + } +} + +impl core::fmt::Debug for Searcher { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + f.debug_struct("Searcher") + .field("call", &"") + .field("kind", &"") + .field("rabinkarp", &self.rabinkarp) + .finish() + } +} + +/// A union indicating one of several possible substring search implementations +/// that are in active use. +/// +/// This union should only be read by one of the functions prefixed with +/// `searcher_kind_`. Namely, the correct function is meant to be paired with +/// the union by the caller, such that the function always reads from the +/// designated union field. +#[derive(Clone, Copy)] +union SearcherKind { + empty: (), + one_byte: u8, + two_way: twoway::Finder, + two_way_with_prefilter: TwoWayWithPrefilter, + #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] + sse2: crate::arch::x86_64::sse2::packedpair::Finder, + #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] + avx2: crate::arch::x86_64::avx2::packedpair::Finder, + #[cfg(target_arch = "wasm32")] + simd128: crate::arch::wasm32::simd128::packedpair::Finder, + #[cfg(target_arch = "aarch64")] + neon: crate::arch::aarch64::neon::packedpair::Finder, +} + +/// A two-way substring searcher with a prefilter. +#[derive(Copy, Clone, Debug)] +struct TwoWayWithPrefilter { + finder: twoway::Finder, + prestrat: Prefilter, +} + +/// The type of a substring search function. +/// +/// # Safety +/// +/// When using a function of this type, callers must ensure that the correct +/// function is paired with the value populated in `SearcherKind` union. +type SearcherKindFn = unsafe fn( + searcher: &Searcher, + prestate: &mut PrefilterState, + haystack: &[u8], + needle: &[u8], +) -> Option; + +/// Reads from the `empty` field of `SearcherKind` to handle the case of +/// searching for the empty needle. Works on all platforms. 
+/// +/// # Safety +/// +/// Callers must ensure that the `searcher.kind.empty` union field is set. +unsafe fn searcher_kind_empty( + _searcher: &Searcher, + _prestate: &mut PrefilterState, + _haystack: &[u8], + _needle: &[u8], +) -> Option { + Some(0) +} + +/// Reads from the `one_byte` field of `SearcherKind` to handle the case of +/// searching for a single byte needle. Works on all platforms. +/// +/// # Safety +/// +/// Callers must ensure that the `searcher.kind.one_byte` union field is set. +unsafe fn searcher_kind_one_byte( + searcher: &Searcher, + _prestate: &mut PrefilterState, + haystack: &[u8], + _needle: &[u8], +) -> Option { + let needle = searcher.kind.one_byte; + crate::memchr(needle, haystack) +} + +/// Reads from the `two_way` field of `SearcherKind` to handle the case of +/// searching for an arbitrary needle without prefilter acceleration. Works on +/// all platforms. +/// +/// # Safety +/// +/// Callers must ensure that the `searcher.kind.two_way` union field is set. +unsafe fn searcher_kind_two_way( + searcher: &Searcher, + _prestate: &mut PrefilterState, + haystack: &[u8], + needle: &[u8], +) -> Option { + if rabinkarp::is_fast(haystack, needle) { + searcher.rabinkarp.find(haystack, needle) + } else { + searcher.kind.two_way.find(haystack, needle) + } +} + +/// Reads from the `two_way_with_prefilter` field of `SearcherKind` to handle +/// the case of searching for an arbitrary needle with prefilter acceleration. +/// Works on all platforms. +/// +/// # Safety +/// +/// Callers must ensure that the `searcher.kind.two_way_with_prefilter` union +/// field is set. 
+unsafe fn searcher_kind_two_way_with_prefilter( + searcher: &Searcher, + prestate: &mut PrefilterState, + haystack: &[u8], + needle: &[u8], +) -> Option { + if rabinkarp::is_fast(haystack, needle) { + searcher.rabinkarp.find(haystack, needle) + } else { + let TwoWayWithPrefilter { ref finder, ref prestrat } = + searcher.kind.two_way_with_prefilter; + let pre = Pre { prestate, prestrat }; + finder.find_with_prefilter(Some(pre), haystack, needle) + } +} + +/// Reads from the `sse2` field of `SearcherKind` to execute the x86_64 SSE2 +/// vectorized substring search implementation. +/// +/// # Safety +/// +/// Callers must ensure that the `searcher.kind.sse2` union field is set. +#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] +unsafe fn searcher_kind_sse2( + searcher: &Searcher, + _prestate: &mut PrefilterState, + haystack: &[u8], + needle: &[u8], +) -> Option { + let finder = &searcher.kind.sse2; + if haystack.len() < finder.min_haystack_len() { + searcher.rabinkarp.find(haystack, needle) + } else { + finder.find(haystack, needle) + } +} + +/// Reads from the `avx2` field of `SearcherKind` to execute the x86_64 AVX2 +/// vectorized substring search implementation. +/// +/// # Safety +/// +/// Callers must ensure that the `searcher.kind.avx2` union field is set. +#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] +unsafe fn searcher_kind_avx2( + searcher: &Searcher, + _prestate: &mut PrefilterState, + haystack: &[u8], + needle: &[u8], +) -> Option { + let finder = &searcher.kind.avx2; + if haystack.len() < finder.min_haystack_len() { + searcher.rabinkarp.find(haystack, needle) + } else { + finder.find(haystack, needle) + } +} + +/// Reads from the `simd128` field of `SearcherKind` to execute the wasm32 +/// simd128 vectorized substring search implementation. +/// +/// # Safety +/// +/// Callers must ensure that the `searcher.kind.simd128` union field is set. 
+#[cfg(target_arch = "wasm32")] +unsafe fn searcher_kind_simd128( + searcher: &Searcher, + _prestate: &mut PrefilterState, + haystack: &[u8], + needle: &[u8], +) -> Option { + let finder = &searcher.kind.simd128; + if haystack.len() < finder.min_haystack_len() { + searcher.rabinkarp.find(haystack, needle) + } else { + finder.find(haystack, needle) + } +} + +/// Reads from the `neon` field of `SearcherKind` to execute the aarch64 neon +/// vectorized substring search implementation. +/// +/// # Safety +/// +/// Callers must ensure that the `searcher.kind.neon` union field is set. +#[cfg(target_arch = "aarch64")] +unsafe fn searcher_kind_neon( + searcher: &Searcher, + _prestate: &mut PrefilterState, + haystack: &[u8], + needle: &[u8], +) -> Option { + let finder = &searcher.kind.neon; + if haystack.len() < finder.min_haystack_len() { + searcher.rabinkarp.find(haystack, needle) + } else { + finder.find(haystack, needle) + } +} + +/// A reverse substring searcher. +#[derive(Clone, Debug)] +pub(crate) struct SearcherRev { + kind: SearcherRevKind, + rabinkarp: rabinkarp::FinderRev, +} + +/// The kind of the reverse searcher. +/// +/// For the reverse case, we don't do any SIMD acceleration or prefilters. +/// There is no specific technical reason why we don't, but rather don't do it +/// because it's not clear it's worth the extra code to do so. If you have a +/// use case for it, please file an issue. +/// +/// We also don't do the union trick as we do with the forward case and +/// prefilters. Basically for the same reason we don't have prefilters or +/// vector algorithms for reverse searching: it's not clear it's worth doing. +/// Please file an issue if you have a compelling use case for fast reverse +/// substring search. +#[derive(Clone, Debug)] +enum SearcherRevKind { + Empty, + OneByte { needle: u8 }, + TwoWay { finder: twoway::FinderRev }, +} + +impl SearcherRev { + /// Creates a new searcher for finding occurrences of the given needle in + /// reverse. 
That is, it reports the last (instead of the first) occurrence + /// of a needle in a haystack. + #[inline] + pub(crate) fn new(needle: &[u8]) -> SearcherRev { + let kind = if needle.len() <= 1 { + if needle.is_empty() { + trace!("building empty reverse substring searcher"); + SearcherRevKind::Empty + } else { + trace!("building one-byte reverse substring searcher"); + debug_assert_eq!(1, needle.len()); + SearcherRevKind::OneByte { needle: needle[0] } + } + } else { + trace!("building scalar two-way reverse substring searcher"); + let finder = twoway::FinderRev::new(needle); + SearcherRevKind::TwoWay { finder } + }; + let rabinkarp = rabinkarp::FinderRev::new(needle); + SearcherRev { kind, rabinkarp } + } + + /// Searches the given haystack for the last occurrence of the given + /// needle. The needle given should be the same as the needle that this + /// finder was initialized with. + #[inline] + pub(crate) fn rfind( + &self, + haystack: &[u8], + needle: &[u8], + ) -> Option { + if haystack.len() < needle.len() { + return None; + } + match self.kind { + SearcherRevKind::Empty => Some(haystack.len()), + SearcherRevKind::OneByte { needle } => { + crate::memrchr(needle, haystack) + } + SearcherRevKind::TwoWay { ref finder } => { + if rabinkarp::is_fast(haystack, needle) { + self.rabinkarp.rfind(haystack, needle) + } else { + finder.rfind(haystack, needle) + } + } + } + } +} + +/// Prefilter controls whether heuristics are used to accelerate searching. +/// +/// A prefilter refers to the idea of detecting candidate matches very quickly, +/// and then confirming whether those candidates are full matches. This +/// idea can be quite effective since it's often the case that looking for +/// candidates can be a lot faster than running a complete substring search +/// over the entire input. Namely, looking for candidates can be done with +/// extremely fast vectorized code. 
+/// +/// The downside of a prefilter is that it assumes false positives (which are +/// candidates generated by a prefilter that aren't matches) are somewhat rare +/// relative to the frequency of full matches. That is, if a lot of false +/// positives are generated, then it's possible for search time to be worse +/// than if the prefilter wasn't enabled in the first place. +/// +/// Another downside of a prefilter is that it can result in highly variable +/// performance, where some cases are extraordinarily fast and others aren't. +/// Typically, variable performance isn't a problem, but it may be for your use +/// case. +/// +/// The use of prefilters in this implementation does use a heuristic to detect +/// when a prefilter might not be carrying its weight, and will dynamically +/// disable its use. Nevertheless, this configuration option gives callers +/// the ability to disable prefilters if you have knowledge that they won't be +/// useful. +#[derive(Clone, Copy, Debug)] +#[non_exhaustive] +pub enum PrefilterConfig { + /// Never used a prefilter in substring search. + None, + /// Automatically detect whether a heuristic prefilter should be used. If + /// it is used, then heuristics will be used to dynamically disable the + /// prefilter if it is believed to not be carrying its weight. + Auto, +} + +impl Default for PrefilterConfig { + fn default() -> PrefilterConfig { + PrefilterConfig::Auto + } +} + +impl PrefilterConfig { + /// Returns true when this prefilter is set to the `None` variant. + fn is_none(&self) -> bool { + matches!(*self, PrefilterConfig::None) + } +} + +/// The implementation of a prefilter. +/// +/// This type encapsulates dispatch to one of several possible choices for a +/// prefilter. 
Generally speaking, all prefilters have the same approximate +/// algorithm: they choose a couple of bytes from the needle that are believed +/// to be rare, use a fast vector algorithm to look for those bytes and return +/// positions as candidates for some substring search algorithm (currently only +/// Two-Way) to confirm as a match or not. +/// +/// The differences between the algorithms are actually at the vector +/// implementation level. Namely, we need different routines based on both +/// which target architecture we're on and what CPU features are supported. +/// +/// The straight-forwardly obvious approach here is to use an enum, and make +/// `Prefilter::find` do case analysis to determine which algorithm was +/// selected and invoke it. However, I've observed that this leads to poor +/// codegen in some cases, especially in latency sensitive benchmarks. That is, +/// this approach comes with overhead that I wasn't able to eliminate. +/// +/// The second obvious approach is to use dynamic dispatch with traits. Doing +/// that in this context where `Prefilter` owns the selection generally +/// requires heap allocation, and this code is designed to run in core-only +/// environments. +/// +/// So we settle on using a union (that's `PrefilterKind`) and a function +/// pointer (that's `PrefilterKindFn`). We select the right function pointer +/// based on which field in the union we set, and that function in turn +/// knows which field of the union to access. The downside of this approach +/// is that it forces us to think about safety, but the upside is that +/// there are some nice latency improvements to benchmarks. (Especially the +/// `memmem/sliceslice/short` benchmark.) +/// +/// In cases where we've selected a vector algorithm and the haystack given +/// is too short, we fallback to the scalar version of `memchr` on the +/// `rarest_byte`. 
(The scalar version of `memchr` is still better than a naive +/// byte-at-a-time loop because it will read in `usize`-sized chunks at a +/// time.) +#[derive(Clone, Copy)] +struct Prefilter { + call: PrefilterKindFn, + kind: PrefilterKind, + rarest_byte: u8, + rarest_offset: u8, +} + +impl Prefilter { + /// Return a "fallback" prefilter, but only if it is believed to be + /// effective. + #[inline] + fn fallback( + ranker: R, + pair: Pair, + needle: &[u8], + ) -> Option { + /// The maximum frequency rank permitted for the fallback prefilter. + /// If the rarest byte in the needle has a frequency rank above this + /// value, then no prefilter is used if the fallback prefilter would + /// otherwise be selected. + const MAX_FALLBACK_RANK: u8 = 250; + + trace!("building fallback prefilter"); + let rarest_offset = pair.index1(); + let rarest_byte = needle[usize::from(rarest_offset)]; + let rarest_rank = ranker.rank(rarest_byte); + if rarest_rank > MAX_FALLBACK_RANK { + None + } else { + let finder = crate::arch::all::packedpair::Finder::with_pair( + needle, + pair.clone(), + )?; + let call = prefilter_kind_fallback; + let kind = PrefilterKind { fallback: finder }; + Some(Prefilter { call, kind, rarest_byte, rarest_offset }) + } + } + + /// Return a prefilter using a x86_64 SSE2 vector algorithm. + #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] + #[inline] + fn sse2(finder: sse2::Finder, needle: &[u8]) -> Prefilter { + trace!("building x86_64 SSE2 prefilter"); + let rarest_offset = finder.pair().index1(); + let rarest_byte = needle[usize::from(rarest_offset)]; + Prefilter { + call: prefilter_kind_sse2, + kind: PrefilterKind { sse2: finder }, + rarest_byte, + rarest_offset, + } + } + + /// Return a prefilter using a x86_64 AVX2 vector algorithm. 
+ #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] + #[inline] + fn avx2(finder: avx2::Finder, needle: &[u8]) -> Prefilter { + trace!("building x86_64 AVX2 prefilter"); + let rarest_offset = finder.pair().index1(); + let rarest_byte = needle[usize::from(rarest_offset)]; + Prefilter { + call: prefilter_kind_avx2, + kind: PrefilterKind { avx2: finder }, + rarest_byte, + rarest_offset, + } + } + + /// Return a prefilter using a wasm32 simd128 vector algorithm. + #[cfg(target_arch = "wasm32")] + #[inline] + fn simd128(finder: simd128::Finder, needle: &[u8]) -> Prefilter { + trace!("building wasm32 simd128 prefilter"); + let rarest_offset = finder.pair().index1(); + let rarest_byte = needle[usize::from(rarest_offset)]; + Prefilter { + call: prefilter_kind_simd128, + kind: PrefilterKind { simd128: finder }, + rarest_byte, + rarest_offset, + } + } + + /// Return a prefilter using a aarch64 neon vector algorithm. + #[cfg(target_arch = "aarch64")] + #[inline] + fn neon(finder: neon::Finder, needle: &[u8]) -> Prefilter { + trace!("building aarch64 neon prefilter"); + let rarest_offset = finder.pair().index1(); + let rarest_byte = needle[usize::from(rarest_offset)]; + Prefilter { + call: prefilter_kind_neon, + kind: PrefilterKind { neon: finder }, + rarest_byte, + rarest_offset, + } + } + + /// Return a *candidate* position for a match. + /// + /// When this returns an offset, it implies that a match could begin at + /// that offset, but it may not. That is, it is possible for a false + /// positive to be returned. + /// + /// When `None` is returned, then it is guaranteed that there are no + /// matches for the needle in the given haystack. That is, it is impossible + /// for a false negative to be returned. + /// + /// The purpose of this routine is to look for candidate matching positions + /// as quickly as possible before running a (likely) slower confirmation + /// step. 
+ #[inline] + fn find(&self, haystack: &[u8]) -> Option { + // SAFETY: By construction, we've ensured that the function in + // `self.call` is properly paired with the union used in `self.kind`. + unsafe { (self.call)(self, haystack) } + } + + /// A "simple" prefilter that just looks for the occurrence of the rarest + /// byte from the needle. This is generally only used for very small + /// haystacks. + #[inline] + fn find_simple(&self, haystack: &[u8]) -> Option { + // We don't use crate::memchr here because the haystack should be small + // enough that memchr won't be able to use vector routines anyway. So + // we just skip straight to the fallback implementation which is likely + // faster. (A byte-at-a-time loop is only used when the haystack is + // smaller than `size_of::()`.) + crate::arch::all::memchr::One::new(self.rarest_byte) + .find(haystack) + .map(|i| i.saturating_sub(usize::from(self.rarest_offset))) + } +} + +impl core::fmt::Debug for Prefilter { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + f.debug_struct("Prefilter") + .field("call", &"") + .field("kind", &"") + .field("rarest_byte", &self.rarest_byte) + .field("rarest_offset", &self.rarest_offset) + .finish() + } +} + +/// A union indicating one of several possible prefilters that are in active +/// use. +/// +/// This union should only be read by one of the functions prefixed with +/// `prefilter_kind_`. Namely, the correct function is meant to be paired with +/// the union by the caller, such that the function always reads from the +/// designated union field. 
+#[derive(Clone, Copy)] +union PrefilterKind { + fallback: crate::arch::all::packedpair::Finder, + #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] + sse2: crate::arch::x86_64::sse2::packedpair::Finder, + #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] + avx2: crate::arch::x86_64::avx2::packedpair::Finder, + #[cfg(target_arch = "wasm32")] + simd128: crate::arch::wasm32::simd128::packedpair::Finder, + #[cfg(target_arch = "aarch64")] + neon: crate::arch::aarch64::neon::packedpair::Finder, +} + +/// The type of a prefilter function. +/// +/// # Safety +/// +/// When using a function of this type, callers must ensure that the correct +/// function is paired with the value populated in `PrefilterKind` union. +type PrefilterKindFn = + unsafe fn(strat: &Prefilter, haystack: &[u8]) -> Option; + +/// Reads from the `fallback` field of `PrefilterKind` to execute the fallback +/// prefilter. Works on all platforms. +/// +/// # Safety +/// +/// Callers must ensure that the `strat.kind.fallback` union field is set. +unsafe fn prefilter_kind_fallback( + strat: &Prefilter, + haystack: &[u8], +) -> Option { + strat.kind.fallback.find_prefilter(haystack) +} + +/// Reads from the `sse2` field of `PrefilterKind` to execute the x86_64 SSE2 +/// prefilter. +/// +/// # Safety +/// +/// Callers must ensure that the `strat.kind.sse2` union field is set. +#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] +unsafe fn prefilter_kind_sse2( + strat: &Prefilter, + haystack: &[u8], +) -> Option { + let finder = &strat.kind.sse2; + if haystack.len() < finder.min_haystack_len() { + strat.find_simple(haystack) + } else { + finder.find_prefilter(haystack) + } +} + +/// Reads from the `avx2` field of `PrefilterKind` to execute the x86_64 AVX2 +/// prefilter. +/// +/// # Safety +/// +/// Callers must ensure that the `strat.kind.avx2` union field is set. 
+#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] +unsafe fn prefilter_kind_avx2( + strat: &Prefilter, + haystack: &[u8], +) -> Option { + let finder = &strat.kind.avx2; + if haystack.len() < finder.min_haystack_len() { + strat.find_simple(haystack) + } else { + finder.find_prefilter(haystack) + } +} + +/// Reads from the `simd128` field of `PrefilterKind` to execute the wasm32 +/// simd128 prefilter. +/// +/// # Safety +/// +/// Callers must ensure that the `strat.kind.simd128` union field is set. +#[cfg(target_arch = "wasm32")] +unsafe fn prefilter_kind_simd128( + strat: &Prefilter, + haystack: &[u8], +) -> Option { + let finder = &strat.kind.simd128; + if haystack.len() < finder.min_haystack_len() { + strat.find_simple(haystack) + } else { + finder.find_prefilter(haystack) + } +} + +/// Reads from the `neon` field of `PrefilterKind` to execute the aarch64 neon +/// prefilter. +/// +/// # Safety +/// +/// Callers must ensure that the `strat.kind.neon` union field is set. +#[cfg(target_arch = "aarch64")] +unsafe fn prefilter_kind_neon( + strat: &Prefilter, + haystack: &[u8], +) -> Option { + let finder = &strat.kind.neon; + if haystack.len() < finder.min_haystack_len() { + strat.find_simple(haystack) + } else { + finder.find_prefilter(haystack) + } +} + +/// PrefilterState tracks state associated with the effectiveness of a +/// prefilter. It is used to track how many bytes, on average, are skipped by +/// the prefilter. If this average dips below a certain threshold over time, +/// then the state renders the prefilter inert and stops using it. +/// +/// A prefilter state should be created for each search. (Where creating an +/// iterator is treated as a single search.) A prefilter state should only be +/// created from a `Freqy`. e.g., An inert `Freqy` will produce an inert +/// `PrefilterState`. +#[derive(Clone, Copy, Debug)] +pub(crate) struct PrefilterState { + /// The number of skips that has been executed. 
This is always 1 greater + /// than the actual number of skips. The special sentinel value of 0 + /// indicates that the prefilter is inert. This is useful to avoid + /// additional checks to determine whether the prefilter is still + /// "effective." Once a prefilter becomes inert, it should no longer be + /// used (according to our heuristics). + skips: u32, + /// The total number of bytes that have been skipped. + skipped: u32, +} + +impl PrefilterState { + /// The minimum number of skip attempts to try before considering whether + /// a prefilter is effective or not. + const MIN_SKIPS: u32 = 50; + + /// The minimum amount of bytes that skipping must average. + /// + /// This value was chosen based on varying it and checking + /// the microbenchmarks. In particular, this can impact the + /// pathological/repeated-{huge,small} benchmarks quite a bit if it's set + /// too low. + const MIN_SKIP_BYTES: u32 = 8; + + /// Create a fresh prefilter state. + #[inline] + pub(crate) fn new() -> PrefilterState { + PrefilterState { skips: 1, skipped: 0 } + } + + /// Update this state with the number of bytes skipped on the last + /// invocation of the prefilter. + #[inline] + fn update(&mut self, skipped: usize) { + self.skips = self.skips.saturating_add(1); + // We need to do this dance since it's technically possible for + // `skipped` to overflow a `u32`. (And we use a `u32` to reduce the + // size of a prefilter state.) + self.skipped = match u32::try_from(skipped) { + Err(_) => core::u32::MAX, + Ok(skipped) => self.skipped.saturating_add(skipped), + }; + } + + /// Return true if and only if this state indicates that a prefilter is + /// still effective. + #[inline] + fn is_effective(&mut self) -> bool { + if self.is_inert() { + return false; + } + if self.skips() < PrefilterState::MIN_SKIPS { + return true; + } + if self.skipped >= PrefilterState::MIN_SKIP_BYTES * self.skips() { + return true; + } + + // We're inert. 
+ self.skips = 0; + false + } + + /// Returns true if the prefilter this state represents should no longer + /// be used. + #[inline] + fn is_inert(&self) -> bool { + self.skips == 0 + } + + /// Returns the total number of times the prefilter has been used. + #[inline] + fn skips(&self) -> u32 { + // Remember, `0` is a sentinel value indicating inertness, so we + // always need to subtract `1` to get our actual number of skips. + self.skips.saturating_sub(1) + } +} + +/// A combination of prefilter effectiveness state and the prefilter itself. +#[derive(Debug)] +pub(crate) struct Pre<'a> { + /// State that tracks the effectiveness of a prefilter. + prestate: &'a mut PrefilterState, + /// The actual prefilter. + prestrat: &'a Prefilter, +} + +impl<'a> Pre<'a> { + /// Call this prefilter on the given haystack with the given needle. + #[inline] + pub(crate) fn find(&mut self, haystack: &[u8]) -> Option { + let result = self.prestrat.find(haystack); + self.prestate.update(result.unwrap_or(haystack.len())); + result + } + + /// Return true if and only if this prefilter should be used. + #[inline] + pub(crate) fn is_effective(&mut self) -> bool { + self.prestate.is_effective() + } +} + +/// Returns true if the needle has the right characteristics for a vector +/// algorithm to handle the entirety of substring search. +/// +/// Vector algorithms can be used for prefilters for other substring search +/// algorithms (like Two-Way), but they can also be used for substring search +/// on their own. When used for substring search, vector algorithms will +/// quickly identify candidate match positions (just like in the prefilter +/// case), but instead of returning the candidate position they will try to +/// confirm the match themselves. Confirmation happens via `memcmp`. This +/// works well for short needles, but can break down when many false candidate +/// positions are generated for large needles. 
Thus, we only permit vector +/// algorithms to own substring search when the needle is of a certain length. +#[inline] +fn do_packed_search(needle: &[u8]) -> bool { + /// The minimum length of a needle required for this algorithm. The minimum + /// is 2 since a length of 1 should just use memchr and a length of 0 isn't + /// a case handled by this searcher. + const MIN_LEN: usize = 2; + + /// The maximum length of a needle required for this algorithm. + /// + /// In reality, there is no hard max here. The code below can handle any + /// length needle. (Perhaps that suggests there are missing optimizations.) + /// Instead, this is a heuristic and a bound guaranteeing our linear time + /// complexity. + /// + /// It is a heuristic because when a candidate match is found, memcmp is + /// run. For very large needles with lots of false positives, memcmp can + /// make the code run quite slow. + /// + /// It is a bound because the worst case behavior with memcmp is + /// multiplicative in the size of the needle and haystack, and we want + /// to keep that additive. This bound ensures we still meet that bound + /// theoretically, since it's just a constant. We aren't acting in bad + /// faith here, memcmp on tiny needles is so fast that even in pathological + /// cases (see pathological vector benchmarks), this is still just as fast + /// or faster in practice. + /// + /// This specific number was chosen by tweaking a bit and running + /// benchmarks. The rare-medium-needle, for example, gets about 5% faster + /// by using this algorithm instead of a prefilter-accelerated Two-Way. + /// There's also a theoretical desire to keep this number reasonably + /// low, to mitigate the impact of pathological cases. I did try 64, and + /// some benchmarks got a little better, and others (particularly the + /// pathological ones), got a lot worse. So... 32 it is? 
+ const MAX_LEN: usize = 32; + MIN_LEN <= needle.len() && needle.len() <= MAX_LEN +} diff --git a/vendor/memchr/src/memmem/util.rs b/vendor/memchr/src/memmem/util.rs deleted file mode 100644 index de0e385..0000000 --- a/vendor/memchr/src/memmem/util.rs +++ /dev/null @@ -1,88 +0,0 @@ -// These routines are meant to be optimized specifically for low latency as -// compared to the equivalent routines offered by std. (Which may invoke the -// dynamic linker and call out to libc, which introduces a bit more latency -// than we'd like.) - -/// Returns true if and only if needle is a prefix of haystack. -#[inline(always)] -pub(crate) fn is_prefix(haystack: &[u8], needle: &[u8]) -> bool { - needle.len() <= haystack.len() && memcmp(&haystack[..needle.len()], needle) -} - -/// Returns true if and only if needle is a suffix of haystack. -#[inline(always)] -pub(crate) fn is_suffix(haystack: &[u8], needle: &[u8]) -> bool { - needle.len() <= haystack.len() - && memcmp(&haystack[haystack.len() - needle.len()..], needle) -} - -/// Return true if and only if x.len() == y.len() && x[i] == y[i] for all -/// 0 <= i < x.len(). -/// -/// Why not just use actual memcmp for this? Well, memcmp requires calling out -/// to libc, and this routine is called in fairly hot code paths. Other than -/// just calling out to libc, it also seems to result in worse codegen. By -/// rolling our own memcmp in pure Rust, it seems to appear more friendly to -/// the optimizer. -/// -/// We mark this as inline always, although, some callers may not want it -/// inlined for better codegen (like Rabin-Karp). In that case, callers are -/// advised to create a non-inlineable wrapper routine that calls memcmp. -#[inline(always)] -pub(crate) fn memcmp(x: &[u8], y: &[u8]) -> bool { - if x.len() != y.len() { - return false; - } - // If we don't have enough bytes to do 4-byte at a time loads, then - // fall back to the naive slow version. 
- // - // TODO: We could do a copy_nonoverlapping combined with a mask instead - // of a loop. Benchmark it. - if x.len() < 4 { - for (&b1, &b2) in x.iter().zip(y) { - if b1 != b2 { - return false; - } - } - return true; - } - // When we have 4 or more bytes to compare, then proceed in chunks of 4 at - // a time using unaligned loads. - // - // Also, why do 4 byte loads instead of, say, 8 byte loads? The reason is - // that this particular version of memcmp is likely to be called with tiny - // needles. That means that if we do 8 byte loads, then a higher proportion - // of memcmp calls will use the slower variant above. With that said, this - // is a hypothesis and is only loosely supported by benchmarks. There's - // likely some improvement that could be made here. The main thing here - // though is to optimize for latency, not throughput. - - // SAFETY: Via the conditional above, we know that both `px` and `py` - // have the same length, so `px < pxend` implies that `py < pyend`. - // Thus, derefencing both `px` and `py` in the loop below is safe. - // - // Moreover, we set `pxend` and `pyend` to be 4 bytes before the actual - // end of of `px` and `py`. Thus, the final dereference outside of the - // loop is guaranteed to be valid. (The final comparison will overlap with - // the last comparison done in the loop for lengths that aren't multiples - // of four.) - // - // Finally, we needn't worry about alignment here, since we do unaligned - // loads. 
- unsafe { - let (mut px, mut py) = (x.as_ptr(), y.as_ptr()); - let (pxend, pyend) = (px.add(x.len() - 4), py.add(y.len() - 4)); - while px < pxend { - let vx = (px as *const u32).read_unaligned(); - let vy = (py as *const u32).read_unaligned(); - if vx != vy { - return false; - } - px = px.add(4); - py = py.add(4); - } - let vx = (pxend as *const u32).read_unaligned(); - let vy = (pyend as *const u32).read_unaligned(); - vx == vy - } -} diff --git a/vendor/memchr/src/memmem/vector.rs b/vendor/memchr/src/memmem/vector.rs deleted file mode 100644 index b81165f..0000000 --- a/vendor/memchr/src/memmem/vector.rs +++ /dev/null @@ -1,131 +0,0 @@ -/// A trait for describing vector operations used by vectorized searchers. -/// -/// The trait is highly constrained to low level vector operations needed. In -/// general, it was invented mostly to be generic over x86's __m128i and -/// __m256i types. It's likely that once std::simd becomes a thing, we can -/// migrate to that since the operations required are quite simple. -/// -/// TODO: Consider moving this trait up a level and using it to implement -/// memchr as well. The trait might need to grow one or two methods, but -/// otherwise should be close to sufficient already. -/// -/// # Safety -/// -/// All methods are not safe since they are intended to be implemented using -/// vendor intrinsics, which are also not safe. Callers must ensure that the -/// appropriate target features are enabled in the calling function, and that -/// the current CPU supports them. All implementations should avoid marking the -/// routines with #[target_feature] and instead mark them as #[inline(always)] -/// to ensure they get appropriately inlined. (inline(always) cannot be used -/// with target_feature.) 
-pub(crate) trait Vector: Copy + core::fmt::Debug { - /// _mm_set1_epi8 or _mm256_set1_epi8 - unsafe fn splat(byte: u8) -> Self; - /// _mm_loadu_si128 or _mm256_loadu_si256 - unsafe fn load_unaligned(data: *const u8) -> Self; - /// _mm_movemask_epi8 or _mm256_movemask_epi8 - unsafe fn movemask(self) -> u32; - /// _mm_cmpeq_epi8 or _mm256_cmpeq_epi8 - unsafe fn cmpeq(self, vector2: Self) -> Self; - /// _mm_and_si128 or _mm256_and_si256 - unsafe fn and(self, vector2: Self) -> Self; -} - -#[cfg(target_arch = "x86_64")] -mod x86sse { - use super::Vector; - use core::arch::x86_64::*; - - impl Vector for __m128i { - #[inline(always)] - unsafe fn splat(byte: u8) -> __m128i { - _mm_set1_epi8(byte as i8) - } - - #[inline(always)] - unsafe fn load_unaligned(data: *const u8) -> __m128i { - _mm_loadu_si128(data as *const __m128i) - } - - #[inline(always)] - unsafe fn movemask(self) -> u32 { - _mm_movemask_epi8(self) as u32 - } - - #[inline(always)] - unsafe fn cmpeq(self, vector2: Self) -> __m128i { - _mm_cmpeq_epi8(self, vector2) - } - - #[inline(always)] - unsafe fn and(self, vector2: Self) -> __m128i { - _mm_and_si128(self, vector2) - } - } -} - -#[cfg(all(feature = "std", target_arch = "x86_64"))] -mod x86avx { - use super::Vector; - use core::arch::x86_64::*; - - impl Vector for __m256i { - #[inline(always)] - unsafe fn splat(byte: u8) -> __m256i { - _mm256_set1_epi8(byte as i8) - } - - #[inline(always)] - unsafe fn load_unaligned(data: *const u8) -> __m256i { - _mm256_loadu_si256(data as *const __m256i) - } - - #[inline(always)] - unsafe fn movemask(self) -> u32 { - _mm256_movemask_epi8(self) as u32 - } - - #[inline(always)] - unsafe fn cmpeq(self, vector2: Self) -> __m256i { - _mm256_cmpeq_epi8(self, vector2) - } - - #[inline(always)] - unsafe fn and(self, vector2: Self) -> __m256i { - _mm256_and_si256(self, vector2) - } - } -} - -#[cfg(target_arch = "wasm32")] -mod wasm_simd128 { - use super::Vector; - use core::arch::wasm32::*; - - impl Vector for v128 { - 
#[inline(always)] - unsafe fn splat(byte: u8) -> v128 { - u8x16_splat(byte) - } - - #[inline(always)] - unsafe fn load_unaligned(data: *const u8) -> v128 { - v128_load(data.cast()) - } - - #[inline(always)] - unsafe fn movemask(self) -> u32 { - u8x16_bitmask(self).into() - } - - #[inline(always)] - unsafe fn cmpeq(self, vector2: Self) -> v128 { - u8x16_eq(self, vector2) - } - - #[inline(always)] - unsafe fn and(self, vector2: Self) -> v128 { - v128_and(self, vector2) - } - } -} diff --git a/vendor/memchr/src/memmem/wasm.rs b/vendor/memchr/src/memmem/wasm.rs deleted file mode 100644 index 4e3ea98..0000000 --- a/vendor/memchr/src/memmem/wasm.rs +++ /dev/null @@ -1,75 +0,0 @@ -use core::arch::wasm32::v128; - -use crate::memmem::{genericsimd, NeedleInfo}; - -/// A `v128` accelerated vectorized substring search routine that only works on -/// small needles. -#[derive(Clone, Copy, Debug)] -pub(crate) struct Forward(genericsimd::Forward); - -impl Forward { - /// Create a new "generic simd" forward searcher. If one could not be - /// created from the given inputs, then None is returned. - pub(crate) fn new(ninfo: &NeedleInfo, needle: &[u8]) -> Option { - if !cfg!(memchr_runtime_simd) { - return None; - } - genericsimd::Forward::new(ninfo, needle).map(Forward) - } - - /// Returns the minimum length of haystack that is needed for this searcher - /// to work. Passing a haystack with a length smaller than this will cause - /// `find` to panic. - #[inline(always)] - pub(crate) fn min_haystack_len(&self) -> usize { - self.0.min_haystack_len::() - } - - #[inline(always)] - pub(crate) fn find( - &self, - haystack: &[u8], - needle: &[u8], - ) -> Option { - self.find_impl(haystack, needle) - } - - /// The implementation of find marked with the appropriate target feature. 
- #[target_feature(enable = "simd128")] - fn find_impl(&self, haystack: &[u8], needle: &[u8]) -> Option { - unsafe { genericsimd::fwd_find::(&self.0, haystack, needle) } - } -} - -#[cfg(all(test, feature = "std", not(miri)))] -mod tests { - use crate::memmem::{prefilter::PrefilterState, NeedleInfo}; - - fn find( - _: &mut PrefilterState, - ninfo: &NeedleInfo, - haystack: &[u8], - needle: &[u8], - ) -> Option { - super::Forward::new(ninfo, needle).unwrap().find(haystack, needle) - } - - #[test] - fn prefilter_permutations() { - use crate::memmem::prefilter::tests::PrefilterTest; - - unsafe { - PrefilterTest::run_all_tests_filter(find, |t| { - // This substring searcher only works on certain configs, so - // filter our tests such that Forward::new will be guaranteed - // to succeed. (And also remove tests with a haystack that is - // too small.) - let fwd = match super::Forward::new(&t.ninfo, &t.needle) { - None => return false, - Some(fwd) => fwd, - }; - t.haystack.len() >= fwd.min_haystack_len() - }) - } - } -} diff --git a/vendor/memchr/src/memmem/x86/avx.rs b/vendor/memchr/src/memmem/x86/avx.rs deleted file mode 100644 index ce168dd..0000000 --- a/vendor/memchr/src/memmem/x86/avx.rs +++ /dev/null @@ -1,139 +0,0 @@ -#[cfg(not(feature = "std"))] -pub(crate) use self::nostd::Forward; -#[cfg(feature = "std")] -pub(crate) use self::std::Forward; - -#[cfg(feature = "std")] -mod std { - use core::arch::x86_64::{__m128i, __m256i}; - - use crate::memmem::{genericsimd, NeedleInfo}; - - /// An AVX accelerated vectorized substring search routine that only works - /// on small needles. - #[derive(Clone, Copy, Debug)] - pub(crate) struct Forward(genericsimd::Forward); - - impl Forward { - /// Create a new "generic simd" forward searcher. If one could not be - /// created from the given inputs, then None is returned. 
- pub(crate) fn new( - ninfo: &NeedleInfo, - needle: &[u8], - ) -> Option { - if !cfg!(memchr_runtime_avx) || !is_x86_feature_detected!("avx2") { - return None; - } - genericsimd::Forward::new(ninfo, needle).map(Forward) - } - - /// Returns the minimum length of haystack that is needed for this - /// searcher to work. Passing a haystack with a length smaller than - /// this will cause `find` to panic. - #[inline(always)] - pub(crate) fn min_haystack_len(&self) -> usize { - self.0.min_haystack_len::<__m128i>() - } - - #[inline(always)] - pub(crate) fn find( - &self, - haystack: &[u8], - needle: &[u8], - ) -> Option { - // SAFETY: The only way a Forward value can exist is if the avx2 - // target feature is enabled. This is the only safety requirement - // for calling the genericsimd searcher. - unsafe { self.find_impl(haystack, needle) } - } - - /// The implementation of find marked with the appropriate target - /// feature. - /// - /// # Safety - /// - /// Callers must ensure that the avx2 CPU feature is enabled in the - /// current environment. - #[target_feature(enable = "avx2")] - unsafe fn find_impl( - &self, - haystack: &[u8], - needle: &[u8], - ) -> Option { - if haystack.len() < self.0.min_haystack_len::<__m256i>() { - genericsimd::fwd_find::<__m128i>(&self.0, haystack, needle) - } else { - genericsimd::fwd_find::<__m256i>(&self.0, haystack, needle) - } - } - } -} - -// We still define the avx "forward" type on nostd to make caller code a bit -// simpler. This avoids needing a lot more conditional compilation. 
-#[cfg(not(feature = "std"))] -mod nostd { - use crate::memmem::NeedleInfo; - - #[derive(Clone, Copy, Debug)] - pub(crate) struct Forward(()); - - impl Forward { - pub(crate) fn new( - ninfo: &NeedleInfo, - needle: &[u8], - ) -> Option { - None - } - - pub(crate) fn min_haystack_len(&self) -> usize { - unreachable!() - } - - pub(crate) fn find( - &self, - haystack: &[u8], - needle: &[u8], - ) -> Option { - unreachable!() - } - } -} - -#[cfg(all(test, feature = "std", not(miri)))] -mod tests { - use crate::memmem::{prefilter::PrefilterState, NeedleInfo}; - - fn find( - _: &mut PrefilterState, - ninfo: &NeedleInfo, - haystack: &[u8], - needle: &[u8], - ) -> Option { - super::Forward::new(ninfo, needle).unwrap().find(haystack, needle) - } - - #[test] - fn prefilter_permutations() { - use crate::memmem::prefilter::tests::PrefilterTest; - - if !is_x86_feature_detected!("avx2") { - return; - } - // SAFETY: The safety of find only requires that the current CPU - // support AVX2, which we checked above. - unsafe { - PrefilterTest::run_all_tests_filter(find, |t| { - // This substring searcher only works on certain configs, so - // filter our tests such that Forward::new will be guaranteed - // to succeed. (And also remove tests with a haystack that is - // too small.) 
- let fwd = match super::Forward::new(&t.ninfo, &t.needle) { - None => return false, - Some(fwd) => fwd, - }; - t.haystack.len() >= fwd.min_haystack_len() - }) - } - } -} diff --git a/vendor/memchr/src/memmem/x86/mod.rs b/vendor/memchr/src/memmem/x86/mod.rs deleted file mode 100644 index c1cc73f..0000000 --- a/vendor/memchr/src/memmem/x86/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -pub(crate) mod avx; -pub(crate) mod sse; diff --git a/vendor/memchr/src/memmem/x86/sse.rs b/vendor/memchr/src/memmem/x86/sse.rs deleted file mode 100644 index 22e7d99..0000000 --- a/vendor/memchr/src/memmem/x86/sse.rs +++ /dev/null @@ -1,89 +0,0 @@ -use core::arch::x86_64::__m128i; - -use crate::memmem::{genericsimd, NeedleInfo}; - -/// An SSE accelerated vectorized substring search routine that only works on -/// small needles. -#[derive(Clone, Copy, Debug)] -pub(crate) struct Forward(genericsimd::Forward); - -impl Forward { - /// Create a new "generic simd" forward searcher. If one could not be - /// created from the given inputs, then None is returned. - pub(crate) fn new(ninfo: &NeedleInfo, needle: &[u8]) -> Option { - if !cfg!(memchr_runtime_sse2) { - return None; - } - genericsimd::Forward::new(ninfo, needle).map(Forward) - } - - /// Returns the minimum length of haystack that is needed for this searcher - /// to work. Passing a haystack with a length smaller than this will cause - /// `find` to panic. - #[inline(always)] - pub(crate) fn min_haystack_len(&self) -> usize { - self.0.min_haystack_len::<__m128i>() - } - - #[inline(always)] - pub(crate) fn find( - &self, - haystack: &[u8], - needle: &[u8], - ) -> Option { - // SAFETY: sse2 is enabled on all x86_64 targets, so this is always - // safe to call. - unsafe { self.find_impl(haystack, needle) } - } - - /// The implementation of find marked with the appropriate target feature. - /// - /// # Safety - /// - /// This is safe to call in all cases since sse2 is guaranteed to be part - /// of x86_64. 
It is marked as unsafe because of the target feature - /// attribute. - #[target_feature(enable = "sse2")] - unsafe fn find_impl( - &self, - haystack: &[u8], - needle: &[u8], - ) -> Option { - genericsimd::fwd_find::<__m128i>(&self.0, haystack, needle) - } -} - -#[cfg(all(test, feature = "std", not(miri)))] -mod tests { - use crate::memmem::{prefilter::PrefilterState, NeedleInfo}; - - fn find( - _: &mut PrefilterState, - ninfo: &NeedleInfo, - haystack: &[u8], - needle: &[u8], - ) -> Option { - super::Forward::new(ninfo, needle).unwrap().find(haystack, needle) - } - - #[test] - fn prefilter_permutations() { - use crate::memmem::prefilter::tests::PrefilterTest; - - // SAFETY: sse2 is enabled on all x86_64 targets, so this is always - // safe to call. - unsafe { - PrefilterTest::run_all_tests_filter(find, |t| { - // This substring searcher only works on certain configs, so - // filter our tests such that Forward::new will be guaranteed - // to succeed. (And also remove tests with a haystack that is - // too small.) 
- let fwd = match super::Forward::new(&t.ninfo, &t.needle) { - None => return false, - Some(fwd) => fwd, - }; - t.haystack.len() >= fwd.min_haystack_len() - }) - } - } -} diff --git a/vendor/memchr/src/tests/memchr/iter.rs b/vendor/memchr/src/tests/memchr/iter.rs deleted file mode 100644 index 80ea5c2..0000000 --- a/vendor/memchr/src/tests/memchr/iter.rs +++ /dev/null @@ -1,230 +0,0 @@ -use quickcheck::quickcheck; - -use crate::{tests::memchr::testdata::memchr_tests, Memchr, Memchr2, Memchr3}; - -#[test] -fn memchr1_iter() { - for test in memchr_tests() { - test.iter_one(false, Memchr::new); - } -} - -#[test] -fn memchr2_iter() { - for test in memchr_tests() { - test.iter_two(false, Memchr2::new); - } -} - -#[test] -fn memchr3_iter() { - for test in memchr_tests() { - test.iter_three(false, Memchr3::new); - } -} - -#[test] -fn memrchr1_iter() { - for test in memchr_tests() { - test.iter_one(true, |n1, corpus| Memchr::new(n1, corpus).rev()); - } -} - -#[test] -fn memrchr2_iter() { - for test in memchr_tests() { - test.iter_two(true, |n1, n2, corpus| { - Memchr2::new(n1, n2, corpus).rev() - }) - } -} - -#[test] -fn memrchr3_iter() { - for test in memchr_tests() { - test.iter_three(true, |n1, n2, n3, corpus| { - Memchr3::new(n1, n2, n3, corpus).rev() - }) - } -} - -quickcheck! 
{ - fn qc_memchr_double_ended_iter( - needle: u8, data: Vec, take_side: Vec - ) -> bool { - // make nonempty - let mut take_side = take_side; - if take_side.is_empty() { take_side.push(true) }; - - let iter = Memchr::new(needle, &data); - let all_found = double_ended_take( - iter, take_side.iter().cycle().cloned()); - - all_found.iter().cloned().eq(positions1(needle, &data)) - } - - fn qc_memchr2_double_ended_iter( - needle1: u8, needle2: u8, data: Vec, take_side: Vec - ) -> bool { - // make nonempty - let mut take_side = take_side; - if take_side.is_empty() { take_side.push(true) }; - - let iter = Memchr2::new(needle1, needle2, &data); - let all_found = double_ended_take( - iter, take_side.iter().cycle().cloned()); - - all_found.iter().cloned().eq(positions2(needle1, needle2, &data)) - } - - fn qc_memchr3_double_ended_iter( - needle1: u8, needle2: u8, needle3: u8, - data: Vec, take_side: Vec - ) -> bool { - // make nonempty - let mut take_side = take_side; - if take_side.is_empty() { take_side.push(true) }; - - let iter = Memchr3::new(needle1, needle2, needle3, &data); - let all_found = double_ended_take( - iter, take_side.iter().cycle().cloned()); - - all_found - .iter() - .cloned() - .eq(positions3(needle1, needle2, needle3, &data)) - } - - fn qc_memchr1_iter(data: Vec) -> bool { - let needle = 0; - let answer = positions1(needle, &data); - answer.eq(Memchr::new(needle, &data)) - } - - fn qc_memchr1_rev_iter(data: Vec) -> bool { - let needle = 0; - let answer = positions1(needle, &data); - answer.rev().eq(Memchr::new(needle, &data).rev()) - } - - fn qc_memchr2_iter(data: Vec) -> bool { - let needle1 = 0; - let needle2 = 1; - let answer = positions2(needle1, needle2, &data); - answer.eq(Memchr2::new(needle1, needle2, &data)) - } - - fn qc_memchr2_rev_iter(data: Vec) -> bool { - let needle1 = 0; - let needle2 = 1; - let answer = positions2(needle1, needle2, &data); - answer.rev().eq(Memchr2::new(needle1, needle2, &data).rev()) - } - - fn qc_memchr3_iter(data: Vec) 
-> bool { - let needle1 = 0; - let needle2 = 1; - let needle3 = 2; - let answer = positions3(needle1, needle2, needle3, &data); - answer.eq(Memchr3::new(needle1, needle2, needle3, &data)) - } - - fn qc_memchr3_rev_iter(data: Vec) -> bool { - let needle1 = 0; - let needle2 = 1; - let needle3 = 2; - let answer = positions3(needle1, needle2, needle3, &data); - answer.rev().eq(Memchr3::new(needle1, needle2, needle3, &data).rev()) - } - - fn qc_memchr1_iter_size_hint(data: Vec) -> bool { - // test that the size hint is within reasonable bounds - let needle = 0; - let mut iter = Memchr::new(needle, &data); - let mut real_count = data - .iter() - .filter(|&&elt| elt == needle) - .count(); - - while let Some(index) = iter.next() { - real_count -= 1; - let (lower, upper) = iter.size_hint(); - assert!(lower <= real_count); - assert!(upper.unwrap() >= real_count); - assert!(upper.unwrap() <= data.len() - index); - } - true - } -} - -// take items from a DEI, taking front for each true and back for each false. -// Return a vector with the concatenation of the fronts and the reverse of the -// backs. 
-fn double_ended_take(mut iter: I, take_side: J) -> Vec -where - I: DoubleEndedIterator, - J: Iterator, -{ - let mut found_front = Vec::new(); - let mut found_back = Vec::new(); - - for take_front in take_side { - if take_front { - if let Some(pos) = iter.next() { - found_front.push(pos); - } else { - break; - } - } else { - if let Some(pos) = iter.next_back() { - found_back.push(pos); - } else { - break; - } - }; - } - - let mut all_found = found_front; - all_found.extend(found_back.into_iter().rev()); - all_found -} - -// return an iterator of the 0-based indices of haystack that match the needle -fn positions1<'a>( - n1: u8, - haystack: &'a [u8], -) -> Box + 'a> { - let it = haystack - .iter() - .enumerate() - .filter(move |&(_, &b)| b == n1) - .map(|t| t.0); - Box::new(it) -} - -fn positions2<'a>( - n1: u8, - n2: u8, - haystack: &'a [u8], -) -> Box + 'a> { - let it = haystack - .iter() - .enumerate() - .filter(move |&(_, &b)| b == n1 || b == n2) - .map(|t| t.0); - Box::new(it) -} - -fn positions3<'a>( - n1: u8, - n2: u8, - n3: u8, - haystack: &'a [u8], -) -> Box + 'a> { - let it = haystack - .iter() - .enumerate() - .filter(move |&(_, &b)| b == n1 || b == n2 || b == n3) - .map(|t| t.0); - Box::new(it) -} diff --git a/vendor/memchr/src/tests/memchr/memchr.rs b/vendor/memchr/src/tests/memchr/memchr.rs deleted file mode 100644 index ac955ed..0000000 --- a/vendor/memchr/src/tests/memchr/memchr.rs +++ /dev/null @@ -1,134 +0,0 @@ -use quickcheck::quickcheck; - -use crate::{ - memchr, - memchr::{fallback, naive}, - memchr2, memchr3, memrchr, memrchr2, memrchr3, - tests::memchr::testdata::memchr_tests, -}; - -#[test] -fn memchr1_find() { - for test in memchr_tests() { - test.one(false, memchr); - } -} - -#[test] -fn memchr1_fallback_find() { - for test in memchr_tests() { - test.one(false, fallback::memchr); - } -} - -#[test] -fn memchr2_find() { - for test in memchr_tests() { - test.two(false, memchr2); - } -} - -#[test] -fn memchr2_fallback_find() { - for test in 
memchr_tests() { - test.two(false, fallback::memchr2); - } -} - -#[test] -fn memchr3_find() { - for test in memchr_tests() { - test.three(false, memchr3); - } -} - -#[test] -fn memchr3_fallback_find() { - for test in memchr_tests() { - test.three(false, fallback::memchr3); - } -} - -#[test] -fn memrchr1_find() { - for test in memchr_tests() { - test.one(true, memrchr); - } -} - -#[test] -fn memrchr1_fallback_find() { - for test in memchr_tests() { - test.one(true, fallback::memrchr); - } -} - -#[test] -fn memrchr2_find() { - for test in memchr_tests() { - test.two(true, memrchr2); - } -} - -#[test] -fn memrchr2_fallback_find() { - for test in memchr_tests() { - test.two(true, fallback::memrchr2); - } -} - -#[test] -fn memrchr3_find() { - for test in memchr_tests() { - test.three(true, memrchr3); - } -} - -#[test] -fn memrchr3_fallback_find() { - for test in memchr_tests() { - test.three(true, fallback::memrchr3); - } -} - -quickcheck! { - fn qc_memchr1_matches_naive(n1: u8, corpus: Vec) -> bool { - memchr(n1, &corpus) == naive::memchr(n1, &corpus) - } -} - -quickcheck! { - fn qc_memchr2_matches_naive(n1: u8, n2: u8, corpus: Vec) -> bool { - memchr2(n1, n2, &corpus) == naive::memchr2(n1, n2, &corpus) - } -} - -quickcheck! { - fn qc_memchr3_matches_naive( - n1: u8, n2: u8, n3: u8, - corpus: Vec - ) -> bool { - memchr3(n1, n2, n3, &corpus) == naive::memchr3(n1, n2, n3, &corpus) - } -} - -quickcheck! { - fn qc_memrchr1_matches_naive(n1: u8, corpus: Vec) -> bool { - memrchr(n1, &corpus) == naive::memrchr(n1, &corpus) - } -} - -quickcheck! { - fn qc_memrchr2_matches_naive(n1: u8, n2: u8, corpus: Vec) -> bool { - memrchr2(n1, n2, &corpus) == naive::memrchr2(n1, n2, &corpus) - } -} - -quickcheck! 
{ - fn qc_memrchr3_matches_naive( - n1: u8, n2: u8, n3: u8, - corpus: Vec - ) -> bool { - memrchr3(n1, n2, n3, &corpus) == naive::memrchr3(n1, n2, n3, &corpus) - } -} diff --git a/vendor/memchr/src/tests/memchr/mod.rs b/vendor/memchr/src/tests/memchr/mod.rs index 79f94ab..0564ad4 100644 --- a/vendor/memchr/src/tests/memchr/mod.rs +++ b/vendor/memchr/src/tests/memchr/mod.rs @@ -1,7 +1,307 @@ -#[cfg(all(feature = "std", not(miri)))] -mod iter; -#[cfg(all(feature = "std", not(miri)))] -mod memchr; -mod simple; -#[cfg(all(feature = "std", not(miri)))] -mod testdata; +use alloc::{ + string::{String, ToString}, + vec, + vec::Vec, +}; + +use crate::ext::Byte; + +pub(crate) mod naive; +#[macro_use] +pub(crate) mod prop; + +const SEEDS: &'static [Seed] = &[ + Seed { haystack: "a", needles: &[b'a'], positions: &[0] }, + Seed { haystack: "aa", needles: &[b'a'], positions: &[0, 1] }, + Seed { haystack: "aaa", needles: &[b'a'], positions: &[0, 1, 2] }, + Seed { haystack: "", needles: &[b'a'], positions: &[] }, + Seed { haystack: "z", needles: &[b'a'], positions: &[] }, + Seed { haystack: "zz", needles: &[b'a'], positions: &[] }, + Seed { haystack: "zza", needles: &[b'a'], positions: &[2] }, + Seed { haystack: "zaza", needles: &[b'a'], positions: &[1, 3] }, + Seed { haystack: "zzza", needles: &[b'a'], positions: &[3] }, + Seed { haystack: "\x00a", needles: &[b'a'], positions: &[1] }, + Seed { haystack: "\x00", needles: &[b'\x00'], positions: &[0] }, + Seed { haystack: "\x00\x00", needles: &[b'\x00'], positions: &[0, 1] }, + Seed { haystack: "\x00a\x00", needles: &[b'\x00'], positions: &[0, 2] }, + Seed { haystack: "zzzzzzzzzzzzzzzza", needles: &[b'a'], positions: &[16] }, + Seed { + haystack: "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzza", + needles: &[b'a'], + positions: &[32], + }, + // two needles (applied to memchr2 + memchr3) + Seed { haystack: "az", needles: &[b'a', b'z'], positions: &[0, 1] }, + Seed { haystack: "az", needles: &[b'a', b'z'], positions: &[0, 1] }, + Seed { haystack: 
"az", needles: &[b'x', b'y'], positions: &[] }, + Seed { haystack: "az", needles: &[b'a', b'y'], positions: &[0] }, + Seed { haystack: "az", needles: &[b'x', b'z'], positions: &[1] }, + Seed { haystack: "yyyyaz", needles: &[b'a', b'z'], positions: &[4, 5] }, + Seed { haystack: "yyyyaz", needles: &[b'z', b'a'], positions: &[4, 5] }, + // three needles (applied to memchr3) + Seed { + haystack: "xyz", + needles: &[b'x', b'y', b'z'], + positions: &[0, 1, 2], + }, + Seed { + haystack: "zxy", + needles: &[b'x', b'y', b'z'], + positions: &[0, 1, 2], + }, + Seed { haystack: "zxy", needles: &[b'x', b'a', b'z'], positions: &[0, 1] }, + Seed { haystack: "zxy", needles: &[b't', b'a', b'z'], positions: &[0] }, + Seed { haystack: "yxz", needles: &[b't', b'a', b'z'], positions: &[2] }, +]; + +/// Runs a host of substring search tests. +/// +/// This has support for "partial" substring search implementations only work +/// for a subset of needles/haystacks. For example, the "packed pair" substring +/// search implementation only works for haystacks of some minimum length based +/// of the pair of bytes selected and the size of the vector used. +pub(crate) struct Runner { + needle_len: usize, +} + +impl Runner { + /// Create a new test runner for forward and reverse byte search + /// implementations. + /// + /// The `needle_len` given must be at most `3` and at least `1`. It + /// corresponds to the number of needle bytes to search for. + pub(crate) fn new(needle_len: usize) -> Runner { + assert!(needle_len >= 1, "needle_len must be at least 1"); + assert!(needle_len <= 3, "needle_len must be at most 3"); + Runner { needle_len } + } + + /// Run all tests. This panics on the first failure. + /// + /// If the implementation being tested returns `None` for a particular + /// haystack/needle combination, then that test is skipped. 
+ pub(crate) fn forward_iter(self, mut test: F) + where + F: FnMut(&[u8], &[u8]) -> Option> + 'static, + { + for seed in SEEDS.iter() { + if seed.needles.len() > self.needle_len { + continue; + } + for t in seed.generate() { + let results = match test(t.haystack.as_bytes(), &t.needles) { + None => continue, + Some(results) => results, + }; + assert_eq!( + t.expected, + results, + "needles: {:?}, haystack: {:?}", + t.needles + .iter() + .map(|&b| b.to_char()) + .collect::>(), + t.haystack, + ); + } + } + } + + /// Run all tests in the reverse direction. This panics on the first + /// failure. + /// + /// If the implementation being tested returns `None` for a particular + /// haystack/needle combination, then that test is skipped. + pub(crate) fn reverse_iter(self, mut test: F) + where + F: FnMut(&[u8], &[u8]) -> Option> + 'static, + { + for seed in SEEDS.iter() { + if seed.needles.len() > self.needle_len { + continue; + } + for t in seed.generate() { + let mut results = match test(t.haystack.as_bytes(), &t.needles) + { + None => continue, + Some(results) => results, + }; + results.reverse(); + assert_eq!( + t.expected, + results, + "needles: {:?}, haystack: {:?}", + t.needles + .iter() + .map(|&b| b.to_char()) + .collect::>(), + t.haystack, + ); + } + } + } + + /// Run all tests as counting tests. This panics on the first failure. + /// + /// That is, this only checks that the number of matches is correct and + /// not whether the offsets of each match are. 
+ pub(crate) fn count_iter(self, mut test: F) + where + F: FnMut(&[u8], &[u8]) -> Option + 'static, + { + for seed in SEEDS.iter() { + if seed.needles.len() > self.needle_len { + continue; + } + for t in seed.generate() { + let got = match test(t.haystack.as_bytes(), &t.needles) { + None => continue, + Some(got) => got, + }; + assert_eq!( + t.expected.len(), + got, + "needles: {:?}, haystack: {:?}", + t.needles + .iter() + .map(|&b| b.to_char()) + .collect::>(), + t.haystack, + ); + } + } + } + + /// Like `Runner::forward`, but for a function that returns only the next + /// match and not all matches. + /// + /// If the function returns `None`, then it is skipped. + pub(crate) fn forward_oneshot(self, mut test: F) + where + F: FnMut(&[u8], &[u8]) -> Option> + 'static, + { + self.forward_iter(move |haystack, needles| { + let mut start = 0; + let mut results = vec![]; + while let Some(i) = test(&haystack[start..], needles)? { + results.push(start + i); + start += i + 1; + } + Some(results) + }) + } + + /// Like `Runner::reverse`, but for a function that returns only the last + /// match and not all matches. + /// + /// If the function returns `None`, then it is skipped. + pub(crate) fn reverse_oneshot(self, mut test: F) + where + F: FnMut(&[u8], &[u8]) -> Option> + 'static, + { + self.reverse_iter(move |haystack, needles| { + let mut end = haystack.len(); + let mut results = vec![]; + while let Some(i) = test(&haystack[..end], needles)? { + results.push(i); + end = i; + } + Some(results) + }) + } +} + +/// A single test for memr?chr{,2,3}. +#[derive(Clone, Debug)] +struct Test { + /// The string to search in. + haystack: String, + /// The needles to look for. + needles: Vec, + /// The offsets that are expected to be found for all needles in the + /// forward direction. 
+ expected: Vec, +} + +impl Test { + fn new(seed: &Seed) -> Test { + Test { + haystack: seed.haystack.to_string(), + needles: seed.needles.to_vec(), + expected: seed.positions.to_vec(), + } + } +} + +/// Data that can be expanded into many memchr tests by padding out the corpus. +#[derive(Clone, Debug)] +struct Seed { + /// The thing to search. We use `&str` instead of `&[u8]` because they + /// are nicer to write in tests, and we don't miss much since memchr + /// doesn't care about UTF-8. + /// + /// Corpora cannot contain either '%' or '#'. We use these bytes when + /// expanding test cases into many test cases, and we assume they are not + /// used. If they are used, `memchr_tests` will panic. + haystack: &'static str, + /// The needles to search for. This is intended to be an alternation of + /// needles. The number of needles may cause this test to be skipped for + /// some memchr variants. For example, a test with 2 needles cannot be used + /// to test `memchr`, but can be used to test `memchr2` and `memchr3`. + /// However, a test with only 1 needle can be used to test all of `memchr`, + /// `memchr2` and `memchr3`. We achieve this by filling in the needles with + /// bytes that we never used in the corpus (such as '#'). + needles: &'static [u8], + /// The positions expected to match for all of the needles. + positions: &'static [usize], +} + +impl Seed { + /// Controls how much we expand the haystack on either side for each test. + /// We lower this on Miri because otherwise running the tests would take + /// forever. + const EXPAND_LEN: usize = { + #[cfg(not(miri))] + { + 515 + } + #[cfg(miri)] + { + 6 + } + }; + + /// Expand this test into many variations of the same test. + /// + /// In particular, this will generate more tests with larger corpus sizes. + /// The expected positions are updated to maintain the integrity of the + /// test. 
+ /// + /// This is important in testing a memchr implementation, because there are + /// often different cases depending on the length of the corpus. + /// + /// Note that we extend the corpus by adding `%` bytes, which we + /// don't otherwise use as a needle. + fn generate(&self) -> impl Iterator { + let mut more = vec![]; + + // Add bytes to the start of the corpus. + for i in 0..Seed::EXPAND_LEN { + let mut t = Test::new(self); + let mut new: String = core::iter::repeat('%').take(i).collect(); + new.push_str(&t.haystack); + t.haystack = new; + t.expected = t.expected.into_iter().map(|p| p + i).collect(); + more.push(t); + } + // Add bytes to the end of the corpus. + for i in 1..Seed::EXPAND_LEN { + let mut t = Test::new(self); + let padding: String = core::iter::repeat('%').take(i).collect(); + t.haystack.push_str(&padding); + more.push(t); + } + + more.into_iter() + } +} diff --git a/vendor/memchr/src/tests/memchr/naive.rs b/vendor/memchr/src/tests/memchr/naive.rs new file mode 100644 index 0000000..6ebcdae --- /dev/null +++ b/vendor/memchr/src/tests/memchr/naive.rs @@ -0,0 +1,33 @@ +pub(crate) fn memchr(n1: u8, haystack: &[u8]) -> Option { + haystack.iter().position(|&b| b == n1) +} + +pub(crate) fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option { + haystack.iter().position(|&b| b == n1 || b == n2) +} + +pub(crate) fn memchr3( + n1: u8, + n2: u8, + n3: u8, + haystack: &[u8], +) -> Option { + haystack.iter().position(|&b| b == n1 || b == n2 || b == n3) +} + +pub(crate) fn memrchr(n1: u8, haystack: &[u8]) -> Option { + haystack.iter().rposition(|&b| b == n1) +} + +pub(crate) fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option { + haystack.iter().rposition(|&b| b == n1 || b == n2) +} + +pub(crate) fn memrchr3( + n1: u8, + n2: u8, + n3: u8, + haystack: &[u8], +) -> Option { + haystack.iter().rposition(|&b| b == n1 || b == n2 || b == n3) +} diff --git a/vendor/memchr/src/tests/memchr/prop.rs b/vendor/memchr/src/tests/memchr/prop.rs new file mode 100644 
index 0000000..b988260 --- /dev/null +++ b/vendor/memchr/src/tests/memchr/prop.rs @@ -0,0 +1,321 @@ +#[cfg(miri)] +#[macro_export] +macro_rules! define_memchr_quickcheck { + ($($tt:tt)*) => {}; +} + +#[cfg(not(miri))] +#[macro_export] +macro_rules! define_memchr_quickcheck { + ($mod:ident) => { + define_memchr_quickcheck!($mod, new); + }; + ($mod:ident, $cons:ident) => { + use alloc::vec::Vec; + + use quickcheck::TestResult; + + use crate::tests::memchr::{ + naive, + prop::{double_ended_take, naive1_iter, naive2_iter, naive3_iter}, + }; + + quickcheck::quickcheck! { + fn qc_memchr_matches_naive(n1: u8, corpus: Vec) -> TestResult { + let expected = naive::memchr(n1, &corpus); + let got = match $mod::One::$cons(n1) { + None => return TestResult::discard(), + Some(f) => f.find(&corpus), + }; + TestResult::from_bool(expected == got) + } + + fn qc_memrchr_matches_naive(n1: u8, corpus: Vec) -> TestResult { + let expected = naive::memrchr(n1, &corpus); + let got = match $mod::One::$cons(n1) { + None => return TestResult::discard(), + Some(f) => f.rfind(&corpus), + }; + TestResult::from_bool(expected == got) + } + + fn qc_memchr2_matches_naive(n1: u8, n2: u8, corpus: Vec) -> TestResult { + let expected = naive::memchr2(n1, n2, &corpus); + let got = match $mod::Two::$cons(n1, n2) { + None => return TestResult::discard(), + Some(f) => f.find(&corpus), + }; + TestResult::from_bool(expected == got) + } + + fn qc_memrchr2_matches_naive(n1: u8, n2: u8, corpus: Vec) -> TestResult { + let expected = naive::memrchr2(n1, n2, &corpus); + let got = match $mod::Two::$cons(n1, n2) { + None => return TestResult::discard(), + Some(f) => f.rfind(&corpus), + }; + TestResult::from_bool(expected == got) + } + + fn qc_memchr3_matches_naive( + n1: u8, n2: u8, n3: u8, + corpus: Vec + ) -> TestResult { + let expected = naive::memchr3(n1, n2, n3, &corpus); + let got = match $mod::Three::$cons(n1, n2, n3) { + None => return TestResult::discard(), + Some(f) => f.find(&corpus), + }; + 
TestResult::from_bool(expected == got) + } + + fn qc_memrchr3_matches_naive( + n1: u8, n2: u8, n3: u8, + corpus: Vec + ) -> TestResult { + let expected = naive::memrchr3(n1, n2, n3, &corpus); + let got = match $mod::Three::$cons(n1, n2, n3) { + None => return TestResult::discard(), + Some(f) => f.rfind(&corpus), + }; + TestResult::from_bool(expected == got) + } + + fn qc_memchr_double_ended_iter( + needle: u8, data: Vec, take_side: Vec + ) -> TestResult { + // make nonempty + let mut take_side = take_side; + if take_side.is_empty() { take_side.push(true) }; + + let finder = match $mod::One::$cons(needle) { + None => return TestResult::discard(), + Some(finder) => finder, + }; + let iter = finder.iter(&data); + let got = double_ended_take( + iter, + take_side.iter().cycle().cloned(), + ); + let expected = naive1_iter(needle, &data); + + TestResult::from_bool(got.iter().cloned().eq(expected)) + } + + fn qc_memchr2_double_ended_iter( + needle1: u8, needle2: u8, data: Vec, take_side: Vec + ) -> TestResult { + // make nonempty + let mut take_side = take_side; + if take_side.is_empty() { take_side.push(true) }; + + let finder = match $mod::Two::$cons(needle1, needle2) { + None => return TestResult::discard(), + Some(finder) => finder, + }; + let iter = finder.iter(&data); + let got = double_ended_take( + iter, + take_side.iter().cycle().cloned(), + ); + let expected = naive2_iter(needle1, needle2, &data); + + TestResult::from_bool(got.iter().cloned().eq(expected)) + } + + fn qc_memchr3_double_ended_iter( + needle1: u8, needle2: u8, needle3: u8, + data: Vec, take_side: Vec + ) -> TestResult { + // make nonempty + let mut take_side = take_side; + if take_side.is_empty() { take_side.push(true) }; + + let finder = match $mod::Three::$cons(needle1, needle2, needle3) { + None => return TestResult::discard(), + Some(finder) => finder, + }; + let iter = finder.iter(&data); + let got = double_ended_take( + iter, + take_side.iter().cycle().cloned(), + ); + let expected = 
naive3_iter(needle1, needle2, needle3, &data); + + TestResult::from_bool(got.iter().cloned().eq(expected)) + } + + fn qc_memchr1_iter(data: Vec) -> TestResult { + let needle = 0; + let finder = match $mod::One::$cons(needle) { + None => return TestResult::discard(), + Some(finder) => finder, + }; + let got = finder.iter(&data); + let expected = naive1_iter(needle, &data); + TestResult::from_bool(got.eq(expected)) + } + + fn qc_memchr1_rev_iter(data: Vec) -> TestResult { + let needle = 0; + + let finder = match $mod::One::$cons(needle) { + None => return TestResult::discard(), + Some(finder) => finder, + }; + let got = finder.iter(&data).rev(); + let expected = naive1_iter(needle, &data).rev(); + TestResult::from_bool(got.eq(expected)) + } + + fn qc_memchr2_iter(data: Vec) -> TestResult { + let needle1 = 0; + let needle2 = 1; + + let finder = match $mod::Two::$cons(needle1, needle2) { + None => return TestResult::discard(), + Some(finder) => finder, + }; + let got = finder.iter(&data); + let expected = naive2_iter(needle1, needle2, &data); + TestResult::from_bool(got.eq(expected)) + } + + fn qc_memchr2_rev_iter(data: Vec) -> TestResult { + let needle1 = 0; + let needle2 = 1; + + let finder = match $mod::Two::$cons(needle1, needle2) { + None => return TestResult::discard(), + Some(finder) => finder, + }; + let got = finder.iter(&data).rev(); + let expected = naive2_iter(needle1, needle2, &data).rev(); + TestResult::from_bool(got.eq(expected)) + } + + fn qc_memchr3_iter(data: Vec) -> TestResult { + let needle1 = 0; + let needle2 = 1; + let needle3 = 2; + + let finder = match $mod::Three::$cons(needle1, needle2, needle3) { + None => return TestResult::discard(), + Some(finder) => finder, + }; + let got = finder.iter(&data); + let expected = naive3_iter(needle1, needle2, needle3, &data); + TestResult::from_bool(got.eq(expected)) + } + + fn qc_memchr3_rev_iter(data: Vec) -> TestResult { + let needle1 = 0; + let needle2 = 1; + let needle3 = 2; + + let finder = match 
$mod::Three::$cons(needle1, needle2, needle3) { + None => return TestResult::discard(), + Some(finder) => finder, + }; + let got = finder.iter(&data).rev(); + let expected = naive3_iter(needle1, needle2, needle3, &data).rev(); + TestResult::from_bool(got.eq(expected)) + } + + fn qc_memchr1_iter_size_hint(data: Vec) -> TestResult { + // test that the size hint is within reasonable bounds + let needle = 0; + let finder = match $mod::One::$cons(needle) { + None => return TestResult::discard(), + Some(finder) => finder, + }; + let mut iter = finder.iter(&data); + let mut real_count = data + .iter() + .filter(|&&elt| elt == needle) + .count(); + + while let Some(index) = iter.next() { + real_count -= 1; + let (lower, upper) = iter.size_hint(); + assert!(lower <= real_count); + assert!(upper.unwrap() >= real_count); + assert!(upper.unwrap() <= data.len() - index); + } + TestResult::passed() + } + } + }; +} + +// take items from a DEI, taking front for each true and back for each false. +// Return a vector with the concatenation of the fronts and the reverse of the +// backs. 
+#[cfg(not(miri))] +pub(crate) fn double_ended_take( + mut iter: I, + take_side: J, +) -> alloc::vec::Vec +where + I: DoubleEndedIterator, + J: Iterator, +{ + let mut found_front = alloc::vec![]; + let mut found_back = alloc::vec![]; + + for take_front in take_side { + if take_front { + if let Some(pos) = iter.next() { + found_front.push(pos); + } else { + break; + } + } else { + if let Some(pos) = iter.next_back() { + found_back.push(pos); + } else { + break; + } + }; + } + + let mut all_found = found_front; + all_found.extend(found_back.into_iter().rev()); + all_found +} + +// return an iterator of the 0-based indices of haystack that match the needle +#[cfg(not(miri))] +pub(crate) fn naive1_iter<'a>( + n1: u8, + haystack: &'a [u8], +) -> impl DoubleEndedIterator + 'a { + haystack.iter().enumerate().filter(move |&(_, &b)| b == n1).map(|t| t.0) +} + +#[cfg(not(miri))] +pub(crate) fn naive2_iter<'a>( + n1: u8, + n2: u8, + haystack: &'a [u8], +) -> impl DoubleEndedIterator + 'a { + haystack + .iter() + .enumerate() + .filter(move |&(_, &b)| b == n1 || b == n2) + .map(|t| t.0) +} + +#[cfg(not(miri))] +pub(crate) fn naive3_iter<'a>( + n1: u8, + n2: u8, + n3: u8, + haystack: &'a [u8], +) -> impl DoubleEndedIterator + 'a { + haystack + .iter() + .enumerate() + .filter(move |&(_, &b)| b == n1 || b == n2 || b == n3) + .map(|t| t.0) +} diff --git a/vendor/memchr/src/tests/memchr/simple.rs b/vendor/memchr/src/tests/memchr/simple.rs deleted file mode 100644 index bed5b48..0000000 --- a/vendor/memchr/src/tests/memchr/simple.rs +++ /dev/null @@ -1,23 +0,0 @@ -// Simple tests using MIRI. These are intended only to be a simple exercise of -// memchr when tests are run under miri. These are mostly necessary because the -// other tests are far more extensive and take too long to run under miri. -// -// These tests are also run when the 'std' feature is not enabled. 
- -use crate::{memchr, memchr2, memchr3, memrchr, memrchr2, memrchr3}; - -#[test] -fn simple() { - assert_eq!(memchr(b'a', b"abcda"), Some(0)); - assert_eq!(memchr(b'z', b"abcda"), None); - assert_eq!(memchr2(b'a', b'z', b"abcda"), Some(0)); - assert_eq!(memchr2(b'z', b'y', b"abcda"), None); - assert_eq!(memchr3(b'a', b'z', b'b', b"abcda"), Some(0)); - assert_eq!(memchr3(b'z', b'y', b'x', b"abcda"), None); - assert_eq!(memrchr(b'a', b"abcda"), Some(4)); - assert_eq!(memrchr(b'z', b"abcda"), None); - assert_eq!(memrchr2(b'a', b'z', b"abcda"), Some(4)); - assert_eq!(memrchr2(b'z', b'y', b"abcda"), None); - assert_eq!(memrchr3(b'a', b'z', b'b', b"abcda"), Some(4)); - assert_eq!(memrchr3(b'z', b'y', b'x', b"abcda"), None); -} diff --git a/vendor/memchr/src/tests/memchr/testdata.rs b/vendor/memchr/src/tests/memchr/testdata.rs deleted file mode 100644 index 6dda524..0000000 --- a/vendor/memchr/src/tests/memchr/testdata.rs +++ /dev/null @@ -1,351 +0,0 @@ -use std::iter::repeat; - -/// Create a sequence of tests that should be run by memchr implementations. -pub fn memchr_tests() -> Vec { - let mut tests = Vec::new(); - for statict in MEMCHR_TESTS { - assert!(!statict.corpus.contains("%"), "% is not allowed in corpora"); - assert!(!statict.corpus.contains("#"), "# is not allowed in corpora"); - assert!(!statict.needles.contains(&b'%'), "% is an invalid needle"); - assert!(!statict.needles.contains(&b'#'), "# is an invalid needle"); - - let t = MemchrTest { - corpus: statict.corpus.to_string(), - needles: statict.needles.to_vec(), - positions: statict.positions.to_vec(), - }; - tests.push(t.clone()); - tests.extend(t.expand()); - } - tests -} - -/// A set of tests for memchr-like functions. -/// -/// These tests mostly try to cover the short string cases. We cover the longer -/// string cases via the benchmarks (which are tests themselves), via -/// quickcheck tests and via automatic expansion of each test case (by -/// increasing the corpus size). 
Finally, we cover different alignment cases -/// in the tests by varying the starting point of the slice. -const MEMCHR_TESTS: &[MemchrTestStatic] = &[ - // one needle (applied to memchr + memchr2 + memchr3) - MemchrTestStatic { corpus: "a", needles: &[b'a'], positions: &[0] }, - MemchrTestStatic { corpus: "aa", needles: &[b'a'], positions: &[0, 1] }, - MemchrTestStatic { - corpus: "aaa", - needles: &[b'a'], - positions: &[0, 1, 2], - }, - MemchrTestStatic { corpus: "", needles: &[b'a'], positions: &[] }, - MemchrTestStatic { corpus: "z", needles: &[b'a'], positions: &[] }, - MemchrTestStatic { corpus: "zz", needles: &[b'a'], positions: &[] }, - MemchrTestStatic { corpus: "zza", needles: &[b'a'], positions: &[2] }, - MemchrTestStatic { corpus: "zaza", needles: &[b'a'], positions: &[1, 3] }, - MemchrTestStatic { corpus: "zzza", needles: &[b'a'], positions: &[3] }, - MemchrTestStatic { corpus: "\x00a", needles: &[b'a'], positions: &[1] }, - MemchrTestStatic { corpus: "\x00", needles: &[b'\x00'], positions: &[0] }, - MemchrTestStatic { - corpus: "\x00\x00", - needles: &[b'\x00'], - positions: &[0, 1], - }, - MemchrTestStatic { - corpus: "\x00a\x00", - needles: &[b'\x00'], - positions: &[0, 2], - }, - MemchrTestStatic { - corpus: "zzzzzzzzzzzzzzzza", - needles: &[b'a'], - positions: &[16], - }, - MemchrTestStatic { - corpus: "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzza", - needles: &[b'a'], - positions: &[32], - }, - // two needles (applied to memchr2 + memchr3) - MemchrTestStatic { - corpus: "az", - needles: &[b'a', b'z'], - positions: &[0, 1], - }, - MemchrTestStatic { - corpus: "az", - needles: &[b'a', b'z'], - positions: &[0, 1], - }, - MemchrTestStatic { corpus: "az", needles: &[b'x', b'y'], positions: &[] }, - MemchrTestStatic { corpus: "az", needles: &[b'a', b'y'], positions: &[0] }, - MemchrTestStatic { corpus: "az", needles: &[b'x', b'z'], positions: &[1] }, - MemchrTestStatic { - corpus: "yyyyaz", - needles: &[b'a', b'z'], - positions: &[4, 5], - }, - MemchrTestStatic 
{ - corpus: "yyyyaz", - needles: &[b'z', b'a'], - positions: &[4, 5], - }, - // three needles (applied to memchr3) - MemchrTestStatic { - corpus: "xyz", - needles: &[b'x', b'y', b'z'], - positions: &[0, 1, 2], - }, - MemchrTestStatic { - corpus: "zxy", - needles: &[b'x', b'y', b'z'], - positions: &[0, 1, 2], - }, - MemchrTestStatic { - corpus: "zxy", - needles: &[b'x', b'a', b'z'], - positions: &[0, 1], - }, - MemchrTestStatic { - corpus: "zxy", - needles: &[b't', b'a', b'z'], - positions: &[0], - }, - MemchrTestStatic { - corpus: "yxz", - needles: &[b't', b'a', b'z'], - positions: &[2], - }, -]; - -/// A description of a test on a memchr like function. -#[derive(Clone, Debug)] -pub struct MemchrTest { - /// The thing to search. We use `&str` instead of `&[u8]` because they - /// are nicer to write in tests, and we don't miss much since memchr - /// doesn't care about UTF-8. - /// - /// Corpora cannot contain either '%' or '#'. We use these bytes when - /// expanding test cases into many test cases, and we assume they are not - /// used. If they are used, `memchr_tests` will panic. - corpus: String, - /// The needles to search for. This is intended to be an "alternation" of - /// needles. The number of needles may cause this test to be skipped for - /// some memchr variants. For example, a test with 2 needles cannot be used - /// to test `memchr`, but can be used to test `memchr2` and `memchr3`. - /// However, a test with only 1 needle can be used to test all of `memchr`, - /// `memchr2` and `memchr3`. We achieve this by filling in the needles with - /// bytes that we never used in the corpus (such as '#'). - needles: Vec, - /// The positions expected to match for all of the needles. - positions: Vec, -} - -/// Like MemchrTest, but easier to define as a constant. 
-#[derive(Clone, Debug)] -pub struct MemchrTestStatic { - corpus: &'static str, - needles: &'static [u8], - positions: &'static [usize], -} - -impl MemchrTest { - pub fn one Option>(&self, reverse: bool, f: F) { - let needles = match self.needles(1) { - None => return, - Some(needles) => needles, - }; - // We test different alignments here. Since some implementations use - // AVX2, which can read 32 bytes at a time, we test at least that. - // Moreover, with loop unrolling, we sometimes process 64 (sse2) or 128 - // (avx) bytes at a time, so we include that in our offsets as well. - // - // You might think this would cause most needles to not be found, but - // we actually expand our tests to include corpus sizes all the way up - // to >500 bytes, so we should exercise most branches. - for align in 0..130 { - let corpus = self.corpus(align); - assert_eq!( - self.positions(align, reverse).get(0).cloned(), - f(needles[0], corpus.as_bytes()), - "search for {:?} failed in: {:?} (len: {}, alignment: {})", - needles[0] as char, - corpus, - corpus.len(), - align - ); - } - } - - pub fn two Option>( - &self, - reverse: bool, - f: F, - ) { - let needles = match self.needles(2) { - None => return, - Some(needles) => needles, - }; - for align in 0..130 { - let corpus = self.corpus(align); - assert_eq!( - self.positions(align, reverse).get(0).cloned(), - f(needles[0], needles[1], corpus.as_bytes()), - "search for {:?}|{:?} failed in: {:?} \ - (len: {}, alignment: {})", - needles[0] as char, - needles[1] as char, - corpus, - corpus.len(), - align - ); - } - } - - pub fn three Option>( - &self, - reverse: bool, - f: F, - ) { - let needles = match self.needles(3) { - None => return, - Some(needles) => needles, - }; - for align in 0..130 { - let corpus = self.corpus(align); - assert_eq!( - self.positions(align, reverse).get(0).cloned(), - f(needles[0], needles[1], needles[2], corpus.as_bytes()), - "search for {:?}|{:?}|{:?} failed in: {:?} \ - (len: {}, alignment: {})", - 
needles[0] as char, - needles[1] as char, - needles[2] as char, - corpus, - corpus.len(), - align - ); - } - } - - pub fn iter_one<'a, I, F>(&'a self, reverse: bool, f: F) - where - F: FnOnce(u8, &'a [u8]) -> I, - I: Iterator, - { - if let Some(ns) = self.needles(1) { - self.iter(reverse, f(ns[0], self.corpus.as_bytes())); - } - } - - pub fn iter_two<'a, I, F>(&'a self, reverse: bool, f: F) - where - F: FnOnce(u8, u8, &'a [u8]) -> I, - I: Iterator, - { - if let Some(ns) = self.needles(2) { - self.iter(reverse, f(ns[0], ns[1], self.corpus.as_bytes())); - } - } - - pub fn iter_three<'a, I, F>(&'a self, reverse: bool, f: F) - where - F: FnOnce(u8, u8, u8, &'a [u8]) -> I, - I: Iterator, - { - if let Some(ns) = self.needles(3) { - self.iter(reverse, f(ns[0], ns[1], ns[2], self.corpus.as_bytes())); - } - } - - /// Test that the positions yielded by the given iterator match the - /// positions in this test. If reverse is true, then reverse the positions - /// before comparing them. - fn iter>(&self, reverse: bool, it: I) { - assert_eq!( - self.positions(0, reverse), - it.collect::>(), - r"search for {:?} failed in: {:?}", - self.needles.iter().map(|&b| b as char).collect::>(), - self.corpus - ); - } - - /// Expand this test into many variations of the same test. - /// - /// In particular, this will generate more tests with larger corpus sizes. - /// The expected positions are updated to maintain the integrity of the - /// test. - /// - /// This is important in testing a memchr implementation, because there are - /// often different cases depending on the length of the corpus. - /// - /// Note that we extend the corpus by adding `%` bytes, which we - /// don't otherwise use as a needle. - fn expand(&self) -> Vec { - let mut more = Vec::new(); - - // Add bytes to the start of the corpus. 
- for i in 1..515 { - let mut t = self.clone(); - let mut new_corpus: String = repeat('%').take(i).collect(); - new_corpus.push_str(&t.corpus); - t.corpus = new_corpus; - t.positions = t.positions.into_iter().map(|p| p + i).collect(); - more.push(t); - } - // Add bytes to the end of the corpus. - for i in 1..515 { - let mut t = self.clone(); - let padding: String = repeat('%').take(i).collect(); - t.corpus.push_str(&padding); - more.push(t); - } - - more - } - - /// Return the corpus at the given alignment. - /// - /// If the alignment exceeds the length of the corpus, then this returns - /// an empty slice. - fn corpus(&self, align: usize) -> &str { - self.corpus.get(align..).unwrap_or("") - } - - /// Return exactly `count` needles from this test. If this test has less - /// than `count` needles, then add `#` until the number of needles - /// matches `count`. If this test has more than `count` needles, then - /// return `None` (because there is no way to use this test data for a - /// search using fewer needles). - fn needles(&self, count: usize) -> Option> { - if self.needles.len() > count { - return None; - } - - let mut needles = self.needles.to_vec(); - for _ in needles.len()..count { - // we assume # is never used in tests. - needles.push(b'#'); - } - Some(needles) - } - - /// Return the positions in this test, reversed if `reverse` is true. - /// - /// If alignment is given, then all positions greater than or equal to that - /// alignment are offset by the alignment. Positions less than the - /// alignment are dropped. 
- fn positions(&self, align: usize, reverse: bool) -> Vec { - let positions = if reverse { - let mut positions = self.positions.to_vec(); - positions.reverse(); - positions - } else { - self.positions.to_vec() - }; - positions - .into_iter() - .filter(|&p| p >= align) - .map(|p| p - align) - .collect() - } -} diff --git a/vendor/memchr/src/tests/mod.rs b/vendor/memchr/src/tests/mod.rs index f4d406c..259b678 100644 --- a/vendor/memchr/src/tests/mod.rs +++ b/vendor/memchr/src/tests/mod.rs @@ -1,15 +1,15 @@ -mod memchr; +#[macro_use] +pub(crate) mod memchr; +pub(crate) mod packedpair; +#[macro_use] +pub(crate) mod substring; // For debugging, particularly in CI, print out the byte order of the current // target. -#[cfg(all(feature = "std", target_endian = "little"))] #[test] fn byte_order() { - eprintln!("LITTLE ENDIAN"); -} - -#[cfg(all(feature = "std", target_endian = "big"))] -#[test] -fn byte_order() { - eprintln!("BIG ENDIAN"); + #[cfg(target_endian = "little")] + std::eprintln!("LITTLE ENDIAN"); + #[cfg(target_endian = "big")] + std::eprintln!("BIG ENDIAN"); } diff --git a/vendor/memchr/src/tests/packedpair.rs b/vendor/memchr/src/tests/packedpair.rs new file mode 100644 index 0000000..204635b --- /dev/null +++ b/vendor/memchr/src/tests/packedpair.rs @@ -0,0 +1,216 @@ +use alloc::{boxed::Box, vec, vec::Vec}; + +/// A set of "packed pair" test seeds. Each seed serves as the base for the +/// generation of many other tests. In essence, the seed captures the pair of +/// bytes we used for a predicate and first byte among our needle. The tests +/// generated from each seed essentially vary the length of the needle and +/// haystack, while using the rare/first byte configuration from the seed. +/// +/// The purpose of this is to test many different needle/haystack lengths. +/// In particular, some of the vector optimizations might only have bugs +/// in haystacks of a certain size. +const SEEDS: &[Seed] = &[ + // Why not use different 'first' bytes? 
It seemed like a good idea to be + // able to configure it, but when I wrote the test generator below, it + // didn't seem necessary to use for reasons that I forget. + Seed { first: b'x', index1: b'y', index2: b'z' }, + Seed { first: b'x', index1: b'x', index2: b'z' }, + Seed { first: b'x', index1: b'y', index2: b'x' }, + Seed { first: b'x', index1: b'x', index2: b'x' }, + Seed { first: b'x', index1: b'y', index2: b'y' }, +]; + +/// Runs a host of "packed pair" search tests. +/// +/// These tests specifically look for the occurrence of a possible substring +/// match based on a pair of bytes matching at the right offsets. +pub(crate) struct Runner { + fwd: Option< + Box< + dyn FnMut(&[u8], &[u8], u8, u8) -> Option> + 'static, + >, + >, +} + +impl Runner { + /// Create a new test runner for "packed pair" substring search. + pub(crate) fn new() -> Runner { + Runner { fwd: None } + } + + /// Run all tests. This panics on the first failure. + /// + /// If the implementation being tested returns `None` for a particular + /// haystack/needle combination, then that test is skipped. + /// + /// This runs tests on both the forward and reverse implementations given. + /// If either (or both) are missing, then tests for that implementation are + /// skipped. + pub(crate) fn run(self) { + if let Some(mut fwd) = self.fwd { + for seed in SEEDS.iter() { + for t in seed.generate() { + match fwd(&t.haystack, &t.needle, t.index1, t.index2) { + None => continue, + Some(result) => { + assert_eq!( + t.fwd, result, + "FORWARD, needle: {:?}, haystack: {:?}, \ + index1: {:?}, index2: {:?}", + t.needle, t.haystack, t.index1, t.index2, + ) + } + } + } + } + } + } + + /// Set the implementation for forward "packed pair" substring search. + /// + /// If the closure returns `None`, then it is assumed that the given + /// test cannot be applied to the particular implementation and it is + /// skipped. 
For example, if a particular implementation only supports + /// needles or haystacks for some minimum length. + /// + /// If this is not set, then forward "packed pair" search is not tested. + pub(crate) fn fwd( + mut self, + search: impl FnMut(&[u8], &[u8], u8, u8) -> Option> + 'static, + ) -> Runner { + self.fwd = Some(Box::new(search)); + self + } +} + +/// A test that represents the input and expected output to a "packed pair" +/// search function. The test should be able to run with any "packed pair" +/// implementation and get the expected output. +struct Test { + haystack: Vec, + needle: Vec, + index1: u8, + index2: u8, + fwd: Option, +} + +impl Test { + /// Create a new "packed pair" test from a seed and some given offsets to + /// the pair of bytes to use as a predicate in the seed's needle. + /// + /// If a valid test could not be constructed, then None is returned. + /// (Currently, we take the approach of massaging tests to be valid + /// instead of rejecting them outright.) + fn new( + seed: Seed, + index1: usize, + index2: usize, + haystack_len: usize, + needle_len: usize, + fwd: Option, + ) -> Option { + let mut index1: u8 = index1.try_into().unwrap(); + let mut index2: u8 = index2.try_into().unwrap(); + // The '#' byte is never used in a haystack (unless we're expecting + // a match), while the '@' byte is never used in a needle. + let mut haystack = vec![b'@'; haystack_len]; + let mut needle = vec![b'#'; needle_len]; + needle[0] = seed.first; + needle[index1 as usize] = seed.index1; + needle[index2 as usize] = seed.index2; + // If we're expecting a match, then make sure the needle occurs + // in the haystack at the expected position. + if let Some(i) = fwd { + haystack[i..i + needle.len()].copy_from_slice(&needle); + } + // If the operations above lead to rare offsets pointing to the + // non-first occurrence of a byte, then adjust it. 
This might lead + // to redundant tests, but it's simpler than trying to change the + // generation process I think. + if let Some(i) = crate::memchr(seed.index1, &needle) { + index1 = u8::try_from(i).unwrap(); + } + if let Some(i) = crate::memchr(seed.index2, &needle) { + index2 = u8::try_from(i).unwrap(); + } + Some(Test { haystack, needle, index1, index2, fwd }) + } +} + +/// Data that describes a single prefilter test seed. +#[derive(Clone, Copy)] +struct Seed { + first: u8, + index1: u8, + index2: u8, +} + +impl Seed { + const NEEDLE_LENGTH_LIMIT: usize = { + #[cfg(not(miri))] + { + 33 + } + #[cfg(miri)] + { + 5 + } + }; + + const HAYSTACK_LENGTH_LIMIT: usize = { + #[cfg(not(miri))] + { + 65 + } + #[cfg(miri)] + { + 8 + } + }; + + /// Generate a series of prefilter tests from this seed. + fn generate(self) -> impl Iterator { + let len_start = 2; + // The iterator below generates *a lot* of tests. The number of + // tests was chosen somewhat empirically to be "bearable" when + // running the test suite. + // + // We use an iterator here because the collective haystacks of all + // these test cases add up to enough memory to OOM a conservative + // sandbox or a small laptop. 
+ (len_start..=Seed::NEEDLE_LENGTH_LIMIT).flat_map(move |needle_len| { + let index_start = len_start - 1; + (index_start..needle_len).flat_map(move |index1| { + (index1..needle_len).flat_map(move |index2| { + (needle_len..=Seed::HAYSTACK_LENGTH_LIMIT).flat_map( + move |haystack_len| { + Test::new( + self, + index1, + index2, + haystack_len, + needle_len, + None, + ) + .into_iter() + .chain( + (0..=(haystack_len - needle_len)).flat_map( + move |output| { + Test::new( + self, + index1, + index2, + haystack_len, + needle_len, + Some(output), + ) + }, + ), + ) + }, + ) + }) + }) + }) + } +} diff --git a/vendor/memchr/src/tests/substring/mod.rs b/vendor/memchr/src/tests/substring/mod.rs new file mode 100644 index 0000000..dd10cbd --- /dev/null +++ b/vendor/memchr/src/tests/substring/mod.rs @@ -0,0 +1,232 @@ +/*! +This module defines tests and test helpers for substring implementations. +*/ + +use alloc::{ + boxed::Box, + format, + string::{String, ToString}, +}; + +pub(crate) mod naive; +#[macro_use] +pub(crate) mod prop; + +const SEEDS: &'static [Seed] = &[ + Seed::new("", "", Some(0), Some(0)), + Seed::new("", "a", Some(0), Some(1)), + Seed::new("", "ab", Some(0), Some(2)), + Seed::new("", "abc", Some(0), Some(3)), + Seed::new("a", "", None, None), + Seed::new("a", "a", Some(0), Some(0)), + Seed::new("a", "aa", Some(0), Some(1)), + Seed::new("a", "ba", Some(1), Some(1)), + Seed::new("a", "bba", Some(2), Some(2)), + Seed::new("a", "bbba", Some(3), Some(3)), + Seed::new("a", "bbbab", Some(3), Some(3)), + Seed::new("a", "bbbabb", Some(3), Some(3)), + Seed::new("a", "bbbabbb", Some(3), Some(3)), + Seed::new("a", "bbbbbb", None, None), + Seed::new("ab", "", None, None), + Seed::new("ab", "a", None, None), + Seed::new("ab", "b", None, None), + Seed::new("ab", "ab", Some(0), Some(0)), + Seed::new("ab", "aab", Some(1), Some(1)), + Seed::new("ab", "aaab", Some(2), Some(2)), + Seed::new("ab", "abaab", Some(0), Some(3)), + Seed::new("ab", "baaab", Some(3), Some(3)), + 
Seed::new("ab", "acb", None, None), + Seed::new("ab", "abba", Some(0), Some(0)), + Seed::new("abc", "ab", None, None), + Seed::new("abc", "abc", Some(0), Some(0)), + Seed::new("abc", "abcz", Some(0), Some(0)), + Seed::new("abc", "abczz", Some(0), Some(0)), + Seed::new("abc", "zabc", Some(1), Some(1)), + Seed::new("abc", "zzabc", Some(2), Some(2)), + Seed::new("abc", "azbc", None, None), + Seed::new("abc", "abzc", None, None), + Seed::new("abczdef", "abczdefzzzzzzzzzzzzzzzzzzzz", Some(0), Some(0)), + Seed::new("abczdef", "zzzzzzzzzzzzzzzzzzzzabczdef", Some(20), Some(20)), + Seed::new( + "xyz", + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaxyz", + Some(32), + Some(32), + ), + Seed::new("\u{0}\u{15}", "\u{0}\u{15}\u{15}\u{0}", Some(0), Some(0)), + Seed::new("\u{0}\u{1e}", "\u{1e}\u{0}", None, None), +]; + +/// Runs a host of substring search tests. +/// +/// This has support for "partial" substring search implementations only work +/// for a subset of needles/haystacks. For example, the "packed pair" substring +/// search implementation only works for haystacks of some minimum length based +/// of the pair of bytes selected and the size of the vector used. +pub(crate) struct Runner { + fwd: Option< + Box Option> + 'static>, + >, + rev: Option< + Box Option> + 'static>, + >, +} + +impl Runner { + /// Create a new test runner for forward and reverse substring search + /// implementations. + pub(crate) fn new() -> Runner { + Runner { fwd: None, rev: None } + } + + /// Run all tests. This panics on the first failure. + /// + /// If the implementation being tested returns `None` for a particular + /// haystack/needle combination, then that test is skipped. + /// + /// This runs tests on both the forward and reverse implementations given. + /// If either (or both) are missing, then tests for that implementation are + /// skipped. 
+ pub(crate) fn run(self) { + if let Some(mut fwd) = self.fwd { + for seed in SEEDS.iter() { + for t in seed.generate() { + match fwd(t.haystack.as_bytes(), t.needle.as_bytes()) { + None => continue, + Some(result) => { + assert_eq!( + t.fwd, result, + "FORWARD, needle: {:?}, haystack: {:?}", + t.needle, t.haystack, + ); + } + } + } + } + } + if let Some(mut rev) = self.rev { + for seed in SEEDS.iter() { + for t in seed.generate() { + match rev(t.haystack.as_bytes(), t.needle.as_bytes()) { + None => continue, + Some(result) => { + assert_eq!( + t.rev, result, + "REVERSE, needle: {:?}, haystack: {:?}", + t.needle, t.haystack, + ); + } + } + } + } + } + } + + /// Set the implementation for forward substring search. + /// + /// If the closure returns `None`, then it is assumed that the given + /// test cannot be applied to the particular implementation and it is + /// skipped. For example, if a particular implementation only supports + /// needles or haystacks for some minimum length. + /// + /// If this is not set, then forward substring search is not tested. + pub(crate) fn fwd( + mut self, + search: impl FnMut(&[u8], &[u8]) -> Option> + 'static, + ) -> Runner { + self.fwd = Some(Box::new(search)); + self + } + + /// Set the implementation for reverse substring search. + /// + /// If the closure returns `None`, then it is assumed that the given + /// test cannot be applied to the particular implementation and it is + /// skipped. For example, if a particular implementation only supports + /// needles or haystacks for some minimum length. + /// + /// If this is not set, then reverse substring search is not tested. + pub(crate) fn rev( + mut self, + search: impl FnMut(&[u8], &[u8]) -> Option> + 'static, + ) -> Runner { + self.rev = Some(Box::new(search)); + self + } +} + +/// A single substring test for forward and reverse searches. 
+#[derive(Clone, Debug)] +struct Test { + needle: String, + haystack: String, + fwd: Option, + rev: Option, +} + +/// A single substring test for forward and reverse searches. +/// +/// Each seed is valid on its own, but it also serves as a starting point +/// to generate more tests. Namely, we pad out the haystacks with other +/// characters so that we get more complete coverage. This is especially useful +/// for testing vector algorithms that tend to have weird special cases for +/// alignment and loop unrolling. +/// +/// Padding works by assuming certain characters never otherwise appear in a +/// needle or a haystack. Neither should contain a `#` character. +#[derive(Clone, Copy, Debug)] +struct Seed { + needle: &'static str, + haystack: &'static str, + fwd: Option, + rev: Option, +} + +impl Seed { + const MAX_PAD: usize = 34; + + const fn new( + needle: &'static str, + haystack: &'static str, + fwd: Option, + rev: Option, + ) -> Seed { + Seed { needle, haystack, fwd, rev } + } + + fn generate(self) -> impl Iterator { + assert!(!self.needle.contains('#'), "needle must not contain '#'"); + assert!(!self.haystack.contains('#'), "haystack must not contain '#'"); + (0..=Seed::MAX_PAD) + // Generate tests for padding at the beginning of haystack. + .map(move |pad| { + let needle = self.needle.to_string(); + let prefix = "#".repeat(pad); + let haystack = format!("{}{}", prefix, self.haystack); + let fwd = if needle.is_empty() { + Some(0) + } else { + self.fwd.map(|i| pad + i) + }; + let rev = if needle.is_empty() { + Some(haystack.len()) + } else { + self.rev.map(|i| pad + i) + }; + Test { needle, haystack, fwd, rev } + }) + // Generate tests for padding at the end of haystack. 
+ .chain((1..=Seed::MAX_PAD).map(move |pad| { + let needle = self.needle.to_string(); + let suffix = "#".repeat(pad); + let haystack = format!("{}{}", self.haystack, suffix); + let fwd = if needle.is_empty() { Some(0) } else { self.fwd }; + let rev = if needle.is_empty() { + Some(haystack.len()) + } else { + self.rev + }; + Test { needle, haystack, fwd, rev } + })) + } +} diff --git a/vendor/memchr/src/tests/substring/naive.rs b/vendor/memchr/src/tests/substring/naive.rs new file mode 100644 index 0000000..1bc6009 --- /dev/null +++ b/vendor/memchr/src/tests/substring/naive.rs @@ -0,0 +1,45 @@ +/*! +This module defines "naive" implementations of substring search. + +These are sometimes useful to compare with "real" substring implementations. +The idea is that they are so simple that they are unlikely to be incorrect. +*/ + +/// Naively search forwards for the given needle in the given haystack. +pub(crate) fn find(haystack: &[u8], needle: &[u8]) -> Option { + let end = haystack.len().checked_sub(needle.len()).map_or(0, |i| i + 1); + for i in 0..end { + if needle == &haystack[i..i + needle.len()] { + return Some(i); + } + } + None +} + +/// Naively search in reverse for the given needle in the given haystack. +pub(crate) fn rfind(haystack: &[u8], needle: &[u8]) -> Option { + let end = haystack.len().checked_sub(needle.len()).map_or(0, |i| i + 1); + for i in (0..end).rev() { + if needle == &haystack[i..i + needle.len()] { + return Some(i); + } + } + None +} + +#[cfg(test)] +mod tests { + use crate::tests::substring; + + use super::*; + + #[test] + fn forward() { + substring::Runner::new().fwd(|h, n| Some(find(h, n))).run() + } + + #[test] + fn reverse() { + substring::Runner::new().rev(|h, n| Some(rfind(h, n))).run() + } +} diff --git a/vendor/memchr/src/tests/substring/prop.rs b/vendor/memchr/src/tests/substring/prop.rs new file mode 100644 index 0000000..a8352ec --- /dev/null +++ b/vendor/memchr/src/tests/substring/prop.rs @@ -0,0 +1,126 @@ +/*! 
+This module defines a few quickcheck properties for substring search. + +It also provides a forward and reverse macro for conveniently defining +quickcheck tests that run these properties over any substring search +implementation. +*/ + +use crate::tests::substring::naive; + +/// $fwd is a `impl FnMut(haystack, needle) -> Option>`. When the +/// routine returns `None`, then it's skipped, which is useful for substring +/// implementations that don't work for all inputs. +#[macro_export] +macro_rules! define_substring_forward_quickcheck { + ($fwd:expr) => { + #[cfg(not(miri))] + quickcheck::quickcheck! { + fn qc_fwd_prefix_is_substring(bs: alloc::vec::Vec) -> bool { + crate::tests::substring::prop::prefix_is_substring(&bs, $fwd) + } + + fn qc_fwd_suffix_is_substring(bs: alloc::vec::Vec) -> bool { + crate::tests::substring::prop::suffix_is_substring(&bs, $fwd) + } + + fn qc_fwd_matches_naive( + haystack: alloc::vec::Vec, + needle: alloc::vec::Vec + ) -> bool { + crate::tests::substring::prop::same_as_naive( + false, + &haystack, + &needle, + $fwd, + ) + } + } + }; +} + +/// $rev is a `impl FnMut(haystack, needle) -> Option>`. When the +/// routine returns `None`, then it's skipped, which is useful for substring +/// implementations that don't work for all inputs. +#[macro_export] +macro_rules! define_substring_reverse_quickcheck { + ($rev:expr) => { + #[cfg(not(miri))] + quickcheck::quickcheck! { + fn qc_rev_prefix_is_substring(bs: alloc::vec::Vec) -> bool { + crate::tests::substring::prop::prefix_is_substring(&bs, $rev) + } + + fn qc_rev_suffix_is_substring(bs: alloc::vec::Vec) -> bool { + crate::tests::substring::prop::suffix_is_substring(&bs, $rev) + } + + fn qc_rev_matches_naive( + haystack: alloc::vec::Vec, + needle: alloc::vec::Vec + ) -> bool { + crate::tests::substring::prop::same_as_naive( + true, + &haystack, + &needle, + $rev, + ) + } + } + }; +} + +/// Check that every prefix of the given byte string is a substring. 
+pub(crate) fn prefix_is_substring( + bs: &[u8], + mut search: impl FnMut(&[u8], &[u8]) -> Option>, +) -> bool { + for i in 0..bs.len().saturating_sub(1) { + let prefix = &bs[..i]; + let result = match search(bs, prefix) { + None => continue, + Some(result) => result, + }; + if !result.is_some() { + return false; + } + } + true +} + +/// Check that every suffix of the given byte string is a substring. +pub(crate) fn suffix_is_substring( + bs: &[u8], + mut search: impl FnMut(&[u8], &[u8]) -> Option>, +) -> bool { + for i in 0..bs.len().saturating_sub(1) { + let suffix = &bs[i..]; + let result = match search(bs, suffix) { + None => continue, + Some(result) => result, + }; + if !result.is_some() { + return false; + } + } + true +} + +/// Check that naive substring search matches the result of the given search +/// algorithm. +pub(crate) fn same_as_naive( + reverse: bool, + haystack: &[u8], + needle: &[u8], + mut search: impl FnMut(&[u8], &[u8]) -> Option>, +) -> bool { + let result = match search(haystack, needle) { + None => return true, + Some(result) => result, + }; + if reverse { + result == naive::rfind(haystack, needle) + } else { + result == naive::find(haystack, needle) + } +} diff --git a/vendor/memchr/src/vector.rs b/vendor/memchr/src/vector.rs new file mode 100644 index 0000000..f360176 --- /dev/null +++ b/vendor/memchr/src/vector.rs @@ -0,0 +1,515 @@ +/// A trait for describing vector operations used by vectorized searchers. +/// +/// The trait is highly constrained to low level vector operations needed. +/// In general, it was invented mostly to be generic over x86's __m128i and +/// __m256i types. At time of writing, it also supports wasm and aarch64 +/// 128-bit vector types as well. +/// +/// # Safety +/// +/// All methods are not safe since they are intended to be implemented using +/// vendor intrinsics, which are also not safe. 
Callers must ensure that the +/// appropriate target features are enabled in the calling function, and that +/// the current CPU supports them. All implementations should avoid marking the +/// routines with #[target_feature] and instead mark them as #[inline(always)] +/// to ensure they get appropriately inlined. (inline(always) cannot be used +/// with target_feature.) +pub(crate) trait Vector: Copy + core::fmt::Debug { + /// The number of bits in the vector. + const BITS: usize; + /// The number of bytes in the vector. That is, this is the size of the + /// vector in memory. + const BYTES: usize; + /// The bits that must be zero in order for a `*const u8` pointer to be + /// correctly aligned to read vector values. + const ALIGN: usize; + + /// The type of the value returned by `Vector::movemask`. + /// + /// This supports abstracting over the specific representation used in + /// order to accommodate different representations in different ISAs. + type Mask: MoveMask; + + /// Create a vector with 8-bit lanes with the given byte repeated into each + /// lane. + unsafe fn splat(byte: u8) -> Self; + + /// Read a vector-size number of bytes from the given pointer. The pointer + /// must be aligned to the size of the vector. + /// + /// # Safety + /// + /// Callers must guarantee that at least `BYTES` bytes are readable from + /// `data` and that `data` is aligned to a `BYTES` boundary. + unsafe fn load_aligned(data: *const u8) -> Self; + + /// Read a vector-size number of bytes from the given pointer. The pointer + /// does not need to be aligned. + /// + /// # Safety + /// + /// Callers must guarantee that at least `BYTES` bytes are readable from + /// `data`. 
+ unsafe fn load_unaligned(data: *const u8) -> Self; + + /// _mm_movemask_epi8 or _mm256_movemask_epi8 + unsafe fn movemask(self) -> Self::Mask; + /// _mm_cmpeq_epi8 or _mm256_cmpeq_epi8 + unsafe fn cmpeq(self, vector2: Self) -> Self; + /// _mm_and_si128 or _mm256_and_si256 + unsafe fn and(self, vector2: Self) -> Self; + /// _mm_or or _mm256_or_si256 + unsafe fn or(self, vector2: Self) -> Self; + /// Returns true if and only if `Self::movemask` would return a mask that + /// contains at least one non-zero bit. + unsafe fn movemask_will_have_non_zero(self) -> bool { + self.movemask().has_non_zero() + } +} + +/// A trait that abstracts over a vector-to-scalar operation called +/// "move mask." +/// +/// On x86-64, this is `_mm_movemask_epi8` for SSE2 and `_mm256_movemask_epi8` +/// for AVX2. It takes a vector of `u8` lanes and returns a scalar where the +/// `i`th bit is set if and only if the most significant bit in the `i`th lane +/// of the vector is set. The simd128 ISA for wasm32 also supports this +/// exact same operation natively. +/// +/// ... But aarch64 doesn't. So we have to fake it with more instructions and +/// a slightly different representation. We could do extra work to unify the +/// representations, but then would require additional costs in the hot path +/// for `memchr` and `packedpair`. So instead, we abstraction over the specific +/// representation with this trait an ddefine the operations we actually need. +pub(crate) trait MoveMask: Copy + core::fmt::Debug { + /// Return a mask that is all zeros except for the least significant `n` + /// lanes in a corresponding vector. + fn all_zeros_except_least_significant(n: usize) -> Self; + + /// Returns true if and only if this mask has a a non-zero bit anywhere. + fn has_non_zero(self) -> bool; + + /// Returns the number of bits set to 1 in this mask. + fn count_ones(self) -> usize; + + /// Does a bitwise `and` operation between `self` and `other`. 
+ fn and(self, other: Self) -> Self; + + /// Does a bitwise `or` operation between `self` and `other`. + fn or(self, other: Self) -> Self; + + /// Returns a mask that is equivalent to `self` but with the least + /// significant 1-bit set to 0. + fn clear_least_significant_bit(self) -> Self; + + /// Returns the offset of the first non-zero lane this mask represents. + fn first_offset(self) -> usize; + + /// Returns the offset of the last non-zero lane this mask represents. + fn last_offset(self) -> usize; +} + +/// This is a "sensible" movemask implementation where each bit represents +/// whether the most significant bit is set in each corresponding lane of a +/// vector. This is used on x86-64 and wasm, but such a mask is more expensive +/// to get on aarch64 so we use something a little different. +/// +/// We call this "sensible" because this is what we get using native sse/avx +/// movemask instructions. But neon has no such native equivalent. +#[derive(Clone, Copy, Debug)] +pub(crate) struct SensibleMoveMask(u32); + +impl SensibleMoveMask { + /// Get the mask in a form suitable for computing offsets. + /// + /// Basically, this normalizes to little endian. On big endian, this swaps + /// the bytes. 
+ #[inline(always)] + fn get_for_offset(self) -> u32 { + #[cfg(target_endian = "big")] + { + self.0.swap_bytes() + } + #[cfg(target_endian = "little")] + { + self.0 + } + } +} + +impl MoveMask for SensibleMoveMask { + #[inline(always)] + fn all_zeros_except_least_significant(n: usize) -> SensibleMoveMask { + debug_assert!(n < 32); + SensibleMoveMask(!((1 << n) - 1)) + } + + #[inline(always)] + fn has_non_zero(self) -> bool { + self.0 != 0 + } + + #[inline(always)] + fn count_ones(self) -> usize { + self.0.count_ones() as usize + } + + #[inline(always)] + fn and(self, other: SensibleMoveMask) -> SensibleMoveMask { + SensibleMoveMask(self.0 & other.0) + } + + #[inline(always)] + fn or(self, other: SensibleMoveMask) -> SensibleMoveMask { + SensibleMoveMask(self.0 | other.0) + } + + #[inline(always)] + fn clear_least_significant_bit(self) -> SensibleMoveMask { + SensibleMoveMask(self.0 & (self.0 - 1)) + } + + #[inline(always)] + fn first_offset(self) -> usize { + // We are dealing with little endian here (and if we aren't, we swap + // the bytes so we are in practice), where the most significant byte + // is at a higher address. That means the least significant bit that + // is set corresponds to the position of our first matching byte. + // That position corresponds to the number of zeros after the least + // significant bit. + self.get_for_offset().trailing_zeros() as usize + } + + #[inline(always)] + fn last_offset(self) -> usize { + // We are dealing with little endian here (and if we aren't, we swap + // the bytes so we are in practice), where the most significant byte is + // at a higher address. That means the most significant bit that is set + // corresponds to the position of our last matching byte. The position + // from the end of the mask is therefore the number of leading zeros + // in a 32 bit integer, and the position from the start of the mask is + // therefore 32 - (leading zeros) - 1. 
+ 32 - self.get_for_offset().leading_zeros() as usize - 1 + } +} + +#[cfg(target_arch = "x86_64")] +mod x86sse2 { + use core::arch::x86_64::*; + + use super::{SensibleMoveMask, Vector}; + + impl Vector for __m128i { + const BITS: usize = 128; + const BYTES: usize = 16; + const ALIGN: usize = Self::BYTES - 1; + + type Mask = SensibleMoveMask; + + #[inline(always)] + unsafe fn splat(byte: u8) -> __m128i { + _mm_set1_epi8(byte as i8) + } + + #[inline(always)] + unsafe fn load_aligned(data: *const u8) -> __m128i { + _mm_load_si128(data as *const __m128i) + } + + #[inline(always)] + unsafe fn load_unaligned(data: *const u8) -> __m128i { + _mm_loadu_si128(data as *const __m128i) + } + + #[inline(always)] + unsafe fn movemask(self) -> SensibleMoveMask { + SensibleMoveMask(_mm_movemask_epi8(self) as u32) + } + + #[inline(always)] + unsafe fn cmpeq(self, vector2: Self) -> __m128i { + _mm_cmpeq_epi8(self, vector2) + } + + #[inline(always)] + unsafe fn and(self, vector2: Self) -> __m128i { + _mm_and_si128(self, vector2) + } + + #[inline(always)] + unsafe fn or(self, vector2: Self) -> __m128i { + _mm_or_si128(self, vector2) + } + } +} + +#[cfg(target_arch = "x86_64")] +mod x86avx2 { + use core::arch::x86_64::*; + + use super::{SensibleMoveMask, Vector}; + + impl Vector for __m256i { + const BITS: usize = 256; + const BYTES: usize = 32; + const ALIGN: usize = Self::BYTES - 1; + + type Mask = SensibleMoveMask; + + #[inline(always)] + unsafe fn splat(byte: u8) -> __m256i { + _mm256_set1_epi8(byte as i8) + } + + #[inline(always)] + unsafe fn load_aligned(data: *const u8) -> __m256i { + _mm256_load_si256(data as *const __m256i) + } + + #[inline(always)] + unsafe fn load_unaligned(data: *const u8) -> __m256i { + _mm256_loadu_si256(data as *const __m256i) + } + + #[inline(always)] + unsafe fn movemask(self) -> SensibleMoveMask { + SensibleMoveMask(_mm256_movemask_epi8(self) as u32) + } + + #[inline(always)] + unsafe fn cmpeq(self, vector2: Self) -> __m256i { + _mm256_cmpeq_epi8(self, 
vector2) + } + + #[inline(always)] + unsafe fn and(self, vector2: Self) -> __m256i { + _mm256_and_si256(self, vector2) + } + + #[inline(always)] + unsafe fn or(self, vector2: Self) -> __m256i { + _mm256_or_si256(self, vector2) + } + } +} + +#[cfg(target_arch = "aarch64")] +mod aarch64neon { + use core::arch::aarch64::*; + + use super::{MoveMask, Vector}; + + impl Vector for uint8x16_t { + const BITS: usize = 128; + const BYTES: usize = 16; + const ALIGN: usize = Self::BYTES - 1; + + type Mask = NeonMoveMask; + + #[inline(always)] + unsafe fn splat(byte: u8) -> uint8x16_t { + vdupq_n_u8(byte) + } + + #[inline(always)] + unsafe fn load_aligned(data: *const u8) -> uint8x16_t { + // I've tried `data.cast::().read()` instead, but + // couldn't observe any benchmark differences. + Self::load_unaligned(data) + } + + #[inline(always)] + unsafe fn load_unaligned(data: *const u8) -> uint8x16_t { + vld1q_u8(data) + } + + #[inline(always)] + unsafe fn movemask(self) -> NeonMoveMask { + let asu16s = vreinterpretq_u16_u8(self); + let mask = vshrn_n_u16(asu16s, 4); + let asu64 = vreinterpret_u64_u8(mask); + let scalar64 = vget_lane_u64(asu64, 0); + NeonMoveMask(scalar64 & 0x8888888888888888) + } + + #[inline(always)] + unsafe fn cmpeq(self, vector2: Self) -> uint8x16_t { + vceqq_u8(self, vector2) + } + + #[inline(always)] + unsafe fn and(self, vector2: Self) -> uint8x16_t { + vandq_u8(self, vector2) + } + + #[inline(always)] + unsafe fn or(self, vector2: Self) -> uint8x16_t { + vorrq_u8(self, vector2) + } + + /// This is the only interesting implementation of this routine. + /// Basically, instead of doing the "shift right narrow" dance, we use + /// adajacent folding max to determine whether there are any non-zero + /// bytes in our mask. If there are, *then* we'll do the "shift right + /// narrow" dance. In benchmarks, this does lead to slightly better + /// throughput, but the win doesn't appear huge. 
+ #[inline(always)] + unsafe fn movemask_will_have_non_zero(self) -> bool { + let low = vreinterpretq_u64_u8(vpmaxq_u8(self, self)); + vgetq_lane_u64(low, 0) != 0 + } + } + + /// Neon doesn't have a `movemask` that works like the one in x86-64, so we + /// wind up using a different method[1]. The different method also produces + /// a mask, but 4 bits are set in the neon case instead of a single bit set + /// in the x86-64 case. We do an extra step to zero out 3 of the 4 bits, + /// but we still wind up with at least 3 zeroes between each set bit. This + /// generally means that we need to do some division by 4 before extracting + /// offsets. + /// + /// In fact, the existence of this type is the entire reason that we have + /// the `MoveMask` trait in the first place. This basically lets us keep + /// the different representations of masks without being forced to unify + /// them into a single representation, which could result in extra and + /// unnecessary work. + /// + /// [1]: https://community.arm.com/arm-community-blogs/b/infrastructure-solutions-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon + #[derive(Clone, Copy, Debug)] + pub(crate) struct NeonMoveMask(u64); + + impl NeonMoveMask { + /// Get the mask in a form suitable for computing offsets. + /// + /// Basically, this normalizes to little endian. On big endian, this + /// swaps the bytes. 
+ #[inline(always)] + fn get_for_offset(self) -> u64 { + #[cfg(target_endian = "big")] + { + self.0.swap_bytes() + } + #[cfg(target_endian = "little")] + { + self.0 + } + } + } + + impl MoveMask for NeonMoveMask { + #[inline(always)] + fn all_zeros_except_least_significant(n: usize) -> NeonMoveMask { + debug_assert!(n < 16); + NeonMoveMask(!(((1 << n) << 2) - 1)) + } + + #[inline(always)] + fn has_non_zero(self) -> bool { + self.0 != 0 + } + + #[inline(always)] + fn count_ones(self) -> usize { + self.0.count_ones() as usize + } + + #[inline(always)] + fn and(self, other: NeonMoveMask) -> NeonMoveMask { + NeonMoveMask(self.0 & other.0) + } + + #[inline(always)] + fn or(self, other: NeonMoveMask) -> NeonMoveMask { + NeonMoveMask(self.0 | other.0) + } + + #[inline(always)] + fn clear_least_significant_bit(self) -> NeonMoveMask { + NeonMoveMask(self.0 & (self.0 - 1)) + } + + #[inline(always)] + fn first_offset(self) -> usize { + // We are dealing with little endian here (and if we aren't, + // we swap the bytes so we are in practice), where the most + // significant byte is at a higher address. That means the least + // significant bit that is set corresponds to the position of our + // first matching byte. That position corresponds to the number of + // zeros after the least significant bit. + // + // Note that unlike `SensibleMoveMask`, this mask has its bits + // spread out over 64 bits instead of 16 bits (for a 128 bit + // vector). Namely, where as x86-64 will turn + // + // 0x00 0xFF 0x00 0x00 0xFF + // + // into 10010, our neon approach will turn it into + // + // 10000000000010000000 + // + // And this happens because neon doesn't have a native `movemask` + // instruction, so we kind of fake it[1]. Thus, we divide the + // number of trailing zeros by 4 to get the "real" offset. 
+ // + // [1]: https://community.arm.com/arm-community-blogs/b/infrastructure-solutions-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon + (self.get_for_offset().trailing_zeros() >> 2) as usize + } + + #[inline(always)] + fn last_offset(self) -> usize { + // See comment in `first_offset` above. This is basically the same, + // but coming from the other direction. + 16 - (self.get_for_offset().leading_zeros() >> 2) as usize - 1 + } + } +} + +#[cfg(target_arch = "wasm32")] +mod wasm_simd128 { + use core::arch::wasm32::*; + + use super::{SensibleMoveMask, Vector}; + + impl Vector for v128 { + const BITS: usize = 128; + const BYTES: usize = 16; + const ALIGN: usize = Self::BYTES - 1; + + type Mask = SensibleMoveMask; + + #[inline(always)] + unsafe fn splat(byte: u8) -> v128 { + u8x16_splat(byte) + } + + #[inline(always)] + unsafe fn load_aligned(data: *const u8) -> v128 { + *data.cast() + } + + #[inline(always)] + unsafe fn load_unaligned(data: *const u8) -> v128 { + v128_load(data.cast()) + } + + #[inline(always)] + unsafe fn movemask(self) -> SensibleMoveMask { + SensibleMoveMask(u8x16_bitmask(self).into()) + } + + #[inline(always)] + unsafe fn cmpeq(self, vector2: Self) -> v128 { + u8x16_eq(self, vector2) + } + + #[inline(always)] + unsafe fn and(self, vector2: Self) -> v128 { + v128_and(self, vector2) + } + + #[inline(always)] + unsafe fn or(self, vector2: Self) -> v128 { + v128_or(self, vector2) + } + } +} diff --git a/vendor/proc-macro2/.cargo-checksum.json b/vendor/proc-macro2/.cargo-checksum.json index 1c03247..9368d27 100644 --- a/vendor/proc-macro2/.cargo-checksum.json +++ b/vendor/proc-macro2/.cargo-checksum.json @@ -1 +1 @@ 
-{"files":{"Cargo.toml":"2a5128e9951d0b4cd6aa4862baa7da3afed9c5acd9e95ccd61742eed2ec0e098","LICENSE-APACHE":"62c7a1e35f56406896d7aa7ca52d0cc0d272ac022b5d2796e7d6905db8a3636a","LICENSE-MIT":"23f18e03dc49df91622fe2a76176497404e46ced8a715d9d2b67a7446571cca3","README.md":"32cbd395594db59ecc43d7866cfa2663f3687bb7df631781d60ae83200dae8a8","build.rs":"211e23cee324ff7e1784b02ae49d8aafa0de6ff100e352d38eb05903f426fc2a","src/convert.rs":"0f8e0f472e49e0be79e65654065a752df1ac9ad55da43952ee7c86cb56940171","src/detection.rs":"ed9a5f9a979ab01247d7a68eeb1afa3c13209334c5bfff0f9289cb07e5bb4e8b","src/extra.rs":"3447c89e4d83a94ebdf3599adb64050b92502da2a1f99a5cf36706e52d2c56dc","src/fallback.rs":"be0c5d0a78b88462a5508e2d2b314d40df595dff989ac6da926cd661bfe19cbe","src/lib.rs":"9d3d8421094c3ba3786d7c15ae5ef93dccfab7790329e9520282f975e61b966a","src/location.rs":"f55d2e61f1bb1af65e14ed04c9e91eb1ddbf8430e8c05f2048d1cd538d27368e","src/marker.rs":"344a8394f06a1d43355b514920e7e3c0c6dce507be767e3a590bbe3552edd110","src/parse.rs":"5b0171c73228f4daa350af678c3e593e08207fd989ebd4f1c77fca097f87e76b","src/rcvec.rs":"6233164ae0afc5c74ddc9e27c7869ec523385a3e5bdb83c3662841e78af14982","src/wrapper.rs":"be58a1d86bb422e6ff7668fce3244ee8ca39dc9128272cabb4a0f0c739e36794","tests/comments.rs":"31115b3a56c83d93eef2fb4c9566bf4543e302560732986161b98aef504785ed","tests/features.rs":"a86deb8644992a4eb64d9fd493eff16f9cf9c5cb6ade3a634ce0c990cf87d559","tests/marker.rs":"bc86b7260e29dfc8cd3e01b0d3fb9e88f17442dc83235f264e8cacc5ab4fe23d","tests/test.rs":"0445ac5c5993b5195c2bcba766984349e5b0bc69f180f45562411bb5cd6bd03b","tests/test_fmt.rs":"9357769945784354909259084ec8b34d2aa52081dd3967cac6dae3a5e3df3bc0","tests/test_size.rs":"acf05963c1e62052d769d237b50844a2c59b4182b491231b099a4f74e5456ab0"},"package":"2b63bdb0cd06f1f4dedf69b254734f9b45af66e4a031e42a7480257d9898b435"} \ No newline at end of file 
+{"files":{"Cargo.toml":"cf58b9b5cd9abb5dca91d4d61809edcf425156bd8e9bb69e0398e00e74aa7af1","LICENSE-APACHE":"62c7a1e35f56406896d7aa7ca52d0cc0d272ac022b5d2796e7d6905db8a3636a","LICENSE-MIT":"23f18e03dc49df91622fe2a76176497404e46ced8a715d9d2b67a7446571cca3","README.md":"c609b6865476d6c35879784e9155367a97a0da496aa5c3c61488440a20f59883","build.rs":"8b4facae0d125ca3b437b4f5ebcd6ea3da3fcc65fcfc2cf357ae544423aa4568","build/probe.rs":"971fd2178dc506ccdc5c2065c37b77696a4aee8e00330ca52625db4a857f68d3","rust-toolchain.toml":"6bbb61302978c736b2da03e4fb40e3beab908f85d533ab46fd541e637b5f3e0f","src/detection.rs":"ed9a5f9a979ab01247d7a68eeb1afa3c13209334c5bfff0f9289cb07e5bb4e8b","src/extra.rs":"7c3864497cb5298fd5d0e9f5ae5797860338a9a4263220a8e8eabecda1583797","src/fallback.rs":"2e668a1ed90243e6f627a0c85c73c61f5c4107d82e149de5960d806d5eae99f9","src/lib.rs":"ea7a33758942e0911d5545e57b6a726c5bca7960fe3eed58a46b692244ac575a","src/location.rs":"f55d2e61f1bb1af65e14ed04c9e91eb1ddbf8430e8c05f2048d1cd538d27368e","src/marker.rs":"c11c5a1be8bdf18be3fcd224393f350a9aae7ce282e19ce583c84910c6903a8f","src/parse.rs":"4b77cddbc2752bc4d38a65acd8b96b6786c5220d19b1e1b37810257b5d24132d","src/rcvec.rs":"1c3c48c4f819927cc445ae15ca3bb06775feff2fd1cb21901ae4c40c7e6b4e82","src/wrapper.rs":"029fc07e8adbea2dd2a0aab49b07c3fb9cd8bc0539ea85bf9166c46922933742","tests/comments.rs":"31115b3a56c83d93eef2fb4c9566bf4543e302560732986161b98aef504785ed","tests/features.rs":"a86deb8644992a4eb64d9fd493eff16f9cf9c5cb6ade3a634ce0c990cf87d559","tests/marker.rs":"3190ee07dae510251f360db701ce257030f94a479b6689c3a9ef804bd5d8d099","tests/test.rs":"de9163d0b7d53a56de4c3d00acb415785e55c161becfe37b2a0c5d8d1931f14f","tests/test_fmt.rs":"b7743b612af65f2c88cbe109d50a093db7aa7e87f9e37bf45b7bbaeb240aa020","tests/test_size.rs":"acf05963c1e62052d769d237b50844a2c59b4182b491231b099a4f74e5456ab0"},"package":"e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae"} \ No newline at end of file diff --git 
a/vendor/proc-macro2/Cargo.toml b/vendor/proc-macro2/Cargo.toml index 76c0df6..65506c2 100644 --- a/vendor/proc-macro2/Cargo.toml +++ b/vendor/proc-macro2/Cargo.toml @@ -10,10 +10,10 @@ # See Cargo.toml.orig for the original contents. [package] -edition = "2018" -rust-version = "1.31" +edition = "2021" +rust-version = "1.56" name = "proc-macro2" -version = "1.0.56" +version = "1.0.78" authors = [ "David Tolnay ", "Alex Crichton ", @@ -40,6 +40,7 @@ rustdoc-args = [ "procmacro2_semver_exempt", "--cfg", "doc_cfg", + "--generate-link-to-definition", ] targets = ["x86_64-unknown-linux-gnu"] @@ -52,13 +53,22 @@ doc-scrape-examples = false [dependencies.unicode-ident] version = "1.0" +[dev-dependencies.flate2] +version = "1.0" + [dev-dependencies.quote] version = "1.0" default_features = false +[dev-dependencies.rayon] +version = "1.0" + [dev-dependencies.rustversion] version = "1" +[dev-dependencies.tar] +version = "0.4" + [features] default = ["proc-macro"] nightly = [] diff --git a/vendor/proc-macro2/README.md b/vendor/proc-macro2/README.md index 131ba51..3a29ce8 100644 --- a/vendor/proc-macro2/README.md +++ b/vendor/proc-macro2/README.md @@ -52,7 +52,7 @@ pub fn my_derive(input: proc_macro::TokenStream) -> proc_macro::TokenStream { If parsing with [Syn], you'll use [`parse_macro_input!`] instead to propagate parse errors correctly back to the compiler when parsing fails. -[`parse_macro_input!`]: https://docs.rs/syn/1.0/syn/macro.parse_macro_input.html +[`parse_macro_input!`]: https://docs.rs/syn/2.0/syn/macro.parse_macro_input.html ## Unstable features @@ -62,7 +62,7 @@ proc-macro2 by default. To opt into the additional APIs available in the most recent nightly compiler, the `procmacro2_semver_exempt` config flag must be passed to rustc. We will -polyfill those nightly-only APIs back to Rust 1.31.0. As these are unstable APIs +polyfill those nightly-only APIs back to Rust 1.56.0. 
As these are unstable APIs that track the nightly compiler, minor versions of proc-macro2 may make breaking changes to them at any time. diff --git a/vendor/proc-macro2/build.rs b/vendor/proc-macro2/build.rs index 3ee8a9f..3347f87 100644 --- a/vendor/proc-macro2/build.rs +++ b/vendor/proc-macro2/build.rs @@ -1,11 +1,5 @@ // rustc-cfg emitted by the build script: // -// "use_proc_macro" -// Link to extern crate proc_macro. Available on any compiler and any target -// except wasm32. Requires "proc-macro" Cargo cfg to be enabled (default is -// enabled). On wasm32 we never link to proc_macro even if "proc-macro" cfg -// is enabled. -// // "wrap_proc_macro" // Wrap types from libproc_macro rather than polyfilling the whole API. // Enabled on rustc 1.29+ as long as procmacro2_semver_exempt is not set, @@ -41,21 +35,14 @@ // 1.57+. use std::env; -use std::process::{self, Command}; +use std::ffi::OsString; +use std::path::Path; +use std::process::{self, Command, Stdio}; use std::str; +use std::u32; fn main() { - println!("cargo:rerun-if-changed=build.rs"); - - let version = match rustc_version() { - Some(version) => version, - None => return, - }; - - if version.minor < 31 { - eprintln!("Minimum supported rustc version is 1.31"); - process::exit(1); - } + let rustc = rustc_minor_version().unwrap_or(u32::MAX); let docs_rs = env::var_os("DOCS_RS").is_some(); let semver_exempt = cfg!(procmacro2_semver_exempt) || docs_rs; @@ -68,124 +55,148 @@ fn main() { println!("cargo:rustc-cfg=span_locations"); } - if version.minor < 32 { - println!("cargo:rustc-cfg=no_libprocmacro_unwind_safe"); - } - - if version.minor < 34 { - println!("cargo:rustc-cfg=no_try_from"); + if rustc < 57 { + println!("cargo:rustc-cfg=no_is_available"); } - if version.minor < 39 { - println!("cargo:rustc-cfg=no_bind_by_move_pattern_guard"); + if rustc < 66 { + println!("cargo:rustc-cfg=no_source_text"); } - if version.minor < 44 { - println!("cargo:rustc-cfg=no_lexerror_display"); + if !cfg!(feature = 
"proc-macro") { + println!("cargo:rerun-if-changed=build.rs"); + return; } - if version.minor < 45 { - println!("cargo:rustc-cfg=no_hygiene"); + println!("cargo:rerun-if-changed=build/probe.rs"); + + let proc_macro_span; + let consider_rustc_bootstrap; + if compile_probe(false) { + // This is a nightly or dev compiler, so it supports unstable features + // regardless of RUSTC_BOOTSTRAP. No need to rerun build script if + // RUSTC_BOOTSTRAP is changed. + proc_macro_span = true; + consider_rustc_bootstrap = false; + } else if let Some(rustc_bootstrap) = env::var_os("RUSTC_BOOTSTRAP") { + if compile_probe(true) { + // This is a stable or beta compiler for which the user has set + // RUSTC_BOOTSTRAP to turn on unstable features. Rerun build script + // if they change it. + proc_macro_span = true; + consider_rustc_bootstrap = true; + } else if rustc_bootstrap == "1" { + // This compiler does not support the proc macro Span API in the + // form that proc-macro2 expects. No need to pay attention to + // RUSTC_BOOTSTRAP. + proc_macro_span = false; + consider_rustc_bootstrap = false; + } else { + // This is a stable or beta compiler for which RUSTC_BOOTSTRAP is + // set to restrict the use of unstable features by this crate. + proc_macro_span = false; + consider_rustc_bootstrap = true; + } + } else { + // Without RUSTC_BOOTSTRAP, this compiler does not support the proc + // macro Span API in the form that proc-macro2 expects, but try again if + // the user turns on unstable features. 
+ proc_macro_span = false; + consider_rustc_bootstrap = true; } - if version.minor < 47 { - println!("cargo:rustc-cfg=no_ident_new_raw"); + if proc_macro_span || !semver_exempt { + println!("cargo:rustc-cfg=wrap_proc_macro"); } - if version.minor < 54 { - println!("cargo:rustc-cfg=no_literal_from_str"); + if proc_macro_span { + println!("cargo:rustc-cfg=proc_macro_span"); } - if version.minor < 55 { - println!("cargo:rustc-cfg=no_group_open_close"); + if semver_exempt && proc_macro_span { + println!("cargo:rustc-cfg=super_unstable"); } - if version.minor < 57 { - println!("cargo:rustc-cfg=no_is_available"); + if consider_rustc_bootstrap { + println!("cargo:rerun-if-env-changed=RUSTC_BOOTSTRAP"); } +} - if version.minor < 66 { - println!("cargo:rustc-cfg=no_source_text"); +fn compile_probe(rustc_bootstrap: bool) -> bool { + if env::var_os("RUSTC_STAGE").is_some() { + // We are running inside rustc bootstrap. This is a highly non-standard + // environment with issues such as: + // + // https://github.com/rust-lang/cargo/issues/11138 + // https://github.com/rust-lang/rust/issues/114839 + // + // Let's just not use nightly features here. + return false; } - let target = env::var("TARGET").unwrap(); - if !enable_use_proc_macro(&target) { - return; - } + let rustc = cargo_env_var("RUSTC"); + let out_dir = cargo_env_var("OUT_DIR"); + let probefile = Path::new("build").join("probe.rs"); - println!("cargo:rustc-cfg=use_proc_macro"); + // Make sure to pick up Cargo rustc configuration. + let mut cmd = if let Some(wrapper) = env::var_os("RUSTC_WRAPPER") { + let mut cmd = Command::new(wrapper); + // The wrapper's first argument is supposed to be the path to rustc. 
+ cmd.arg(rustc); + cmd + } else { + Command::new(rustc) + }; - if version.nightly || !semver_exempt { - println!("cargo:rustc-cfg=wrap_proc_macro"); + if !rustc_bootstrap { + cmd.env_remove("RUSTC_BOOTSTRAP"); } - if version.nightly - && feature_allowed("proc_macro_span") - && feature_allowed("proc_macro_span_shrink") - { - println!("cargo:rustc-cfg=proc_macro_span"); - } + cmd.stderr(Stdio::null()) + .arg("--edition=2021") + .arg("--crate-name=proc_macro2") + .arg("--crate-type=lib") + .arg("--emit=dep-info,metadata") + .arg("--out-dir") + .arg(out_dir) + .arg(probefile); - if semver_exempt && version.nightly { - println!("cargo:rustc-cfg=super_unstable"); + if let Some(target) = env::var_os("TARGET") { + cmd.arg("--target").arg(target); } -} -fn enable_use_proc_macro(target: &str) -> bool { - // wasm targets don't have the `proc_macro` crate, disable this feature. - if target.contains("wasm32") { - return false; + // If Cargo wants to set RUSTFLAGS, use that. + if let Ok(rustflags) = env::var("CARGO_ENCODED_RUSTFLAGS") { + if !rustflags.is_empty() { + for arg in rustflags.split('\x1f') { + cmd.arg(arg); + } + } } - // Otherwise, only enable it if our feature is actually enabled. 
- cfg!(feature = "proc-macro") -} - -struct RustcVersion { - minor: u32, - nightly: bool, + match cmd.status() { + Ok(status) => status.success(), + Err(_) => false, + } } -fn rustc_version() -> Option { - let rustc = env::var_os("RUSTC")?; +fn rustc_minor_version() -> Option { + let rustc = cargo_env_var("RUSTC"); let output = Command::new(rustc).arg("--version").output().ok()?; let version = str::from_utf8(&output.stdout).ok()?; - let nightly = version.contains("nightly") || version.contains("dev"); let mut pieces = version.split('.'); if pieces.next() != Some("rustc 1") { return None; } - let minor = pieces.next()?.parse().ok()?; - Some(RustcVersion { minor, nightly }) + pieces.next()?.parse().ok() } -fn feature_allowed(feature: &str) -> bool { - // Recognized formats: - // - // -Z allow-features=feature1,feature2 - // - // -Zallow-features=feature1,feature2 - - let flags_var; - let flags_var_string; - let flags = if let Some(encoded_rustflags) = env::var_os("CARGO_ENCODED_RUSTFLAGS") { - flags_var = encoded_rustflags; - flags_var_string = flags_var.to_string_lossy(); - flags_var_string.split('\x1f') - } else { - return true; - }; - - for mut flag in flags { - if flag.starts_with("-Z") { - flag = &flag["-Z".len()..]; - } - if flag.starts_with("allow-features=") { - flag = &flag["allow-features=".len()..]; - return flag.split(',').any(|allowed| allowed == feature); - } - } - - // No allow-features= flag, allowed by default. - true +fn cargo_env_var(key: &str) -> OsString { + env::var_os(key).unwrap_or_else(|| { + eprintln!( + "Environment variable ${} is not set during execution of build script", + key, + ); + process::exit(1); + }) } diff --git a/vendor/proc-macro2/build/probe.rs b/vendor/proc-macro2/build/probe.rs new file mode 100644 index 0000000..2c4947a --- /dev/null +++ b/vendor/proc-macro2/build/probe.rs @@ -0,0 +1,25 @@ +// This code exercises the surface area that we expect of Span's unstable API. 
+// If the current toolchain is able to compile it, then proc-macro2 is able to +// offer these APIs too. + +#![feature(proc_macro_span)] + +extern crate proc_macro; + +use core::ops::{Range, RangeBounds}; +use proc_macro::{Literal, Span}; + +pub fn byte_range(this: &Span) -> Range { + this.byte_range() +} + +pub fn join(this: &Span, other: Span) -> Option { + this.join(other) +} + +pub fn subspan>(this: &Literal, range: R) -> Option { + this.subspan(range) +} + +// Include in sccache cache key. +const _: Option<&str> = option_env!("RUSTC_BOOTSTRAP"); diff --git a/vendor/proc-macro2/rust-toolchain.toml b/vendor/proc-macro2/rust-toolchain.toml new file mode 100644 index 0000000..20fe888 --- /dev/null +++ b/vendor/proc-macro2/rust-toolchain.toml @@ -0,0 +1,2 @@ +[toolchain] +components = ["rust-src"] diff --git a/vendor/proc-macro2/src/convert.rs b/vendor/proc-macro2/src/convert.rs deleted file mode 100644 index afc5faf..0000000 --- a/vendor/proc-macro2/src/convert.rs +++ /dev/null @@ -1,19 +0,0 @@ -pub(crate) fn usize_to_u32(u: usize) -> Option { - #[cfg(not(no_try_from))] - { - use core::convert::TryFrom; - - u32::try_from(u).ok() - } - - #[cfg(no_try_from)] - { - use core::mem; - - if mem::size_of::() <= mem::size_of::() || u <= u32::max_value() as usize { - Some(u as u32) - } else { - None - } - } -} diff --git a/vendor/proc-macro2/src/extra.rs b/vendor/proc-macro2/src/extra.rs index cbce162..543ec1d 100644 --- a/vendor/proc-macro2/src/extra.rs +++ b/vendor/proc-macro2/src/extra.rs @@ -3,18 +3,85 @@ use crate::fallback; use crate::imp; -use crate::marker::Marker; +use crate::marker::{ProcMacroAutoTraits, MARKER}; use crate::Span; use core::fmt::{self, Debug}; +/// Invalidate any `proc_macro2::Span` that exist on the current thread. +/// +/// The implementation of `Span` uses thread-local data structures and this +/// function clears them. 
Calling any method on a `Span` on the current thread +/// created prior to the invalidation will return incorrect values or crash. +/// +/// This function is useful for programs that process more than 232 +/// bytes of Rust source code on the same thread. Just like rustc, proc-macro2 +/// uses 32-bit source locations, and these wrap around when the total source +/// code processed by the same thread exceeds 232 bytes (4 +/// gigabytes). After a wraparound, `Span` methods such as `source_text()` can +/// return wrong data. +/// +/// # Example +/// +/// As of late 2023, there is 200 GB of Rust code published on crates.io. +/// Looking at just the newest version of every crate, it is 16 GB of code. So a +/// workload that involves parsing it all would overflow a 32-bit source +/// location unless spans are being invalidated. +/// +/// ``` +/// use flate2::read::GzDecoder; +/// use std::ffi::OsStr; +/// use std::io::{BufReader, Read}; +/// use std::str::FromStr; +/// use tar::Archive; +/// +/// rayon::scope(|s| { +/// for krate in every_version_of_every_crate() { +/// s.spawn(move |_| { +/// proc_macro2::extra::invalidate_current_thread_spans(); +/// +/// let reader = BufReader::new(krate); +/// let tar = GzDecoder::new(reader); +/// let mut archive = Archive::new(tar); +/// for entry in archive.entries().unwrap() { +/// let mut entry = entry.unwrap(); +/// let path = entry.path().unwrap(); +/// if path.extension() != Some(OsStr::new("rs")) { +/// continue; +/// } +/// let mut content = String::new(); +/// entry.read_to_string(&mut content).unwrap(); +/// match proc_macro2::TokenStream::from_str(&content) { +/// Ok(tokens) => {/* ... */}, +/// Err(_) => continue, +/// } +/// } +/// }); +/// } +/// }); +/// # +/// # fn every_version_of_every_crate() -> Vec { +/// # Vec::new() +/// # } +/// ``` +/// +/// # Panics +/// +/// This function is not applicable to and will panic if called from a +/// procedural macro. 
+#[cfg(span_locations)] +#[cfg_attr(doc_cfg, doc(cfg(feature = "span-locations")))] +pub fn invalidate_current_thread_spans() { + crate::imp::invalidate_current_thread_spans(); +} + /// An object that holds a [`Group`]'s `span_open()` and `span_close()` together -/// (in a more compact representation than holding those 2 spans individually. +/// in a more compact representation than holding those 2 spans individually. /// /// [`Group`]: crate::Group #[derive(Copy, Clone)] pub struct DelimSpan { inner: DelimSpanEnum, - _marker: Marker, + _marker: ProcMacroAutoTraits, } #[derive(Copy, Clone)] @@ -22,9 +89,7 @@ enum DelimSpanEnum { #[cfg(wrap_proc_macro)] Compiler { join: proc_macro::Span, - #[cfg(not(no_group_open_close))] open: proc_macro::Span, - #[cfg(not(no_group_open_close))] close: proc_macro::Span, }, Fallback(fallback::Span), @@ -36,9 +101,7 @@ impl DelimSpan { let inner = match group { imp::Group::Compiler(group) => DelimSpanEnum::Compiler { join: group.span(), - #[cfg(not(no_group_open_close))] open: group.span_open(), - #[cfg(not(no_group_open_close))] close: group.span_close(), }, imp::Group::Fallback(group) => DelimSpanEnum::Fallback(group.span()), @@ -49,7 +112,7 @@ impl DelimSpan { DelimSpan { inner, - _marker: Marker, + _marker: MARKER, } } @@ -66,13 +129,7 @@ impl DelimSpan { pub fn open(&self) -> Span { match &self.inner { #[cfg(wrap_proc_macro)] - DelimSpanEnum::Compiler { - #[cfg(not(no_group_open_close))] - open, - #[cfg(no_group_open_close)] - join: open, - .. - } => Span::_new(imp::Span::Compiler(*open)), + DelimSpanEnum::Compiler { open, .. } => Span::_new(imp::Span::Compiler(*open)), DelimSpanEnum::Fallback(span) => Span::_new_fallback(span.first_byte()), } } @@ -81,13 +138,7 @@ impl DelimSpan { pub fn close(&self) -> Span { match &self.inner { #[cfg(wrap_proc_macro)] - DelimSpanEnum::Compiler { - #[cfg(not(no_group_open_close))] - close, - #[cfg(no_group_open_close)] - join: close, - .. 
- } => Span::_new(imp::Span::Compiler(*close)), + DelimSpanEnum::Compiler { close, .. } => Span::_new(imp::Span::Compiler(*close)), DelimSpanEnum::Fallback(span) => Span::_new_fallback(span.last_byte()), } } diff --git a/vendor/proc-macro2/src/fallback.rs b/vendor/proc-macro2/src/fallback.rs index bbea473..16bf645 100644 --- a/vendor/proc-macro2/src/fallback.rs +++ b/vendor/proc-macro2/src/fallback.rs @@ -4,12 +4,15 @@ use crate::parse::{self, Cursor}; use crate::rcvec::{RcVec, RcVecBuilder, RcVecIntoIter, RcVecMut}; use crate::{Delimiter, Spacing, TokenTree}; #[cfg(all(span_locations, not(fuzzing)))] +use alloc::collections::BTreeMap; +#[cfg(all(span_locations, not(fuzzing)))] use core::cell::RefCell; #[cfg(span_locations)] use core::cmp; use core::fmt::{self, Debug, Display, Write}; -use core::iter::FromIterator; use core::mem::ManuallyDrop; +#[cfg(span_locations)] +use core::ops::Range; use core::ops::RangeBounds; use core::ptr; use core::str::FromStr; @@ -44,7 +47,7 @@ impl LexError { self.span } - fn call_site() -> Self { + pub(crate) fn call_site() -> Self { LexError { span: Span::call_site(), } @@ -71,7 +74,6 @@ impl TokenStream { fn push_token_from_proc_macro(mut vec: RcVecMut, token: TokenTree) { // https://github.com/dtolnay/proc-macro2/issues/235 match token { - #[cfg(not(no_bind_by_move_pattern_guard))] TokenTree::Literal(crate::Literal { #[cfg(wrap_proc_macro)] inner: crate::imp::Literal::Fallback(literal), @@ -81,20 +83,6 @@ fn push_token_from_proc_macro(mut vec: RcVecMut, token: TokenTree) { }) if literal.repr.starts_with('-') => { push_negative_literal(vec, literal); } - #[cfg(no_bind_by_move_pattern_guard)] - TokenTree::Literal(crate::Literal { - #[cfg(wrap_proc_macro)] - inner: crate::imp::Literal::Fallback(literal), - #[cfg(not(wrap_proc_macro))] - inner: literal, - .. 
- }) => { - if literal.repr.starts_with('-') { - push_negative_literal(vec, literal); - } else { - vec.push(TokenTree::Literal(crate::Literal::_new_fallback(literal))); - } - } _ => vec.push(token), } @@ -165,9 +153,9 @@ fn get_cursor(src: &str) -> Cursor { // Create a dummy file & add it to the source map #[cfg(not(fuzzing))] - SOURCE_MAP.with(|cm| { - let mut cm = cm.borrow_mut(); - let span = cm.add_file(src); + SOURCE_MAP.with(|sm| { + let mut sm = sm.borrow_mut(); + let span = sm.add_file(src); Cursor { rest: src, off: span.lo, @@ -233,7 +221,7 @@ impl Debug for TokenStream { } } -#[cfg(use_proc_macro)] +#[cfg(feature = "proc-macro")] impl From for TokenStream { fn from(inner: proc_macro::TokenStream) -> Self { inner @@ -243,7 +231,7 @@ impl From for TokenStream { } } -#[cfg(use_proc_macro)] +#[cfg(feature = "proc-macro")] impl From for proc_macro::TokenStream { fn from(inner: TokenStream) -> Self { inner @@ -321,7 +309,6 @@ impl SourceFile { } pub fn is_real(&self) -> bool { - // XXX(nika): Support real files in the future? false } } @@ -338,21 +325,29 @@ impl Debug for SourceFile { #[cfg(all(span_locations, not(fuzzing)))] thread_local! { static SOURCE_MAP: RefCell = RefCell::new(SourceMap { - // NOTE: We start with a single dummy file which all call_site() and - // def_site() spans reference. + // Start with a single dummy file which all call_site() and def_site() + // spans reference. 
files: vec![FileInfo { source_text: String::new(), span: Span { lo: 0, hi: 0 }, lines: vec![0], + char_index_to_byte_offset: BTreeMap::new(), }], }); } +#[cfg(span_locations)] +pub(crate) fn invalidate_current_thread_spans() { + #[cfg(not(fuzzing))] + SOURCE_MAP.with(|sm| sm.borrow_mut().files.truncate(1)); +} + #[cfg(all(span_locations, not(fuzzing)))] struct FileInfo { source_text: String, span: Span, lines: Vec, + char_index_to_byte_offset: BTreeMap, } #[cfg(all(span_locations, not(fuzzing)))] @@ -360,7 +355,7 @@ impl FileInfo { fn offset_line_column(&self, offset: usize) -> LineColumn { assert!(self.span_within(Span { lo: offset as u32, - hi: offset as u32 + hi: offset as u32, })); let offset = offset - self.span.lo as usize; match self.lines.binary_search(&offset) { @@ -379,10 +374,44 @@ impl FileInfo { span.lo >= self.span.lo && span.hi <= self.span.hi } - fn source_text(&self, span: Span) -> String { - let lo = (span.lo - self.span.lo) as usize; - let hi = (span.hi - self.span.lo) as usize; - self.source_text[lo..hi].to_owned() + fn byte_range(&mut self, span: Span) -> Range { + let lo_char = (span.lo - self.span.lo) as usize; + + // Look up offset of the largest already-computed char index that is + // less than or equal to the current requested one. We resume counting + // chars from that point. + let (&last_char_index, &last_byte_offset) = self + .char_index_to_byte_offset + .range(..=lo_char) + .next_back() + .unwrap_or((&0, &0)); + + let lo_byte = if last_char_index == lo_char { + last_byte_offset + } else { + let total_byte_offset = match self.source_text[last_byte_offset..] 
+ .char_indices() + .nth(lo_char - last_char_index) + { + Some((additional_offset, _ch)) => last_byte_offset + additional_offset, + None => self.source_text.len(), + }; + self.char_index_to_byte_offset + .insert(lo_char, total_byte_offset); + total_byte_offset + }; + + let trunc_lo = &self.source_text[lo_byte..]; + let char_len = (span.hi - span.lo) as usize; + lo_byte..match trunc_lo.char_indices().nth(char_len) { + Some((offset, _ch)) => lo_byte + offset, + None => self.source_text.len(), + } + } + + fn source_text(&mut self, span: Span) -> String { + let byte_range = self.byte_range(span); + self.source_text[byte_range].to_owned() } } @@ -421,7 +450,6 @@ impl SourceMap { fn add_file(&mut self, src: &str) -> Span { let (len, lines) = lines_offsets(src); let lo = self.next_start_pos(); - // XXX(nika): Should we bother doing a checked cast or checked add here? let span = Span { lo, hi: lo + (len as u32), @@ -431,6 +459,8 @@ impl SourceMap { source_text: src.to_owned(), span, lines, + // Populated lazily by source_text(). 
+ char_index_to_byte_offset: BTreeMap::new(), }); span @@ -458,6 +488,15 @@ impl SourceMap { } unreachable!("Invalid span with no related FileInfo!"); } + + fn fileinfo_mut(&mut self, span: Span) -> &mut FileInfo { + for file in &mut self.files { + if file.span_within(span) { + return file; + } + } + unreachable!("Invalid span with no related FileInfo!"); + } } #[derive(Clone, Copy, PartialEq, Eq)] @@ -479,7 +518,6 @@ impl Span { Span { lo: 0, hi: 0 } } - #[cfg(not(no_hygiene))] pub fn mixed_site() -> Self { Span::call_site() } @@ -508,22 +546,37 @@ impl Span { }; #[cfg(not(fuzzing))] - SOURCE_MAP.with(|cm| { - let cm = cm.borrow(); - let path = cm.filepath(*self); + SOURCE_MAP.with(|sm| { + let sm = sm.borrow(); + let path = sm.filepath(*self); SourceFile { path } }) } + #[cfg(span_locations)] + pub fn byte_range(&self) -> Range { + #[cfg(fuzzing)] + return 0..0; + + #[cfg(not(fuzzing))] + { + if self.is_call_site() { + 0..0 + } else { + SOURCE_MAP.with(|sm| sm.borrow_mut().fileinfo_mut(*self).byte_range(*self)) + } + } + } + #[cfg(span_locations)] pub fn start(&self) -> LineColumn { #[cfg(fuzzing)] return LineColumn { line: 0, column: 0 }; #[cfg(not(fuzzing))] - SOURCE_MAP.with(|cm| { - let cm = cm.borrow(); - let fi = cm.fileinfo(*self); + SOURCE_MAP.with(|sm| { + let sm = sm.borrow(); + let fi = sm.fileinfo(*self); fi.offset_line_column(self.lo as usize) }) } @@ -534,33 +587,13 @@ impl Span { return LineColumn { line: 0, column: 0 }; #[cfg(not(fuzzing))] - SOURCE_MAP.with(|cm| { - let cm = cm.borrow(); - let fi = cm.fileinfo(*self); + SOURCE_MAP.with(|sm| { + let sm = sm.borrow(); + let fi = sm.fileinfo(*self); fi.offset_line_column(self.hi as usize) }) } - #[cfg(procmacro2_semver_exempt)] - pub fn before(&self) -> Span { - Span { - #[cfg(span_locations)] - lo: self.lo, - #[cfg(span_locations)] - hi: self.lo, - } - } - - #[cfg(procmacro2_semver_exempt)] - pub fn after(&self) -> Span { - Span { - #[cfg(span_locations)] - lo: self.hi, - #[cfg(span_locations)] - 
hi: self.hi, - } - } - #[cfg(not(span_locations))] pub fn join(&self, _other: Span) -> Option { Some(Span {}) @@ -575,10 +608,10 @@ impl Span { }; #[cfg(not(fuzzing))] - SOURCE_MAP.with(|cm| { - let cm = cm.borrow(); + SOURCE_MAP.with(|sm| { + let sm = sm.borrow(); // If `other` is not within the same FileInfo as us, return None. - if !cm.fileinfo(*self).span_within(other) { + if !sm.fileinfo(*self).span_within(other) { return None; } Some(Span { @@ -603,7 +636,7 @@ impl Span { if self.is_call_site() { None } else { - Some(SOURCE_MAP.with(|cm| cm.borrow().fileinfo(*self).source_text(*self))) + Some(SOURCE_MAP.with(|sm| sm.borrow_mut().fileinfo_mut(*self).source_text(*self))) } } } @@ -749,22 +782,32 @@ pub(crate) struct Ident { } impl Ident { - fn _new(string: &str, raw: bool, span: Span) -> Self { - validate_ident(string, raw); + #[track_caller] + pub fn new_checked(string: &str, span: Span) -> Self { + validate_ident(string); + Ident::new_unchecked(string, span) + } + pub fn new_unchecked(string: &str, span: Span) -> Self { Ident { sym: string.to_owned(), span, - raw, + raw: false, } } - pub fn new(string: &str, span: Span) -> Self { - Ident::_new(string, false, span) + #[track_caller] + pub fn new_raw_checked(string: &str, span: Span) -> Self { + validate_ident_raw(string); + Ident::new_raw_unchecked(string, span) } - pub fn new_raw(string: &str, span: Span) -> Self { - Ident::_new(string, true, span) + pub fn new_raw_unchecked(string: &str, span: Span) -> Self { + Ident { + sym: string.to_owned(), + span, + raw: true, + } } pub fn span(&self) -> Span { @@ -784,12 +827,13 @@ pub(crate) fn is_ident_continue(c: char) -> bool { unicode_ident::is_xid_continue(c) } -fn validate_ident(string: &str, raw: bool) { +#[track_caller] +fn validate_ident(string: &str) { if string.is_empty() { panic!("Ident is not allowed to be empty; use Option"); } - if string.bytes().all(|digit| digit >= b'0' && digit <= b'9') { + if string.bytes().all(|digit| b'0' <= digit && digit <= 
b'9') { panic!("Ident cannot be a number; use Literal instead"); } @@ -810,14 +854,17 @@ fn validate_ident(string: &str, raw: bool) { if !ident_ok(string) { panic!("{:?} is not a valid Ident", string); } +} - if raw { - match string { - "_" | "super" | "self" | "Self" | "crate" => { - panic!("`r#{}` cannot be a raw identifier", string); - } - _ => {} +#[track_caller] +fn validate_ident_raw(string: &str) { + validate_ident(string); + + match string { + "_" | "super" | "self" | "Self" | "crate" => { + panic!("`r#{}` cannot be a raw identifier", string); } + _ => {} } } @@ -850,6 +897,7 @@ impl Display for Ident { } } +#[allow(clippy::missing_fields_in_debug)] impl Debug for Ident { // Ident(proc_macro), Ident(r#union) #[cfg(not(span_locations))] @@ -1039,27 +1087,26 @@ impl Literal { #[cfg(span_locations)] { - use crate::convert::usize_to_u32; use core::ops::Bound; let lo = match range.start_bound() { Bound::Included(start) => { - let start = usize_to_u32(*start)?; + let start = u32::try_from(*start).ok()?; self.span.lo.checked_add(start)? } Bound::Excluded(start) => { - let start = usize_to_u32(*start)?; + let start = u32::try_from(*start).ok()?; self.span.lo.checked_add(start)?.checked_add(1)? } Bound::Unbounded => self.span.lo, }; let hi = match range.end_bound() { Bound::Included(end) => { - let end = usize_to_u32(*end)?; + let end = u32::try_from(*end).ok()?; self.span.lo.checked_add(end)?.checked_add(1)? } Bound::Excluded(end) => { - let end = usize_to_u32(*end)?; + let end = u32::try_from(*end).ok()?; self.span.lo.checked_add(end)? } Bound::Unbounded => self.span.hi, diff --git a/vendor/proc-macro2/src/lib.rs b/vendor/proc-macro2/src/lib.rs index 6ce679d..01f2049 100644 --- a/vendor/proc-macro2/src/lib.rs +++ b/vendor/proc-macro2/src/lib.rs @@ -55,7 +55,7 @@ //! If parsing with [Syn], you'll use [`parse_macro_input!`] instead to //! propagate parse errors correctly back to the compiler when parsing fails. //! -//! 
[`parse_macro_input!`]: https://docs.rs/syn/1.0/syn/macro.parse_macro_input.html +//! [`parse_macro_input!`]: https://docs.rs/syn/2.0/syn/macro.parse_macro_input.html //! //! # Unstable features //! @@ -65,7 +65,7 @@ //! //! To opt into the additional APIs available in the most recent nightly //! compiler, the `procmacro2_semver_exempt` config flag must be passed to -//! rustc. We will polyfill those nightly-only APIs back to Rust 1.31.0. As +//! rustc. We will polyfill those nightly-only APIs back to Rust 1.56.0. As //! these are unstable APIs that track the nightly compiler, minor versions of //! proc-macro2 may make breaking changes to them at any time. //! @@ -86,22 +86,25 @@ //! a different thread. // Proc-macro2 types in rustdoc of other crates get linked to here. -#![doc(html_root_url = "https://docs.rs/proc-macro2/1.0.56")] -#![cfg_attr( - any(proc_macro_span, super_unstable), - feature(proc_macro_span, proc_macro_span_shrink) -)] +#![doc(html_root_url = "https://docs.rs/proc-macro2/1.0.78")] +#![cfg_attr(any(proc_macro_span, super_unstable), feature(proc_macro_span))] #![cfg_attr(super_unstable, feature(proc_macro_def_site))] #![cfg_attr(doc_cfg, feature(doc_cfg))] +#![deny(unsafe_op_in_unsafe_fn)] #![allow( clippy::cast_lossless, clippy::cast_possible_truncation, + clippy::checked_conversions, clippy::doc_markdown, clippy::items_after_statements, + clippy::iter_without_into_iter, clippy::let_underscore_untyped, clippy::manual_assert, + clippy::manual_range_contains, + clippy::missing_safety_doc, clippy::must_use_candidate, clippy::needless_doctest_main, + clippy::new_without_default, clippy::return_self_not_must_use, clippy::shadow_unrelated, clippy::trivially_copy_pass_by_ref, @@ -119,7 +122,18 @@ compile_error! {"\ build script as well. "} -#[cfg(use_proc_macro)] +#[cfg(all( + procmacro2_nightly_testing, + feature = "proc-macro", + not(proc_macro_span) +))] +compile_error! {"\ + Build script probe failed to compile. 
+"} + +extern crate alloc; + +#[cfg(feature = "proc-macro")] extern crate proc_macro; mod marker; @@ -142,17 +156,16 @@ use crate::fallback as imp; #[cfg(wrap_proc_macro)] mod imp; -#[cfg(span_locations)] -mod convert; #[cfg(span_locations)] mod location; use crate::extra::DelimSpan; -use crate::marker::Marker; +use crate::marker::{ProcMacroAutoTraits, MARKER}; use core::cmp::Ordering; use core::fmt::{self, Debug, Display}; use core::hash::{Hash, Hasher}; -use core::iter::FromIterator; +#[cfg(span_locations)] +use core::ops::Range; use core::ops::RangeBounds; use core::str::FromStr; use std::error::Error; @@ -160,6 +173,7 @@ use std::error::Error; use std::path::PathBuf; #[cfg(span_locations)] +#[cfg_attr(doc_cfg, doc(cfg(feature = "span-locations")))] pub use crate::location::LineColumn; /// An abstract stream of tokens, or more concretely a sequence of token trees. @@ -172,27 +186,27 @@ pub use crate::location::LineColumn; #[derive(Clone)] pub struct TokenStream { inner: imp::TokenStream, - _marker: Marker, + _marker: ProcMacroAutoTraits, } /// Error returned from `TokenStream::from_str`. 
pub struct LexError { inner: imp::LexError, - _marker: Marker, + _marker: ProcMacroAutoTraits, } impl TokenStream { fn _new(inner: imp::TokenStream) -> Self { TokenStream { inner, - _marker: Marker, + _marker: MARKER, } } fn _new_fallback(inner: fallback::TokenStream) -> Self { TokenStream { inner: inner.into(), - _marker: Marker, + _marker: MARKER, } } @@ -229,20 +243,22 @@ impl FromStr for TokenStream { fn from_str(src: &str) -> Result { let e = src.parse().map_err(|e| LexError { inner: e, - _marker: Marker, + _marker: MARKER, })?; Ok(TokenStream::_new(e)) } } -#[cfg(use_proc_macro)] +#[cfg(feature = "proc-macro")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "proc-macro")))] impl From for TokenStream { fn from(inner: proc_macro::TokenStream) -> Self { TokenStream::_new(inner.into()) } } -#[cfg(use_proc_macro)] +#[cfg(feature = "proc-macro")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "proc-macro")))] impl From for proc_macro::TokenStream { fn from(inner: TokenStream) -> Self { inner.inner.into() @@ -325,7 +341,7 @@ impl Error for LexError {} #[derive(Clone, PartialEq, Eq)] pub struct SourceFile { inner: imp::SourceFile, - _marker: Marker, + _marker: ProcMacroAutoTraits, } #[cfg(all(procmacro2_semver_exempt, any(not(wrap_proc_macro), super_unstable)))] @@ -333,7 +349,7 @@ impl SourceFile { fn _new(inner: imp::SourceFile) -> Self { SourceFile { inner, - _marker: Marker, + _marker: MARKER, } } @@ -372,21 +388,21 @@ impl Debug for SourceFile { #[derive(Copy, Clone)] pub struct Span { inner: imp::Span, - _marker: Marker, + _marker: ProcMacroAutoTraits, } impl Span { fn _new(inner: imp::Span) -> Self { Span { inner, - _marker: Marker, + _marker: MARKER, } } fn _new_fallback(inner: fallback::Span) -> Self { Span { inner: inner.into(), - _marker: Marker, + _marker: MARKER, } } @@ -402,9 +418,6 @@ impl Span { /// The span located at the invocation of the procedural macro, but with /// local variables, labels, and `$crate` resolved at the definition site /// of the macro. 
This is the same hygiene behavior as `macro_rules`. - /// - /// This function requires Rust 1.45 or later. - #[cfg(not(no_hygiene))] pub fn mixed_site() -> Self { Span::_new(imp::Span::mixed_site()) } @@ -461,6 +474,21 @@ impl Span { SourceFile::_new(self.inner.source_file()) } + /// Returns the span's byte position range in the source file. + /// + /// This method requires the `"span-locations"` feature to be enabled. + /// + /// When executing in a procedural macro context, the returned range is only + /// accurate if compiled with a nightly toolchain. The stable toolchain does + /// not have this information available. When executing outside of a + /// procedural macro, such as main.rs or build.rs, the byte range is always + /// accurate regardless of toolchain. + #[cfg(span_locations)] + #[cfg_attr(doc_cfg, doc(cfg(feature = "span-locations")))] + pub fn byte_range(&self) -> Range { + self.inner.byte_range() + } + /// Get the starting line/column in the source file for this span. /// /// This method requires the `"span-locations"` feature to be enabled. @@ -491,24 +519,6 @@ impl Span { self.inner.end() } - /// Creates an empty span pointing to directly before this span. - /// - /// This method is semver exempt and not exposed by default. - #[cfg(all(procmacro2_semver_exempt, any(not(wrap_proc_macro), super_unstable)))] - #[cfg_attr(doc_cfg, doc(cfg(procmacro2_semver_exempt)))] - pub fn before(&self) -> Span { - Span::_new(self.inner.before()) - } - - /// Creates an empty span pointing to directly after this span. - /// - /// This method is semver exempt and not exposed by default. - #[cfg(all(procmacro2_semver_exempt, any(not(wrap_proc_macro), super_unstable)))] - #[cfg_attr(doc_cfg, doc(cfg(procmacro2_semver_exempt)))] - pub fn after(&self) -> Span { - Span::_new(self.inner.after()) - } - /// Create a new span encompassing `self` and `other`. /// /// Returns `None` if `self` and `other` are from different files. 
@@ -873,7 +883,7 @@ impl Debug for Punct { /// Rust keywords. Use `input.call(Ident::parse_any)` when parsing to match the /// behaviour of `Ident::new`. /// -/// [`Parse`]: https://docs.rs/syn/1.0/syn/parse/trait.Parse.html +/// [`Parse`]: https://docs.rs/syn/2.0/syn/parse/trait.Parse.html /// /// # Examples /// @@ -926,14 +936,14 @@ impl Debug for Punct { #[derive(Clone)] pub struct Ident { inner: imp::Ident, - _marker: Marker, + _marker: ProcMacroAutoTraits, } impl Ident { fn _new(inner: imp::Ident) -> Self { Ident { inner, - _marker: Marker, + _marker: MARKER, } } @@ -964,12 +974,13 @@ impl Ident { /// Panics if the input string is neither a keyword nor a legal variable /// name. If you are not sure whether the string contains an identifier and /// need to handle an error case, use - /// syn::parse_str::<Ident> /// rather than `Ident::new`. + #[track_caller] pub fn new(string: &str, span: Span) -> Self { - Ident::_new(imp::Ident::new(string, span.inner)) + Ident::_new(imp::Ident::new_checked(string, span.inner)) } /// Same as `Ident::new`, but creates a raw identifier (`r#ident`). The @@ -977,12 +988,9 @@ impl Ident { /// (including keywords, e.g. `fn`). Keywords which are usable in path /// segments (e.g. `self`, `super`) are not supported, and will cause a /// panic. + #[track_caller] pub fn new_raw(string: &str, span: Span) -> Self { - Ident::_new_raw(string, span) - } - - fn _new_raw(string: &str, span: Span) -> Self { - Ident::_new(imp::Ident::new_raw(string, span.inner)) + Ident::_new(imp::Ident::new_raw_checked(string, span.inner)) } /// Returns the span of this `Ident`. @@ -1055,7 +1063,7 @@ impl Debug for Ident { #[derive(Clone)] pub struct Literal { inner: imp::Literal, - _marker: Marker, + _marker: ProcMacroAutoTraits, } macro_rules! 
suffixed_int_literals { @@ -1102,14 +1110,14 @@ impl Literal { fn _new(inner: imp::Literal) -> Self { Literal { inner, - _marker: Marker, + _marker: MARKER, } } fn _new_fallback(inner: fallback::Literal) -> Self { Literal { inner: inner.into(), - _marker: Marker, + _marker: MARKER, } } @@ -1259,7 +1267,7 @@ impl Literal { // representation. This is not public API other than for quote. #[doc(hidden)] pub unsafe fn from_str_unchecked(repr: &str) -> Self { - Literal::_new(imp::Literal::from_str_unchecked(repr)) + Literal::_new(unsafe { imp::Literal::from_str_unchecked(repr) }) } } @@ -1269,7 +1277,7 @@ impl FromStr for Literal { fn from_str(repr: &str) -> Result { repr.parse().map(Literal::_new).map_err(|inner| LexError { inner, - _marker: Marker, + _marker: MARKER, }) } } @@ -1288,7 +1296,7 @@ impl Display for Literal { /// Public implementation details for the `TokenStream` type, such as iterators. pub mod token_stream { - use crate::marker::Marker; + use crate::marker::{ProcMacroAutoTraits, MARKER}; use crate::{imp, TokenTree}; use core::fmt::{self, Debug}; @@ -1301,7 +1309,7 @@ pub mod token_stream { #[derive(Clone)] pub struct IntoIter { inner: imp::TokenTreeIter, - _marker: Marker, + _marker: ProcMacroAutoTraits, } impl Iterator for IntoIter { @@ -1330,7 +1338,7 @@ pub mod token_stream { fn into_iter(self) -> IntoIter { IntoIter { inner: self.inner.into_iter(), - _marker: Marker, + _marker: MARKER, } } } diff --git a/vendor/proc-macro2/src/marker.rs b/vendor/proc-macro2/src/marker.rs index 59fd096..23b94ce 100644 --- a/vendor/proc-macro2/src/marker.rs +++ b/vendor/proc-macro2/src/marker.rs @@ -1,18 +1,17 @@ +use alloc::rc::Rc; use core::marker::PhantomData; -use std::panic::{RefUnwindSafe, UnwindSafe}; -use std::rc::Rc; +use core::panic::{RefUnwindSafe, UnwindSafe}; // Zero sized marker with the correct set of autotrait impls we want all proc // macro types to have. 
-pub(crate) type Marker = PhantomData; +#[derive(Copy, Clone)] +#[cfg_attr( + all(procmacro2_semver_exempt, any(not(wrap_proc_macro), super_unstable)), + derive(PartialEq, Eq) +)] +pub(crate) struct ProcMacroAutoTraits(PhantomData>); -pub(crate) use self::value::*; - -mod value { - pub(crate) use core::marker::PhantomData as Marker; -} - -pub(crate) struct ProcMacroAutoTraits(Rc<()>); +pub(crate) const MARKER: ProcMacroAutoTraits = ProcMacroAutoTraits(PhantomData); impl UnwindSafe for ProcMacroAutoTraits {} impl RefUnwindSafe for ProcMacroAutoTraits {} diff --git a/vendor/proc-macro2/src/parse.rs b/vendor/proc-macro2/src/parse.rs index be2425b..07239bc 100644 --- a/vendor/proc-macro2/src/parse.rs +++ b/vendor/proc-macro2/src/parse.rs @@ -1,5 +1,5 @@ use crate::fallback::{ - is_ident_continue, is_ident_start, Group, LexError, Literal, Span, TokenStream, + self, is_ident_continue, is_ident_start, Group, LexError, Literal, Span, TokenStream, TokenStreamBuilder, }; use crate::{Delimiter, Punct, Spacing, TokenTree}; @@ -108,7 +108,7 @@ fn skip_whitespace(input: Cursor) -> Cursor { s = s.advance(1); continue; } - b if b <= 0x7f => {} + b if b.is_ascii() => {} _ => { let ch = s.chars().next().unwrap(); if is_whitespace(ch) { @@ -161,6 +161,10 @@ fn word_break(input: Cursor) -> Result { } } +// Rustc's representation of a macro expansion error in expression position or +// type position. 
+const ERROR: &str = "(/*ERROR*/)"; + pub(crate) fn token_stream(mut input: Cursor) -> Result { let mut trees = TokenStreamBuilder::new(); let mut stack = Vec::new(); @@ -192,7 +196,7 @@ pub(crate) fn token_stream(mut input: Cursor) -> Result { }; if let Some(open_delimiter) = match first { - b'(' => Some(Delimiter::Parenthesis), + b'(' if !input.starts_with(ERROR) => Some(Delimiter::Parenthesis), b'[' => Some(Delimiter::Bracket), b'{' => Some(Delimiter::Brace), _ => None, @@ -267,15 +271,21 @@ fn leaf_token(input: Cursor) -> PResult { Ok((input, TokenTree::Punct(p))) } else if let Ok((input, i)) = ident(input) { Ok((input, TokenTree::Ident(i))) + } else if input.starts_with(ERROR) { + let rest = input.advance(ERROR.len()); + let repr = crate::Literal::_new_fallback(Literal::_new(ERROR.to_owned())); + Ok((rest, TokenTree::Literal(repr))) } else { Err(Reject) } } fn ident(input: Cursor) -> PResult { - if ["r\"", "r#\"", "r##", "b\"", "b\'", "br\"", "br#"] - .iter() - .any(|prefix| input.starts_with(prefix)) + if [ + "r\"", "r#\"", "r##", "b\"", "b\'", "br\"", "br#", "c\"", "cr\"", "cr#", + ] + .iter() + .any(|prefix| input.starts_with(prefix)) { Err(Reject) } else { @@ -290,7 +300,10 @@ fn ident_any(input: Cursor) -> PResult { let (rest, sym) = ident_not_raw(rest)?; if !raw { - let ident = crate::Ident::new(sym, crate::Span::call_site()); + let ident = crate::Ident::_new(crate::imp::Ident::new_unchecked( + sym, + fallback::Span::call_site(), + )); return Ok((rest, ident)); } @@ -299,7 +312,10 @@ fn ident_any(input: Cursor) -> PResult { _ => {} } - let ident = crate::Ident::_new_raw(sym, crate::Span::call_site()); + let ident = crate::Ident::_new(crate::imp::Ident::new_raw_unchecked( + sym, + fallback::Span::call_site(), + )); Ok((rest, ident)) } @@ -333,6 +349,8 @@ fn literal_nocapture(input: Cursor) -> Result { Ok(ok) } else if let Ok(ok) = byte_string(input) { Ok(ok) + } else if let Ok(ok) = c_string(input) { + Ok(ok) } else if let Ok(ok) = byte(input) { Ok(ok) } 
else if let Ok(ok) = character(input) { @@ -363,8 +381,8 @@ fn string(input: Cursor) -> Result { } } -fn cooked_string(input: Cursor) -> Result { - let mut chars = input.char_indices().peekable(); +fn cooked_string(mut input: Cursor) -> Result { + let mut chars = input.char_indices(); while let Some((i, ch)) = chars.next() { match ch { @@ -378,31 +396,16 @@ fn cooked_string(input: Cursor) -> Result { }, '\\' => match chars.next() { Some((_, 'x')) => { - if !backslash_x_char(&mut chars) { - break; - } + backslash_x_char(&mut chars)?; } - Some((_, 'n')) | Some((_, 'r')) | Some((_, 't')) | Some((_, '\\')) - | Some((_, '\'')) | Some((_, '"')) | Some((_, '0')) => {} + Some((_, 'n' | 'r' | 't' | '\\' | '\'' | '"' | '0')) => {} Some((_, 'u')) => { - if !backslash_u(&mut chars) { - break; - } + backslash_u(&mut chars)?; } - Some((_, ch @ '\n')) | Some((_, ch @ '\r')) => { - let mut last = ch; - loop { - if last == '\r' && chars.next().map_or(true, |(_, ch)| ch != '\n') { - return Err(Reject); - } - match chars.peek() { - Some((_, ch)) if ch.is_whitespace() => { - last = *ch; - chars.next(); - } - _ => break, - } - } + Some((newline, ch @ ('\n' | '\r'))) => { + input = input.advance(newline + 1); + trailing_backslash(&mut input, ch as u8)?; + chars = input.char_indices(); } _ => break, }, @@ -412,11 +415,30 @@ fn cooked_string(input: Cursor) -> Result { Err(Reject) } +fn raw_string(input: Cursor) -> Result { + let (input, delimiter) = delimiter_of_raw_string(input)?; + let mut bytes = input.bytes().enumerate(); + while let Some((i, byte)) = bytes.next() { + match byte { + b'"' if input.rest[i + 1..].starts_with(delimiter) => { + let rest = input.advance(i + 1 + delimiter.len()); + return Ok(literal_suffix(rest)); + } + b'\r' => match bytes.next() { + Some((_, b'\n')) => {} + _ => break, + }, + _ => {} + } + } + Err(Reject) +} + fn byte_string(input: Cursor) -> Result { if let Ok(input) = input.parse("b\"") { cooked_byte_string(input) } else if let Ok(input) = 
input.parse("br") { - raw_string(input) + raw_byte_string(input) } else { Err(Reject) } @@ -436,68 +458,125 @@ fn cooked_byte_string(mut input: Cursor) -> Result { }, b'\\' => match bytes.next() { Some((_, b'x')) => { - if !backslash_x_byte(&mut bytes) { - break; - } + backslash_x_byte(&mut bytes)?; } - Some((_, b'n')) | Some((_, b'r')) | Some((_, b't')) | Some((_, b'\\')) - | Some((_, b'0')) | Some((_, b'\'')) | Some((_, b'"')) => {} - Some((newline, b @ b'\n')) | Some((newline, b @ b'\r')) => { - let mut last = b as char; - let rest = input.advance(newline + 1); - let mut chars = rest.char_indices(); - loop { - if last == '\r' && chars.next().map_or(true, |(_, ch)| ch != '\n') { - return Err(Reject); - } - match chars.next() { - Some((_, ch)) if ch.is_whitespace() => last = ch, - Some((offset, _)) => { - input = rest.advance(offset); - bytes = input.bytes().enumerate(); - break; - } - None => return Err(Reject), - } - } + Some((_, b'n' | b'r' | b't' | b'\\' | b'0' | b'\'' | b'"')) => {} + Some((newline, b @ (b'\n' | b'\r'))) => { + input = input.advance(newline + 1); + trailing_backslash(&mut input, b)?; + bytes = input.bytes().enumerate(); } _ => break, }, - b if b < 0x80 => {} + b if b.is_ascii() => {} _ => break, } } Err(Reject) } -fn raw_string(input: Cursor) -> Result { - let mut chars = input.char_indices(); - let mut n = 0; - for (i, ch) in &mut chars { - match ch { - '"' => { - n = i; - break; +fn delimiter_of_raw_string(input: Cursor) -> PResult<&str> { + for (i, byte) in input.bytes().enumerate() { + match byte { + b'"' => { + if i > 255 { + // https://github.com/rust-lang/rust/pull/95251 + return Err(Reject); + } + return Ok((input.advance(i + 1), &input.rest[..i])); } - '#' => {} - _ => return Err(Reject), + b'#' => {} + _ => break, } } - if n > 255 { - // https://github.com/rust-lang/rust/pull/95251 - return Err(Reject); + Err(Reject) +} + +fn raw_byte_string(input: Cursor) -> Result { + let (input, delimiter) = delimiter_of_raw_string(input)?; + let 
mut bytes = input.bytes().enumerate(); + while let Some((i, byte)) = bytes.next() { + match byte { + b'"' if input.rest[i + 1..].starts_with(delimiter) => { + let rest = input.advance(i + 1 + delimiter.len()); + return Ok(literal_suffix(rest)); + } + b'\r' => match bytes.next() { + Some((_, b'\n')) => {} + _ => break, + }, + other => { + if !other.is_ascii() { + break; + } + } + } } + Err(Reject) +} + +fn c_string(input: Cursor) -> Result { + if let Ok(input) = input.parse("c\"") { + cooked_c_string(input) + } else if let Ok(input) = input.parse("cr") { + raw_c_string(input) + } else { + Err(Reject) + } +} + +fn raw_c_string(input: Cursor) -> Result { + let (input, delimiter) = delimiter_of_raw_string(input)?; + let mut bytes = input.bytes().enumerate(); + while let Some((i, byte)) = bytes.next() { + match byte { + b'"' if input.rest[i + 1..].starts_with(delimiter) => { + let rest = input.advance(i + 1 + delimiter.len()); + return Ok(literal_suffix(rest)); + } + b'\r' => match bytes.next() { + Some((_, b'\n')) => {} + _ => break, + }, + b'\0' => break, + _ => {} + } + } + Err(Reject) +} + +fn cooked_c_string(mut input: Cursor) -> Result { + let mut chars = input.char_indices(); + while let Some((i, ch)) = chars.next() { match ch { - '"' if input.rest[i + 1..].starts_with(&input.rest[..n]) => { - let rest = input.advance(i + 1 + n); - return Ok(literal_suffix(rest)); + '"' => { + let input = input.advance(i + 1); + return Ok(literal_suffix(input)); } '\r' => match chars.next() { Some((_, '\n')) => {} _ => break, }, - _ => {} + '\\' => match chars.next() { + Some((_, 'x')) => { + backslash_x_nonzero(&mut chars)?; + } + Some((_, 'n' | 'r' | 't' | '\\' | '\'' | '"')) => {} + Some((_, 'u')) => { + if backslash_u(&mut chars)? 
== '\0' { + break; + } + } + Some((newline, ch @ ('\n' | '\r'))) => { + input = input.advance(newline + 1); + trailing_backslash(&mut input, ch as u8)?; + chars = input.char_indices(); + } + _ => break, + }, + '\0' => break, + _ch => {} } } Err(Reject) @@ -508,9 +587,8 @@ fn byte(input: Cursor) -> Result { let mut bytes = input.bytes().enumerate(); let ok = match bytes.next().map(|(_, b)| b) { Some(b'\\') => match bytes.next().map(|(_, b)| b) { - Some(b'x') => backslash_x_byte(&mut bytes), - Some(b'n') | Some(b'r') | Some(b't') | Some(b'\\') | Some(b'0') | Some(b'\'') - | Some(b'"') => true, + Some(b'x') => backslash_x_byte(&mut bytes).is_ok(), + Some(b'n' | b'r' | b't' | b'\\' | b'0' | b'\'' | b'"') => true, _ => false, }, b => b.is_some(), @@ -531,11 +609,9 @@ fn character(input: Cursor) -> Result { let mut chars = input.char_indices(); let ok = match chars.next().map(|(_, ch)| ch) { Some('\\') => match chars.next().map(|(_, ch)| ch) { - Some('x') => backslash_x_char(&mut chars), - Some('u') => backslash_u(&mut chars), - Some('n') | Some('r') | Some('t') | Some('\\') | Some('0') | Some('\'') | Some('"') => { - true - } + Some('x') => backslash_x_char(&mut chars).is_ok(), + Some('u') => backslash_u(&mut chars).is_ok(), + Some('n' | 'r' | 't' | '\\' | '0' | '\'' | '"') => true, _ => false, }, ch => ch.is_some(), @@ -549,36 +625,49 @@ fn character(input: Cursor) -> Result { } macro_rules! 
next_ch { - ($chars:ident @ $pat:pat $(| $rest:pat)*) => { + ($chars:ident @ $pat:pat) => { match $chars.next() { Some((_, ch)) => match ch { - $pat $(| $rest)* => ch, - _ => return false, + $pat => ch, + _ => return Err(Reject), }, - None => return false, + None => return Err(Reject), } }; } -fn backslash_x_char(chars: &mut I) -> bool +fn backslash_x_char(chars: &mut I) -> Result<(), Reject> where I: Iterator, { next_ch!(chars @ '0'..='7'); next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F'); - true + Ok(()) } -fn backslash_x_byte(chars: &mut I) -> bool +fn backslash_x_byte(chars: &mut I) -> Result<(), Reject> where I: Iterator, { next_ch!(chars @ b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F'); next_ch!(chars @ b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F'); - true + Ok(()) +} + +fn backslash_x_nonzero(chars: &mut I) -> Result<(), Reject> +where + I: Iterator, +{ + let first = next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F'); + let second = next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F'); + if first == '0' && second == '0' { + Err(Reject) + } else { + Ok(()) + } } -fn backslash_u(chars: &mut I) -> bool +fn backslash_u(chars: &mut I) -> Result where I: Iterator, { @@ -591,17 +680,36 @@ where 'a'..='f' => 10 + ch as u8 - b'a', 'A'..='F' => 10 + ch as u8 - b'A', '_' if len > 0 => continue, - '}' if len > 0 => return char::from_u32(value).is_some(), - _ => return false, + '}' if len > 0 => return char::from_u32(value).ok_or(Reject), + _ => break, }; if len == 6 { - return false; + break; } value *= 0x10; value += u32::from(digit); len += 1; } - false + Err(Reject) +} + +fn trailing_backslash(input: &mut Cursor, mut last: u8) -> Result<(), Reject> { + let mut whitespace = input.bytes().enumerate(); + loop { + if last == b'\r' && whitespace.next().map_or(true, |(_, b)| b != b'\n') { + return Err(Reject); + } + match whitespace.next() { + Some((_, b @ (b' ' | b'\t' | b'\n' | b'\r'))) => { + last = b; + } + Some((offset, _)) => { + *input = input.advance(offset); + return 
Ok(()); + } + None => return Err(Reject), + } + } } fn float(input: Cursor) -> Result { @@ -617,7 +725,7 @@ fn float(input: Cursor) -> Result { fn float_digits(input: Cursor) -> Result { let mut chars = input.chars().peekable(); match chars.next() { - Some(ch) if ch >= '0' && ch <= '9' => {} + Some(ch) if '0' <= ch && ch <= '9' => {} _ => return Err(Reject), } @@ -806,12 +914,13 @@ fn doc_comment<'a>(input: Cursor<'a>, trees: &mut TokenStreamBuilder) -> PResult #[cfg(span_locations)] let lo = input.off; let (rest, (comment, inner)) = doc_comment_contents(input)?; - let span = crate::Span::_new_fallback(Span { + let fallback_span = Span { #[cfg(span_locations)] lo, #[cfg(span_locations)] hi: rest.off, - }); + }; + let span = crate::Span::_new_fallback(fallback_span); let mut scan_for_bare_cr = comment; while let Some(cr) = scan_for_bare_cr.find('\r') { @@ -832,7 +941,7 @@ fn doc_comment<'a>(input: Cursor<'a>, trees: &mut TokenStreamBuilder) -> PResult trees.push_token_from_parser(TokenTree::Punct(bang)); } - let doc_ident = crate::Ident::new("doc", span); + let doc_ident = crate::Ident::_new(crate::imp::Ident::new_unchecked("doc", fallback_span)); let mut equal = Punct::new('=', Spacing::Alone); equal.set_span(span); let mut literal = crate::Literal::string(comment); diff --git a/vendor/proc-macro2/src/rcvec.rs b/vendor/proc-macro2/src/rcvec.rs index 62298b4..37955af 100644 --- a/vendor/proc-macro2/src/rcvec.rs +++ b/vendor/proc-macro2/src/rcvec.rs @@ -1,8 +1,8 @@ +use alloc::rc::Rc; +use alloc::vec; use core::mem; +use core::panic::RefUnwindSafe; use core::slice; -use std::panic::RefUnwindSafe; -use std::rc::Rc; -use std::vec; pub(crate) struct RcVec { inner: Rc>, @@ -53,7 +53,7 @@ impl RcVec { T: Clone, { let vec = if let Some(owned) = Rc::get_mut(&mut self.inner) { - mem::replace(owned, Vec::new()) + mem::take(owned) } else { Vec::clone(&self.inner) }; diff --git a/vendor/proc-macro2/src/wrapper.rs b/vendor/proc-macro2/src/wrapper.rs index 00f67cd..a71043a 100644 
--- a/vendor/proc-macro2/src/wrapper.rs +++ b/vendor/proc-macro2/src/wrapper.rs @@ -3,7 +3,8 @@ use crate::detection::inside_proc_macro; use crate::location::LineColumn; use crate::{fallback, Delimiter, Punct, Spacing, TokenTree}; use core::fmt::{self, Debug, Display}; -use core::iter::FromIterator; +#[cfg(span_locations)] +use core::ops::Range; use core::ops::RangeBounds; use core::str::FromStr; use std::panic; @@ -29,18 +30,23 @@ pub(crate) struct DeferredTokenStream { pub(crate) enum LexError { Compiler(proc_macro::LexError), Fallback(fallback::LexError), -} -impl LexError { - fn call_site() -> Self { - LexError::Fallback(fallback::LexError { - span: fallback::Span::call_site(), - }) - } + // Rustc was supposed to return a LexError, but it panicked instead. + // https://github.com/rust-lang/rust/issues/58736 + CompilerPanic, } -fn mismatch() -> ! { - panic!("compiler/fallback mismatch") +#[cold] +fn mismatch(line: u32) -> ! { + #[cfg(procmacro2_backtrace)] + { + let backtrace = std::backtrace::Backtrace::force_capture(); + panic!("compiler/fallback mismatch #{}\n\n{}", line, backtrace) + } + #[cfg(not(procmacro2_backtrace))] + { + panic!("compiler/fallback mismatch #{}", line) + } } impl DeferredTokenStream { @@ -89,13 +95,13 @@ impl TokenStream { fn unwrap_nightly(self) -> proc_macro::TokenStream { match self { TokenStream::Compiler(s) => s.into_token_stream(), - TokenStream::Fallback(_) => mismatch(), + TokenStream::Fallback(_) => mismatch(line!()), } } fn unwrap_stable(self) -> fallback::TokenStream { match self { - TokenStream::Compiler(_) => mismatch(), + TokenStream::Compiler(_) => mismatch(line!()), TokenStream::Fallback(s) => s, } } @@ -118,7 +124,7 @@ impl FromStr for TokenStream { // Work around https://github.com/rust-lang/rust/issues/58736. 
fn proc_macro_parse(src: &str) -> Result { let result = panic::catch_unwind(|| src.parse().map_err(LexError::Compiler)); - result.unwrap_or_else(|_| Err(LexError::call_site())) + result.unwrap_or_else(|_| Err(LexError::CompilerPanic)) } impl Display for TokenStream { @@ -199,14 +205,14 @@ impl FromIterator for TokenStream { first.evaluate_now(); first.stream.extend(streams.map(|s| match s { TokenStream::Compiler(s) => s.into_token_stream(), - TokenStream::Fallback(_) => mismatch(), + TokenStream::Fallback(_) => mismatch(line!()), })); TokenStream::Compiler(first) } Some(TokenStream::Fallback(mut first)) => { first.extend(streams.map(|s| match s { TokenStream::Fallback(s) => s, - TokenStream::Compiler(_) => mismatch(), + TokenStream::Compiler(_) => mismatch(line!()), })); TokenStream::Fallback(first) } @@ -256,7 +262,7 @@ impl Debug for TokenStream { impl LexError { pub(crate) fn span(&self) -> Span { match self { - LexError::Compiler(_) => Span::call_site(), + LexError::Compiler(_) | LexError::CompilerPanic => Span::call_site(), LexError::Fallback(e) => Span::Fallback(e.span()), } } @@ -279,6 +285,10 @@ impl Debug for LexError { match self { LexError::Compiler(e) => Debug::fmt(e, f), LexError::Fallback(e) => Debug::fmt(e, f), + LexError::CompilerPanic => { + let fallback = fallback::LexError::call_site(); + Debug::fmt(&fallback, f) + } } } } @@ -286,16 +296,12 @@ impl Debug for LexError { impl Display for LexError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { - #[cfg(not(no_lexerror_display))] LexError::Compiler(e) => Display::fmt(e, f), - #[cfg(no_lexerror_display)] - LexError::Compiler(_e) => Display::fmt( - &fallback::LexError { - span: fallback::Span::call_site(), - }, - f, - ), LexError::Fallback(e) => Display::fmt(e, f), + LexError::CompilerPanic => { + let fallback = fallback::LexError::call_site(); + Display::fmt(&fallback, f) + } } } } @@ -406,7 +412,6 @@ impl Span { } } - #[cfg(not(no_hygiene))] pub fn mixed_site() -> Self { if 
inside_proc_macro() { Span::Compiler(proc_macro::Span::mixed_site()) @@ -426,29 +431,19 @@ impl Span { pub fn resolved_at(&self, other: Span) -> Span { match (self, other) { - #[cfg(not(no_hygiene))] (Span::Compiler(a), Span::Compiler(b)) => Span::Compiler(a.resolved_at(b)), - - // Name resolution affects semantics, but location is only cosmetic - #[cfg(no_hygiene)] - (Span::Compiler(_), Span::Compiler(_)) => other, - (Span::Fallback(a), Span::Fallback(b)) => Span::Fallback(a.resolved_at(b)), - _ => mismatch(), + (Span::Compiler(_), Span::Fallback(_)) => mismatch(line!()), + (Span::Fallback(_), Span::Compiler(_)) => mismatch(line!()), } } pub fn located_at(&self, other: Span) -> Span { match (self, other) { - #[cfg(not(no_hygiene))] (Span::Compiler(a), Span::Compiler(b)) => Span::Compiler(a.located_at(b)), - - // Name resolution affects semantics, but location is only cosmetic - #[cfg(no_hygiene)] - (Span::Compiler(_), Span::Compiler(_)) => *self, - (Span::Fallback(a), Span::Fallback(b)) => Span::Fallback(a.located_at(b)), - _ => mismatch(), + (Span::Compiler(_), Span::Fallback(_)) => mismatch(line!()), + (Span::Fallback(_), Span::Compiler(_)) => mismatch(line!()), } } @@ -468,46 +463,29 @@ impl Span { } #[cfg(span_locations)] - pub fn start(&self) -> LineColumn { + pub fn byte_range(&self) -> Range { match self { #[cfg(proc_macro_span)] - Span::Compiler(s) => { - let proc_macro::LineColumn { line, column } = s.start(); - LineColumn { line, column } - } + Span::Compiler(s) => s.byte_range(), #[cfg(not(proc_macro_span))] - Span::Compiler(_) => LineColumn { line: 0, column: 0 }, - Span::Fallback(s) => s.start(), + Span::Compiler(_) => 0..0, + Span::Fallback(s) => s.byte_range(), } } #[cfg(span_locations)] - pub fn end(&self) -> LineColumn { + pub fn start(&self) -> LineColumn { match self { - #[cfg(proc_macro_span)] - Span::Compiler(s) => { - let proc_macro::LineColumn { line, column } = s.end(); - LineColumn { line, column } - } - #[cfg(not(proc_macro_span))] 
Span::Compiler(_) => LineColumn { line: 0, column: 0 }, - Span::Fallback(s) => s.end(), - } - } - - #[cfg(super_unstable)] - pub fn before(&self) -> Span { - match self { - Span::Compiler(s) => Span::Compiler(s.before()), - Span::Fallback(s) => Span::Fallback(s.before()), + Span::Fallback(s) => s.start(), } } - #[cfg(super_unstable)] - pub fn after(&self) -> Span { + #[cfg(span_locations)] + pub fn end(&self) -> LineColumn { match self { - Span::Compiler(s) => Span::Compiler(s.after()), - Span::Fallback(s) => Span::Fallback(s.after()), + Span::Compiler(_) => LineColumn { line: 0, column: 0 }, + Span::Fallback(s) => s.end(), } } @@ -543,7 +521,7 @@ impl Span { fn unwrap_nightly(self) -> proc_macro::Span { match self { Span::Compiler(s) => s, - Span::Fallback(_) => mismatch(), + Span::Fallback(_) => mismatch(line!()), } } } @@ -630,20 +608,14 @@ impl Group { pub fn span_open(&self) -> Span { match self { - #[cfg(not(no_group_open_close))] Group::Compiler(g) => Span::Compiler(g.span_open()), - #[cfg(no_group_open_close)] - Group::Compiler(g) => Span::Compiler(g.span()), Group::Fallback(g) => Span::Fallback(g.span_open()), } } pub fn span_close(&self) -> Span { match self { - #[cfg(not(no_group_open_close))] Group::Compiler(g) => Span::Compiler(g.span_close()), - #[cfg(no_group_open_close)] - Group::Compiler(g) => Span::Compiler(g.span()), Group::Fallback(g) => Span::Fallback(g.span_close()), } } @@ -652,14 +624,15 @@ impl Group { match (self, span) { (Group::Compiler(g), Span::Compiler(s)) => g.set_span(s), (Group::Fallback(g), Span::Fallback(s)) => g.set_span(s), - _ => mismatch(), + (Group::Compiler(_), Span::Fallback(_)) => mismatch(line!()), + (Group::Fallback(_), Span::Compiler(_)) => mismatch(line!()), } } fn unwrap_nightly(self) -> proc_macro::Group { match self { Group::Compiler(g) => g, - Group::Fallback(_) => mismatch(), + Group::Fallback(_) => mismatch(line!()), } } } @@ -695,40 +668,30 @@ pub(crate) enum Ident { } impl Ident { - pub fn new(string: &str, 
span: Span) -> Self { + #[track_caller] + pub fn new_checked(string: &str, span: Span) -> Self { match span { Span::Compiler(s) => Ident::Compiler(proc_macro::Ident::new(string, s)), - Span::Fallback(s) => Ident::Fallback(fallback::Ident::new(string, s)), + Span::Fallback(s) => Ident::Fallback(fallback::Ident::new_checked(string, s)), } } - pub fn new_raw(string: &str, span: Span) -> Self { + pub fn new_unchecked(string: &str, span: fallback::Span) -> Self { + Ident::Fallback(fallback::Ident::new_unchecked(string, span)) + } + + #[track_caller] + pub fn new_raw_checked(string: &str, span: Span) -> Self { match span { - #[cfg(not(no_ident_new_raw))] Span::Compiler(s) => Ident::Compiler(proc_macro::Ident::new_raw(string, s)), - #[cfg(no_ident_new_raw)] - Span::Compiler(s) => { - let _ = proc_macro::Ident::new(string, s); - // At this point the un-r#-prefixed string is known to be a - // valid identifier. Try to produce a valid raw identifier by - // running the `TokenStream` parser, and unwrapping the first - // token as an `Ident`. 
- let raw_prefixed = format!("r#{}", string); - if let Ok(ts) = raw_prefixed.parse::() { - let mut iter = ts.into_iter(); - if let (Some(proc_macro::TokenTree::Ident(mut id)), None) = - (iter.next(), iter.next()) - { - id.set_span(s); - return Ident::Compiler(id); - } - } - panic!("not allowed as a raw identifier: `{}`", raw_prefixed) - } - Span::Fallback(s) => Ident::Fallback(fallback::Ident::new_raw(string, s)), + Span::Fallback(s) => Ident::Fallback(fallback::Ident::new_raw_checked(string, s)), } } + pub fn new_raw_unchecked(string: &str, span: fallback::Span) -> Self { + Ident::Fallback(fallback::Ident::new_raw_unchecked(string, span)) + } + pub fn span(&self) -> Span { match self { Ident::Compiler(t) => Span::Compiler(t.span()), @@ -740,14 +703,15 @@ impl Ident { match (self, span) { (Ident::Compiler(t), Span::Compiler(s)) => t.set_span(s), (Ident::Fallback(t), Span::Fallback(s)) => t.set_span(s), - _ => mismatch(), + (Ident::Compiler(_), Span::Fallback(_)) => mismatch(line!()), + (Ident::Fallback(_), Span::Compiler(_)) => mismatch(line!()), } } fn unwrap_nightly(self) -> proc_macro::Ident { match self { Ident::Compiler(s) => s, - Ident::Fallback(_) => mismatch(), + Ident::Fallback(_) => mismatch(line!()), } } } @@ -757,7 +721,8 @@ impl PartialEq for Ident { match (self, other) { (Ident::Compiler(t), Ident::Compiler(o)) => t.to_string() == o.to_string(), (Ident::Fallback(t), Ident::Fallback(o)) => t == o, - _ => mismatch(), + (Ident::Compiler(_), Ident::Fallback(_)) => mismatch(line!()), + (Ident::Fallback(_), Ident::Compiler(_)) => mismatch(line!()), } } } @@ -826,9 +791,9 @@ macro_rules! 
unsuffixed_integers { impl Literal { pub unsafe fn from_str_unchecked(repr: &str) -> Self { if inside_proc_macro() { - Literal::Compiler(compiler_literal_from_str(repr).expect("invalid literal")) + Literal::Compiler(proc_macro::Literal::from_str(repr).expect("invalid literal")) } else { - Literal::Fallback(fallback::Literal::from_str_unchecked(repr)) + Literal::Fallback(unsafe { fallback::Literal::from_str_unchecked(repr) }) } } @@ -916,7 +881,8 @@ impl Literal { match (self, span) { (Literal::Compiler(lit), Span::Compiler(s)) => lit.set_span(s), (Literal::Fallback(lit), Span::Fallback(s)) => lit.set_span(s), - _ => mismatch(), + (Literal::Compiler(_), Span::Fallback(_)) => mismatch(line!()), + (Literal::Fallback(_), Span::Compiler(_)) => mismatch(line!()), } } @@ -933,7 +899,7 @@ impl Literal { fn unwrap_nightly(self) -> proc_macro::Literal { match self { Literal::Compiler(s) => s, - Literal::Fallback(_) => mismatch(), + Literal::Fallback(_) => mismatch(line!()), } } } @@ -949,7 +915,8 @@ impl FromStr for Literal { fn from_str(repr: &str) -> Result { if inside_proc_macro() { - compiler_literal_from_str(repr).map(Literal::Compiler) + let literal = proc_macro::Literal::from_str(repr)?; + Ok(Literal::Compiler(literal)) } else { let literal = fallback::Literal::from_str(repr)?; Ok(Literal::Fallback(literal)) @@ -957,24 +924,6 @@ impl FromStr for Literal { } } -fn compiler_literal_from_str(repr: &str) -> Result { - #[cfg(not(no_literal_from_str))] - { - proc_macro::Literal::from_str(repr).map_err(LexError::Compiler) - } - #[cfg(no_literal_from_str)] - { - let tokens = proc_macro_parse(repr)?; - let mut iter = tokens.into_iter(); - if let (Some(proc_macro::TokenTree::Literal(literal)), None) = (iter.next(), iter.next()) { - if literal.to_string().len() == repr.len() { - return Ok(literal); - } - } - Err(LexError::call_site()) - } -} - impl Display for Literal { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { @@ -992,3 +941,14 @@ impl Debug for 
Literal { } } } + +#[cfg(span_locations)] +pub(crate) fn invalidate_current_thread_spans() { + if inside_proc_macro() { + panic!( + "proc_macro2::extra::invalidate_current_thread_spans is not available in procedural macros" + ); + } else { + crate::fallback::invalidate_current_thread_spans(); + } +} diff --git a/vendor/proc-macro2/tests/marker.rs b/vendor/proc-macro2/tests/marker.rs index 5b45733..d08fbfc 100644 --- a/vendor/proc-macro2/tests/marker.rs +++ b/vendor/proc-macro2/tests/marker.rs @@ -62,7 +62,6 @@ mod semver_exempt { assert_impl!(SourceFile is not Send or Sync); } -#[cfg(not(no_libprocmacro_unwind_safe))] mod unwind_safe { use proc_macro2::{ Delimiter, Group, Ident, LexError, Literal, Punct, Spacing, Span, TokenStream, TokenTree, diff --git a/vendor/proc-macro2/tests/test.rs b/vendor/proc-macro2/tests/test.rs index 75f69e2..486955c 100644 --- a/vendor/proc-macro2/tests/test.rs +++ b/vendor/proc-macro2/tests/test.rs @@ -7,7 +7,6 @@ use proc_macro2::{Ident, Literal, Punct, Spacing, Span, TokenStream, TokenTree}; use std::iter; -use std::panic; use std::str::{self, FromStr}; #[test] @@ -90,24 +89,9 @@ fn lifetime_number() { } #[test] +#[should_panic(expected = r#""'a#" is not a valid Ident"#)] fn lifetime_invalid() { - let result = panic::catch_unwind(|| Ident::new("'a#", Span::call_site())); - match result { - Err(box_any) => { - let message = box_any.downcast_ref::().unwrap(); - let expected1 = r#""\'a#" is not a valid Ident"#; // 1.31.0 .. 1.53.0 - let expected2 = r#""'a#" is not a valid Ident"#; // 1.53.0 .. 
- assert!( - message == expected1 || message == expected2, - "panic message does not match expected string\n\ - \x20 panic message: `{:?}`\n\ - \x20expected message: `{:?}`", - message, - expected2, - ); - } - Ok(_) => panic!("test did not panic as expected"), - } + Ident::new("'a#", Span::call_site()); } #[test] @@ -119,6 +103,9 @@ fn literal_string() { Literal::string("a\00b\07c\08d\0e\0").to_string(), "\"a\\x000b\\x007c\\08d\\0e\\0\"", ); + + "\"\\\r\n x\"".parse::().unwrap(); + "\"\\\r\n \rx\"".parse::().unwrap_err(); } #[test] @@ -156,6 +143,47 @@ fn literal_byte_string() { Literal::byte_string(b"a\00b\07c\08d\0e\0").to_string(), "b\"a\\x000b\\x007c\\08d\\0e\\0\"", ); + + "b\"\\\r\n x\"".parse::().unwrap(); + "b\"\\\r\n \rx\"".parse::().unwrap_err(); + "b\"\\\r\n \u{a0}x\"".parse::().unwrap_err(); + "br\"\u{a0}\"".parse::().unwrap_err(); +} + +#[test] +fn literal_c_string() { + let strings = r###" + c"hello\x80我叫\u{1F980}" // from the RFC + cr"\" + cr##"Hello "world"!"## + c"\t\n\r\"\\" + "###; + + let mut tokens = strings.parse::().unwrap().into_iter(); + + for expected in &[ + r#"c"hello\x80我叫\u{1F980}""#, + r#"cr"\""#, + r###"cr##"Hello "world"!"##"###, + r#"c"\t\n\r\"\\""#, + ] { + match tokens.next().unwrap() { + TokenTree::Literal(literal) => { + assert_eq!(literal.to_string(), *expected); + } + unexpected => panic!("unexpected token: {:?}", unexpected), + } + } + + if let Some(unexpected) = tokens.next() { + panic!("unexpected token: {:?}", unexpected); + } + + for invalid in &[r#"c"\0""#, r#"c"\x00""#, r#"c"\u{0}""#, "c\"\0\""] { + if let Ok(unexpected) = invalid.parse::() { + panic!("unexpected token: {:?}", unexpected); + } + } } #[test] @@ -297,6 +325,24 @@ fn literal_span() { assert!(positive.subspan(1..4).is_none()); } +#[cfg(span_locations)] +#[test] +fn source_text() { + let input = " 𓀕 a z "; + let mut tokens = input + .parse::() + .unwrap() + .into_iter(); + + let first = tokens.next().unwrap(); + assert_eq!("𓀕", 
first.span().source_text().unwrap()); + + let second = tokens.next().unwrap(); + let third = tokens.next().unwrap(); + assert_eq!("z", third.span().source_text().unwrap()); + assert_eq!("a", second.span().source_text().unwrap()); +} + #[test] fn roundtrip() { fn roundtrip(p: &str) { @@ -636,8 +682,8 @@ fn non_ascii_tokens() { check_spans("/*** ábc */ x", &[(1, 12, 1, 13)]); check_spans(r#""abc""#, &[(1, 0, 1, 5)]); check_spans(r#""ábc""#, &[(1, 0, 1, 5)]); - check_spans(r###"r#"abc"#"###, &[(1, 0, 1, 8)]); - check_spans(r###"r#"ábc"#"###, &[(1, 0, 1, 8)]); + check_spans(r##"r#"abc"#"##, &[(1, 0, 1, 8)]); + check_spans(r##"r#"ábc"#"##, &[(1, 0, 1, 8)]); check_spans("r#\"a\nc\"#", &[(1, 0, 2, 3)]); check_spans("r#\"á\nc\"#", &[(1, 0, 2, 3)]); check_spans("'a'", &[(1, 0, 1, 3)]); @@ -657,7 +703,6 @@ fn non_ascii_tokens() { check_spans("ábc// foo", &[(1, 0, 1, 3)]); check_spans("ábć// foo", &[(1, 0, 1, 3)]); check_spans("b\"a\\\n c\"", &[(1, 0, 2, 3)]); - check_spans("b\"a\\\n\u{00a0}c\"", &[(1, 0, 2, 3)]); } #[cfg(span_locations)] @@ -688,6 +733,18 @@ fn check_spans_internal(ts: TokenStream, lines: &mut &[(usize, usize, usize, usi } } +#[test] +fn whitespace() { + // space, horizontal tab, vertical tab, form feed, carriage return, line + // feed, non-breaking space, left-to-right mark, right-to-left mark + let various_spaces = " \t\u{b}\u{c}\r\n\u{a0}\u{200e}\u{200f}"; + let tokens = various_spaces.parse::().unwrap(); + assert_eq!(tokens.into_iter().count(), 0); + + let lone_carriage_returns = " \r \r\r\n "; + lone_carriage_returns.parse::().unwrap(); +} + #[test] fn byte_order_mark() { let string = "\u{feff}foo"; @@ -700,3 +757,39 @@ fn byte_order_mark() { let string = "foo\u{feff}"; string.parse::().unwrap_err(); } + +#[cfg(span_locations)] +fn create_span() -> proc_macro2::Span { + let tts: TokenStream = "1".parse().unwrap(); + match tts.into_iter().next().unwrap() { + TokenTree::Literal(literal) => literal.span(), + _ => unreachable!(), + } +} + 
+#[cfg(span_locations)] +#[test] +fn test_invalidate_current_thread_spans() { + let actual = format!("{:#?}", create_span()); + assert_eq!(actual, "bytes(1..2)"); + let actual = format!("{:#?}", create_span()); + assert_eq!(actual, "bytes(3..4)"); + + proc_macro2::extra::invalidate_current_thread_spans(); + + let actual = format!("{:#?}", create_span()); + // Test that span offsets have been reset after the call + // to invalidate_current_thread_spans() + assert_eq!(actual, "bytes(1..2)"); +} + +#[cfg(span_locations)] +#[test] +#[should_panic(expected = "Invalid span with no related FileInfo!")] +fn test_use_span_after_invalidation() { + let span = create_span(); + + proc_macro2::extra::invalidate_current_thread_spans(); + + span.source_text(); +} diff --git a/vendor/proc-macro2/tests/test_fmt.rs b/vendor/proc-macro2/tests/test_fmt.rs index 93dd19e..86a4c38 100644 --- a/vendor/proc-macro2/tests/test_fmt.rs +++ b/vendor/proc-macro2/tests/test_fmt.rs @@ -1,7 +1,7 @@ #![allow(clippy::from_iter_instead_of_collect)] use proc_macro2::{Delimiter, Group, Ident, Span, TokenStream, TokenTree}; -use std::iter::{self, FromIterator}; +use std::iter; #[test] fn test_fmt_group() { diff --git a/vendor/quote/.cargo-checksum.json b/vendor/quote/.cargo-checksum.json index d14e0d1..0d900a2 100644 --- a/vendor/quote/.cargo-checksum.json +++ b/vendor/quote/.cargo-checksum.json @@ -1 +1 @@ 
-{"files":{"Cargo.toml":"5969ab348602ad0e3909923146807be5bd7d650692c09e8028fcdfa0886d4a71","LICENSE-APACHE":"62c7a1e35f56406896d7aa7ca52d0cc0d272ac022b5d2796e7d6905db8a3636a","LICENSE-MIT":"23f18e03dc49df91622fe2a76176497404e46ced8a715d9d2b67a7446571cca3","README.md":"66f3cf08338e47618fd23d810355b075da573815d9c1e158a7f7ab140decc16d","build.rs":"3733c86ae2733629f873f93c2f45da30164beee8de9ee0833099fac6a05a3e6b","rust-toolchain.toml":"6bbb61302978c736b2da03e4fb40e3beab908f85d533ab46fd541e637b5f3e0f","src/ext.rs":"9881576cac3e476a4bf04f9b601cf9a53b79399fb0ca9634e8b861ac91709843","src/format.rs":"c595015418f35e6992e710441b9999f09b2afe4678b138039d670d100c0bdd86","src/ident_fragment.rs":"66788c5f57681547d936a9bcf51873b658630c76b2e690df4b3158edf573384a","src/lib.rs":"7938cba6edf3be1f8c2c4432d3c6c171104375aec4b9c213437c89656fd8d3f2","src/runtime.rs":"31b2159986c68dc1c78801a92f795435dbc0bcea859ca342df280889e82c6c4d","src/spanned.rs":"0ccaae1137af5f3e54eae75c3bdc637be74cfa56a857f2c0f85a041c9ba26838","src/to_tokens.rs":"99bb6f467289c32af6c1f7af0d45cc6ac7b31e2436774e616770152a49e6ac0f","tests/compiletest.rs":"022a8e400ef813d7ea1875b944549cee5125f6a995dc33e93b48cba3e1b57bd1","tests/test.rs":"3be80741f84a707376c230d9cf70ce9537caa359691d8d4c34968e28175e4ad7","tests/ui/does-not-have-iter-interpolated-dup.rs":"ad13eea21d4cdd2ab6c082f633392e1ff20fb0d1af5f2177041e0bf7f30da695","tests/ui/does-not-have-iter-interpolated-dup.stderr":"09406a4bcf96587a739df7053251c8e07ea520f8d20b13f8fbea33f9c29e019b","tests/ui/does-not-have-iter-interpolated.rs":"83a5b3f240651adcbe4b6e51076d76d653ad439b37442cf4054f1fd3c073f3b7","tests/ui/does-not-have-iter-interpolated.stderr":"626170deaca60092f1992262afe7598e03ef8e3821ebe91d0a643edd8346e9ac","tests/ui/does-not-have-iter-separated.rs":"fe413c48331d5e3a7ae5fef6a5892a90c72f610d54595879eb49d0a94154ba3f","tests/ui/does-not-have-iter-separated.stderr":"03fd560979ebcd5aa6f83858bc2c3c01ba6546c16335101275505304895c1ae9","tests/ui/does-not-have-iter.rs":"09dc9499d86
1b63cebb0848b855b78e2dc9497bfde37ba6339f3625ae009a62f","tests/ui/does-not-have-iter.stderr":"d6da483c29e232ced72059bbdf05d31afb1df9e02954edaa9cfaea1ec6df72dc","tests/ui/not-quotable.rs":"5759d0884943417609f28faadc70254a3e2fd3d9bd6ff7297a3fb70a77fafd8a","tests/ui/not-quotable.stderr":"efcace9419fdf64d6beca7e135c3b7daff74038d4449475896cbe8cbf2566ade","tests/ui/not-repeatable.rs":"a4b115c04e4e41049a05f5b69450503fbffeba031218b4189cb931839f7f9a9c","tests/ui/not-repeatable.stderr":"594249d59d16f039c16816f1aaf9933176994e296fcf81d1b8b24d5b66ae0d0a","tests/ui/wrong-type-span.rs":"6195e35ea844c0c52ba1cff5d790c3a371af6915d137d377834ad984229ef9ea","tests/ui/wrong-type-span.stderr":"cad072e40e0ecc04f375122ae41aede2f0da2a9244492b3fcf70249e59d1b128"},"package":"4424af4bf778aae2051a77b60283332f386554255d722233d09fbfc7e30da2fc"} \ No newline at end of file +{"files":{"Cargo.toml":"0ec1e0fd36354750321a12d04a5e4d9a8d5dc6a8af753183de50da55fc10391b","LICENSE-APACHE":"62c7a1e35f56406896d7aa7ca52d0cc0d272ac022b5d2796e7d6905db8a3636a","LICENSE-MIT":"23f18e03dc49df91622fe2a76176497404e46ced8a715d9d2b67a7446571cca3","README.md":"626e7079eab0baacf0fcaf3e244f407b2014ebaeca45905d72e8fb8bed18aaea","rust-toolchain.toml":"6bbb61302978c736b2da03e4fb40e3beab908f85d533ab46fd541e637b5f3e0f","src/ext.rs":"9881576cac3e476a4bf04f9b601cf9a53b79399fb0ca9634e8b861ac91709843","src/format.rs":"c595015418f35e6992e710441b9999f09b2afe4678b138039d670d100c0bdd86","src/ident_fragment.rs":"0b3e6c2129e55910fd2d240e1e7efba6f1796801d24352d1c0bfbceb0e8b678f","src/lib.rs":"cef1b4c031d401fb87e88a2ed51858c5f8f471e62a6261c1ef0f55ef9e1906a1","src/runtime.rs":"7f37326edaeac2c42ed806b447eeba12e36dd4b1bc25fbf52f8eb23140f3be7a","src/spanned.rs":"3ccf5120593f35787442c0a37d243e802c5262e7f8b35aed503873008ec035c5","src/to_tokens.rs":"1c76311fcc82098e630056d71fd6f3929194ee31b0840e2aa643ed7e78026e3e","tests/compiletest.rs":"022a8e400ef813d7ea1875b944549cee5125f6a995dc33e93b48cba3e1b57bd1","tests/test.rs":"3be80741f84a707376c230d9cf70c
e9537caa359691d8d4c34968e28175e4ad7","tests/ui/does-not-have-iter-interpolated-dup.rs":"ad13eea21d4cdd2ab6c082f633392e1ff20fb0d1af5f2177041e0bf7f30da695","tests/ui/does-not-have-iter-interpolated-dup.stderr":"90a4bdb9267535f5d2785940148338d6b7d905548051d2c9c5dcbd58f2c11d8e","tests/ui/does-not-have-iter-interpolated.rs":"83a5b3f240651adcbe4b6e51076d76d653ad439b37442cf4054f1fd3c073f3b7","tests/ui/does-not-have-iter-interpolated.stderr":"ae7c2739554c862b331705e82781aa4687a4375210cef6ae899a4be4a4ec2d97","tests/ui/does-not-have-iter-separated.rs":"fe413c48331d5e3a7ae5fef6a5892a90c72f610d54595879eb49d0a94154ba3f","tests/ui/does-not-have-iter-separated.stderr":"03fd560979ebcd5aa6f83858bc2c3c01ba6546c16335101275505304895c1ae9","tests/ui/does-not-have-iter.rs":"09dc9499d861b63cebb0848b855b78e2dc9497bfde37ba6339f3625ae009a62f","tests/ui/does-not-have-iter.stderr":"d6da483c29e232ced72059bbdf05d31afb1df9e02954edaa9cfaea1ec6df72dc","tests/ui/not-quotable.rs":"5759d0884943417609f28faadc70254a3e2fd3d9bd6ff7297a3fb70a77fafd8a","tests/ui/not-quotable.stderr":"459bdadbf1e73b9401cf7d5d578dc053774bb4e5aa25ad2abf25d6b0f61aa306","tests/ui/not-repeatable.rs":"a4b115c04e4e41049a05f5b69450503fbffeba031218b4189cb931839f7f9a9c","tests/ui/not-repeatable.stderr":"594249d59d16f039c16816f1aaf9933176994e296fcf81d1b8b24d5b66ae0d0a","tests/ui/wrong-type-span.rs":"6195e35ea844c0c52ba1cff5d790c3a371af6915d137d377834ad984229ef9ea","tests/ui/wrong-type-span.stderr":"cad072e40e0ecc04f375122ae41aede2f0da2a9244492b3fcf70249e59d1b128"},"package":"291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef"} \ No newline at end of file diff --git a/vendor/quote/Cargo.toml b/vendor/quote/Cargo.toml index 734042f..f3222c2 100644 --- a/vendor/quote/Cargo.toml +++ b/vendor/quote/Cargo.toml @@ -11,9 +11,9 @@ [package] edition = "2018" -rust-version = "1.31" +rust-version = "1.56" name = "quote" -version = "1.0.26" +version = "1.0.35" authors = ["David Tolnay "] autobenches = false description = 
"Quasi-quoting macro quote!(...)" @@ -28,13 +28,14 @@ license = "MIT OR Apache-2.0" repository = "https://github.com/dtolnay/quote" [package.metadata.docs.rs] +rustdoc-args = ["--generate-link-to-definition"] targets = ["x86_64-unknown-linux-gnu"] [lib] doc-scrape-examples = false [dependencies.proc-macro2] -version = "1.0.52" +version = "1.0.74" default-features = false [dev-dependencies.rustversion] diff --git a/vendor/quote/README.md b/vendor/quote/README.md index 74e99ce..bfc91a9 100644 --- a/vendor/quote/README.md +++ b/vendor/quote/README.md @@ -34,7 +34,7 @@ macros. quote = "1.0" ``` -*Version requirement: Quote supports rustc 1.31 and up.*
+*Version requirement: Quote supports rustc 1.56 and up.*
[*Release notes*](https://github.com/dtolnay/quote/releases)
@@ -233,15 +233,26 @@ macro. ## Non-macro code generators When using `quote` in a build.rs or main.rs and writing the output out to a -file, consider having the code generator pass the tokens through [rustfmt] -before writing (either by shelling out to the `rustfmt` binary or by pulling in -the `rustfmt` library as a dependency). This way if an error occurs in the -generated code it is convenient for a human to read and debug. +file, consider having the code generator pass the tokens through [prettyplease] +before writing. This way if an error occurs in the generated code it is +convenient for a human to read and debug. Be aware that no kind of hygiene or span information is retained when tokens are written to a file; the conversion from tokens to source code is lossy. -[rustfmt]: https://github.com/rust-lang/rustfmt +Example usage in build.rs: + +```rust +let output = quote! { ... }; +let syntax_tree = syn::parse2(output).unwrap(); +let formatted = prettyplease::unparse(&syntax_tree); + +let out_dir = env::var_os("OUT_DIR").unwrap(); +let dest_path = Path::new(&out_dir).join("out.rs"); +fs::write(dest_path, formatted).unwrap(); +``` + +[prettyplease]: https://github.com/dtolnay/prettyplease
diff --git a/vendor/quote/build.rs b/vendor/quote/build.rs deleted file mode 100644 index a7e6b2c..0000000 --- a/vendor/quote/build.rs +++ /dev/null @@ -1,38 +0,0 @@ -use std::env; -use std::process::{self, Command}; -use std::str; - -fn main() { - println!("cargo:rerun-if-changed=build.rs"); - - let version = match rustc_version() { - Some(version) => version, - None => return, - }; - - if version.minor < 31 { - eprintln!("Minimum supported rustc version is 1.31"); - process::exit(1); - } - - if version.minor < 53 { - // https://github.com/rust-lang/rust/issues/43081 - println!("cargo:rustc-cfg=needs_invalid_span_workaround"); - } -} - -struct RustcVersion { - minor: u32, -} - -fn rustc_version() -> Option { - let rustc = env::var_os("RUSTC")?; - let output = Command::new(rustc).arg("--version").output().ok()?; - let version = str::from_utf8(&output.stdout).ok()?; - let mut pieces = version.split('.'); - if pieces.next() != Some("rustc 1") { - return None; - } - let minor = pieces.next()?.parse().ok()?; - Some(RustcVersion { minor }) -} diff --git a/vendor/quote/src/ident_fragment.rs b/vendor/quote/src/ident_fragment.rs index cf74024..6c2a9a8 100644 --- a/vendor/quote/src/ident_fragment.rs +++ b/vendor/quote/src/ident_fragment.rs @@ -1,6 +1,6 @@ +use alloc::borrow::Cow; use core::fmt; use proc_macro2::{Ident, Span}; -use std::borrow::Cow; /// Specialized formatting trait used by `format_ident!`. /// @@ -8,6 +8,8 @@ use std::borrow::Cow; /// stripped, if present. /// /// See [`format_ident!`] for more information. +/// +/// [`format_ident!`]: crate::format_ident pub trait IdentFragment { /// Format this value as an identifier fragment. 
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result; @@ -47,8 +49,8 @@ impl IdentFragment for Ident { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let id = self.to_string(); - if id.starts_with("r#") { - fmt::Display::fmt(&id[2..], f) + if let Some(id) = id.strip_prefix("r#") { + fmt::Display::fmt(id, f) } else { fmt::Display::fmt(&id[..], f) } diff --git a/vendor/quote/src/lib.rs b/vendor/quote/src/lib.rs index ab8c1b1..8b97abd 100644 --- a/vendor/quote/src/lib.rs +++ b/vendor/quote/src/lib.rs @@ -79,9 +79,20 @@ //! } //! }; //! ``` +//! +//!
+//! +//! # Non-macro code generators +//! +//! When using `quote` in a build.rs or main.rs and writing the output out to a +//! file, consider having the code generator pass the tokens through +//! [prettyplease] before writing. This way if an error occurs in the generated +//! code it is convenient for a human to read and debug. +//! +//! [prettyplease]: https://github.com/dtolnay/prettyplease // Quote types in rustdoc of other crates get linked to here. -#![doc(html_root_url = "https://docs.rs/quote/1.0.26")] +#![doc(html_root_url = "https://docs.rs/quote/1.0.35")] #![allow( clippy::doc_markdown, clippy::missing_errors_doc, @@ -91,10 +102,9 @@ clippy::wrong_self_convention, )] -#[cfg(all( - not(all(target_arch = "wasm32", target_os = "unknown")), - feature = "proc-macro" -))] +extern crate alloc; + +#[cfg(feature = "proc-macro")] extern crate proc_macro; mod ext; @@ -418,7 +428,7 @@ pub mod spanned; /// appears suffixed as integer literals by interpolating them as [`syn::Index`] /// instead. 
/// -/// [`syn::Index`]: https://docs.rs/syn/1.0/syn/struct.Index.html +/// [`syn::Index`]: https://docs.rs/syn/2.0/syn/struct.Index.html /// /// ```compile_fail /// let i = 0usize..self.fields.len(); diff --git a/vendor/quote/src/runtime.rs b/vendor/quote/src/runtime.rs index 4e3d4fd..eff044a 100644 --- a/vendor/quote/src/runtime.rs +++ b/vendor/quote/src/runtime.rs @@ -5,11 +5,21 @@ use core::iter; use core::ops::BitOr; use proc_macro2::{Group, Ident, Punct, Spacing, TokenTree}; +#[doc(hidden)] +pub use alloc::format; +#[doc(hidden)] pub use core::option::Option; -pub use proc_macro2::{Delimiter, Span, TokenStream}; -pub use std::format; +#[doc(hidden)] +pub type Delimiter = proc_macro2::Delimiter; +#[doc(hidden)] +pub type Span = proc_macro2::Span; +#[doc(hidden)] +pub type TokenStream = proc_macro2::TokenStream; + +#[doc(hidden)] pub struct HasIterator; // True +#[doc(hidden)] pub struct ThereIsNoIteratorInRepetition; // False impl BitOr for ThereIsNoIteratorInRepetition { @@ -46,14 +56,16 @@ impl BitOr for HasIterator { /// These traits expose a `quote_into_iter` method which should allow calling /// whichever impl happens to be applicable. Calling that method repeatedly on /// the returned value should be idempotent. +#[doc(hidden)] pub mod ext { use super::RepInterp; use super::{HasIterator as HasIter, ThereIsNoIteratorInRepetition as DoesNotHaveIter}; use crate::ToTokens; + use alloc::collections::btree_set::{self, BTreeSet}; use core::slice; - use std::collections::btree_set::{self, BTreeSet}; /// Extension trait providing the `quote_into_iter` method on iterators. + #[doc(hidden)] pub trait RepIteratorExt: Iterator + Sized { fn quote_into_iter(self) -> (Self, HasIter) { (self, HasIter) @@ -65,6 +77,7 @@ pub mod ext { /// Extension trait providing the `quote_into_iter` method for /// non-iterable types. These types interpolate the same value in each /// iteration of the repetition. 
+ #[doc(hidden)] pub trait RepToTokensExt { /// Pretend to be an iterator for the purposes of `quote_into_iter`. /// This allows repeated calls to `quote_into_iter` to continue @@ -82,6 +95,7 @@ pub mod ext { /// Extension trait providing the `quote_into_iter` method for types that /// can be referenced as an iterator. + #[doc(hidden)] pub trait RepAsIteratorExt<'q> { type Iter: Iterator; @@ -140,6 +154,7 @@ pub mod ext { // Helper type used within interpolations to allow for repeated binding names. // Implements the relevant traits, and exports a dummy `next()` method. #[derive(Copy, Clone)] +#[doc(hidden)] pub struct RepInterp(pub T); impl RepInterp { @@ -166,6 +181,7 @@ impl ToTokens for RepInterp { } } +#[doc(hidden)] #[inline] pub fn get_span(span: T) -> GetSpan { GetSpan(GetSpanInner(GetSpanBase(span))) @@ -222,10 +238,12 @@ mod get_span { } } +#[doc(hidden)] pub fn push_group(tokens: &mut TokenStream, delimiter: Delimiter, inner: TokenStream) { tokens.append(Group::new(delimiter, inner)); } +#[doc(hidden)] pub fn push_group_spanned( tokens: &mut TokenStream, span: Span, @@ -237,11 +255,13 @@ pub fn push_group_spanned( tokens.append(g); } +#[doc(hidden)] pub fn parse(tokens: &mut TokenStream, s: &str) { let s: TokenStream = s.parse().expect("invalid token stream"); tokens.extend(iter::once(s)); } +#[doc(hidden)] pub fn parse_spanned(tokens: &mut TokenStream, span: Span, s: &str) { let s: TokenStream = s.parse().expect("invalid token stream"); tokens.extend(s.into_iter().map(|t| respan_token_tree(t, span))); @@ -264,15 +284,18 @@ fn respan_token_tree(mut token: TokenTree, span: Span) -> TokenTree { token } +#[doc(hidden)] pub fn push_ident(tokens: &mut TokenStream, s: &str) { let span = Span::call_site(); push_ident_spanned(tokens, span, s); } +#[doc(hidden)] pub fn push_ident_spanned(tokens: &mut TokenStream, span: Span, s: &str) { tokens.append(ident_maybe_raw(s, span)); } +#[doc(hidden)] pub fn push_lifetime(tokens: &mut TokenStream, lifetime: &str) { 
struct Lifetime<'a> { name: &'a str, @@ -303,6 +326,7 @@ pub fn push_lifetime(tokens: &mut TokenStream, lifetime: &str) { }); } +#[doc(hidden)] pub fn push_lifetime_spanned(tokens: &mut TokenStream, span: Span, lifetime: &str) { struct Lifetime<'a> { name: &'a str, @@ -339,9 +363,11 @@ pub fn push_lifetime_spanned(tokens: &mut TokenStream, span: Span, lifetime: &st macro_rules! push_punct { ($name:ident $spanned:ident $char1:tt) => { + #[doc(hidden)] pub fn $name(tokens: &mut TokenStream) { tokens.append(Punct::new($char1, Spacing::Alone)); } + #[doc(hidden)] pub fn $spanned(tokens: &mut TokenStream, span: Span) { let mut punct = Punct::new($char1, Spacing::Alone); punct.set_span(span); @@ -349,10 +375,12 @@ macro_rules! push_punct { } }; ($name:ident $spanned:ident $char1:tt $char2:tt) => { + #[doc(hidden)] pub fn $name(tokens: &mut TokenStream) { tokens.append(Punct::new($char1, Spacing::Joint)); tokens.append(Punct::new($char2, Spacing::Alone)); } + #[doc(hidden)] pub fn $spanned(tokens: &mut TokenStream, span: Span) { let mut punct = Punct::new($char1, Spacing::Joint); punct.set_span(span); @@ -363,11 +391,13 @@ macro_rules! 
push_punct { } }; ($name:ident $spanned:ident $char1:tt $char2:tt $char3:tt) => { + #[doc(hidden)] pub fn $name(tokens: &mut TokenStream) { tokens.append(Punct::new($char1, Spacing::Joint)); tokens.append(Punct::new($char2, Spacing::Joint)); tokens.append(Punct::new($char3, Spacing::Alone)); } + #[doc(hidden)] pub fn $spanned(tokens: &mut TokenStream, span: Span) { let mut punct = Punct::new($char1, Spacing::Joint); punct.set_span(span); @@ -427,24 +457,27 @@ push_punct!(push_star push_star_spanned '*'); push_punct!(push_sub push_sub_spanned '-'); push_punct!(push_sub_eq push_sub_eq_spanned '-' '='); +#[doc(hidden)] pub fn push_underscore(tokens: &mut TokenStream) { push_underscore_spanned(tokens, Span::call_site()); } +#[doc(hidden)] pub fn push_underscore_spanned(tokens: &mut TokenStream, span: Span) { tokens.append(Ident::new("_", span)); } // Helper method for constructing identifiers from the `format_ident!` macro, // handling `r#` prefixes. +#[doc(hidden)] pub fn mk_ident(id: &str, span: Option) -> Ident { let span = span.unwrap_or_else(Span::call_site); ident_maybe_raw(id, span) } fn ident_maybe_raw(id: &str, span: Span) -> Ident { - if id.starts_with("r#") { - Ident::new_raw(&id[2..], span) + if let Some(id) = id.strip_prefix("r#") { + Ident::new_raw(id, span) } else { Ident::new(id, span) } @@ -457,6 +490,7 @@ fn ident_maybe_raw(id: &str, span: Span) -> Ident { // `Octal`, `LowerHex`, `UpperHex`, and `Binary` to allow for their use within // `format_ident!`. 
#[derive(Copy, Clone)] +#[doc(hidden)] pub struct IdentFragmentAdapter(pub T); impl IdentFragmentAdapter { diff --git a/vendor/quote/src/spanned.rs b/vendor/quote/src/spanned.rs index efc2e8b..6eba644 100644 --- a/vendor/quote/src/spanned.rs +++ b/vendor/quote/src/spanned.rs @@ -26,20 +26,8 @@ impl Spanned for T { } fn join_spans(tokens: TokenStream) -> Span { - #[cfg(not(needs_invalid_span_workaround))] let mut iter = tokens.into_iter().map(|tt| tt.span()); - #[cfg(needs_invalid_span_workaround)] - let mut iter = tokens.into_iter().filter_map(|tt| { - let span = tt.span(); - let debug = format!("{:?}", span); - if debug.ends_with("bytes(0..0)") { - None - } else { - Some(span) - } - }); - let first = match iter.next() { Some(span) => span, None => return Span::call_site(), diff --git a/vendor/quote/src/to_tokens.rs b/vendor/quote/src/to_tokens.rs index 5748721..23b6ec2 100644 --- a/vendor/quote/src/to_tokens.rs +++ b/vendor/quote/src/to_tokens.rs @@ -1,8 +1,8 @@ use super::TokenStreamExt; +use alloc::borrow::Cow; +use alloc::rc::Rc; use core::iter; use proc_macro2::{Group, Ident, Literal, Punct, Span, TokenStream, TokenTree}; -use std::borrow::Cow; -use std::rc::Rc; /// Types that can be interpolated inside a `quote!` invocation. 
/// diff --git a/vendor/quote/tests/ui/does-not-have-iter-interpolated-dup.stderr b/vendor/quote/tests/ui/does-not-have-iter-interpolated-dup.stderr index 57d8bf0..99c20a5 100644 --- a/vendor/quote/tests/ui/does-not-have-iter-interpolated-dup.stderr +++ b/vendor/quote/tests/ui/does-not-have-iter-interpolated-dup.stderr @@ -6,5 +6,6 @@ error[E0308]: mismatched types | | | expected `HasIterator`, found `ThereIsNoIteratorInRepetition` | expected due to this + | here the type of `has_iter` is inferred to be `ThereIsNoIteratorInRepetition` | = note: this error originates in the macro `$crate::quote_token_with_context` which comes from the expansion of the macro `quote` (in Nightly builds, run with -Z macro-backtrace for more info) diff --git a/vendor/quote/tests/ui/does-not-have-iter-interpolated.stderr b/vendor/quote/tests/ui/does-not-have-iter-interpolated.stderr index 8ec2d40..ef90813 100644 --- a/vendor/quote/tests/ui/does-not-have-iter-interpolated.stderr +++ b/vendor/quote/tests/ui/does-not-have-iter-interpolated.stderr @@ -6,5 +6,6 @@ error[E0308]: mismatched types | | | expected `HasIterator`, found `ThereIsNoIteratorInRepetition` | expected due to this + | here the type of `has_iter` is inferred to be `ThereIsNoIteratorInRepetition` | = note: this error originates in the macro `$crate::quote_token_with_context` which comes from the expansion of the macro `quote` (in Nightly builds, run with -Z macro-backtrace for more info) diff --git a/vendor/quote/tests/ui/not-quotable.stderr b/vendor/quote/tests/ui/not-quotable.stderr index 79d4654..35cb6f2 100644 --- a/vendor/quote/tests/ui/not-quotable.stderr +++ b/vendor/quote/tests/ui/not-quotable.stderr @@ -8,13 +8,13 @@ error[E0277]: the trait bound `Ipv4Addr: ToTokens` is not satisfied | required by a bound introduced by this call | = help: the following other types implement trait `ToTokens`: - &'a T - &'a mut T - Box - Cow<'a, T> - Option - Rc - RepInterp - String - and 23 others + bool + char + isize + i8 + i16 + 
i32 + i64 + i128 + and $N others = note: this error originates in the macro `quote` (in Nightly builds, run with -Z macro-backtrace for more info) diff --git a/vendor/regex-automata/.cargo-checksum.json b/vendor/regex-automata/.cargo-checksum.json new file mode 100644 index 0000000..af40ed8 --- /dev/null +++ b/vendor/regex-automata/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"Cargo.toml":"f60dfe6afcafe69301493380fcf7bf66b83d0edec0c2817d20da43ded0554c69","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"6485b8ed310d3f0340bf1ad1f47645069ce4069dcc6bb46c7d5c6faf41de1fdb","README.md":"61db25dbf26092fc80e8db89165692e55f9fb86b14e8451ebb28303f45932254","src/dfa/accel.rs":"2a045b0f6715e913d18d2212a7804fabaadfc3bcffad9382e35574d32eb0c492","src/dfa/automaton.rs":"c14707007bbb915fd5607424b0a4c8e53fa7daf6c7c1f4e3045d51ef15f9b202","src/dfa/dense.rs":"eda0e5ca69166aaeb7f8f3cdf9135ef535a97ae39aad6f3bcf9090c0a1d7d960","src/dfa/determinize.rs":"91b9f69d28bdd064aa86716fe0772e4145050fd458bb7869a28660b4f7b64872","src/dfa/minimize.rs":"b5cadb462b9f24cd4aa7a665e75fb813cd06858a92b8986c9c5ae7fd9a60dfab","src/dfa/mod.rs":"ab1ac378d81bb5ea40a23cf903928adae4758e30f54646afde71869234965723","src/dfa/onepass.rs":"013f09b795955aefd07936994f08df4bc5b39698797f586b85171f778162aeab","src/dfa/regex.rs":"d16f0434a0b0f1341d6d5e0a162e6afa29411a786fb37b0e98bbcc0c6ba3cfec","src/dfa/remapper.rs":"ca096abc0f8e45c43a2adf3a7743b8857714ae7411a623edea41cc3ce906a169","src/dfa/search.rs":"79b9ab2b0636177bc26d1ad6f0059ca033decf74824cb5a36f1ac19f020d2713","src/dfa/sparse.rs":"c863d92a4d919fa880dfca3d59a8b5b672c6ffa8423578b34fc0af2ae62e1d7a","src/dfa/special.rs":"c2e60de5b98e68c9c45aaffbc67a08f049831a764a1ed29d1d1db0fb68efdce5","src/dfa/start.rs":"46b1dbaf8e4518ddddda6bbe596621aae36f8ba694390483a22355d9d799be8e","src/hybrid/dfa.rs":"a6ed6d3268e4008f88c1469029a84391edfee7851df2912640763e4ba2188635","src/hybrid/error.rs":"37db2a9759721de4ca2c49e21ab74dd3d998b67c5ab0e65a
62085b57ec1d7ba3","src/hybrid/id.rs":"6168aad5c81c627494ba0575a24d61fd0ae7efabaaceeadb8ff28472275e2813","src/hybrid/mod.rs":"ca21e89062bdb5a0998d5cd1bc78609af1f6b795533e5982be969c383ac0463a","src/hybrid/regex.rs":"47815d025526330291f4cd749b4dd79b1122ef208fe6f0a49715c70fc1ea47c8","src/hybrid/search.rs":"76067f3f8675013dcdf7e9c9cc4d9d33d1107fb2cbcd7adcc05cfd42177d90cc","src/lib.rs":"4e831d41057760c5f2f1274a206fa5a42f59dbca8f98ad3e782fe0fba0d6c37f","src/macros.rs":"3e4b39252bfa471fad384160a43f113ebfec7bec46a85d16f006622881dd2081","src/meta/error.rs":"710a6813314b1b11ace1b016a827067fff8b2624d47e15c7f52043bff5ab57da","src/meta/limited.rs":"98b6b2d19f67d4ce3ddb110e06045f22a040590262fde33614ab900bdd06b25b","src/meta/literal.rs":"52da98bb30995dedd22786e4728cb84e84c6093a284168bd91196b999dd0f6ec","src/meta/mod.rs":"f3b10b96fa08efaba3e4c9b81883cf40aac6e4c1f6ae55a497a534cf5805b46d","src/meta/regex.rs":"b0fab107d3f972db89568e14fec0199ba4cd8076cc5fd61c2582db42885f196e","src/meta/reverse_inner.rs":"945d6c2d4c7538e1609dbd430a096784d22abd33db58b1ba65c9c9af45a7d3c0","src/meta/stopat.rs":"acb6122e17d10a9b1b5e72d6030e6d95748227975bad0ff5cbbcc2587edfa6df","src/meta/strategy.rs":"c882c5c261de5fe58bc65251d2d407e4cb483b9b80c2bec5eba958ef90e0072d","src/meta/wrappers.rs":"3cb0717f87b7082cc75cb02148b8cde30cffbee689bdb6275abcf1416747ceb4","src/nfa/mod.rs":"1a731e217ed4053714500e84e58cc127f402e4e075f7d0e5b9aea715cd52405a","src/nfa/thompson/backtrack.rs":"041015ea153c1e485e9cf39ec60d1e51c7ab9e400ecd77cad2078af45775339b","src/nfa/thompson/builder.rs":"7adf6aba69171f6acd47fea0fec85ba589154fead83f2042a1c6fe9486aa4dbd","src/nfa/thompson/compiler.rs":"a8bb24f7f125a294cb75af9d8332821142738278d8eff354647ae08f66a597af","src/nfa/thompson/error.rs":"78488c2fdb85f819f53cc30bb11c7f96169112da5dd14c351e5cc3bcccf0e10e","src/nfa/thompson/literal_trie.rs":"c2d1d09b44da4648db797386c2410cbf63337afef8cb62e6e78cf34786892a11","src/nfa/thompson/map.rs":"fcd17ce7359b5179ef2e809fc9152dfa0b6c61d3d849d8c502497e1d0d8b0fa9"
,"src/nfa/thompson/mod.rs":"0651520debd6f023ae1a2c422806aab37f8491e5bb092e20dfdc4fe4179d695c","src/nfa/thompson/nfa.rs":"410c3745c159eb17bea18256ec03ee92e1fccca630f01a24618a75fffcf86866","src/nfa/thompson/pikevm.rs":"aaf792832d1bf15fad8a8f0b2e6597170361eb3cbcb9343eb5bd242ff346d750","src/nfa/thompson/range_trie.rs":"d0ea4fc2a7085355a8bdb82a8ba4d5ce478596021a808390aaf0a4c1e4235970","src/util/alphabet.rs":"94cd73ce2f4e34e0ae0a146d3efdc85478263afdfefd6dc105e0abf0ec79d82b","src/util/captures.rs":"d2a118ba509b70e9922a10ea9f78771b14a521abb0ed4029be3ef6aeea44d032","src/util/determinize/mod.rs":"5e9e1f7dd060d69521b743afc2b900b21ad7942e17397084ac6563ea5dcf2fd9","src/util/determinize/state.rs":"c30eac89137df0f0128143eeb2e0c8d7ea4bd659825fa6721b5315141a326e3a","src/util/empty.rs":"13ec7d6cbd1520db5b4c1dae294f4419fa88d39d2bfc16f4ef258473d609f91c","src/util/escape.rs":"5b2731b41a55cb50ab688132bb5640dbd51f14f141adaa864b9db7f0aa092c74","src/util/int.rs":"b7eec0a6cab0798ba66707988fce3ecfc841b93418028a7b1408c5d0f6271351","src/util/interpolate.rs":"5e4e6b6fb6e5a7603e393bf05c609735d86a7d1f54c2436e42111b4e1409b6dd","src/util/iter.rs":"58ae97b4156d7160a46b909f4635d88d10354d9d892d2fcb4c5e18e24cf38f14","src/util/lazy.rs":"e16b3ed139210ca546fc302c463ce52a5dcfa77382f07c9097400ed8cddf78c8","src/util/look.rs":"fbfcaace79d0c6ad3698c9d6c025cb952f2e00cf88a48cf690d087fa73466689","src/util/memchr.rs":"573109ce4983907083ae0b29a084a324b9b53da369b4d96f7f3a21fd5c8eb5c9","src/util/mod.rs":"6c828a493f0f88c8b515aee4f8faf91ba653eb07e8fc3c23c0524553410803f9","src/util/pool.rs":"da1fad31f2fdf15cf3a6a605ece8d6162d8f6c42770c160af4c0fbf4ef148aa5","src/util/prefilter/aho_corasick.rs":"c54fa95f4d9e7ab53e2c6463a43f8953df6a440997fc9cd528f225db0dd32582","src/util/prefilter/byteset.rs":"1c80fa432acc23223a75a5181e37c40034764dffe42410e4b77af6f24f48bd5c","src/util/prefilter/memchr.rs":"36c6fe6354b2e729db6830166dd4862e439bc48c9e59258d88e4b6c5654e20ef","src/util/prefilter/memmem.rs":"6f6ed9450b14abf3e4a33d395337e51fbaa974
3a0a16aac0009f7680aa60c500","src/util/prefilter/mod.rs":"b171a46c74678c77d659174284819939c656ccb3cc7661246fdeb47a042048f7","src/util/prefilter/teddy.rs":"ed54d26858b56e1c8c87e44afae5f63d81ab930787d79e671f3a3513f576e9cd","src/util/primitives.rs":"8a9cc19ef2e1ab183943cdc2d2f095b02252476e32b7e9fff4a06a251749b068","src/util/search.rs":"f37ff5193c7ae49356cc8f3167eeeab090a4754dfb9e5832ceb0b150f8f182fd","src/util/sparse_set.rs":"3d4aa30b6aa9fc875d36506487a5095dbe8ed528b89e4146a65c7e7497520a4d","src/util/start.rs":"73ebcf2550cea56f67b9048fa3dc91f3a8db9897fbd2400dd9941efb0cb4827e","src/util/syntax.rs":"720ac0d6600fad33f5967b5afe4e3de2096b857e4cda6fa16ba93b10a8230cab","src/util/unicode_data/mod.rs":"54c3e10bbc393e9881bfac3295815b160f59e69e2056bc29ee7cf0addd8e3cf7","src/util/unicode_data/perl_word.rs":"2e1a5d889598bd4e73af17d3a9f7d6b4cf2f6ab24920a5336e496bb255281e56","src/util/utf8.rs":"7a068009fdf07e693e521b1f0264725c0e6118dbe1eab55da9d0eab21785fcc1","src/util/wire.rs":"bfdf52615c516b6c07db3ce9c333ea61fdc535bd0b79560bbd7f6864ab83946e","test":"39d79ce3532c31a51c0be89a2939816fad0e4868d2b03992c202cbe64dce9f6c","tests/dfa/api.rs":"cc28e366b6bcbfcf379265acd492a92c62743c3f20e7a2b273019679aa9e1291","tests/dfa/mod.rs":"924d8fff500b9b7b140082623023e78007058a87323151cd8e361462945e4f16","tests/dfa/onepass/mod.rs":"d08f4ecb8ec243be584944c9602af1ed3a48a8732dd11cd573b0d1d182171303","tests/dfa/onepass/suite.rs":"6d63ec5469e6876656ae607cdbe07e6a4e17ace7836b67435763c9b1d233438a","tests/dfa/regression.rs":"ebcf2645290286aa7531eb2b7951385e5ed8167532437aeca2ad2049768fd796","tests/dfa/suite.rs":"2812aa0167ee5e93eff3f7d45096a78c5f3a2440197a513b3cf0310286640f51","tests/fuzz/dense.rs":"3e1099a0cce61e85abc0ad81bc592e85f497f159ef0e5d1d32bac1936aa6f20c","tests/fuzz/mod.rs":"043773510e02f51def43ee0c2b8b867c53ecc8638c8a9233b2ac098de9c3ac1e","tests/fuzz/sparse.rs":"ba61db4927ab28953037a4b20317399c86d01b4d774e46c020ade19029215e25","tests/fuzz/testdata/deserialize_dense_crash-9486fb7c8a93b12c12a62166b43d316
40c0208a9":"8961279a8237c3e318452024dd971b1d5a26b058260c297382a74daca1b7f0d1","tests/fuzz/testdata/deserialize_dense_minimized-from-9486fb7c8a93b12c12a62166b43d31640c0208a9":"c2d52e3dea78d3f159b5b521d433358a7fee45ce20ed1545067d461f45ef66b8","tests/fuzz/testdata/deserialize_sparse_crash-0da59c0434eaf35e5a6b470fa9244bb79c72b000":"5b2d273023de3fb04037eaf2e6b4f51cced4c5a08d2e6b44e4be540774f939b9","tests/fuzz/testdata/deserialize_sparse_crash-18cfc246f2ddfc3dfc92b0c7893178c7cf65efa9":"e2e22e2f46a9a75b5c876476442276cf675fe244c5cf918789e4f6b14078fbd9","tests/fuzz/testdata/deserialize_sparse_crash-61fd8e3003bf9d99f6c1e5a8488727eefd234b98":"24a12712e1f2ba0a40b5782707908a74dd19941dc372ef525d65a7134f91988c","tests/fuzz/testdata/deserialize_sparse_crash-a1b839d899ced76d5d7d0f78f9edb7a421505838":"a97f39b2febf9c73535681f7a86201e4b06d5a1ffcf135299c96c1cabfa9f6c4","tests/fuzz/testdata/deserialize_sparse_crash-c383ae07ec5e191422eadc492117439011816570":"44fe3ef878d35e2d51c2c17ff89bbbe3a4650e09d0cbbd48625c0f5e4dd0848b","tests/fuzz/testdata/deserialize_sparse_crash-d07703ceb94b10dcd9e4acb809f2051420449e2b":"d5534be36653b4af6cb94a7c63be58869bb8c204c5c63d67a4d6c986b44bb2e1","tests/fuzz/testdata/deserialize_sparse_crash-dbb8172d3984e7e7d03f4b5f8bb86ecd1460eff9":"77b844898610560afa09f2b8de73a85a0ba9a3b8cee4ff1bbf26b8c97ad4e8a2","tests/gen/README.md":"c3bfdf2f9ced501dd5bd75d01509a34e503efb2dff2f5f7b260580dde5519ed4","tests/gen/dense/mod.rs":"5ae1cfb46212a674118ada2f66f37b25188e84643d406b95eb4665d722344262","tests/gen/dense/multi_pattern_v2.rs":"29b1e9a799adecbdbe7cd05e9748f664c2b915b10b1d2f5d36cfb6453826d1d2","tests/gen/dense/multi_pattern_v2_fwd.bigendian.dfa":"8421d5a1bfc0b6c3bdc8fc90dff591a046b0aaf8e06ef7de7cc293004a35d061","tests/gen/dense/multi_pattern_v2_fwd.littleendian.dfa":"dcf2fd5fd49f5f53cf1ec66f61623402f39401cb3aea30d6677b98bb1e9541bf","tests/gen/dense/multi_pattern_v2_rev.bigendian.dfa":"73c4f20d984e544dfa4cf05f3009d0a9b52fa84bc97b501ea0ccd179e2def4bc","tests/gen/dense/multi_pa
ttern_v2_rev.littleendian.dfa":"74471209f05754e8e20c8a0222a5877b1b15b8b8f33cd8cac89ea65f708b4aff","tests/gen/mod.rs":"043773510e02f51def43ee0c2b8b867c53ecc8638c8a9233b2ac098de9c3ac1e","tests/gen/sparse/mod.rs":"5ae1cfb46212a674118ada2f66f37b25188e84643d406b95eb4665d722344262","tests/gen/sparse/multi_pattern_v2.rs":"e00fb2a510a215460aab84573196b1f51bb65884ff494c2382534c04f6fdbfe9","tests/gen/sparse/multi_pattern_v2_fwd.bigendian.dfa":"3287956bd2003cd69653b125f82aade95d99adbb20229bfdbb4958b8877c0a0b","tests/gen/sparse/multi_pattern_v2_fwd.littleendian.dfa":"bdf285901eaaac4596380115c5bbb20ab2f42f593d8d9e9238a00ed69863f9c9","tests/gen/sparse/multi_pattern_v2_rev.bigendian.dfa":"e466dc085dd68b2d2220932a0e4d28759edd161c1fdad652240aa3825fd85268","tests/gen/sparse/multi_pattern_v2_rev.littleendian.dfa":"80358d0c26c1cc7284065b0075f5b8804d83e673a8a8c8327f93a1c1ff455399","tests/hybrid/api.rs":"bd4862275c52f94c6f6737bf174c97e3de30f8075ca23f43c129c72a0d0afed7","tests/hybrid/mod.rs":"4856a49a4d9b5e9e079c2719a5e75c32408b37e9b76cbdea057b388a3537af6d","tests/hybrid/suite.rs":"688972275c5ef38cdc5112a1e6e54ccd2bf8290008ae2b17344c6c81e17e3a5a","tests/lib.rs":"9775b3c62fb338ea5c1bd3513a6589eff4b5c8d35c599439d9363dbf98c6f8d4","tests/meta/mod.rs":"d08f4ecb8ec243be584944c9602af1ed3a48a8732dd11cd573b0d1d182171303","tests/meta/suite.rs":"4c441f9df82508a5e60dd08f266183f772fc9b2b236fbf69cab87650ecf3b424","tests/nfa/mod.rs":"49055c358e38d97e42acb1602c671f97dddf24cafe089490f0e79ed208d74d9b","tests/nfa/thompson/backtrack/mod.rs":"d08f4ecb8ec243be584944c9602af1ed3a48a8732dd11cd573b0d1d182171303","tests/nfa/thompson/backtrack/suite.rs":"4e7baff70fc98b98b8297c6fd6d5818beb20343379e16cdb95bee46207ac4bd6","tests/nfa/thompson/mod.rs":"de9f5bcea1a8d1f03c85c55ad8c0747877d69e344fcd6c6886b0a402f0661291","tests/nfa/thompson/pikevm/mod.rs":"d08f4ecb8ec243be584944c9602af1ed3a48a8732dd11cd573b0d1d182171303","tests/nfa/thompson/pikevm/suite.rs":"263837ebf5b2e1906a06237982ea875386d83567e399b4ec1f669f10b1422599"},
"package":"5bb987efffd3c6d0d8f5f89510bb458559eab11e4f869acb20bf845e016259cd"} \ No newline at end of file diff --git a/vendor/regex-automata/Cargo.toml b/vendor/regex-automata/Cargo.toml new file mode 100644 index 0000000..89ce214 --- /dev/null +++ b/vendor/regex-automata/Cargo.toml @@ -0,0 +1,183 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. + +[package] +edition = "2021" +rust-version = "1.65" +name = "regex-automata" +version = "0.4.5" +authors = [ + "The Rust Project Developers", + "Andrew Gallant ", +] +autoexamples = false +description = "Automata construction and matching using regular expressions." 
+documentation = "https://docs.rs/regex-automata" +readme = "README.md" +keywords = [ + "regex", + "dfa", + "automata", + "automaton", + "nfa", +] +categories = ["text-processing"] +license = "MIT OR Apache-2.0" +repository = "https://github.com/rust-lang/regex/tree/master/regex-automata" + +[lib] +bench = false + +[[test]] +name = "integration" +path = "tests/lib.rs" + +[dependencies.aho-corasick] +version = "1.0.0" +optional = true +default-features = false + +[dependencies.log] +version = "0.4.14" +optional = true + +[dependencies.memchr] +version = "2.6.0" +optional = true +default-features = false + +[dependencies.regex-syntax] +version = "0.8.2" +optional = true +default-features = false + +[dev-dependencies.anyhow] +version = "1.0.69" + +[dev-dependencies.bstr] +version = "1.3.0" +features = ["std"] +default-features = false + +[dev-dependencies.doc-comment] +version = "0.3.3" + +[dev-dependencies.env_logger] +version = "0.9.3" +features = [ + "atty", + "humantime", + "termcolor", +] +default-features = false + +[dev-dependencies.quickcheck] +version = "1.0.3" +default-features = false + +[dev-dependencies.regex-test] +version = "0.1.0" + +[features] +alloc = [] +default = [ + "std", + "syntax", + "perf", + "unicode", + "meta", + "nfa", + "dfa", + "hybrid", +] +dfa = [ + "dfa-build", + "dfa-search", + "dfa-onepass", +] +dfa-build = [ + "nfa-thompson", + "dfa-search", +] +dfa-onepass = ["nfa-thompson"] +dfa-search = [] +hybrid = [ + "alloc", + "nfa-thompson", +] +internal-instrument = ["internal-instrument-pikevm"] +internal-instrument-pikevm = [ + "logging", + "std", +] +logging = [ + "dep:log", + "aho-corasick?/logging", + "memchr?/logging", +] +meta = [ + "syntax", + "nfa-pikevm", +] +nfa = [ + "nfa-thompson", + "nfa-pikevm", + "nfa-backtrack", +] +nfa-backtrack = ["nfa-thompson"] +nfa-pikevm = ["nfa-thompson"] +nfa-thompson = ["alloc"] +perf = [ + "perf-inline", + "perf-literal", +] +perf-inline = [] +perf-literal = [ + "perf-literal-substring", + 
"perf-literal-multisubstring", +] +perf-literal-multisubstring = [ + "std", + "dep:aho-corasick", +] +perf-literal-substring = [ + "aho-corasick?/perf-literal", + "dep:memchr", +] +std = [ + "regex-syntax?/std", + "memchr?/std", + "aho-corasick?/std", + "alloc", +] +syntax = [ + "dep:regex-syntax", + "alloc", +] +unicode = [ + "unicode-age", + "unicode-bool", + "unicode-case", + "unicode-gencat", + "unicode-perl", + "unicode-script", + "unicode-segment", + "unicode-word-boundary", + "regex-syntax?/unicode", +] +unicode-age = ["regex-syntax?/unicode-age"] +unicode-bool = ["regex-syntax?/unicode-bool"] +unicode-case = ["regex-syntax?/unicode-case"] +unicode-gencat = ["regex-syntax?/unicode-gencat"] +unicode-perl = ["regex-syntax?/unicode-perl"] +unicode-script = ["regex-syntax?/unicode-script"] +unicode-segment = ["regex-syntax?/unicode-segment"] +unicode-word-boundary = [] diff --git a/vendor/regex-automata/LICENSE-APACHE b/vendor/regex-automata/LICENSE-APACHE new file mode 100644 index 0000000..16fe87b --- /dev/null +++ b/vendor/regex-automata/LICENSE-APACHE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/vendor/regex-automata/LICENSE-MIT b/vendor/regex-automata/LICENSE-MIT new file mode 100644 index 0000000..39d4bdb --- /dev/null +++ b/vendor/regex-automata/LICENSE-MIT @@ -0,0 +1,25 @@ +Copyright (c) 2014 The Rust Project Developers + +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
diff --git a/vendor/regex-automata/README.md b/vendor/regex-automata/README.md new file mode 100644 index 0000000..c12b070 --- /dev/null +++ b/vendor/regex-automata/README.md @@ -0,0 +1,117 @@ +regex-automata +============== +This crate exposes a variety of regex engines used by the `regex` crate. +It provides a vast, sprawling and "expert" level API to each regex engine. +The regex engines provided by this crate focus heavily on finite automata +implementations and specifically guarantee worst case `O(m * n)` time +complexity for all searches. (Where `m ~ len(regex)` and `n ~ len(haystack)`.) + +[![Build status](https://github.com/rust-lang/regex/workflows/ci/badge.svg)](https://github.com/rust-lang/regex/actions) +[![Crates.io](https://img.shields.io/crates/v/regex-automata.svg)](https://crates.io/crates/regex-automata) + + +### Documentation + +https://docs.rs/regex-automata + + +### Example + +This example shows how to search for matches of multiple regexes, where each +regex uses the same capture group names to parse different key-value formats. + +```rust +use regex_automata::{meta::Regex, PatternID}; + +let re = Regex::new_many(&[ + r#"(?m)^(?<key>[[:word:]]+)=(?<val>[[:word:]]+)$"#, + r#"(?m)^(?<key>[[:word:]]+)="(?<val>[^"]+)"$"#, + r#"(?m)^(?<key>[[:word:]]+)='(?<val>[^']+)'$"#, + r#"(?m)^(?<key>[[:word:]]+):\s*(?<val>[[:word:]]+)$"#, +]).unwrap(); +let hay = r#" +best_album="Blow Your Face Out" +best_quote='"then as it was, then again it will be"' +best_year=1973 +best_simpsons_episode: HOMR +"#; +let mut kvs = vec![]; +for caps in re.captures_iter(hay) { + // N.B. One could use capture indices '1' and '2' here + // as well. Capture indices are local to each pattern. + // (Just like names are.) 
+ let key = &hay[caps.get_group_by_name("key").unwrap()]; + let val = &hay[caps.get_group_by_name("val").unwrap()]; + kvs.push((key, val)); +} +assert_eq!(kvs, vec![ + ("best_album", "Blow Your Face Out"), + ("best_quote", "\"then as it was, then again it will be\""), + ("best_year", "1973"), + ("best_simpsons_episode", "HOMR"), +]); +``` + + +### Safety + +**I welcome audits of `unsafe` code.** + +This crate tries to be extremely conservative in its use of `unsafe`, but does +use it in a few spots. In general, I am very open to removing uses of `unsafe` +if it doesn't result in measurable performance regressions and doesn't result +in significantly more complex code. + +Below is an outline of how `unsafe` is used in this crate. + +* `util::pool::Pool` makes use of `unsafe` to implement a fast path for +accessing an element of the pool. The fast path applies to the first thread +that uses the pool. In effect, the fast path is fast because it avoid a mutex +lock. `unsafe` is also used in the no-std version of `Pool` to implement a spin +lock for synchronization. +* `util::lazy::Lazy` uses `unsafe` to implement a variant of +`once_cell::sync::Lazy` that works in no-std environments. A no-std no-alloc +implementation is also provided that requires use of `unsafe`. +* The `dfa` module makes extensive use of `unsafe` to support zero-copy +deserialization of DFAs. The high level problem is that you need to get from +`&[u8]` to the internal representation of a DFA without doing any copies. +This is required for support in no-std no-alloc environments. It also makes +deserialization extremely cheap. +* The `dfa` and `hybrid` modules use `unsafe` to explicitly elide bounds checks +in the core search loops. This makes the codegen tighter and typically leads to +consistent 5-10% performance improvements on some workloads. + +In general, the above reflect the only uses of `unsafe` throughout the entire +`regex` crate. 
At present, there are no plans to meaningfully expand the use +of `unsafe`. With that said, one thing folks have been asking for is cheap +deserialization of a `regex::Regex`. My sense is that this feature will require +a lot more `unsafe` in places to support zero-copy deserialization. It is +unclear at this point whether this will be pursued. + + +### Motivation + +I started out building this crate because I wanted to re-work the `regex` +crate internals to make it more amenable to optimizations. It turns out that +there are a lot of different ways to build regex engines and even more ways to +compose them. Moreover, heuristic literal optimizations are often tricky to +get correct, but the fruit they bear is attractive. All of these things were +difficult to expand upon without risking the introduction of more bugs. So I +decided to tear things down and start fresh. + +In the course of doing so, I ended up designing strong boundaries between each +component so that each component could be reasoned and tested independently. +This also made it somewhat natural to expose the components as a library unto +itself. Namely, folks have been asking for more capabilities in the regex +crate for a long time, but these capabilities usually come with additional API +complexity that I didn't want to introduce in the `regex` crate proper. But +exposing them in an "expert" level crate like `regex-automata` seemed quite +fine. + +In the end, I do still somewhat consider this crate an experiment. It is +unclear whether the strong boundaries between components will be an impediment +to ongoing development or not. De-coupling tends to lead to slower development +in my experience, and when you mix in the added cost of not introducing +breaking changes all of the time, things can get quite complicated. But, I +don't think anyone has ever release the internals of a regex engine as a +library before. So it will be interesting to see how it plays out! 
diff --git a/vendor/regex-automata/src/dfa/accel.rs b/vendor/regex-automata/src/dfa/accel.rs new file mode 100644 index 0000000..c0ba18e --- /dev/null +++ b/vendor/regex-automata/src/dfa/accel.rs @@ -0,0 +1,517 @@ +// This module defines some core types for dealing with accelerated DFA states. +// Briefly, a DFA state can be "accelerated" if all of its transitions except +// for a few loop back to itself. This directly implies that the only way out +// of such a state is if a byte corresponding to one of those non-loopback +// transitions is found. Such states are often found in simple repetitions in +// non-Unicode regexes. For example, consider '(?-u)[^a]+a'. We can look at its +// DFA with regex-cli: +// +// $ regex-cli debug dense dfa -p '(?-u)[^a]+a' -BbC --no-table +// D 000000: +// Q 000001: +// *000002: +// A 000003: \x00-` => 3, a => 8, b-\xFF => 3 +// A 000004: \x00-` => 4, a => 7, b-\xFF => 4 +// 000005: \x00-` => 4, b-\xFF => 4 +// 000006: \x00-` => 3, a => 6, b-\xFF => 3 +// 000007: \x00-\xFF => 2, EOI => 2 +// 000008: \x00-\xFF => 2, EOI => 2 +// +// In particular, state 3 is accelerated (shown via the 'A' indicator) since +// the only way to leave that state once entered is to see an 'a' byte. If +// there is a long run of non-'a' bytes, then using something like 'memchr' +// to find the next 'a' byte can be significantly faster than just using the +// standard byte-at-a-time state machine. +// +// Unfortunately, this optimization rarely applies when Unicode is enabled. +// For example, patterns like '[^a]' don't actually match any byte that isn't +// 'a', but rather, any UTF-8 encoding of a Unicode scalar value that isn't +// 'a'. This makes the state machine much more complex---far beyond a single +// state---and removes the ability to easily accelerate it. (Because if the +// machine sees a non-UTF-8 sequence, then the machine won't match through it.) 
+// +// In practice, we only consider accelerating states that have 3 or fewer +// non-loop transitions. At a certain point, you get diminishing returns, but +// also because that's what the memchr crate supports. The structures below +// hard-code this assumption and provide (de)serialization APIs for use inside +// a DFA. +// +// And finally, note that there is some trickery involved in making it very +// fast to not only check whether a state is accelerated at search time, but +// also to access the bytes to search for to implement the acceleration itself. +// dfa/special.rs provides more detail, but the short story is that all +// accelerated states appear contiguously in a DFA. This means we can represent +// the ID space of all accelerated DFA states with a single range. So given +// a state ID, we can determine whether it's accelerated via +// +// min_accel_id <= id <= max_accel_id +// +// And find its corresponding accelerator with: +// +// accels.get((id - min_accel_id) / dfa_stride) + +#[cfg(feature = "dfa-build")] +use alloc::{vec, vec::Vec}; + +use crate::util::{ + int::Pointer, + memchr, + wire::{self, DeserializeError, Endian, SerializeError}, +}; + +/// The base type used to represent a collection of accelerators. +/// +/// While an `Accel` is represented as a fixed size array of bytes, a +/// *collection* of `Accel`s (called `Accels`) is represented internally as a +/// slice of u32. While it's a bit unnatural to do this and costs us a bit of +/// fairly low-risk not-safe code, it lets us remove the need for a second type +/// parameter in the definition of dense::DFA. (Which really wants everything +/// to be a slice of u32.) +type AccelTy = u32; + +/// The size of the unit of representation for accelerators. +/// +/// ACCEL_CAP *must* be a multiple of this size. +const ACCEL_TY_SIZE: usize = core::mem::size_of::(); + +/// The maximum length in bytes that a single Accel can be. 
This is distinct +/// from the capacity of an accelerator in that the length represents only the +/// bytes that should be read. +const ACCEL_LEN: usize = 4; + +/// The capacity of each accelerator, in bytes. We set this to 8 since it's a +/// multiple of 4 (our ID size) and because it gives us a little wiggle room +/// if we want to support more accel bytes in the future without a breaking +/// change. +/// +/// This MUST be a multiple of ACCEL_TY_SIZE. +const ACCEL_CAP: usize = 8; + +/// Search for between 1 and 3 needle bytes in the given haystack, starting the +/// search at the given position. If `needles` has a length other than 1-3, +/// then this panics. +#[cfg_attr(feature = "perf-inline", inline(always))] +pub(crate) fn find_fwd( + needles: &[u8], + haystack: &[u8], + at: usize, +) -> Option { + let bs = needles; + let i = match needles.len() { + 1 => memchr::memchr(bs[0], &haystack[at..])?, + 2 => memchr::memchr2(bs[0], bs[1], &haystack[at..])?, + 3 => memchr::memchr3(bs[0], bs[1], bs[2], &haystack[at..])?, + 0 => panic!("cannot find with empty needles"), + n => panic!("invalid needles length: {}", n), + }; + Some(at + i) +} + +/// Search for between 1 and 3 needle bytes in the given haystack in reverse, +/// starting the search at the given position. If `needles` has a length other +/// than 1-3, then this panics. +#[cfg_attr(feature = "perf-inline", inline(always))] +pub(crate) fn find_rev( + needles: &[u8], + haystack: &[u8], + at: usize, +) -> Option { + let bs = needles; + match needles.len() { + 1 => memchr::memrchr(bs[0], &haystack[..at]), + 2 => memchr::memrchr2(bs[0], bs[1], &haystack[..at]), + 3 => memchr::memrchr3(bs[0], bs[1], bs[2], &haystack[..at]), + 0 => panic!("cannot find with empty needles"), + n => panic!("invalid needles length: {}", n), + } +} + +/// Represents the accelerators for all accelerated states in a dense DFA. +/// +/// The `A` type parameter represents the type of the underlying bytes. 
+/// Generally, this is either `&[AccelTy]` or `Vec`. +#[derive(Clone)] +pub(crate) struct Accels { + /// A length prefixed slice of contiguous accelerators. See the top comment + /// in this module for more details on how we can jump from a DFA's state + /// ID to an accelerator in this list. + /// + /// The first 4 bytes always correspond to the number of accelerators + /// that follow. + accels: A, +} + +#[cfg(feature = "dfa-build")] +impl Accels> { + /// Create an empty sequence of accelerators for a DFA. + pub fn empty() -> Accels> { + Accels { accels: vec![0] } + } + + /// Add an accelerator to this sequence. + /// + /// This adds to the accelerator to the end of the sequence and therefore + /// should be done in correspondence with its state in the DFA. + /// + /// This panics if this results in more accelerators than AccelTy::MAX. + pub fn add(&mut self, accel: Accel) { + self.accels.extend_from_slice(&accel.as_accel_tys()); + let len = self.len(); + self.set_len(len + 1); + } + + /// Set the number of accelerators in this sequence, which is encoded in + /// the first 4 bytes of the underlying bytes. + fn set_len(&mut self, new_len: usize) { + // The only way an accelerator gets added is if a state exists for + // it, and if a state exists, then its index is guaranteed to be + // representable by a AccelTy by virtue of the guarantees provided by + // StateID. + let new_len = AccelTy::try_from(new_len).unwrap(); + self.accels[0] = new_len; + } +} + +impl<'a> Accels<&'a [AccelTy]> { + /// Deserialize a sequence of accelerators from the given bytes. If there + /// was a problem deserializing, then an error is returned. + /// + /// This is guaranteed to run in constant time. This does not guarantee + /// that every accelerator in the returned collection is valid. Thus, + /// accessing one may panic, or not-safe code that relies on accelerators + /// being correct my result in UB. 
+ /// + /// Callers may check the validity of every accelerator with the `validate` + /// method. + pub fn from_bytes_unchecked( + mut slice: &'a [u8], + ) -> Result<(Accels<&'a [AccelTy]>, usize), DeserializeError> { + let slice_start = slice.as_ptr().as_usize(); + + let (accel_len, _) = + wire::try_read_u32_as_usize(slice, "accelerators length")?; + // The accelerator length is part of the accel_tys slice that + // we deserialize. This is perhaps a bit idiosyncratic. It would + // probably be better to split out the length into a real field. + + let accel_tys_len = wire::add( + wire::mul(accel_len, 2, "total number of accelerator accel_tys")?, + 1, + "total number of accel_tys", + )?; + let accel_tys_bytes_len = wire::mul( + ACCEL_TY_SIZE, + accel_tys_len, + "total number of bytes in accelerators", + )?; + wire::check_slice_len(slice, accel_tys_bytes_len, "accelerators")?; + wire::check_alignment::(slice)?; + let accel_tys = &slice[..accel_tys_bytes_len]; + slice = &slice[accel_tys_bytes_len..]; + // SAFETY: We've checked the length and alignment above, and since + // slice is just bytes and AccelTy is just a u32, we can safely cast to + // a slice of &[AccelTy]. + let accels = unsafe { + core::slice::from_raw_parts( + accel_tys.as_ptr().cast::(), + accel_tys_len, + ) + }; + Ok((Accels { accels }, slice.as_ptr().as_usize() - slice_start)) + } +} + +impl> Accels { + /// Return an owned version of the accelerators. + #[cfg(feature = "alloc")] + pub fn to_owned(&self) -> Accels> { + Accels { accels: self.accels.as_ref().to_vec() } + } + + /// Return a borrowed version of the accelerators. + pub fn as_ref(&self) -> Accels<&[AccelTy]> { + Accels { accels: self.accels.as_ref() } + } + + /// Return the bytes representing the serialization of the accelerators. + pub fn as_bytes(&self) -> &[u8] { + let accels = self.accels.as_ref(); + // SAFETY: This is safe because accels is a just a slice of AccelTy, + // and u8 always has a smaller alignment. 
+ unsafe { + core::slice::from_raw_parts( + accels.as_ptr().cast::(), + accels.len() * ACCEL_TY_SIZE, + ) + } + } + + /// Returns the memory usage, in bytes, of these accelerators. + /// + /// The memory usage is computed based on the number of bytes used to + /// represent all of the accelerators. + /// + /// This does **not** include the stack size used by this value. + pub fn memory_usage(&self) -> usize { + self.as_bytes().len() + } + + /// Return the bytes to search for corresponding to the accelerator in this + /// sequence at index `i`. If no such accelerator exists, then this panics. + /// + /// The significance of the index is that it should be in correspondence + /// with the index of the corresponding DFA. That is, accelerated DFA + /// states are stored contiguously in the DFA and have an ordering implied + /// by their respective state IDs. The state's index in that sequence + /// corresponds to the index of its corresponding accelerator. + #[cfg_attr(feature = "perf-inline", inline(always))] + pub fn needles(&self, i: usize) -> &[u8] { + if i >= self.len() { + panic!("invalid accelerator index {}", i); + } + let bytes = self.as_bytes(); + let offset = ACCEL_TY_SIZE + i * ACCEL_CAP; + let len = usize::from(bytes[offset]); + &bytes[offset + 1..offset + 1 + len] + } + + /// Return the total number of accelerators in this sequence. + pub fn len(&self) -> usize { + // This should never panic since deserialization checks that the + // length can fit into a usize. + usize::try_from(self.accels.as_ref()[0]).unwrap() + } + + /// Return the accelerator in this sequence at index `i`. If no such + /// accelerator exists, then this returns None. + /// + /// See the docs for `needles` on the significance of the index. 
+ fn get(&self, i: usize) -> Option { + if i >= self.len() { + return None; + } + let offset = ACCEL_TY_SIZE + i * ACCEL_CAP; + let accel = Accel::from_slice(&self.as_bytes()[offset..]) + .expect("Accels must contain valid accelerators"); + Some(accel) + } + + /// Returns an iterator of accelerators in this sequence. + fn iter(&self) -> IterAccels<'_, A> { + IterAccels { accels: self, i: 0 } + } + + /// Writes these accelerators to the given byte buffer using the indicated + /// endianness. If the given buffer is too small, then an error is + /// returned. Upon success, the total number of bytes written is returned. + /// The number of bytes written is guaranteed to be a multiple of 8. + pub fn write_to( + &self, + dst: &mut [u8], + ) -> Result { + let nwrite = self.write_to_len(); + assert_eq!( + nwrite % ACCEL_TY_SIZE, + 0, + "expected accelerator bytes written to be a multiple of {}", + ACCEL_TY_SIZE, + ); + if dst.len() < nwrite { + return Err(SerializeError::buffer_too_small("accelerators")); + } + + // The number of accelerators can never exceed AccelTy::MAX. + E::write_u32(AccelTy::try_from(self.len()).unwrap(), dst); + // The actual accelerators are just raw bytes and thus their endianness + // is irrelevant. So we can copy them as bytes. + dst[ACCEL_TY_SIZE..nwrite] + .copy_from_slice(&self.as_bytes()[ACCEL_TY_SIZE..nwrite]); + Ok(nwrite) + } + + /// Validates that every accelerator in this collection can be successfully + /// deserialized as a valid accelerator. + pub fn validate(&self) -> Result<(), DeserializeError> { + for chunk in self.as_bytes()[ACCEL_TY_SIZE..].chunks(ACCEL_CAP) { + let _ = Accel::from_slice(chunk)?; + } + Ok(()) + } + + /// Returns the total number of bytes written by `write_to`. 
+ pub fn write_to_len(&self) -> usize { + self.as_bytes().len() + } +} + +impl> core::fmt::Debug for Accels { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + write!(f, "Accels(")?; + let mut list = f.debug_list(); + for a in self.iter() { + list.entry(&a); + } + list.finish()?; + write!(f, ")") + } +} + +#[derive(Debug)] +struct IterAccels<'a, A: AsRef<[AccelTy]>> { + accels: &'a Accels, + i: usize, +} + +impl<'a, A: AsRef<[AccelTy]>> Iterator for IterAccels<'a, A> { + type Item = Accel; + + fn next(&mut self) -> Option { + let accel = self.accels.get(self.i)?; + self.i += 1; + Some(accel) + } +} + +/// Accel represents a structure for determining how to "accelerate" a DFA +/// state. +/// +/// Namely, it contains zero or more bytes that must be seen in order for the +/// DFA to leave the state it is associated with. In practice, the actual range +/// is 1 to 3 bytes. +/// +/// The purpose of acceleration is to identify states whose vast majority +/// of transitions are just loops back to the same state. For example, +/// in the regex `(?-u)^[^a]+b`, the corresponding DFA will have a state +/// (corresponding to `[^a]+`) where all transitions *except* for `a` and +/// `b` loop back to itself. Thus, this state can be "accelerated" by simply +/// looking for the next occurrence of either `a` or `b` instead of explicitly +/// following transitions. (In this case, `b` transitions to the next state +/// where as `a` would transition to the dead state.) +#[derive(Clone)] +pub(crate) struct Accel { + /// The first byte is the length. Subsequent bytes are the accelerated + /// bytes. + /// + /// Note that we make every accelerator 8 bytes as a slightly wasteful + /// way of making sure alignment is always correct for state ID sizes of + /// 1, 2, 4 and 8. This should be okay since accelerated states aren't + /// particularly common, especially when Unicode is enabled. 
+ bytes: [u8; ACCEL_CAP], +} + +impl Accel { + /// Returns an empty accel, where no bytes are accelerated. + #[cfg(feature = "dfa-build")] + pub fn new() -> Accel { + Accel { bytes: [0; ACCEL_CAP] } + } + + /// Returns a verified accelerator derived from the beginning of the given + /// slice. + /// + /// If the slice is not long enough or contains invalid bytes for an + /// accelerator, then this returns an error. + pub fn from_slice(mut slice: &[u8]) -> Result { + slice = &slice[..core::cmp::min(ACCEL_LEN, slice.len())]; + let bytes = slice + .try_into() + .map_err(|_| DeserializeError::buffer_too_small("accelerator"))?; + Accel::from_bytes(bytes) + } + + /// Returns a verified accelerator derived from raw bytes. + /// + /// If the given bytes are invalid, then this returns an error. + fn from_bytes(bytes: [u8; 4]) -> Result { + if usize::from(bytes[0]) >= ACCEL_LEN { + return Err(DeserializeError::generic( + "accelerator bytes cannot have length more than 3", + )); + } + Ok(Accel::from_bytes_unchecked(bytes)) + } + + /// Returns an accelerator derived from raw bytes. + /// + /// This does not check whether the given bytes are valid. Invalid bytes + /// cannot sacrifice memory safety, but may result in panics or silent + /// logic bugs. + fn from_bytes_unchecked(bytes: [u8; 4]) -> Accel { + Accel { bytes: [bytes[0], bytes[1], bytes[2], bytes[3], 0, 0, 0, 0] } + } + + /// Attempts to add the given byte to this accelerator. If the accelerator + /// is already full or thinks the byte is a poor accelerator, then this + /// returns false. Otherwise, returns true. + /// + /// If the given byte is already in this accelerator, then it panics. + #[cfg(feature = "dfa-build")] + pub fn add(&mut self, byte: u8) -> bool { + if self.len() >= 3 { + return false; + } + // As a special case, we totally reject trying to accelerate a state + // with an ASCII space. In most cases, it occurs very frequently, and + // tends to result in worse overall performance. 
+ if byte == b' ' { + return false; + } + assert!( + !self.contains(byte), + "accelerator already contains {:?}", + crate::util::escape::DebugByte(byte) + ); + self.bytes[self.len() + 1] = byte; + self.bytes[0] += 1; + true + } + + /// Return the number of bytes in this accelerator. + pub fn len(&self) -> usize { + usize::from(self.bytes[0]) + } + + /// Returns true if and only if there are no bytes in this accelerator. + #[cfg(feature = "dfa-build")] + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Returns the slice of bytes to accelerate. + /// + /// If this accelerator is empty, then this returns an empty slice. + fn needles(&self) -> &[u8] { + &self.bytes[1..1 + self.len()] + } + + /// Returns true if and only if this accelerator will accelerate the given + /// byte. + #[cfg(feature = "dfa-build")] + fn contains(&self, byte: u8) -> bool { + self.needles().iter().position(|&b| b == byte).is_some() + } + + /// Returns the accelerator bytes as an array of AccelTys. + #[cfg(feature = "dfa-build")] + fn as_accel_tys(&self) -> [AccelTy; 2] { + assert_eq!(ACCEL_CAP, 8); + // These unwraps are OK since ACCEL_CAP is set to 8. 
+ let first = + AccelTy::from_ne_bytes(self.bytes[0..4].try_into().unwrap()); + let second = + AccelTy::from_ne_bytes(self.bytes[4..8].try_into().unwrap()); + [first, second] + } +} + +impl core::fmt::Debug for Accel { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + write!(f, "Accel(")?; + let mut set = f.debug_set(); + for &b in self.needles() { + set.entry(&crate::util::escape::DebugByte(b)); + } + set.finish()?; + write!(f, ")") + } +} diff --git a/vendor/regex-automata/src/dfa/automaton.rs b/vendor/regex-automata/src/dfa/automaton.rs new file mode 100644 index 0000000..fcfcf29 --- /dev/null +++ b/vendor/regex-automata/src/dfa/automaton.rs @@ -0,0 +1,2260 @@ +#[cfg(feature = "alloc")] +use crate::util::search::PatternSet; +use crate::{ + dfa::search, + util::{ + empty, + prefilter::Prefilter, + primitives::{PatternID, StateID}, + search::{Anchored, HalfMatch, Input, MatchError}, + start, + }, +}; + +/// A trait describing the interface of a deterministic finite automaton (DFA). +/// +/// The complexity of this trait probably means that it's unlikely for others +/// to implement it. The primary purpose of the trait is to provide for a way +/// of abstracting over different types of DFAs. In this crate, that means +/// dense DFAs and sparse DFAs. (Dense DFAs are fast but memory hungry, where +/// as sparse DFAs are slower but come with a smaller memory footprint. But +/// they otherwise provide exactly equivalent expressive power.) For example, a +/// [`dfa::regex::Regex`](crate::dfa::regex::Regex) is generic over this trait. +/// +/// Normally, a DFA's execution model is very simple. You might have a single +/// start state, zero or more final or "match" states and a function that +/// transitions from one state to the next given the next byte of input. +/// Unfortunately, the interface described by this trait is significantly +/// more complicated than this. 
The complexity has a number of different +/// reasons, mostly motivated by performance, functionality or space savings: +/// +/// * A DFA can search for multiple patterns simultaneously. This +/// means extra information is returned when a match occurs. Namely, +/// a match is not just an offset, but an offset plus a pattern ID. +/// [`Automaton::pattern_len`] returns the number of patterns compiled into +/// the DFA, [`Automaton::match_len`] returns the total number of patterns +/// that match in a particular state and [`Automaton::match_pattern`] permits +/// iterating over the patterns that match in a particular state. +/// * A DFA can have multiple start states, and the choice of which start +/// state to use depends on the content of the string being searched and +/// position of the search, as well as whether the search is an anchored +/// search for a specific pattern in the DFA. Moreover, computing the start +/// state also depends on whether you're doing a forward or a reverse search. +/// [`Automaton::start_state_forward`] and [`Automaton::start_state_reverse`] +/// are used to compute the start state for forward and reverse searches, +/// respectively. +/// * All matches are delayed by one byte to support things like `$` and `\b` +/// at the end of a pattern. Therefore, every use of a DFA is required to use +/// [`Automaton::next_eoi_state`] +/// at the end of the search to compute the final transition. +/// * For optimization reasons, some states are treated specially. Every +/// state is either special or not, which can be determined via the +/// [`Automaton::is_special_state`] method. If it's special, then the state +/// must be at least one of a few possible types of states. (Note that some +/// types can overlap, for example, a match state can also be an accel state. +/// But some types can't. If a state is a dead state, then it can never be any +/// other type of state.) Those types are: +/// * A dead state. 
A dead state means the DFA will never enter a match +/// state. This can be queried via the [`Automaton::is_dead_state`] method. +/// * A quit state. A quit state occurs if the DFA had to stop the search +/// prematurely for some reason. This can be queried via the +/// [`Automaton::is_quit_state`] method. +/// * A match state. A match state occurs when a match is found. When a DFA +/// enters a match state, the search may stop immediately (when looking +/// for the earliest match), or it may continue to find the leftmost-first +/// match. This can be queried via the [`Automaton::is_match_state`] +/// method. +/// * A start state. A start state is where a search begins. For every +/// search, there is exactly one start state that is used, however, a +/// DFA may contain many start states. When the search is in a start +/// state, it may use a prefilter to quickly skip to candidate matches +/// without executing the DFA on every byte. This can be queried via the +/// [`Automaton::is_start_state`] method. +/// * An accel state. An accel state is a state that is accelerated. +/// That is, it is a state where _most_ of its transitions loop back to +/// itself and only a small number of transitions lead to other states. +/// This kind of state is said to be accelerated because a search routine +/// can quickly look for the bytes leading out of the state instead of +/// continuing to execute the DFA on each byte. This can be queried via the +/// [`Automaton::is_accel_state`] method. And the bytes that lead out of +/// the state can be queried via the [`Automaton::accelerator`] method. +/// +/// There are a number of provided methods on this trait that implement +/// efficient searching (for forwards and backwards) with a DFA using +/// all of the above features of this trait. In particular, given the +/// complexity of all these features, implementing a search routine in +/// this trait can be a little subtle. 
With that said, it is possible to +/// somewhat simplify the search routine. For example, handling accelerated +/// states is strictly optional, since it is always correct to assume that +/// `Automaton::is_accel_state` returns false. However, one complex part of +/// writing a search routine using this trait is handling the 1-byte delay of a +/// match. That is not optional. +/// +/// # Safety +/// +/// This trait is not safe to implement so that code may rely on the +/// correctness of implementations of this trait to avoid undefined behavior. +/// The primary correctness guarantees are: +/// +/// * `Automaton::start_state` always returns a valid state ID or an error or +/// panics. +/// * `Automaton::next_state`, when given a valid state ID, always returns +/// a valid state ID for all values of `anchored` and `byte`, or otherwise +/// panics. +/// +/// In general, the rest of the methods on `Automaton` need to uphold their +/// contracts as well. For example, `Automaton::is_dead` should only returns +/// true if the given state ID is actually a dead state. +pub unsafe trait Automaton { + /// Transitions from the current state to the next state, given the next + /// byte of input. + /// + /// Implementations must guarantee that the returned ID is always a valid + /// ID when `current` refers to a valid ID. Moreover, the transition + /// function must be defined for all possible values of `input`. + /// + /// # Panics + /// + /// If the given ID does not refer to a valid state, then this routine + /// may panic but it also may not panic and instead return an invalid ID. + /// However, if the caller provides an invalid ID then this must never + /// sacrifice memory safety. + /// + /// # Example + /// + /// This shows a simplistic example for walking a DFA for a given haystack + /// by using the `next_state` method. 
+ /// + /// ``` + /// use regex_automata::{dfa::{Automaton, dense}, Input}; + /// + /// let dfa = dense::DFA::new(r"[a-z]+r")?; + /// let haystack = "bar".as_bytes(); + /// + /// // The start state is determined by inspecting the position and the + /// // initial bytes of the haystack. + /// let mut state = dfa.start_state_forward(&Input::new(haystack))?; + /// // Walk all the bytes in the haystack. + /// for &b in haystack { + /// state = dfa.next_state(state, b); + /// } + /// // Matches are always delayed by 1 byte, so we must explicitly walk the + /// // special "EOI" transition at the end of the search. + /// state = dfa.next_eoi_state(state); + /// assert!(dfa.is_match_state(state)); + /// + /// # Ok::<(), Box>(()) + /// ``` + fn next_state(&self, current: StateID, input: u8) -> StateID; + + /// Transitions from the current state to the next state, given the next + /// byte of input. + /// + /// Unlike [`Automaton::next_state`], implementations may implement this + /// more efficiently by assuming that the `current` state ID is valid. + /// Typically, this manifests by eliding bounds checks. + /// + /// # Safety + /// + /// Callers of this method must guarantee that `current` refers to a valid + /// state ID. If `current` is not a valid state ID for this automaton, then + /// calling this routine may result in undefined behavior. + /// + /// If `current` is valid, then implementations must guarantee that the ID + /// returned is valid for all possible values of `input`. + unsafe fn next_state_unchecked( + &self, + current: StateID, + input: u8, + ) -> StateID; + + /// Transitions from the current state to the next state for the special + /// EOI symbol. + /// + /// Implementations must guarantee that the returned ID is always a valid + /// ID when `current` refers to a valid ID. + /// + /// This routine must be called at the end of every search in a correct + /// implementation of search. 
Namely, DFAs in this crate delay matches + /// by one byte in order to support look-around operators. Thus, after + /// reaching the end of a haystack, a search implementation must follow one + /// last EOI transition. + /// + /// It is best to think of EOI as an additional symbol in the alphabet of + /// a DFA that is distinct from every other symbol. That is, the alphabet + /// of DFAs in this crate has a logical size of 257 instead of 256, where + /// 256 corresponds to every possible inhabitant of `u8`. (In practice, the + /// physical alphabet size may be smaller because of alphabet compression + /// via equivalence classes, but EOI is always represented somehow in the + /// alphabet.) + /// + /// # Panics + /// + /// If the given ID does not refer to a valid state, then this routine + /// may panic but it also may not panic and instead return an invalid ID. + /// However, if the caller provides an invalid ID then this must never + /// sacrifice memory safety. + /// + /// # Example + /// + /// This shows a simplistic example for walking a DFA for a given haystack, + /// and then finishing the search with the final EOI transition. + /// + /// ``` + /// use regex_automata::{dfa::{Automaton, dense}, Input}; + /// + /// let dfa = dense::DFA::new(r"[a-z]+r")?; + /// let haystack = "bar".as_bytes(); + /// + /// // The start state is determined by inspecting the position and the + /// // initial bytes of the haystack. + /// // + /// // The unwrap is OK because we aren't requesting a start state for a + /// // specific pattern. + /// let mut state = dfa.start_state_forward(&Input::new(haystack))?; + /// // Walk all the bytes in the haystack. + /// for &b in haystack { + /// state = dfa.next_state(state, b); + /// } + /// // Matches are always delayed by 1 byte, so we must explicitly walk + /// // the special "EOI" transition at the end of the search. 
Without this + /// // final transition, the assert below will fail since the DFA will not + /// // have entered a match state yet! + /// state = dfa.next_eoi_state(state); + /// assert!(dfa.is_match_state(state)); + /// + /// # Ok::<(), Box>(()) + /// ``` + fn next_eoi_state(&self, current: StateID) -> StateID; + + /// Return the ID of the start state for this DFA for the given starting + /// configuration. + /// + /// Unlike typical DFA implementations, the start state for DFAs in this + /// crate is dependent on a few different factors: + /// + /// * The [`Anchored`] mode of the search. Unanchored, anchored and + /// anchored searches for a specific [`PatternID`] all use different start + /// states. + /// * Whether a "look-behind" byte exists. For example, the `^` anchor + /// matches if and only if there is no look-behind byte. + /// * The specific value of that look-behind byte. For example, a `(?m:^)` + /// assertion only matches when there is either no look-behind byte, or + /// when the look-behind byte is a line terminator. + /// + /// The [starting configuration](start::Config) provides the above + /// information. + /// + /// This routine can be used for either forward or reverse searches. + /// Although, as a convenience, if you have an [`Input`], then it may + /// be more succinct to use [`Automaton::start_state_forward`] or + /// [`Automaton::start_state_reverse`]. Note, for example, that the + /// convenience routines return a [`MatchError`] on failure where as this + /// routine returns a [`StartError`]. + /// + /// # Errors + /// + /// This may return a [`StartError`] if the search needs to give up when + /// determining the start state (for example, if it sees a "quit" byte). + /// This can also return an error if the given configuration contains an + /// unsupported [`Anchored`] configuration. 
+ fn start_state( + &self, + config: &start::Config, + ) -> Result; + + /// Return the ID of the start state for this DFA when executing a forward + /// search. + /// + /// This is a convenience routine for calling [`Automaton::start_state`] + /// that converts the given [`Input`] to a [start + /// configuration](start::Config). Additionally, if an error occurs, it is + /// converted from a [`StartError`] to a [`MatchError`] using the offset + /// information in the given [`Input`]. + /// + /// # Errors + /// + /// This may return a [`MatchError`] if the search needs to give up + /// when determining the start state (for example, if it sees a "quit" + /// byte). This can also return an error if the given `Input` contains an + /// unsupported [`Anchored`] configuration. + fn start_state_forward( + &self, + input: &Input<'_>, + ) -> Result { + let config = start::Config::from_input_forward(input); + self.start_state(&config).map_err(|err| match err { + StartError::Quit { byte } => { + let offset = input + .start() + .checked_sub(1) + .expect("no quit in start without look-behind"); + MatchError::quit(byte, offset) + } + StartError::UnsupportedAnchored { mode } => { + MatchError::unsupported_anchored(mode) + } + }) + } + + /// Return the ID of the start state for this DFA when executing a reverse + /// search. + /// + /// This is a convenience routine for calling [`Automaton::start_state`] + /// that converts the given [`Input`] to a [start + /// configuration](start::Config). Additionally, if an error occurs, it is + /// converted from a [`StartError`] to a [`MatchError`] using the offset + /// information in the given [`Input`]. + /// + /// # Errors + /// + /// This may return a [`MatchError`] if the search needs to give up + /// when determining the start state (for example, if it sees a "quit" + /// byte). This can also return an error if the given `Input` contains an + /// unsupported [`Anchored`] configuration. 
+ fn start_state_reverse( + &self, + input: &Input<'_>, + ) -> Result { + let config = start::Config::from_input_reverse(input); + self.start_state(&config).map_err(|err| match err { + StartError::Quit { byte } => { + let offset = input.end(); + MatchError::quit(byte, offset) + } + StartError::UnsupportedAnchored { mode } => { + MatchError::unsupported_anchored(mode) + } + }) + } + + /// If this DFA has a universal starting state for the given anchor mode + /// and the DFA supports universal starting states, then this returns that + /// state's identifier. + /// + /// A DFA is said to have a universal starting state when the starting + /// state is invariant with respect to the haystack. Usually, the starting + /// state is chosen depending on the bytes immediately surrounding the + /// starting position of a search. However, the starting state only differs + /// when one or more of the patterns in the DFA have look-around assertions + /// in its prefix. + /// + /// Stated differently, if none of the patterns in a DFA have look-around + /// assertions in their prefix, then the DFA has a universal starting state + /// and _may_ be returned by this method. + /// + /// It is always correct for implementations to return `None`, and indeed, + /// this is what the default implementation does. When this returns `None`, + /// callers must use either `start_state_forward` or `start_state_reverse` + /// to get the starting state. + /// + /// # Use case + /// + /// There are a few reasons why one might want to use this: + /// + /// * If you know your regex patterns have no look-around assertions in + /// their prefix, then calling this routine is likely cheaper and perhaps + /// more semantically meaningful. + /// * When implementing prefilter support in a DFA regex implementation, + /// it is necessary to re-compute the start state after a candidate + /// is returned from the prefilter. However, this is only needed when + /// there isn't a universal start state. 
When one exists, one can avoid + /// re-computing the start state. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{ + /// dfa::{Automaton, dense::DFA}, + /// Anchored, + /// }; + /// + /// // There are no look-around assertions in the prefixes of any of the + /// // patterns, so we get a universal start state. + /// let dfa = DFA::new_many(&["[0-9]+", "[a-z]+$", "[A-Z]+"])?; + /// assert!(dfa.universal_start_state(Anchored::No).is_some()); + /// assert!(dfa.universal_start_state(Anchored::Yes).is_some()); + /// + /// // One of the patterns has a look-around assertion in its prefix, + /// // so this means there is no longer a universal start state. + /// let dfa = DFA::new_many(&["[0-9]+", "^[a-z]+$", "[A-Z]+"])?; + /// assert!(!dfa.universal_start_state(Anchored::No).is_some()); + /// assert!(!dfa.universal_start_state(Anchored::Yes).is_some()); + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + fn universal_start_state(&self, _mode: Anchored) -> Option { + None + } + + /// Returns true if and only if the given identifier corresponds to a + /// "special" state. A special state is one or more of the following: + /// a dead state, a quit state, a match state, a start state or an + /// accelerated state. + /// + /// A correct implementation _may_ always return false for states that + /// are either start states or accelerated states, since that information + /// is only intended to be used for optimization purposes. Correct + /// implementations must return true if the state is a dead, quit or match + /// state. This is because search routines using this trait must be able + /// to rely on `is_special_state` as an indicator that a state may need + /// special treatment. (For example, when a search routine sees a dead + /// state, it must terminate.) + /// + /// This routine permits search implementations to use a single branch to + /// check whether a state needs special attention before executing the next + /// transition. 
The example below shows how to do this. + /// + /// # Example + /// + /// This example shows how `is_special_state` can be used to implement a + /// correct search routine with minimal branching. In particular, this + /// search routine implements "leftmost" matching, which means that it + /// doesn't immediately stop once a match is found. Instead, it continues + /// until it reaches a dead state. + /// + /// ``` + /// use regex_automata::{ + /// dfa::{Automaton, dense}, + /// HalfMatch, MatchError, Input, + /// }; + /// + /// fn find( + /// dfa: &A, + /// haystack: &[u8], + /// ) -> Result, MatchError> { + /// // The start state is determined by inspecting the position and the + /// // initial bytes of the haystack. Note that start states can never + /// // be match states (since DFAs in this crate delay matches by 1 + /// // byte), so we don't need to check if the start state is a match. + /// let mut state = dfa.start_state_forward(&Input::new(haystack))?; + /// let mut last_match = None; + /// // Walk all the bytes in the haystack. We can quit early if we see + /// // a dead or a quit state. The former means the automaton will + /// // never transition to any other state. The latter means that the + /// // automaton entered a condition in which its search failed. + /// for (i, &b) in haystack.iter().enumerate() { + /// state = dfa.next_state(state, b); + /// if dfa.is_special_state(state) { + /// if dfa.is_match_state(state) { + /// last_match = Some(HalfMatch::new( + /// dfa.match_pattern(state, 0), + /// i, + /// )); + /// } else if dfa.is_dead_state(state) { + /// return Ok(last_match); + /// } else if dfa.is_quit_state(state) { + /// // It is possible to enter into a quit state after + /// // observing a match has occurred. In that case, we + /// // should return the match instead of an error. 
+ /// if last_match.is_some() { + /// return Ok(last_match); + /// } + /// return Err(MatchError::quit(b, i)); + /// } + /// // Implementors may also want to check for start or accel + /// // states and handle them differently for performance + /// // reasons. But it is not necessary for correctness. + /// } + /// } + /// // Matches are always delayed by 1 byte, so we must explicitly walk + /// // the special "EOI" transition at the end of the search. + /// state = dfa.next_eoi_state(state); + /// if dfa.is_match_state(state) { + /// last_match = Some(HalfMatch::new( + /// dfa.match_pattern(state, 0), + /// haystack.len(), + /// )); + /// } + /// Ok(last_match) + /// } + /// + /// // We use a greedy '+' operator to show how the search doesn't just + /// // stop once a match is detected. It continues extending the match. + /// // Using '[a-z]+?' would also work as expected and stop the search + /// // early. Greediness is built into the automaton. + /// let dfa = dense::DFA::new(r"[a-z]+")?; + /// let haystack = "123 foobar 4567".as_bytes(); + /// let mat = find(&dfa, haystack)?.unwrap(); + /// assert_eq!(mat.pattern().as_usize(), 0); + /// assert_eq!(mat.offset(), 10); + /// + /// // Here's another example that tests our handling of the special EOI + /// // transition. This will fail to find a match if we don't call + /// // 'next_eoi_state' at the end of the search since the match isn't + /// // found until the final byte in the haystack. + /// let dfa = dense::DFA::new(r"[0-9]{4}")?; + /// let haystack = "123 foobar 4567".as_bytes(); + /// let mat = find(&dfa, haystack)?.unwrap(); + /// assert_eq!(mat.pattern().as_usize(), 0); + /// assert_eq!(mat.offset(), 15); + /// + /// // And note that our search implementation above automatically works + /// // with multi-DFAs. Namely, `dfa.match_pattern(match_state, 0)` selects + /// // the appropriate pattern ID for us. 
+ /// let dfa = dense::DFA::new_many(&[r"[a-z]+", r"[0-9]+"])?; + /// let haystack = "123 foobar 4567".as_bytes(); + /// let mat = find(&dfa, haystack)?.unwrap(); + /// assert_eq!(mat.pattern().as_usize(), 1); + /// assert_eq!(mat.offset(), 3); + /// let mat = find(&dfa, &haystack[3..])?.unwrap(); + /// assert_eq!(mat.pattern().as_usize(), 0); + /// assert_eq!(mat.offset(), 7); + /// let mat = find(&dfa, &haystack[10..])?.unwrap(); + /// assert_eq!(mat.pattern().as_usize(), 1); + /// assert_eq!(mat.offset(), 5); + /// + /// # Ok::<(), Box>(()) + /// ``` + fn is_special_state(&self, id: StateID) -> bool; + + /// Returns true if and only if the given identifier corresponds to a dead + /// state. When a DFA enters a dead state, it is impossible to leave. That + /// is, every transition on a dead state by definition leads back to the + /// same dead state. + /// + /// In practice, the dead state always corresponds to the identifier `0`. + /// Moreover, in practice, there is only one dead state. + /// + /// The existence of a dead state is not strictly required in the classical + /// model of finite state machines, where one generally only cares about + /// the question of whether an input sequence matches or not. Dead states + /// are not needed to answer that question, since one can immediately quit + /// as soon as one enters a final or "match" state. However, we don't just + /// care about matches but also care about the location of matches, and + /// more specifically, care about semantics like "greedy" matching. + /// + /// For example, given the pattern `a+` and the input `aaaz`, the dead + /// state won't be entered until the state machine reaches `z` in the + /// input, at which point, the search routine can quit. But without the + /// dead state, the search routine wouldn't know when to quit. In a + /// classical representation, the search routine would stop after seeing + /// the first `a` (which is when the search would enter a match state). 
But + /// this wouldn't implement "greedy" matching where `a+` matches as many + /// `a`'s as possible. + /// + /// # Example + /// + /// See the example for [`Automaton::is_special_state`] for how to use this + /// method correctly. + fn is_dead_state(&self, id: StateID) -> bool; + + /// Returns true if and only if the given identifier corresponds to a quit + /// state. A quit state is like a dead state (it has no transitions other + /// than to itself), except it indicates that the DFA failed to complete + /// the search. When this occurs, callers can neither accept nor reject that + /// a match occurred. + /// + /// In practice, the quit state always corresponds to the state immediately + /// following the dead state. (Which is not usually represented by `1`, + /// since state identifiers are pre-multiplied by the state machine's + /// alphabet stride, and the alphabet stride varies between DFAs.) + /// + /// The typical way in which a quit state can occur is when heuristic + /// support for Unicode word boundaries is enabled via the + /// [`dense::Config::unicode_word_boundary`](crate::dfa::dense::Config::unicode_word_boundary) + /// option. But other options, like the lower level + /// [`dense::Config::quit`](crate::dfa::dense::Config::quit) + /// configuration, can also result in a quit state being entered. The + /// purpose of the quit state is to provide a way to execute a fast DFA + /// in common cases while delegating to slower routines when the DFA quits. + /// + /// The default search implementations provided by this crate will return a + /// [`MatchError::quit`] error when a quit state is entered. + /// + /// # Example + /// + /// See the example for [`Automaton::is_special_state`] for how to use this + /// method correctly. + fn is_quit_state(&self, id: StateID) -> bool; + + /// Returns true if and only if the given identifier corresponds to a + /// match state.
A match state is also referred to as a "final" state and + /// indicates that a match has been found. + /// + /// If all you care about is whether a particular pattern matches in the + /// input sequence, then a search routine can quit early as soon as the + /// machine enters a match state. However, if you're looking for the + /// standard "leftmost-first" match location, then search _must_ continue + /// until either the end of the input or until the machine enters a dead + /// state. (Since either condition implies that no other useful work can + /// be done.) Namely, when looking for the location of a match, then + /// search implementations should record the most recent location in + /// which a match state was entered, but otherwise continue executing the + /// search as normal. (The search may even leave the match state.) Once + /// the termination condition is reached, the most recently recorded match + /// location should be returned. + /// + /// Finally, one additional power given to match states in this crate + /// is that they are always associated with a specific pattern in order + /// to support multi-DFAs. See [`Automaton::match_pattern`] for more + /// details and an example for how to query the pattern associated with a + /// particular match state. + /// + /// # Example + /// + /// See the example for [`Automaton::is_special_state`] for how to use this + /// method correctly. + fn is_match_state(&self, id: StateID) -> bool; + + /// Returns true only if the given identifier corresponds to a start + /// state + /// + /// A start state is a state in which a DFA begins a search. + /// All searches begin in a start state. Moreover, since all matches are + /// delayed by one byte, a start state can never be a match state. + /// + /// The main role of a start state is, as mentioned, to be a starting + /// point for a DFA. 
This starting point is determined via one of + /// [`Automaton::start_state_forward`] or + /// [`Automaton::start_state_reverse`], depending on whether one is doing + /// a forward or a reverse search, respectively. + /// + /// A secondary use of start states is for prefix acceleration. Namely, + /// while executing a search, if one detects that you're in a start state, + /// then it may be faster to look for the next match of a prefix of the + /// pattern, if one exists. If a prefix exists and since all matches must + /// begin with that prefix, then skipping ahead to occurrences of that + /// prefix may be much faster than executing the DFA. + /// + /// As mentioned in the documentation for + /// [`is_special_state`](Automaton::is_special_state) implementations + /// _may_ always return false, even if the given identifier is a start + /// state. This is because knowing whether a state is a start state or not + /// is not necessary for correctness and is only treated as a potential + /// performance optimization. (For example, the implementations of this + /// trait in this crate will only return true when the given identifier + /// corresponds to a start state and when [specialization of start + /// states](crate::dfa::dense::Config::specialize_start_states) was enabled + /// during DFA construction. If start state specialization is disabled + /// (which is the default), then this method will always return false.) + /// + /// # Example + /// + /// This example shows how to implement your own search routine that does + /// a prefix search whenever the search enters a start state. + /// + /// Note that you do not need to implement your own search routine + /// to make use of prefilters like this. The search routines + /// provided by this crate already implement prefilter support via + /// the [`Prefilter`](crate::util::prefilter::Prefilter) trait. 
+ /// A prefilter can be added to your search configuration with + /// [`dense::Config::prefilter`](crate::dfa::dense::Config::prefilter) for + /// dense and sparse DFAs in this crate. + /// + /// This example is meant to show how you might deal with prefilters in a + /// simplified case if you are implementing your own search routine. + /// + /// ``` + /// use regex_automata::{ + /// dfa::{Automaton, dense}, + /// HalfMatch, MatchError, Input, + /// }; + /// + /// fn find_byte(slice: &[u8], at: usize, byte: u8) -> Option { + /// // Would be faster to use the memchr crate, but this is still + /// // faster than running through the DFA. + /// slice[at..].iter().position(|&b| b == byte).map(|i| at + i) + /// } + /// + /// fn find( + /// dfa: &A, + /// haystack: &[u8], + /// prefix_byte: Option, + /// ) -> Result, MatchError> { + /// // See the Automaton::is_special_state example for similar code + /// // with more comments. + /// + /// let mut state = dfa.start_state_forward(&Input::new(haystack))?; + /// let mut last_match = None; + /// let mut pos = 0; + /// while pos < haystack.len() { + /// let b = haystack[pos]; + /// state = dfa.next_state(state, b); + /// pos += 1; + /// if dfa.is_special_state(state) { + /// if dfa.is_match_state(state) { + /// last_match = Some(HalfMatch::new( + /// dfa.match_pattern(state, 0), + /// pos - 1, + /// )); + /// } else if dfa.is_dead_state(state) { + /// return Ok(last_match); + /// } else if dfa.is_quit_state(state) { + /// // It is possible to enter into a quit state after + /// // observing a match has occurred. In that case, we + /// // should return the match instead of an error. 
+ /// if last_match.is_some() { + /// return Ok(last_match); + /// } + /// return Err(MatchError::quit(b, pos - 1)); + /// } else if dfa.is_start_state(state) { + /// // If we're in a start state and know all matches begin + /// // with a particular byte, then we can quickly skip to + /// // candidate matches without running the DFA through + /// // every byte inbetween. + /// if let Some(prefix_byte) = prefix_byte { + /// pos = match find_byte(haystack, pos, prefix_byte) { + /// Some(pos) => pos, + /// None => break, + /// }; + /// } + /// } + /// } + /// } + /// // Matches are always delayed by 1 byte, so we must explicitly walk + /// // the special "EOI" transition at the end of the search. + /// state = dfa.next_eoi_state(state); + /// if dfa.is_match_state(state) { + /// last_match = Some(HalfMatch::new( + /// dfa.match_pattern(state, 0), + /// haystack.len(), + /// )); + /// } + /// Ok(last_match) + /// } + /// + /// // In this example, it's obvious that all occurrences of our pattern + /// // begin with 'Z', so we pass in 'Z'. Note also that we need to + /// // enable start state specialization, or else it won't be possible to + /// // detect start states during a search. ('is_start_state' would always + /// // return false.) + /// let dfa = dense::DFA::builder() + /// .configure(dense::DFA::config().specialize_start_states(true)) + /// .build(r"Z[a-z]+")?; + /// let haystack = "123 foobar Zbaz quux".as_bytes(); + /// let mat = find(&dfa, haystack, Some(b'Z'))?.unwrap(); + /// assert_eq!(mat.pattern().as_usize(), 0); + /// assert_eq!(mat.offset(), 15); + /// + /// // But note that we don't need to pass in a prefix byte. If we don't, + /// // then the search routine does no acceleration. + /// let mat = find(&dfa, haystack, None)?.unwrap(); + /// assert_eq!(mat.pattern().as_usize(), 0); + /// assert_eq!(mat.offset(), 15); + /// + /// // However, if we pass an incorrect byte, then the prefix search will + /// // result in incorrect results. 
+ /// assert_eq!(find(&dfa, haystack, Some(b'X'))?, None); + /// + /// # Ok::<(), Box>(()) + /// ``` + fn is_start_state(&self, id: StateID) -> bool; + + /// Returns true if and only if the given identifier corresponds to an + /// accelerated state. + /// + /// An accelerated state is a special optimization + /// trick implemented by this crate. Namely, if + /// [`dense::Config::accelerate`](crate::dfa::dense::Config::accelerate) is + /// enabled (and it is by default), then DFAs generated by this crate will + /// tag states meeting certain characteristics as accelerated. States meet + /// this criteria whenever most of their transitions are self-transitions. + /// That is, transitions that loop back to the same state. When a small + /// number of transitions aren't self-transitions, then it follows that + /// there are only a small number of bytes that can cause the DFA to leave + /// that state. Thus, there is an opportunity to look for those bytes + /// using more optimized routines rather than continuing to run through + /// the DFA. This trick is similar to the prefilter idea described in + /// the documentation of [`Automaton::is_start_state`] with two main + /// differences: + /// + /// 1. It is more limited since acceleration only applies to single bytes. + /// This means states are rarely accelerated when Unicode mode is enabled + /// (which is enabled by default). + /// 2. It can occur anywhere in the DFA, which increases optimization + /// opportunities. + /// + /// Like the prefilter idea, the main downside (and a possible reason to + /// disable it) is that it can lead to worse performance in some cases. + /// Namely, if a state is accelerated for very common bytes, then the + /// overhead of checking for acceleration and using the more optimized + /// routines to look for those bytes can cause overall performance to be + /// worse than if acceleration wasn't enabled at all. 
+ /// + /// A simple example of a regex that has an accelerated state is + /// `(?-u)[^a]+a`. Namely, the `[^a]+` sub-expression gets compiled down + /// into a single state where all transitions except for `a` loop back to + /// itself, and where `a` is the only transition (other than the special + /// EOI transition) that goes to some other state. Thus, this state can + /// be accelerated and implemented more efficiently by calling an + /// optimized routine like `memchr` with `a` as the needle. Notice that + /// the `(?-u)` to disable Unicode is necessary here, as without it, + /// `[^a]` will match any UTF-8 encoding of any Unicode scalar value other + /// than `a`. This more complicated expression compiles down to many DFA + /// states and the simple acceleration optimization is no longer available. + /// + /// Typically, this routine is used to guard calls to + /// [`Automaton::accelerator`], which returns the accelerated bytes for + /// the specified state. + fn is_accel_state(&self, id: StateID) -> bool; + + /// Returns the total number of patterns compiled into this DFA. + /// + /// In the case of a DFA that contains no patterns, this must return `0`. 
+ /// + /// # Example + /// + /// This example shows the pattern length for a DFA that never matches: + /// + /// ``` + /// use regex_automata::dfa::{Automaton, dense::DFA}; + /// + /// let dfa: DFA> = DFA::never_match()?; + /// assert_eq!(dfa.pattern_len(), 0); + /// # Ok::<(), Box>(()) + /// ``` + /// + /// And another example for a DFA that matches at every position: + /// + /// ``` + /// use regex_automata::dfa::{Automaton, dense::DFA}; + /// + /// let dfa: DFA> = DFA::always_match()?; + /// assert_eq!(dfa.pattern_len(), 1); + /// # Ok::<(), Box>(()) + /// ``` + /// + /// And finally, a DFA that was constructed from multiple patterns: + /// + /// ``` + /// use regex_automata::dfa::{Automaton, dense::DFA}; + /// + /// let dfa = DFA::new_many(&["[0-9]+", "[a-z]+", "[A-Z]+"])?; + /// assert_eq!(dfa.pattern_len(), 3); + /// # Ok::<(), Box>(()) + /// ``` + fn pattern_len(&self) -> usize; + + /// Returns the total number of patterns that match in this state. + /// + /// If the given state is not a match state, then implementations may + /// panic. + /// + /// If the DFA was compiled with one pattern, then this must necessarily + /// always return `1` for all match states. + /// + /// Implementations must guarantee that [`Automaton::match_pattern`] can be + /// called with indices up to (but not including) the length returned by + /// this routine without panicking. + /// + /// # Panics + /// + /// Implementations are permitted to panic if the provided state ID does + /// not correspond to a match state. + /// + /// # Example + /// + /// This example shows a simple instance of implementing overlapping + /// matches. In particular, it shows not only how to determine how many + /// patterns have matched in a particular state, but also how to access + /// which specific patterns have matched. + /// + /// Notice that we must use + /// [`MatchKind::All`](crate::MatchKind::All) + /// when building the DFA. 
If we used + /// [`MatchKind::LeftmostFirst`](crate::MatchKind::LeftmostFirst) + /// instead, then the DFA would not be constructed in a way that + /// supports overlapping matches. (It would only report a single pattern + /// that matches at any particular point in time.) + /// + /// Another thing to take note of is the patterns used and the order in + /// which the pattern IDs are reported. In the example below, pattern `3` + /// is yielded first. Why? Because it corresponds to the match that + /// appears first. Namely, the `@` symbol is part of `\S+` but not part + /// of any of the other patterns. Since the `\S+` pattern has a match that + /// starts to the left of any other pattern, its ID is returned before any + /// other. + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{dfa::{Automaton, dense}, Input, MatchKind}; + /// + /// let dfa = dense::Builder::new() + /// .configure(dense::Config::new().match_kind(MatchKind::All)) + /// .build_many(&[ + /// r"[[:word:]]+", r"[a-z]+", r"[A-Z]+", r"[[:^space:]]+", + /// ])?; + /// let haystack = "@bar".as_bytes(); + /// + /// // The start state is determined by inspecting the position and the + /// // initial bytes of the haystack. + /// let mut state = dfa.start_state_forward(&Input::new(haystack))?; + /// // Walk all the bytes in the haystack. + /// for &b in haystack { + /// state = dfa.next_state(state, b); + /// } + /// state = dfa.next_eoi_state(state); + /// + /// assert!(dfa.is_match_state(state)); + /// assert_eq!(dfa.match_len(state), 3); + /// // The following calls are guaranteed to not panic since `match_len` + /// // returned `3` above. 
+ /// assert_eq!(dfa.match_pattern(state, 0).as_usize(), 3); + /// assert_eq!(dfa.match_pattern(state, 1).as_usize(), 0); + /// assert_eq!(dfa.match_pattern(state, 2).as_usize(), 1); + /// + /// # Ok::<(), Box>(()) + /// ``` + fn match_len(&self, id: StateID) -> usize; + + /// Returns the pattern ID corresponding to the given match index in the + /// given state. + /// + /// See [`Automaton::match_len`] for an example of how to use this + /// method correctly. Note that if you know your DFA is compiled with a + /// single pattern, then this routine is never necessary since it will + /// always return a pattern ID of `0` for an index of `0` when `id` + /// corresponds to a match state. + /// + /// Typically, this routine is used when implementing an overlapping + /// search, as the example for `Automaton::match_len` does. + /// + /// # Panics + /// + /// If the state ID is not a match state or if the match index is out + /// of bounds for the given state, then this routine may either panic + /// or produce an incorrect result. If the state ID is correct and the + /// match index is correct, then this routine must always produce a valid + /// `PatternID`. + fn match_pattern(&self, id: StateID, index: usize) -> PatternID; + + /// Returns true if and only if this automaton can match the empty string. + /// When it returns false, all possible matches are guaranteed to have a + /// non-zero length. + /// + /// This is useful as cheap way to know whether code needs to handle the + /// case of a zero length match. This is particularly important when UTF-8 + /// modes are enabled, as when UTF-8 mode is enabled, empty matches that + /// split a codepoint must never be reported. This extra handling can + /// sometimes be costly, and since regexes matching an empty string are + /// somewhat rare, it can be beneficial to treat such regexes specially. + /// + /// # Example + /// + /// This example shows a few different DFAs and whether they match the + /// empty string or not. 
Notice the empty string isn't merely a matter + /// of a string of length literally `0`, but rather, whether a match can + /// occur between specific pairs of bytes. + /// + /// ``` + /// use regex_automata::{dfa::{dense::DFA, Automaton}, util::syntax}; + /// + /// // The empty regex matches the empty string. + /// let dfa = DFA::new("")?; + /// assert!(dfa.has_empty(), "empty matches empty"); + /// // The '+' repetition operator requires at least one match, and so + /// // does not match the empty string. + /// let dfa = DFA::new("a+")?; + /// assert!(!dfa.has_empty(), "+ does not match empty"); + /// // But the '*' repetition operator does. + /// let dfa = DFA::new("a*")?; + /// assert!(dfa.has_empty(), "* does match empty"); + /// // And wrapping '+' in an operator that can match an empty string also + /// // causes it to match the empty string too. + /// let dfa = DFA::new("(a+)*")?; + /// assert!(dfa.has_empty(), "+ inside of * matches empty"); + /// + /// // If a regex is just made of a look-around assertion, even if the + /// // assertion requires some kind of non-empty string around it (such as + /// // \b), then it is still treated as if it matches the empty string. + /// // Namely, if a match occurs of just a look-around assertion, then the + /// // match returned is empty. + /// let dfa = DFA::builder() + /// .configure(DFA::config().unicode_word_boundary(true)) + /// .syntax(syntax::Config::new().utf8(false)) + /// .build(r"^$\A\z\b\B(?-u:\b\B)")?; + /// assert!(dfa.has_empty(), "assertions match empty"); + /// // Even when an assertion is wrapped in a '+', it still matches the + /// // empty string. + /// let dfa = DFA::new(r"^+")?; + /// assert!(dfa.has_empty(), "+ of an assertion matches empty"); + /// + /// // An alternation with even one branch that can match the empty string + /// // is also said to match the empty string overall. 
+ /// let dfa = DFA::new("foo|(bar)?|quux")?; + /// assert!(dfa.has_empty(), "alternations can match empty"); + /// + /// // An NFA that matches nothing does not match the empty string. + /// let dfa = DFA::new("[a&&b]")?; + /// assert!(!dfa.has_empty(), "never matching means not matching empty"); + /// // But if it's wrapped in something that doesn't require a match at + /// // all, then it can match the empty string! + /// let dfa = DFA::new("[a&&b]*")?; + /// assert!(dfa.has_empty(), "* on never-match still matches empty"); + /// // Since a '+' requires a match, using it on something that can never + /// // match will itself produce a regex that can never match anything, + /// // and thus does not match the empty string. + /// let dfa = DFA::new("[a&&b]+")?; + /// assert!(!dfa.has_empty(), "+ on never-match still matches nothing"); + /// + /// # Ok::<(), Box>(()) + /// ``` + fn has_empty(&self) -> bool; + + /// Whether UTF-8 mode is enabled for this DFA or not. + /// + /// When UTF-8 mode is enabled, all matches reported by a DFA are + /// guaranteed to correspond to spans of valid UTF-8. This includes + /// zero-width matches. For example, the DFA must guarantee that the empty + /// regex will not match at the positions between code units in the UTF-8 + /// encoding of a single codepoint. + /// + /// See [`thompson::Config::utf8`](crate::nfa::thompson::Config::utf8) for + /// more information. + /// + /// # Example + /// + /// This example shows how UTF-8 mode can impact the match spans that may + /// be reported in certain cases. + /// + /// ``` + /// use regex_automata::{ + /// dfa::{dense::DFA, Automaton}, + /// nfa::thompson, + /// HalfMatch, Input, + /// }; + /// + /// // UTF-8 mode is enabled by default. 
+ /// let re = DFA::new("")?; + /// assert!(re.is_utf8()); + /// let mut input = Input::new("☃"); + /// let got = re.try_search_fwd(&input)?; + /// assert_eq!(Some(HalfMatch::must(0, 0)), got); + /// + /// // Even though an empty regex matches at 1..1, our next match is + /// // 3..3 because 1..1 and 2..2 split the snowman codepoint (which is + /// // three bytes long). + /// input.set_start(1); + /// let got = re.try_search_fwd(&input)?; + /// assert_eq!(Some(HalfMatch::must(0, 3)), got); + /// + /// // But if we disable UTF-8, then we'll get matches at 1..1 and 2..2: + /// let re = DFA::builder() + /// .thompson(thompson::Config::new().utf8(false)) + /// .build("")?; + /// assert!(!re.is_utf8()); + /// let got = re.try_search_fwd(&input)?; + /// assert_eq!(Some(HalfMatch::must(0, 1)), got); + /// + /// input.set_start(2); + /// let got = re.try_search_fwd(&input)?; + /// assert_eq!(Some(HalfMatch::must(0, 2)), got); + /// + /// input.set_start(3); + /// let got = re.try_search_fwd(&input)?; + /// assert_eq!(Some(HalfMatch::must(0, 3)), got); + /// + /// input.set_start(4); + /// let got = re.try_search_fwd(&input)?; + /// assert_eq!(None, got); + /// + /// # Ok::<(), Box>(()) + /// ``` + fn is_utf8(&self) -> bool; + + /// Returns true if and only if this DFA is limited to returning matches + /// whose start position is `0`. + /// + /// Note that if you're using DFAs provided by + /// this crate, then this is _orthogonal_ to + /// [`Config::start_kind`](crate::dfa::dense::Config::start_kind). + /// + /// This is useful in some cases because if a DFA is limited to producing + /// matches that start at offset `0`, then a reverse search is never + /// required for finding the start of a match. + /// + /// # Example + /// + /// ``` + /// use regex_automata::dfa::{dense::DFA, Automaton}; + /// + /// // The empty regex matches anywhere + /// let dfa = DFA::new("")?; + /// assert!(!dfa.is_always_start_anchored(), "empty matches anywhere"); + /// // 'a' matches anywhere. 
+ /// let dfa = DFA::new("a")?; + /// assert!(!dfa.is_always_start_anchored(), "'a' matches anywhere"); + /// // '^' only matches at offset 0! + /// let dfa = DFA::new("^a")?; + /// assert!(dfa.is_always_start_anchored(), "'^a' matches only at 0"); + /// // But '(?m:^)' matches at 0 but at other offsets too. + /// let dfa = DFA::new("(?m:^)a")?; + /// assert!(!dfa.is_always_start_anchored(), "'(?m:^)a' matches anywhere"); + /// + /// # Ok::<(), Box>(()) + /// ``` + fn is_always_start_anchored(&self) -> bool; + + /// Return a slice of bytes to accelerate for the given state, if possible. + /// + /// If the given state has no accelerator, then an empty slice must be + /// returned. If `Automaton::is_accel_state` returns true for the given ID, + /// then this routine _must_ return a non-empty slice. But note that it is + /// not required for an implementation of this trait to ever return `true` + /// for `is_accel_state`, even if the state _could_ be accelerated. That + /// is, acceleration is an optional optimization. But the return values of + /// `is_accel_state` and `accelerator` must be in sync. + /// + /// If the given ID is not a valid state ID for this automaton, then + /// implementations may panic or produce incorrect results. + /// + /// See [`Automaton::is_accel_state`] for more details on state + /// acceleration. + /// + /// By default, this method will always return an empty slice. + /// + /// # Example + /// + /// This example shows a contrived case in which we build a regex that we + /// know is accelerated and extract the accelerator from a state. + /// + /// ``` + /// use regex_automata::{ + /// dfa::{Automaton, dense}, + /// util::{primitives::StateID, syntax}, + /// }; + /// + /// let dfa = dense::Builder::new() + /// // We disable Unicode everywhere and permit the regex to match + /// // invalid UTF-8. e.g., [^abc] matches \xFF, which is not valid + /// // UTF-8. 
If we left Unicode enabled, [^abc] would match any UTF-8 + /// // encoding of any Unicode scalar value except for 'a', 'b' or 'c'. + /// // That translates to a much more complicated DFA, and also + /// // inhibits the 'accelerator' optimization that we are trying to + /// // demonstrate in this example. + /// .syntax(syntax::Config::new().unicode(false).utf8(false)) + /// .build("[^abc]+a")?; + /// + /// // Here we just pluck out the state that we know is accelerated. + /// // While the stride calculations are something that can be relied + /// // on by callers, the specific position of the accelerated state is + /// // implementation defined. + /// // + /// // N.B. We get '3' by inspecting the state machine using 'regex-cli'. + /// // e.g., try `regex-cli debug dense dfa -p '[^abc]+a' -BbUC`. + /// let id = StateID::new(3 * dfa.stride()).unwrap(); + /// let accelerator = dfa.accelerator(id); + /// // The `[^abc]+` sub-expression permits [a, b, c] to be accelerated. + /// assert_eq!(accelerator, &[b'a', b'b', b'c']); + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + fn accelerator(&self, _id: StateID) -> &[u8] { + &[] + } + + /// Returns the prefilter associated with a DFA, if one exists. + /// + /// The default implementation of this trait always returns `None`. And + /// indeed, it is always correct to return `None`. + /// + /// For DFAs in this crate, a prefilter can be attached to a DFA via + /// [`dense::Config::prefilter`](crate::dfa::dense::Config::prefilter). + /// + /// Do note that prefilters are not serialized by DFAs in this crate. + /// So if you deserialize a DFA that had a prefilter attached to it + /// at serialization time, then it will not have a prefilter after + /// deserialization. + #[inline] + fn get_prefilter(&self) -> Option<&Prefilter> { + None + } + + /// Executes a forward search and returns the end position of the leftmost + /// match that is found. If no match exists, then `None` is returned. 
+ /// + /// In particular, this method continues searching even after it enters + /// a match state. The search only terminates once it has reached the + /// end of the input or when it has entered a dead or quit state. Upon + /// termination, the position of the last byte seen while still in a match + /// state is returned. + /// + /// # Errors + /// + /// This routine errors if the search could not complete. This can occur + /// in a number of circumstances: + /// + /// * The configuration of the DFA may permit it to "quit" the search. + /// For example, setting quit bytes or enabling heuristic support for + /// Unicode word boundaries. The default configuration does not enable any + /// option that could result in the DFA quitting. + /// * When the provided `Input` configuration is not supported. For + /// example, by providing an unsupported anchor mode. + /// + /// When a search returns an error, callers cannot know whether a match + /// exists or not. + /// + /// # Notes for implementors + /// + /// Implementors of this trait are not required to implement any particular + /// match semantics (such as leftmost-first), which are instead manifest in + /// the DFA's transitions. But this search routine should behave as a + /// general "leftmost" search. + /// + /// In particular, this method must continue searching even after it enters + /// a match state. The search should only terminate once it has reached + /// the end of the input or when it has entered a dead or quit state. Upon + /// termination, the position of the last byte seen while still in a match + /// state is returned. + /// + /// Since this trait provides an implementation for this method by default, + /// it's unlikely that one will need to implement this. + /// + /// # Example + /// + /// This example shows how to use this method with a + /// [`dense::DFA`](crate::dfa::dense::DFA). 
+ /// + /// ``` + /// use regex_automata::{dfa::{Automaton, dense}, HalfMatch, Input}; + /// + /// let dfa = dense::DFA::new("foo[0-9]+")?; + /// let expected = Some(HalfMatch::must(0, 8)); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new(b"foo12345"))?); + /// + /// // Even though a match is found after reading the first byte (`a`), + /// // the leftmost first match semantics demand that we find the earliest + /// // match that prefers earlier parts of the pattern over latter parts. + /// let dfa = dense::DFA::new("abc|a")?; + /// let expected = Some(HalfMatch::must(0, 3)); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new(b"abc"))?); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// # Example: specific pattern search + /// + /// This example shows how to build a multi-DFA that permits searching for + /// specific patterns. + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{ + /// dfa::{Automaton, dense}, + /// Anchored, HalfMatch, PatternID, Input, + /// }; + /// + /// let dfa = dense::Builder::new() + /// .configure(dense::Config::new().starts_for_each_pattern(true)) + /// .build_many(&["[a-z0-9]{6}", "[a-z][a-z0-9]{5}"])?; + /// let haystack = "foo123".as_bytes(); + /// + /// // Since we are using the default leftmost-first match and both + /// // patterns match at the same starting position, only the first pattern + /// // will be returned in this case when doing a search for any of the + /// // patterns. + /// let expected = Some(HalfMatch::must(0, 6)); + /// let got = dfa.try_search_fwd(&Input::new(haystack))?; + /// assert_eq!(expected, got); + /// + /// // But if we want to check whether some other pattern matches, then we + /// // can provide its pattern ID. 
+ /// let input = Input::new(haystack) + /// .anchored(Anchored::Pattern(PatternID::must(1))); + /// let expected = Some(HalfMatch::must(1, 6)); + /// let got = dfa.try_search_fwd(&input)?; + /// assert_eq!(expected, got); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// # Example: specifying the bounds of a search + /// + /// This example shows how providing the bounds of a search can produce + /// different results than simply sub-slicing the haystack. + /// + /// ``` + /// use regex_automata::{dfa::{Automaton, dense}, HalfMatch, Input}; + /// + /// // N.B. We disable Unicode here so that we use a simple ASCII word + /// // boundary. Alternatively, we could enable heuristic support for + /// // Unicode word boundaries. + /// let dfa = dense::DFA::new(r"(?-u)\b[0-9]{3}\b")?; + /// let haystack = "foo123bar".as_bytes(); + /// + /// // Since we sub-slice the haystack, the search doesn't know about the + /// // larger context and assumes that `123` is surrounded by word + /// // boundaries. And of course, the match position is reported relative + /// // to the sub-slice as well, which means we get `3` instead of `6`. + /// let input = Input::new(&haystack[3..6]); + /// let expected = Some(HalfMatch::must(0, 3)); + /// let got = dfa.try_search_fwd(&input)?; + /// assert_eq!(expected, got); + /// + /// // But if we provide the bounds of the search within the context of the + /// // entire haystack, then the search can take the surrounding context + /// // into account. (And if we did find a match, it would be reported + /// // as a valid offset into `haystack` instead of its sub-slice.) 
+ /// let input = Input::new(haystack).range(3..6); + /// let expected = None; + /// let got = dfa.try_search_fwd(&input)?; + /// assert_eq!(expected, got); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + fn try_search_fwd( + &self, + input: &Input<'_>, + ) -> Result, MatchError> { + let utf8empty = self.has_empty() && self.is_utf8(); + let hm = match search::find_fwd(&self, input)? { + None => return Ok(None), + Some(hm) if !utf8empty => return Ok(Some(hm)), + Some(hm) => hm, + }; + // We get to this point when we know our DFA can match the empty string + // AND when UTF-8 mode is enabled. In this case, we skip any matches + // whose offset splits a codepoint. Such a match is necessarily a + // zero-width match, because UTF-8 mode requires the underlying NFA + // to be built such that all non-empty matches span valid UTF-8. + // Therefore, any match that ends in the middle of a codepoint cannot + // be part of a span of valid UTF-8 and thus must be an empty match. + // In such cases, we skip it, so as not to report matches that split a + // codepoint. + // + // Note that this is not a checked assumption. Callers *can* provide an + // NFA with UTF-8 mode enabled but produces non-empty matches that span + // invalid UTF-8. But doing so is documented to result in unspecified + // behavior. + empty::skip_splits_fwd(input, hm, hm.offset(), |input| { + let got = search::find_fwd(&self, input)?; + Ok(got.map(|hm| (hm, hm.offset()))) + }) + } + + /// Executes a reverse search and returns the start of the position of the + /// leftmost match that is found. If no match exists, then `None` is + /// returned. + /// + /// # Errors + /// + /// This routine errors if the search could not complete. This can occur + /// in a number of circumstances: + /// + /// * The configuration of the DFA may permit it to "quit" the search. + /// For example, setting quit bytes or enabling heuristic support for + /// Unicode word boundaries. 
The default configuration does not enable any + /// option that could result in the DFA quitting. + /// * When the provided `Input` configuration is not supported. For + /// example, by providing an unsupported anchor mode. + /// + /// When a search returns an error, callers cannot know whether a match + /// exists or not. + /// + /// # Example + /// + /// This example shows how to use this method with a + /// [`dense::DFA`](crate::dfa::dense::DFA). In particular, this + /// routine is principally useful when used in conjunction with the + /// [`nfa::thompson::Config::reverse`](crate::nfa::thompson::Config::reverse) + /// configuration. In general, it's unlikely to be correct to use + /// both `try_search_fwd` and `try_search_rev` with the same DFA since + /// any particular DFA will only support searching in one direction with + /// respect to the pattern. + /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson, + /// dfa::{Automaton, dense}, + /// HalfMatch, Input, + /// }; + /// + /// let dfa = dense::Builder::new() + /// .thompson(thompson::Config::new().reverse(true)) + /// .build("foo[0-9]+")?; + /// let expected = Some(HalfMatch::must(0, 0)); + /// assert_eq!(expected, dfa.try_search_rev(&Input::new(b"foo12345"))?); + /// + /// // Even though a match is found after reading the last byte (`c`), + /// // the leftmost first match semantics demand that we find the earliest + /// // match that prefers earlier parts of the pattern over latter parts. + /// let dfa = dense::Builder::new() + /// .thompson(thompson::Config::new().reverse(true)) + /// .build("abc|c")?; + /// let expected = Some(HalfMatch::must(0, 0)); + /// assert_eq!(expected, dfa.try_search_rev(&Input::new(b"abc"))?); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// # Example: UTF-8 mode + /// + /// This examples demonstrates that UTF-8 mode applies to reverse + /// DFAs. 
When UTF-8 mode is enabled in the underlying NFA, then all + /// matches reported must correspond to valid UTF-8 spans. This includes + /// prohibiting zero-width matches that split a codepoint. + /// + /// UTF-8 mode is enabled by default. Notice below how the only zero-width + /// matches reported are those at UTF-8 boundaries: + /// + /// ``` + /// use regex_automata::{ + /// dfa::{dense::DFA, Automaton}, + /// nfa::thompson, + /// HalfMatch, Input, MatchKind, + /// }; + /// + /// let dfa = DFA::builder() + /// .thompson(thompson::Config::new().reverse(true)) + /// .build(r"")?; + /// + /// // Run the reverse DFA to collect all matches. + /// let mut input = Input::new("☃"); + /// let mut matches = vec![]; + /// loop { + /// match dfa.try_search_rev(&input)? { + /// None => break, + /// Some(hm) => { + /// matches.push(hm); + /// if hm.offset() == 0 || input.end() == 0 { + /// break; + /// } else if hm.offset() < input.end() { + /// input.set_end(hm.offset()); + /// } else { + /// // This is only necessary to handle zero-width + /// // matches, which of course occur in this example. + /// // Without this, the search would never advance + /// // backwards beyond the initial match. + /// input.set_end(input.end() - 1); + /// } + /// } + /// } + /// } + /// + /// // No matches split a codepoint. + /// let expected = vec![ + /// HalfMatch::must(0, 3), + /// HalfMatch::must(0, 0), + /// ]; + /// assert_eq!(expected, matches); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// Now let's look at the same example, but with UTF-8 mode on the + /// original NFA disabled (which results in disabling UTF-8 mode on the + /// DFA): + /// + /// ``` + /// use regex_automata::{ + /// dfa::{dense::DFA, Automaton}, + /// nfa::thompson, + /// HalfMatch, Input, MatchKind, + /// }; + /// + /// let dfa = DFA::builder() + /// .thompson(thompson::Config::new().reverse(true).utf8(false)) + /// .build(r"")?; + /// + /// // Run the reverse DFA to collect all matches. 
+ /// let mut input = Input::new("☃"); + /// let mut matches = vec![]; + /// loop { + /// match dfa.try_search_rev(&input)? { + /// None => break, + /// Some(hm) => { + /// matches.push(hm); + /// if hm.offset() == 0 || input.end() == 0 { + /// break; + /// } else if hm.offset() < input.end() { + /// input.set_end(hm.offset()); + /// } else { + /// // This is only necessary to handle zero-width + /// // matches, which of course occur in this example. + /// // Without this, the search would never advance + /// // backwards beyond the initial match. + /// input.set_end(input.end() - 1); + /// } + /// } + /// } + /// } + /// + /// // No matches split a codepoint. + /// let expected = vec![ + /// HalfMatch::must(0, 3), + /// HalfMatch::must(0, 2), + /// HalfMatch::must(0, 1), + /// HalfMatch::must(0, 0), + /// ]; + /// assert_eq!(expected, matches); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + fn try_search_rev( + &self, + input: &Input<'_>, + ) -> Result, MatchError> { + let utf8empty = self.has_empty() && self.is_utf8(); + let hm = match search::find_rev(self, input)? { + None => return Ok(None), + Some(hm) if !utf8empty => return Ok(Some(hm)), + Some(hm) => hm, + }; + empty::skip_splits_rev(input, hm, hm.offset(), |input| { + let got = search::find_rev(self, input)?; + Ok(got.map(|hm| (hm, hm.offset()))) + }) + } + + /// Executes an overlapping forward search. Matches, if one exists, can be + /// obtained via the [`OverlappingState::get_match`] method. + /// + /// This routine is principally only useful when searching for multiple + /// patterns on inputs where multiple patterns may match the same regions + /// of text. In particular, callers must preserve the automaton's search + /// state from prior calls so that the implementation knows where the last + /// match occurred. + /// + /// When using this routine to implement an iterator of overlapping + /// matches, the `start` of the search should always be set to the end + /// of the last match. 
If more patterns match at the previous location, + /// then they will be immediately returned. (This is tracked by the given + /// overlapping state.) Otherwise, the search continues at the starting + /// position given. + /// + /// If for some reason you want the search to forget about its previous + /// state and restart the search at a particular position, then setting the + /// state to [`OverlappingState::start`] will accomplish that. + /// + /// # Errors + /// + /// This routine errors if the search could not complete. This can occur + /// in a number of circumstances: + /// + /// * The configuration of the DFA may permit it to "quit" the search. + /// For example, setting quit bytes or enabling heuristic support for + /// Unicode word boundaries. The default configuration does not enable any + /// option that could result in the DFA quitting. + /// * When the provided `Input` configuration is not supported. For + /// example, by providing an unsupported anchor mode. + /// + /// When a search returns an error, callers cannot know whether a match + /// exists or not. + /// + /// # Example + /// + /// This example shows how to run a basic overlapping search with a + /// [`dense::DFA`](crate::dfa::dense::DFA). Notice that we build the + /// automaton with a `MatchKind::All` configuration. Overlapping searches + /// are unlikely to work as one would expect when using the default + /// `MatchKind::LeftmostFirst` match semantics, since leftmost-first + /// matching is fundamentally incompatible with overlapping searches. + /// Namely, overlapping searches need to report matches as they are seen, + /// where as leftmost-first searches will continue searching even after a + /// match has been observed in order to find the conventional end position + /// of the match. More concretely, leftmost-first searches use dead states + /// to terminate a search after a specific match can no longer be extended. 
+ /// Overlapping searches instead do the opposite by continuing the search + /// to find totally new matches (potentially of other patterns). + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{ + /// dfa::{Automaton, OverlappingState, dense}, + /// HalfMatch, Input, MatchKind, + /// }; + /// + /// let dfa = dense::Builder::new() + /// .configure(dense::Config::new().match_kind(MatchKind::All)) + /// .build_many(&[r"[[:word:]]+$", r"[[:^space:]]+$"])?; + /// let haystack = "@foo"; + /// let mut state = OverlappingState::start(); + /// + /// let expected = Some(HalfMatch::must(1, 4)); + /// dfa.try_search_overlapping_fwd(&Input::new(haystack), &mut state)?; + /// assert_eq!(expected, state.get_match()); + /// + /// // The first pattern also matches at the same position, so re-running + /// // the search will yield another match. Notice also that the first + /// // pattern is returned after the second. This is because the second + /// // pattern begins its match before the first, is therefore an earlier + /// // match and is thus reported first. + /// let expected = Some(HalfMatch::must(0, 4)); + /// dfa.try_search_overlapping_fwd(&Input::new(haystack), &mut state)?; + /// assert_eq!(expected, state.get_match()); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + fn try_search_overlapping_fwd( + &self, + input: &Input<'_>, + state: &mut OverlappingState, + ) -> Result<(), MatchError> { + let utf8empty = self.has_empty() && self.is_utf8(); + search::find_overlapping_fwd(self, input, state)?; + match state.get_match() { + None => Ok(()), + Some(_) if !utf8empty => Ok(()), + Some(_) => skip_empty_utf8_splits_overlapping( + input, + state, + |input, state| { + search::find_overlapping_fwd(self, input, state) + }, + ), + } + } + + /// Executes a reverse overlapping forward search. Matches, if one exists, + /// can be obtained via the [`OverlappingState::get_match`] method. 
+ /// + /// When using this routine to implement an iterator of overlapping + /// matches, the `start` of the search should remain invariant throughout + /// iteration. The `OverlappingState` given to the search will keep track + /// of the current position of the search. (This is because multiple + /// matches may be reported at the same position, so only the search + /// implementation itself knows when to advance the position.) + /// + /// If for some reason you want the search to forget about its previous + /// state and restart the search at a particular position, then setting the + /// state to [`OverlappingState::start`] will accomplish that. + /// + /// # Errors + /// + /// This routine errors if the search could not complete. This can occur + /// in a number of circumstances: + /// + /// * The configuration of the DFA may permit it to "quit" the search. + /// For example, setting quit bytes or enabling heuristic support for + /// Unicode word boundaries. The default configuration does not enable any + /// option that could result in the DFA quitting. + /// * When the provided `Input` configuration is not supported. For + /// example, by providing an unsupported anchor mode. + /// + /// When a search returns an error, callers cannot know whether a match + /// exists or not. + /// + /// # Example: UTF-8 mode + /// + /// This examples demonstrates that UTF-8 mode applies to reverse + /// DFAs. When UTF-8 mode is enabled in the underlying NFA, then all + /// matches reported must correspond to valid UTF-8 spans. This includes + /// prohibiting zero-width matches that split a codepoint. + /// + /// UTF-8 mode is enabled by default. 
Notice below how the only zero-width + /// matches reported are those at UTF-8 boundaries: + /// + /// ``` + /// use regex_automata::{ + /// dfa::{dense::DFA, Automaton, OverlappingState}, + /// nfa::thompson, + /// HalfMatch, Input, MatchKind, + /// }; + /// + /// let dfa = DFA::builder() + /// .configure(DFA::config().match_kind(MatchKind::All)) + /// .thompson(thompson::Config::new().reverse(true)) + /// .build_many(&[r"", r"☃"])?; + /// + /// // Run the reverse DFA to collect all matches. + /// let input = Input::new("☃"); + /// let mut state = OverlappingState::start(); + /// let mut matches = vec![]; + /// loop { + /// dfa.try_search_overlapping_rev(&input, &mut state)?; + /// match state.get_match() { + /// None => break, + /// Some(hm) => matches.push(hm), + /// } + /// } + /// + /// // No matches split a codepoint. + /// let expected = vec![ + /// HalfMatch::must(0, 3), + /// HalfMatch::must(1, 0), + /// HalfMatch::must(0, 0), + /// ]; + /// assert_eq!(expected, matches); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// Now let's look at the same example, but with UTF-8 mode on the + /// original NFA disabled (which results in disabling UTF-8 mode on the + /// DFA): + /// + /// ``` + /// use regex_automata::{ + /// dfa::{dense::DFA, Automaton, OverlappingState}, + /// nfa::thompson, + /// HalfMatch, Input, MatchKind, + /// }; + /// + /// let dfa = DFA::builder() + /// .configure(DFA::config().match_kind(MatchKind::All)) + /// .thompson(thompson::Config::new().reverse(true).utf8(false)) + /// .build_many(&[r"", r"☃"])?; + /// + /// // Run the reverse DFA to collect all matches. 
+ /// let input = Input::new("☃"); + /// let mut state = OverlappingState::start(); + /// let mut matches = vec![]; + /// loop { + /// dfa.try_search_overlapping_rev(&input, &mut state)?; + /// match state.get_match() { + /// None => break, + /// Some(hm) => matches.push(hm), + /// } + /// } + /// + /// // Now *all* positions match, even within a codepoint, + /// // because we lifted the requirement that matches + /// // correspond to valid UTF-8 spans. + /// let expected = vec![ + /// HalfMatch::must(0, 3), + /// HalfMatch::must(0, 2), + /// HalfMatch::must(0, 1), + /// HalfMatch::must(1, 0), + /// HalfMatch::must(0, 0), + /// ]; + /// assert_eq!(expected, matches); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + fn try_search_overlapping_rev( + &self, + input: &Input<'_>, + state: &mut OverlappingState, + ) -> Result<(), MatchError> { + let utf8empty = self.has_empty() && self.is_utf8(); + search::find_overlapping_rev(self, input, state)?; + match state.get_match() { + None => Ok(()), + Some(_) if !utf8empty => Ok(()), + Some(_) => skip_empty_utf8_splits_overlapping( + input, + state, + |input, state| { + search::find_overlapping_rev(self, input, state) + }, + ), + } + } + + /// Writes the set of patterns that match anywhere in the given search + /// configuration to `patset`. If multiple patterns match at the same + /// position and the underlying DFA supports overlapping matches, then all + /// matching patterns are written to the given set. + /// + /// Unless all of the patterns in this DFA are anchored, then generally + /// speaking, this will visit every byte in the haystack. + /// + /// This search routine *does not* clear the pattern set. This gives some + /// flexibility to the caller (e.g., running multiple searches with the + /// same pattern set), but does make the API bug-prone if you're reusing + /// the same pattern set for multiple searches but intended them to be + /// independent. 
+ /// + /// If a pattern ID matched but the given `PatternSet` does not have + /// sufficient capacity to store it, then it is not inserted and silently + /// dropped. + /// + /// # Errors + /// + /// This routine errors if the search could not complete. This can occur + /// in a number of circumstances: + /// + /// * The configuration of the DFA may permit it to "quit" the search. + /// For example, setting quit bytes or enabling heuristic support for + /// Unicode word boundaries. The default configuration does not enable any + /// option that could result in the DFA quitting. + /// * When the provided `Input` configuration is not supported. For + /// example, by providing an unsupported anchor mode. + /// + /// When a search returns an error, callers cannot know whether a match + /// exists or not. + /// + /// # Example + /// + /// This example shows how to find all matching patterns in a haystack, + /// even when some patterns match at the same position as other patterns. + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{ + /// dfa::{Automaton, dense::DFA}, + /// Input, MatchKind, PatternSet, + /// }; + /// + /// let patterns = &[ + /// r"[[:word:]]+", + /// r"[0-9]+", + /// r"[[:alpha:]]+", + /// r"foo", + /// r"bar", + /// r"barfoo", + /// r"foobar", + /// ]; + /// let dfa = DFA::builder() + /// .configure(DFA::config().match_kind(MatchKind::All)) + /// .build_many(patterns)?; + /// + /// let input = Input::new("foobar"); + /// let mut patset = PatternSet::new(dfa.pattern_len()); + /// dfa.try_which_overlapping_matches(&input, &mut patset)?; + /// let expected = vec![0, 2, 3, 4, 6]; + /// let got: Vec = patset.iter().map(|p| p.as_usize()).collect(); + /// assert_eq!(expected, got); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[cfg(feature = "alloc")] + #[inline] + fn try_which_overlapping_matches( + &self, + input: &Input<'_>, + patset: &mut PatternSet, + ) -> Result<(), MatchError> { + let mut state = 
OverlappingState::start(); + while let Some(m) = { + self.try_search_overlapping_fwd(input, &mut state)?; + state.get_match() + } { + let _ = patset.insert(m.pattern()); + // There's nothing left to find, so we can stop. Or the caller + // asked us to. + if patset.is_full() || input.get_earliest() { + break; + } + } + Ok(()) + } +} + +unsafe impl<'a, A: Automaton + ?Sized> Automaton for &'a A { + #[inline] + fn next_state(&self, current: StateID, input: u8) -> StateID { + (**self).next_state(current, input) + } + + #[inline] + unsafe fn next_state_unchecked( + &self, + current: StateID, + input: u8, + ) -> StateID { + (**self).next_state_unchecked(current, input) + } + + #[inline] + fn next_eoi_state(&self, current: StateID) -> StateID { + (**self).next_eoi_state(current) + } + + #[inline] + fn start_state( + &self, + config: &start::Config, + ) -> Result { + (**self).start_state(config) + } + + #[inline] + fn start_state_forward( + &self, + input: &Input<'_>, + ) -> Result { + (**self).start_state_forward(input) + } + + #[inline] + fn start_state_reverse( + &self, + input: &Input<'_>, + ) -> Result { + (**self).start_state_reverse(input) + } + + #[inline] + fn universal_start_state(&self, mode: Anchored) -> Option { + (**self).universal_start_state(mode) + } + + #[inline] + fn is_special_state(&self, id: StateID) -> bool { + (**self).is_special_state(id) + } + + #[inline] + fn is_dead_state(&self, id: StateID) -> bool { + (**self).is_dead_state(id) + } + + #[inline] + fn is_quit_state(&self, id: StateID) -> bool { + (**self).is_quit_state(id) + } + + #[inline] + fn is_match_state(&self, id: StateID) -> bool { + (**self).is_match_state(id) + } + + #[inline] + fn is_start_state(&self, id: StateID) -> bool { + (**self).is_start_state(id) + } + + #[inline] + fn is_accel_state(&self, id: StateID) -> bool { + (**self).is_accel_state(id) + } + + #[inline] + fn pattern_len(&self) -> usize { + (**self).pattern_len() + } + + #[inline] + fn match_len(&self, id: StateID) -> 
usize { + (**self).match_len(id) + } + + #[inline] + fn match_pattern(&self, id: StateID, index: usize) -> PatternID { + (**self).match_pattern(id, index) + } + + #[inline] + fn has_empty(&self) -> bool { + (**self).has_empty() + } + + #[inline] + fn is_utf8(&self) -> bool { + (**self).is_utf8() + } + + #[inline] + fn is_always_start_anchored(&self) -> bool { + (**self).is_always_start_anchored() + } + + #[inline] + fn accelerator(&self, id: StateID) -> &[u8] { + (**self).accelerator(id) + } + + #[inline] + fn get_prefilter(&self) -> Option<&Prefilter> { + (**self).get_prefilter() + } + + #[inline] + fn try_search_fwd( + &self, + input: &Input<'_>, + ) -> Result, MatchError> { + (**self).try_search_fwd(input) + } + + #[inline] + fn try_search_rev( + &self, + input: &Input<'_>, + ) -> Result, MatchError> { + (**self).try_search_rev(input) + } + + #[inline] + fn try_search_overlapping_fwd( + &self, + input: &Input<'_>, + state: &mut OverlappingState, + ) -> Result<(), MatchError> { + (**self).try_search_overlapping_fwd(input, state) + } + + #[inline] + fn try_search_overlapping_rev( + &self, + input: &Input<'_>, + state: &mut OverlappingState, + ) -> Result<(), MatchError> { + (**self).try_search_overlapping_rev(input, state) + } + + #[cfg(feature = "alloc")] + #[inline] + fn try_which_overlapping_matches( + &self, + input: &Input<'_>, + patset: &mut PatternSet, + ) -> Result<(), MatchError> { + (**self).try_which_overlapping_matches(input, patset) + } +} + +/// Represents the current state of an overlapping search. +/// +/// This is used for overlapping searches since they need to know something +/// about the previous search. For example, when multiple patterns match at the +/// same position, this state tracks the last reported pattern so that the next +/// search knows whether to report another matching pattern or continue with +/// the search at the next position. Additionally, it also tracks which state +/// the last search call terminated in. 
+/// +/// This type provides little introspection capabilities. The only thing a +/// caller can do is construct it and pass it around to permit search routines +/// to use it to track state, and also ask whether a match has been found. +/// +/// Callers should always provide a fresh state constructed via +/// [`OverlappingState::start`] when starting a new search. Reusing state from +/// a previous search may result in incorrect results. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct OverlappingState { + /// The match reported by the most recent overlapping search to use this + /// state. + /// + /// If a search does not find any matches, then it is expected to clear + /// this value. + pub(crate) mat: Option, + /// The state ID of the state at which the search was in when the call + /// terminated. When this is a match state, `last_match` must be set to a + /// non-None value. + /// + /// A `None` value indicates the start state of the corresponding + /// automaton. We cannot use the actual ID, since any one automaton may + /// have many start states, and which one is in use depends on several + /// search-time factors. + pub(crate) id: Option, + /// The position of the search. + /// + /// When `id` is None (i.e., we are starting a search), this is set to + /// the beginning of the search as given by the caller regardless of its + /// current value. Subsequent calls to an overlapping search pick up at + /// this offset. + pub(crate) at: usize, + /// The index into the matching patterns of the next match to report if the + /// current state is a match state. Note that this may be 1 greater than + /// the total number of matches to report for the current match state. (In + /// which case, no more matches should be reported at the current position + /// and the search should advance to the next position.) + pub(crate) next_match_index: Option, + /// This is set to true when a reverse overlapping search has entered its + /// EOI transitions. 
+ /// + /// This isn't used in a forward search because it knows to stop once the + /// position exceeds the end of the search range. In a reverse search, + /// since we use unsigned offsets, we don't "know" once we've gone past + /// `0`. So the only way to detect it is with this extra flag. The reverse + /// overlapping search knows to terminate specifically after it has + /// reported all matches after following the EOI transition. + pub(crate) rev_eoi: bool, +} + +impl OverlappingState { + /// Create a new overlapping state that begins at the start state of any + /// automaton. + pub fn start() -> OverlappingState { + OverlappingState { + mat: None, + id: None, + at: 0, + next_match_index: None, + rev_eoi: false, + } + } + + /// Return the match result of the most recent search to execute with this + /// state. + /// + /// A searches will clear this result automatically, such that if no + /// match is found, this will correctly report `None`. + pub fn get_match(&self) -> Option { + self.mat + } +} + +/// An error that can occur when computing the start state for a search. +/// +/// Computing a start state can fail for a few reasons, either based on +/// incorrect configuration or even based on whether the look-behind byte +/// triggers a quit state. Typically one does not need to handle this error +/// if you're using [`Automaton::start_state_forward`] (or its reverse +/// counterpart), as that routine automatically converts `StartError` to a +/// [`MatchError`] for you. +/// +/// This error may be returned by the [`Automaton::start_state`] routine. +/// +/// This error implements the `std::error::Error` trait when the `std` feature +/// is enabled. +/// +/// This error is marked as non-exhaustive. New variants may be added in a +/// semver compatible release. +#[non_exhaustive] +#[derive(Clone, Debug)] +pub enum StartError { + /// An error that occurs when a starting configuration's look-behind byte + /// is in this DFA's quit set. 
+ Quit { + /// The quit byte that was found. + byte: u8, + }, + /// An error that occurs when the caller requests an anchored mode that + /// isn't supported by the DFA. + UnsupportedAnchored { + /// The anchored mode given that is unsupported. + mode: Anchored, + }, +} + +impl StartError { + pub(crate) fn quit(byte: u8) -> StartError { + StartError::Quit { byte } + } + + pub(crate) fn unsupported_anchored(mode: Anchored) -> StartError { + StartError::UnsupportedAnchored { mode } + } +} + +#[cfg(feature = "std")] +impl std::error::Error for StartError {} + +impl core::fmt::Display for StartError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match *self { + StartError::Quit { byte } => write!( + f, + "error computing start state because the look-behind byte \ + {:?} triggered a quit state", + crate::util::escape::DebugByte(byte), + ), + StartError::UnsupportedAnchored { mode: Anchored::Yes } => { + write!( + f, + "error computing start state because \ + anchored searches are not supported or enabled" + ) + } + StartError::UnsupportedAnchored { mode: Anchored::No } => { + write!( + f, + "error computing start state because \ + unanchored searches are not supported or enabled" + ) + } + StartError::UnsupportedAnchored { + mode: Anchored::Pattern(pid), + } => { + write!( + f, + "error computing start state because \ + anchored searches for a specific pattern ({}) \ + are not supported or enabled", + pid.as_usize(), + ) + } + } + } +} + +/// Runs the given overlapping `search` function (forwards or backwards) until +/// a match is found whose offset does not split a codepoint. +/// +/// This is *not* always correct to call. It should only be called when the DFA +/// has UTF-8 mode enabled *and* it can produce zero-width matches. Calling +/// this when both of those things aren't true might result in legitimate +/// matches getting skipped. 
+#[cold] +#[inline(never)] +fn skip_empty_utf8_splits_overlapping( + input: &Input<'_>, + state: &mut OverlappingState, + mut search: F, +) -> Result<(), MatchError> +where + F: FnMut(&Input<'_>, &mut OverlappingState) -> Result<(), MatchError>, +{ + // Note that this routine works for forwards and reverse searches + // even though there's no code here to handle those cases. That's + // because overlapping searches drive themselves to completion via + // `OverlappingState`. So all we have to do is push it until no matches are + // found. + + let mut hm = match state.get_match() { + None => return Ok(()), + Some(hm) => hm, + }; + if input.get_anchored().is_anchored() { + if !input.is_char_boundary(hm.offset()) { + state.mat = None; + } + return Ok(()); + } + while !input.is_char_boundary(hm.offset()) { + search(input, state)?; + hm = match state.get_match() { + None => return Ok(()), + Some(hm) => hm, + }; + } + Ok(()) +} + +/// Write a prefix "state" indicator for fmt::Debug impls. +/// +/// Specifically, this tries to succinctly distinguish the different types of +/// states: dead states, quit states, accelerated states, start states and +/// match states. It even accounts for the possible overlappings of different +/// state types. 
+pub(crate) fn fmt_state_indicator( + f: &mut core::fmt::Formatter<'_>, + dfa: A, + id: StateID, +) -> core::fmt::Result { + if dfa.is_dead_state(id) { + write!(f, "D")?; + if dfa.is_start_state(id) { + write!(f, ">")?; + } else { + write!(f, " ")?; + } + } else if dfa.is_quit_state(id) { + write!(f, "Q ")?; + } else if dfa.is_start_state(id) { + if dfa.is_accel_state(id) { + write!(f, "A>")?; + } else { + write!(f, " >")?; + } + } else if dfa.is_match_state(id) { + if dfa.is_accel_state(id) { + write!(f, "A*")?; + } else { + write!(f, " *")?; + } + } else if dfa.is_accel_state(id) { + write!(f, "A ")?; + } else { + write!(f, " ")?; + } + Ok(()) +} + +#[cfg(all(test, feature = "syntax", feature = "dfa-build"))] +mod tests { + // A basic test ensuring that our Automaton trait is object safe. (This is + // the main reason why we don't define the search routines as generic over + // Into.) + #[test] + fn object_safe() { + use crate::{ + dfa::{dense, Automaton}, + HalfMatch, Input, + }; + + let dfa = dense::DFA::new("abc").unwrap(); + let dfa: &dyn Automaton = &dfa; + assert_eq!( + Ok(Some(HalfMatch::must(0, 6))), + dfa.try_search_fwd(&Input::new(b"xyzabcxyz")), + ); + } +} diff --git a/vendor/regex-automata/src/dfa/dense.rs b/vendor/regex-automata/src/dfa/dense.rs new file mode 100644 index 0000000..6fc61dc --- /dev/null +++ b/vendor/regex-automata/src/dfa/dense.rs @@ -0,0 +1,5153 @@ +/*! +Types and routines specific to dense DFAs. + +This module is the home of [`dense::DFA`](DFA). + +This module also contains a [`dense::Builder`](Builder) and a +[`dense::Config`](Config) for building and configuring a dense DFA. 
+*/ + +#[cfg(feature = "dfa-build")] +use core::cmp; +use core::{convert::TryFrom, fmt, iter, mem::size_of, slice}; + +#[cfg(feature = "dfa-build")] +use alloc::{ + collections::{BTreeMap, BTreeSet}, + vec, + vec::Vec, +}; + +#[cfg(feature = "dfa-build")] +use crate::{ + dfa::{ + accel::Accel, determinize, minimize::Minimizer, remapper::Remapper, + sparse, + }, + nfa::thompson, + util::{look::LookMatcher, search::MatchKind}, +}; +use crate::{ + dfa::{ + accel::Accels, + automaton::{fmt_state_indicator, Automaton, StartError}, + special::Special, + start::StartKind, + DEAD, + }, + util::{ + alphabet::{self, ByteClasses, ByteSet}, + int::{Pointer, Usize}, + prefilter::Prefilter, + primitives::{PatternID, StateID}, + search::Anchored, + start::{self, Start, StartByteMap}, + wire::{self, DeserializeError, Endian, SerializeError}, + }, +}; + +/// The label that is pre-pended to a serialized DFA. +const LABEL: &str = "rust-regex-automata-dfa-dense"; + +/// The format version of dense regexes. This version gets incremented when a +/// change occurs. A change may not necessarily be a breaking change, but the +/// version does permit good error messages in the case where a breaking change +/// is made. +const VERSION: u32 = 2; + +/// The configuration used for compiling a dense DFA. +/// +/// As a convenience, [`DFA::config`] is an alias for [`Config::new`]. The +/// advantage of the former is that it often lets you avoid importing the +/// `Config` type directly. +/// +/// A dense DFA configuration is a simple data object that is typically used +/// with [`dense::Builder::configure`](self::Builder::configure). +/// +/// The default configuration guarantees that a search will never return +/// a "quit" error, although it is possible for a search to fail if +/// [`Config::starts_for_each_pattern`] wasn't enabled (which it is +/// not by default) and an [`Anchored::Pattern`] mode is requested via +/// [`Input`](crate::Input). 
+#[cfg(feature = "dfa-build")] +#[derive(Clone, Debug, Default)] +pub struct Config { + // As with other configuration types in this crate, we put all our knobs + // in options so that we can distinguish between "default" and "not set." + // This makes it possible to easily combine multiple configurations + // without default values overwriting explicitly specified values. See the + // 'overwrite' method. + // + // For docs on the fields below, see the corresponding method setters. + accelerate: Option, + pre: Option>, + minimize: Option, + match_kind: Option, + start_kind: Option, + starts_for_each_pattern: Option, + byte_classes: Option, + unicode_word_boundary: Option, + quitset: Option, + specialize_start_states: Option, + dfa_size_limit: Option>, + determinize_size_limit: Option>, +} + +#[cfg(feature = "dfa-build")] +impl Config { + /// Return a new default dense DFA compiler configuration. + pub fn new() -> Config { + Config::default() + } + + /// Enable state acceleration. + /// + /// When enabled, DFA construction will analyze each state to determine + /// whether it is eligible for simple acceleration. Acceleration typically + /// occurs when most of a state's transitions loop back to itself, leaving + /// only a select few bytes that will exit the state. When this occurs, + /// other routines like `memchr` can be used to look for those bytes which + /// may be much faster than traversing the DFA. + /// + /// Callers may elect to disable this if consistent performance is more + /// desirable than variable performance. Namely, acceleration can sometimes + /// make searching slower than it otherwise would be if the transitions + /// that leave accelerated states are traversed frequently. + /// + /// See [`Automaton::accelerator`] for an example. + /// + /// This is enabled by default. + pub fn accelerate(mut self, yes: bool) -> Config { + self.accelerate = Some(yes); + self + } + + /// Set a prefilter to be used whenever a start state is entered. 
+ /// + /// A [`Prefilter`] in this context is meant to accelerate searches by + /// looking for literal prefixes that every match for the corresponding + /// pattern (or patterns) must start with. Once a prefilter produces a + /// match, the underlying search routine continues on to try and confirm + /// the match. + /// + /// Be warned that setting a prefilter does not guarantee that the search + /// will be faster. While it's usually a good bet, if the prefilter + /// produces a lot of false positive candidates (i.e., positions matched + /// by the prefilter but not by the regex), then the overall result can + /// be slower than if you had just executed the regex engine without any + /// prefilters. + /// + /// Note that unless [`Config::specialize_start_states`] has been + /// explicitly set, then setting this will also enable (when `pre` is + /// `Some`) or disable (when `pre` is `None`) start state specialization. + /// This occurs because without start state specialization, a prefilter + /// is likely to be less effective. And without a prefilter, start state + /// specialization is usually pointless. + /// + /// **WARNING:** Note that prefilters are not preserved as part of + /// serialization. Serializing a DFA will drop its prefilter. + /// + /// By default no prefilter is set. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{ + /// dfa::{dense::DFA, Automaton}, + /// util::prefilter::Prefilter, + /// Input, HalfMatch, MatchKind, + /// }; + /// + /// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["foo", "bar"]); + /// let re = DFA::builder() + /// .configure(DFA::config().prefilter(pre)) + /// .build(r"(foo|bar)[a-z]+")?; + /// let input = Input::new("foo1 barfox bar"); + /// assert_eq!( + /// Some(HalfMatch::must(0, 11)), + /// re.try_search_fwd(&input)?, + /// ); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// Be warned though that an incorrect prefilter can lead to incorrect + /// results! 
+ /// + /// ``` + /// use regex_automata::{ + /// dfa::{dense::DFA, Automaton}, + /// util::prefilter::Prefilter, + /// Input, HalfMatch, MatchKind, + /// }; + /// + /// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["foo", "car"]); + /// let re = DFA::builder() + /// .configure(DFA::config().prefilter(pre)) + /// .build(r"(foo|bar)[a-z]+")?; + /// let input = Input::new("foo1 barfox bar"); + /// assert_eq!( + /// // No match reported even though there clearly is one! + /// None, + /// re.try_search_fwd(&input)?, + /// ); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn prefilter(mut self, pre: Option) -> Config { + self.pre = Some(pre); + if self.specialize_start_states.is_none() { + self.specialize_start_states = + Some(self.get_prefilter().is_some()); + } + self + } + + /// Minimize the DFA. + /// + /// When enabled, the DFA built will be minimized such that it is as small + /// as possible. + /// + /// Whether one enables minimization or not depends on the types of costs + /// you're willing to pay and how much you care about its benefits. In + /// particular, minimization has worst case `O(n*k*logn)` time and `O(k*n)` + /// space, where `n` is the number of DFA states and `k` is the alphabet + /// size. In practice, minimization can be quite costly in terms of both + /// space and time, so it should only be done if you're willing to wait + /// longer to produce a DFA. In general, you might want a minimal DFA in + /// the following circumstances: + /// + /// 1. You would like to optimize for the size of the automaton. This can + /// manifest in one of two ways. Firstly, if you're converting the + /// DFA into Rust code (or a table embedded in the code), then a minimal + /// DFA will translate into a corresponding reduction in code size, and + /// thus, also the final compiled binary size. Secondly, if you are + /// building many DFAs and putting them on the heap, you'll be able to + /// fit more if they are smaller. 
Note though that building a minimal + /// DFA itself requires additional space; you only realize the space + /// savings once the minimal DFA is constructed (at which point, the + /// space used for minimization is freed). + /// 2. You've observed that a smaller DFA results in faster match + /// performance. Naively, this isn't guaranteed since there is no + /// inherent difference between matching with a bigger-than-minimal + /// DFA and a minimal DFA. However, a smaller DFA may make use of your + /// CPU's cache more efficiently. + /// 3. You are trying to establish an equivalence between regular + /// languages. The standard method for this is to build a minimal DFA + /// for each language and then compare them. If the DFAs are equivalent + /// (up to state renaming), then the languages are equivalent. + /// + /// Typically, minimization only makes sense as an offline process. That + /// is, one might minimize a DFA before serializing it to persistent + /// storage. In practical terms, minimization can take around an order of + /// magnitude more time than compiling the initial DFA via determinization. + /// + /// This option is disabled by default. + pub fn minimize(mut self, yes: bool) -> Config { + self.minimize = Some(yes); + self + } + + /// Set the desired match semantics. + /// + /// The default is [`MatchKind::LeftmostFirst`], which corresponds to the + /// match semantics of Perl-like regex engines. That is, when multiple + /// patterns would match at the same leftmost position, the pattern that + /// appears first in the concrete syntax is chosen. + /// + /// Currently, the only other kind of match semantics supported is + /// [`MatchKind::All`]. This corresponds to classical DFA construction + /// where all possible matches are added to the DFA. + /// + /// Typically, `All` is used when one wants to execute an overlapping + /// search and `LeftmostFirst` otherwise. 
In particular, it rarely makes + /// sense to use `All` with the various "leftmost" find routines, since the + /// leftmost routines depend on the `LeftmostFirst` automata construction + /// strategy. Specifically, `LeftmostFirst` adds dead states to the DFA + /// as a way to terminate the search and report a match. `LeftmostFirst` + /// also supports non-greedy matches using this strategy where as `All` + /// does not. + /// + /// # Example: overlapping search + /// + /// This example shows the typical use of `MatchKind::All`, which is to + /// report overlapping matches. + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{ + /// dfa::{Automaton, OverlappingState, dense}, + /// HalfMatch, Input, MatchKind, + /// }; + /// + /// let dfa = dense::Builder::new() + /// .configure(dense::Config::new().match_kind(MatchKind::All)) + /// .build_many(&[r"\w+$", r"\S+$"])?; + /// let input = Input::new("@foo"); + /// let mut state = OverlappingState::start(); + /// + /// let expected = Some(HalfMatch::must(1, 4)); + /// dfa.try_search_overlapping_fwd(&input, &mut state)?; + /// assert_eq!(expected, state.get_match()); + /// + /// // The first pattern also matches at the same position, so re-running + /// // the search will yield another match. Notice also that the first + /// // pattern is returned after the second. This is because the second + /// // pattern begins its match before the first, is therefore an earlier + /// // match and is thus reported first. + /// let expected = Some(HalfMatch::must(0, 4)); + /// dfa.try_search_overlapping_fwd(&input, &mut state)?; + /// assert_eq!(expected, state.get_match()); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// # Example: reverse automaton to find start of match + /// + /// Another example for using `MatchKind::All` is for constructing a + /// reverse automaton to find the start of a match. 
`All` semantics are + /// used for this in order to find the longest possible match, which + /// corresponds to the leftmost starting position. + /// + /// Note that if you need the starting position then + /// [`dfa::regex::Regex`](crate::dfa::regex::Regex) will handle this for + /// you, so it's usually not necessary to do this yourself. + /// + /// ``` + /// use regex_automata::{ + /// dfa::{dense, Automaton, StartKind}, + /// nfa::thompson::NFA, + /// Anchored, HalfMatch, Input, MatchKind, + /// }; + /// + /// let haystack = "123foobar456".as_bytes(); + /// let pattern = r"[a-z]+r"; + /// + /// let dfa_fwd = dense::DFA::new(pattern)?; + /// let dfa_rev = dense::Builder::new() + /// .thompson(NFA::config().reverse(true)) + /// .configure(dense::Config::new() + /// // This isn't strictly necessary since both anchored and + /// // unanchored searches are supported by default. But since + /// // finding the start-of-match only requires anchored searches, + /// // we can get rid of the unanchored configuration and possibly + /// // slim down our DFA considerably. + /// .start_kind(StartKind::Anchored) + /// .match_kind(MatchKind::All) + /// ) + /// .build(pattern)?; + /// let expected_fwd = HalfMatch::must(0, 9); + /// let expected_rev = HalfMatch::must(0, 3); + /// let got_fwd = dfa_fwd.try_search_fwd(&Input::new(haystack))?.unwrap(); + /// // Here we don't specify the pattern to search for since there's only + /// // one pattern and we're doing a leftmost search. But if this were an + /// // overlapping search, you'd need to specify the pattern that matched + /// // in the forward direction. (Otherwise, you might wind up finding the + /// // starting position of a match of some other pattern.) That in turn + /// // requires building the reverse automaton with starts_for_each_pattern + /// // enabled. Indeed, this is what Regex does internally. 
+ /// let input = Input::new(haystack) + /// .range(..got_fwd.offset()) + /// .anchored(Anchored::Yes); + /// let got_rev = dfa_rev.try_search_rev(&input)?.unwrap(); + /// assert_eq!(expected_fwd, got_fwd); + /// assert_eq!(expected_rev, got_rev); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn match_kind(mut self, kind: MatchKind) -> Config { + self.match_kind = Some(kind); + self + } + + /// The type of starting state configuration to use for a DFA. + /// + /// By default, the starting state configuration is [`StartKind::Both`]. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{ + /// dfa::{dense::DFA, Automaton, StartKind}, + /// Anchored, HalfMatch, Input, + /// }; + /// + /// let haystack = "quux foo123"; + /// let expected = HalfMatch::must(0, 11); + /// + /// // By default, DFAs support both anchored and unanchored searches. + /// let dfa = DFA::new(r"[0-9]+")?; + /// let input = Input::new(haystack); + /// assert_eq!(Some(expected), dfa.try_search_fwd(&input)?); + /// + /// // But if we only need anchored searches, then we can build a DFA + /// // that only supports anchored searches. This leads to a smaller DFA + /// // (potentially significantly smaller in some cases), but a DFA that + /// // will panic if you try to use it with an unanchored search. + /// let dfa = DFA::builder() + /// .configure(DFA::config().start_kind(StartKind::Anchored)) + /// .build(r"[0-9]+")?; + /// let input = Input::new(haystack) + /// .range(8..) + /// .anchored(Anchored::Yes); + /// assert_eq!(Some(expected), dfa.try_search_fwd(&input)?); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn start_kind(mut self, kind: StartKind) -> Config { + self.start_kind = Some(kind); + self + } + + /// Whether to compile a separate start state for each pattern in the + /// automaton. + /// + /// When enabled, a separate **anchored** start state is added for each + /// pattern in the DFA. 
When this start state is used, then the DFA will + /// only search for matches for the pattern specified, even if there are + /// other patterns in the DFA. + /// + /// The main downside of this option is that it can potentially increase + /// the size of the DFA and/or increase the time it takes to build the DFA. + /// + /// There are a few reasons one might want to enable this (it's disabled + /// by default): + /// + /// 1. When looking for the start of an overlapping match (using a + /// reverse DFA), doing it correctly requires starting the reverse search + /// using the starting state of the pattern that matched in the forward + /// direction. Indeed, when building a [`Regex`](crate::dfa::regex::Regex), + /// it will automatically enable this option when building the reverse DFA + /// internally. + /// 2. When you want to use a DFA with multiple patterns to both search + /// for matches of any pattern or to search for anchored matches of one + /// particular pattern while using the same DFA. (Otherwise, you would need + /// to compile a new DFA for each pattern.) + /// 3. Since the start states added for each pattern are anchored, if you + /// compile an unanchored DFA with one pattern while also enabling this + /// option, then you can use the same DFA to perform anchored or unanchored + /// searches. The latter you get with the standard search APIs. The former + /// you get from the various `_at` search methods that allow you specify a + /// pattern ID to search for. + /// + /// By default this is disabled. + /// + /// # Example + /// + /// This example shows how to use this option to permit the same DFA to + /// run both anchored and unanchored searches for a single pattern. 
+ /// + /// ``` + /// use regex_automata::{ + /// dfa::{dense, Automaton}, + /// Anchored, HalfMatch, PatternID, Input, + /// }; + /// + /// let dfa = dense::Builder::new() + /// .configure(dense::Config::new().starts_for_each_pattern(true)) + /// .build(r"foo[0-9]+")?; + /// let haystack = "quux foo123"; + /// + /// // Here's a normal unanchored search. Notice that we use 'None' for the + /// // pattern ID. Since the DFA was built as an unanchored machine, it + /// // use its default unanchored starting state. + /// let expected = HalfMatch::must(0, 11); + /// let input = Input::new(haystack); + /// assert_eq!(Some(expected), dfa.try_search_fwd(&input)?); + /// // But now if we explicitly specify the pattern to search ('0' being + /// // the only pattern in the DFA), then it will use the starting state + /// // for that specific pattern which is always anchored. Since the + /// // pattern doesn't have a match at the beginning of the haystack, we + /// // find nothing. + /// let input = Input::new(haystack) + /// .anchored(Anchored::Pattern(PatternID::must(0))); + /// assert_eq!(None, dfa.try_search_fwd(&input)?); + /// // And finally, an anchored search is not the same as putting a '^' at + /// // beginning of the pattern. An anchored search can only match at the + /// // beginning of the *search*, which we can change: + /// let input = Input::new(haystack) + /// .anchored(Anchored::Pattern(PatternID::must(0))) + /// .range(5..); + /// assert_eq!(Some(expected), dfa.try_search_fwd(&input)?); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn starts_for_each_pattern(mut self, yes: bool) -> Config { + self.starts_for_each_pattern = Some(yes); + self + } + + /// Whether to attempt to shrink the size of the DFA's alphabet or not. + /// + /// This option is enabled by default and should never be disabled unless + /// one is debugging a generated DFA. + /// + /// When enabled, the DFA will use a map from all possible bytes to their + /// corresponding equivalence class. 
Each equivalence class represents a + /// set of bytes that does not discriminate between a match and a non-match + /// in the DFA. For example, the pattern `[ab]+` has at least two + /// equivalence classes: a set containing `a` and `b` and a set containing + /// every byte except for `a` and `b`. `a` and `b` are in the same + /// equivalence class because they never discriminate between a match and a + /// non-match. + /// + /// The advantage of this map is that the size of the transition table + /// can be reduced drastically from `#states * 256 * sizeof(StateID)` to + /// `#states * k * sizeof(StateID)` where `k` is the number of equivalence + /// classes (rounded up to the nearest power of 2). As a result, total + /// space usage can decrease substantially. Moreover, since a smaller + /// alphabet is used, DFA compilation becomes faster as well. + /// + /// **WARNING:** This is only useful for debugging DFAs. Disabling this + /// does not yield any speed advantages. Namely, even when this is + /// disabled, a byte class map is still used while searching. The only + /// difference is that every byte will be forced into its own distinct + /// equivalence class. This is useful for debugging the actual generated + /// transitions because it lets one see the transitions defined on actual + /// bytes instead of the equivalence classes. + pub fn byte_classes(mut self, yes: bool) -> Config { + self.byte_classes = Some(yes); + self + } + + /// Heuristically enable Unicode word boundaries. + /// + /// When set, this will attempt to implement Unicode word boundaries as if + /// they were ASCII word boundaries. This only works when the search input + /// is ASCII only. If a non-ASCII byte is observed while searching, then a + /// [`MatchError::quit`](crate::MatchError::quit) error is returned. + /// + /// A possible alternative to enabling this option is to simply use an + /// ASCII word boundary, e.g., via `(?-u:\b)`. 
The main reason to use this + /// option is if you absolutely need Unicode support. This option lets one + /// use a fast search implementation (a DFA) for some potentially very + /// common cases, while providing the option to fall back to some other + /// regex engine to handle the general case when an error is returned. + /// + /// If the pattern provided has no Unicode word boundary in it, then this + /// option has no effect. (That is, quitting on a non-ASCII byte only + /// occurs when this option is enabled _and_ a Unicode word boundary is + /// present in the pattern.) + /// + /// This is almost equivalent to setting all non-ASCII bytes to be quit + /// bytes. The only difference is that this will cause non-ASCII bytes to + /// be quit bytes _only_ when a Unicode word boundary is present in the + /// pattern. + /// + /// When enabling this option, callers _must_ be prepared to handle + /// a [`MatchError`](crate::MatchError) error during search. + /// When using a [`Regex`](crate::dfa::regex::Regex), this corresponds + /// to using the `try_` suite of methods. Alternatively, if + /// callers can guarantee that their input is ASCII only, then a + /// [`MatchError::quit`](crate::MatchError::quit) error will never be + /// returned while searching. + /// + /// This is disabled by default. + /// + /// # Example + /// + /// This example shows how to heuristically enable Unicode word boundaries + /// in a pattern. It also shows what happens when a search comes across a + /// non-ASCII byte. + /// + /// ``` + /// use regex_automata::{ + /// dfa::{Automaton, dense}, + /// HalfMatch, Input, MatchError, + /// }; + /// + /// let dfa = dense::Builder::new() + /// .configure(dense::Config::new().unicode_word_boundary(true)) + /// .build(r"\b[0-9]+\b")?; + /// + /// // The match occurs before the search ever observes the snowman + /// // character, so no error occurs. 
+    /// // (Note the two spaces between "123" and the snowman.)
+    /// let haystack = "foo 123  ☃".as_bytes();
+    /// let expected = Some(HalfMatch::must(0, 7));
+    /// let got = dfa.try_search_fwd(&Input::new(haystack))?;
+    /// assert_eq!(expected, got);
+    ///
+    /// // Notice that this search fails, even though the snowman character
+    /// // occurs after the ending match offset. This is because search
+    /// // routines read one byte past the end of the search to account for
+    /// // look-around, and indeed, this is required here to determine whether
+    /// // the trailing \b matches.
+    /// let haystack = "foo 123 ☃".as_bytes();
+    /// let expected = MatchError::quit(0xE2, 8);
+    /// let got = dfa.try_search_fwd(&Input::new(haystack));
+    /// assert_eq!(Err(expected), got);
+    ///
+    /// // Another example is executing a search where the span of the haystack
+    /// // we specify is all ASCII, but there is non-ASCII just before it. This
+    /// // correctly also reports an error.
+    /// let input = Input::new("β123").range(2..);
+    /// let expected = MatchError::quit(0xB2, 1);
+    /// let got = dfa.try_search_fwd(&input);
+    /// assert_eq!(Err(expected), got);
+    ///
+    /// // And similarly for the trailing word boundary.
+    /// let input = Input::new("123β").range(..3);
+    /// let expected = MatchError::quit(0xCE, 3);
+    /// let got = dfa.try_search_fwd(&input);
+    /// assert_eq!(Err(expected), got);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn unicode_word_boundary(mut self, yes: bool) -> Config {
+        // We have a separate option for this instead of just setting the
+        // appropriate quit bytes here because we don't want to set quit bytes
+        // for every regex. We only want to set them when the regex contains a
+        // Unicode word boundary.
+        self.unicode_word_boundary = Some(yes);
+        self
+    }
+
+    /// Add a "quit" byte to the DFA.
+    ///
+    /// When a quit byte is seen during search time, then search will return
+    /// a [`MatchError::quit`](crate::MatchError::quit) error indicating the
+    /// offset at which the search stopped.
+ /// + /// A quit byte will always overrule any other aspects of a regex. For + /// example, if the `x` byte is added as a quit byte and the regex `\w` is + /// used, then observing `x` will cause the search to quit immediately + /// despite the fact that `x` is in the `\w` class. + /// + /// This mechanism is primarily useful for heuristically enabling certain + /// features like Unicode word boundaries in a DFA. Namely, if the input + /// to search is ASCII, then a Unicode word boundary can be implemented + /// via an ASCII word boundary with no change in semantics. Thus, a DFA + /// can attempt to match a Unicode word boundary but give up as soon as it + /// observes a non-ASCII byte. Indeed, if callers set all non-ASCII bytes + /// to be quit bytes, then Unicode word boundaries will be permitted when + /// building DFAs. Of course, callers should enable + /// [`Config::unicode_word_boundary`] if they want this behavior instead. + /// (The advantage being that non-ASCII quit bytes will only be added if a + /// Unicode word boundary is in the pattern.) + /// + /// When enabling this option, callers _must_ be prepared to handle a + /// [`MatchError`](crate::MatchError) error during search. When using a + /// [`Regex`](crate::dfa::regex::Regex), this corresponds to using the + /// `try_` suite of methods. + /// + /// By default, there are no quit bytes set. + /// + /// # Panics + /// + /// This panics if heuristic Unicode word boundaries are enabled and any + /// non-ASCII byte is removed from the set of quit bytes. Namely, enabling + /// Unicode word boundaries requires setting every non-ASCII byte to a quit + /// byte. So if the caller attempts to undo any of that, then this will + /// panic. + /// + /// # Example + /// + /// This example shows how to cause a search to terminate if it sees a + /// `\n` byte. This could be useful if, for example, you wanted to prevent + /// a user supplied pattern from matching across a line boundary. 
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{dfa::{Automaton, dense}, Input, MatchError};
+    ///
+    /// let dfa = dense::Builder::new()
+    ///     .configure(dense::Config::new().quit(b'\n', true))
+    ///     .build(r"foo\p{any}+bar")?;
+    ///
+    /// let haystack = "foo\nbar".as_bytes();
+    /// // Normally this would produce a match, since \p{any} contains '\n'.
+    /// // But since we instructed the automaton to enter a quit state if a
+    /// // '\n' is observed, this produces a match error instead.
+    /// let expected = MatchError::quit(b'\n', 3);
+    /// let got = dfa.try_search_fwd(&Input::new(haystack)).unwrap_err();
+    /// assert_eq!(expected, got);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn quit(mut self, byte: u8, yes: bool) -> Config {
+        // Un-quitting a non-ASCII byte would silently break the heuristic
+        // Unicode word boundary support, so refuse loudly instead.
+        if self.get_unicode_word_boundary() && !byte.is_ascii() && !yes {
+            panic!(
+                "cannot set non-ASCII byte to be non-quit when \
+                 Unicode word boundaries are enabled"
+            );
+        }
+        // Lazily create the set on first use; once created it stays `Some`,
+        // even if the only byte in it is removed again.
+        let quitset = self.quitset.get_or_insert_with(ByteSet::empty);
+        if yes {
+            quitset.add(byte);
+        } else {
+            quitset.remove(byte);
+        }
+        self
+    }
+
+    /// Enable specializing start states in the DFA.
+    ///
+    /// When start states are specialized, an implementor of a search routine
+    /// using a lazy DFA can tell when the search has entered a starting state.
+    /// When start states aren't specialized, then it is impossible to know
+    /// whether the search has entered a start state.
+    ///
+    /// Ideally, this option wouldn't need to exist and we could always
+    /// specialize start states. The problem is that start states can be quite
+    /// active. This in turn means that an efficient search routine is likely
+    /// to ping-pong between a heavily optimized hot loop that handles most
+    /// states and to a less optimized specialized handling of start states.
+ /// This causes branches to get heavily mispredicted and overall can + /// materially decrease throughput. Therefore, specializing start states + /// should only be enabled when it is needed. + /// + /// Knowing whether a search is in a start state is typically useful when a + /// prefilter is active for the search. A prefilter is typically only run + /// when in a start state and a prefilter can greatly accelerate a search. + /// Therefore, the possible cost of specializing start states is worth it + /// in this case. Otherwise, if you have no prefilter, there is likely no + /// reason to specialize start states. + /// + /// This is disabled by default, but note that it is automatically + /// enabled (or disabled) if [`Config::prefilter`] is set. Namely, unless + /// `specialize_start_states` has already been set, [`Config::prefilter`] + /// will automatically enable or disable it based on whether a prefilter + /// is present or not, respectively. This is done because a prefilter's + /// effectiveness is rooted in being executed whenever the DFA is in a + /// start state, and that's only possible to do when they are specialized. + /// + /// Note that it is plausibly reasonable to _disable_ this option + /// explicitly while _enabling_ a prefilter. In that case, a prefilter + /// will still be run at the beginning of a search, but never again. This + /// in theory could strike a good balance if you're in a situation where a + /// prefilter is likely to produce many false positive candidates. + /// + /// # Example + /// + /// This example shows how to enable start state specialization and then + /// shows how to check whether a state is a start state or not. 
+    ///
+    /// ```
+    /// use regex_automata::{dfa::{Automaton, dense::DFA}, Input};
+    ///
+    /// let dfa = DFA::builder()
+    ///     .configure(DFA::config().specialize_start_states(true))
+    ///     .build(r"[a-z]+")?;
+    ///
+    /// let haystack = "123 foobar 4567".as_bytes();
+    /// let sid = dfa.start_state_forward(&Input::new(haystack))?;
+    /// // The ID returned by 'start_state_forward' will always be tagged as
+    /// // a start state when start state specialization is enabled.
+    /// assert!(dfa.is_special_state(sid));
+    /// assert!(dfa.is_start_state(sid));
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// Compare the above with the default DFA configuration where start states
+    /// are _not_ specialized. In this case, the start state is not tagged at
+    /// all:
+    ///
+    /// ```
+    /// use regex_automata::{dfa::{Automaton, dense::DFA}, Input};
+    ///
+    /// let dfa = DFA::new(r"[a-z]+")?;
+    ///
+    /// let haystack = "123 foobar 4567";
+    /// let sid = dfa.start_state_forward(&Input::new(haystack))?;
+    /// // Start states are not special in the default configuration!
+    /// assert!(!dfa.is_special_state(sid));
+    /// assert!(!dfa.is_start_state(sid));
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn specialize_start_states(mut self, yes: bool) -> Config {
+        self.specialize_start_states = Some(yes);
+        self
+    }
+
+    /// Set a size limit on the total heap used by a DFA.
+    ///
+    /// This size limit is expressed in bytes and is applied during
+    /// determinization of an NFA into a DFA. If the DFA's heap usage, and only
+    /// the DFA, exceeds this configured limit, then determinization is stopped
+    /// and an error is returned.
+    ///
+    /// This limit does not apply to auxiliary storage used during
+    /// determinization that isn't part of the generated DFA.
+    ///
+    /// This limit is only applied during determinization. Currently, there is
+    /// no way to postpone this check to after minimization if minimization
+    /// was enabled.
+    ///
+    /// The total limit on heap used during determinization is the sum of the
+    /// DFA and determinization size limits.
+    ///
+    /// The default is no limit.
+    ///
+    /// # Example
+    ///
+    /// This example shows a DFA that fails to build because of a configured
+    /// size limit. This particular example also serves as a cautionary tale
+    /// demonstrating just how big DFAs with large Unicode character classes
+    /// can get.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{dfa::{dense, Automaton}, Input};
+    ///
+    /// // 6MB isn't enough!
+    /// dense::Builder::new()
+    ///     .configure(dense::Config::new().dfa_size_limit(Some(6_000_000)))
+    ///     .build(r"\w{20}")
+    ///     .unwrap_err();
+    ///
+    /// // ... but 7MB probably is!
+    /// // (Note that DFA sizes aren't necessarily stable between releases.)
+    /// let dfa = dense::Builder::new()
+    ///     .configure(dense::Config::new().dfa_size_limit(Some(7_000_000)))
+    ///     .build(r"\w{20}")?;
+    /// let haystack = "A".repeat(20).into_bytes();
+    /// assert!(dfa.try_search_fwd(&Input::new(&haystack))?.is_some());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// While one needs a little more than 6MB to represent `\w{20}`, it
+    /// turns out that you only need a little more than 6KB to represent
+    /// `(?-u:\w{20})`. So only use Unicode if you need it!
+    ///
+    /// As with [`Config::determinize_size_limit`], the size of a DFA is
+    /// influenced by other factors, such as what start state configurations
+    /// to support. For example, if you only need unanchored searches and not
+    /// anchored searches, then configuring the DFA to only support unanchored
+    /// searches can reduce its size. By default, DFAs support both unanchored
+    /// and anchored searches.
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// use regex_automata::{dfa::{dense, Automaton, StartKind}, Input};
+    ///
+    /// // 3MB isn't enough!
+    /// dense::Builder::new()
+    ///     .configure(dense::Config::new()
+    ///         .dfa_size_limit(Some(3_000_000))
+    ///         .start_kind(StartKind::Unanchored)
+    ///     )
+    ///     .build(r"\w{20}")
+    ///     .unwrap_err();
+    ///
+    /// // ... but 4MB probably is!
+    /// // (Note that DFA sizes aren't necessarily stable between releases.)
+    /// let dfa = dense::Builder::new()
+    ///     .configure(dense::Config::new()
+    ///         .dfa_size_limit(Some(4_000_000))
+    ///         .start_kind(StartKind::Unanchored)
+    ///     )
+    ///     .build(r"\w{20}")?;
+    /// let haystack = "A".repeat(20).into_bytes();
+    /// assert!(dfa.try_search_fwd(&Input::new(&haystack))?.is_some());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn dfa_size_limit(mut self, bytes: Option<usize>) -> Config {
+        // The outer `Some` records that the caller made an explicit choice;
+        // the inner `Option` is the limit itself (`None` meaning no limit).
+        self.dfa_size_limit = Some(bytes);
+        self
+    }
+
+    /// Set a size limit on the total heap used by determinization.
+    ///
+    /// This size limit is expressed in bytes and is applied during
+    /// determinization of an NFA into a DFA. If the heap used for auxiliary
+    /// storage during determinization (memory that is not in the DFA but
+    /// necessary for building the DFA) exceeds this configured limit, then
+    /// determinization is stopped and an error is returned.
+    ///
+    /// This limit does not apply to heap used by the DFA itself.
+    ///
+    /// The total limit on heap used during determinization is the sum of the
+    /// DFA and determinization size limits.
+    ///
+    /// The default is no limit.
+    ///
+    /// # Example
+    ///
+    /// This example shows a DFA that fails to build because of a
+    /// configured size limit on the amount of heap space used by
+    /// determinization. This particular example complements the example for
+    /// [`Config::dfa_size_limit`] by demonstrating that not only does Unicode
+    /// potentially make DFAs themselves big, but it also results in more
+    /// auxiliary storage during determinization. (Although, auxiliary storage
+    /// is still not as much as the DFA itself.)
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// # if !cfg!(target_pointer_width = "64") { return Ok(()); } // see #1039
+    /// use regex_automata::{dfa::{dense, Automaton}, Input};
+    ///
+    /// // 700KB isn't enough!
+    /// dense::Builder::new()
+    ///     .configure(dense::Config::new()
+    ///         .determinize_size_limit(Some(700_000))
+    ///     )
+    ///     .build(r"\w{20}")
+    ///     .unwrap_err();
+    ///
+    /// // ... but 800KB probably is!
+    /// // (Note that auxiliary storage sizes aren't necessarily stable between
+    /// // releases.)
+    /// let dfa = dense::Builder::new()
+    ///     .configure(dense::Config::new()
+    ///         .determinize_size_limit(Some(800_000))
+    ///     )
+    ///     .build(r"\w{20}")?;
+    /// let haystack = "A".repeat(20).into_bytes();
+    /// assert!(dfa.try_search_fwd(&Input::new(&haystack))?.is_some());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// Note that some parts of the configuration on a DFA can have a
+    /// big impact on how big the DFA is, and thus, how much memory is
+    /// used. For example, the default setting for [`Config::start_kind`] is
+    /// [`StartKind::Both`]. But if you only need an anchored search, for
+    /// example, then it can be much cheaper to build a DFA that only supports
+    /// anchored searches. (Running an unanchored search with it would panic.)
+    ///
+    /// ```
+    /// # if cfg!(miri) { return Ok(()); } // miri takes too long
+    /// # if !cfg!(target_pointer_width = "64") { return Ok(()); } // see #1039
+    /// use regex_automata::{
+    ///     dfa::{dense, Automaton, StartKind},
+    ///     Anchored, Input,
+    /// };
+    ///
+    /// // 200KB isn't enough!
+    /// dense::Builder::new()
+    ///     .configure(dense::Config::new()
+    ///         .determinize_size_limit(Some(200_000))
+    ///         .start_kind(StartKind::Anchored)
+    ///     )
+    ///     .build(r"\w{20}")
+    ///     .unwrap_err();
+    ///
+    /// // ... but 300KB probably is!
+    /// // (Note that auxiliary storage sizes aren't necessarily stable between
+    /// // releases.)
+    /// let dfa = dense::Builder::new()
+    ///     .configure(dense::Config::new()
+    ///         .determinize_size_limit(Some(300_000))
+    ///         .start_kind(StartKind::Anchored)
+    ///     )
+    ///     .build(r"\w{20}")?;
+    /// let haystack = "A".repeat(20).into_bytes();
+    /// let input = Input::new(&haystack).anchored(Anchored::Yes);
+    /// assert!(dfa.try_search_fwd(&input)?.is_some());
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn determinize_size_limit(mut self, bytes: Option<usize>) -> Config {
+        // The outer `Some` records that the caller made an explicit choice;
+        // the inner `Option` is the limit itself (`None` meaning no limit).
+        self.determinize_size_limit = Some(bytes);
+        self
+    }
+
+    /// Returns whether this configuration has enabled simple state
+    /// acceleration.
+    ///
+    /// Defaults to `true` when it has not been set explicitly.
+    pub fn get_accelerate(&self) -> bool {
+        self.accelerate.unwrap_or(true)
+    }
+
+    /// Returns the prefilter attached to this configuration, if any.
+    ///
+    /// `None` is returned both when no prefilter option was ever set and
+    /// when it was explicitly set to "no prefilter".
+    pub fn get_prefilter(&self) -> Option<&Prefilter> {
+        self.pre.as_ref().and_then(|pre| pre.as_ref())
+    }
+
+    /// Returns whether this configuration has enabled the expensive process
+    /// of minimizing a DFA.
+    ///
+    /// Defaults to `false` when it has not been set explicitly.
+    pub fn get_minimize(&self) -> bool {
+        self.minimize.unwrap_or(false)
+    }
+
+    /// Returns the match semantics set in this configuration.
+    ///
+    /// Defaults to [`MatchKind::LeftmostFirst`] when not set explicitly.
+    pub fn get_match_kind(&self) -> MatchKind {
+        self.match_kind.unwrap_or(MatchKind::LeftmostFirst)
+    }
+
+    /// Returns the starting state configuration for a DFA.
+    ///
+    /// Defaults to [`StartKind::Both`] when not set explicitly.
+    pub fn get_starts(&self) -> StartKind {
+        self.start_kind.unwrap_or(StartKind::Both)
+    }
+
+    /// Returns whether this configuration has enabled anchored starting states
+    /// for every pattern in the DFA.
+    ///
+    /// Defaults to `false` when it has not been set explicitly.
+    pub fn get_starts_for_each_pattern(&self) -> bool {
+        self.starts_for_each_pattern.unwrap_or(false)
+    }
+
+    /// Returns whether this configuration has enabled byte classes or not.
+    /// This is typically a debugging oriented option, as disabling it confers
+    /// no speed benefit.
+    ///
+    /// Defaults to `true` when it has not been set explicitly.
+    pub fn get_byte_classes(&self) -> bool {
+        self.byte_classes.unwrap_or(true)
+    }
+
+    /// Returns whether this configuration has enabled heuristic Unicode word
+    /// boundary support.
When enabled, it is possible for a search to return + /// an error. + pub fn get_unicode_word_boundary(&self) -> bool { + self.unicode_word_boundary.unwrap_or(false) + } + + /// Returns whether this configuration will instruct the DFA to enter a + /// quit state whenever the given byte is seen during a search. When at + /// least one byte has this enabled, it is possible for a search to return + /// an error. + pub fn get_quit(&self, byte: u8) -> bool { + self.quitset.map_or(false, |q| q.contains(byte)) + } + + /// Returns whether this configuration will instruct the DFA to + /// "specialize" start states. When enabled, the DFA will mark start states + /// as "special" so that search routines using the DFA can detect when + /// it's in a start state and do some kind of optimization (like run a + /// prefilter). + pub fn get_specialize_start_states(&self) -> bool { + self.specialize_start_states.unwrap_or(false) + } + + /// Returns the DFA size limit of this configuration if one was set. + /// The size limit is total number of bytes on the heap that a DFA is + /// permitted to use. If the DFA exceeds this limit during construction, + /// then construction is stopped and an error is returned. + pub fn get_dfa_size_limit(&self) -> Option { + self.dfa_size_limit.unwrap_or(None) + } + + /// Returns the determinization size limit of this configuration if one + /// was set. The size limit is total number of bytes on the heap that + /// determinization is permitted to use. If determinization exceeds this + /// limit during construction, then construction is stopped and an error is + /// returned. + /// + /// This is different from the DFA size limit in that this only applies to + /// the auxiliary storage used during determinization. Once determinization + /// is complete, this memory is freed. + /// + /// The limit on the total heap memory used is the sum of the DFA and + /// determinization size limits. 
+ pub fn get_determinize_size_limit(&self) -> Option { + self.determinize_size_limit.unwrap_or(None) + } + + /// Overwrite the default configuration such that the options in `o` are + /// always used. If an option in `o` is not set, then the corresponding + /// option in `self` is used. If it's not set in `self` either, then it + /// remains not set. + pub(crate) fn overwrite(&self, o: Config) -> Config { + Config { + accelerate: o.accelerate.or(self.accelerate), + pre: o.pre.or_else(|| self.pre.clone()), + minimize: o.minimize.or(self.minimize), + match_kind: o.match_kind.or(self.match_kind), + start_kind: o.start_kind.or(self.start_kind), + starts_for_each_pattern: o + .starts_for_each_pattern + .or(self.starts_for_each_pattern), + byte_classes: o.byte_classes.or(self.byte_classes), + unicode_word_boundary: o + .unicode_word_boundary + .or(self.unicode_word_boundary), + quitset: o.quitset.or(self.quitset), + specialize_start_states: o + .specialize_start_states + .or(self.specialize_start_states), + dfa_size_limit: o.dfa_size_limit.or(self.dfa_size_limit), + determinize_size_limit: o + .determinize_size_limit + .or(self.determinize_size_limit), + } + } +} + +/// A builder for constructing a deterministic finite automaton from regular +/// expressions. +/// +/// This builder provides two main things: +/// +/// 1. It provides a few different `build` routines for actually constructing +/// a DFA from different kinds of inputs. The most convenient is +/// [`Builder::build`], which builds a DFA directly from a pattern string. The +/// most flexible is [`Builder::build_from_nfa`], which builds a DFA straight +/// from an NFA. +/// 2. The builder permits configuring a number of things. +/// [`Builder::configure`] is used with [`Config`] to configure aspects of +/// the DFA and the construction process itself. [`Builder::syntax`] and +/// [`Builder::thompson`] permit configuring the regex parser and Thompson NFA +/// construction, respectively. 
The syntax and thompson configurations only +/// apply when building from a pattern string. +/// +/// This builder always constructs a *single* DFA. As such, this builder +/// can only be used to construct regexes that either detect the presence +/// of a match or find the end location of a match. A single DFA cannot +/// produce both the start and end of a match. For that information, use a +/// [`Regex`](crate::dfa::regex::Regex), which can be similarly configured +/// using [`regex::Builder`](crate::dfa::regex::Builder). The main reason to +/// use a DFA directly is if the end location of a match is enough for your use +/// case. Namely, a `Regex` will construct two DFAs instead of one, since a +/// second reverse DFA is needed to find the start of a match. +/// +/// Note that if one wants to build a sparse DFA, you must first build a dense +/// DFA and convert that to a sparse DFA. There is no way to build a sparse +/// DFA without first building a dense DFA. +/// +/// # Example +/// +/// This example shows how to build a minimized DFA that completely disables +/// Unicode. That is: +/// +/// * Things such as `\w`, `.` and `\b` are no longer Unicode-aware. `\w` +/// and `\b` are ASCII-only while `.` matches any byte except for `\n` +/// (instead of any UTF-8 encoding of a Unicode scalar value except for +/// `\n`). Things that are Unicode only, such as `\pL`, are not allowed. +/// * The pattern itself is permitted to match invalid UTF-8. For example, +/// things like `[^a]` that match any byte except for `a` are permitted. 
+/// +/// ``` +/// use regex_automata::{ +/// dfa::{Automaton, dense}, +/// util::syntax, +/// HalfMatch, Input, +/// }; +/// +/// let dfa = dense::Builder::new() +/// .configure(dense::Config::new().minimize(false)) +/// .syntax(syntax::Config::new().unicode(false).utf8(false)) +/// .build(r"foo[^b]ar.*")?; +/// +/// let haystack = b"\xFEfoo\xFFar\xE2\x98\xFF\n"; +/// let expected = Some(HalfMatch::must(0, 10)); +/// let got = dfa.try_search_fwd(&Input::new(haystack))?; +/// assert_eq!(expected, got); +/// +/// # Ok::<(), Box>(()) +/// ``` +#[cfg(feature = "dfa-build")] +#[derive(Clone, Debug)] +pub struct Builder { + config: Config, + #[cfg(feature = "syntax")] + thompson: thompson::Compiler, +} + +#[cfg(feature = "dfa-build")] +impl Builder { + /// Create a new dense DFA builder with the default configuration. + pub fn new() -> Builder { + Builder { + config: Config::default(), + #[cfg(feature = "syntax")] + thompson: thompson::Compiler::new(), + } + } + + /// Build a DFA from the given pattern. + /// + /// If there was a problem parsing or compiling the pattern, then an error + /// is returned. + #[cfg(feature = "syntax")] + pub fn build(&self, pattern: &str) -> Result { + self.build_many(&[pattern]) + } + + /// Build a DFA from the given patterns. + /// + /// When matches are returned, the pattern ID corresponds to the index of + /// the pattern in the slice given. + #[cfg(feature = "syntax")] + pub fn build_many>( + &self, + patterns: &[P], + ) -> Result { + let nfa = self + .thompson + .clone() + // We can always forcefully disable captures because DFAs do not + // support them. + .configure( + thompson::Config::new() + .which_captures(thompson::WhichCaptures::None), + ) + .build_many(patterns) + .map_err(BuildError::nfa)?; + self.build_from_nfa(&nfa) + } + + /// Build a DFA from the given NFA. + /// + /// # Example + /// + /// This example shows how to build a DFA if you already have an NFA in + /// hand. 
+    ///
+    /// ```
+    /// use regex_automata::{
+    ///     dfa::{Automaton, dense},
+    ///     nfa::thompson::NFA,
+    ///     HalfMatch, Input,
+    /// };
+    ///
+    /// let haystack = "foo123bar".as_bytes();
+    ///
+    /// // This shows how to set non-default options for building an NFA.
+    /// let nfa = NFA::compiler()
+    ///     .configure(NFA::config().shrink(true))
+    ///     .build(r"[0-9]+")?;
+    /// let dfa = dense::Builder::new().build_from_nfa(&nfa)?;
+    /// let expected = Some(HalfMatch::must(0, 6));
+    /// let got = dfa.try_search_fwd(&Input::new(haystack))?;
+    /// assert_eq!(expected, got);
+    ///
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    ///
+    /// # Errors
+    ///
+    /// An error is returned if the initial DFA cannot be created or if
+    /// determinization fails.
+    pub fn build_from_nfa(
+        &self,
+        nfa: &thompson::NFA,
+    ) -> Result<OwnedDFA, BuildError> {
+        let mut quitset = self.config.quitset.unwrap_or(ByteSet::empty());
+        // Heuristic Unicode word boundaries only add quit bytes when the NFA
+        // actually contains a Unicode word boundary assertion.
+        if self.config.get_unicode_word_boundary()
+            && nfa.look_set_any().contains_word_unicode()
+        {
+            for b in 0x80..=0xFF {
+                quitset.add(b);
+            }
+        }
+        let classes = if !self.config.get_byte_classes() {
+            // DFAs will always use the equivalence class map, but disabling
+            // byte classes is useful for debugging. Namely, this will cause
+            // all transitions to be defined over their actual bytes instead
+            // of an opaque equivalence class identifier. The former is much
+            // easier to grok as a human.
+            ByteClasses::singletons()
+        } else {
+            let mut set = nfa.byte_class_set().clone();
+            // It is important to distinguish any "quit" bytes from all other
+            // bytes. Otherwise, a non-quit byte may end up in the same
+            // class as a quit byte, and thus cause the DFA to stop when it
+            // shouldn't.
+            //
+            // Test case:
+            //
+            //   regex-cli find match dense --unicode-word-boundary \
+            //     -p '^#' -p '\b10\.55\.182\.100\b' -y @conn.json.1000x.log
+            if !quitset.is_empty() {
+                set.add_set(&quitset);
+            }
+            set.byte_classes()
+        };
+
+        let mut dfa = DFA::initial(
+            classes,
+            nfa.pattern_len(),
+            self.config.get_starts(),
+            nfa.look_matcher(),
+            self.config.get_starts_for_each_pattern(),
+            self.config.get_prefilter().cloned(),
+            quitset,
+            Flags::from_nfa(nfa),
+        )?;
+        determinize::Config::new()
+            .match_kind(self.config.get_match_kind())
+            .quit(quitset)
+            .dfa_size_limit(self.config.get_dfa_size_limit())
+            .determinize_size_limit(self.config.get_determinize_size_limit())
+            .run(nfa, &mut dfa)?;
+        if self.config.get_minimize() {
+            dfa.minimize();
+        }
+        if self.config.get_accelerate() {
+            dfa.accelerate();
+        }
+        // The state shuffling done before this point always assumes that start
+        // states should be marked as "special," even though it isn't the
+        // default configuration. State shuffling is complex enough as it is,
+        // so it's simpler to just "fix" our special state ID ranges to not
+        // include starting states after-the-fact.
+        if !self.config.get_specialize_start_states() {
+            dfa.special.set_no_special_start_states();
+        }
+        // Look for and set the universal starting states.
+        dfa.set_universal_starts();
+        Ok(dfa)
+    }
+
+    /// Apply the given dense DFA configuration options to this builder.
+    ///
+    /// Options set in `config` take precedence; options it leaves unset keep
+    /// whatever value this builder already had.
+    pub fn configure(&mut self, config: Config) -> &mut Builder {
+        self.config = self.config.overwrite(config);
+        self
+    }
+
+    /// Set the syntax configuration for this builder using
+    /// [`syntax::Config`](crate::util::syntax::Config).
+    ///
+    /// This permits setting things like case insensitivity, Unicode and multi
+    /// line mode.
+    ///
+    /// These settings only apply when constructing a DFA directly from a
+    /// pattern.
+ #[cfg(feature = "syntax")] + pub fn syntax( + &mut self, + config: crate::util::syntax::Config, + ) -> &mut Builder { + self.thompson.syntax(config); + self + } + + /// Set the Thompson NFA configuration for this builder using + /// [`nfa::thompson::Config`](crate::nfa::thompson::Config). + /// + /// This permits setting things like whether the DFA should match the regex + /// in reverse or if additional time should be spent shrinking the size of + /// the NFA. + /// + /// These settings only apply when constructing a DFA directly from a + /// pattern. + #[cfg(feature = "syntax")] + pub fn thompson(&mut self, config: thompson::Config) -> &mut Builder { + self.thompson.configure(config); + self + } +} + +#[cfg(feature = "dfa-build")] +impl Default for Builder { + fn default() -> Builder { + Builder::new() + } +} + +/// A convenience alias for an owned DFA. We use this particular instantiation +/// a lot in this crate, so it's worth giving it a name. This instantiation +/// is commonly used for mutable APIs on the DFA while building it. The main +/// reason for making DFAs generic is no_std support, and more generally, +/// making it possible to load a DFA from an arbitrary slice of bytes. +#[cfg(feature = "alloc")] +pub(crate) type OwnedDFA = DFA>; + +/// A dense table-based deterministic finite automaton (DFA). +/// +/// All dense DFAs have one or more start states, zero or more match states +/// and a transition table that maps the current state and the current byte +/// of input to the next state. A DFA can use this information to implement +/// fast searching. In particular, the use of a dense DFA generally makes the +/// trade off that match speed is the most valuable characteristic, even if +/// building the DFA may take significant time *and* space. (More concretely, +/// building a DFA takes time and space that is exponential in the size of the +/// pattern in the worst case.) 
As such, the processing of every byte of input +/// is done with a small constant number of operations that does not vary with +/// the pattern, its size or the size of the alphabet. If your needs don't line +/// up with this trade off, then a dense DFA may not be an adequate solution to +/// your problem. +/// +/// In contrast, a [`sparse::DFA`] makes the opposite +/// trade off: it uses less space but will execute a variable number of +/// instructions per byte at match time, which makes it slower for matching. +/// (Note that space usage is still exponential in the size of the pattern in +/// the worst case.) +/// +/// A DFA can be built using the default configuration via the +/// [`DFA::new`] constructor. Otherwise, one can +/// configure various aspects via [`dense::Builder`](Builder). +/// +/// A single DFA fundamentally supports the following operations: +/// +/// 1. Detection of a match. +/// 2. Location of the end of a match. +/// 3. In the case of a DFA with multiple patterns, which pattern matched is +/// reported as well. +/// +/// A notable absence from the above list of capabilities is the location of +/// the *start* of a match. In order to provide both the start and end of +/// a match, *two* DFAs are required. This functionality is provided by a +/// [`Regex`](crate::dfa::regex::Regex). +/// +/// # Type parameters +/// +/// A `DFA` has one type parameter, `T`, which is used to represent state IDs, +/// pattern IDs and accelerators. `T` is typically a `Vec` or a `&[u32]`. +/// +/// # The `Automaton` trait +/// +/// This type implements the [`Automaton`] trait, which means it can be used +/// for searching. 
For example: +/// +/// ``` +/// use regex_automata::{dfa::{Automaton, dense::DFA}, HalfMatch, Input}; +/// +/// let dfa = DFA::new("foo[0-9]+")?; +/// let expected = HalfMatch::must(0, 8); +/// assert_eq!(Some(expected), dfa.try_search_fwd(&Input::new("foo12345"))?); +/// # Ok::<(), Box>(()) +/// ``` +#[derive(Clone)] +pub struct DFA { + /// The transition table for this DFA. This includes the transitions + /// themselves, along with the stride, number of states and the equivalence + /// class mapping. + tt: TransitionTable, + /// The set of starting state identifiers for this DFA. The starting state + /// IDs act as pointers into the transition table. The specific starting + /// state chosen for each search is dependent on the context at which the + /// search begins. + st: StartTable, + /// The set of match states and the patterns that match for each + /// corresponding match state. + /// + /// This structure is technically only needed because of support for + /// multi-regexes. Namely, multi-regexes require answering not just whether + /// a match exists, but _which_ patterns match. So we need to store the + /// matching pattern IDs for each match state. We do this even when there + /// is only one pattern for the sake of simplicity. In practice, this uses + /// up very little space for the case of one pattern. + ms: MatchStates, + /// Information about which states are "special." Special states are states + /// that are dead, quit, matching, starting or accelerated. For more info, + /// see the docs for `Special`. + special: Special, + /// The accelerators for this DFA. + /// + /// If a state is accelerated, then there exist only a small number of + /// bytes that can cause the DFA to leave the state. This permits searching + /// to use optimized routines to find those specific bytes instead of using + /// the transition table. + /// + /// All accelerated states exist in a contiguous range in the DFA's + /// transition table. 
See dfa/special.rs for more details on how states are + /// arranged. + accels: Accels, + /// Any prefilter attached to this DFA. + /// + /// Note that currently prefilters are not serialized. When deserializing + /// a DFA from bytes, this is always set to `None`. + pre: Option, + /// The set of "quit" bytes for this DFA. + /// + /// This is only used when computing the start state for a particular + /// position in a haystack. Namely, in the case where there is a quit + /// byte immediately before the start of the search, this set needs to be + /// explicitly consulted. In all other cases, quit bytes are detected by + /// the DFA itself, by transitioning all quit bytes to a special "quit + /// state." + quitset: ByteSet, + /// Various flags describing the behavior of this DFA. + flags: Flags, +} + +#[cfg(feature = "dfa-build")] +impl OwnedDFA { + /// Parse the given regular expression using a default configuration and + /// return the corresponding DFA. + /// + /// If you want a non-default configuration, then use the + /// [`dense::Builder`](Builder) to set your own configuration. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{dfa::{Automaton, dense}, HalfMatch, Input}; + /// + /// let dfa = dense::DFA::new("foo[0-9]+bar")?; + /// let expected = Some(HalfMatch::must(0, 11)); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345bar"))?); + /// # Ok::<(), Box>(()) + /// ``` + #[cfg(feature = "syntax")] + pub fn new(pattern: &str) -> Result { + Builder::new().build(pattern) + } + + /// Parse the given regular expressions using a default configuration and + /// return the corresponding multi-DFA. + /// + /// If you want a non-default configuration, then use the + /// [`dense::Builder`](Builder) to set your own configuration. 
+ /// + /// # Example + /// + /// ``` + /// use regex_automata::{dfa::{Automaton, dense}, HalfMatch, Input}; + /// + /// let dfa = dense::DFA::new_many(&["[0-9]+", "[a-z]+"])?; + /// let expected = Some(HalfMatch::must(1, 3)); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345bar"))?); + /// # Ok::<(), Box>(()) + /// ``` + #[cfg(feature = "syntax")] + pub fn new_many>( + patterns: &[P], + ) -> Result { + Builder::new().build_many(patterns) + } +} + +#[cfg(feature = "dfa-build")] +impl OwnedDFA { + /// Create a new DFA that matches every input. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{dfa::{Automaton, dense}, HalfMatch, Input}; + /// + /// let dfa = dense::DFA::always_match()?; + /// + /// let expected = Some(HalfMatch::must(0, 0)); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new(""))?); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo"))?); + /// # Ok::<(), Box>(()) + /// ``` + pub fn always_match() -> Result { + let nfa = thompson::NFA::always_match(); + Builder::new().build_from_nfa(&nfa) + } + + /// Create a new DFA that never matches any input. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{dfa::{Automaton, dense}, Input}; + /// + /// let dfa = dense::DFA::never_match()?; + /// assert_eq!(None, dfa.try_search_fwd(&Input::new(""))?); + /// assert_eq!(None, dfa.try_search_fwd(&Input::new("foo"))?); + /// # Ok::<(), Box>(()) + /// ``` + pub fn never_match() -> Result { + let nfa = thompson::NFA::never_match(); + Builder::new().build_from_nfa(&nfa) + } + + /// Create an initial DFA with the given equivalence classes, pattern + /// length and whether anchored starting states are enabled for each + /// pattern. An initial DFA can be further mutated via determinization. 
+ fn initial( + classes: ByteClasses, + pattern_len: usize, + starts: StartKind, + lookm: &LookMatcher, + starts_for_each_pattern: bool, + pre: Option, + quitset: ByteSet, + flags: Flags, + ) -> Result { + let start_pattern_len = + if starts_for_each_pattern { Some(pattern_len) } else { None }; + Ok(DFA { + tt: TransitionTable::minimal(classes), + st: StartTable::dead(starts, lookm, start_pattern_len)?, + ms: MatchStates::empty(pattern_len), + special: Special::new(), + accels: Accels::empty(), + pre, + quitset, + flags, + }) + } +} + +#[cfg(feature = "dfa-build")] +impl DFA<&[u32]> { + /// Return a new default dense DFA compiler configuration. + /// + /// This is a convenience routine to avoid needing to import the [`Config`] + /// type when customizing the construction of a dense DFA. + pub fn config() -> Config { + Config::new() + } + + /// Create a new dense DFA builder with the default configuration. + /// + /// This is a convenience routine to avoid needing to import the + /// [`Builder`] type in common cases. + pub fn builder() -> Builder { + Builder::new() + } +} + +impl> DFA { + /// Cheaply return a borrowed version of this dense DFA. Specifically, + /// the DFA returned always uses `&[u32]` for its transition table. + pub fn as_ref(&self) -> DFA<&'_ [u32]> { + DFA { + tt: self.tt.as_ref(), + st: self.st.as_ref(), + ms: self.ms.as_ref(), + special: self.special, + accels: self.accels(), + pre: self.pre.clone(), + quitset: self.quitset, + flags: self.flags, + } + } + + /// Return an owned version of this sparse DFA. Specifically, the DFA + /// returned always uses `Vec` for its transition table. + /// + /// Effectively, this returns a dense DFA whose transition table lives on + /// the heap. 
+ #[cfg(feature = "alloc")] + pub fn to_owned(&self) -> OwnedDFA { + DFA { + tt: self.tt.to_owned(), + st: self.st.to_owned(), + ms: self.ms.to_owned(), + special: self.special, + accels: self.accels().to_owned(), + pre: self.pre.clone(), + quitset: self.quitset, + flags: self.flags, + } + } + + /// Returns the starting state configuration for this DFA. + /// + /// The default is [`StartKind::Both`], which means the DFA supports both + /// unanchored and anchored searches. However, this can generally lead to + /// bigger DFAs. Therefore, a DFA might be compiled with support for just + /// unanchored or anchored searches. In that case, running a search with + /// an unsupported configuration will panic. + pub fn start_kind(&self) -> StartKind { + self.st.kind + } + + /// Returns the start byte map used for computing the `Start` configuration + /// at the beginning of a search. + pub(crate) fn start_map(&self) -> &StartByteMap { + &self.st.start_map + } + + /// Returns true only if this DFA has starting states for each pattern. + /// + /// When a DFA has starting states for each pattern, then a search with the + /// DFA can be configured to only look for anchored matches of a specific + /// pattern. Specifically, APIs like [`Automaton::try_search_fwd`] can + /// accept a non-None `pattern_id` if and only if this method returns true. + /// Otherwise, calling `try_search_fwd` will panic. + /// + /// Note that if the DFA has no patterns, this always returns false. + pub fn starts_for_each_pattern(&self) -> bool { + self.st.pattern_len.is_some() + } + + /// Returns the equivalence classes that make up the alphabet for this DFA. + /// + /// Unless [`Config::byte_classes`] was disabled, it is possible that + /// multiple distinct bytes are grouped into the same equivalence class + /// if it is impossible for them to discriminate between a match and a + /// non-match. 
This has the effect of reducing the overall alphabet size + /// and in turn potentially substantially reducing the size of the DFA's + /// transition table. + /// + /// The downside of using equivalence classes like this is that every state + /// transition will automatically use this map to convert an arbitrary + /// byte to its corresponding equivalence class. In practice this has a + /// negligible impact on performance. + pub fn byte_classes(&self) -> &ByteClasses { + &self.tt.classes + } + + /// Returns the total number of elements in the alphabet for this DFA. + /// + /// That is, this returns the total number of transitions that each state + /// in this DFA must have. Typically, a normal byte oriented DFA would + /// always have an alphabet size of 256, corresponding to the number of + /// unique values in a single byte. However, this implementation has two + /// peculiarities that impact the alphabet length: + /// + /// * Every state has a special "EOI" transition that is only followed + /// after the end of some haystack is reached. This EOI transition is + /// necessary to account for one byte of look-ahead when implementing + /// things like `\b` and `$`. + /// * Bytes are grouped into equivalence classes such that no two bytes in + /// the same class can distinguish a match from a non-match. For example, + /// in the regex `^[a-z]+$`, the ASCII bytes `a-z` could all be in the + /// same equivalence class. This leads to a massive space savings. + /// + /// Note though that the alphabet length does _not_ necessarily equal the + /// total stride space taken up by a single DFA state in the transition + /// table. Namely, for performance reasons, the stride is always the + /// smallest power of two that is greater than or equal to the alphabet + /// length. For this reason, [`DFA::stride`] or [`DFA::stride2`] are + /// often more useful. The alphabet length is typically useful only for + /// informational purposes. 
+ pub fn alphabet_len(&self) -> usize { + self.tt.alphabet_len() + } + + /// Returns the total stride for every state in this DFA, expressed as the + /// exponent of a power of 2. The stride is the amount of space each state + /// takes up in the transition table, expressed as a number of transitions. + /// (Unused transitions map to dead states.) + /// + /// The stride of a DFA is always equivalent to the smallest power of 2 + /// that is greater than or equal to the DFA's alphabet length. This + /// definition uses extra space, but permits faster translation between + /// premultiplied state identifiers and contiguous indices (by using shifts + /// instead of relying on integer division). + /// + /// For example, if the DFA's stride is 16 transitions, then its `stride2` + /// is `4` since `2^4 = 16`. + /// + /// The minimum `stride2` value is `1` (corresponding to a stride of `2`) + /// while the maximum `stride2` value is `9` (corresponding to a stride of + /// `512`). The maximum is not `8` since the maximum alphabet size is `257` + /// when accounting for the special EOI transition. However, an alphabet + /// length of that size is exceptionally rare since the alphabet is shrunk + /// into equivalence classes. + pub fn stride2(&self) -> usize { + self.tt.stride2 + } + + /// Returns the total stride for every state in this DFA. This corresponds + /// to the total number of transitions used by each state in this DFA's + /// transition table. + /// + /// Please see [`DFA::stride2`] for more information. In particular, this + /// returns the stride as the number of transitions, where as `stride2` + /// returns it as the exponent of a power of 2. + pub fn stride(&self) -> usize { + self.tt.stride() + } + + /// Returns the memory usage, in bytes, of this DFA. + /// + /// The memory usage is computed based on the number of bytes used to + /// represent this DFA. + /// + /// This does **not** include the stack size used up by this DFA. 
To + /// compute that, use `std::mem::size_of::()`. + pub fn memory_usage(&self) -> usize { + self.tt.memory_usage() + + self.st.memory_usage() + + self.ms.memory_usage() + + self.accels.memory_usage() + } +} + +/// Routines for converting a dense DFA to other representations, such as +/// sparse DFAs or raw bytes suitable for persistent storage. +impl> DFA { + /// Convert this dense DFA to a sparse DFA. + /// + /// If a `StateID` is too small to represent all states in the sparse + /// DFA, then this returns an error. In most cases, if a dense DFA is + /// constructable with `StateID` then a sparse DFA will be as well. + /// However, it is not guaranteed. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{dfa::{Automaton, dense}, HalfMatch, Input}; + /// + /// let dense = dense::DFA::new("foo[0-9]+")?; + /// let sparse = dense.to_sparse()?; + /// + /// let expected = Some(HalfMatch::must(0, 8)); + /// assert_eq!(expected, sparse.try_search_fwd(&Input::new("foo12345"))?); + /// # Ok::<(), Box>(()) + /// ``` + #[cfg(feature = "dfa-build")] + pub fn to_sparse(&self) -> Result>, BuildError> { + sparse::DFA::from_dense(self) + } + + /// Serialize this DFA as raw bytes to a `Vec` in little endian + /// format. Upon success, the `Vec` and the initial padding length are + /// returned. + /// + /// The written bytes are guaranteed to be deserialized correctly and + /// without errors in a semver compatible release of this crate by a + /// `DFA`'s deserialization APIs (assuming all other criteria for the + /// deserialization APIs has been satisfied): + /// + /// * [`DFA::from_bytes`] + /// * [`DFA::from_bytes_unchecked`] + /// + /// The padding returned is non-zero if the returned `Vec` starts at + /// an address that does not have the same alignment as `u32`. The padding + /// corresponds to the number of leading bytes written to the returned + /// `Vec`. 
+ /// + /// # Example + /// + /// This example shows how to serialize and deserialize a DFA: + /// + /// ``` + /// use regex_automata::{dfa::{Automaton, dense::DFA}, HalfMatch, Input}; + /// + /// // Compile our original DFA. + /// let original_dfa = DFA::new("foo[0-9]+")?; + /// + /// // N.B. We use native endianness here to make the example work, but + /// // using to_bytes_little_endian would work on a little endian target. + /// let (buf, _) = original_dfa.to_bytes_native_endian(); + /// // Even if buf has initial padding, DFA::from_bytes will automatically + /// // ignore it. + /// let dfa: DFA<&[u32]> = DFA::from_bytes(&buf)?.0; + /// + /// let expected = Some(HalfMatch::must(0, 8)); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345"))?); + /// # Ok::<(), Box>(()) + /// ``` + #[cfg(feature = "dfa-build")] + pub fn to_bytes_little_endian(&self) -> (Vec, usize) { + self.to_bytes::() + } + + /// Serialize this DFA as raw bytes to a `Vec` in big endian + /// format. Upon success, the `Vec` and the initial padding length are + /// returned. + /// + /// The written bytes are guaranteed to be deserialized correctly and + /// without errors in a semver compatible release of this crate by a + /// `DFA`'s deserialization APIs (assuming all other criteria for the + /// deserialization APIs has been satisfied): + /// + /// * [`DFA::from_bytes`] + /// * [`DFA::from_bytes_unchecked`] + /// + /// The padding returned is non-zero if the returned `Vec` starts at + /// an address that does not have the same alignment as `u32`. The padding + /// corresponds to the number of leading bytes written to the returned + /// `Vec`. + /// + /// # Example + /// + /// This example shows how to serialize and deserialize a DFA: + /// + /// ``` + /// use regex_automata::{dfa::{Automaton, dense::DFA}, HalfMatch, Input}; + /// + /// // Compile our original DFA. + /// let original_dfa = DFA::new("foo[0-9]+")?; + /// + /// // N.B. 
We use native endianness here to make the example work, but + /// // using to_bytes_big_endian would work on a big endian target. + /// let (buf, _) = original_dfa.to_bytes_native_endian(); + /// // Even if buf has initial padding, DFA::from_bytes will automatically + /// // ignore it. + /// let dfa: DFA<&[u32]> = DFA::from_bytes(&buf)?.0; + /// + /// let expected = Some(HalfMatch::must(0, 8)); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345"))?); + /// # Ok::<(), Box>(()) + /// ``` + #[cfg(feature = "dfa-build")] + pub fn to_bytes_big_endian(&self) -> (Vec, usize) { + self.to_bytes::() + } + + /// Serialize this DFA as raw bytes to a `Vec` in native endian + /// format. Upon success, the `Vec` and the initial padding length are + /// returned. + /// + /// The written bytes are guaranteed to be deserialized correctly and + /// without errors in a semver compatible release of this crate by a + /// `DFA`'s deserialization APIs (assuming all other criteria for the + /// deserialization APIs has been satisfied): + /// + /// * [`DFA::from_bytes`] + /// * [`DFA::from_bytes_unchecked`] + /// + /// The padding returned is non-zero if the returned `Vec` starts at + /// an address that does not have the same alignment as `u32`. The padding + /// corresponds to the number of leading bytes written to the returned + /// `Vec`. + /// + /// Generally speaking, native endian format should only be used when + /// you know that the target you're compiling the DFA for matches the + /// endianness of the target on which you're compiling DFA. For example, + /// if serialization and deserialization happen in the same process or on + /// the same machine. Otherwise, when serializing a DFA for use in a + /// portable environment, you'll almost certainly want to serialize _both_ + /// a little endian and a big endian version and then load the correct one + /// based on the target's configuration. 
+ /// + /// # Example + /// + /// This example shows how to serialize and deserialize a DFA: + /// + /// ``` + /// use regex_automata::{dfa::{Automaton, dense::DFA}, HalfMatch, Input}; + /// + /// // Compile our original DFA. + /// let original_dfa = DFA::new("foo[0-9]+")?; + /// + /// let (buf, _) = original_dfa.to_bytes_native_endian(); + /// // Even if buf has initial padding, DFA::from_bytes will automatically + /// // ignore it. + /// let dfa: DFA<&[u32]> = DFA::from_bytes(&buf)?.0; + /// + /// let expected = Some(HalfMatch::must(0, 8)); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345"))?); + /// # Ok::<(), Box>(()) + /// ``` + #[cfg(feature = "dfa-build")] + pub fn to_bytes_native_endian(&self) -> (Vec, usize) { + self.to_bytes::() + } + + /// The implementation of the public `to_bytes` serialization methods, + /// which is generic over endianness. + #[cfg(feature = "dfa-build")] + fn to_bytes(&self) -> (Vec, usize) { + let len = self.write_to_len(); + let (mut buf, padding) = wire::alloc_aligned_buffer::(len); + // This should always succeed since the only possible serialization + // error is providing a buffer that's too small, but we've ensured that + // `buf` is big enough here. + self.as_ref().write_to::(&mut buf[padding..]).unwrap(); + (buf, padding) + } + + /// Serialize this DFA as raw bytes to the given slice, in little endian + /// format. Upon success, the total number of bytes written to `dst` is + /// returned. + /// + /// The written bytes are guaranteed to be deserialized correctly and + /// without errors in a semver compatible release of this crate by a + /// `DFA`'s deserialization APIs (assuming all other criteria for the + /// deserialization APIs has been satisfied): + /// + /// * [`DFA::from_bytes`] + /// * [`DFA::from_bytes_unchecked`] + /// + /// Note that unlike the various `to_byte_*` routines, this does not write + /// any padding. Callers are responsible for handling alignment correctly. 
+ /// + /// # Errors + /// + /// This returns an error if the given destination slice is not big enough + /// to contain the full serialized DFA. If an error occurs, then nothing + /// is written to `dst`. + /// + /// # Example + /// + /// This example shows how to serialize and deserialize a DFA without + /// dynamic memory allocation. + /// + /// ``` + /// use regex_automata::{dfa::{Automaton, dense::DFA}, HalfMatch, Input}; + /// + /// // Compile our original DFA. + /// let original_dfa = DFA::new("foo[0-9]+")?; + /// + /// // Create a 4KB buffer on the stack to store our serialized DFA. We + /// // need to use a special type to force the alignment of our [u8; N] + /// // array to be aligned to a 4 byte boundary. Otherwise, deserializing + /// // the DFA may fail because of an alignment mismatch. + /// #[repr(C)] + /// struct Aligned { + /// _align: [u32; 0], + /// bytes: B, + /// } + /// let mut buf = Aligned { _align: [], bytes: [0u8; 4 * (1<<10)] }; + /// // N.B. We use native endianness here to make the example work, but + /// // using write_to_little_endian would work on a little endian target. + /// let written = original_dfa.write_to_native_endian(&mut buf.bytes)?; + /// let dfa: DFA<&[u32]> = DFA::from_bytes(&buf.bytes[..written])?.0; + /// + /// let expected = Some(HalfMatch::must(0, 8)); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345"))?); + /// # Ok::<(), Box>(()) + /// ``` + pub fn write_to_little_endian( + &self, + dst: &mut [u8], + ) -> Result { + self.as_ref().write_to::(dst) + } + + /// Serialize this DFA as raw bytes to the given slice, in big endian + /// format. Upon success, the total number of bytes written to `dst` is + /// returned. 
+ /// + /// The written bytes are guaranteed to be deserialized correctly and + /// without errors in a semver compatible release of this crate by a + /// `DFA`'s deserialization APIs (assuming all other criteria for the + /// deserialization APIs has been satisfied): + /// + /// * [`DFA::from_bytes`] + /// * [`DFA::from_bytes_unchecked`] + /// + /// Note that unlike the various `to_byte_*` routines, this does not write + /// any padding. Callers are responsible for handling alignment correctly. + /// + /// # Errors + /// + /// This returns an error if the given destination slice is not big enough + /// to contain the full serialized DFA. If an error occurs, then nothing + /// is written to `dst`. + /// + /// # Example + /// + /// This example shows how to serialize and deserialize a DFA without + /// dynamic memory allocation. + /// + /// ``` + /// use regex_automata::{dfa::{Automaton, dense::DFA}, HalfMatch, Input}; + /// + /// // Compile our original DFA. + /// let original_dfa = DFA::new("foo[0-9]+")?; + /// + /// // Create a 4KB buffer on the stack to store our serialized DFA. We + /// // need to use a special type to force the alignment of our [u8; N] + /// // array to be aligned to a 4 byte boundary. Otherwise, deserializing + /// // the DFA may fail because of an alignment mismatch. + /// #[repr(C)] + /// struct Aligned { + /// _align: [u32; 0], + /// bytes: B, + /// } + /// let mut buf = Aligned { _align: [], bytes: [0u8; 4 * (1<<10)] }; + /// // N.B. We use native endianness here to make the example work, but + /// // using write_to_big_endian would work on a big endian target. 
+ /// let written = original_dfa.write_to_native_endian(&mut buf.bytes)?; + /// let dfa: DFA<&[u32]> = DFA::from_bytes(&buf.bytes[..written])?.0; + /// + /// let expected = Some(HalfMatch::must(0, 8)); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345"))?); + /// # Ok::<(), Box>(()) + /// ``` + pub fn write_to_big_endian( + &self, + dst: &mut [u8], + ) -> Result { + self.as_ref().write_to::(dst) + } + + /// Serialize this DFA as raw bytes to the given slice, in native endian + /// format. Upon success, the total number of bytes written to `dst` is + /// returned. + /// + /// The written bytes are guaranteed to be deserialized correctly and + /// without errors in a semver compatible release of this crate by a + /// `DFA`'s deserialization APIs (assuming all other criteria for the + /// deserialization APIs has been satisfied): + /// + /// * [`DFA::from_bytes`] + /// * [`DFA::from_bytes_unchecked`] + /// + /// Generally speaking, native endian format should only be used when + /// you know that the target you're compiling the DFA for matches the + /// endianness of the target on which you're compiling DFA. For example, + /// if serialization and deserialization happen in the same process or on + /// the same machine. Otherwise, when serializing a DFA for use in a + /// portable environment, you'll almost certainly want to serialize _both_ + /// a little endian and a big endian version and then load the correct one + /// based on the target's configuration. + /// + /// Note that unlike the various `to_byte_*` routines, this does not write + /// any padding. Callers are responsible for handling alignment correctly. + /// + /// # Errors + /// + /// This returns an error if the given destination slice is not big enough + /// to contain the full serialized DFA. If an error occurs, then nothing + /// is written to `dst`. + /// + /// # Example + /// + /// This example shows how to serialize and deserialize a DFA without + /// dynamic memory allocation. 
+ /// + /// ``` + /// use regex_automata::{dfa::{Automaton, dense::DFA}, HalfMatch, Input}; + /// + /// // Compile our original DFA. + /// let original_dfa = DFA::new("foo[0-9]+")?; + /// + /// // Create a 4KB buffer on the stack to store our serialized DFA. We + /// // need to use a special type to force the alignment of our [u8; N] + /// // array to be aligned to a 4 byte boundary. Otherwise, deserializing + /// // the DFA may fail because of an alignment mismatch. + /// #[repr(C)] + /// struct Aligned { + /// _align: [u32; 0], + /// bytes: B, + /// } + /// let mut buf = Aligned { _align: [], bytes: [0u8; 4 * (1<<10)] }; + /// let written = original_dfa.write_to_native_endian(&mut buf.bytes)?; + /// let dfa: DFA<&[u32]> = DFA::from_bytes(&buf.bytes[..written])?.0; + /// + /// let expected = Some(HalfMatch::must(0, 8)); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345"))?); + /// # Ok::<(), Box>(()) + /// ``` + pub fn write_to_native_endian( + &self, + dst: &mut [u8], + ) -> Result { + self.as_ref().write_to::(dst) + } + + /// Return the total number of bytes required to serialize this DFA. + /// + /// This is useful for determining the size of the buffer required to pass + /// to one of the serialization routines: + /// + /// * [`DFA::write_to_little_endian`] + /// * [`DFA::write_to_big_endian`] + /// * [`DFA::write_to_native_endian`] + /// + /// Passing a buffer smaller than the size returned by this method will + /// result in a serialization error. Serialization routines are guaranteed + /// to succeed when the buffer is big enough. + /// + /// # Example + /// + /// This example shows how to dynamically allocate enough room to serialize + /// a DFA. 
+ /// + /// ``` + /// use regex_automata::{dfa::{Automaton, dense::DFA}, HalfMatch, Input}; + /// + /// let original_dfa = DFA::new("foo[0-9]+")?; + /// + /// let mut buf = vec![0; original_dfa.write_to_len()]; + /// // This is guaranteed to succeed, because the only serialization error + /// // that can occur is when the provided buffer is too small. But + /// // write_to_len guarantees a correct size. + /// let written = original_dfa.write_to_native_endian(&mut buf).unwrap(); + /// // But this is not guaranteed to succeed! In particular, + /// // deserialization requires proper alignment for &[u32], but our buffer + /// // was allocated as a &[u8] whose required alignment is smaller than + /// // &[u32]. However, it's likely to work in practice because of how most + /// // allocators work. So if you write code like this, make sure to either + /// // handle the error correctly and/or run it under Miri since Miri will + /// // likely provoke the error by returning Vec buffers with alignment + /// // less than &[u32]. + /// let dfa: DFA<&[u32]> = match DFA::from_bytes(&buf[..written]) { + /// // As mentioned above, it is legal for an error to be returned + /// // here. It is quite difficult to get a Vec with a guaranteed + /// // alignment equivalent to Vec. + /// Err(_) => return Ok(()), + /// Ok((dfa, _)) => dfa, + /// }; + /// + /// let expected = Some(HalfMatch::must(0, 8)); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345"))?); + /// # Ok::<(), Box>(()) + /// ``` + /// + /// Note that this example isn't actually guaranteed to work! In + /// particular, if `buf` is not aligned to a 4-byte boundary, then the + /// `DFA::from_bytes` call will fail. If you need this to work, then you + /// either need to deal with adding some initial padding yourself, or use + /// one of the `to_bytes` methods, which will do it for you. 
+ pub fn write_to_len(&self) -> usize { + wire::write_label_len(LABEL) + + wire::write_endianness_check_len() + + wire::write_version_len() + + size_of::() // unused, intended for future flexibility + + self.flags.write_to_len() + + self.tt.write_to_len() + + self.st.write_to_len() + + self.ms.write_to_len() + + self.special.write_to_len() + + self.accels.write_to_len() + + self.quitset.write_to_len() + } +} + +impl<'a> DFA<&'a [u32]> { + /// Safely deserialize a DFA with a specific state identifier + /// representation. Upon success, this returns both the deserialized DFA + /// and the number of bytes read from the given slice. Namely, the contents + /// of the slice beyond the DFA are not read. + /// + /// Deserializing a DFA using this routine will never allocate heap memory. + /// For safety purposes, the DFA's transition table will be verified such + /// that every transition points to a valid state. If this verification is + /// too costly, then a [`DFA::from_bytes_unchecked`] API is provided, which + /// will always execute in constant time. + /// + /// The bytes given must be generated by one of the serialization APIs + /// of a `DFA` using a semver compatible release of this crate. Those + /// include: + /// + /// * [`DFA::to_bytes_little_endian`] + /// * [`DFA::to_bytes_big_endian`] + /// * [`DFA::to_bytes_native_endian`] + /// * [`DFA::write_to_little_endian`] + /// * [`DFA::write_to_big_endian`] + /// * [`DFA::write_to_native_endian`] + /// + /// The `to_bytes` methods allocate and return a `Vec` for you, along + /// with handling alignment correctly. The `write_to` methods do not + /// allocate and write to an existing slice (which may be on the stack). + /// Since deserialization always uses the native endianness of the target + /// platform, the serialization API you use should match the endianness of + /// the target platform. 
(It's often a good idea to generate serialized + /// DFAs for both forms of endianness and then load the correct one based + /// on endianness.) + /// + /// # Errors + /// + /// Generally speaking, it's easier to state the conditions in which an + /// error is _not_ returned. All of the following must be true: + /// + /// * The bytes given must be produced by one of the serialization APIs + /// on this DFA, as mentioned above. + /// * The endianness of the target platform matches the endianness used to + /// serialized the provided DFA. + /// * The slice given must have the same alignment as `u32`. + /// + /// If any of the above are not true, then an error will be returned. + /// + /// # Panics + /// + /// This routine will never panic for any input. + /// + /// # Example + /// + /// This example shows how to serialize a DFA to raw bytes, deserialize it + /// and then use it for searching. + /// + /// ``` + /// use regex_automata::{dfa::{Automaton, dense::DFA}, HalfMatch, Input}; + /// + /// let initial = DFA::new("foo[0-9]+")?; + /// let (bytes, _) = initial.to_bytes_native_endian(); + /// let dfa: DFA<&[u32]> = DFA::from_bytes(&bytes)?.0; + /// + /// let expected = Some(HalfMatch::must(0, 8)); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345"))?); + /// # Ok::<(), Box>(()) + /// ``` + /// + /// # Example: dealing with alignment and padding + /// + /// In the above example, we used the `to_bytes_native_endian` method to + /// serialize a DFA, but we ignored part of its return value corresponding + /// to padding added to the beginning of the serialized DFA. This is OK + /// because deserialization will skip this initial padding. What matters + /// is that the address immediately following the padding has an alignment + /// that matches `u32`. 
That is, the following is an equivalent but + /// alternative way to write the above example: + /// + /// ``` + /// use regex_automata::{dfa::{Automaton, dense::DFA}, HalfMatch, Input}; + /// + /// let initial = DFA::new("foo[0-9]+")?; + /// // Serialization returns the number of leading padding bytes added to + /// // the returned Vec. + /// let (bytes, pad) = initial.to_bytes_native_endian(); + /// let dfa: DFA<&[u32]> = DFA::from_bytes(&bytes[pad..])?.0; + /// + /// let expected = Some(HalfMatch::must(0, 8)); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345"))?); + /// # Ok::<(), Box>(()) + /// ``` + /// + /// This padding is necessary because Rust's standard library does + /// not expose any safe and robust way of creating a `Vec` with a + /// guaranteed alignment other than 1. Now, in practice, the underlying + /// allocator is likely to provide a `Vec` that meets our alignment + /// requirements, which means `pad` is zero in practice most of the time. + /// + /// The purpose of exposing the padding like this is flexibility for the + /// caller. For example, if one wants to embed a serialized DFA into a + /// compiled program, then it's important to guarantee that it starts at a + /// `u32`-aligned address. The simplest way to do this is to discard the + /// padding bytes and set it up so that the serialized DFA itself begins at + /// a properly aligned address. We can show this in two parts. The first + /// part is serializing the DFA to a file: + /// + /// ```no_run + /// use regex_automata::dfa::dense::DFA; + /// + /// let dfa = DFA::new("foo[0-9]+")?; + /// + /// let (bytes, pad) = dfa.to_bytes_big_endian(); + /// // Write the contents of the DFA *without* the initial padding. + /// std::fs::write("foo.bigendian.dfa", &bytes[pad..])?; + /// + /// // Do it again, but this time for little endian. 
+ /// let (bytes, pad) = dfa.to_bytes_little_endian(); + /// std::fs::write("foo.littleendian.dfa", &bytes[pad..])?; + /// # Ok::<(), Box>(()) + /// ``` + /// + /// And now the second part is embedding the DFA into the compiled program + /// and deserializing it at runtime on first use. We use conditional + /// compilation to choose the correct endianness. + /// + /// ```no_run + /// use regex_automata::{ + /// dfa::{Automaton, dense::DFA}, + /// util::{lazy::Lazy, wire::AlignAs}, + /// HalfMatch, Input, + /// }; + /// + /// // This crate provides its own "lazy" type, kind of like + /// // lazy_static! or once_cell::sync::Lazy. But it works in no-alloc + /// // no-std environments and let's us write this using completely + /// // safe code. + /// static RE: Lazy> = Lazy::new(|| { + /// # const _: &str = stringify! { + /// // This assignment is made possible (implicitly) via the + /// // CoerceUnsized trait. This is what guarantees that our + /// // bytes are stored in memory on a 4 byte boundary. You + /// // *must* do this or something equivalent for correct + /// // deserialization. 
+ /// static ALIGNED: &AlignAs<[u8], u32> = &AlignAs { + /// _align: [], + /// #[cfg(target_endian = "big")] + /// bytes: *include_bytes!("foo.bigendian.dfa"), + /// #[cfg(target_endian = "little")] + /// bytes: *include_bytes!("foo.littleendian.dfa"), + /// }; + /// # }; + /// # static ALIGNED: &AlignAs<[u8], u32> = &AlignAs { + /// # _align: [], + /// # bytes: [], + /// # }; + /// + /// let (dfa, _) = DFA::from_bytes(&ALIGNED.bytes) + /// .expect("serialized DFA should be valid"); + /// dfa + /// }); + /// + /// let expected = Ok(Some(HalfMatch::must(0, 8))); + /// assert_eq!(expected, RE.try_search_fwd(&Input::new("foo12345"))); + /// ``` + /// + /// An alternative to [`util::lazy::Lazy`](crate::util::lazy::Lazy) + /// is [`lazy_static`](https://crates.io/crates/lazy_static) or + /// [`once_cell`](https://crates.io/crates/once_cell), which provide + /// stronger guarantees (like the initialization function only being + /// executed once). And `once_cell` in particular provides a more + /// expressive API. But a `Lazy` value from this crate is likely just fine + /// in most circumstances. + /// + /// Note that regardless of which initialization method you use, you + /// will still need to use the [`AlignAs`](crate::util::wire::AlignAs) + /// trick above to force correct alignment, but this is safe to do and + /// `from_bytes` will return an error if you get it wrong. + pub fn from_bytes( + slice: &'a [u8], + ) -> Result<(DFA<&'a [u32]>, usize), DeserializeError> { + // SAFETY: This is safe because we validate the transition table, start + // table, match states and accelerators below. If any validation fails, + // then we return an error. + let (dfa, nread) = unsafe { DFA::from_bytes_unchecked(slice)? }; + dfa.tt.validate(&dfa)?; + dfa.st.validate(&dfa)?; + dfa.ms.validate(&dfa)?; + dfa.accels.validate()?; + // N.B. dfa.special doesn't have a way to do unchecked deserialization, + // so it has already been validated. 
+ for state in dfa.states() { + // If the state is an accel state, then it must have a non-empty + // accelerator. + if dfa.is_accel_state(state.id()) { + let index = dfa.accelerator_index(state.id()); + if index >= dfa.accels.len() { + return Err(DeserializeError::generic( + "found DFA state with invalid accelerator index", + )); + } + let needles = dfa.accels.needles(index); + if !(1 <= needles.len() && needles.len() <= 3) { + return Err(DeserializeError::generic( + "accelerator needles has invalid length", + )); + } + } + } + Ok((dfa, nread)) + } + + /// Deserialize a DFA with a specific state identifier representation in + /// constant time by omitting the verification of the validity of the + /// transition table and other data inside the DFA. + /// + /// This is just like [`DFA::from_bytes`], except it can potentially return + /// a DFA that exhibits undefined behavior if its transition table contains + /// invalid state identifiers. + /// + /// This routine is useful if you need to deserialize a DFA cheaply + /// and cannot afford the transition table validation performed by + /// `from_bytes`. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{dfa::{Automaton, dense::DFA}, HalfMatch, Input}; + /// + /// let initial = DFA::new("foo[0-9]+")?; + /// let (bytes, _) = initial.to_bytes_native_endian(); + /// // SAFETY: This is guaranteed to be safe since the bytes given come + /// // directly from a compatible serialization routine. 
+ /// let dfa: DFA<&[u32]> = unsafe { DFA::from_bytes_unchecked(&bytes)?.0 }; + /// + /// let expected = Some(HalfMatch::must(0, 8)); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345"))?); + /// # Ok::<(), Box>(()) + /// ``` + pub unsafe fn from_bytes_unchecked( + slice: &'a [u8], + ) -> Result<(DFA<&'a [u32]>, usize), DeserializeError> { + let mut nr = 0; + + nr += wire::skip_initial_padding(slice); + wire::check_alignment::(&slice[nr..])?; + nr += wire::read_label(&slice[nr..], LABEL)?; + nr += wire::read_endianness_check(&slice[nr..])?; + nr += wire::read_version(&slice[nr..], VERSION)?; + + let _unused = wire::try_read_u32(&slice[nr..], "unused space")?; + nr += size_of::(); + + let (flags, nread) = Flags::from_bytes(&slice[nr..])?; + nr += nread; + + let (tt, nread) = TransitionTable::from_bytes_unchecked(&slice[nr..])?; + nr += nread; + + let (st, nread) = StartTable::from_bytes_unchecked(&slice[nr..])?; + nr += nread; + + let (ms, nread) = MatchStates::from_bytes_unchecked(&slice[nr..])?; + nr += nread; + + let (special, nread) = Special::from_bytes(&slice[nr..])?; + nr += nread; + special.validate_state_len(tt.len(), tt.stride2)?; + + let (accels, nread) = Accels::from_bytes_unchecked(&slice[nr..])?; + nr += nread; + + let (quitset, nread) = ByteSet::from_bytes(&slice[nr..])?; + nr += nread; + + // Prefilters don't support serialization, so they're always absent. + let pre = None; + Ok((DFA { tt, st, ms, special, accels, pre, quitset, flags }, nr)) + } + + /// The implementation of the public `write_to` serialization methods, + /// which is generic over endianness. + /// + /// This is defined only for &[u32] to reduce binary size/compilation time. 
+ fn write_to( + &self, + mut dst: &mut [u8], + ) -> Result { + let nwrite = self.write_to_len(); + if dst.len() < nwrite { + return Err(SerializeError::buffer_too_small("dense DFA")); + } + dst = &mut dst[..nwrite]; + + let mut nw = 0; + nw += wire::write_label(LABEL, &mut dst[nw..])?; + nw += wire::write_endianness_check::(&mut dst[nw..])?; + nw += wire::write_version::(VERSION, &mut dst[nw..])?; + nw += { + // Currently unused, intended for future flexibility + E::write_u32(0, &mut dst[nw..]); + size_of::() + }; + nw += self.flags.write_to::(&mut dst[nw..])?; + nw += self.tt.write_to::(&mut dst[nw..])?; + nw += self.st.write_to::(&mut dst[nw..])?; + nw += self.ms.write_to::(&mut dst[nw..])?; + nw += self.special.write_to::(&mut dst[nw..])?; + nw += self.accels.write_to::(&mut dst[nw..])?; + nw += self.quitset.write_to::(&mut dst[nw..])?; + Ok(nw) + } +} + +// The following methods implement mutable routines on the internal +// representation of a DFA. As such, we must fix the first type parameter to a +// `Vec` since a generic `T: AsRef<[u32]>` does not permit mutation. We +// can get away with this because these methods are internal to the crate and +// are exclusively used during construction of the DFA. +#[cfg(feature = "dfa-build")] +impl OwnedDFA { + /// Add a start state of this DFA. + pub(crate) fn set_start_state( + &mut self, + anchored: Anchored, + start: Start, + id: StateID, + ) { + assert!(self.tt.is_valid(id), "invalid start state"); + self.st.set_start(anchored, start, id); + } + + /// Set the given transition to this DFA. Both the `from` and `to` states + /// must already exist. + pub(crate) fn set_transition( + &mut self, + from: StateID, + byte: alphabet::Unit, + to: StateID, + ) { + self.tt.set(from, byte, to); + } + + /// An an empty state (a state where all transitions lead to a dead state) + /// and return its identifier. The identifier returned is guaranteed to + /// not point to any other existing state. 
+ /// + /// If adding a state would exceed `StateID::LIMIT`, then this returns an + /// error. + pub(crate) fn add_empty_state(&mut self) -> Result { + self.tt.add_empty_state() + } + + /// Swap the two states given in the transition table. + /// + /// This routine does not do anything to check the correctness of this + /// swap. Callers must ensure that other states pointing to id1 and id2 are + /// updated appropriately. + pub(crate) fn swap_states(&mut self, id1: StateID, id2: StateID) { + self.tt.swap(id1, id2); + } + + /// Remap all of the state identifiers in this DFA according to the map + /// function given. This includes all transitions and all starting state + /// identifiers. + pub(crate) fn remap(&mut self, map: impl Fn(StateID) -> StateID) { + // We could loop over each state ID and call 'remap_state' here, but + // this is more direct: just map every transition directly. This + // technically might do a little extra work since the alphabet length + // is likely less than the stride, but if that is indeed an issue we + // should benchmark it and fix it. + for sid in self.tt.table_mut().iter_mut() { + *sid = map(*sid); + } + for sid in self.st.table_mut().iter_mut() { + *sid = map(*sid); + } + } + + /// Remap the transitions for the state given according to the function + /// given. This applies the given map function to every transition in the + /// given state and changes the transition in place to the result of the + /// map function for that transition. + pub(crate) fn remap_state( + &mut self, + id: StateID, + map: impl Fn(StateID) -> StateID, + ) { + self.tt.remap(id, map); + } + + /// Truncate the states in this DFA to the given length. + /// + /// This routine does not do anything to check the correctness of this + /// truncation. Callers must ensure that other states pointing to truncated + /// states are updated appropriately. 
+ pub(crate) fn truncate_states(&mut self, len: usize) { + self.tt.truncate(len); + } + + /// Minimize this DFA in place using Hopcroft's algorithm. + pub(crate) fn minimize(&mut self) { + Minimizer::new(self).run(); + } + + /// Updates the match state pattern ID map to use the one provided. + /// + /// This is useful when it's convenient to manipulate matching states + /// (and their corresponding pattern IDs) as a map. In particular, the + /// representation used by a DFA for this map is not amenable to mutation, + /// so if things need to be changed (like when shuffling states), it's + /// often easier to work with the map form. + pub(crate) fn set_pattern_map( + &mut self, + map: &BTreeMap>, + ) -> Result<(), BuildError> { + self.ms = self.ms.new_with_map(map)?; + Ok(()) + } + + /// Find states that have a small number of non-loop transitions and mark + /// them as candidates for acceleration during search. + pub(crate) fn accelerate(&mut self) { + // dead and quit states can never be accelerated. + if self.state_len() <= 2 { + return; + } + + // Go through every state and record their accelerator, if possible. + let mut accels = BTreeMap::new(); + // Count the number of accelerated match, start and non-match/start + // states. + let (mut cmatch, mut cstart, mut cnormal) = (0, 0, 0); + for state in self.states() { + if let Some(accel) = state.accelerate(self.byte_classes()) { + debug!( + "accelerating full DFA state {}: {:?}", + state.id().as_usize(), + accel, + ); + accels.insert(state.id(), accel); + if self.is_match_state(state.id()) { + cmatch += 1; + } else if self.is_start_state(state.id()) { + cstart += 1; + } else { + assert!(!self.is_dead_state(state.id())); + assert!(!self.is_quit_state(state.id())); + cnormal += 1; + } + } + } + // If no states were able to be accelerated, then we're done. + if accels.is_empty() { + return; + } + let original_accels_len = accels.len(); + + // A remapper keeps track of state ID changes. 
Once we're done + // shuffling, the remapper is used to rewrite all transitions in the + // DFA based on the new positions of states. + let mut remapper = Remapper::new(self); + + // As we swap states, if they are match states, we need to swap their + // pattern ID lists too (for multi-regexes). We do this by converting + // the lists to an easily swappable map, and then convert back to + // MatchStates once we're done. + let mut new_matches = self.ms.to_map(self); + + // There is at least one state that gets accelerated, so these are + // guaranteed to get set to sensible values below. + self.special.min_accel = StateID::MAX; + self.special.max_accel = StateID::ZERO; + let update_special_accel = + |special: &mut Special, accel_id: StateID| { + special.min_accel = cmp::min(special.min_accel, accel_id); + special.max_accel = cmp::max(special.max_accel, accel_id); + }; + + // Start by shuffling match states. Any match states that are + // accelerated get moved to the end of the match state range. + if cmatch > 0 && self.special.matches() { + // N.B. special.{min,max}_match do not need updating, since the + // range/number of match states does not change. Only the ordering + // of match states may change. + let mut next_id = self.special.max_match; + let mut cur_id = next_id; + while cur_id >= self.special.min_match { + if let Some(accel) = accels.remove(&cur_id) { + accels.insert(next_id, accel); + update_special_accel(&mut self.special, next_id); + + // No need to do any actual swapping for equivalent IDs. + if cur_id != next_id { + remapper.swap(self, cur_id, next_id); + + // Swap pattern IDs for match states. + let cur_pids = new_matches.remove(&cur_id).unwrap(); + let next_pids = new_matches.remove(&next_id).unwrap(); + new_matches.insert(cur_id, next_pids); + new_matches.insert(next_id, cur_pids); + } + next_id = self.tt.prev_state_id(next_id); + } + cur_id = self.tt.prev_state_id(cur_id); + } + } + + // This is where it gets tricky. 
Without acceleration, start states + // normally come right after match states. But we want accelerated + // states to be a single contiguous range (to make it very fast + // to determine whether a state *is* accelerated), while also keeping + // match and starting states as contiguous ranges for the same reason. + // So what we do here is shuffle states such that it looks like this: + // + // DQMMMMAAAAASSSSSSNNNNNNN + // | | + // |---------| + // accelerated states + // + // Where: + // D - dead state + // Q - quit state + // M - match state (may be accelerated) + // A - normal state that is accelerated + // S - start state (may be accelerated) + // N - normal state that is NOT accelerated + // + // We implement this by shuffling states, which is done by a sequence + // of pairwise swaps. We start by looking at all normal states to be + // accelerated. When we find one, we swap it with the earliest starting + // state, and then swap that with the earliest normal state. This + // preserves the contiguous property. + // + // Once we're done looking for accelerated normal states, now we look + // for accelerated starting states by moving them to the beginning + // of the starting state range (just like we moved accelerated match + // states to the end of the matching state range). + // + // For a more detailed/different perspective on this, see the docs + // in dfa/special.rs. + if cnormal > 0 { + // our next available starting and normal states for swapping. + let mut next_start_id = self.special.min_start; + let mut cur_id = self.to_state_id(self.state_len() - 1); + // This is guaranteed to exist since cnormal > 0. + let mut next_norm_id = + self.tt.next_state_id(self.special.max_start); + while cur_id >= next_norm_id { + if let Some(accel) = accels.remove(&cur_id) { + remapper.swap(self, next_start_id, cur_id); + remapper.swap(self, next_norm_id, cur_id); + // Keep our accelerator map updated with new IDs if the + // states we swapped were also accelerated. 
+ if let Some(accel2) = accels.remove(&next_norm_id) { + accels.insert(cur_id, accel2); + } + if let Some(accel2) = accels.remove(&next_start_id) { + accels.insert(next_norm_id, accel2); + } + accels.insert(next_start_id, accel); + update_special_accel(&mut self.special, next_start_id); + // Our start range shifts one to the right now. + self.special.min_start = + self.tt.next_state_id(self.special.min_start); + self.special.max_start = + self.tt.next_state_id(self.special.max_start); + next_start_id = self.tt.next_state_id(next_start_id); + next_norm_id = self.tt.next_state_id(next_norm_id); + } + // This is pretty tricky, but if our 'next_norm_id' state also + // happened to be accelerated, then the result is that it is + // now in the position of cur_id, so we need to consider it + // again. This loop is still guaranteed to terminate though, + // because when accels contains cur_id, we're guaranteed to + // increment next_norm_id even if cur_id remains unchanged. + if !accels.contains_key(&cur_id) { + cur_id = self.tt.prev_state_id(cur_id); + } + } + } + // Just like we did for match states, but we want to move accelerated + // start states to the beginning of the range instead of the end. + if cstart > 0 { + // N.B. special.{min,max}_start do not need updating, since the + // range/number of start states does not change at this point. Only + // the ordering of start states may change. + let mut next_id = self.special.min_start; + let mut cur_id = next_id; + while cur_id <= self.special.max_start { + if let Some(accel) = accels.remove(&cur_id) { + remapper.swap(self, cur_id, next_id); + accels.insert(next_id, accel); + update_special_accel(&mut self.special, next_id); + next_id = self.tt.next_state_id(next_id); + } + cur_id = self.tt.next_state_id(cur_id); + } + } + + // Remap all transitions in our DFA and assert some things. 
+ remapper.remap(self); + // This unwrap is OK because acceleration never changes the number of + // match states or patterns in those match states. Since acceleration + // runs after the pattern map has been set at least once, we know that + // our match states cannot error. + self.set_pattern_map(&new_matches).unwrap(); + self.special.set_max(); + self.special.validate().expect("special state ranges should validate"); + self.special + .validate_state_len(self.state_len(), self.stride2()) + .expect( + "special state ranges should be consistent with state length", + ); + assert_eq!( + self.special.accel_len(self.stride()), + // We record the number of accelerated states initially detected + // since the accels map is itself mutated in the process above. + // If mutated incorrectly, its size may change, and thus can't be + // trusted as a source of truth of how many accelerated states we + // expected there to be. + original_accels_len, + "mismatch with expected number of accelerated states", + ); + + // And finally record our accelerators. We kept our accels map updated + // as we shuffled states above, so the accelerators should now + // correspond to a contiguous range in the state ID space. (Which we + // assert.) + let mut prev: Option = None; + for (id, accel) in accels { + assert!(prev.map_or(true, |p| self.tt.next_state_id(p) == id)); + prev = Some(id); + self.accels.add(accel); + } + } + + /// Shuffle the states in this DFA so that starting states, match + /// states and accelerated states are all contiguous. + /// + /// See dfa/special.rs for more details. + pub(crate) fn shuffle( + &mut self, + mut matches: BTreeMap>, + ) -> Result<(), BuildError> { + // The determinizer always adds a quit state and it is always second. + self.special.quit_id = self.to_state_id(1); + // If all we have are the dead and quit states, then we're done and + // the DFA will never produce a match. 
+ if self.state_len() <= 2 { + self.special.set_max(); + return Ok(()); + } + + // Collect all our non-DEAD start states into a convenient set and + // confirm there is no overlap with match states. In the classicl DFA + // construction, start states can be match states. But because of + // look-around, we delay all matches by a byte, which prevents start + // states from being match states. + let mut is_start: BTreeSet = BTreeSet::new(); + for (start_id, _, _) in self.starts() { + // If a starting configuration points to a DEAD state, then we + // don't want to shuffle it. The DEAD state is always the first + // state with ID=0. So we can just leave it be. + if start_id == DEAD { + continue; + } + assert!( + !matches.contains_key(&start_id), + "{:?} is both a start and a match state, which is not allowed", + start_id, + ); + is_start.insert(start_id); + } + + // We implement shuffling by a sequence of pairwise swaps of states. + // Since we have a number of things referencing states via their + // IDs and swapping them changes their IDs, we need to record every + // swap we make so that we can remap IDs. The remapper handles this + // book-keeping for us. + let mut remapper = Remapper::new(self); + + // Shuffle matching states. + if matches.is_empty() { + self.special.min_match = DEAD; + self.special.max_match = DEAD; + } else { + // The determinizer guarantees that the first two states are the + // dead and quit states, respectively. We want our match states to + // come right after quit. + let mut next_id = self.to_state_id(2); + let mut new_matches = BTreeMap::new(); + self.special.min_match = next_id; + for (id, pids) in matches { + remapper.swap(self, next_id, id); + new_matches.insert(next_id, pids); + // If we swapped a start state, then update our set. 
+ if is_start.contains(&next_id) { + is_start.remove(&next_id); + is_start.insert(id); + } + next_id = self.tt.next_state_id(next_id); + } + matches = new_matches; + self.special.max_match = cmp::max( + self.special.min_match, + self.tt.prev_state_id(next_id), + ); + } + + // Shuffle starting states. + { + let mut next_id = self.to_state_id(2); + if self.special.matches() { + next_id = self.tt.next_state_id(self.special.max_match); + } + self.special.min_start = next_id; + for id in is_start { + remapper.swap(self, next_id, id); + next_id = self.tt.next_state_id(next_id); + } + self.special.max_start = cmp::max( + self.special.min_start, + self.tt.prev_state_id(next_id), + ); + } + + // Finally remap all transitions in our DFA. + remapper.remap(self); + self.set_pattern_map(&matches)?; + self.special.set_max(); + self.special.validate().expect("special state ranges should validate"); + self.special + .validate_state_len(self.state_len(), self.stride2()) + .expect( + "special state ranges should be consistent with state length", + ); + Ok(()) + } + + /// Checks whether there are universal start states (both anchored and + /// unanchored), and if so, sets the relevant fields to the start state + /// IDs. + /// + /// Universal start states occur precisely when the all patterns in the + /// DFA have no look-around assertions in their prefix. + fn set_universal_starts(&mut self) { + assert_eq!(6, Start::len(), "expected 6 start configurations"); + + let start_id = |dfa: &mut OwnedDFA, + anchored: Anchored, + start: Start| { + // This OK because we only call 'start' under conditions + // in which we know it will succeed. 
+ dfa.st.start(anchored, start).expect("valid Input configuration") + }; + if self.start_kind().has_unanchored() { + let anchor = Anchored::No; + let sid = start_id(self, anchor, Start::NonWordByte); + if sid == start_id(self, anchor, Start::WordByte) + && sid == start_id(self, anchor, Start::Text) + && sid == start_id(self, anchor, Start::LineLF) + && sid == start_id(self, anchor, Start::LineCR) + && sid == start_id(self, anchor, Start::CustomLineTerminator) + { + self.st.universal_start_unanchored = Some(sid); + } + } + if self.start_kind().has_anchored() { + let anchor = Anchored::Yes; + let sid = start_id(self, anchor, Start::NonWordByte); + if sid == start_id(self, anchor, Start::WordByte) + && sid == start_id(self, anchor, Start::Text) + && sid == start_id(self, anchor, Start::LineLF) + && sid == start_id(self, anchor, Start::LineCR) + && sid == start_id(self, anchor, Start::CustomLineTerminator) + { + self.st.universal_start_anchored = Some(sid); + } + } + } +} + +// A variety of generic internal methods for accessing DFA internals. +impl> DFA { + /// Return the info about special states. + pub(crate) fn special(&self) -> &Special { + &self.special + } + + /// Return the info about special states as a mutable borrow. + #[cfg(feature = "dfa-build")] + pub(crate) fn special_mut(&mut self) -> &mut Special { + &mut self.special + } + + /// Returns the quit set (may be empty) used by this DFA. + pub(crate) fn quitset(&self) -> &ByteSet { + &self.quitset + } + + /// Returns the flags for this DFA. + pub(crate) fn flags(&self) -> &Flags { + &self.flags + } + + /// Returns an iterator over all states in this DFA. + /// + /// This iterator yields a tuple for each state. The first element of the + /// tuple corresponds to a state's identifier, and the second element + /// corresponds to the state itself (comprised of its transitions). + pub(crate) fn states(&self) -> StateIter<'_, T> { + self.tt.states() + } + + /// Return the total number of states in this DFA. 
Every DFA has at least + /// 1 state, even the empty DFA. + pub(crate) fn state_len(&self) -> usize { + self.tt.len() + } + + /// Return an iterator over all pattern IDs for the given match state. + /// + /// If the given state is not a match state, then this panics. + #[cfg(feature = "dfa-build")] + pub(crate) fn pattern_id_slice(&self, id: StateID) -> &[PatternID] { + assert!(self.is_match_state(id)); + self.ms.pattern_id_slice(self.match_state_index(id)) + } + + /// Return the total number of pattern IDs for the given match state. + /// + /// If the given state is not a match state, then this panics. + pub(crate) fn match_pattern_len(&self, id: StateID) -> usize { + assert!(self.is_match_state(id)); + self.ms.pattern_len(self.match_state_index(id)) + } + + /// Returns the total number of patterns matched by this DFA. + pub(crate) fn pattern_len(&self) -> usize { + self.ms.pattern_len + } + + /// Returns a map from match state ID to a list of pattern IDs that match + /// in that state. + #[cfg(feature = "dfa-build")] + pub(crate) fn pattern_map(&self) -> BTreeMap> { + self.ms.to_map(self) + } + + /// Returns the ID of the quit state for this DFA. + #[cfg(feature = "dfa-build")] + pub(crate) fn quit_id(&self) -> StateID { + self.to_state_id(1) + } + + /// Convert the given state identifier to the state's index. The state's + /// index corresponds to the position in which it appears in the transition + /// table. When a DFA is NOT premultiplied, then a state's identifier is + /// also its index. When a DFA is premultiplied, then a state's identifier + /// is equal to `index * alphabet_len`. This routine reverses that. + pub(crate) fn to_index(&self, id: StateID) -> usize { + self.tt.to_index(id) + } + + /// Convert an index to a state (in the range 0..self.state_len()) to an + /// actual state identifier. + /// + /// This is useful when using a `Vec` as an efficient map keyed by state + /// to some other information (such as a remapped state ID). 
+ #[cfg(feature = "dfa-build")] + pub(crate) fn to_state_id(&self, index: usize) -> StateID { + self.tt.to_state_id(index) + } + + /// Return the table of state IDs for this DFA's start states. + pub(crate) fn starts(&self) -> StartStateIter<'_> { + self.st.iter() + } + + /// Returns the index of the match state for the given ID. If the + /// given ID does not correspond to a match state, then this may + /// panic or produce an incorrect result. + #[cfg_attr(feature = "perf-inline", inline(always))] + fn match_state_index(&self, id: StateID) -> usize { + debug_assert!(self.is_match_state(id)); + // This is one of the places where we rely on the fact that match + // states are contiguous in the transition table. Namely, that the + // first match state ID always corresponds to dfa.special.min_match. + // From there, since we know the stride, we can compute the overall + // index of any match state given the match state's ID. + let min = self.special().min_match.as_usize(); + // CORRECTNESS: We're allowed to produce an incorrect result or panic, + // so both the subtraction and the unchecked StateID construction is + // OK. + self.to_index(StateID::new_unchecked(id.as_usize() - min)) + } + + /// Returns the index of the accelerator state for the given ID. If the + /// given ID does not correspond to an accelerator state, then this may + /// panic or produce an incorrect result. + fn accelerator_index(&self, id: StateID) -> usize { + let min = self.special().min_accel.as_usize(); + // CORRECTNESS: We're allowed to produce an incorrect result or panic, + // so both the subtraction and the unchecked StateID construction is + // OK. + self.to_index(StateID::new_unchecked(id.as_usize() - min)) + } + + /// Return the accelerators for this DFA. + fn accels(&self) -> Accels<&[u32]> { + self.accels.as_ref() + } + + /// Return this DFA's transition table as a slice. 
+ fn trans(&self) -> &[StateID] { + self.tt.table() + } +} + +impl> fmt::Debug for DFA { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + writeln!(f, "dense::DFA(")?; + for state in self.states() { + fmt_state_indicator(f, self, state.id())?; + let id = if f.alternate() { + state.id().as_usize() + } else { + self.to_index(state.id()) + }; + write!(f, "{:06?}: ", id)?; + state.fmt(f)?; + write!(f, "\n")?; + } + writeln!(f, "")?; + for (i, (start_id, anchored, sty)) in self.starts().enumerate() { + let id = if f.alternate() { + start_id.as_usize() + } else { + self.to_index(start_id) + }; + if i % self.st.stride == 0 { + match anchored { + Anchored::No => writeln!(f, "START-GROUP(unanchored)")?, + Anchored::Yes => writeln!(f, "START-GROUP(anchored)")?, + Anchored::Pattern(pid) => { + writeln!(f, "START_GROUP(pattern: {:?})", pid)? + } + } + } + writeln!(f, " {:?} => {:06?}", sty, id)?; + } + if self.pattern_len() > 1 { + writeln!(f, "")?; + for i in 0..self.ms.len() { + let id = self.ms.match_state_id(self, i); + let id = if f.alternate() { + id.as_usize() + } else { + self.to_index(id) + }; + write!(f, "MATCH({:06?}): ", id)?; + for (i, &pid) in self.ms.pattern_id_slice(i).iter().enumerate() + { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{:?}", pid)?; + } + writeln!(f, "")?; + } + } + writeln!(f, "state length: {:?}", self.state_len())?; + writeln!(f, "pattern length: {:?}", self.pattern_len())?; + writeln!(f, "flags: {:?}", self.flags)?; + writeln!(f, ")")?; + Ok(()) + } +} + +// SAFETY: We assert that our implementation of each method is correct. 
+unsafe impl> Automaton for DFA { + #[cfg_attr(feature = "perf-inline", inline(always))] + fn is_special_state(&self, id: StateID) -> bool { + self.special.is_special_state(id) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn is_dead_state(&self, id: StateID) -> bool { + self.special.is_dead_state(id) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn is_quit_state(&self, id: StateID) -> bool { + self.special.is_quit_state(id) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn is_match_state(&self, id: StateID) -> bool { + self.special.is_match_state(id) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn is_start_state(&self, id: StateID) -> bool { + self.special.is_start_state(id) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn is_accel_state(&self, id: StateID) -> bool { + self.special.is_accel_state(id) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn next_state(&self, current: StateID, input: u8) -> StateID { + let input = self.byte_classes().get(input); + let o = current.as_usize() + usize::from(input); + self.trans()[o] + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + unsafe fn next_state_unchecked( + &self, + current: StateID, + byte: u8, + ) -> StateID { + // We don't (or shouldn't) need an unchecked variant for the byte + // class mapping, since bound checks should be omitted automatically + // by virtue of its representation. If this ends up not being true as + // confirmed by codegen, please file an issue. 
---AG + let class = self.byte_classes().get(byte); + let o = current.as_usize() + usize::from(class); + let next = *self.trans().get_unchecked(o); + next + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn next_eoi_state(&self, current: StateID) -> StateID { + let eoi = self.byte_classes().eoi().as_usize(); + let o = current.as_usize() + eoi; + self.trans()[o] + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn pattern_len(&self) -> usize { + self.ms.pattern_len + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn match_len(&self, id: StateID) -> usize { + self.match_pattern_len(id) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn match_pattern(&self, id: StateID, match_index: usize) -> PatternID { + // This is an optimization for the very common case of a DFA with a + // single pattern. This conditional avoids a somewhat more costly path + // that finds the pattern ID from the state machine, which requires + // a bit of slicing/pointer-chasing. This optimization tends to only + // matter when matches are frequent. 
+ if self.ms.pattern_len == 1 { + return PatternID::ZERO; + } + let state_index = self.match_state_index(id); + self.ms.pattern_id(state_index, match_index) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn has_empty(&self) -> bool { + self.flags.has_empty + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn is_utf8(&self) -> bool { + self.flags.is_utf8 + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn is_always_start_anchored(&self) -> bool { + self.flags.is_always_start_anchored + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn start_state( + &self, + config: &start::Config, + ) -> Result { + let anchored = config.get_anchored(); + let start = match config.get_look_behind() { + None => Start::Text, + Some(byte) => { + if !self.quitset.is_empty() && self.quitset.contains(byte) { + return Err(StartError::quit(byte)); + } + self.st.start_map.get(byte) + } + }; + self.st.start(anchored, start) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn universal_start_state(&self, mode: Anchored) -> Option { + match mode { + Anchored::No => self.st.universal_start_unanchored, + Anchored::Yes => self.st.universal_start_anchored, + Anchored::Pattern(_) => None, + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn accelerator(&self, id: StateID) -> &[u8] { + if !self.is_accel_state(id) { + return &[]; + } + self.accels.needles(self.accelerator_index(id)) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn get_prefilter(&self) -> Option<&Prefilter> { + self.pre.as_ref() + } +} + +/// The transition table portion of a dense DFA. +/// +/// The transition table is the core part of the DFA in that it describes how +/// to move from one state to another based on the input sequence observed. +#[derive(Clone)] +pub(crate) struct TransitionTable { + /// A contiguous region of memory representing the transition table in + /// row-major order. The representation is dense. 
That is, every state + /// has precisely the same number of transitions. The maximum number of + /// transitions per state is 257 (256 for each possible byte value, plus 1 + /// for the special EOI transition). If a DFA has been instructed to use + /// byte classes (the default), then the number of transitions is usually + /// substantially fewer. + /// + /// In practice, T is either `Vec` or `&[u32]`. + table: T, + /// A set of equivalence classes, where a single equivalence class + /// represents a set of bytes that never discriminate between a match + /// and a non-match in the DFA. Each equivalence class corresponds to a + /// single character in this DFA's alphabet, where the maximum number of + /// characters is 257 (each possible value of a byte plus the special + /// EOI transition). Consequently, the number of equivalence classes + /// corresponds to the number of transitions for each DFA state. Note + /// though that the *space* used by each DFA state in the transition table + /// may be larger. The total space used by each DFA state is known as the + /// stride. + /// + /// The only time the number of equivalence classes is fewer than 257 is if + /// the DFA's kind uses byte classes (which is the default). Equivalence + /// classes should generally only be disabled when debugging, so that + /// the transitions themselves aren't obscured. Disabling them has no + /// other benefit, since the equivalence class map is always used while + /// searching. In the vast majority of cases, the number of equivalence + /// classes is substantially smaller than 257, particularly when large + /// Unicode classes aren't used. + classes: ByteClasses, + /// The stride of each DFA state, expressed as a power-of-two exponent. + /// + /// The stride of a DFA corresponds to the total amount of space used by + /// each DFA state in the transition table. 
This may be bigger than the + /// size of a DFA's alphabet, since the stride is always the smallest + /// power of two greater than or equal to the alphabet size. + /// + /// While this wastes space, this avoids the need for integer division + /// to convert between premultiplied state IDs and their corresponding + /// indices. Instead, we can use simple bit-shifts. + /// + /// See the docs for the `stride2` method for more details. + /// + /// The minimum `stride2` value is `1` (corresponding to a stride of `2`) + /// while the maximum `stride2` value is `9` (corresponding to a stride of + /// `512`). The maximum is not `8` since the maximum alphabet size is `257` + /// when accounting for the special EOI transition. However, an alphabet + /// length of that size is exceptionally rare since the alphabet is shrunk + /// into equivalence classes. + stride2: usize, +} + +impl<'a> TransitionTable<&'a [u32]> { + /// Deserialize a transition table starting at the beginning of `slice`. + /// Upon success, return the total number of bytes read along with the + /// transition table. + /// + /// If there was a problem deserializing any part of the transition table, + /// then this returns an error. Notably, if the given slice does not have + /// the same alignment as `StateID`, then this will return an error (among + /// other possible errors). + /// + /// This is guaranteed to execute in constant time. + /// + /// # Safety + /// + /// This routine is not safe because it does not check the validity of the + /// transition table itself. In particular, the transition table can be + /// quite large, so checking its validity can be somewhat expensive. An + /// invalid transition table is not safe because other code may rely on the + /// transition table being correct (such as explicit bounds check elision). + /// Therefore, an invalid transition table can lead to undefined behavior. 
+ /// + /// Callers that use this function must either pass on the safety invariant + /// or guarantee that the bytes given contain a valid transition table. + /// This guarantee is upheld by the bytes written by `write_to`. + unsafe fn from_bytes_unchecked( + mut slice: &'a [u8], + ) -> Result<(TransitionTable<&'a [u32]>, usize), DeserializeError> { + let slice_start = slice.as_ptr().as_usize(); + + let (state_len, nr) = + wire::try_read_u32_as_usize(slice, "state length")?; + slice = &slice[nr..]; + + let (stride2, nr) = wire::try_read_u32_as_usize(slice, "stride2")?; + slice = &slice[nr..]; + + let (classes, nr) = ByteClasses::from_bytes(slice)?; + slice = &slice[nr..]; + + // The alphabet length (determined by the byte class map) cannot be + // bigger than the stride (total space used by each DFA state). + if stride2 > 9 { + return Err(DeserializeError::generic( + "dense DFA has invalid stride2 (too big)", + )); + } + // It also cannot be zero, since even a DFA that never matches anything + // has a non-zero number of states with at least two equivalence + // classes: one for all 256 byte values and another for the EOI + // sentinel. + if stride2 < 1 { + return Err(DeserializeError::generic( + "dense DFA has invalid stride2 (too small)", + )); + } + // This is OK since 1 <= stride2 <= 9. 
+ let stride = + 1usize.checked_shl(u32::try_from(stride2).unwrap()).unwrap(); + if classes.alphabet_len() > stride { + return Err(DeserializeError::generic( + "alphabet size cannot be bigger than transition table stride", + )); + } + + let trans_len = + wire::shl(state_len, stride2, "dense table transition length")?; + let table_bytes_len = wire::mul( + trans_len, + StateID::SIZE, + "dense table state byte length", + )?; + wire::check_slice_len(slice, table_bytes_len, "transition table")?; + wire::check_alignment::(slice)?; + let table_bytes = &slice[..table_bytes_len]; + slice = &slice[table_bytes_len..]; + // SAFETY: Since StateID is always representable as a u32, all we need + // to do is ensure that we have the proper length and alignment. We've + // checked both above, so the cast below is safe. + // + // N.B. This is the only not-safe code in this function. + let table = core::slice::from_raw_parts( + table_bytes.as_ptr().cast::(), + trans_len, + ); + let tt = TransitionTable { table, classes, stride2 }; + Ok((tt, slice.as_ptr().as_usize() - slice_start)) + } +} + +#[cfg(feature = "dfa-build")] +impl TransitionTable> { + /// Create a minimal transition table with just two states: a dead state + /// and a quit state. The alphabet length and stride of the transition + /// table is determined by the given set of equivalence classes. + fn minimal(classes: ByteClasses) -> TransitionTable> { + let mut tt = TransitionTable { + table: vec![], + classes, + stride2: classes.stride2(), + }; + // Two states, regardless of alphabet size, can always fit into u32. + tt.add_empty_state().unwrap(); // dead state + tt.add_empty_state().unwrap(); // quit state + tt + } + + /// Set a transition in this table. Both the `from` and `to` states must + /// already exist, otherwise this panics. `unit` should correspond to the + /// transition out of `from` to set to `to`. 
+ fn set(&mut self, from: StateID, unit: alphabet::Unit, to: StateID) { + assert!(self.is_valid(from), "invalid 'from' state"); + assert!(self.is_valid(to), "invalid 'to' state"); + self.table[from.as_usize() + self.classes.get_by_unit(unit)] = + to.as_u32(); + } + + /// Add an empty state (a state where all transitions lead to a dead state) + /// and return its identifier. The identifier returned is guaranteed to + /// not point to any other existing state. + /// + /// If adding a state would exhaust the state identifier space, then this + /// returns an error. + fn add_empty_state(&mut self) -> Result { + // Normally, to get a fresh state identifier, we would just + // take the index of the next state added to the transition + // table. However, we actually perform an optimization here + // that premultiplies state IDs by the stride, such that they + // point immediately at the beginning of their transitions in + // the transition table. This avoids an extra multiplication + // instruction for state lookup at search time. + // + // Premultiplied identifiers means that instead of your matching + // loop looking something like this: + // + // state = dfa.start + // for byte in haystack: + // next = dfa.transitions[state * stride + byte] + // if dfa.is_match(next): + // return true + // return false + // + // it can instead look like this: + // + // state = dfa.start + // for byte in haystack: + // next = dfa.transitions[state + byte] + // if dfa.is_match(next): + // return true + // return false + // + // In other words, we save a multiplication instruction in the + // critical path. This turns out to be a decent performance win. + // The cost of using premultiplied state ids is that they can + // require a bigger state id representation. (And they also make + // the code a bit more complex, especially during minimization and + // when reshuffling states, as one needs to convert back and forth + // between state IDs and state indices.) 
+ // + // To do this, we simply take the index of the state into the + // entire transition table, rather than the index of the state + // itself. e.g., If the stride is 64, then the ID of the 3rd state + // is 192, not 2. + let next = self.table.len(); + let id = + StateID::new(next).map_err(|_| BuildError::too_many_states())?; + self.table.extend(iter::repeat(0).take(self.stride())); + Ok(id) + } + + /// Swap the two states given in this transition table. + /// + /// This routine does not do anything to check the correctness of this + /// swap. Callers must ensure that other states pointing to id1 and id2 are + /// updated appropriately. + /// + /// Both id1 and id2 must point to valid states, otherwise this panics. + fn swap(&mut self, id1: StateID, id2: StateID) { + assert!(self.is_valid(id1), "invalid 'id1' state: {:?}", id1); + assert!(self.is_valid(id2), "invalid 'id2' state: {:?}", id2); + // We only need to swap the parts of the state that are used. So if the + // stride is 64, but the alphabet length is only 33, then we save a lot + // of work. + for b in 0..self.classes.alphabet_len() { + self.table.swap(id1.as_usize() + b, id2.as_usize() + b); + } + } + + /// Remap the transitions for the state given according to the function + /// given. This applies the given map function to every transition in the + /// given state and changes the transition in place to the result of the + /// map function for that transition. + fn remap(&mut self, id: StateID, map: impl Fn(StateID) -> StateID) { + for byte in 0..self.alphabet_len() { + let i = id.as_usize() + byte; + let next = self.table()[i]; + self.table_mut()[id.as_usize() + byte] = map(next); + } + } + + /// Truncate the states in this transition table to the given length. + /// + /// This routine does not do anything to check the correctness of this + /// truncation. Callers must ensure that other states pointing to truncated + /// states are updated appropriately. 
+ fn truncate(&mut self, len: usize) { + self.table.truncate(len << self.stride2); + } +} + +impl> TransitionTable { + /// Writes a serialized form of this transition table to the buffer given. + /// If the buffer is too small, then an error is returned. To determine + /// how big the buffer must be, use `write_to_len`. + fn write_to( + &self, + mut dst: &mut [u8], + ) -> Result { + let nwrite = self.write_to_len(); + if dst.len() < nwrite { + return Err(SerializeError::buffer_too_small("transition table")); + } + dst = &mut dst[..nwrite]; + + // write state length + // Unwrap is OK since number of states is guaranteed to fit in a u32. + E::write_u32(u32::try_from(self.len()).unwrap(), dst); + dst = &mut dst[size_of::()..]; + + // write state stride (as power of 2) + // Unwrap is OK since stride2 is guaranteed to be <= 9. + E::write_u32(u32::try_from(self.stride2).unwrap(), dst); + dst = &mut dst[size_of::()..]; + + // write byte class map + let n = self.classes.write_to(dst)?; + dst = &mut dst[n..]; + + // write actual transitions + for &sid in self.table() { + let n = wire::write_state_id::(sid, &mut dst); + dst = &mut dst[n..]; + } + Ok(nwrite) + } + + /// Returns the number of bytes the serialized form of this transition + /// table will use. + fn write_to_len(&self) -> usize { + size_of::() // state length + + size_of::() // stride2 + + self.classes.write_to_len() + + (self.table().len() * StateID::SIZE) + } + + /// Validates that every state ID in this transition table is valid. + /// + /// That is, every state ID can be used to correctly index a state in this + /// table. + fn validate(&self, dfa: &DFA) -> Result<(), DeserializeError> { + let sp = &dfa.special; + for state in self.states() { + // We check that the ID itself is well formed. That is, if it's + // a special state then it must actually be a quit, dead, accel, + // match or start state. 
+ if sp.is_special_state(state.id()) { + let is_actually_special = sp.is_dead_state(state.id()) + || sp.is_quit_state(state.id()) + || sp.is_match_state(state.id()) + || sp.is_start_state(state.id()) + || sp.is_accel_state(state.id()); + if !is_actually_special { + // This is kind of a cryptic error message... + return Err(DeserializeError::generic( + "found dense state tagged as special but \ + wasn't actually special", + )); + } + if sp.is_match_state(state.id()) + && dfa.match_len(state.id()) == 0 + { + return Err(DeserializeError::generic( + "found match state with zero pattern IDs", + )); + } + } + for (_, to) in state.transitions() { + if !self.is_valid(to) { + return Err(DeserializeError::generic( + "found invalid state ID in transition table", + )); + } + } + } + Ok(()) + } + + /// Converts this transition table to a borrowed value. + fn as_ref(&self) -> TransitionTable<&'_ [u32]> { + TransitionTable { + table: self.table.as_ref(), + classes: self.classes.clone(), + stride2: self.stride2, + } + } + + /// Converts this transition table to an owned value. + #[cfg(feature = "alloc")] + fn to_owned(&self) -> TransitionTable> { + TransitionTable { + table: self.table.as_ref().to_vec(), + classes: self.classes.clone(), + stride2: self.stride2, + } + } + + /// Return the state for the given ID. If the given ID is not valid, then + /// this panics. + fn state(&self, id: StateID) -> State<'_> { + assert!(self.is_valid(id)); + + let i = id.as_usize(); + State { + id, + stride2: self.stride2, + transitions: &self.table()[i..i + self.alphabet_len()], + } + } + + /// Returns an iterator over all states in this transition table. + /// + /// This iterator yields a tuple for each state. The first element of the + /// tuple corresponds to a state's identifier, and the second element + /// corresponds to the state itself (comprised of its transitions). 
+ fn states(&self) -> StateIter<'_, T> { + StateIter { + tt: self, + it: self.table().chunks(self.stride()).enumerate(), + } + } + + /// Convert a state identifier to an index to a state (in the range + /// 0..self.len()). + /// + /// This is useful when using a `Vec` as an efficient map keyed by state + /// to some other information (such as a remapped state ID). + /// + /// If the given ID is not valid, then this may panic or produce an + /// incorrect index. + fn to_index(&self, id: StateID) -> usize { + id.as_usize() >> self.stride2 + } + + /// Convert an index to a state (in the range 0..self.len()) to an actual + /// state identifier. + /// + /// This is useful when using a `Vec` as an efficient map keyed by state + /// to some other information (such as a remapped state ID). + /// + /// If the given index is not in the specified range, then this may panic + /// or produce an incorrect state ID. + fn to_state_id(&self, index: usize) -> StateID { + // CORRECTNESS: If the given index is not valid, then it is not + // required for this to panic or return a valid state ID. + StateID::new_unchecked(index << self.stride2) + } + + /// Returns the state ID for the state immediately following the one given. + /// + /// This does not check whether the state ID returned is invalid. In fact, + /// if the state ID given is the last state in this DFA, then the state ID + /// returned is guaranteed to be invalid. + #[cfg(feature = "dfa-build")] + fn next_state_id(&self, id: StateID) -> StateID { + self.to_state_id(self.to_index(id).checked_add(1).unwrap()) + } + + /// Returns the state ID for the state immediately preceding the one given. + /// + /// If the dead ID given (which is zero), then this panics. + #[cfg(feature = "dfa-build")] + fn prev_state_id(&self, id: StateID) -> StateID { + self.to_state_id(self.to_index(id).checked_sub(1).unwrap()) + } + + /// Returns the table as a slice of state IDs. 
+ fn table(&self) -> &[StateID] { + wire::u32s_to_state_ids(self.table.as_ref()) + } + + /// Returns the total number of states in this transition table. + /// + /// Note that a DFA always has at least two states: the dead and quit + /// states. In particular, the dead state always has ID 0 and is + /// correspondingly always the first state. The dead state is never a match + /// state. + fn len(&self) -> usize { + self.table().len() >> self.stride2 + } + + /// Returns the total stride for every state in this DFA. This corresponds + /// to the total number of transitions used by each state in this DFA's + /// transition table. + fn stride(&self) -> usize { + 1 << self.stride2 + } + + /// Returns the total number of elements in the alphabet for this + /// transition table. This is always less than or equal to `self.stride()`. + /// It is only equal when the alphabet length is a power of 2. Otherwise, + /// it is always strictly less. + fn alphabet_len(&self) -> usize { + self.classes.alphabet_len() + } + + /// Returns true if and only if the given state ID is valid for this + /// transition table. Validity in this context means that the given ID can + /// be used as a valid offset with `self.stride()` to index this transition + /// table. + fn is_valid(&self, id: StateID) -> bool { + let id = id.as_usize(); + id < self.table().len() && id % self.stride() == 0 + } + + /// Return the memory usage, in bytes, of this transition table. + /// + /// This does not include the size of a `TransitionTable` value itself. + fn memory_usage(&self) -> usize { + self.table().len() * StateID::SIZE + } +} + +#[cfg(feature = "dfa-build")] +impl> TransitionTable { + /// Returns the table as a slice of state IDs. + fn table_mut(&mut self) -> &mut [StateID] { + wire::u32s_to_state_ids_mut(self.table.as_mut()) + } +} + +/// The set of all possible starting states in a DFA. 
+/// +/// The set of starting states corresponds to the possible choices one can make +/// in terms of starting a DFA. That is, before following the first transition, +/// you first need to select the state that you start in. +/// +/// Normally, a DFA converted from an NFA that has a single starting state +/// would itself just have one starting state. However, our support for look +/// around generally requires more starting states. The correct starting state +/// is chosen based on certain properties of the position at which we begin +/// our search. +/// +/// Before listing those properties, we first must define two terms: +/// +/// * `haystack` - The bytes to execute the search. The search always starts +/// at the beginning of `haystack` and ends before or at the end of +/// `haystack`. +/// * `context` - The (possibly empty) bytes surrounding `haystack`. `haystack` +/// must be contained within `context` such that `context` is at least as big +/// as `haystack`. +/// +/// This split is crucial for dealing with look-around. For example, consider +/// the context `foobarbaz`, the haystack `bar` and the regex `^bar$`. This +/// regex should _not_ match the haystack since `bar` does not appear at the +/// beginning of the input. Similarly, the regex `\Bbar\B` should match the +/// haystack because `bar` is not surrounded by word boundaries. But a search +/// that does not take context into account would not permit `\B` to match +/// since the beginning of any string matches a word boundary. Similarly, a +/// search that does not take context into account when searching `^bar$` in +/// the haystack `bar` would produce a match when it shouldn't. +/// +/// Thus, it follows that the starting state is chosen based on the following +/// criteria, derived from the position at which the search starts in the +/// `context` (corresponding to the start of `haystack`): +/// +/// 1. If the search starts at the beginning of `context`, then the `Text` +/// start state is used. 
(Since `^` corresponds to +/// `hir::Anchor::Start`.) +/// 2. If the search starts at a position immediately following a line +/// terminator, then the `Line` start state is used. (Since `(?m:^)` +/// corresponds to `hir::Anchor::StartLF`.) +/// 3. If the search starts at a position immediately following a byte +/// classified as a "word" character (`[_0-9a-zA-Z]`), then the `WordByte` +/// start state is used. (Since `(?-u:\b)` corresponds to a word boundary.) +/// 4. Otherwise, if the search starts at a position immediately following +/// a byte that is not classified as a "word" character (`[^_0-9a-zA-Z]`), +/// then the `NonWordByte` start state is used. (Since `(?-u:\B)` +/// corresponds to a not-word-boundary.) +/// +/// (N.B. Unicode word boundaries are not supported by the DFA because they +/// require multi-byte look-around and this is difficult to support in a DFA.) +/// +/// To further complicate things, we also support constructing individual +/// anchored start states for each pattern in the DFA. (Which is required to +/// implement overlapping regexes correctly, but is also generally useful.) +/// Thus, when individual start states for each pattern are enabled, then the +/// total number of start states represented is `4 + (4 * #patterns)`, where +/// the 4 comes from each of the 4 possibilities above. The first 4 represents +/// the starting states for the entire DFA, which support searching for +/// multiple patterns simultaneously (possibly unanchored). +/// +/// If individual start states are disabled, then this will only store 4 +/// start states. Typically, individual start states are only enabled when +/// constructing the reverse DFA for regex matching. But they are also useful +/// for building DFAs that can search for a specific pattern or even to support +/// both anchored and unanchored searches with the same DFA. 
+/// +/// Note though that while the start table always has either `4` or +/// `4 + (4 * #patterns)` starting state *ids*, the total number of states +/// might be considerably smaller. That is, many of the IDs may be duplicative. +/// (For example, if a regex doesn't have a `\b` sub-pattern, then there's no +/// reason to generate a unique starting state for handling word boundaries. +/// Similarly for start/end anchors.) +#[derive(Clone)] +pub(crate) struct StartTable { + /// The initial start state IDs. + /// + /// In practice, T is either `Vec` or `&[u32]`. + /// + /// The first `2 * stride` (currently always 8) entries always correspond + /// to the starts states for the entire DFA, with the first 4 entries being + /// for unanchored searches and the second 4 entries being for anchored + /// searches. To keep things simple, we always use 8 entries even if the + /// `StartKind` is not both. + /// + /// After that, there are `stride * patterns` state IDs, where `patterns` + /// may be zero in the case of a DFA with no patterns or in the case where + /// the DFA was built without enabling starting states for each pattern. + table: T, + /// The starting state configuration supported. When 'both', both + /// unanchored and anchored searches work. When 'unanchored', anchored + /// searches panic. When 'anchored', unanchored searches panic. + kind: StartKind, + /// The start state configuration for every possible byte. + start_map: StartByteMap, + /// The number of starting state IDs per pattern. + stride: usize, + /// The total number of patterns for which starting states are encoded. + /// This is `None` for DFAs that were built without start states for each + /// pattern. Thus, one cannot use this field to say how many patterns + /// are in the DFA in all cases. It is specific to how many patterns are + /// represented in this start table. + pattern_len: Option, + /// The universal starting state for unanchored searches. 
This is only + /// present when the DFA supports unanchored searches and when all starting + /// state IDs for an unanchored search are equivalent. + universal_start_unanchored: Option, + /// The universal starting state for anchored searches. This is only + /// present when the DFA supports anchored searches and when all starting + /// state IDs for an anchored search are equivalent. + universal_start_anchored: Option, +} + +#[cfg(feature = "dfa-build")] +impl StartTable> { + /// Create a valid set of start states all pointing to the dead state. + /// + /// When the corresponding DFA is constructed with start states for each + /// pattern, then `patterns` should be the number of patterns. Otherwise, + /// it should be zero. + /// + /// If the total table size could exceed the allocatable limit, then this + /// returns an error. In practice, this is unlikely to be able to occur, + /// since it's likely that allocation would have failed long before it got + /// to this point. + fn dead( + kind: StartKind, + lookm: &LookMatcher, + pattern_len: Option, + ) -> Result>, BuildError> { + if let Some(len) = pattern_len { + assert!(len <= PatternID::LIMIT); + } + let stride = Start::len(); + // OK because 2*4 is never going to overflow anything. 
+ let starts_len = stride.checked_mul(2).unwrap(); + let pattern_starts_len = + match stride.checked_mul(pattern_len.unwrap_or(0)) { + Some(x) => x, + None => return Err(BuildError::too_many_start_states()), + }; + let table_len = match starts_len.checked_add(pattern_starts_len) { + Some(x) => x, + None => return Err(BuildError::too_many_start_states()), + }; + if let Err(_) = isize::try_from(table_len) { + return Err(BuildError::too_many_start_states()); + } + let table = vec![DEAD.as_u32(); table_len]; + let start_map = StartByteMap::new(lookm); + Ok(StartTable { + table, + kind, + start_map, + stride, + pattern_len, + universal_start_unanchored: None, + universal_start_anchored: None, + }) + } +} + +impl<'a> StartTable<&'a [u32]> { + /// Deserialize a table of start state IDs starting at the beginning of + /// `slice`. Upon success, return the total number of bytes read along with + /// the table of starting state IDs. + /// + /// If there was a problem deserializing any part of the starting IDs, + /// then this returns an error. Notably, if the given slice does not have + /// the same alignment as `StateID`, then this will return an error (among + /// other possible errors). + /// + /// This is guaranteed to execute in constant time. + /// + /// # Safety + /// + /// This routine is not safe because it does not check the validity of the + /// starting state IDs themselves. In particular, the number of starting + /// IDs can be of variable length, so it's possible that checking their + /// validity cannot be done in constant time. An invalid starting state + /// ID is not safe because other code may rely on the starting IDs being + /// correct (such as explicit bounds check elision). Therefore, an invalid + /// start ID can lead to undefined behavior. + /// + /// Callers that use this function must either pass on the safety invariant + /// or guarantee that the bytes given contain valid starting state IDs. 
+ /// This guarantee is upheld by the bytes written by `write_to`. + unsafe fn from_bytes_unchecked( + mut slice: &'a [u8], + ) -> Result<(StartTable<&'a [u32]>, usize), DeserializeError> { + let slice_start = slice.as_ptr().as_usize(); + + let (kind, nr) = StartKind::from_bytes(slice)?; + slice = &slice[nr..]; + + let (start_map, nr) = StartByteMap::from_bytes(slice)?; + slice = &slice[nr..]; + + let (stride, nr) = + wire::try_read_u32_as_usize(slice, "start table stride")?; + slice = &slice[nr..]; + if stride != Start::len() { + return Err(DeserializeError::generic( + "invalid starting table stride", + )); + } + + let (maybe_pattern_len, nr) = + wire::try_read_u32_as_usize(slice, "start table patterns")?; + slice = &slice[nr..]; + let pattern_len = if maybe_pattern_len.as_u32() == u32::MAX { + None + } else { + Some(maybe_pattern_len) + }; + if pattern_len.map_or(false, |len| len > PatternID::LIMIT) { + return Err(DeserializeError::generic( + "invalid number of patterns", + )); + } + + let (universal_unanchored, nr) = + wire::try_read_u32(slice, "universal unanchored start")?; + slice = &slice[nr..]; + let universal_start_unanchored = if universal_unanchored == u32::MAX { + None + } else { + Some(StateID::try_from(universal_unanchored).map_err(|e| { + DeserializeError::state_id_error( + e, + "universal unanchored start", + ) + })?) + }; + + let (universal_anchored, nr) = + wire::try_read_u32(slice, "universal anchored start")?; + slice = &slice[nr..]; + let universal_start_anchored = if universal_anchored == u32::MAX { + None + } else { + Some(StateID::try_from(universal_anchored).map_err(|e| { + DeserializeError::state_id_error(e, "universal anchored start") + })?) + }; + + let pattern_table_size = wire::mul( + stride, + pattern_len.unwrap_or(0), + "invalid pattern length", + )?; + // Our start states always start with a two stride of start states for + // the entire automaton. 
The first stride is for unanchored starting + // states and the second stride is for anchored starting states. What + // follows it are an optional set of start states for each pattern. + let start_state_len = wire::add( + wire::mul(2, stride, "start state stride too big")?, + pattern_table_size, + "invalid 'any' pattern starts size", + )?; + let table_bytes_len = wire::mul( + start_state_len, + StateID::SIZE, + "pattern table bytes length", + )?; + wire::check_slice_len(slice, table_bytes_len, "start ID table")?; + wire::check_alignment::(slice)?; + let table_bytes = &slice[..table_bytes_len]; + slice = &slice[table_bytes_len..]; + // SAFETY: Since StateID is always representable as a u32, all we need + // to do is ensure that we have the proper length and alignment. We've + // checked both above, so the cast below is safe. + // + // N.B. This is the only not-safe code in this function. + let table = core::slice::from_raw_parts( + table_bytes.as_ptr().cast::(), + start_state_len, + ); + let st = StartTable { + table, + kind, + start_map, + stride, + pattern_len, + universal_start_unanchored, + universal_start_anchored, + }; + Ok((st, slice.as_ptr().as_usize() - slice_start)) + } +} + +impl> StartTable { + /// Writes a serialized form of this start table to the buffer given. If + /// the buffer is too small, then an error is returned. To determine how + /// big the buffer must be, use `write_to_len`. + fn write_to( + &self, + mut dst: &mut [u8], + ) -> Result { + let nwrite = self.write_to_len(); + if dst.len() < nwrite { + return Err(SerializeError::buffer_too_small( + "starting table ids", + )); + } + dst = &mut dst[..nwrite]; + + // write start kind + let nw = self.kind.write_to::(dst)?; + dst = &mut dst[nw..]; + // write start byte map + let nw = self.start_map.write_to(dst)?; + dst = &mut dst[nw..]; + // write stride + // Unwrap is OK since the stride is always 4 (currently). 
+ E::write_u32(u32::try_from(self.stride).unwrap(), dst); + dst = &mut dst[size_of::()..]; + // write pattern length + // Unwrap is OK since number of patterns is guaranteed to fit in a u32. + E::write_u32( + u32::try_from(self.pattern_len.unwrap_or(0xFFFF_FFFF)).unwrap(), + dst, + ); + dst = &mut dst[size_of::()..]; + // write universal start unanchored state id, u32::MAX if absent + E::write_u32( + self.universal_start_unanchored + .map_or(u32::MAX, |sid| sid.as_u32()), + dst, + ); + dst = &mut dst[size_of::()..]; + // write universal start anchored state id, u32::MAX if absent + E::write_u32( + self.universal_start_anchored.map_or(u32::MAX, |sid| sid.as_u32()), + dst, + ); + dst = &mut dst[size_of::()..]; + // write start IDs + for &sid in self.table() { + let n = wire::write_state_id::(sid, &mut dst); + dst = &mut dst[n..]; + } + Ok(nwrite) + } + + /// Returns the number of bytes the serialized form of this start ID table + /// will use. + fn write_to_len(&self) -> usize { + self.kind.write_to_len() + + self.start_map.write_to_len() + + size_of::() // stride + + size_of::() // # patterns + + size_of::() // universal unanchored start + + size_of::() // universal anchored start + + (self.table().len() * StateID::SIZE) + } + + /// Validates that every state ID in this start table is valid by checking + /// it against the given transition table (which must be for the same DFA). + /// + /// That is, every state ID can be used to correctly index a state. 
+ fn validate(&self, dfa: &DFA) -> Result<(), DeserializeError> { + let tt = &dfa.tt; + if !self.universal_start_unanchored.map_or(true, |s| tt.is_valid(s)) { + return Err(DeserializeError::generic( + "found invalid universal unanchored starting state ID", + )); + } + if !self.universal_start_anchored.map_or(true, |s| tt.is_valid(s)) { + return Err(DeserializeError::generic( + "found invalid universal anchored starting state ID", + )); + } + for &id in self.table() { + if !tt.is_valid(id) { + return Err(DeserializeError::generic( + "found invalid starting state ID", + )); + } + } + Ok(()) + } + + /// Converts this start list to a borrowed value. + fn as_ref(&self) -> StartTable<&'_ [u32]> { + StartTable { + table: self.table.as_ref(), + kind: self.kind, + start_map: self.start_map.clone(), + stride: self.stride, + pattern_len: self.pattern_len, + universal_start_unanchored: self.universal_start_unanchored, + universal_start_anchored: self.universal_start_anchored, + } + } + + /// Converts this start list to an owned value. + #[cfg(feature = "alloc")] + fn to_owned(&self) -> StartTable> { + StartTable { + table: self.table.as_ref().to_vec(), + kind: self.kind, + start_map: self.start_map.clone(), + stride: self.stride, + pattern_len: self.pattern_len, + universal_start_unanchored: self.universal_start_unanchored, + universal_start_anchored: self.universal_start_anchored, + } + } + + /// Return the start state for the given input and starting configuration. + /// This returns an error if the input configuration is not supported by + /// this DFA. For example, requesting an unanchored search when the DFA was + /// not built with unanchored starting states. Or asking for an anchored + /// pattern search with an invalid pattern ID or on a DFA that was not + /// built with start states for each pattern. 
+ #[cfg_attr(feature = "perf-inline", inline(always))] + fn start( + &self, + anchored: Anchored, + start: Start, + ) -> Result { + let start_index = start.as_usize(); + let index = match anchored { + Anchored::No => { + if !self.kind.has_unanchored() { + return Err(StartError::unsupported_anchored(anchored)); + } + start_index + } + Anchored::Yes => { + if !self.kind.has_anchored() { + return Err(StartError::unsupported_anchored(anchored)); + } + self.stride + start_index + } + Anchored::Pattern(pid) => { + let len = match self.pattern_len { + None => { + return Err(StartError::unsupported_anchored(anchored)) + } + Some(len) => len, + }; + if pid.as_usize() >= len { + return Ok(DEAD); + } + (2 * self.stride) + + (self.stride * pid.as_usize()) + + start_index + } + }; + Ok(self.table()[index]) + } + + /// Returns an iterator over all start state IDs in this table. + /// + /// Each item is a triple of: start state ID, the start state type and the + /// pattern ID (if any). + fn iter(&self) -> StartStateIter<'_> { + StartStateIter { st: self.as_ref(), i: 0 } + } + + /// Returns the table as a slice of state IDs. + fn table(&self) -> &[StateID] { + wire::u32s_to_state_ids(self.table.as_ref()) + } + + /// Return the memory usage, in bytes, of this start list. + /// + /// This does not include the size of a `StartList` value itself. + fn memory_usage(&self) -> usize { + self.table().len() * StateID::SIZE + } +} + +#[cfg(feature = "dfa-build")] +impl> StartTable { + /// Set the start state for the given index and pattern. + /// + /// If the pattern ID or state ID are not valid, then this will panic. 
+ fn set_start(&mut self, anchored: Anchored, start: Start, id: StateID) { + let start_index = start.as_usize(); + let index = match anchored { + Anchored::No => start_index, + Anchored::Yes => self.stride + start_index, + Anchored::Pattern(pid) => { + let pid = pid.as_usize(); + let len = self + .pattern_len + .expect("start states for each pattern enabled"); + assert!(pid < len, "invalid pattern ID {:?}", pid); + self.stride + .checked_mul(pid) + .unwrap() + .checked_add(self.stride.checked_mul(2).unwrap()) + .unwrap() + .checked_add(start_index) + .unwrap() + } + }; + self.table_mut()[index] = id; + } + + /// Returns the table as a mutable slice of state IDs. + fn table_mut(&mut self) -> &mut [StateID] { + wire::u32s_to_state_ids_mut(self.table.as_mut()) + } +} + +/// An iterator over start state IDs. +/// +/// This iterator yields a triple of start state ID, the anchored mode and the +/// start state type. If a pattern ID is relevant, then the anchored mode will +/// contain it. Start states with an anchored mode containing a pattern ID will +/// only occur when the DFA was compiled with start states for each pattern +/// (which is disabled by default). +pub(crate) struct StartStateIter<'a> { + st: StartTable<&'a [u32]>, + i: usize, +} + +impl<'a> Iterator for StartStateIter<'a> { + type Item = (StateID, Anchored, Start); + + fn next(&mut self) -> Option<(StateID, Anchored, Start)> { + let i = self.i; + let table = self.st.table(); + if i >= table.len() { + return None; + } + self.i += 1; + + // This unwrap is okay since the stride of the starting state table + // must always match the number of start state types. 
+ let start_type = Start::from_usize(i % self.st.stride).unwrap(); + let anchored = if i < self.st.stride { + Anchored::No + } else if i < (2 * self.st.stride) { + Anchored::Yes + } else { + let pid = (i - (2 * self.st.stride)) / self.st.stride; + Anchored::Pattern(PatternID::new(pid).unwrap()) + }; + Some((table[i], anchored, start_type)) + } +} + +/// This type represents that patterns that should be reported whenever a DFA +/// enters a match state. This structure exists to support DFAs that search for +/// matches for multiple regexes. +/// +/// This structure relies on the fact that all match states in a DFA occur +/// contiguously in the DFA's transition table. (See dfa/special.rs for a more +/// detailed breakdown of the representation.) Namely, when a match occurs, we +/// know its state ID. Since we know the start and end of the contiguous region +/// of match states, we can use that to compute the position at which the match +/// state occurs. That in turn is used as an offset into this structure. +#[derive(Clone, Debug)] +struct MatchStates { + /// Slices is a flattened sequence of pairs, where each pair points to a + /// sub-slice of pattern_ids. The first element of the pair is an offset + /// into pattern_ids and the second element of the pair is the number + /// of 32-bit pattern IDs starting at that position. That is, each pair + /// corresponds to a single DFA match state and its corresponding match + /// IDs. The number of pairs always corresponds to the number of distinct + /// DFA match states. + /// + /// In practice, T is either Vec or &[u32]. + slices: T, + /// A flattened sequence of pattern IDs for each DFA match state. The only + /// way to correctly read this sequence is indirectly via `slices`. + /// + /// In practice, T is either Vec or &[u32]. + pattern_ids: T, + /// The total number of unique patterns represented by these match states. 
+ pattern_len: usize, +} + +impl<'a> MatchStates<&'a [u32]> { + unsafe fn from_bytes_unchecked( + mut slice: &'a [u8], + ) -> Result<(MatchStates<&'a [u32]>, usize), DeserializeError> { + let slice_start = slice.as_ptr().as_usize(); + + // Read the total number of match states. + let (state_len, nr) = + wire::try_read_u32_as_usize(slice, "match state length")?; + slice = &slice[nr..]; + + // Read the slice start/length pairs. + let pair_len = wire::mul(2, state_len, "match state offset pairs")?; + let slices_bytes_len = wire::mul( + pair_len, + PatternID::SIZE, + "match state slice offset byte length", + )?; + wire::check_slice_len(slice, slices_bytes_len, "match state slices")?; + wire::check_alignment::(slice)?; + let slices_bytes = &slice[..slices_bytes_len]; + slice = &slice[slices_bytes_len..]; + // SAFETY: Since PatternID is always representable as a u32, all we + // need to do is ensure that we have the proper length and alignment. + // We've checked both above, so the cast below is safe. + // + // N.B. This is one of the few not-safe snippets in this function, + // so we mark it explicitly to call it out. + let slices = core::slice::from_raw_parts( + slices_bytes.as_ptr().cast::(), + pair_len, + ); + + // Read the total number of unique pattern IDs (which is always 1 more + // than the maximum pattern ID in this automaton, since pattern IDs are + // handed out contiguously starting at 0). + let (pattern_len, nr) = + wire::try_read_u32_as_usize(slice, "pattern length")?; + slice = &slice[nr..]; + + // Now read the pattern ID length. We don't need to store this + // explicitly, but we need it to know how many pattern IDs to read. + let (idlen, nr) = + wire::try_read_u32_as_usize(slice, "pattern ID length")?; + slice = &slice[nr..]; + + // Read the actual pattern IDs. 
+ let pattern_ids_len = + wire::mul(idlen, PatternID::SIZE, "pattern ID byte length")?; + wire::check_slice_len(slice, pattern_ids_len, "match pattern IDs")?; + wire::check_alignment::(slice)?; + let pattern_ids_bytes = &slice[..pattern_ids_len]; + slice = &slice[pattern_ids_len..]; + // SAFETY: Since PatternID is always representable as a u32, all we + // need to do is ensure that we have the proper length and alignment. + // We've checked both above, so the cast below is safe. + // + // N.B. This is one of the few not-safe snippets in this function, + // so we mark it explicitly to call it out. + let pattern_ids = core::slice::from_raw_parts( + pattern_ids_bytes.as_ptr().cast::(), + idlen, + ); + + let ms = MatchStates { slices, pattern_ids, pattern_len }; + Ok((ms, slice.as_ptr().as_usize() - slice_start)) + } +} + +#[cfg(feature = "dfa-build")] +impl MatchStates> { + fn empty(pattern_len: usize) -> MatchStates> { + assert!(pattern_len <= PatternID::LIMIT); + MatchStates { slices: vec![], pattern_ids: vec![], pattern_len } + } + + fn new( + matches: &BTreeMap>, + pattern_len: usize, + ) -> Result>, BuildError> { + let mut m = MatchStates::empty(pattern_len); + for (_, pids) in matches.iter() { + let start = PatternID::new(m.pattern_ids.len()) + .map_err(|_| BuildError::too_many_match_pattern_ids())?; + m.slices.push(start.as_u32()); + // This is always correct since the number of patterns in a single + // match state can never exceed maximum number of allowable + // patterns. Why? Because a pattern can only appear once in a + // particular match state, by construction. (And since our pattern + // ID limit is one less than u32::MAX, we're guaranteed that the + // length fits in a u32.) 
+ m.slices.push(u32::try_from(pids.len()).unwrap()); + for &pid in pids { + m.pattern_ids.push(pid.as_u32()); + } + } + m.pattern_len = pattern_len; + Ok(m) + } + + fn new_with_map( + &self, + matches: &BTreeMap>, + ) -> Result>, BuildError> { + MatchStates::new(matches, self.pattern_len) + } +} + +impl> MatchStates { + /// Writes a serialized form of these match states to the buffer given. If + /// the buffer is too small, then an error is returned. To determine how + /// big the buffer must be, use `write_to_len`. + fn write_to( + &self, + mut dst: &mut [u8], + ) -> Result { + let nwrite = self.write_to_len(); + if dst.len() < nwrite { + return Err(SerializeError::buffer_too_small("match states")); + } + dst = &mut dst[..nwrite]; + + // write state ID length + // Unwrap is OK since number of states is guaranteed to fit in a u32. + E::write_u32(u32::try_from(self.len()).unwrap(), dst); + dst = &mut dst[size_of::()..]; + + // write slice offset pairs + for &pid in self.slices() { + let n = wire::write_pattern_id::(pid, &mut dst); + dst = &mut dst[n..]; + } + + // write unique pattern ID length + // Unwrap is OK since number of patterns is guaranteed to fit in a u32. + E::write_u32(u32::try_from(self.pattern_len).unwrap(), dst); + dst = &mut dst[size_of::()..]; + + // write pattern ID length + // Unwrap is OK since we check at construction (and deserialization) + // that the number of patterns is representable as a u32. + E::write_u32(u32::try_from(self.pattern_ids().len()).unwrap(), dst); + dst = &mut dst[size_of::()..]; + + // write pattern IDs + for &pid in self.pattern_ids() { + let n = wire::write_pattern_id::(pid, &mut dst); + dst = &mut dst[n..]; + } + + Ok(nwrite) + } + + /// Returns the number of bytes the serialized form of these match states + /// will use. 
+ fn write_to_len(&self) -> usize { + size_of::() // match state length + + (self.slices().len() * PatternID::SIZE) + + size_of::() // unique pattern ID length + + size_of::() // pattern ID length + + (self.pattern_ids().len() * PatternID::SIZE) + } + + /// Valides that the match state info is itself internally consistent and + /// consistent with the recorded match state region in the given DFA. + fn validate(&self, dfa: &DFA) -> Result<(), DeserializeError> { + if self.len() != dfa.special.match_len(dfa.stride()) { + return Err(DeserializeError::generic( + "match state length mismatch", + )); + } + for si in 0..self.len() { + let start = self.slices()[si * 2].as_usize(); + let len = self.slices()[si * 2 + 1].as_usize(); + if start >= self.pattern_ids().len() { + return Err(DeserializeError::generic( + "invalid pattern ID start offset", + )); + } + if start + len > self.pattern_ids().len() { + return Err(DeserializeError::generic( + "invalid pattern ID length", + )); + } + for mi in 0..len { + let pid = self.pattern_id(si, mi); + if pid.as_usize() >= self.pattern_len { + return Err(DeserializeError::generic( + "invalid pattern ID", + )); + } + } + } + Ok(()) + } + + /// Converts these match states back into their map form. This is useful + /// when shuffling states, as the normal MatchStates representation is not + /// amenable to easy state swapping. But with this map, to swap id1 and + /// id2, all you need to do is: + /// + /// if let Some(pids) = map.remove(&id1) { + /// map.insert(id2, pids); + /// } + /// + /// Once shuffling is done, use MatchStates::new to convert back. + #[cfg(feature = "dfa-build")] + fn to_map(&self, dfa: &DFA) -> BTreeMap> { + let mut map = BTreeMap::new(); + for i in 0..self.len() { + let mut pids = vec![]; + for j in 0..self.pattern_len(i) { + pids.push(self.pattern_id(i, j)); + } + map.insert(self.match_state_id(dfa, i), pids); + } + map + } + + /// Converts these match states to a borrowed value. 
+ fn as_ref(&self) -> MatchStates<&'_ [u32]> { + MatchStates { + slices: self.slices.as_ref(), + pattern_ids: self.pattern_ids.as_ref(), + pattern_len: self.pattern_len, + } + } + + /// Converts these match states to an owned value. + #[cfg(feature = "alloc")] + fn to_owned(&self) -> MatchStates> { + MatchStates { + slices: self.slices.as_ref().to_vec(), + pattern_ids: self.pattern_ids.as_ref().to_vec(), + pattern_len: self.pattern_len, + } + } + + /// Returns the match state ID given the match state index. (Where the + /// first match state corresponds to index 0.) + /// + /// This panics if there is no match state at the given index. + fn match_state_id(&self, dfa: &DFA, index: usize) -> StateID { + assert!(dfa.special.matches(), "no match states to index"); + // This is one of the places where we rely on the fact that match + // states are contiguous in the transition table. Namely, that the + // first match state ID always corresponds to dfa.special.min_start. + // From there, since we know the stride, we can compute the ID of any + // match state given its index. + let stride2 = u32::try_from(dfa.stride2()).unwrap(); + let offset = index.checked_shl(stride2).unwrap(); + let id = dfa.special.min_match.as_usize().checked_add(offset).unwrap(); + let sid = StateID::new(id).unwrap(); + assert!(dfa.is_match_state(sid)); + sid + } + + /// Returns the pattern ID at the given match index for the given match + /// state. + /// + /// The match state index is the state index minus the state index of the + /// first match state in the DFA. + /// + /// The match index is the index of the pattern ID for the given state. + /// The index must be less than `self.pattern_len(state_index)`. + #[cfg_attr(feature = "perf-inline", inline(always))] + fn pattern_id(&self, state_index: usize, match_index: usize) -> PatternID { + self.pattern_id_slice(state_index)[match_index] + } + + /// Returns the number of patterns in the given match state. 
+ /// + /// The match state index is the state index minus the state index of the + /// first match state in the DFA. + #[cfg_attr(feature = "perf-inline", inline(always))] + fn pattern_len(&self, state_index: usize) -> usize { + self.slices()[state_index * 2 + 1].as_usize() + } + + /// Returns all of the pattern IDs for the given match state index. + /// + /// The match state index is the state index minus the state index of the + /// first match state in the DFA. + #[cfg_attr(feature = "perf-inline", inline(always))] + fn pattern_id_slice(&self, state_index: usize) -> &[PatternID] { + let start = self.slices()[state_index * 2].as_usize(); + let len = self.pattern_len(state_index); + &self.pattern_ids()[start..start + len] + } + + /// Returns the pattern ID offset slice of u32 as a slice of PatternID. + #[cfg_attr(feature = "perf-inline", inline(always))] + fn slices(&self) -> &[PatternID] { + wire::u32s_to_pattern_ids(self.slices.as_ref()) + } + + /// Returns the total number of match states. + #[cfg_attr(feature = "perf-inline", inline(always))] + fn len(&self) -> usize { + assert_eq!(0, self.slices().len() % 2); + self.slices().len() / 2 + } + + /// Returns the pattern ID slice of u32 as a slice of PatternID. + #[cfg_attr(feature = "perf-inline", inline(always))] + fn pattern_ids(&self) -> &[PatternID] { + wire::u32s_to_pattern_ids(self.pattern_ids.as_ref()) + } + + /// Return the memory usage, in bytes, of these match pairs. + fn memory_usage(&self) -> usize { + (self.slices().len() + self.pattern_ids().len()) * PatternID::SIZE + } +} + +/// A common set of flags for both dense and sparse DFAs. This primarily +/// centralizes the serialization format of these flags at a bitset. +#[derive(Clone, Copy, Debug)] +pub(crate) struct Flags { + /// Whether the DFA can match the empty string. When this is false, all + /// matches returned by this DFA are guaranteed to have non-zero length. 
+ pub(crate) has_empty: bool, + /// Whether the DFA should only produce matches with spans that correspond + /// to valid UTF-8. This also includes omitting any zero-width matches that + /// split the UTF-8 encoding of a codepoint. + pub(crate) is_utf8: bool, + /// Whether the DFA is always anchored or not, regardless of `Input` + /// configuration. This is useful for avoiding a reverse scan even when + /// executing unanchored searches. + pub(crate) is_always_start_anchored: bool, +} + +impl Flags { + /// Creates a set of flags for a DFA from an NFA. + /// + /// N.B. This constructor was defined at the time of writing because all + /// of the flags are derived directly from the NFA. If this changes in the + /// future, we might be more thoughtful about how the `Flags` value is + /// itself built. + #[cfg(feature = "dfa-build")] + fn from_nfa(nfa: &thompson::NFA) -> Flags { + Flags { + has_empty: nfa.has_empty(), + is_utf8: nfa.is_utf8(), + is_always_start_anchored: nfa.is_always_start_anchored(), + } + } + + /// Deserializes the flags from the given slice. On success, this also + /// returns the number of bytes read from the slice. + pub(crate) fn from_bytes( + slice: &[u8], + ) -> Result<(Flags, usize), DeserializeError> { + let (bits, nread) = wire::try_read_u32(slice, "flag bitset")?; + let flags = Flags { + has_empty: bits & (1 << 0) != 0, + is_utf8: bits & (1 << 1) != 0, + is_always_start_anchored: bits & (1 << 2) != 0, + }; + Ok((flags, nread)) + } + + /// Writes these flags to the given byte slice. If the buffer is too small, + /// then an error is returned. To determine how big the buffer must be, + /// use `write_to_len`. 
+ pub(crate) fn write_to( + &self, + dst: &mut [u8], + ) -> Result { + fn bool_to_int(b: bool) -> u32 { + if b { + 1 + } else { + 0 + } + } + + let nwrite = self.write_to_len(); + if dst.len() < nwrite { + return Err(SerializeError::buffer_too_small("flag bitset")); + } + let bits = (bool_to_int(self.has_empty) << 0) + | (bool_to_int(self.is_utf8) << 1) + | (bool_to_int(self.is_always_start_anchored) << 2); + E::write_u32(bits, dst); + Ok(nwrite) + } + + /// Returns the number of bytes the serialized form of these flags + /// will use. + pub(crate) fn write_to_len(&self) -> usize { + size_of::() + } +} + +/// An iterator over all states in a DFA. +/// +/// This iterator yields a tuple for each state. The first element of the +/// tuple corresponds to a state's identifier, and the second element +/// corresponds to the state itself (comprised of its transitions). +/// +/// `'a` corresponding to the lifetime of original DFA, `T` corresponds to +/// the type of the transition table itself. +pub(crate) struct StateIter<'a, T> { + tt: &'a TransitionTable, + it: iter::Enumerate>, +} + +impl<'a, T: AsRef<[u32]>> Iterator for StateIter<'a, T> { + type Item = State<'a>; + + fn next(&mut self) -> Option> { + self.it.next().map(|(index, _)| { + let id = self.tt.to_state_id(index); + self.tt.state(id) + }) + } +} + +/// An immutable representation of a single DFA state. +/// +/// `'a` correspondings to the lifetime of a DFA's transition table. +pub(crate) struct State<'a> { + id: StateID, + stride2: usize, + transitions: &'a [StateID], +} + +impl<'a> State<'a> { + /// Return an iterator over all transitions in this state. This yields + /// a number of transitions equivalent to the alphabet length of the + /// corresponding DFA. + /// + /// Each transition is represented by a tuple. The first element is + /// the input byte for that transition and the second element is the + /// transitions itself. 
+ pub(crate) fn transitions(&self) -> StateTransitionIter<'_> { + StateTransitionIter { + len: self.transitions.len(), + it: self.transitions.iter().enumerate(), + } + } + + /// Return an iterator over a sparse representation of the transitions in + /// this state. Only non-dead transitions are returned. + /// + /// The "sparse" representation in this case corresponds to a sequence of + /// triples. The first two elements of the triple comprise an inclusive + /// byte range while the last element corresponds to the transition taken + /// for all bytes in the range. + /// + /// This is somewhat more condensed than the classical sparse + /// representation (where you have an element for every non-dead + /// transition), but in practice, checking if a byte is in a range is very + /// cheap and using ranges tends to conserve quite a bit more space. + pub(crate) fn sparse_transitions(&self) -> StateSparseTransitionIter<'_> { + StateSparseTransitionIter { dense: self.transitions(), cur: None } + } + + /// Returns the identifier for this state. + pub(crate) fn id(&self) -> StateID { + self.id + } + + /// Analyzes this state to determine whether it can be accelerated. If so, + /// it returns an accelerator that contains at least one byte. + #[cfg(feature = "dfa-build")] + fn accelerate(&self, classes: &ByteClasses) -> Option { + // We just try to add bytes to our accelerator. Once adding fails + // (because we've added too many bytes), then give up. 
+ let mut accel = Accel::new(); + for (class, id) in self.transitions() { + if id == self.id() { + continue; + } + for unit in classes.elements(class) { + if let Some(byte) = unit.as_u8() { + if !accel.add(byte) { + return None; + } + } + } + } + if accel.is_empty() { + None + } else { + Some(accel) + } + } +} + +impl<'a> fmt::Debug for State<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + for (i, (start, end, sid)) in self.sparse_transitions().enumerate() { + let id = if f.alternate() { + sid.as_usize() + } else { + sid.as_usize() >> self.stride2 + }; + if i > 0 { + write!(f, ", ")?; + } + if start == end { + write!(f, "{:?} => {:?}", start, id)?; + } else { + write!(f, "{:?}-{:?} => {:?}", start, end, id)?; + } + } + Ok(()) + } +} + +/// An iterator over all transitions in a single DFA state. This yields +/// a number of transitions equivalent to the alphabet length of the +/// corresponding DFA. +/// +/// Each transition is represented by a tuple. The first element is the input +/// byte for that transition and the second element is the transition itself. +#[derive(Debug)] +pub(crate) struct StateTransitionIter<'a> { + len: usize, + it: iter::Enumerate>, +} + +impl<'a> Iterator for StateTransitionIter<'a> { + type Item = (alphabet::Unit, StateID); + + fn next(&mut self) -> Option<(alphabet::Unit, StateID)> { + self.it.next().map(|(i, &id)| { + let unit = if i + 1 == self.len { + alphabet::Unit::eoi(i) + } else { + let b = u8::try_from(i) + .expect("raw byte alphabet is never exceeded"); + alphabet::Unit::u8(b) + }; + (unit, id) + }) + } +} + +/// An iterator over all non-DEAD transitions in a single DFA state using a +/// sparse representation. +/// +/// Each transition is represented by a triple. The first two elements of the +/// triple comprise an inclusive byte range while the last element corresponds +/// to the transition taken for all bytes in the range. 
+/// +/// As a convenience, this always returns `alphabet::Unit` values of the same +/// type. That is, you'll never get a (byte, EOI) or a (EOI, byte). Only (byte, +/// byte) and (EOI, EOI) values are yielded. +#[derive(Debug)] +pub(crate) struct StateSparseTransitionIter<'a> { + dense: StateTransitionIter<'a>, + cur: Option<(alphabet::Unit, alphabet::Unit, StateID)>, +} + +impl<'a> Iterator for StateSparseTransitionIter<'a> { + type Item = (alphabet::Unit, alphabet::Unit, StateID); + + fn next(&mut self) -> Option<(alphabet::Unit, alphabet::Unit, StateID)> { + while let Some((unit, next)) = self.dense.next() { + let (prev_start, prev_end, prev_next) = match self.cur { + Some(t) => t, + None => { + self.cur = Some((unit, unit, next)); + continue; + } + }; + if prev_next == next && !unit.is_eoi() { + self.cur = Some((prev_start, unit, prev_next)); + } else { + self.cur = Some((unit, unit, next)); + if prev_next != DEAD { + return Some((prev_start, prev_end, prev_next)); + } + } + } + if let Some((start, end, next)) = self.cur.take() { + if next != DEAD { + return Some((start, end, next)); + } + } + None + } +} + +/// An error that occurred during the construction of a DFA. +/// +/// This error does not provide many introspection capabilities. There are +/// generally only two things you can do with it: +/// +/// * Obtain a human readable message via its `std::fmt::Display` impl. +/// * Access an underlying [`nfa::thompson::BuildError`](thompson::BuildError) +/// type from its `source` method via the `std::error::Error` trait. This error +/// only occurs when using convenience routines for building a DFA directly +/// from a pattern string. +/// +/// When the `std` feature is enabled, this implements the `std::error::Error` +/// trait. +#[cfg(feature = "dfa-build")] +#[derive(Clone, Debug)] +pub struct BuildError { + kind: BuildErrorKind, +} + +/// The kind of error that occurred during the construction of a DFA. +/// +/// Note that this error is non-exhaustive. 
Adding new variants is not +/// considered a breaking change. +#[cfg(feature = "dfa-build")] +#[derive(Clone, Debug)] +enum BuildErrorKind { + /// An error that occurred while constructing an NFA as a precursor step + /// before a DFA is compiled. + NFA(thompson::BuildError), + /// An error that occurred because an unsupported regex feature was used. + /// The message string describes which unsupported feature was used. + /// + /// The primary regex feature that is unsupported by DFAs is the Unicode + /// word boundary look-around assertion (`\b`). This can be worked around + /// by either using an ASCII word boundary (`(?-u:\b)`) or by enabling + /// Unicode word boundaries when building a DFA. + Unsupported(&'static str), + /// An error that occurs if too many states are produced while building a + /// DFA. + TooManyStates, + /// An error that occurs if too many start states are needed while building + /// a DFA. + /// + /// This is a kind of oddball error that occurs when building a DFA with + /// start states enabled for each pattern and enough patterns to cause + /// the table of start states to overflow `usize`. + TooManyStartStates, + /// This is another oddball error that can occur if there are too many + /// patterns spread out across too many match states. + TooManyMatchPatternIDs, + /// An error that occurs if the DFA got too big during determinization. + DFAExceededSizeLimit { limit: usize }, + /// An error that occurs if auxiliary storage (not the DFA) used during + /// determinization got too big. + DeterminizeExceededSizeLimit { limit: usize }, +} + +#[cfg(feature = "dfa-build")] +impl BuildError { + /// Return the kind of this error. 
+ fn kind(&self) -> &BuildErrorKind { + &self.kind + } + + pub(crate) fn nfa(err: thompson::BuildError) -> BuildError { + BuildError { kind: BuildErrorKind::NFA(err) } + } + + pub(crate) fn unsupported_dfa_word_boundary_unicode() -> BuildError { + let msg = "cannot build DFAs for regexes with Unicode word \ + boundaries; switch to ASCII word boundaries, or \ + heuristically enable Unicode word boundaries or use a \ + different regex engine"; + BuildError { kind: BuildErrorKind::Unsupported(msg) } + } + + pub(crate) fn too_many_states() -> BuildError { + BuildError { kind: BuildErrorKind::TooManyStates } + } + + pub(crate) fn too_many_start_states() -> BuildError { + BuildError { kind: BuildErrorKind::TooManyStartStates } + } + + pub(crate) fn too_many_match_pattern_ids() -> BuildError { + BuildError { kind: BuildErrorKind::TooManyMatchPatternIDs } + } + + pub(crate) fn dfa_exceeded_size_limit(limit: usize) -> BuildError { + BuildError { kind: BuildErrorKind::DFAExceededSizeLimit { limit } } + } + + pub(crate) fn determinize_exceeded_size_limit(limit: usize) -> BuildError { + BuildError { + kind: BuildErrorKind::DeterminizeExceededSizeLimit { limit }, + } + } +} + +#[cfg(all(feature = "std", feature = "dfa-build"))] +impl std::error::Error for BuildError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self.kind() { + BuildErrorKind::NFA(ref err) => Some(err), + _ => None, + } + } +} + +#[cfg(feature = "dfa-build")] +impl core::fmt::Display for BuildError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match self.kind() { + BuildErrorKind::NFA(_) => write!(f, "error building NFA"), + BuildErrorKind::Unsupported(ref msg) => { + write!(f, "unsupported regex feature for DFAs: {}", msg) + } + BuildErrorKind::TooManyStates => write!( + f, + "number of DFA states exceeds limit of {}", + StateID::LIMIT, + ), + BuildErrorKind::TooManyStartStates => { + let stride = Start::len(); + // The start table has `stride` 
entries for starting states for + // the entire DFA, and then `stride` entries for each pattern + // if start states for each pattern are enabled (which is the + // only way this error can occur). Thus, the total number of + // patterns that can fit in the table is `stride` less than + // what we can allocate. + let max = usize::try_from(core::isize::MAX).unwrap(); + let limit = (max - stride) / stride; + write!( + f, + "compiling DFA with start states exceeds pattern \ + pattern limit of {}", + limit, + ) + } + BuildErrorKind::TooManyMatchPatternIDs => write!( + f, + "compiling DFA with total patterns in all match states \ + exceeds limit of {}", + PatternID::LIMIT, + ), + BuildErrorKind::DFAExceededSizeLimit { limit } => write!( + f, + "DFA exceeded size limit of {:?} during determinization", + limit, + ), + BuildErrorKind::DeterminizeExceededSizeLimit { limit } => { + write!(f, "determinization exceeded size limit of {:?}", limit) + } + } + } +} + +#[cfg(all(test, feature = "syntax", feature = "dfa-build"))] +mod tests { + use crate::{Input, MatchError}; + + use super::*; + + #[test] + fn errors_with_unicode_word_boundary() { + let pattern = r"\b"; + assert!(Builder::new().build(pattern).is_err()); + } + + #[test] + fn roundtrip_never_match() { + let dfa = DFA::never_match().unwrap(); + let (buf, _) = dfa.to_bytes_native_endian(); + let dfa: DFA<&[u32]> = DFA::from_bytes(&buf).unwrap().0; + + assert_eq!(None, dfa.try_search_fwd(&Input::new("foo12345")).unwrap()); + } + + #[test] + fn roundtrip_always_match() { + use crate::HalfMatch; + + let dfa = DFA::always_match().unwrap(); + let (buf, _) = dfa.to_bytes_native_endian(); + let dfa: DFA<&[u32]> = DFA::from_bytes(&buf).unwrap().0; + + assert_eq!( + Some(HalfMatch::must(0, 0)), + dfa.try_search_fwd(&Input::new("foo12345")).unwrap() + ); + } + + // See the analogous test in src/hybrid/dfa.rs. 
+ #[test] + fn heuristic_unicode_reverse() { + let dfa = DFA::builder() + .configure(DFA::config().unicode_word_boundary(true)) + .thompson(thompson::Config::new().reverse(true)) + .build(r"\b[0-9]+\b") + .unwrap(); + + let input = Input::new("β123").range(2..); + let expected = MatchError::quit(0xB2, 1); + let got = dfa.try_search_rev(&input); + assert_eq!(Err(expected), got); + + let input = Input::new("123β").range(..3); + let expected = MatchError::quit(0xCE, 3); + let got = dfa.try_search_rev(&input); + assert_eq!(Err(expected), got); + } +} diff --git a/vendor/regex-automata/src/dfa/determinize.rs b/vendor/regex-automata/src/dfa/determinize.rs new file mode 100644 index 0000000..19f99f5 --- /dev/null +++ b/vendor/regex-automata/src/dfa/determinize.rs @@ -0,0 +1,599 @@ +use alloc::{collections::BTreeMap, vec::Vec}; + +use crate::{ + dfa::{ + dense::{self, BuildError}, + DEAD, + }, + nfa::thompson, + util::{ + self, + alphabet::{self, ByteSet}, + determinize::{State, StateBuilderEmpty, StateBuilderNFA}, + primitives::{PatternID, StateID}, + search::{Anchored, MatchKind}, + sparse_set::SparseSets, + start::Start, + }, +}; + +/// A builder for configuring and running a DFA determinizer. +#[derive(Clone, Debug)] +pub(crate) struct Config { + match_kind: MatchKind, + quit: ByteSet, + dfa_size_limit: Option, + determinize_size_limit: Option, +} + +impl Config { + /// Create a new default config for a determinizer. The determinizer may be + /// configured before calling `run`. + pub fn new() -> Config { + Config { + match_kind: MatchKind::LeftmostFirst, + quit: ByteSet::empty(), + dfa_size_limit: None, + determinize_size_limit: None, + } + } + + /// Run determinization on the given NFA and write the resulting DFA into + /// the one given. The DFA given should be initialized but otherwise empty. + /// "Initialized" means that it is setup to handle the NFA's byte classes, + /// number of patterns and whether to build start states for each pattern. 
+ pub fn run( + &self, + nfa: &thompson::NFA, + dfa: &mut dense::OwnedDFA, + ) -> Result<(), BuildError> { + let dead = State::dead(); + let quit = State::dead(); + let mut cache = StateMap::default(); + // We only insert the dead state here since its representation is + // identical to the quit state. And we never want anything pointing + // to the quit state other than specific transitions derived from the + // determinizer's configured "quit" bytes. + // + // We do put the quit state into 'builder_states' below. This ensures + // that a proper DFA state ID is allocated for it, and that no other + // DFA state uses the "location after the DEAD state." That is, it + // is assumed that the quit state is always the state immediately + // following the DEAD state. + cache.insert(dead.clone(), DEAD); + + let runner = Runner { + config: self.clone(), + nfa, + dfa, + builder_states: alloc::vec![dead, quit], + cache, + memory_usage_state: 0, + sparses: SparseSets::new(nfa.states().len()), + stack: alloc::vec![], + scratch_state_builder: StateBuilderEmpty::new(), + }; + runner.run() + } + + /// The match semantics to use for determinization. + /// + /// MatchKind::All corresponds to the standard textbook construction. + /// All possible match states are represented in the DFA. + /// MatchKind::LeftmostFirst permits greediness and otherwise tries to + /// simulate the match semantics of backtracking regex engines. Namely, + /// only a subset of match states are built, and dead states are used to + /// stop searches with an unanchored prefix. + /// + /// The default is MatchKind::LeftmostFirst. + pub fn match_kind(&mut self, kind: MatchKind) -> &mut Config { + self.match_kind = kind; + self + } + + /// The set of bytes to use that will cause the DFA to enter a quit state, + /// stop searching and return an error. By default, this is empty. 
+ pub fn quit(&mut self, set: ByteSet) -> &mut Config { + self.quit = set; + self + } + + /// The limit, in bytes of the heap, that the DFA is permitted to use. This + /// does not include the auxiliary heap storage used by determinization. + pub fn dfa_size_limit(&mut self, bytes: Option) -> &mut Config { + self.dfa_size_limit = bytes; + self + } + + /// The limit, in bytes of the heap, that determinization itself is allowed + /// to use. This does not include the size of the DFA being built. + pub fn determinize_size_limit( + &mut self, + bytes: Option, + ) -> &mut Config { + self.determinize_size_limit = bytes; + self + } +} + +/// The actual implementation of determinization that converts an NFA to a DFA +/// through powerset construction. +/// +/// This determinizer roughly follows the typical powerset construction, where +/// each DFA state is comprised of one or more NFA states. In the worst case, +/// there is one DFA state for every possible combination of NFA states. In +/// practice, this only happens in certain conditions, typically when there are +/// bounded repetitions. +/// +/// The main differences between this implementation and typical deteminization +/// are that this implementation delays matches by one state and hackily makes +/// look-around work. Comments below attempt to explain this. +/// +/// The lifetime variable `'a` refers to the lifetime of the NFA or DFA, +/// whichever is shorter. +#[derive(Debug)] +struct Runner<'a> { + /// The configuration used to initialize determinization. + config: Config, + /// The NFA we're converting into a DFA. + nfa: &'a thompson::NFA, + /// The DFA we're building. + dfa: &'a mut dense::OwnedDFA, + /// Each DFA state being built is defined as an *ordered* set of NFA + /// states, along with some meta facts about the ordered set of NFA states. + /// + /// This is never empty. The first state is always a dummy state such that + /// a state id == 0 corresponds to a dead state. 
The second state is always + /// the quit state. + /// + /// Why do we have states in both a `Vec` and in a cache map below? + /// Well, they serve two different roles based on access patterns. + /// `builder_states` is the canonical home of each state, and provides + /// constant random access by a DFA state's ID. The cache map below, on + /// the other hand, provides a quick way of searching for identical DFA + /// states by using the DFA state as a key in the map. Of course, we use + /// reference counting to avoid actually duplicating the state's data + /// itself. (Although this has never been benchmarked.) Note that the cache + /// map does not give us full minimization; it just lets us avoid some very + /// obvious redundant states. + /// + /// Note that the index into this Vec isn't quite the DFA's state ID. + /// Rather, it's just an index. To get the state ID, you have to multiply + /// it by the DFA's stride. That's done by self.dfa.from_index. And the + /// inverse is self.dfa.to_index. + /// + /// Moreover, DFA states don't usually retain the IDs assigned to them + /// by their position in this Vec. After determinization completes, + /// states are shuffled around to support other optimizations. See the + /// sibling 'special' module for more details on that. (The reason for + /// mentioning this is that if you print out the DFA for debugging during + /// determinization, and then print out the final DFA after it is fully + /// built, then the state IDs likely won't match up.) + builder_states: Vec, + /// A cache of DFA states that already exist and can be easily looked up + /// via ordered sets of NFA states. + /// + /// See `builder_states` docs for why we store states in two different + /// ways. + cache: StateMap, + /// The memory usage, in bytes, used by builder_states and cache. We track + /// this as new states are added since states use a variable amount of + /// heap. 
Tracking this as we add states makes it possible to compute the + /// total amount of memory used by the determinizer in constant time. + memory_usage_state: usize, + /// A pair of sparse sets for tracking ordered sets of NFA state IDs. + /// These are reused throughout determinization. A bounded sparse set + /// gives us constant time insertion, membership testing and clearing. + sparses: SparseSets, + /// Scratch space for a stack of NFA states to visit, for depth first + /// visiting without recursion. + stack: Vec, + /// Scratch space for storing an ordered sequence of NFA states, for + /// amortizing allocation. This is principally useful for when we avoid + /// adding a new DFA state since it already exists. In order to detect this + /// case though, we still need an ordered set of NFA state IDs. So we use + /// this space to stage that ordered set before we know whether we need to + /// create a new DFA state or not. + scratch_state_builder: StateBuilderEmpty, +} + +/// A map from states to state identifiers. When using std, we use a standard +/// hashmap, since it's a bit faster for this use case. (Other maps, like +/// one's based on FNV, have not yet been benchmarked.) +/// +/// The main purpose of this map is to reuse states where possible. This won't +/// fully minimize the DFA, but it works well in a lot of cases. +#[cfg(feature = "std")] +type StateMap = std::collections::HashMap; +#[cfg(not(feature = "std"))] +type StateMap = BTreeMap; + +impl<'a> Runner<'a> { + /// Build the DFA. If there was a problem constructing the DFA (e.g., if + /// the chosen state identifier representation is too small), then an error + /// is returned. + fn run(mut self) -> Result<(), BuildError> { + if self.nfa.look_set_any().contains_word_unicode() + && !self.config.quit.contains_range(0x80, 0xFF) + { + return Err(BuildError::unsupported_dfa_word_boundary_unicode()); + } + + // A sequence of "representative" bytes drawn from each equivalence + // class. 
These representative bytes are fed to the NFA to compute + // state transitions. This allows us to avoid re-computing state + // transitions for bytes that are guaranteed to produce identical + // results. Since computing the representatives needs to do a little + // work, we do it once here because we'll be iterating over them a lot. + let representatives: Vec = + self.dfa.byte_classes().representatives(..).collect(); + // The set of all DFA state IDs that still need to have their + // transitions set. We start by seeding this with all starting states. + let mut uncompiled = alloc::vec![]; + self.add_all_starts(&mut uncompiled)?; + while let Some(dfa_id) = uncompiled.pop() { + for &unit in &representatives { + if unit.as_u8().map_or(false, |b| self.config.quit.contains(b)) + { + continue; + } + // In many cases, the state we transition to has already been + // computed. 'cached_state' will do the minimal amount of work + // to check this, and if it exists, immediately return an + // already existing state ID. + let (next_dfa_id, is_new) = self.cached_state(dfa_id, unit)?; + self.dfa.set_transition(dfa_id, unit, next_dfa_id); + // If the state ID we got back is newly created, then we need + // to compile it, so add it to our uncompiled frontier. + if is_new { + uncompiled.push(next_dfa_id); + } + } + } + debug!( + "determinization complete, memory usage: {}, \ + dense DFA size: {}, \ + is reverse? {}", + self.memory_usage(), + self.dfa.memory_usage(), + self.nfa.is_reverse(), + ); + + // A map from DFA state ID to one or more NFA match IDs. Each NFA match + // ID corresponds to a distinct regex pattern that matches in the state + // corresponding to the key. + let mut matches: BTreeMap> = BTreeMap::new(); + self.cache.clear(); + #[cfg(feature = "logging")] + let mut total_pat_len = 0; + for (i, state) in self.builder_states.into_iter().enumerate() { + if let Some(pat_ids) = state.match_pattern_ids() { + let id = self.dfa.to_state_id(i); + log! 
{ + total_pat_len += pat_ids.len(); + } + matches.insert(id, pat_ids); + } + } + log! { + use core::mem::size_of; + let per_elem = size_of::() + size_of::>(); + let pats = total_pat_len * size_of::(); + let mem = (matches.len() * per_elem) + pats; + log::debug!("matches map built, memory usage: {}", mem); + } + // At this point, we shuffle the "special" states in the final DFA. + // This permits a DFA's match loop to detect a match condition (among + // other things) by merely inspecting the current state's identifier, + // and avoids the need for any additional auxiliary storage. + self.dfa.shuffle(matches)?; + Ok(()) + } + + /// Return the identifier for the next DFA state given an existing DFA + /// state and an input byte. If the next DFA state already exists, then + /// return its identifier from the cache. Otherwise, build the state, cache + /// it and return its identifier. + /// + /// This routine returns a boolean indicating whether a new state was + /// built. If a new state is built, then the caller needs to add it to its + /// frontier of uncompiled DFA states to compute transitions for. + fn cached_state( + &mut self, + dfa_id: StateID, + unit: alphabet::Unit, + ) -> Result<(StateID, bool), BuildError> { + // Compute the set of all reachable NFA states, including epsilons. + let empty_builder = self.get_state_builder(); + let builder = util::determinize::next( + self.nfa, + self.config.match_kind, + &mut self.sparses, + &mut self.stack, + &self.builder_states[self.dfa.to_index(dfa_id)], + unit, + empty_builder, + ); + self.maybe_add_state(builder) + } + + /// Compute the set of DFA start states and add their identifiers in + /// 'dfa_state_ids' (no duplicates are added). + fn add_all_starts( + &mut self, + dfa_state_ids: &mut Vec, + ) -> Result<(), BuildError> { + // These should be the first states added. + assert!(dfa_state_ids.is_empty()); + // We only want to add (un)anchored starting states that is consistent + // with our DFA's configuration. 
Unconditionally adding both (although + // it is the default) can make DFAs quite a bit bigger. + if self.dfa.start_kind().has_unanchored() { + self.add_start_group(Anchored::No, dfa_state_ids)?; + } + if self.dfa.start_kind().has_anchored() { + self.add_start_group(Anchored::Yes, dfa_state_ids)?; + } + // I previously has an 'assert' here checking that either + // 'dfa_state_ids' was non-empty, or the NFA had zero patterns. But it + // turns out this isn't always true. For example, the NFA might have + // one or more patterns but where all such patterns are just 'fail' + // states. These will ultimately just compile down to DFA dead states, + // and since the dead state was added earlier, no new DFA states are + // added. And thus, it is valid and okay for 'dfa_state_ids' to be + // empty even if there are a non-zero number of patterns in the NFA. + + // We only need to compute anchored start states for each pattern if it + // was requested to do so. + if self.dfa.starts_for_each_pattern() { + for pid in self.nfa.patterns() { + self.add_start_group(Anchored::Pattern(pid), dfa_state_ids)?; + } + } + Ok(()) + } + + /// Add a group of start states for the given match pattern ID. Any new + /// DFA states added are pushed on to 'dfa_state_ids'. (No duplicates are + /// pushed.) + /// + /// When pattern_id is None, then this will compile a group of unanchored + /// start states (if the DFA is unanchored). When the pattern_id is + /// present, then this will compile a group of anchored start states that + /// only match the given pattern. + /// + /// This panics if `anchored` corresponds to an invalid pattern ID. 
+ fn add_start_group( + &mut self, + anchored: Anchored, + dfa_state_ids: &mut Vec, + ) -> Result<(), BuildError> { + let nfa_start = match anchored { + Anchored::No => self.nfa.start_unanchored(), + Anchored::Yes => self.nfa.start_anchored(), + Anchored::Pattern(pid) => { + self.nfa.start_pattern(pid).expect("valid pattern ID") + } + }; + + // When compiling start states, we're careful not to build additional + // states that aren't necessary. For example, if the NFA has no word + // boundary assertion, then there's no reason to have distinct start + // states for 'NonWordByte' and 'WordByte' starting configurations. + // Instead, the 'WordByte' starting configuration can just point + // directly to the start state for the 'NonWordByte' config. + // + // Note though that we only need to care about assertions in the prefix + // of an NFA since this only concerns the starting states. (Actually, + // the most precisely thing we could do it is look at the prefix + // assertions of each pattern when 'anchored == Anchored::Pattern', + // and then only compile extra states if the prefix is non-empty.) But + // we settle for simplicity here instead of absolute minimalism. It is + // somewhat rare, after all, for multiple patterns in the same regex to + // have different prefix look-arounds. 
+ + let (id, is_new) = + self.add_one_start(nfa_start, Start::NonWordByte)?; + self.dfa.set_start_state(anchored, Start::NonWordByte, id); + if is_new { + dfa_state_ids.push(id); + } + + if !self.nfa.look_set_prefix_any().contains_word() { + self.dfa.set_start_state(anchored, Start::WordByte, id); + } else { + let (id, is_new) = + self.add_one_start(nfa_start, Start::WordByte)?; + self.dfa.set_start_state(anchored, Start::WordByte, id); + if is_new { + dfa_state_ids.push(id); + } + } + if !self.nfa.look_set_prefix_any().contains_anchor() { + self.dfa.set_start_state(anchored, Start::Text, id); + self.dfa.set_start_state(anchored, Start::LineLF, id); + self.dfa.set_start_state(anchored, Start::LineCR, id); + self.dfa.set_start_state( + anchored, + Start::CustomLineTerminator, + id, + ); + } else { + let (id, is_new) = self.add_one_start(nfa_start, Start::Text)?; + self.dfa.set_start_state(anchored, Start::Text, id); + if is_new { + dfa_state_ids.push(id); + } + + let (id, is_new) = self.add_one_start(nfa_start, Start::LineLF)?; + self.dfa.set_start_state(anchored, Start::LineLF, id); + if is_new { + dfa_state_ids.push(id); + } + + let (id, is_new) = self.add_one_start(nfa_start, Start::LineCR)?; + self.dfa.set_start_state(anchored, Start::LineCR, id); + if is_new { + dfa_state_ids.push(id); + } + + let (id, is_new) = + self.add_one_start(nfa_start, Start::CustomLineTerminator)?; + self.dfa.set_start_state( + anchored, + Start::CustomLineTerminator, + id, + ); + if is_new { + dfa_state_ids.push(id); + } + } + + Ok(()) + } + + /// Add a new DFA start state corresponding to the given starting NFA + /// state, and the starting search configuration. (The starting search + /// configuration essentially tells us which look-behind assertions are + /// true for this particular state.) + /// + /// The boolean returned indicates whether the state ID returned is a newly + /// created state, or a previously cached state. 
+ fn add_one_start( + &mut self, + nfa_start: StateID, + start: Start, + ) -> Result<(StateID, bool), BuildError> { + // Compute the look-behind assertions that are true in this starting + // configuration, and the determine the epsilon closure. While + // computing the epsilon closure, we only follow condiional epsilon + // transitions that satisfy the look-behind assertions in 'look_have'. + let mut builder_matches = self.get_state_builder().into_matches(); + util::determinize::set_lookbehind_from_start( + self.nfa, + &start, + &mut builder_matches, + ); + self.sparses.set1.clear(); + util::determinize::epsilon_closure( + self.nfa, + nfa_start, + builder_matches.look_have(), + &mut self.stack, + &mut self.sparses.set1, + ); + let mut builder = builder_matches.into_nfa(); + util::determinize::add_nfa_states( + &self.nfa, + &self.sparses.set1, + &mut builder, + ); + self.maybe_add_state(builder) + } + + /// Adds the given state to the DFA being built depending on whether it + /// already exists in this determinizer's cache. + /// + /// If it does exist, then the memory used by 'state' is put back into the + /// determinizer and the previously created state's ID is returned. (Along + /// with 'false', indicating that no new state was added.) + /// + /// If it does not exist, then the state is added to the DFA being built + /// and a fresh ID is allocated (if ID allocation fails, then an error is + /// returned) and returned. (Along with 'true', indicating that a new state + /// was added.) + fn maybe_add_state( + &mut self, + builder: StateBuilderNFA, + ) -> Result<(StateID, bool), BuildError> { + if let Some(&cached_id) = self.cache.get(builder.as_bytes()) { + // Since we have a cached state, put the constructed state's + // memory back into our scratch space, so that it can be reused. 
+ self.put_state_builder(builder); + return Ok((cached_id, false)); + } + self.add_state(builder).map(|sid| (sid, true)) + } + + /// Add the given state to the DFA and make it available in the cache. + /// + /// The state initially has no transitions. That is, it transitions to the + /// dead state for all possible inputs, and transitions to the quit state + /// for all quit bytes. + /// + /// If adding the state would exceed the maximum value for StateID, then an + /// error is returned. + fn add_state( + &mut self, + builder: StateBuilderNFA, + ) -> Result { + let id = self.dfa.add_empty_state()?; + if !self.config.quit.is_empty() { + for b in self.config.quit.iter() { + self.dfa.set_transition( + id, + alphabet::Unit::u8(b), + self.dfa.quit_id(), + ); + } + } + let state = builder.to_state(); + // States use reference counting internally, so we only need to count + // their memory usage once. + self.memory_usage_state += state.memory_usage(); + self.builder_states.push(state.clone()); + self.cache.insert(state, id); + self.put_state_builder(builder); + if let Some(limit) = self.config.dfa_size_limit { + if self.dfa.memory_usage() > limit { + return Err(BuildError::dfa_exceeded_size_limit(limit)); + } + } + if let Some(limit) = self.config.determinize_size_limit { + if self.memory_usage() > limit { + return Err(BuildError::determinize_exceeded_size_limit( + limit, + )); + } + } + Ok(id) + } + + /// Returns a state builder from this determinizer that might have existing + /// capacity. This helps avoid allocs in cases where a state is built that + /// turns out to already be cached. + /// + /// Callers must put the state builder back with 'put_state_builder', + /// otherwise the allocation reuse won't work. + fn get_state_builder(&mut self) -> StateBuilderEmpty { + core::mem::replace( + &mut self.scratch_state_builder, + StateBuilderEmpty::new(), + ) + } + + /// Puts the given state builder back into this determinizer for reuse. 
+ /// + /// Note that building a 'State' from a builder always creates a new + /// alloc, so callers should always put the builder back. + fn put_state_builder(&mut self, builder: StateBuilderNFA) { + let _ = core::mem::replace( + &mut self.scratch_state_builder, + builder.clear(), + ); + } + + /// Return the memory usage, in bytes, of this determinizer at the current + /// point in time. This does not include memory used by the NFA or the + /// dense DFA itself. + fn memory_usage(&self) -> usize { + use core::mem::size_of; + + self.builder_states.len() * size_of::() + // Maps likely use more memory than this, but it's probably close. + + self.cache.len() * (size_of::() + size_of::()) + + self.memory_usage_state + + self.stack.capacity() * size_of::() + + self.scratch_state_builder.capacity() + } +} diff --git a/vendor/regex-automata/src/dfa/minimize.rs b/vendor/regex-automata/src/dfa/minimize.rs new file mode 100644 index 0000000..fea925b --- /dev/null +++ b/vendor/regex-automata/src/dfa/minimize.rs @@ -0,0 +1,463 @@ +use core::{cell::RefCell, fmt, mem}; + +use alloc::{collections::BTreeMap, rc::Rc, vec, vec::Vec}; + +use crate::{ + dfa::{automaton::Automaton, dense, DEAD}, + util::{ + alphabet, + primitives::{PatternID, StateID}, + }, +}; + +/// An implementation of Hopcroft's algorithm for minimizing DFAs. +/// +/// The algorithm implemented here is mostly taken from Wikipedia: +/// https://en.wikipedia.org/wiki/DFA_minimization#Hopcroft's_algorithm +/// +/// This code has had some light optimization attention paid to it, +/// particularly in the form of reducing allocation as much as possible. +/// However, it is still generally slow. Future optimization work should +/// probably focus on the bigger picture rather than micro-optimizations. For +/// example: +/// +/// 1. Figure out how to more intelligently create initial partitions. 
That is, +/// Hopcroft's algorithm starts by creating two partitions of DFA states +/// that are known to NOT be equivalent: match states and non-match states. +/// The algorithm proceeds by progressively refining these partitions into +/// smaller partitions. If we could start with more partitions, then we +/// could reduce the amount of work that Hopcroft's algorithm needs to do. +/// 2. For every partition that we visit, we find all incoming transitions to +/// every state in the partition for *every* element in the alphabet. (This +/// is why using byte classes can significantly decrease minimization times, +/// since byte classes shrink the alphabet.) This is quite costly and there +/// is perhaps some redundant work being performed depending on the specific +/// states in the set. For example, we might be able to only visit some +/// elements of the alphabet based on the transitions. +/// 3. Move parts of minimization into determinization. If minimization has +/// fewer states to deal with, then it should run faster. A prime example +/// of this might be large Unicode classes, which are generated in way that +/// can create a lot of redundant states. (Some work has been done on this +/// point during NFA compilation via the algorithm described in the +/// "Incremental Construction of MinimalAcyclic Finite-State Automata" +/// paper.) +pub(crate) struct Minimizer<'a> { + dfa: &'a mut dense::OwnedDFA, + in_transitions: Vec>>, + partitions: Vec, + waiting: Vec, +} + +impl<'a> fmt::Debug for Minimizer<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Minimizer") + .field("dfa", &self.dfa) + .field("in_transitions", &self.in_transitions) + .field("partitions", &self.partitions) + .field("waiting", &self.waiting) + .finish() + } +} + +/// A set of states. A state set makes up a single partition in Hopcroft's +/// algorithm. +/// +/// It is represented by an ordered set of state identifiers. 
We use shared +/// ownership so that a single state set can be in both the set of partitions +/// and in the set of waiting sets simultaneously without an additional +/// allocation. Generally, once a state set is built, it becomes immutable. +/// +/// We use this representation because it avoids the overhead of more +/// traditional set data structures (HashSet/BTreeSet), and also because +/// computing intersection/subtraction on this representation is especially +/// fast. +#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord)] +struct StateSet { + ids: Rc<RefCell<Vec<StateID>>>, +} + +impl<'a> Minimizer<'a> { + pub fn new(dfa: &'a mut dense::OwnedDFA) -> Minimizer<'a> { + let in_transitions = Minimizer::incoming_transitions(dfa); + let partitions = Minimizer::initial_partitions(dfa); + let waiting = partitions.clone(); + Minimizer { dfa, in_transitions, partitions, waiting } + } + + pub fn run(mut self) { + let stride2 = self.dfa.stride2(); + let as_state_id = |index: usize| -> StateID { + StateID::new(index << stride2).unwrap() + }; + let as_index = |id: StateID| -> usize { id.as_usize() >> stride2 }; + + let mut incoming = StateSet::empty(); + let mut scratch1 = StateSet::empty(); + let mut scratch2 = StateSet::empty(); + let mut newparts = vec![]; + + // This loop is basically Hopcroft's algorithm. Everything else is just + // shuffling data around to fit our representation. + while let Some(set) = self.waiting.pop() { + for b in self.dfa.byte_classes().iter() { + self.find_incoming_to(b, &set, &mut incoming); + // If incoming is empty, then the intersection with any other + // set must also be empty. So 'newparts' just ends up being + // 'self.partitions'. So there's no need to go through the loop + // below. + // + // This actually turns out to be a rather large optimization. On + // the order of making minimization 4-5x faster. It's likely + // that the vast majority of all states have very few incoming + // transitions.
+ if incoming.is_empty() { + continue; + } + + for p in 0..self.partitions.len() { + self.partitions[p].intersection(&incoming, &mut scratch1); + if scratch1.is_empty() { + newparts.push(self.partitions[p].clone()); + continue; + } + + self.partitions[p].subtract(&incoming, &mut scratch2); + if scratch2.is_empty() { + newparts.push(self.partitions[p].clone()); + continue; + } + + let (x, y) = + (scratch1.deep_clone(), scratch2.deep_clone()); + newparts.push(x.clone()); + newparts.push(y.clone()); + match self.find_waiting(&self.partitions[p]) { + Some(i) => { + self.waiting[i] = x; + self.waiting.push(y); + } + None => { + if x.len() <= y.len() { + self.waiting.push(x); + } else { + self.waiting.push(y); + } + } + } + } + newparts = mem::replace(&mut self.partitions, newparts); + newparts.clear(); + } + } + + // At this point, we now have a minimal partitioning of states, where + // each partition is an equivalence class of DFA states. Now we need to + // use this partitioning to update the DFA to only contain one state for + // each partition. + + // Create a map from DFA state ID to the representative ID of the + // equivalence class to which it belongs. The representative ID of an + // equivalence class of states is the minimum ID in that class. + let mut state_to_part = vec![DEAD; self.dfa.state_len()]; + for p in &self.partitions { + p.iter(|id| state_to_part[as_index(id)] = p.min()); + } + + // Generate a new contiguous sequence of IDs for minimal states, and + // create a map from equivalence IDs to the new IDs. Thus, the new + // minimal ID of *any* state in the unminimized DFA can be obtained + // with minimal_ids[state_to_part[old_id]]. + let mut minimal_ids = vec![DEAD; self.dfa.state_len()]; + let mut new_index = 0; + for state in self.dfa.states() { + if state_to_part[as_index(state.id())] == state.id() { + minimal_ids[as_index(state.id())] = as_state_id(new_index); + new_index += 1; + } + } + // The total number of states in the minimal DFA.
+ let minimal_count = new_index; + // Convenience function for remapping state IDs. This takes an old ID, + // looks up its Hopcroft partition and then maps that to the new ID + // range. + let remap = |old| minimal_ids[as_index(state_to_part[as_index(old)])]; + + // Re-map this DFA in place such that the only states remaining + // correspond to the representative states of every equivalence class. + for id in (0..self.dfa.state_len()).map(as_state_id) { + // If this state isn't a representative for an equivalence class, + // then we skip it since it won't appear in the minimal DFA. + if state_to_part[as_index(id)] != id { + continue; + } + self.dfa.remap_state(id, remap); + self.dfa.swap_states(id, minimal_ids[as_index(id)]); + } + // Trim off all unused states from the pre-minimized DFA. This + // represents all states that were merged into a non-singleton + // equivalence class of states, and appeared after the first state + // in each such class. (Because the state with the smallest ID in each + // equivalence class is its representative ID.) + self.dfa.truncate_states(minimal_count); + + // Update the new start states, which is now just the minimal ID of + // whatever state the old start state was collapsed into. Also, we + // collect everything before-hand to work around the borrow checker. + // We're already allocating so much that this is probably fine. If this + // turns out to be costly, then I guess add a `starts_mut` iterator. + let starts: Vec<_> = self.dfa.starts().collect(); + for (old_start_id, anchored, start_type) in starts { + self.dfa.set_start_state( + anchored, + start_type, + remap(old_start_id), + ); + } + + // Update the match state pattern ID list for multi-regexes. All we + // need to do is remap the match state IDs. The pattern ID lists are + // always the same as they were since match states with distinct + // pattern ID lists are always considered distinct states.
+ let mut pmap = BTreeMap::new(); + for (match_id, pattern_ids) in self.dfa.pattern_map() { + let new_id = remap(match_id); + pmap.insert(new_id, pattern_ids); + } + // This unwrap is OK because minimization never increases the number of + // match states or patterns in those match states. Since minimization + // runs after the pattern map has already been set at least once, we + // know that our match states cannot error. + self.dfa.set_pattern_map(&pmap).unwrap(); + + // In order to update the ID of the maximum match state, we need to + // find the maximum ID among all of the match states in the minimized + // DFA. This is not necessarily the new ID of the unminimized maximum + // match state, since that could have been collapsed with a much + // earlier match state. Therefore, to find the new max match state, + // we iterate over all previous match states, find their corresponding + // new minimal ID, and take the maximum of those. + let old = self.dfa.special().clone(); + let new = self.dfa.special_mut(); + // ... but only remap if we had match states. + if old.matches() { + new.min_match = StateID::MAX; + new.max_match = StateID::ZERO; + for i in as_index(old.min_match)..=as_index(old.max_match) { + let new_id = remap(as_state_id(i)); + if new_id < new.min_match { + new.min_match = new_id; + } + if new_id > new.max_match { + new.max_match = new_id; + } + } + } + // ... same, but for start states.
+ if old.starts() { + new.min_start = StateID::MAX; + new.max_start = StateID::ZERO; + for i in as_index(old.min_start)..=as_index(old.max_start) { + let new_id = remap(as_state_id(i)); + if new_id == DEAD { + continue; + } + if new_id < new.min_start { + new.min_start = new_id; + } + if new_id > new.max_start { + new.max_start = new_id; + } + } + if new.max_start == DEAD { + new.min_start = DEAD; + } + } + new.quit_id = remap(new.quit_id); + new.set_max(); + } + + fn find_waiting(&self, set: &StateSet) -> Option<usize> { + self.waiting.iter().position(|s| s == set) + } + + fn find_incoming_to( + &self, + b: alphabet::Unit, + set: &StateSet, + incoming: &mut StateSet, + ) { + incoming.clear(); + set.iter(|id| { + for &inid in + &self.in_transitions[self.dfa.to_index(id)][b.as_usize()] + { + incoming.add(inid); + } + }); + incoming.canonicalize(); + } + + fn initial_partitions(dfa: &dense::OwnedDFA) -> Vec<StateSet> { + // For match states, we know that two match states with different + // pattern ID lists will *always* be distinct, so we can partition them + // initially based on that.
+ let mut matching: BTreeMap<Vec<PatternID>, StateSet> = BTreeMap::new(); + let mut is_quit = StateSet::empty(); + let mut no_match = StateSet::empty(); + for state in dfa.states() { + if dfa.is_match_state(state.id()) { + let mut pids = vec![]; + for i in 0..dfa.match_len(state.id()) { + pids.push(dfa.match_pattern(state.id(), i)); + } + matching + .entry(pids) + .or_insert(StateSet::empty()) + .add(state.id()); + } else if dfa.is_quit_state(state.id()) { + is_quit.add(state.id()); + } else { + no_match.add(state.id()); + } + } + + let mut sets: Vec<StateSet> = + matching.into_iter().map(|(_, set)| set).collect(); + sets.push(no_match); + sets.push(is_quit); + sets + } + + fn incoming_transitions(dfa: &dense::OwnedDFA) -> Vec<Vec<Vec<StateID>>> { + let mut incoming = vec![]; + for _ in dfa.states() { + incoming.push(vec![vec![]; dfa.alphabet_len()]); + } + for state in dfa.states() { + for (b, next) in state.transitions() { + incoming[dfa.to_index(next)][b.as_usize()].push(state.id()); + } + } + incoming + } +} + +impl StateSet { + fn empty() -> StateSet { + StateSet { ids: Rc::new(RefCell::new(vec![])) } + } + + fn add(&mut self, id: StateID) { + self.ids.borrow_mut().push(id); + } + + fn min(&self) -> StateID { + self.ids.borrow()[0] + } + + fn canonicalize(&mut self) { + self.ids.borrow_mut().sort(); + self.ids.borrow_mut().dedup(); + } + + fn clear(&mut self) { + self.ids.borrow_mut().clear(); + } + + fn len(&self) -> usize { + self.ids.borrow().len() + } + + fn is_empty(&self) -> bool { + self.len() == 0 + } + + fn deep_clone(&self) -> StateSet { + let ids = self.ids.borrow().iter().cloned().collect(); + StateSet { ids: Rc::new(RefCell::new(ids)) } + } + + fn iter<F: FnMut(StateID)>(&self, mut f: F) { + for &id in self.ids.borrow().iter() { + f(id); + } + } + + fn intersection(&self, other: &StateSet, dest: &mut StateSet) { + dest.clear(); + if self.is_empty() || other.is_empty() { + return; + } + + let (seta, setb) = (self.ids.borrow(), other.ids.borrow()); + let (mut ita, mut itb) = (seta.iter().cloned(),
setb.iter().cloned()); + let (mut a, mut b) = (ita.next().unwrap(), itb.next().unwrap()); + loop { + if a == b { + dest.add(a); + a = match ita.next() { + None => break, + Some(a) => a, + }; + b = match itb.next() { + None => break, + Some(b) => b, + }; + } else if a < b { + a = match ita.next() { + None => break, + Some(a) => a, + }; + } else { + b = match itb.next() { + None => break, + Some(b) => b, + }; + } + } + } + + fn subtract(&self, other: &StateSet, dest: &mut StateSet) { + dest.clear(); + if self.is_empty() || other.is_empty() { + self.iter(|s| dest.add(s)); + return; + } + + let (seta, setb) = (self.ids.borrow(), other.ids.borrow()); + let (mut ita, mut itb) = (seta.iter().cloned(), setb.iter().cloned()); + let (mut a, mut b) = (ita.next().unwrap(), itb.next().unwrap()); + loop { + if a == b { + a = match ita.next() { + None => break, + Some(a) => a, + }; + b = match itb.next() { + None => { + dest.add(a); + break; + } + Some(b) => b, + }; + } else if a < b { + dest.add(a); + a = match ita.next() { + None => break, + Some(a) => a, + }; + } else { + b = match itb.next() { + None => { + dest.add(a); + break; + } + Some(b) => b, + }; + } + } + for a in ita { + dest.add(a); + } + } +} diff --git a/vendor/regex-automata/src/dfa/mod.rs b/vendor/regex-automata/src/dfa/mod.rs new file mode 100644 index 0000000..fd58cac --- /dev/null +++ b/vendor/regex-automata/src/dfa/mod.rs @@ -0,0 +1,360 @@ +/*! +A module for building and searching with deterministic finite automata (DFAs). + +Like other modules in this crate, DFAs support a rich regex syntax with Unicode +features. DFAs also have extensive options for configuring the best space vs +time trade off for your use case and provides support for cheap deserialization +of automata for use in `no_std` environments. + +If you're looking for lazy DFAs that build themselves incrementally during +search, then please see the top-level [`hybrid` module](crate::hybrid).
+ +# Overview + +This section gives a brief overview of the primary types in this module: + +* A [`regex::Regex`] provides a way to search for matches of a regular +expression using DFAs. This includes iterating over matches with both the start +and end positions of each match. +* A [`dense::DFA`] provides low level access to a DFA that uses a dense +representation (uses lots of space, but fast searching). +* A [`sparse::DFA`] provides the same API as a `dense::DFA`, but uses a sparse +representation (uses less space, but slower searching). +* An [`Automaton`] trait that defines an interface that both dense and sparse +DFAs implement. (A `regex::Regex` is generic over this trait.) +* Both dense DFAs and sparse DFAs support serialization to raw bytes (e.g., +[`dense::DFA::to_bytes_little_endian`]) and cheap deserialization (e.g., +[`dense::DFA::from_bytes`]). + +There is also a [`onepass`] module that provides a [one-pass +DFA](onepass::DFA). The unique advantage of this DFA is that, for the class +of regexes it can be built with, it supports reporting the spans of matching +capturing groups. It is the only DFA in this crate capable of such a thing. + +# Example: basic regex searching + +This example shows how to compile a regex using the default configuration +and then use it to find matches in a byte string: + +``` +use regex_automata::{Match, dfa::regex::Regex}; + +let re = Regex::new(r"[0-9]{4}-[0-9]{2}-[0-9]{2}")?; +let text = b"2018-12-24 2016-10-08"; +let matches: Vec<Match> = re.find_iter(text).collect(); +assert_eq!(matches, vec![ + Match::must(0, 0..10), + Match::must(0, 11..21), +]); +# Ok::<(), Box<dyn std::error::Error>>(()) +``` + +# Example: searching with regex sets + +The DFAs in this module all fully support searching with multiple regexes +simultaneously.
You can use this support with standard leftmost-first style +searching to find non-overlapping matches: + +``` +# if cfg!(miri) { return Ok(()); } // miri takes too long +use regex_automata::{Match, dfa::regex::Regex}; + +let re = Regex::new_many(&[r"\w+", r"\S+"])?; +let text = b"@foo bar"; +let matches: Vec<Match> = re.find_iter(text).collect(); +assert_eq!(matches, vec![ + Match::must(1, 0..4), + Match::must(0, 5..8), +]); +# Ok::<(), Box<dyn std::error::Error>>(()) +``` + +# Example: use sparse DFAs + +By default, compiling a regex will use dense DFAs internally. This uses more +memory, but executes searches more quickly. If you can abide slower searches +(somewhere around 3-5x), then sparse DFAs might make more sense since they can +use significantly less space. + +Using sparse DFAs is as easy as using `Regex::new_sparse` instead of +`Regex::new`: + +``` +use regex_automata::{Match, dfa::regex::Regex}; + +let re = Regex::new_sparse(r"[0-9]{4}-[0-9]{2}-[0-9]{2}").unwrap(); +let text = b"2018-12-24 2016-10-08"; +let matches: Vec<Match> = re.find_iter(text).collect(); +assert_eq!(matches, vec![ + Match::must(0, 0..10), + Match::must(0, 11..21), +]); +# Ok::<(), Box<dyn std::error::Error>>(()) +``` + +If you already have dense DFAs for some reason, they can be converted to sparse +DFAs and used to build a new `Regex`. For example: + +``` +use regex_automata::{Match, dfa::regex::Regex}; + +let dense_re = Regex::new(r"[0-9]{4}-[0-9]{2}-[0-9]{2}").unwrap(); +let sparse_re = Regex::builder().build_from_dfas( + dense_re.forward().to_sparse()?, + dense_re.reverse().to_sparse()?, +); +let text = b"2018-12-24 2016-10-08"; +let matches: Vec<Match> = sparse_re.find_iter(text).collect(); +assert_eq!(matches, vec![ + Match::must(0, 0..10), + Match::must(0, 11..21), +]); +# Ok::<(), Box<dyn std::error::Error>>(()) +``` + +# Example: deserialize a DFA + +This shows how to first serialize a DFA into raw bytes, and then deserialize +those raw bytes back into a DFA.
While this particular example is a +bit contrived, this same technique can be used in your program to +deserialize a DFA at start up time or by memory mapping a file. + +``` +use regex_automata::{Match, dfa::{dense, regex::Regex}}; + +let re1 = Regex::new(r"[0-9]{4}-[0-9]{2}-[0-9]{2}").unwrap(); +// serialize both the forward and reverse DFAs, see note below +let (fwd_bytes, fwd_pad) = re1.forward().to_bytes_native_endian(); +let (rev_bytes, rev_pad) = re1.reverse().to_bytes_native_endian(); +// now deserialize both---we need to specify the correct type! +let fwd: dense::DFA<&[u32]> = dense::DFA::from_bytes(&fwd_bytes[fwd_pad..])?.0; +let rev: dense::DFA<&[u32]> = dense::DFA::from_bytes(&rev_bytes[rev_pad..])?.0; +// finally, reconstruct our regex +let re2 = Regex::builder().build_from_dfas(fwd, rev); + +// we can use it like normal +let text = b"2018-12-24 2016-10-08"; +let matches: Vec<Match> = re2.find_iter(text).collect(); +assert_eq!(matches, vec![ + Match::must(0, 0..10), + Match::must(0, 11..21), +]); +# Ok::<(), Box<dyn std::error::Error>>(()) +``` + +There are a few points worth noting here: + +* We need to extract the raw DFAs used by the regex and serialize those. You +can build the DFAs manually yourself using [`dense::Builder`], but using +the DFAs from a `Regex` guarantees that the DFAs are built correctly. (In +particular, a `Regex` constructs a reverse DFA for finding the starting +location of matches.) +* To convert the DFA to raw bytes, we use the `to_bytes_native_endian` method. +In practice, you'll want to use either [`dense::DFA::to_bytes_little_endian`] +or [`dense::DFA::to_bytes_big_endian`], depending on which platform you're +deserializing your DFA from. If you intend to deserialize on either platform, +then you'll need to serialize both and deserialize the right one depending on +your target's endianness.
+* Safely deserializing a DFA requires verifying the raw bytes, particularly if +they are untrusted, since an invalid DFA could cause logical errors, panics +or even undefined behavior. This verification step requires visiting all of +the transitions in the DFA, which can be costly. If cheaper verification is +desired, then [`dense::DFA::from_bytes_unchecked`] is available that only does +verification that can be performed in constant time. However, one can only use +this routine if the caller can guarantee that the bytes provided encoded a +valid DFA. + +The same process can be achieved with sparse DFAs as well: + +``` +use regex_automata::{Match, dfa::{sparse, regex::Regex}}; + +let re1 = Regex::new(r"[0-9]{4}-[0-9]{2}-[0-9]{2}").unwrap(); +// serialize both +let fwd_bytes = re1.forward().to_sparse()?.to_bytes_native_endian(); +let rev_bytes = re1.reverse().to_sparse()?.to_bytes_native_endian(); +// now deserialize both---we need to specify the correct type! +let fwd: sparse::DFA<&[u8]> = sparse::DFA::from_bytes(&fwd_bytes)?.0; +let rev: sparse::DFA<&[u8]> = sparse::DFA::from_bytes(&rev_bytes)?.0; +// finally, reconstruct our regex +let re2 = Regex::builder().build_from_dfas(fwd, rev); + +// we can use it like normal +let text = b"2018-12-24 2016-10-08"; +let matches: Vec<Match> = re2.find_iter(text).collect(); +assert_eq!(matches, vec![ + Match::must(0, 0..10), + Match::must(0, 11..21), +]); +# Ok::<(), Box<dyn std::error::Error>>(()) +``` + +Note that unlike dense DFAs, sparse DFAs have no alignment requirements. +Conversely, dense DFAs must be aligned to the same alignment as a +[`StateID`](crate::util::primitives::StateID). + +# Support for `no_std` and `alloc`-only + +This crate comes with `alloc` and `std` features that are enabled by default. +When the `alloc` or `std` features are enabled, the API of this module will +include the facilities necessary for compiling, serializing, deserializing +and searching with DFAs.
When only the `alloc` feature is enabled, then +implementations of the `std::error::Error` trait are dropped, but everything +else generally remains the same. When both the `alloc` and `std` features are +disabled, the API of this module will shrink such that it only includes the +facilities necessary for deserializing and searching with DFAs. + +The intended workflow for `no_std` environments is thus as follows: + +* Write a program with the `alloc` or `std` features that compiles and +serializes a regular expression. You may need to serialize both little and big +endian versions of each DFA. (So that's 4 DFAs in total for each regex.) +* In your `no_std` environment, follow the examples above for deserializing +your previously serialized DFAs into regexes. You can then search with them as +you would any regex. + +Deserialization can happen anywhere. For example, with bytes embedded into a +binary or with a file memory mapped at runtime. + +The `regex-cli` command (found in the same repository as this crate) can be +used to serialize DFAs to files and generate Rust code to read them. + +# Syntax + +This module supports the same syntax as the `regex` crate, since they share the +same parser. You can find an exhaustive list of supported syntax in the +[documentation for the `regex` crate](https://docs.rs/regex/1/regex/#syntax). + +There are two things that are not supported by the DFAs in this module: + +* Capturing groups. The DFAs (and [`Regex`](regex::Regex)es built on top +of them) can only find the offsets of an entire match, but cannot resolve +the offsets of each capturing group. This is because DFAs do not have the +expressive power necessary. +* Unicode word boundaries. These present particularly difficult challenges for +DFA construction and would result in an explosion in the number of states. +One can enable [`dense::Config::unicode_word_boundary`] though, which provides +heuristic support for Unicode word boundaries that only works on ASCII text. 
+Otherwise, one can use `(?-u:\b)` for an ASCII word boundary, which will work +on any input. + +There are no plans to lift either of these limitations. + +Note that these restrictions are identical to the restrictions on lazy DFAs. + +# Differences with general purpose regexes + +The main goal of the [`regex`](https://docs.rs/regex) crate is to serve as a +general purpose regular expression engine. It aims to automatically balance low +compile times, fast search times and low memory usage, while also providing +a convenient API for users. In contrast, this module provides a lower level +regular expression interface based exclusively on DFAs that is a bit less +convenient while providing more explicit control over memory usage and search +times. + +Here are some specific negative differences: + +* **Compilation can take an exponential amount of time and space** in the size +of the regex pattern. While most patterns do not exhibit worst case exponential +time, such patterns do exist. For example, `[01]*1[01]{N}` will build a DFA +with approximately `2^(N+2)` states. For this reason, untrusted patterns should +not be compiled with this module. (In the future, the API may expose an option +to return an error if the DFA gets too big.) +* This module does not support sub-match extraction via capturing groups, which +can be achieved with the regex crate's "captures" API. +* While the regex crate doesn't necessarily sport fast compilation times, +the regexes in this module are almost universally slow to compile, especially +when they contain large Unicode character classes. For example, on my system, +compiling `\w{50}` takes about 1 second and almost 15MB of memory! (Compiling +a sparse regex takes about the same time but only uses about 1.2MB of +memory.) Conversely, compiling the same regex without Unicode support, e.g., +`(?-u)\w{50}`, takes under 1 millisecond and about 15KB of memory. 
For this +reason, you should only use Unicode character classes if you absolutely need +them! (They are enabled by default though.) +* This module does not support Unicode word boundaries. ASCII word boundaries +may be used though by disabling Unicode or selectively doing so in the syntax, +e.g., `(?-u:\b)`. There is also an option to +[heuristically enable Unicode word boundaries](crate::dfa::dense::Config::unicode_word_boundary), +where the corresponding DFA will give up if any non-ASCII byte is seen. +* As a lower level API, this module does not do literal optimizations +automatically. Although it does provide hooks in its API to make use of the +[`Prefilter`](crate::util::prefilter::Prefilter) trait. Missing literal +optimizations means that searches may run much slower than what you're +accustomed to, although, it does provide more predictable and consistent +performance. +* There is no `&str` API like in the regex crate. In this module, all APIs +operate on `&[u8]`. By default, match indices are +guaranteed to fall on UTF-8 boundaries, unless either of +[`syntax::Config::utf8`](crate::util::syntax::Config::utf8) or +[`thompson::Config::utf8`](crate::nfa::thompson::Config::utf8) are disabled. + +With some of the downsides out of the way, here are some positive differences: + +* Both dense and sparse DFAs can be serialized to raw bytes, and then cheaply +deserialized. Deserialization can be done in constant time with the unchecked +APIs, since searching can be performed directly on the raw serialized bytes of +a DFA. +* This module was specifically designed so that the searching phase of a +DFA has minimal runtime requirements, and can therefore be used in `no_std` +environments. While `no_std` environments cannot compile regexes, they can +deserialize pre-compiled regexes. +* Since this module builds DFAs ahead of time, it will generally out-perform +the `regex` crate on equivalent tasks. The performance difference is likely +not large.
However, because of a complex set of optimizations in the regex +crate (like literal optimizations), an accurate performance comparison may be +difficult to do. +* Sparse DFAs provide a way to build a DFA ahead of time that sacrifices search +performance a small amount, but uses much less storage space. Potentially even +less than what the regex crate uses. +* This module exposes DFAs directly, such as [`dense::DFA`] and +[`sparse::DFA`], which enables one to do less work in some cases. For example, +if you only need the end of a match and not the start of a match, then you can +use a DFA directly without building a `Regex`, which always requires a second +DFA to find the start of a match. +* This module provides more control over memory usage. Aside from choosing +between dense and sparse DFAs, one can also choose a smaller state identifier +representation to use less space. Also, one can enable DFA minimization +via [`dense::Config::minimize`], but it can increase compilation times +dramatically. +*/ + +#[cfg(feature = "dfa-search")] +pub use crate::dfa::{ + automaton::{Automaton, OverlappingState, StartError}, + start::StartKind, +}; + +/// This is an alias for a state ID of zero. It has special significance +/// because it always corresponds to the first state in a DFA, and the first +/// state in a DFA is always "dead." That is, the dead state always has all +/// of its transitions set to itself. Moreover, the dead state is used as a +/// sentinel for various things. e.g., In search, reaching a dead state means +/// that the search must stop. 
+const DEAD: crate::util::primitives::StateID = + crate::util::primitives::StateID::ZERO; + +#[cfg(feature = "dfa-search")] +pub mod dense; +#[cfg(feature = "dfa-onepass")] +pub mod onepass; +#[cfg(feature = "dfa-search")] +pub mod regex; +#[cfg(feature = "dfa-search")] +pub mod sparse; + +#[cfg(feature = "dfa-search")] +pub(crate) mod accel; +#[cfg(feature = "dfa-search")] +mod automaton; +#[cfg(feature = "dfa-build")] +mod determinize; +#[cfg(feature = "dfa-build")] +mod minimize; +#[cfg(any(feature = "dfa-build", feature = "dfa-onepass"))] +mod remapper; +#[cfg(feature = "dfa-search")] +mod search; +#[cfg(feature = "dfa-search")] +mod special; +#[cfg(feature = "dfa-search")] +mod start; diff --git a/vendor/regex-automata/src/dfa/onepass.rs b/vendor/regex-automata/src/dfa/onepass.rs new file mode 100644 index 0000000..e62bbd3 --- /dev/null +++ b/vendor/regex-automata/src/dfa/onepass.rs @@ -0,0 +1,3192 @@ +/*! +A DFA that can return spans for matching capturing groups. + +This module is the home of a [one-pass DFA](DFA). + +This module also contains a [`Builder`] and a [`Config`] for building and +configuring a one-pass DFA. +*/ + +// A note on naming and credit: +// +// As far as I know, Russ Cox came up with the practical vision and +// implementation of a "one-pass regex engine." He mentions and describes it +// briefly in the third article of his regexp article series: +// https://swtch.com/~rsc/regexp/regexp3.html +// +// Cox's implementation is in RE2, and the implementation below is most +// heavily inspired by RE2's. The key thing they have in common is that +// their transitions are defined over an alphabet of bytes. In contrast, +// Go's regex engine also has a one-pass engine, but its transitions are +// more firmly rooted on Unicode codepoints. The ideas are the same, but the +// implementations are different. +// +// RE2 tends to call this a "one-pass NFA." Here, we call it a "one-pass DFA." 
+// They're both true in their own ways: +// +// * The "one-pass" criterion is generally a property of the NFA itself. In +// particular, it is said that an NFA is one-pass if, after each byte of input +// during a search, there is at most one "VM thread" remaining to take for the +// next byte of input. That is, there is never any ambiguity as to the path to +// take through the NFA during a search. +// +// * On the other hand, once a one-pass NFA has its representation converted +// to something where a constant number of instructions is used for each byte +// of input, the implementation looks a lot more like a DFA. It's technically +// more powerful than a DFA since it has side effects (storing offsets inside +// of slots activated by a transition), but it is far closer to a DFA than an +// NFA simulation. +// +// Thus, in this crate, we call it a one-pass DFA. + +use alloc::{vec, vec::Vec}; + +use crate::{ + dfa::{remapper::Remapper, DEAD}, + nfa::thompson::{self, NFA}, + util::{ + alphabet::ByteClasses, + captures::Captures, + escape::DebugByte, + int::{Usize, U32, U64, U8}, + look::{Look, LookSet, UnicodeWordBoundaryError}, + primitives::{NonMaxUsize, PatternID, StateID}, + search::{Anchored, Input, Match, MatchError, MatchKind, Span}, + sparse_set::SparseSet, + }, +}; + +/// The configuration used for building a [one-pass DFA](DFA). +/// +/// A one-pass DFA configuration is a simple data object that is typically used +/// with [`Builder::configure`]. It can be cheaply cloned. +/// +/// A default configuration can be created either with `Config::new`, or +/// perhaps more conveniently, with [`DFA::config`]. +#[derive(Clone, Debug, Default)] +pub struct Config { + match_kind: Option, + starts_for_each_pattern: Option, + byte_classes: Option, + size_limit: Option>, +} + +impl Config { + /// Return a new default one-pass DFA configuration. + pub fn new() -> Config { + Config::default() + } + + /// Set the desired match semantics. 
+ /// + /// The default is [`MatchKind::LeftmostFirst`], which corresponds to the + /// match semantics of Perl-like regex engines. That is, when multiple + /// patterns would match at the same leftmost position, the pattern that + /// appears first in the concrete syntax is chosen. + /// + /// Currently, the only other kind of match semantics supported is + /// [`MatchKind::All`]. This corresponds to "classical DFA" construction + /// where all possible matches are visited. + /// + /// When it comes to the one-pass DFA, it is rarer for preference order and + /// "longest match" to actually disagree. Since if they did disagree, then + /// the regex typically isn't one-pass. For example, searching `Samwise` + /// for `Sam|Samwise` will report `Sam` for leftmost-first matching and + /// `Samwise` for "longest match" or "all" matching. However, this regex is + /// not one-pass if taken literally. The equivalent regex, `Sam(?:|wise)` + /// is one-pass and `Sam|Samwise` may be optimized to it. + /// + /// The other main difference is that "all" match semantics don't support + /// non-greedy matches. "All" match semantics always try to match as much + /// as possible. + pub fn match_kind(mut self, kind: MatchKind) -> Config { + self.match_kind = Some(kind); + self + } + + /// Whether to compile a separate start state for each pattern in the + /// one-pass DFA. + /// + /// When enabled, a separate **anchored** start state is added for each + /// pattern in the DFA. When this start state is used, then the DFA will + /// only search for matches for the pattern specified, even if there are + /// other patterns in the DFA. + /// + /// The main downside of this option is that it can potentially increase + /// the size of the DFA and/or increase the time it takes to build the DFA. 
+ /// + /// You might want to enable this option when you want to both search for + /// anchored matches of any pattern or to search for anchored matches of + /// one particular pattern while using the same DFA. (Otherwise, you would + /// need to compile a new DFA for each pattern.) + /// + /// By default this is disabled. + /// + /// # Example + /// + /// This example shows how to build a multi-regex and then search for + /// matches for a any of the patterns or matches for a specific pattern. + /// + /// ``` + /// use regex_automata::{ + /// dfa::onepass::DFA, Anchored, Input, Match, PatternID, + /// }; + /// + /// let re = DFA::builder() + /// .configure(DFA::config().starts_for_each_pattern(true)) + /// .build_many(&["[a-z]+", "[0-9]+"])?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// let haystack = "123abc"; + /// let input = Input::new(haystack).anchored(Anchored::Yes); + /// + /// // A normal multi-pattern search will show pattern 1 matches. + /// re.try_search(&mut cache, &input, &mut caps)?; + /// assert_eq!(Some(Match::must(1, 0..3)), caps.get_match()); + /// + /// // If we only want to report pattern 0 matches, then we'll get no + /// // match here. + /// let input = input.anchored(Anchored::Pattern(PatternID::must(0))); + /// re.try_search(&mut cache, &input, &mut caps)?; + /// assert_eq!(None, caps.get_match()); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn starts_for_each_pattern(mut self, yes: bool) -> Config { + self.starts_for_each_pattern = Some(yes); + self + } + + /// Whether to attempt to shrink the size of the DFA's alphabet or not. + /// + /// This option is enabled by default and should never be disabled unless + /// one is debugging a one-pass DFA. + /// + /// When enabled, the DFA will use a map from all possible bytes to their + /// corresponding equivalence class. Each equivalence class represents a + /// set of bytes that does not discriminate between a match and a non-match + /// in the DFA. 
For example, the pattern `[ab]+` has at least two + /// equivalence classes: a set containing `a` and `b` and a set containing + /// every byte except for `a` and `b`. `a` and `b` are in the same + /// equivalence class because they never discriminate between a match and a + /// non-match. + /// + /// The advantage of this map is that the size of the transition table + /// can be reduced drastically from (approximately) `#states * 256 * + /// sizeof(StateID)` to `#states * k * sizeof(StateID)` where `k` is the + /// number of equivalence classes (rounded up to the nearest power of 2). + /// As a result, total space usage can decrease substantially. Moreover, + /// since a smaller alphabet is used, DFA compilation becomes faster as + /// well. + /// + /// **WARNING:** This is only useful for debugging DFAs. Disabling this + /// does not yield any speed advantages. Namely, even when this is + /// disabled, a byte class map is still used while searching. The only + /// difference is that every byte will be forced into its own distinct + /// equivalence class. This is useful for debugging the actual generated + /// transitions because it lets one see the transitions defined on actual + /// bytes instead of the equivalence classes. + pub fn byte_classes(mut self, yes: bool) -> Config { + self.byte_classes = Some(yes); + self + } + + /// Set a size limit on the total heap used by a one-pass DFA. + /// + /// This size limit is expressed in bytes and is applied during + /// construction of a one-pass DFA. If the DFA's heap usage exceeds + /// this configured limit, then construction is stopped and an error is + /// returned. + /// + /// The default is no limit. + /// + /// # Example + /// + /// This example shows a one-pass DFA that fails to build because of + /// a configured size limit. This particular example also serves as a + /// cautionary tale demonstrating just how big DFAs with large Unicode + /// character classes can get. 
+ /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{dfa::onepass::DFA, Match}; + /// + /// // 6MB isn't enough! + /// DFA::builder() + /// .configure(DFA::config().size_limit(Some(6_000_000))) + /// .build(r"\w{20}") + /// .unwrap_err(); + /// + /// // ... but 7MB probably is! + /// // (Note that DFA sizes aren't necessarily stable between releases.) + /// let re = DFA::builder() + /// .configure(DFA::config().size_limit(Some(7_000_000))) + /// .build(r"\w{20}")?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// let haystack = "A".repeat(20); + /// re.captures(&mut cache, &haystack, &mut caps); + /// assert_eq!(Some(Match::must(0, 0..20)), caps.get_match()); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// While one needs a little more than 3MB to represent `\w{20}`, it + /// turns out that you only need a little more than 4KB to represent + /// `(?-u:\w{20})`. So only use Unicode if you need it! + pub fn size_limit(mut self, limit: Option) -> Config { + self.size_limit = Some(limit); + self + } + + /// Returns the match semantics set in this configuration. + pub fn get_match_kind(&self) -> MatchKind { + self.match_kind.unwrap_or(MatchKind::LeftmostFirst) + } + + /// Returns whether this configuration has enabled anchored starting states + /// for every pattern in the DFA. + pub fn get_starts_for_each_pattern(&self) -> bool { + self.starts_for_each_pattern.unwrap_or(false) + } + + /// Returns whether this configuration has enabled byte classes or not. + /// This is typically a debugging oriented option, as disabling it confers + /// no speed benefit. + pub fn get_byte_classes(&self) -> bool { + self.byte_classes.unwrap_or(true) + } + + /// Returns the DFA size limit of this configuration if one was set. + /// The size limit is total number of bytes on the heap that a DFA is + /// permitted to use. 
If the DFA exceeds this limit during construction, + /// then construction is stopped and an error is returned. + pub fn get_size_limit(&self) -> Option { + self.size_limit.unwrap_or(None) + } + + /// Overwrite the default configuration such that the options in `o` are + /// always used. If an option in `o` is not set, then the corresponding + /// option in `self` is used. If it's not set in `self` either, then it + /// remains not set. + pub(crate) fn overwrite(&self, o: Config) -> Config { + Config { + match_kind: o.match_kind.or(self.match_kind), + starts_for_each_pattern: o + .starts_for_each_pattern + .or(self.starts_for_each_pattern), + byte_classes: o.byte_classes.or(self.byte_classes), + size_limit: o.size_limit.or(self.size_limit), + } + } +} + +/// A builder for a [one-pass DFA](DFA). +/// +/// This builder permits configuring options for the syntax of a pattern, the +/// NFA construction and the DFA construction. This builder is different from a +/// general purpose regex builder in that it permits fine grain configuration +/// of the construction process. The trade off for this is complexity, and +/// the possibility of setting a configuration that might not make sense. For +/// example, there are two different UTF-8 modes: +/// +/// * [`syntax::Config::utf8`](crate::util::syntax::Config::utf8) controls +/// whether the pattern itself can contain sub-expressions that match invalid +/// UTF-8. +/// * [`thompson::Config::utf8`] controls whether empty matches that split a +/// Unicode codepoint are reported or not. +/// +/// Generally speaking, callers will want to either enable all of these or +/// disable all of these. +/// +/// # Example +/// +/// This example shows how to disable UTF-8 mode in the syntax and the NFA. +/// This is generally what you want for matching on arbitrary bytes. 
+/// +/// ``` +/// # if cfg!(miri) { return Ok(()); } // miri takes too long +/// use regex_automata::{ +/// dfa::onepass::DFA, +/// nfa::thompson, +/// util::syntax, +/// Match, +/// }; +/// +/// let re = DFA::builder() +/// .syntax(syntax::Config::new().utf8(false)) +/// .thompson(thompson::Config::new().utf8(false)) +/// .build(r"foo(?-u:[^b])ar.*")?; +/// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); +/// +/// let haystack = b"foo\xFFarzz\xE2\x98\xFF\n"; +/// re.captures(&mut cache, haystack, &mut caps); +/// // Notice that `(?-u:[^b])` matches invalid UTF-8, +/// // but the subsequent `.*` does not! Disabling UTF-8 +/// // on the syntax permits this. +/// // +/// // N.B. This example does not show the impact of +/// // disabling UTF-8 mode on a one-pass DFA Config, +/// // since that only impacts regexes that can +/// // produce matches of length 0. +/// assert_eq!(Some(Match::must(0, 0..8)), caps.get_match()); +/// +/// # Ok::<(), Box>(()) +/// ``` +#[derive(Clone, Debug)] +pub struct Builder { + config: Config, + #[cfg(feature = "syntax")] + thompson: thompson::Compiler, +} + +impl Builder { + /// Create a new one-pass DFA builder with the default configuration. + pub fn new() -> Builder { + Builder { + config: Config::default(), + #[cfg(feature = "syntax")] + thompson: thompson::Compiler::new(), + } + } + + /// Build a one-pass DFA from the given pattern. + /// + /// If there was a problem parsing or compiling the pattern, then an error + /// is returned. + #[cfg(feature = "syntax")] + pub fn build(&self, pattern: &str) -> Result { + self.build_many(&[pattern]) + } + + /// Build a one-pass DFA from the given patterns. + /// + /// When matches are returned, the pattern ID corresponds to the index of + /// the pattern in the slice given. 
+ #[cfg(feature = "syntax")] + pub fn build_many>( + &self, + patterns: &[P], + ) -> Result { + let nfa = + self.thompson.build_many(patterns).map_err(BuildError::nfa)?; + self.build_from_nfa(nfa) + } + + /// Build a DFA from the given NFA. + /// + /// # Example + /// + /// This example shows how to build a DFA if you already have an NFA in + /// hand. + /// + /// ``` + /// use regex_automata::{dfa::onepass::DFA, nfa::thompson::NFA, Match}; + /// + /// // This shows how to set non-default options for building an NFA. + /// let nfa = NFA::compiler() + /// .configure(NFA::config().shrink(true)) + /// .build(r"[a-z0-9]+")?; + /// let re = DFA::builder().build_from_nfa(nfa)?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// re.captures(&mut cache, "foo123bar", &mut caps); + /// assert_eq!(Some(Match::must(0, 0..9)), caps.get_match()); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn build_from_nfa(&self, nfa: NFA) -> Result { + // Why take ownership if we're just going to pass a reference to the + // NFA to our internal builder? Well, the first thing to note is that + // an NFA uses reference counting internally, so either choice is going + // to be cheap. So there isn't much cost either way. + // + // The real reason is that a one-pass DFA, semantically, shares + // ownership of an NFA. This is unlike other DFAs that don't share + // ownership of an NFA at all, primarily because they want to be + // self-contained in order to support cheap (de)serialization. + // + // But then why pass a '&nfa' below if we want to share ownership? + // Well, it turns out that using a '&NFA' in our internal builder + // separates its lifetime from the DFA we're building, and this turns + // out to make code a bit more composable. e.g., We can iterate over + // things inside the NFA while borrowing the builder as mutable because + // we know the NFA cannot be mutated. So TL;DR --- this weirdness is + // "because borrow checker." 
+ InternalBuilder::new(self.config.clone(), &nfa).build() + } + + /// Apply the given one-pass DFA configuration options to this builder. + pub fn configure(&mut self, config: Config) -> &mut Builder { + self.config = self.config.overwrite(config); + self + } + + /// Set the syntax configuration for this builder using + /// [`syntax::Config`](crate::util::syntax::Config). + /// + /// This permits setting things like case insensitivity, Unicode and multi + /// line mode. + /// + /// These settings only apply when constructing a one-pass DFA directly + /// from a pattern. + #[cfg(feature = "syntax")] + pub fn syntax( + &mut self, + config: crate::util::syntax::Config, + ) -> &mut Builder { + self.thompson.syntax(config); + self + } + + /// Set the Thompson NFA configuration for this builder using + /// [`nfa::thompson::Config`](crate::nfa::thompson::Config). + /// + /// This permits setting things like whether additional time should be + /// spent shrinking the size of the NFA. + /// + /// These settings only apply when constructing a DFA directly from a + /// pattern. + #[cfg(feature = "syntax")] + pub fn thompson(&mut self, config: thompson::Config) -> &mut Builder { + self.thompson.configure(config); + self + } +} + +/// An internal builder for encapsulating the state necessary to build a +/// one-pass DFA. Typical use is just `InternalBuilder::new(..).build()`. +/// +/// There is no separate pass for determining whether the NFA is one-pass or +/// not. We just try to build the DFA. If during construction we discover that +/// it is not one-pass, we bail out. This is likely to lead to some undesirable +/// expense in some cases, so it might make sense to try an identify common +/// patterns in the NFA that make it definitively not one-pass. That way, we +/// can avoid ever trying to build a one-pass DFA in the first place. 
For +/// example, '\w*\s' is not one-pass, and since '\w' is Unicode-aware by +/// default, it's probably not a trivial cost to try and build a one-pass DFA +/// for it and then fail. +/// +/// Note that some (immutable) fields are duplicated here. For example, the +/// 'nfa' and 'classes' fields are both in the 'DFA'. They are the same thing, +/// but we duplicate them because it makes composition easier below. Otherwise, +/// since the borrow checker can't see through method calls, the mutable borrow +/// we use to mutate the DFA winds up preventing borrowing from any other part +/// of the DFA, even though we aren't mutating those parts. We only do this +/// because the duplication is cheap. +#[derive(Debug)] +struct InternalBuilder<'a> { + /// The DFA we're building. + dfa: DFA, + /// An unordered collection of NFA state IDs that we haven't yet tried to + /// build into a DFA state yet. + /// + /// This collection does not ultimately wind up including every NFA state + /// ID. Instead, each ID represents a "start" state for a sub-graph of the + /// NFA. The set of NFA states we then use to build a DFA state consists + /// of that "start" state and all states reachable from it via epsilon + /// transitions. + uncompiled_nfa_ids: Vec, + /// A map from NFA state ID to DFA state ID. This is useful for easily + /// determining whether an NFA state has been used as a "starting" point + /// to build a DFA state yet. If it hasn't, then it is mapped to DEAD, + /// and since DEAD is specially added and never corresponds to any NFA + /// state, it follows that a mapping to DEAD implies the NFA state has + /// no corresponding DFA state yet. + nfa_to_dfa_id: Vec, + /// A stack used to traverse the NFA states that make up a single DFA + /// state. Traversal occurs until the stack is empty, and we only push to + /// the stack when the state ID isn't in 'seen'. 
Actually, even more than + /// that, if we try to push something on to this stack that is already in + /// 'seen', then we bail out on construction completely, since it implies + /// that the NFA is not one-pass. + stack: Vec<(StateID, Epsilons)>, + /// The set of NFA states that we've visited via 'stack'. + seen: SparseSet, + /// Whether a match NFA state has been observed while constructing a + /// one-pass DFA state. Once a match state is seen, assuming we are using + /// leftmost-first match semantics, then we don't add any more transitions + /// to the DFA state we're building. + matched: bool, + /// The config passed to the builder. + /// + /// This is duplicated in dfa.config. + config: Config, + /// The NFA we're building a one-pass DFA from. + /// + /// This is duplicated in dfa.nfa. + nfa: &'a NFA, + /// The equivalence classes that make up the alphabet for this DFA> + /// + /// This is duplicated in dfa.classes. + classes: ByteClasses, +} + +impl<'a> InternalBuilder<'a> { + /// Create a new builder with an initial empty DFA. + fn new(config: Config, nfa: &'a NFA) -> InternalBuilder { + let classes = if !config.get_byte_classes() { + // A one-pass DFA will always use the equivalence class map, but + // enabling this option is useful for debugging. Namely, this will + // cause all transitions to be defined over their actual bytes + // instead of an opaque equivalence class identifier. The former is + // much easier to grok as a human. + ByteClasses::singletons() + } else { + nfa.byte_classes().clone() + }; + // Normally a DFA alphabet includes the EOI symbol, but we don't need + // that in the one-pass DFA since we handle look-around explicitly + // without encoding it into the DFA. Thus, we don't need to delay + // matches by 1 byte. However, we reuse the space that *would* be used + // by the EOI transition by putting match information there (like which + // pattern matches and which look-around assertions need to hold). 
So + // this means our real alphabet length is 1 fewer than what the byte + // classes report, since we don't use EOI. + let alphabet_len = classes.alphabet_len().checked_sub(1).unwrap(); + let stride2 = classes.stride2(); + let dfa = DFA { + config: config.clone(), + nfa: nfa.clone(), + table: vec![], + starts: vec![], + // Since one-pass DFAs have a smaller state ID max than + // StateID::MAX, it follows that StateID::MAX is a valid initial + // value for min_match_id since no state ID can ever be greater + // than it. In the case of a one-pass DFA with no match states, the + // min_match_id will keep this sentinel value. + min_match_id: StateID::MAX, + classes: classes.clone(), + alphabet_len, + stride2, + pateps_offset: alphabet_len, + // OK because PatternID::MAX*2 is guaranteed not to overflow. + explicit_slot_start: nfa.pattern_len().checked_mul(2).unwrap(), + }; + InternalBuilder { + dfa, + uncompiled_nfa_ids: vec![], + nfa_to_dfa_id: vec![DEAD; nfa.states().len()], + stack: vec![], + seen: SparseSet::new(nfa.states().len()), + matched: false, + config, + nfa, + classes, + } + } + + /// Build the DFA from the NFA given to this builder. If the NFA is not + /// one-pass, then return an error. An error may also be returned if a + /// particular limit is exceeded. (Some limits, like the total heap memory + /// used, are configurable. Others, like the total patterns or slots, are + /// hard-coded based on representational limitations.) + fn build(mut self) -> Result { + self.nfa.look_set_any().available().map_err(BuildError::word)?; + for look in self.nfa.look_set_any().iter() { + // This is a future incompatibility check where if we add any + // more look-around assertions, then the one-pass DFA either + // needs to reject them (what we do here) or it needs to have its + // Transition representation modified to be capable of storing the + // new assertions. 
+ if look.as_repr() > Look::WordUnicodeNegate.as_repr() { + return Err(BuildError::unsupported_look(look)); + } + } + if self.nfa.pattern_len().as_u64() > PatternEpsilons::PATTERN_ID_LIMIT + { + return Err(BuildError::too_many_patterns( + PatternEpsilons::PATTERN_ID_LIMIT, + )); + } + if self.nfa.group_info().explicit_slot_len() > Slots::LIMIT { + return Err(BuildError::not_one_pass( + "too many explicit capturing groups (max is 16)", + )); + } + assert_eq!(DEAD, self.add_empty_state()?); + + // This is where the explicit slots start. We care about this because + // we only need to track explicit slots. The implicit slots---two for + // each pattern---are tracked as part of the search routine itself. + let explicit_slot_start = self.nfa.pattern_len() * 2; + self.add_start_state(None, self.nfa.start_anchored())?; + if self.config.get_starts_for_each_pattern() { + for pid in self.nfa.patterns() { + self.add_start_state( + Some(pid), + self.nfa.start_pattern(pid).unwrap(), + )?; + } + } + // NOTE: One wonders what the effects of treating 'uncompiled_nfa_ids' + // as a stack are. It is really an unordered *set* of NFA state IDs. + // If it, for example, in practice led to discovering whether a regex + // was or wasn't one-pass later than if we processed NFA state IDs in + // ascending order, then that would make this routine more costly in + // the somewhat common case of a regex that isn't one-pass. + while let Some(nfa_id) = self.uncompiled_nfa_ids.pop() { + let dfa_id = self.nfa_to_dfa_id[nfa_id]; + // Once we see a match, we keep going, but don't add any new + // transitions. Normally we'd just stop, but we have to keep + // going in order to verify that our regex is actually one-pass. + self.matched = false; + // The NFA states we've already explored for this DFA state. + self.seen.clear(); + // The NFA states to explore via epsilon transitions. 
If we ever + // try to push an NFA state that we've already seen, then the NFA + // is not one-pass because it implies there are multiple epsilon + // transition paths that lead to the same NFA state. In other + // words, there is ambiguity. + self.stack_push(nfa_id, Epsilons::empty())?; + while let Some((id, epsilons)) = self.stack.pop() { + match *self.nfa.state(id) { + thompson::State::ByteRange { ref trans } => { + self.compile_transition(dfa_id, trans, epsilons)?; + } + thompson::State::Sparse(ref sparse) => { + for trans in sparse.transitions.iter() { + self.compile_transition(dfa_id, trans, epsilons)?; + } + } + thompson::State::Dense(ref dense) => { + for trans in dense.iter() { + self.compile_transition(dfa_id, &trans, epsilons)?; + } + } + thompson::State::Look { look, next } => { + let looks = epsilons.looks().insert(look); + self.stack_push(next, epsilons.set_looks(looks))?; + } + thompson::State::Union { ref alternates } => { + for &sid in alternates.iter().rev() { + self.stack_push(sid, epsilons)?; + } + } + thompson::State::BinaryUnion { alt1, alt2 } => { + self.stack_push(alt2, epsilons)?; + self.stack_push(alt1, epsilons)?; + } + thompson::State::Capture { next, slot, .. } => { + let slot = slot.as_usize(); + let epsilons = if slot < explicit_slot_start { + // If this is an implicit slot, we don't care + // about it, since we handle implicit slots in + // the search routine. We can get away with that + // because there are 2 implicit slots for every + // pattern. + epsilons + } else { + // Offset our explicit slots so that they start + // at index 0. + let offset = slot - explicit_slot_start; + epsilons.set_slots(epsilons.slots().insert(offset)) + }; + self.stack_push(next, epsilons)?; + } + thompson::State::Fail => { + continue; + } + thompson::State::Match { pattern_id } => { + // If we found two different paths to a match state + // for the same DFA state, then we have ambiguity. + // Thus, it's not one-pass. 
+ if self.matched { + return Err(BuildError::not_one_pass( + "multiple epsilon transitions to match state", + )); + } + self.matched = true; + // Shove the matching pattern ID and the 'epsilons' + // into the current DFA state's pattern epsilons. The + // 'epsilons' includes the slots we need to capture + // before reporting the match and also the conditional + // epsilon transitions we need to check before we can + // report a match. + self.dfa.set_pattern_epsilons( + dfa_id, + PatternEpsilons::empty() + .set_pattern_id(pattern_id) + .set_epsilons(epsilons), + ); + // N.B. It is tempting to just bail out here when + // compiling a leftmost-first DFA, since we will never + // compile any more transitions in that case. But we + // actually need to keep going in order to verify that + // we actually have a one-pass regex. e.g., We might + // see more Match states (e.g., for other patterns) + // that imply that we don't have a one-pass regex. + // So instead, we mark that we've found a match and + // continue on. When we go to compile a new DFA state, + // we just skip that part. But otherwise check that the + // one-pass property is upheld. + } + } + } + } + self.shuffle_states(); + Ok(self.dfa) + } + + /// Shuffle all match states to the end of the transition table and set + /// 'min_match_id' to the ID of the first such match state. + /// + /// The point of this is to make it extremely cheap to determine whether + /// a state is a match state or not. We need to check on this on every + /// transition during a search, so it being cheap is important. This + /// permits us to check it by simply comparing two state identifiers, as + /// opposed to looking for the pattern ID in the state's `PatternEpsilons`. + /// (Which requires a memory load and some light arithmetic.) 
+ fn shuffle_states(&mut self) { + let mut remapper = Remapper::new(&self.dfa); + let mut next_dest = self.dfa.last_state_id(); + for i in (0..self.dfa.state_len()).rev() { + let id = StateID::must(i); + let is_match = + self.dfa.pattern_epsilons(id).pattern_id().is_some(); + if !is_match { + continue; + } + remapper.swap(&mut self.dfa, next_dest, id); + self.dfa.min_match_id = next_dest; + next_dest = self.dfa.prev_state_id(next_dest).expect( + "match states should be a proper subset of all states", + ); + } + remapper.remap(&mut self.dfa); + } + + /// Compile the given NFA transition into the DFA state given. + /// + /// 'Epsilons' corresponds to any conditional epsilon transitions that need + /// to be satisfied to follow this transition, and any slots that need to + /// be saved if the transition is followed. + /// + /// If this transition indicates that the NFA is not one-pass, then + /// this returns an error. (This occurs, for example, if the DFA state + /// already has a transition defined for the same input symbols as the + /// given transition, *and* the result of the old and new transitions is + /// different.) + fn compile_transition( + &mut self, + dfa_id: StateID, + trans: &thompson::Transition, + epsilons: Epsilons, + ) -> Result<(), BuildError> { + let next_dfa_id = self.add_dfa_state_for_nfa_state(trans.next)?; + for byte in self + .classes + .representatives(trans.start..=trans.end) + .filter_map(|r| r.as_u8()) + { + let oldtrans = self.dfa.transition(dfa_id, byte); + let newtrans = + Transition::new(self.matched, next_dfa_id, epsilons); + // If the old transition points to the DEAD state, then we know + // 'byte' has not been mapped to any transition for this DFA state + // yet. So set it unconditionally. Otherwise, we require that the + // old and new transitions are equivalent. Otherwise, there is + // ambiguity and thus the regex is not one-pass. 
+ if oldtrans.state_id() == DEAD { + self.dfa.set_transition(dfa_id, byte, newtrans); + } else if oldtrans != newtrans { + return Err(BuildError::not_one_pass( + "conflicting transition", + )); + } + } + Ok(()) + } + + /// Add a start state to the DFA corresponding to the given NFA starting + /// state ID. + /// + /// If adding a state would blow any limits (configured or hard-coded), + /// then an error is returned. + /// + /// If the starting state is an anchored state for a particular pattern, + /// then callers must provide the pattern ID for that starting state. + /// Callers must also ensure that the first starting state added is the + /// start state for all patterns, and then each anchored starting state for + /// each pattern (if necessary) added in order. Otherwise, this panics. + fn add_start_state( + &mut self, + pid: Option, + nfa_id: StateID, + ) -> Result { + match pid { + // With no pid, this should be the start state for all patterns + // and thus be the first one. + None => assert!(self.dfa.starts.is_empty()), + // With a pid, we want it to be at self.dfa.starts[pid+1]. + Some(pid) => assert!(self.dfa.starts.len() == pid.one_more()), + } + let dfa_id = self.add_dfa_state_for_nfa_state(nfa_id)?; + self.dfa.starts.push(dfa_id); + Ok(dfa_id) + } + + /// Add a new DFA state corresponding to the given NFA state. If adding a + /// state would blow any limits (configured or hard-coded), then an error + /// is returned. If a DFA state already exists for the given NFA state, + /// then that DFA state's ID is returned and no new states are added. + /// + /// It is not expected that this routine is called for every NFA state. + /// Instead, an NFA state ID will usually correspond to the "start" state + /// for a sub-graph of the NFA, where all states in the sub-graph are + /// reachable via epsilon transitions (conditional or unconditional). That + /// sub-graph of NFA states is ultimately what produces a single DFA state. 
+ fn add_dfa_state_for_nfa_state( + &mut self, + nfa_id: StateID, + ) -> Result { + // If we've already built a DFA state for the given NFA state, then + // just return that. We definitely do not want to have more than one + // DFA state in existence for the same NFA state, since all but one of + // them will likely become unreachable. And at least some of them are + // likely to wind up being incomplete. + let existing_dfa_id = self.nfa_to_dfa_id[nfa_id]; + if existing_dfa_id != DEAD { + return Ok(existing_dfa_id); + } + // If we don't have any DFA state yet, add it and then add the given + // NFA state to the list of states to explore. + let dfa_id = self.add_empty_state()?; + self.nfa_to_dfa_id[nfa_id] = dfa_id; + self.uncompiled_nfa_ids.push(nfa_id); + Ok(dfa_id) + } + + /// Unconditionally add a new empty DFA state. If adding it would exceed + /// any limits (configured or hard-coded), then an error is returned. The + /// ID of the new state is returned on success. + /// + /// The added state is *not* a match state. + fn add_empty_state(&mut self) -> Result { + let state_limit = Transition::STATE_ID_LIMIT; + // Note that unlike dense and lazy DFAs, we specifically do NOT + // premultiply our state IDs here. The reason is that we want to pack + // our state IDs into 64-bit transitions with other info, so the fewer + // the bits we use for state IDs the better. If we premultiply, then + // our state ID space shrinks. We justify this by the assumption that + // a one-pass DFA is just already doing a fair bit more work than a + // normal DFA anyway, so an extra multiplication to compute a state + // transition doesn't seem like a huge deal. 
+ let next_id = self.dfa.table.len() >> self.dfa.stride2(); + let id = StateID::new(next_id) + .map_err(|_| BuildError::too_many_states(state_limit))?; + if id.as_u64() > Transition::STATE_ID_LIMIT { + return Err(BuildError::too_many_states(state_limit)); + } + self.dfa + .table + .extend(core::iter::repeat(Transition(0)).take(self.dfa.stride())); + // The default empty value for 'PatternEpsilons' is sadly not all + // zeroes. Instead, a special sentinel is used to indicate that there + // is no pattern. So we need to explicitly set the pattern epsilons to + // the correct "empty" PatternEpsilons. + self.dfa.set_pattern_epsilons(id, PatternEpsilons::empty()); + if let Some(size_limit) = self.config.get_size_limit() { + if self.dfa.memory_usage() > size_limit { + return Err(BuildError::exceeded_size_limit(size_limit)); + } + } + Ok(id) + } + + /// Push the given NFA state ID and its corresponding epsilons (slots and + /// conditional epsilon transitions) on to a stack for use in a depth first + /// traversal of a sub-graph of the NFA. + /// + /// If the given NFA state ID has already been pushed on to the stack, then + /// it indicates the regex is not one-pass and this correspondingly returns + /// an error. + fn stack_push( + &mut self, + nfa_id: StateID, + epsilons: Epsilons, + ) -> Result<(), BuildError> { + // If we already have seen a match and we are compiling a leftmost + // first DFA, then we shouldn't add any more states to look at. This is + // effectively how preference order and non-greediness is implemented. + // if !self.config.get_match_kind().continue_past_first_match() + // && self.matched + // { + // return Ok(()); + // } + if !self.seen.insert(nfa_id) { + return Err(BuildError::not_one_pass( + "multiple epsilon transitions to same state", + )); + } + self.stack.push((nfa_id, epsilons)); + Ok(()) + } +} + +/// A one-pass DFA for executing a subset of anchored regex searches while +/// resolving capturing groups. 
+/// +/// A one-pass DFA can be built from an NFA that is one-pass. An NFA is +/// one-pass when there is never any ambiguity about how to continue a search. +/// For example, `a*a` is not one-pass because during a search, it's not +/// possible to know whether to continue matching the `a*` or to move on to +/// the single `a`. However, `a*b` is one-pass, because for every byte in the +/// input, it's always clear when to move on from `a*` to `b`. +/// +/// # Only anchored searches are supported +/// +/// In this crate, especially for DFAs, unanchored searches are implemented by +/// treating the pattern as if it had a `(?s-u:.)*?` prefix. While the prefix +/// is one-pass on its own, adding anything after it, e.g., `(?s-u:.)*?a` will +/// make the overall pattern not one-pass. Why? Because the `(?s-u:.)` matches +/// any byte, and there is therefore ambiguity as to when the prefix should +/// stop matching and something else should start matching. +/// +/// Therefore, one-pass DFAs do not support unanchored searches. In addition +/// to many regexes simply not being one-pass, it implies that one-pass DFAs +/// have limited utility. With that said, when a one-pass DFA can be used, it +/// can potentially provide a dramatic speed up over alternatives like the +/// [`BoundedBacktracker`](crate::nfa::thompson::backtrack::BoundedBacktracker) +/// and the [`PikeVM`](crate::nfa::thompson::pikevm::PikeVM). In particular, +/// a one-pass DFA is the only DFA capable of reporting the spans of matching +/// capturing groups. +/// +/// To clarify, when we say that unanchored searches are not supported, what +/// that actually means is: +/// +/// * The high level routines, [`DFA::is_match`] and [`DFA::captures`], always +/// do anchored searches. +/// * Since iterators are most useful in the context of unanchored searches, +/// there is no `DFA::captures_iter` method. 
+/// * For lower level routines like [`DFA::try_search`], an error will be +/// returned if the given [`Input`] is configured to do an unanchored search or +/// search for an invalid pattern ID. (Note that an [`Input`] is configured to +/// do an unanchored search by default, so just giving an `Input::new` is +/// guaranteed to return an error.) +/// +/// # Other limitations +/// +/// In addition to the [configurable heap limit](Config::size_limit) and +/// the requirement that a regex pattern be one-pass, there are some other +/// limitations: +/// +/// * There is an internal limit on the total number of explicit capturing +/// groups that appear across all patterns. It is somewhat small and there is +/// no way to configure it. If your pattern(s) exceed this limit, then building +/// a one-pass DFA will fail. +/// * If the number of patterns exceeds an internal unconfigurable limit, then +/// building a one-pass DFA will fail. This limit is quite large and you're +/// unlikely to hit it. +/// * If the total number of states exceeds an internal unconfigurable limit, +/// then building a one-pass DFA will fail. This limit is quite large and +/// you're unlikely to hit it. +/// +/// # Other examples of regexes that aren't one-pass +/// +/// One particularly unfortunate example is that enabling Unicode can cause +/// regexes that were one-pass to no longer be one-pass. Consider the regex +/// `(?-u)\w*\s` for example. It is one-pass because there is exactly no +/// overlap between the ASCII definitions of `\w` and `\s`. But `\w*\s` +/// (i.e., with Unicode enabled) is *not* one-pass because `\w` and `\s` get +/// translated to UTF-8 automatons. And while the *codepoints* in `\w` and `\s` +/// do not overlap, the underlying UTF-8 encodings do. Indeed, because of the +/// overlap between UTF-8 automata, the use of Unicode character classes will +/// tend to vastly increase the likelihood of a regex not being one-pass.
+/// +/// # How does one know if a regex is one-pass or not? +/// +/// At the time of writing, the only way to know is to try and build a one-pass +/// DFA. The one-pass property is checked while constructing the DFA. +/// +/// This does mean that you might potentially waste some CPU cycles and memory +/// by optimistically trying to build a one-pass DFA. But this is currently the +/// only way. In the future, building a one-pass DFA might be able to use some +/// heuristics to detect common violations of the one-pass property and bail +/// more quickly. +/// +/// # Resource usage +/// +/// Unlike a general DFA, a one-pass DFA has stricter bounds on its resource +/// usage. Namely, construction of a one-pass DFA has a time and space +/// complexity of `O(n)`, where `n ~ nfa.states().len()`. (A general DFA's time +/// and space complexity is `O(2^n)`.) This smaller time bound is achieved +/// because there is at most one DFA state created for each NFA state. If +/// additional DFA states would be required, then the pattern is not one-pass +/// and construction will fail. +/// +/// Note though that currently, this DFA uses a fully dense representation. +/// This means that while its space complexity is no worse than an NFA, it may +/// in practice use more memory because of higher constant factors. The reason +/// for this trade off is two-fold. Firstly, a dense representation makes the +/// search faster. Secondly, the bigger an NFA, the more unlikely it is to be +/// one-pass. Therefore, most one-pass DFAs are usually pretty small. +/// +/// # Example +/// +/// This example shows that the one-pass DFA implements Unicode word boundaries +/// correctly while simultaneously reporting spans for capturing groups that +/// participate in a match. (This is the only DFA that implements full support +/// for Unicode word boundaries.) 
+/// +/// ``` +/// # if cfg!(miri) { return Ok(()); } // miri takes too long +/// use regex_automata::{dfa::onepass::DFA, Match, Span}; +/// +/// let re = DFA::new(r"\b(?P\w+)[[:space:]]+(?P\w+)\b")?; +/// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); +/// +/// re.captures(&mut cache, "Шерлок Холмс", &mut caps); +/// assert_eq!(Some(Match::must(0, 0..23)), caps.get_match()); +/// assert_eq!(Some(Span::from(0..12)), caps.get_group_by_name("first")); +/// assert_eq!(Some(Span::from(13..23)), caps.get_group_by_name("last")); +/// # Ok::<(), Box>(()) +/// ``` +/// +/// # Example: iteration +/// +/// Unlike other regex engines in this crate, this one does not provide +/// iterator search functions. This is because a one-pass DFA only supports +/// anchored searches, and so iterator functions are generally not applicable. +/// +/// However, if you know that all of your matches are +/// directly adjacent, then an iterator can be used. The +/// [`util::iter::Searcher`](crate::util::iter::Searcher) type can be used for +/// this purpose: +/// +/// ``` +/// # if cfg!(miri) { return Ok(()); } // miri takes too long +/// use regex_automata::{ +/// dfa::onepass::DFA, +/// util::iter::Searcher, +/// Anchored, Input, Span, +/// }; +/// +/// let re = DFA::new(r"\w(\d)\w")?; +/// let (mut cache, caps) = (re.create_cache(), re.create_captures()); +/// let input = Input::new("a1zb2yc3x").anchored(Anchored::Yes); +/// +/// let mut it = Searcher::new(input).into_captures_iter(caps, |input, caps| { +/// Ok(re.try_search(&mut cache, input, caps)?) +/// }).infallible(); +/// let caps0 = it.next().unwrap(); +/// assert_eq!(Some(Span::from(1..2)), caps0.get_group(1)); +/// +/// # Ok::<(), Box>(()) +/// ``` +#[derive(Clone)] +pub struct DFA { + /// The configuration provided by the caller. + config: Config, + /// The NFA used to build this DFA. 
+ /// + /// NOTE: We probably don't need to store the NFA here, but we use enough + /// bits from it that it's convenient to do so. And there really isn't much + /// cost to doing so either, since an NFA is reference counted internally. + nfa: NFA, + /// The transition table. Given a state ID 's' and a byte of haystack 'b', + /// the next state is `table[sid + classes[byte]]`. + /// + /// The stride of this table (i.e., the number of columns) is always + /// a power of 2, even if the alphabet length is smaller. This makes + /// converting between state IDs and state indices very cheap. + /// + /// Note that the stride always includes room for one extra "transition" + /// that isn't actually a transition. It is a 'PatternEpsilons' that is + /// used for match states only. Because of this, the maximum number of + /// active columns in the transition table is 257, which means the maximum + /// stride is 512 (the next power of 2 greater than or equal to 257). + table: Vec, + /// The DFA state IDs of the starting states. + /// + /// `starts[0]` is always present and corresponds to the starting state + /// when searching for matches of any pattern in the DFA. + /// + /// `starts[i]` where i>0 corresponds to the starting state for the pattern + /// ID 'i-1'. These starting states are optional. + starts: Vec, + /// Every state ID >= this value corresponds to a match state. + /// + /// This is what a search uses to detect whether a state is a match state + /// or not. It requires only a simple comparison instead of bit-unpacking + /// the PatternEpsilons from every state. + min_match_id: StateID, + /// The alphabet of this DFA, split into equivalence classes. Bytes in the + /// same equivalence class can never discriminate between a match and a + /// non-match. + classes: ByteClasses, + /// The number of elements in each state in the transition table. 
This may + /// be less than the stride, since the stride is always a power of 2 and + /// the alphabet length can be anything up to and including 256. + alphabet_len: usize, + /// The number of columns in the transition table, expressed as a power of + /// 2. + stride2: usize, + /// The offset at which the PatternEpsilons for a match state is stored in + /// the transition table. + /// + /// PERF: One wonders whether it would be better to put this in a separate + /// allocation, since only match states have a non-empty PatternEpsilons + /// and the number of match states tends to be dwarfed by the number of + /// non-match states. So this would save '8*len(non_match_states)' for each + /// DFA. The question is whether moving this to a different allocation will + /// lead to a perf hit during searches. You might think dealing with match + /// states is rare, but some regexes spend a lot of time in match states + /// gobbling up input. But... match state handling is already somewhat + /// expensive, so maybe this wouldn't do much? Either way, it's worth + /// experimenting. + pateps_offset: usize, + /// The first explicit slot index. This refers to the first slot appearing + /// immediately after the last implicit slot. It is always 'patterns.len() + /// * 2'. + /// + /// We record this because we only store the explicit slots in our DFA + /// transition table that need to be saved. Implicit slots are handled + /// automatically as part of the search. + explicit_slot_start: usize, +} + +impl DFA { + /// Parse the given regular expression using the default configuration and + /// return the corresponding one-pass DFA. + /// + /// If you want a non-default configuration, then use the [`Builder`] to + /// set your own configuration.
+ /// + /// # Example + /// + /// ``` + /// use regex_automata::{dfa::onepass::DFA, Match}; + /// + /// let re = DFA::new("foo[0-9]+bar")?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// + /// re.captures(&mut cache, "foo12345barzzz", &mut caps); + /// assert_eq!(Some(Match::must(0, 0..11)), caps.get_match()); + /// # Ok::<(), Box>(()) + /// ``` + #[cfg(feature = "syntax")] + #[inline] + pub fn new(pattern: &str) -> Result { + DFA::builder().build(pattern) + } + + /// Like `new`, but parses multiple patterns into a single "multi regex." + /// This similarly uses the default regex configuration. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{dfa::onepass::DFA, Match}; + /// + /// let re = DFA::new_many(&["[a-z]+", "[0-9]+"])?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// + /// re.captures(&mut cache, "abc123", &mut caps); + /// assert_eq!(Some(Match::must(0, 0..3)), caps.get_match()); + /// + /// re.captures(&mut cache, "123abc", &mut caps); + /// assert_eq!(Some(Match::must(1, 0..3)), caps.get_match()); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[cfg(feature = "syntax")] + #[inline] + pub fn new_many>(patterns: &[P]) -> Result { + DFA::builder().build_many(patterns) + } + + /// Like `new`, but builds a one-pass DFA directly from an NFA. This is + /// useful if you already have an NFA, or even if you hand-assembled the + /// NFA. + /// + /// # Example + /// + /// This shows how to hand assemble a regular expression via its HIR, + /// compile an NFA from it and build a one-pass DFA from the NFA. 
+ /// + /// ``` + /// use regex_automata::{ + /// dfa::onepass::DFA, + /// nfa::thompson::NFA, + /// Match, + /// }; + /// use regex_syntax::hir::{Hir, Class, ClassBytes, ClassBytesRange}; + /// + /// let hir = Hir::class(Class::Bytes(ClassBytes::new(vec![ + /// ClassBytesRange::new(b'0', b'9'), + /// ClassBytesRange::new(b'A', b'Z'), + /// ClassBytesRange::new(b'_', b'_'), + /// ClassBytesRange::new(b'a', b'z'), + /// ]))); + /// + /// let config = NFA::config().nfa_size_limit(Some(1_000)); + /// let nfa = NFA::compiler().configure(config).build_from_hir(&hir)?; + /// + /// let re = DFA::new_from_nfa(nfa)?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// let expected = Some(Match::must(0, 0..1)); + /// re.captures(&mut cache, "A", &mut caps); + /// assert_eq!(expected, caps.get_match()); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn new_from_nfa(nfa: NFA) -> Result { + DFA::builder().build_from_nfa(nfa) + } + + /// Create a new one-pass DFA that matches every input. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{dfa::onepass::DFA, Match}; + /// + /// let dfa = DFA::always_match()?; + /// let mut cache = dfa.create_cache(); + /// let mut caps = dfa.create_captures(); + /// + /// let expected = Match::must(0, 0..0); + /// dfa.captures(&mut cache, "", &mut caps); + /// assert_eq!(Some(expected), caps.get_match()); + /// dfa.captures(&mut cache, "foo", &mut caps); + /// assert_eq!(Some(expected), caps.get_match()); + /// # Ok::<(), Box>(()) + /// ``` + pub fn always_match() -> Result { + let nfa = thompson::NFA::always_match(); + Builder::new().build_from_nfa(nfa) + } + + /// Create a new one-pass DFA that never matches any input. 
+ /// + /// # Example + /// + /// ``` + /// use regex_automata::dfa::onepass::DFA; + /// + /// let dfa = DFA::never_match()?; + /// let mut cache = dfa.create_cache(); + /// let mut caps = dfa.create_captures(); + /// + /// dfa.captures(&mut cache, "", &mut caps); + /// assert_eq!(None, caps.get_match()); + /// dfa.captures(&mut cache, "foo", &mut caps); + /// assert_eq!(None, caps.get_match()); + /// # Ok::<(), Box>(()) + /// ``` + pub fn never_match() -> Result { + let nfa = thompson::NFA::never_match(); + Builder::new().build_from_nfa(nfa) + } + + /// Return a default configuration for a DFA. + /// + /// This is a convenience routine to avoid needing to import the `Config` + /// type when customizing the construction of a DFA. + /// + /// # Example + /// + /// This example shows how to change the match semantics of this DFA from + /// its default "leftmost first" to "all." When using "all," non-greediness + /// doesn't apply and neither does preference order matching. Instead, the + /// longest match possible is always returned. (Although, by construction, + /// it's impossible for a one-pass DFA to have a different answer for + /// "preference order" vs "longest match.") + /// + /// ``` + /// use regex_automata::{dfa::onepass::DFA, Match, MatchKind}; + /// + /// let re = DFA::builder() + /// .configure(DFA::config().match_kind(MatchKind::All)) + /// .build(r"(abc)+?")?; + /// let mut cache = re.create_cache(); + /// let mut caps = re.create_captures(); + /// + /// re.captures(&mut cache, "abcabc", &mut caps); + /// // Normally, the non-greedy repetition would give us a 0..3 match. + /// assert_eq!(Some(Match::must(0, 0..6)), caps.get_match()); + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn config() -> Config { + Config::new() + } + + /// Return a builder for configuring the construction of a DFA. + /// + /// This is a convenience routine to avoid needing to import the + /// [`Builder`] type in common cases. 
+ /// + /// # Example + /// + /// This example shows how to use the builder to disable UTF-8 mode. + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{ + /// dfa::onepass::DFA, + /// nfa::thompson, + /// util::syntax, + /// Match, + /// }; + /// + /// let re = DFA::builder() + /// .syntax(syntax::Config::new().utf8(false)) + /// .thompson(thompson::Config::new().utf8(false)) + /// .build(r"foo(?-u:[^b])ar.*")?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// + /// let haystack = b"foo\xFFarzz\xE2\x98\xFF\n"; + /// let expected = Some(Match::must(0, 0..8)); + /// re.captures(&mut cache, haystack, &mut caps); + /// assert_eq!(expected, caps.get_match()); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn builder() -> Builder { + Builder::new() + } + + /// Create a new empty set of capturing groups that is guaranteed to be + /// valid for the search APIs on this DFA. + /// + /// A `Captures` value created for a specific DFA cannot be used with any + /// other DFA. + /// + /// This is a convenience function for [`Captures::all`]. See the + /// [`Captures`] documentation for an explanation of its alternative + /// constructors that permit the DFA to do less work during a search, and + /// thus might make it faster. + #[inline] + pub fn create_captures(&self) -> Captures { + Captures::all(self.nfa.group_info().clone()) + } + + /// Create a new cache for this DFA. + /// + /// The cache returned should only be used for searches for this + /// DFA. If you want to reuse the cache for another DFA, then you + /// must call [`Cache::reset`] with that DFA (or, equivalently, + /// [`DFA::reset_cache`]). + #[inline] + pub fn create_cache(&self) -> Cache { + Cache::new(self) + } + + /// Reset the given cache such that it can be used for searching with + /// this DFA (and only this DFA).
+ /// + /// A cache reset permits reusing memory already allocated in this cache + /// with a different DFA. + /// + /// # Example + /// + /// This shows how to re-purpose a cache for use with a different DFA. + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{dfa::onepass::DFA, Match}; + /// + /// let re1 = DFA::new(r"\w")?; + /// let re2 = DFA::new(r"\W")?; + /// let mut caps1 = re1.create_captures(); + /// let mut caps2 = re2.create_captures(); + /// + /// let mut cache = re1.create_cache(); + /// assert_eq!( + /// Some(Match::must(0, 0..2)), + /// { re1.captures(&mut cache, "Δ", &mut caps1); caps1.get_match() }, + /// ); + /// + /// // Using 'cache' with re2 is not allowed. It may result in panics or + /// // incorrect results. In order to re-purpose the cache, we must reset + /// // it with the one-pass DFA we'd like to use it with. + /// // + /// // Similarly, after this reset, using the cache with 're1' is also not + /// // allowed. + /// re2.reset_cache(&mut cache); + /// assert_eq!( + /// Some(Match::must(0, 0..3)), + /// { re2.captures(&mut cache, "☃", &mut caps2); caps2.get_match() }, + /// ); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn reset_cache(&self, cache: &mut Cache) { + cache.reset(self); + } + + /// Return the config for this one-pass DFA. + #[inline] + pub fn get_config(&self) -> &Config { + &self.config + } + + /// Returns a reference to the underlying NFA. + #[inline] + pub fn get_nfa(&self) -> &NFA { + &self.nfa + } + + /// Returns the total number of patterns compiled into this DFA. + /// + /// In the case of a DFA that contains no patterns, this returns `0`. + #[inline] + pub fn pattern_len(&self) -> usize { + self.get_nfa().pattern_len() + } + + /// Returns the total number of states in this one-pass DFA. + /// + /// Note that unlike dense or sparse DFAs, a one-pass DFA does not expose + /// a low level DFA API. 
Therefore, this routine has little use other than + /// being informational. + #[inline] + pub fn state_len(&self) -> usize { + self.table.len() >> self.stride2() + } + + /// Returns the total number of elements in the alphabet for this DFA. + /// + /// That is, this returns the total number of transitions that each + /// state in this DFA must have. The maximum alphabet size is 256, which + /// corresponds to each possible byte value. + /// + /// The alphabet size may be less than 256 though, and unless + /// [`Config::byte_classes`] is disabled, it is typically much less than + /// 256. Namely, bytes are grouped into equivalence classes such that no + /// two bytes in the same class can distinguish a match from a non-match. + /// For example, in the regex `^[a-z]+$`, the ASCII bytes `a-z` could + /// all be in the same equivalence class. This leads to a massive space + /// savings. + /// + /// Note though that the alphabet length does _not_ necessarily equal the + /// total stride space taken up by a single DFA state in the transition + /// table. Namely, for performance reasons, the stride is always the + /// smallest power of two that is greater than or equal to the alphabet + /// length. For this reason, [`DFA::stride`] or [`DFA::stride2`] are + /// often more useful. The alphabet length is typically useful only for + /// informational purposes. + /// + /// Note also that unlike dense or sparse DFAs, a one-pass DFA does + /// not have a special end-of-input (EOI) transition. This is because + /// a one-pass DFA handles look-around assertions explicitly (like the + /// [`PikeVM`](crate::nfa::thompson::pikevm::PikeVM)) and does not build + /// them into the transitions of the DFA. + #[inline] + pub fn alphabet_len(&self) -> usize { + self.alphabet_len + } + + /// Returns the total stride for every state in this DFA, expressed as the + /// exponent of a power of 2.
The stride is the amount of space each state + /// takes up in the transition table, expressed as a number of transitions. + /// (Unused transitions map to dead states.) + /// + /// The stride of a DFA is always equivalent to the smallest power of + /// 2 that is greater than or equal to the DFA's alphabet length. This + /// definition uses extra space, but possibly permits faster translation + /// between state identifiers and their corresponding offsets in this DFA's + /// transition table. + /// + /// For example, if the DFA's stride is 16 transitions, then its `stride2` + /// is `4` since `2^4 = 16`. + /// + /// The minimum `stride2` value is `1` (corresponding to a stride of `2`) + /// while the maximum `stride2` value is `9` (corresponding to a stride + /// of `512`). The maximum in theory should be `8`, but because of some + /// implementation quirks that may be relaxed in the future, it is one more + /// than `8`. (Do note that a maximal stride is incredibly rare, as it + /// would imply that there is almost no redundancy in the regex pattern.) + /// + /// Note that unlike dense or sparse DFAs, a one-pass DFA does not expose + /// a low level DFA API. Therefore, this routine has little use other than + /// being informational. + #[inline] + pub fn stride2(&self) -> usize { + self.stride2 + } + + /// Returns the total stride for every state in this DFA. This corresponds + /// to the total number of transitions used by each state in this DFA's + /// transition table. + /// + /// Please see [`DFA::stride2`] for more information. In particular, this + /// returns the stride as the number of transitions, whereas `stride2` + /// returns it as the exponent of a power of 2. + /// + /// Note that unlike dense or sparse DFAs, a one-pass DFA does not expose + /// a low level DFA API. Therefore, this routine has little use other than + /// being informational.
+ #[inline] + pub fn stride(&self) -> usize { + 1 << self.stride2() + } + + /// Returns the memory usage, in bytes, of this DFA. + /// + /// The memory usage is computed based on the number of bytes used to + /// represent this DFA. + /// + /// This does **not** include the stack size used up by this DFA. To + /// compute that, use `std::mem::size_of::()`. + #[inline] + pub fn memory_usage(&self) -> usize { + use core::mem::size_of; + + self.table.len() * size_of::() + + self.starts.len() * size_of::() + } +} + +impl DFA { + /// Executes an anchored leftmost forward search, and returns true if and + /// only if this one-pass DFA matches the given haystack. + /// + /// This routine may short circuit if it knows that scanning future + /// input will never lead to a different result. In particular, if the + /// underlying DFA enters a match state, then this routine will return + /// `true` immediately without inspecting any future input. (Consider how + /// this might make a difference given the regex `a+` on the haystack + /// `aaaaaaaaaaaaaaa`. This routine can stop after it sees the first `a`, + /// but routines like `find` need to continue searching because `+` is + /// greedy by default.) + /// + /// The given `Input` is forcefully set to use [`Anchored::Yes`] if the + /// given configuration was [`Anchored::No`] (which is the default). + /// + /// # Panics + /// + /// This routine panics if the search could not complete. This can occur + /// in the following circumstances: + /// + /// * When the provided `Input` configuration is not supported. For + /// example, by providing an unsupported anchor mode. Concretely, + /// this occurs when using [`Anchored::Pattern`] without enabling + /// [`Config::starts_for_each_pattern`]. + /// + /// When a search panics, callers cannot know whether a match exists or + /// not. + /// + /// Use [`DFA::try_search`] if you want to handle these panics as error + /// values instead. 
+ /// + /// # Example + /// + /// This shows basic usage: + /// + /// ``` + /// use regex_automata::dfa::onepass::DFA; + /// + /// let re = DFA::new("foo[0-9]+bar")?; + /// let mut cache = re.create_cache(); + /// + /// assert!(re.is_match(&mut cache, "foo12345bar")); + /// assert!(!re.is_match(&mut cache, "foobar")); + /// # Ok::<(), Box>(()) + /// ``` + /// + /// # Example: consistency with search APIs + /// + /// `is_match` is guaranteed to return `true` whenever `captures` returns + /// a match. This includes searches that are executed entirely within a + /// codepoint: + /// + /// ``` + /// use regex_automata::{dfa::onepass::DFA, Input}; + /// + /// let re = DFA::new("a*")?; + /// let mut cache = re.create_cache(); + /// + /// assert!(!re.is_match(&mut cache, Input::new("☃").span(1..2))); + /// # Ok::<(), Box>(()) + /// ``` + /// + /// Notice that when UTF-8 mode is disabled, then the above reports a + /// match because the restriction against zero-width matches that split a + /// codepoint has been lifted: + /// + /// ``` + /// use regex_automata::{dfa::onepass::DFA, nfa::thompson::NFA, Input}; + /// + /// let re = DFA::builder() + /// .thompson(NFA::config().utf8(false)) + /// .build("a*")?; + /// let mut cache = re.create_cache(); + /// + /// assert!(re.is_match(&mut cache, Input::new("☃").span(1..2))); + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn is_match<'h, I: Into>>( + &self, + cache: &mut Cache, + input: I, + ) -> bool { + let mut input = input.into().earliest(true); + if matches!(input.get_anchored(), Anchored::No) { + input.set_anchored(Anchored::Yes); + } + self.try_search_slots(cache, &input, &mut []).unwrap().is_some() + } + + /// Executes an anchored leftmost forward search, and returns a `Match` if + /// and only if this one-pass DFA matches the given haystack. + /// + /// This routine only includes the overall match span. To get access to the + /// individual spans of each capturing group, use [`DFA::captures`]. 
+ /// + /// The given `Input` is forcefully set to use [`Anchored::Yes`] if the + /// given configuration was [`Anchored::No`] (which is the default). + /// + /// # Panics + /// + /// This routine panics if the search could not complete. This can occur + /// in the following circumstances: + /// + /// * When the provided `Input` configuration is not supported. For + /// example, by providing an unsupported anchor mode. Concretely, + /// this occurs when using [`Anchored::Pattern`] without enabling + /// [`Config::starts_for_each_pattern`]. + /// + /// When a search panics, callers cannot know whether a match exists or + /// not. + /// + /// Use [`DFA::try_search`] if you want to handle these panics as error + /// values instead. + /// + /// # Example + /// + /// Leftmost first match semantics corresponds to the match with the + /// smallest starting offset, but where the end offset is determined by + /// preferring earlier branches in the original regular expression. For + /// example, `Sam|Samwise` will match `Sam` in `Samwise`, but `Samwise|Sam` + /// will match `Samwise` in `Samwise`. + /// + /// Generally speaking, the "leftmost first" match is how most backtracking + /// regular expressions tend to work. This is in contrast to POSIX-style + /// regular expressions that yield "leftmost longest" matches. Namely, + /// both `Sam|Samwise` and `Samwise|Sam` match `Samwise` when using + /// leftmost longest semantics. (This crate does not currently support + /// leftmost longest semantics.) 
+ /// + /// ``` + /// use regex_automata::{dfa::onepass::DFA, Match}; + /// + /// let re = DFA::new("foo[0-9]+")?; + /// let mut cache = re.create_cache(); + /// let expected = Match::must(0, 0..8); + /// assert_eq!(Some(expected), re.find(&mut cache, "foo12345")); + /// + /// // Even though a match is found after reading the first byte (`a`), + /// // the leftmost first match semantics demand that we find the earliest + /// // match that prefers earlier parts of the pattern over later parts. + /// let re = DFA::new("abc|a")?; + /// let mut cache = re.create_cache(); + /// let expected = Match::must(0, 0..3); + /// assert_eq!(Some(expected), re.find(&mut cache, "abc")); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn find<'h, I: Into>>( + &self, + cache: &mut Cache, + input: I, + ) -> Option { + let mut input = input.into(); + if matches!(input.get_anchored(), Anchored::No) { + input.set_anchored(Anchored::Yes); + } + if self.get_nfa().pattern_len() == 1 { + let mut slots = [None, None]; + let pid = + self.try_search_slots(cache, &input, &mut slots).unwrap()?; + let start = slots[0].unwrap().get(); + let end = slots[1].unwrap().get(); + return Some(Match::new(pid, Span { start, end })); + } + let ginfo = self.get_nfa().group_info(); + let slots_len = ginfo.implicit_slot_len(); + let mut slots = vec![None; slots_len]; + let pid = self.try_search_slots(cache, &input, &mut slots).unwrap()?; + let start = slots[pid.as_usize() * 2].unwrap().get(); + let end = slots[pid.as_usize() * 2 + 1].unwrap().get(); + Some(Match::new(pid, Span { start, end })) + } + + /// Executes an anchored leftmost forward search and writes the spans + /// of capturing groups that participated in a match into the provided + /// [`Captures`] value. If no match was found, then [`Captures::is_match`] + /// is guaranteed to return `false`. 
+ /// + /// The given `Input` is forcefully set to use [`Anchored::Yes`] if the + /// given configuration was [`Anchored::No`] (which is the default). + /// + /// # Panics + /// + /// This routine panics if the search could not complete. This can occur + /// in the following circumstances: + /// + /// * When the provided `Input` configuration is not supported. For + /// example, by providing an unsupported anchor mode. Concretely, + /// this occurs when using [`Anchored::Pattern`] without enabling + /// [`Config::starts_for_each_pattern`]. + /// + /// When a search panics, callers cannot know whether a match exists or + /// not. + /// + /// Use [`DFA::try_search`] if you want to handle these panics as error + /// values instead. + /// + /// # Example + /// + /// This shows a simple example of a one-pass regex that extracts + /// capturing group spans. + /// + /// ``` + /// use regex_automata::{dfa::onepass::DFA, Match, Span}; + /// + /// let re = DFA::new( + /// // Notice that we use ASCII here. The corresponding Unicode regex + /// // is sadly not one-pass. + /// "(?P[[:alpha:]]+)[[:space:]]+(?P[[:alpha:]]+)", + /// )?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// + /// re.captures(&mut cache, "Bruce Springsteen", &mut caps); + /// assert_eq!(Some(Match::must(0, 0..17)), caps.get_match()); + /// assert_eq!(Some(Span::from(0..5)), caps.get_group(1)); + /// assert_eq!(Some(Span::from(6..17)), caps.get_group_by_name("last")); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn captures<'h, I: Into>>( + &self, + cache: &mut Cache, + input: I, + caps: &mut Captures, + ) { + let mut input = input.into(); + if matches!(input.get_anchored(), Anchored::No) { + input.set_anchored(Anchored::Yes); + } + self.try_search(cache, &input, caps).unwrap(); + } + + /// Executes an anchored leftmost forward search and writes the spans + /// of capturing groups that participated in a match into the provided + /// [`Captures`] value. 
If no match was found, then [`Captures::is_match`] + /// is guaranteed to return `false`. + /// + /// The differences with [`DFA::captures`] are: + /// + /// 1. This returns an error instead of panicking if the search fails. + /// 2. Accepts an `&Input` instead of a `Into`. This permits reusing + /// the same input for multiple searches, which _may_ be important for + /// latency. + /// 3. This does not automatically change the [`Anchored`] mode from `No` + /// to `Yes`. Instead, if [`Input::anchored`] is `Anchored::No`, then an + /// error is returned. + /// + /// # Errors + /// + /// This routine errors if the search could not complete. This can occur + /// in the following circumstances: + /// + /// * When the provided `Input` configuration is not supported. For + /// example, by providing an unsupported anchor mode. Concretely, + /// this occurs when using [`Anchored::Pattern`] without enabling + /// [`Config::starts_for_each_pattern`]. + /// + /// When a search returns an error, callers cannot know whether a match + /// exists or not. + /// + /// # Example: specific pattern search + /// + /// This example shows how to build a multi-regex that permits searching + /// for specific patterns. Note that this is somewhat less useful than + /// in other regex engines, since a one-pass DFA by definition has no + /// ambiguity about which pattern can match at a position. That is, if it + /// were possible for two different patterns to match at the same starting + /// position, then the multi-regex would not be one-pass and construction + /// would have failed. + /// + /// Nevertheless, this can still be useful if you only care about matches + /// for a specific pattern, and want the DFA to report "no match" even if + /// some other pattern would have matched. + /// + /// Note that in order to make use of this functionality, + /// [`Config::starts_for_each_pattern`] must be enabled. It is disabled + /// by default since it may result in higher memory usage. 
+ /// + /// ``` + /// use regex_automata::{ + /// dfa::onepass::DFA, Anchored, Input, Match, PatternID, + /// }; + /// + /// let re = DFA::builder() + /// .configure(DFA::config().starts_for_each_pattern(true)) + /// .build_many(&["[a-z]+", "[0-9]+"])?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// let haystack = "123abc"; + /// let input = Input::new(haystack).anchored(Anchored::Yes); + /// + /// // A normal multi-pattern search will show pattern 1 matches. + /// re.try_search(&mut cache, &input, &mut caps)?; + /// assert_eq!(Some(Match::must(1, 0..3)), caps.get_match()); + /// + /// // If we only want to report pattern 0 matches, then we'll get no + /// // match here. + /// let input = input.anchored(Anchored::Pattern(PatternID::must(0))); + /// re.try_search(&mut cache, &input, &mut caps)?; + /// assert_eq!(None, caps.get_match()); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// # Example: specifying the bounds of a search + /// + /// This example shows how providing the bounds of a search can produce + /// different results than simply sub-slicing the haystack. + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{dfa::onepass::DFA, Anchored, Input, Match}; + /// + /// // one-pass DFAs fully support Unicode word boundaries! + /// // A sad joke is that a Unicode aware regex like \w+\s is not one-pass. + /// // :-( + /// let re = DFA::new(r"\b[0-9]{3}\b")?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// let haystack = "foo123bar"; + /// + /// // Since we sub-slice the haystack, the search doesn't know about + /// // the larger context and assumes that `123` is surrounded by word + /// // boundaries. And of course, the match position is reported relative + /// // to the sub-slice as well, which means we get `0..3` instead of + /// // `3..6`. 
+ /// let expected = Some(Match::must(0, 0..3)); + /// let input = Input::new(&haystack[3..6]).anchored(Anchored::Yes); + /// re.try_search(&mut cache, &input, &mut caps)?; + /// assert_eq!(expected, caps.get_match()); + /// + /// // But if we provide the bounds of the search within the context of the + /// // entire haystack, then the search can take the surrounding context + /// // into account. (And if we did find a match, it would be reported + /// // as a valid offset into `haystack` instead of its sub-slice.) + /// let expected = None; + /// let input = Input::new(haystack).range(3..6).anchored(Anchored::Yes); + /// re.try_search(&mut cache, &input, &mut caps)?; + /// assert_eq!(expected, caps.get_match()); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn try_search( + &self, + cache: &mut Cache, + input: &Input<'_>, + caps: &mut Captures, + ) -> Result<(), MatchError> { + let pid = self.try_search_slots(cache, input, caps.slots_mut())?; + caps.set_pattern(pid); + Ok(()) + } + + /// Executes an anchored leftmost forward search and writes the spans + /// of capturing groups that participated in a match into the provided + /// `slots`, and returns the matching pattern ID. The contents of the + /// slots for patterns other than the matching pattern are unspecified. If + /// no match was found, then `None` is returned and the contents of all + /// `slots` is unspecified. + /// + /// This is like [`DFA::try_search`], but it accepts a raw slots slice + /// instead of a `Captures` value. This is useful in contexts where you + /// don't want or need to allocate a `Captures`. + /// + /// It is legal to pass _any_ number of slots to this routine. If the regex + /// engine would otherwise write a slot offset that doesn't fit in the + /// provided slice, then it is simply skipped. In general though, there are + /// usually three slice lengths you might want to use: + /// + /// * An empty slice, if you only care about which pattern matched. 
+ /// * A slice with + /// [`pattern_len() * 2`](crate::dfa::onepass::DFA::pattern_len) + /// slots, if you only care about the overall match spans for each matching + /// pattern. + /// * A slice with + /// [`slot_len()`](crate::util::captures::GroupInfo::slot_len) slots, which + /// permits recording match offsets for every capturing group in every + /// pattern. + /// + /// # Errors + /// + /// This routine errors if the search could not complete. This can occur + /// in the following circumstances: + /// + /// * When the provided `Input` configuration is not supported. For + /// example, by providing an unsupported anchor mode. Concretely, + /// this occurs when using [`Anchored::Pattern`] without enabling + /// [`Config::starts_for_each_pattern`]. + /// + /// When a search returns an error, callers cannot know whether a match + /// exists or not. + /// + /// # Example + /// + /// This example shows how to find the overall match offsets in a + /// multi-pattern search without allocating a `Captures` value. Indeed, we + /// can put our slots right on the stack. + /// + /// ``` + /// use regex_automata::{dfa::onepass::DFA, Anchored, Input, PatternID}; + /// + /// let re = DFA::new_many(&[ + /// r"[a-zA-Z]+", + /// r"[0-9]+", + /// ])?; + /// let mut cache = re.create_cache(); + /// let input = Input::new("123").anchored(Anchored::Yes); + /// + /// // We only care about the overall match offsets here, so we just + /// // allocate two slots for each pattern. Each slot records the start + /// // and end of the match. + /// let mut slots = [None; 4]; + /// let pid = re.try_search_slots(&mut cache, &input, &mut slots)?; + /// assert_eq!(Some(PatternID::must(1)), pid); + /// + /// // The overall match offsets are always at 'pid * 2' and 'pid * 2 + 1'. + /// // See 'GroupInfo' for more details on the mapping between groups and + /// // slot indices. 
+ /// let slot_start = pid.unwrap().as_usize() * 2; + /// let slot_end = slot_start + 1; + /// assert_eq!(Some(0), slots[slot_start].map(|s| s.get())); + /// assert_eq!(Some(3), slots[slot_end].map(|s| s.get())); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn try_search_slots( + &self, + cache: &mut Cache, + input: &Input<'_>, + slots: &mut [Option], + ) -> Result, MatchError> { + let utf8empty = self.get_nfa().has_empty() && self.get_nfa().is_utf8(); + if !utf8empty { + return self.try_search_slots_imp(cache, input, slots); + } + // See PikeVM::try_search_slots for why we do this. + let min = self.get_nfa().group_info().implicit_slot_len(); + if slots.len() >= min { + return self.try_search_slots_imp(cache, input, slots); + } + if self.get_nfa().pattern_len() == 1 { + let mut enough = [None, None]; + let got = self.try_search_slots_imp(cache, input, &mut enough)?; + // This is OK because we know `enough_slots` is strictly bigger + // than `slots`, otherwise this special case isn't reached. + slots.copy_from_slice(&enough[..slots.len()]); + return Ok(got); + } + let mut enough = vec![None; min]; + let got = self.try_search_slots_imp(cache, input, &mut enough)?; + // This is OK because we know `enough_slots` is strictly bigger than + // `slots`, otherwise this special case isn't reached. + slots.copy_from_slice(&enough[..slots.len()]); + Ok(got) + } + + #[inline(never)] + fn try_search_slots_imp( + &self, + cache: &mut Cache, + input: &Input<'_>, + slots: &mut [Option], + ) -> Result, MatchError> { + let utf8empty = self.get_nfa().has_empty() && self.get_nfa().is_utf8(); + match self.search_imp(cache, input, slots)? { + None => return Ok(None), + Some(pid) if !utf8empty => return Ok(Some(pid)), + Some(pid) => { + // These slot indices are always correct because we know our + // 'pid' is valid and thus we know that the slot indices for it + // are valid. 
+ let slot_start = pid.as_usize().wrapping_mul(2); + let slot_end = slot_start.wrapping_add(1); + // OK because we know we have a match and we know our caller + // provided slots are big enough (which we make true above if + // the caller didn't). Namely, we're only here when 'utf8empty' + // is true, and when that's true, we require slots for every + // pattern. + let start = slots[slot_start].unwrap().get(); + let end = slots[slot_end].unwrap().get(); + // If our match splits a codepoint, then we cannot report is + // as a match. And since one-pass DFAs only support anchored + // searches, we don't try to skip ahead to find the next match. + // We can just quit with nothing. + if start == end && !input.is_char_boundary(start) { + return Ok(None); + } + Ok(Some(pid)) + } + } + } +} + +impl DFA { + fn search_imp( + &self, + cache: &mut Cache, + input: &Input<'_>, + slots: &mut [Option], + ) -> Result, MatchError> { + // PERF: Some ideas. I ran out of steam after my initial impl to try + // many of these. + // + // 1) Try doing more state shuffling. Right now, all we do is push + // match states to the end of the transition table so that we can do + // 'if sid >= self.min_match_id' to know whether we're in a match + // state or not. But what about doing something like dense DFAs and + // pushing dead, match and states with captures/looks all toward the + // beginning of the transition table. Then we could do 'if sid <= + // self.max_special_id', in which case, we need to do some special + // handling of some sort. Otherwise, we get the happy path, just + // like in a DFA search. The main argument against this is that the + // one-pass DFA is likely to be used most often with capturing groups + // and if capturing groups are common, then this might wind up being a + // pessimization. + // + // 2) Consider moving 'PatternEpsilons' out of the transition table. + // It is only needed for match states and usually a small minority of + // states are match states. 
Therefore, we're using an extra 'u64' for + // most states. + // + // 3) I played around with the match state handling and it seems like + // there is probably a lot left on the table for improvement. The + // key tension is that the 'find_match' routine is a giant mess, but + // splitting it out into a non-inlineable function is a non-starter + // because the match state might consume input, so 'find_match' COULD + // be called quite a lot, and a function call at that point would trash + // perf. In theory, we could detect whether a match state consumes + // input and then specialize our search routine based on that. In that + // case, maybe an extra function call is OK, but even then, it might be + // too much of a latency hit. Another idea is to just try and figure + // out how to reduce the code size of 'find_match'. RE2 has a trick + // here where the match handling isn't done if we know the next byte of + // input yields a match too. Maybe we adopt that? + // + // This just might be a tricky DFA to optimize. + + if input.is_done() { + return Ok(None); + } + // We unfortunately have a bit of book-keeping to do to set things + // up. We do have to setup our cache and clear all of our slots. In + // particular, clearing the slots is necessary for the case where we + // report a match, but one of the capturing groups didn't participate + // in the match but had a span set from a previous search. That would + // be bad. In theory, we could avoid all this slot clearing if we knew + // that every slot was always activated for every match. Then we would + // know they would always be overwritten when a match is found. + let explicit_slots_len = core::cmp::min( + Slots::LIMIT, + slots.len().saturating_sub(self.explicit_slot_start), + ); + cache.setup_search(explicit_slots_len); + for slot in cache.explicit_slots() { + *slot = None; + } + for slot in slots.iter_mut() { + *slot = None; + } + // We set the starting slots for every pattern up front. 
This does + // increase our latency somewhat, but it avoids having to do it every + // time we see a match state (which could be many times in a single + // search if the match state consumes input). + for pid in self.nfa.patterns() { + let i = pid.as_usize() * 2; + if i >= slots.len() { + break; + } + slots[i] = NonMaxUsize::new(input.start()); + } + let mut pid = None; + let mut next_sid = match input.get_anchored() { + Anchored::Yes => self.start(), + Anchored::Pattern(pid) => self.start_pattern(pid)?, + Anchored::No => { + // If the regex is itself always anchored, then we're fine, + // even if the search is configured to be unanchored. + if !self.nfa.is_always_start_anchored() { + return Err(MatchError::unsupported_anchored( + Anchored::No, + )); + } + self.start() + } + }; + let leftmost_first = + matches!(self.config.get_match_kind(), MatchKind::LeftmostFirst); + for at in input.start()..input.end() { + let sid = next_sid; + let trans = self.transition(sid, input.haystack()[at]); + next_sid = trans.state_id(); + let epsilons = trans.epsilons(); + if sid >= self.min_match_id { + if self.find_match(cache, input, at, sid, slots, &mut pid) { + if input.get_earliest() + || (leftmost_first && trans.match_wins()) + { + return Ok(pid); + } + } + } + if sid == DEAD + || (!epsilons.looks().is_empty() + && !self.nfa.look_matcher().matches_set_inline( + epsilons.looks(), + input.haystack(), + at, + )) + { + return Ok(pid); + } + epsilons.slots().apply(at, cache.explicit_slots()); + } + if next_sid >= self.min_match_id { + self.find_match( + cache, + input, + input.end(), + next_sid, + slots, + &mut pid, + ); + } + Ok(pid) + } + + /// Assumes 'sid' is a match state and looks for whether a match can + /// be reported. If so, appropriate offsets are written to 'slots' and + /// 'matched_pid' is set to the matching pattern ID. + /// + /// Even when 'sid' is a match state, it's possible that a match won't + /// be reported. 
For example, when the conditional epsilon transitions + /// leading to the match state aren't satisfied at the given position in + /// the haystack. + #[cfg_attr(feature = "perf-inline", inline(always))] + fn find_match( + &self, + cache: &mut Cache, + input: &Input<'_>, + at: usize, + sid: StateID, + slots: &mut [Option], + matched_pid: &mut Option, + ) -> bool { + debug_assert!(sid >= self.min_match_id); + let pateps = self.pattern_epsilons(sid); + let epsilons = pateps.epsilons(); + if !epsilons.looks().is_empty() + && !self.nfa.look_matcher().matches_set_inline( + epsilons.looks(), + input.haystack(), + at, + ) + { + return false; + } + let pid = pateps.pattern_id_unchecked(); + // This calculation is always correct because we know our 'pid' is + // valid and thus we know that the slot indices for it are valid. + let slot_end = pid.as_usize().wrapping_mul(2).wrapping_add(1); + // Set the implicit 'end' slot for the matching pattern. (The 'start' + // slot was set at the beginning of the search.) + if slot_end < slots.len() { + slots[slot_end] = NonMaxUsize::new(at); + } + // If the caller provided enough room, copy the previously recorded + // explicit slots from our scratch space to the caller provided slots. + // We *also* need to set any explicit slots that are active as part of + // the path to the match state. + if self.explicit_slot_start < slots.len() { + // NOTE: The 'cache.explicit_slots()' slice is setup at the + // beginning of every search such that it is guaranteed to return a + // slice of length equivalent to 'slots[explicit_slot_start..]'. + slots[self.explicit_slot_start..] + .copy_from_slice(cache.explicit_slots()); + epsilons.slots().apply(at, &mut slots[self.explicit_slot_start..]); + } + *matched_pid = Some(pid); + true + } +} + +impl DFA { + /// Returns the anchored start state for matching any pattern in this DFA. + fn start(&self) -> StateID { + self.starts[0] + } + + /// Returns the anchored start state for matching the given pattern. 
If + /// 'starts_for_each_pattern' + /// was not enabled, then this returns an error. If the given pattern is + /// not in this DFA, then `Ok(None)` is returned. + fn start_pattern(&self, pid: PatternID) -> Result { + if !self.config.get_starts_for_each_pattern() { + return Err(MatchError::unsupported_anchored(Anchored::Pattern( + pid, + ))); + } + // 'starts' always has non-zero length. The first entry is always the + // anchored starting state for all patterns, and the following entries + // are optional and correspond to the anchored starting states for + // patterns at pid+1. Thus, starts.len()-1 corresponds to the total + // number of patterns that one can explicitly search for. (And it may + // be zero.) + Ok(self.starts.get(pid.one_more()).copied().unwrap_or(DEAD)) + } + + /// Returns the transition from the given state ID and byte of input. The + /// transition includes the next state ID, the slots that should be saved + /// and any conditional epsilon transitions that must be satisfied in order + /// to take this transition. + fn transition(&self, sid: StateID, byte: u8) -> Transition { + let offset = sid.as_usize() << self.stride2(); + let class = self.classes.get(byte).as_usize(); + self.table[offset + class] + } + + /// Set the transition from the given state ID and byte of input to the + /// transition given. + fn set_transition(&mut self, sid: StateID, byte: u8, to: Transition) { + let offset = sid.as_usize() << self.stride2(); + let class = self.classes.get(byte).as_usize(); + self.table[offset + class] = to; + } + + /// Return an iterator of "sparse" transitions for the given state ID. + /// "sparse" in this context means that consecutive transitions that are + /// equivalent are returned as one group, and transitions to the DEAD state + /// are ignored. + /// + /// This winds up being useful for debug printing, since it's much terser + /// to display runs of equivalent transitions than the transition for every + /// possible byte value. 
Indeed, in practice, it's very common for runs + /// of equivalent transitions to appear. + fn sparse_transitions(&self, sid: StateID) -> SparseTransitionIter<'_> { + let start = sid.as_usize() << self.stride2(); + let end = start + self.alphabet_len(); + SparseTransitionIter { + it: self.table[start..end].iter().enumerate(), + cur: None, + } + } + + /// Return the pattern epsilons for the given state ID. + /// + /// If the given state ID does not correspond to a match state ID, then the + /// pattern epsilons returned is empty. + fn pattern_epsilons(&self, sid: StateID) -> PatternEpsilons { + let offset = sid.as_usize() << self.stride2(); + PatternEpsilons(self.table[offset + self.pateps_offset].0) + } + + /// Set the pattern epsilons for the given state ID. + fn set_pattern_epsilons(&mut self, sid: StateID, pateps: PatternEpsilons) { + let offset = sid.as_usize() << self.stride2(); + self.table[offset + self.pateps_offset] = Transition(pateps.0); + } + + /// Returns the state ID prior to the one given. This returns None if the + /// given ID is the first DFA state. + fn prev_state_id(&self, id: StateID) -> Option { + if id == DEAD { + None + } else { + // CORRECTNESS: Since 'id' is not the first state, subtracting 1 + // is always valid. + Some(StateID::new_unchecked(id.as_usize().checked_sub(1).unwrap())) + } + } + + /// Returns the state ID of the last state in this DFA's transition table. + /// "last" in this context means the last state to appear in memory, i.e., + /// the one with the greatest ID. + fn last_state_id(&self) -> StateID { + // CORRECTNESS: A DFA table is always non-empty since it always at + // least contains a DEAD state. Since every state has the same stride, + // we can just compute what the "next" state ID would have been and + // then subtract 1 from it. + StateID::new_unchecked( + (self.table.len() >> self.stride2()).checked_sub(1).unwrap(), + ) + } + + /// Move the transitions from 'id1' to 'id2' and vice versa. 
+ /// + /// WARNING: This does not update the rest of the transition table to have + /// transitions to 'id1' changed to 'id2' and vice versa. This merely moves + /// the states in memory. + pub(super) fn swap_states(&mut self, id1: StateID, id2: StateID) { + let o1 = id1.as_usize() << self.stride2(); + let o2 = id2.as_usize() << self.stride2(); + for b in 0..self.stride() { + self.table.swap(o1 + b, o2 + b); + } + } + + /// Map all state IDs in this DFA (transition table + start states) + /// according to the closure given. + pub(super) fn remap(&mut self, map: impl Fn(StateID) -> StateID) { + for i in 0..self.state_len() { + let offset = i << self.stride2(); + for b in 0..self.alphabet_len() { + let next = self.table[offset + b].state_id(); + self.table[offset + b].set_state_id(map(next)); + } + } + for i in 0..self.starts.len() { + self.starts[i] = map(self.starts[i]); + } + } +} + +impl core::fmt::Debug for DFA { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + fn debug_state_transitions( + f: &mut core::fmt::Formatter, + dfa: &DFA, + sid: StateID, + ) -> core::fmt::Result { + for (i, (start, end, trans)) in + dfa.sparse_transitions(sid).enumerate() + { + let next = trans.state_id(); + if i > 0 { + write!(f, ", ")?; + } + if start == end { + write!( + f, + "{:?} => {:?}", + DebugByte(start), + next.as_usize(), + )?; + } else { + write!( + f, + "{:?}-{:?} => {:?}", + DebugByte(start), + DebugByte(end), + next.as_usize(), + )?; + } + if trans.match_wins() { + write!(f, " (MW)")?; + } + if !trans.epsilons().is_empty() { + write!(f, " ({:?})", trans.epsilons())?; + } + } + Ok(()) + } + + writeln!(f, "onepass::DFA(")?; + for index in 0..self.state_len() { + let sid = StateID::must(index); + let pateps = self.pattern_epsilons(sid); + if sid == DEAD { + write!(f, "D ")?; + } else if pateps.pattern_id().is_some() { + write!(f, "* ")?; + } else { + write!(f, " ")?; + } + write!(f, "{:06?}", sid.as_usize())?; + if !pateps.is_empty() { + write!(f, " 
({:?})", pateps)?; + } + write!(f, ": ")?; + debug_state_transitions(f, self, sid)?; + write!(f, "\n")?; + } + writeln!(f, "")?; + for (i, &sid) in self.starts.iter().enumerate() { + if i == 0 { + writeln!(f, "START(ALL): {:?}", sid.as_usize())?; + } else { + writeln!( + f, + "START(pattern: {:?}): {:?}", + i - 1, + sid.as_usize(), + )?; + } + } + writeln!(f, "state length: {:?}", self.state_len())?; + writeln!(f, "pattern length: {:?}", self.pattern_len())?; + writeln!(f, ")")?; + Ok(()) + } +} + +/// An iterator over groups of consecutive equivalent transitions in a single +/// state. +#[derive(Debug)] +struct SparseTransitionIter<'a> { + it: core::iter::Enumerate>, + cur: Option<(u8, u8, Transition)>, +} + +impl<'a> Iterator for SparseTransitionIter<'a> { + type Item = (u8, u8, Transition); + + fn next(&mut self) -> Option<(u8, u8, Transition)> { + while let Some((b, &trans)) = self.it.next() { + // Fine because we'll never have more than u8::MAX transitions in + // one state. + let b = b.as_u8(); + let (prev_start, prev_end, prev_trans) = match self.cur { + Some(t) => t, + None => { + self.cur = Some((b, b, trans)); + continue; + } + }; + if prev_trans == trans { + self.cur = Some((prev_start, b, prev_trans)); + } else { + self.cur = Some((b, b, trans)); + if prev_trans.state_id() != DEAD { + return Some((prev_start, prev_end, prev_trans)); + } + } + } + if let Some((start, end, trans)) = self.cur.take() { + if trans.state_id() != DEAD { + return Some((start, end, trans)); + } + } + None + } +} + +/// A cache represents mutable state that a one-pass [`DFA`] requires during a +/// search. +/// +/// For a given one-pass DFA, its corresponding cache may be created either via +/// [`DFA::create_cache`], or via [`Cache::new`]. They are equivalent in every +/// way, except the former does not require explicitly importing `Cache`. +/// +/// A particular `Cache` is coupled with the one-pass DFA from which it was +/// created. It may only be used with that one-pass DFA. 
A cache and its +/// allocations may be re-purposed via [`Cache::reset`], in which case, it can +/// only be used with the new one-pass DFA (and not the old one). +#[derive(Clone, Debug)] +pub struct Cache { + /// Scratch space used to store slots during a search. Basically, we use + /// the caller provided slots to store slots known when a match occurs. + /// But after a match occurs, we might continue a search but ultimately + /// fail to extend the match. When continuing the search, we need some + /// place to store candidate capture offsets without overwriting the slot + /// offsets recorded for the most recently seen match. + explicit_slots: Vec>, + /// The number of slots in the caller-provided 'Captures' value for the + /// current search. This is always at most 'explicit_slots.len()', but + /// might be less than it, if the caller provided fewer slots to fill. + explicit_slot_len: usize, +} + +impl Cache { + /// Create a new [`onepass::DFA`](DFA) cache. + /// + /// A potentially more convenient routine to create a cache is + /// [`DFA::create_cache`], as it does not require also importing the + /// `Cache` type. + /// + /// If you want to reuse the returned `Cache` with some other one-pass DFA, + /// then you must call [`Cache::reset`] with the desired one-pass DFA. + pub fn new(re: &DFA) -> Cache { + let mut cache = Cache { explicit_slots: vec![], explicit_slot_len: 0 }; + cache.reset(re); + cache + } + + /// Reset this cache such that it can be used for searching with a + /// different [`onepass::DFA`](DFA). + /// + /// A cache reset permits reusing memory already allocated in this cache + /// with a different one-pass DFA. + /// + /// # Example + /// + /// This shows how to re-purpose a cache for use with a different one-pass + /// DFA. 
+ /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{dfa::onepass::DFA, Match}; + /// + /// let re1 = DFA::new(r"\w")?; + /// let re2 = DFA::new(r"\W")?; + /// let mut caps1 = re1.create_captures(); + /// let mut caps2 = re2.create_captures(); + /// + /// let mut cache = re1.create_cache(); + /// assert_eq!( + /// Some(Match::must(0, 0..2)), + /// { re1.captures(&mut cache, "Δ", &mut caps1); caps1.get_match() }, + /// ); + /// + /// // Using 'cache' with re2 is not allowed. It may result in panics or + /// // incorrect results. In order to re-purpose the cache, we must reset + /// // it with the one-pass DFA we'd like to use it with. + /// // + /// // Similarly, after this reset, using the cache with 're1' is also not + /// // allowed. + /// re2.reset_cache(&mut cache); + /// assert_eq!( + /// Some(Match::must(0, 0..3)), + /// { re2.captures(&mut cache, "☃", &mut caps2); caps2.get_match() }, + /// ); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn reset(&mut self, re: &DFA) { + let explicit_slot_len = re.get_nfa().group_info().explicit_slot_len(); + self.explicit_slots.resize(explicit_slot_len, None); + self.explicit_slot_len = explicit_slot_len; + } + + /// Returns the heap memory usage, in bytes, of this cache. + /// + /// This does **not** include the stack size used up by this cache. To + /// compute that, use `std::mem::size_of::()`. + pub fn memory_usage(&self) -> usize { + self.explicit_slots.len() * core::mem::size_of::>() + } + + fn explicit_slots(&mut self) -> &mut [Option] { + &mut self.explicit_slots[..self.explicit_slot_len] + } + + fn setup_search(&mut self, explicit_slot_len: usize) { + self.explicit_slot_len = explicit_slot_len; + } +} + +/// Represents a single transition in a one-pass DFA. +/// +/// The high 21 bits corresponds to the state ID. The bit following corresponds +/// to the special "match wins" flag. 
The remaining low 42 bits corresponds to +/// the transition epsilons, which contains the slots that should be saved when +/// this transition is followed and the conditional epsilon transitions that +/// must be satisfied in order to follow this transition. +#[derive(Clone, Copy, Eq, PartialEq)] +struct Transition(u64); + +impl Transition { + const STATE_ID_BITS: u64 = 21; + const STATE_ID_SHIFT: u64 = 64 - Transition::STATE_ID_BITS; + const STATE_ID_LIMIT: u64 = 1 << Transition::STATE_ID_BITS; + const MATCH_WINS_SHIFT: u64 = 64 - (Transition::STATE_ID_BITS + 1); + const INFO_MASK: u64 = 0x000003FF_FFFFFFFF; + + /// Return a new transition to the given state ID with the given epsilons. + fn new(match_wins: bool, sid: StateID, epsilons: Epsilons) -> Transition { + let match_wins = + if match_wins { 1 << Transition::MATCH_WINS_SHIFT } else { 0 }; + let sid = sid.as_u64() << Transition::STATE_ID_SHIFT; + Transition(sid | match_wins | epsilons.0) + } + + /// Returns true if and only if this transition points to the DEAD state. + fn is_dead(self) -> bool { + self.state_id() == DEAD + } + + /// Return whether this transition has a "match wins" property. + /// + /// When a transition has this property, it means that if a match has been + /// found and the search uses leftmost-first semantics, then that match + /// should be returned immediately instead of continuing on. + /// + /// The "match wins" name comes from RE2, which uses a pretty much + /// identical mechanism for implementing leftmost-first semantics. + fn match_wins(&self) -> bool { + (self.0 >> Transition::MATCH_WINS_SHIFT & 1) == 1 + } + + /// Return the "next" state ID that this transition points to. + fn state_id(&self) -> StateID { + // OK because a Transition has a valid StateID in its upper bits by + // construction. The cast to usize is also correct, even on 16-bit + // targets because, again, we know the upper bits is a valid StateID, + // which can never overflow usize on any supported target. 
+ StateID::new_unchecked( + (self.0 >> Transition::STATE_ID_SHIFT).as_usize(), + ) + } + + /// Set the "next" state ID in this transition. + fn set_state_id(&mut self, sid: StateID) { + *self = Transition::new(self.match_wins(), sid, self.epsilons()); + } + + /// Return the epsilons embedded in this transition. + fn epsilons(&self) -> Epsilons { + Epsilons(self.0 & Transition::INFO_MASK) + } +} + +impl core::fmt::Debug for Transition { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + if self.is_dead() { + return write!(f, "0"); + } + write!(f, "{}", self.state_id().as_usize())?; + if self.match_wins() { + write!(f, "-MW")?; + } + if !self.epsilons().is_empty() { + write!(f, "-{:?}", self.epsilons())?; + } + Ok(()) + } +} + +/// A representation of a match state's pattern ID along with the epsilons for +/// when a match occurs. +/// +/// A match state in a one-pass DFA, unlike in a more general DFA, has exactly +/// one pattern ID. If it had more, then the original NFA would not have been +/// one-pass. +/// +/// The "epsilons" part of this corresponds to what was found in the epsilon +/// transitions between the transition taken in the last byte of input and the +/// ultimate match state. This might include saving slots and/or conditional +/// epsilon transitions that must be satisfied before one can report the match. +/// +/// Technically, every state has room for a 'PatternEpsilons', but it is only +/// ever non-empty for match states. +#[derive(Clone, Copy)] +struct PatternEpsilons(u64); + +impl PatternEpsilons { + const PATTERN_ID_BITS: u64 = 22; + const PATTERN_ID_SHIFT: u64 = 64 - PatternEpsilons::PATTERN_ID_BITS; + // A sentinel value indicating that this is not a match state. We don't + // use 0 since 0 is a valid pattern ID. 
+ const PATTERN_ID_NONE: u64 = 0x00000000_003FFFFF; + const PATTERN_ID_LIMIT: u64 = PatternEpsilons::PATTERN_ID_NONE; + const PATTERN_ID_MASK: u64 = 0xFFFFFC00_00000000; + const EPSILONS_MASK: u64 = 0x000003FF_FFFFFFFF; + + /// Return a new empty pattern epsilons that has no pattern ID and has no + /// epsilons. This is suitable for non-match states. + fn empty() -> PatternEpsilons { + PatternEpsilons( + PatternEpsilons::PATTERN_ID_NONE + << PatternEpsilons::PATTERN_ID_SHIFT, + ) + } + + /// Whether this pattern epsilons is empty or not. It's empty when it has + /// no pattern ID and an empty epsilons. + fn is_empty(self) -> bool { + self.pattern_id().is_none() && self.epsilons().is_empty() + } + + /// Return the pattern ID in this pattern epsilons if one exists. + fn pattern_id(self) -> Option { + let pid = self.0 >> PatternEpsilons::PATTERN_ID_SHIFT; + if pid == PatternEpsilons::PATTERN_ID_LIMIT { + None + } else { + Some(PatternID::new_unchecked(pid.as_usize())) + } + } + + /// Returns the pattern ID without checking whether it's valid. If this is + /// called and there is no pattern ID in this `PatternEpsilons`, then this + /// will likely produce an incorrect result or possibly even a panic or + /// an overflow. But safety will not be violated. + /// + /// This is useful when you know a particular state is a match state. If + /// it's a match state, then it must have a pattern ID. + fn pattern_id_unchecked(self) -> PatternID { + let pid = self.0 >> PatternEpsilons::PATTERN_ID_SHIFT; + PatternID::new_unchecked(pid.as_usize()) + } + + /// Return a new pattern epsilons with the given pattern ID, but the same + /// epsilons. + fn set_pattern_id(self, pid: PatternID) -> PatternEpsilons { + PatternEpsilons( + (pid.as_u64() << PatternEpsilons::PATTERN_ID_SHIFT) + | (self.0 & PatternEpsilons::EPSILONS_MASK), + ) + } + + /// Return the epsilons part of this pattern epsilons. 
+ fn epsilons(self) -> Epsilons { + Epsilons(self.0 & PatternEpsilons::EPSILONS_MASK) + } + + /// Return a new pattern epsilons with the given epsilons, but the same + /// pattern ID. + fn set_epsilons(self, epsilons: Epsilons) -> PatternEpsilons { + PatternEpsilons( + (self.0 & PatternEpsilons::PATTERN_ID_MASK) + | (u64::from(epsilons.0) & PatternEpsilons::EPSILONS_MASK), + ) + } +} + +impl core::fmt::Debug for PatternEpsilons { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + if self.is_empty() { + return write!(f, "N/A"); + } + if let Some(pid) = self.pattern_id() { + write!(f, "{}", pid.as_usize())?; + } + if !self.epsilons().is_empty() { + if self.pattern_id().is_some() { + write!(f, "/")?; + } + write!(f, "{:?}", self.epsilons())?; + } + Ok(()) + } +} + +/// Epsilons represents all of the NFA epsilons transitions that went into a +/// single transition in a single DFA state. In this case, it only represents +/// the epsilon transitions that have some kind of non-consuming side effect: +/// either the transition requires storing the current position of the search +/// into a slot, or the transition is conditional and requires the current +/// position in the input to satisfy an assertion before the transition may be +/// taken. +/// +/// This folds the cumulative effect of a group of NFA states (all connected +/// by epsilon transitions) down into a single set of bits. While these bits +/// can represent all possible conditional epsilon transitions, it only permits +/// storing up to a somewhat small number of slots. +/// +/// Epsilons is represented as a 42-bit integer. For example, it is packed into +/// the lower 42 bits of a `Transition`. (Where the high 22 bits contains a +/// `StateID` and a special "match wins" property.) 
+#[derive(Clone, Copy)] +struct Epsilons(u64); + +impl Epsilons { + const SLOT_MASK: u64 = 0x000003FF_FFFFFC00; + const SLOT_SHIFT: u64 = 10; + const LOOK_MASK: u64 = 0x00000000_000003FF; + + /// Create a new empty epsilons. It has no slots and no assertions that + /// need to be satisfied. + fn empty() -> Epsilons { + Epsilons(0) + } + + /// Returns true if this epsilons contains no slots and no assertions. + fn is_empty(self) -> bool { + self.0 == 0 + } + + /// Returns the slot epsilon transitions. + fn slots(self) -> Slots { + Slots((self.0 >> Epsilons::SLOT_SHIFT).low_u32()) + } + + /// Set the slot epsilon transitions. + fn set_slots(self, slots: Slots) -> Epsilons { + Epsilons( + (u64::from(slots.0) << Epsilons::SLOT_SHIFT) + | (self.0 & Epsilons::LOOK_MASK), + ) + } + + /// Return the set of look-around assertions in these epsilon transitions. + fn looks(self) -> LookSet { + LookSet { bits: (self.0 & Epsilons::LOOK_MASK).low_u32() } + } + + /// Set the look-around assertions on these epsilon transitions. + fn set_looks(self, look_set: LookSet) -> Epsilons { + Epsilons( + (self.0 & Epsilons::SLOT_MASK) + | (u64::from(look_set.bits) & Epsilons::LOOK_MASK), + ) + } +} + +impl core::fmt::Debug for Epsilons { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + let mut wrote = false; + if !self.slots().is_empty() { + write!(f, "{:?}", self.slots())?; + wrote = true; + } + if !self.looks().is_empty() { + if wrote { + write!(f, "/")?; + } + write!(f, "{:?}", self.looks())?; + wrote = true; + } + if !wrote { + write!(f, "N/A")?; + } + Ok(()) + } +} + +/// The set of epsilon transitions indicating that the current position in a +/// search should be saved to a slot. +/// +/// This *only* represents explicit slots. So for example, the pattern +/// `[a-z]+([0-9]+)([a-z]+)` has: +/// +/// * 3 capturing groups, thus 6 slots. +/// * 1 implicit capturing group, thus 2 implicit slots. +/// * 2 explicit capturing groups, thus 4 explicit slots. 
+/// +/// While implicit slots are represented by epsilon transitions in an NFA, we +/// do not explicitly represent them here. Instead, implicit slots are assumed +/// to be present and handled automatically in the search code. Therefore, +/// that means we only need to represent explicit slots in our epsilon +/// transitions. +/// +/// Its representation is a bit set. The bit 'i' is set if and only if there +/// exists an explicit slot at index 'c', where 'c = (#patterns * 2) + i'. That +/// is, the bit 'i' corresponds to the first explicit slot and the first +/// explicit slot appears immediately following the last implicit slot. (If +/// this is confusing, see `GroupInfo` for more details on how slots works.) +/// +/// A single `Slots` represents all the active slots in a sub-graph of an NFA, +/// where all the states are connected by epsilon transitions. In effect, when +/// traversing the one-pass DFA during a search, all slots set in a particular +/// transition must be captured by recording the current search position. +/// +/// The API of `Slots` requires the caller to handle the explicit slot offset. +/// That is, a `Slots` doesn't know where the explicit slots start for a +/// particular NFA. Thus, if the callers see's the bit 'i' is set, then they +/// need to do the arithmetic above to find 'c', which is the real actual slot +/// index in the corresponding NFA. +#[derive(Clone, Copy)] +struct Slots(u32); + +impl Slots { + const LIMIT: usize = 32; + + /// Insert the slot at the given bit index. + fn insert(self, slot: usize) -> Slots { + debug_assert!(slot < Slots::LIMIT); + Slots(self.0 | (1 << slot.as_u32())) + } + + /// Remove the slot at the given bit index. + fn remove(self, slot: usize) -> Slots { + debug_assert!(slot < Slots::LIMIT); + Slots(self.0 & !(1 << slot.as_u32())) + } + + /// Returns true if and only if this set contains no slots. 
+ fn is_empty(self) -> bool { + self.0 == 0 + } + + /// Returns an iterator over all of the set bits in this set. + fn iter(self) -> SlotsIter { + SlotsIter { slots: self } + } + + /// For the position `at` in the current haystack, copy it to + /// `caller_explicit_slots` for all slots that are in this set. + /// + /// Callers may pass a slice of any length. Slots in this set bigger than + /// the length of the given explicit slots are simply skipped. + /// + /// The slice *must* correspond only to the explicit slots and the first + /// element of the slice must always correspond to the first explicit slot + /// in the corresponding NFA. + fn apply( + self, + at: usize, + caller_explicit_slots: &mut [Option], + ) { + if self.is_empty() { + return; + } + let at = NonMaxUsize::new(at); + for slot in self.iter() { + if slot >= caller_explicit_slots.len() { + break; + } + caller_explicit_slots[slot] = at; + } + } +} + +impl core::fmt::Debug for Slots { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + write!(f, "S")?; + for slot in self.iter() { + write!(f, "-{:?}", slot)?; + } + Ok(()) + } +} + +/// An iterator over all of the bits set in a slot set. +/// +/// This returns the bit index that is set, so callers may need to offset it +/// to get the actual NFA slot index. +#[derive(Debug)] +struct SlotsIter { + slots: Slots, +} + +impl Iterator for SlotsIter { + type Item = usize; + + fn next(&mut self) -> Option { + // Number of zeroes here is always <= u8::MAX, and so fits in a usize. + let slot = self.slots.0.trailing_zeros().as_usize(); + if slot >= Slots::LIMIT { + return None; + } + self.slots = self.slots.remove(slot); + Some(slot) + } +} + +/// An error that occurred during the construction of a one-pass DFA. +/// +/// This error does not provide many introspection capabilities. There are +/// generally only two things you can do with it: +/// +/// * Obtain a human readable message via its `std::fmt::Display` impl. 
+/// * Access an underlying [`thompson::BuildError`] type from its `source` +/// method via the `std::error::Error` trait. This error only occurs when using +/// convenience routines for building a one-pass DFA directly from a pattern +/// string. +/// +/// When the `std` feature is enabled, this implements the `std::error::Error` +/// trait. +#[derive(Clone, Debug)] +pub struct BuildError { + kind: BuildErrorKind, +} + +/// The kind of error that occurred during the construction of a one-pass DFA. +#[derive(Clone, Debug)] +enum BuildErrorKind { + NFA(crate::nfa::thompson::BuildError), + Word(UnicodeWordBoundaryError), + TooManyStates { limit: u64 }, + TooManyPatterns { limit: u64 }, + UnsupportedLook { look: Look }, + ExceededSizeLimit { limit: usize }, + NotOnePass { msg: &'static str }, +} + +impl BuildError { + fn nfa(err: crate::nfa::thompson::BuildError) -> BuildError { + BuildError { kind: BuildErrorKind::NFA(err) } + } + + fn word(err: UnicodeWordBoundaryError) -> BuildError { + BuildError { kind: BuildErrorKind::Word(err) } + } + + fn too_many_states(limit: u64) -> BuildError { + BuildError { kind: BuildErrorKind::TooManyStates { limit } } + } + + fn too_many_patterns(limit: u64) -> BuildError { + BuildError { kind: BuildErrorKind::TooManyPatterns { limit } } + } + + fn unsupported_look(look: Look) -> BuildError { + BuildError { kind: BuildErrorKind::UnsupportedLook { look } } + } + + fn exceeded_size_limit(limit: usize) -> BuildError { + BuildError { kind: BuildErrorKind::ExceededSizeLimit { limit } } + } + + fn not_one_pass(msg: &'static str) -> BuildError { + BuildError { kind: BuildErrorKind::NotOnePass { msg } } + } +} + +#[cfg(feature = "std")] +impl std::error::Error for BuildError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + use self::BuildErrorKind::*; + + match self.kind { + NFA(ref err) => Some(err), + Word(ref err) => Some(err), + _ => None, + } + } +} + +impl core::fmt::Display for BuildError { + fn fmt(&self, f: &mut 
core::fmt::Formatter<'_>) -> core::fmt::Result { + use self::BuildErrorKind::*; + + match self.kind { + NFA(_) => write!(f, "error building NFA"), + Word(_) => write!(f, "NFA contains Unicode word boundary"), + TooManyStates { limit } => write!( + f, + "one-pass DFA exceeded a limit of {:?} for number of states", + limit, + ), + TooManyPatterns { limit } => write!( + f, + "one-pass DFA exceeded a limit of {:?} for number of patterns", + limit, + ), + UnsupportedLook { look } => write!( + f, + "one-pass DFA does not support the {:?} assertion", + look, + ), + ExceededSizeLimit { limit } => write!( + f, + "one-pass DFA exceeded size limit of {:?} during building", + limit, + ), + NotOnePass { msg } => write!( + f, + "one-pass DFA could not be built because \ + pattern is not one-pass: {}", + msg, + ), + } + } +} + +#[cfg(all(test, feature = "syntax"))] +mod tests { + use alloc::string::ToString; + + use super::*; + + #[test] + fn fail_conflicting_transition() { + let predicate = |err: &str| err.contains("conflicting transition"); + + let err = DFA::new(r"a*[ab]").unwrap_err().to_string(); + assert!(predicate(&err), "{}", err); + } + + #[test] + fn fail_multiple_epsilon() { + let predicate = |err: &str| { + err.contains("multiple epsilon transitions to same state") + }; + + let err = DFA::new(r"(^|$)a").unwrap_err().to_string(); + assert!(predicate(&err), "{}", err); + } + + #[test] + fn fail_multiple_match() { + let predicate = |err: &str| { + err.contains("multiple epsilon transitions to match state") + }; + + let err = DFA::new_many(&[r"^", r"$"]).unwrap_err().to_string(); + assert!(predicate(&err), "{}", err); + } + + // This test is meant to build a one-pass regex with the maximum number of + // possible slots. + // + // NOTE: Remember that the slot limit only applies to explicit capturing + // groups. 
Any number of implicit capturing groups is supported (up to the + // maximum number of supported patterns), since implicit groups are handled + // by the search loop itself. + #[test] + fn max_slots() { + // One too many... + let pat = r"(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)(m)(n)(o)(p)(q)"; + assert!(DFA::new(pat).is_err()); + // Just right. + let pat = r"(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)(m)(n)(o)(p)"; + assert!(DFA::new(pat).is_ok()); + } + + // This test ensures that the one-pass DFA works with all look-around + // assertions that we expect it to work with. + // + // The utility of this test is that each one-pass transition has a small + // amount of space to store look-around assertions. Currently, there is + // logic in the one-pass constructor to ensure there aren't more than ten + // possible assertions. And indeed, there are only ten possible assertions + // (at time of writing), so this is okay. But conceivably, more assertions + // could be added. So we check that things at least work with what we + // expect them to work with. 
+ #[test] + fn assertions() { + // haystack anchors + assert!(DFA::new(r"^").is_ok()); + assert!(DFA::new(r"$").is_ok()); + + // line anchors + assert!(DFA::new(r"(?m)^").is_ok()); + assert!(DFA::new(r"(?m)$").is_ok()); + assert!(DFA::new(r"(?Rm)^").is_ok()); + assert!(DFA::new(r"(?Rm)$").is_ok()); + + // word boundaries + if cfg!(feature = "unicode-word-boundary") { + assert!(DFA::new(r"\b").is_ok()); + assert!(DFA::new(r"\B").is_ok()); + } + assert!(DFA::new(r"(?-u)\b").is_ok()); + assert!(DFA::new(r"(?-u)\B").is_ok()); + } + + #[cfg(not(miri))] // takes too long on miri + #[test] + fn is_one_pass() { + use crate::util::syntax; + + assert!(DFA::new(r"a*b").is_ok()); + if cfg!(feature = "unicode-perl") { + assert!(DFA::new(r"\w").is_ok()); + } + assert!(DFA::new(r"(?-u)\w*\s").is_ok()); + assert!(DFA::new(r"(?s:.)*?").is_ok()); + assert!(DFA::builder() + .syntax(syntax::Config::new().utf8(false)) + .build(r"(?s-u:.)*?") + .is_ok()); + } + + #[test] + fn is_not_one_pass() { + assert!(DFA::new(r"a*a").is_err()); + assert!(DFA::new(r"(?s-u:.)*?").is_err()); + assert!(DFA::new(r"(?s:.)*?a").is_err()); + } + + #[cfg(not(miri))] + #[test] + fn is_not_one_pass_bigger() { + assert!(DFA::new(r"\w*\s").is_err()); + } +} diff --git a/vendor/regex-automata/src/dfa/regex.rs b/vendor/regex-automata/src/dfa/regex.rs new file mode 100644 index 0000000..5e7e6e3 --- /dev/null +++ b/vendor/regex-automata/src/dfa/regex.rs @@ -0,0 +1,871 @@ +/*! +A DFA-backed `Regex`. + +This module provides [`Regex`], which is defined generically over the +[`Automaton`] trait. A `Regex` implements convenience routines you might have +come to expect, such as finding the start/end of a match and iterating over +all non-overlapping matches. This `Regex` type is limited in its capabilities +to what a DFA can provide. Therefore, APIs involving capturing groups, for +example, are not provided. + +Internally, a `Regex` is composed of two DFAs. 
One is a "forward" DFA that +finds the end offset of a match, where as the other is a "reverse" DFA that +find the start offset of a match. + +See the [parent module](crate::dfa) for examples. +*/ + +#[cfg(feature = "alloc")] +use alloc::vec::Vec; + +#[cfg(feature = "dfa-build")] +use crate::dfa::dense::BuildError; +use crate::{ + dfa::{automaton::Automaton, dense}, + util::{iter, search::Input}, + Anchored, Match, MatchError, +}; +#[cfg(feature = "alloc")] +use crate::{ + dfa::{sparse, StartKind}, + util::search::MatchKind, +}; + +// When the alloc feature is enabled, the regex type sets its A type parameter +// to default to an owned dense DFA. But without alloc, we set no default. This +// makes things a lot more convenient in the common case, since writing out the +// DFA types is pretty annoying. +// +// Since we have two different definitions but only want to write one doc +// string, we use a macro to capture the doc and other attributes once and then +// repeat them for each definition. +macro_rules! define_regex_type { + ($(#[$doc:meta])*) => { + #[cfg(feature = "alloc")] + $(#[$doc])* + pub struct Regex { + forward: A, + reverse: A, + } + + #[cfg(not(feature = "alloc"))] + $(#[$doc])* + pub struct Regex { + forward: A, + reverse: A, + } + }; +} + +define_regex_type!( + /// A regular expression that uses deterministic finite automata for fast + /// searching. + /// + /// A regular expression is comprised of two DFAs, a "forward" DFA and a + /// "reverse" DFA. The forward DFA is responsible for detecting the end of + /// a match while the reverse DFA is responsible for detecting the start + /// of a match. Thus, in order to find the bounds of any given match, a + /// forward search must first be run followed by a reverse search. A match + /// found by the forward DFA guarantees that the reverse DFA will also find + /// a match. 
+ /// + /// The type of the DFA used by a `Regex` corresponds to the `A` type + /// parameter, which must satisfy the [`Automaton`] trait. Typically, + /// `A` is either a [`dense::DFA`](crate::dfa::dense::DFA) or a + /// [`sparse::DFA`](crate::dfa::sparse::DFA), where dense DFAs use more + /// memory but search faster, while sparse DFAs use less memory but search + /// more slowly. + /// + /// # Crate features + /// + /// Note that despite what the documentation auto-generates, the _only_ + /// crate feature needed to use this type is `dfa-search`. You do _not_ + /// need to enable the `alloc` feature. + /// + /// By default, a regex's automaton type parameter is set to + /// `dense::DFA>` when the `alloc` feature is enabled. For most + /// in-memory work loads, this is the most convenient type that gives the + /// best search performance. When the `alloc` feature is disabled, no + /// default type is used. + /// + /// # When should I use this? + /// + /// Generally speaking, if you can afford the overhead of building a full + /// DFA for your regex, and you don't need things like capturing groups, + /// then this is a good choice if you're looking to optimize for matching + /// speed. Note however that its speed may be worse than a general purpose + /// regex engine if you don't provide a [`dense::Config::prefilter`] to the + /// underlying DFA. + /// + /// # Sparse DFAs + /// + /// Since a `Regex` is generic over the [`Automaton`] trait, it can be + /// used with any kind of DFA. While this crate constructs dense DFAs by + /// default, it is easy enough to build corresponding sparse DFAs, and then + /// build a regex from them: + /// + /// ``` + /// use regex_automata::dfa::regex::Regex; + /// + /// // First, build a regex that uses dense DFAs. + /// let dense_re = Regex::new("foo[0-9]+")?; + /// + /// // Second, build sparse DFAs from the forward and reverse dense DFAs. 
+ /// let fwd = dense_re.forward().to_sparse()?; + /// let rev = dense_re.reverse().to_sparse()?; + /// + /// // Third, build a new regex from the constituent sparse DFAs. + /// let sparse_re = Regex::builder().build_from_dfas(fwd, rev); + /// + /// // A regex that uses sparse DFAs can be used just like with dense DFAs. + /// assert_eq!(true, sparse_re.is_match(b"foo123")); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// Alternatively, one can use a [`Builder`] to construct a sparse DFA + /// more succinctly. (Note though that dense DFAs are still constructed + /// first internally, and then converted to sparse DFAs, as in the example + /// above.) + /// + /// ``` + /// use regex_automata::dfa::regex::Regex; + /// + /// let sparse_re = Regex::builder().build_sparse(r"foo[0-9]+")?; + /// // A regex that uses sparse DFAs can be used just like with dense DFAs. + /// assert!(sparse_re.is_match(b"foo123")); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// # Fallibility + /// + /// Most of the search routines defined on this type will _panic_ when the + /// underlying search fails. This might be because the DFA gave up because + /// it saw a quit byte, whether configured explicitly or via heuristic + /// Unicode word boundary support, although neither are enabled by default. + /// Or it might fail because an invalid `Input` configuration is given, + /// for example, with an unsupported [`Anchored`] mode. + /// + /// If you need to handle these error cases instead of allowing them to + /// trigger a panic, then the lower level [`Regex::try_search`] provides + /// a fallible API that never panics. + /// + /// # Example + /// + /// This example shows how to cause a search to terminate if it sees a + /// `\n` byte, and handle the error returned. This could be useful if, for + /// example, you wanted to prevent a user supplied pattern from matching + /// across a line boundary. 
+ /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{dfa::{self, regex::Regex}, Input, MatchError}; + /// + /// let re = Regex::builder() + /// .dense(dfa::dense::Config::new().quit(b'\n', true)) + /// .build(r"foo\p{any}+bar")?; + /// + /// let input = Input::new("foo\nbar"); + /// // Normally this would produce a match, since \p{any} contains '\n'. + /// // But since we instructed the automaton to enter a quit state if a + /// // '\n' is observed, this produces a match error instead. + /// let expected = MatchError::quit(b'\n', 3); + /// let got = re.try_search(&input).unwrap_err(); + /// assert_eq!(expected, got); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[derive(Clone, Debug)] +); + +#[cfg(all(feature = "syntax", feature = "dfa-build"))] +impl Regex { + /// Parse the given regular expression using the default configuration and + /// return the corresponding regex. + /// + /// If you want a non-default configuration, then use the [`Builder`] to + /// set your own configuration. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{Match, dfa::regex::Regex}; + /// + /// let re = Regex::new("foo[0-9]+bar")?; + /// assert_eq!( + /// Some(Match::must(0, 3..14)), + /// re.find(b"zzzfoo12345barzzz"), + /// ); + /// # Ok::<(), Box>(()) + /// ``` + pub fn new(pattern: &str) -> Result { + Builder::new().build(pattern) + } + + /// Like `new`, but parses multiple patterns into a single "regex set." + /// This similarly uses the default regex configuration. 
+ /// + /// # Example + /// + /// ``` + /// use regex_automata::{Match, dfa::regex::Regex}; + /// + /// let re = Regex::new_many(&["[a-z]+", "[0-9]+"])?; + /// + /// let mut it = re.find_iter(b"abc 1 foo 4567 0 quux"); + /// assert_eq!(Some(Match::must(0, 0..3)), it.next()); + /// assert_eq!(Some(Match::must(1, 4..5)), it.next()); + /// assert_eq!(Some(Match::must(0, 6..9)), it.next()); + /// assert_eq!(Some(Match::must(1, 10..14)), it.next()); + /// assert_eq!(Some(Match::must(1, 15..16)), it.next()); + /// assert_eq!(Some(Match::must(0, 17..21)), it.next()); + /// assert_eq!(None, it.next()); + /// # Ok::<(), Box>(()) + /// ``` + pub fn new_many>( + patterns: &[P], + ) -> Result { + Builder::new().build_many(patterns) + } +} + +#[cfg(all(feature = "syntax", feature = "dfa-build"))] +impl Regex>> { + /// Parse the given regular expression using the default configuration, + /// except using sparse DFAs, and return the corresponding regex. + /// + /// If you want a non-default configuration, then use the [`Builder`] to + /// set your own configuration. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{Match, dfa::regex::Regex}; + /// + /// let re = Regex::new_sparse("foo[0-9]+bar")?; + /// assert_eq!( + /// Some(Match::must(0, 3..14)), + /// re.find(b"zzzfoo12345barzzz"), + /// ); + /// # Ok::<(), Box>(()) + /// ``` + pub fn new_sparse( + pattern: &str, + ) -> Result>>, BuildError> { + Builder::new().build_sparse(pattern) + } + + /// Like `new`, but parses multiple patterns into a single "regex set" + /// using sparse DFAs. This otherwise similarly uses the default regex + /// configuration. 
+ /// + /// # Example + /// + /// ``` + /// use regex_automata::{Match, dfa::regex::Regex}; + /// + /// let re = Regex::new_many_sparse(&["[a-z]+", "[0-9]+"])?; + /// + /// let mut it = re.find_iter(b"abc 1 foo 4567 0 quux"); + /// assert_eq!(Some(Match::must(0, 0..3)), it.next()); + /// assert_eq!(Some(Match::must(1, 4..5)), it.next()); + /// assert_eq!(Some(Match::must(0, 6..9)), it.next()); + /// assert_eq!(Some(Match::must(1, 10..14)), it.next()); + /// assert_eq!(Some(Match::must(1, 15..16)), it.next()); + /// assert_eq!(Some(Match::must(0, 17..21)), it.next()); + /// assert_eq!(None, it.next()); + /// # Ok::<(), Box>(()) + /// ``` + pub fn new_many_sparse>( + patterns: &[P], + ) -> Result>>, BuildError> { + Builder::new().build_many_sparse(patterns) + } +} + +/// Convenience routines for regex construction. +impl Regex> { + /// Return a builder for configuring the construction of a `Regex`. + /// + /// This is a convenience routine to avoid needing to import the + /// [`Builder`] type in common cases. + /// + /// # Example + /// + /// This example shows how to use the builder to disable UTF-8 mode + /// everywhere. + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{ + /// dfa::regex::Regex, nfa::thompson, util::syntax, Match, + /// }; + /// + /// let re = Regex::builder() + /// .syntax(syntax::Config::new().utf8(false)) + /// .thompson(thompson::Config::new().utf8(false)) + /// .build(r"foo(?-u:[^b])ar.*")?; + /// let haystack = b"\xFEfoo\xFFarzz\xE2\x98\xFF\n"; + /// let expected = Some(Match::must(0, 1..9)); + /// let got = re.find(haystack); + /// assert_eq!(expected, got); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn builder() -> Builder { + Builder::new() + } +} + +/// Standard search routines for finding and iterating over matches. +impl Regex { + /// Returns true if and only if this regex matches the given haystack. 
+ /// + /// This routine may short circuit if it knows that scanning future input + /// will never lead to a different result. In particular, if the underlying + /// DFA enters a match state or a dead state, then this routine will return + /// `true` or `false`, respectively, without inspecting any future input. + /// + /// # Panics + /// + /// This routine panics if the search could not complete. This can occur + /// in a number of circumstances: + /// + /// * The configuration of the DFA may permit it to "quit" the search. + /// For example, setting quit bytes or enabling heuristic support for + /// Unicode word boundaries. The default configuration does not enable any + /// option that could result in the DFA quitting. + /// * When the provided `Input` configuration is not supported. For + /// example, by providing an unsupported anchor mode. + /// + /// When a search panics, callers cannot know whether a match exists or + /// not. + /// + /// Use [`Regex::try_search`] if you want to handle these error conditions. + /// + /// # Example + /// + /// ``` + /// use regex_automata::dfa::regex::Regex; + /// + /// let re = Regex::new("foo[0-9]+bar")?; + /// assert_eq!(true, re.is_match("foo12345bar")); + /// assert_eq!(false, re.is_match("foobar")); + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn is_match<'h, I: Into>>(&self, input: I) -> bool { + // Not only can we do an "earliest" search, but we can avoid doing a + // reverse scan too. + let input = input.into().earliest(true); + self.forward().try_search_fwd(&input).map(|x| x.is_some()).unwrap() + } + + /// Returns the start and end offset of the leftmost match. If no match + /// exists, then `None` is returned. + /// + /// # Panics + /// + /// This routine panics if the search could not complete. This can occur + /// in a number of circumstances: + /// + /// * The configuration of the DFA may permit it to "quit" the search. 
+ /// For example, setting quit bytes or enabling heuristic support for + /// Unicode word boundaries. The default configuration does not enable any + /// option that could result in the DFA quitting. + /// * When the provided `Input` configuration is not supported. For + /// example, by providing an unsupported anchor mode. + /// + /// When a search panics, callers cannot know whether a match exists or + /// not. + /// + /// Use [`Regex::try_search`] if you want to handle these error conditions. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{Match, dfa::regex::Regex}; + /// + /// // Greediness is applied appropriately. + /// let re = Regex::new("foo[0-9]+")?; + /// assert_eq!(Some(Match::must(0, 3..11)), re.find("zzzfoo12345zzz")); + /// + /// // Even though a match is found after reading the first byte (`a`), + /// // the default leftmost-first match semantics demand that we find the + /// // earliest match that prefers earlier parts of the pattern over latter + /// // parts. + /// let re = Regex::new("abc|a")?; + /// assert_eq!(Some(Match::must(0, 0..3)), re.find("abc")); + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn find<'h, I: Into>>(&self, input: I) -> Option { + self.try_search(&input.into()).unwrap() + } + + /// Returns an iterator over all non-overlapping leftmost matches in the + /// given bytes. If no match exists, then the iterator yields no elements. + /// + /// This corresponds to the "standard" regex search iterator. + /// + /// # Panics + /// + /// If the search returns an error during iteration, then iteration + /// panics. See [`Regex::find`] for the panic conditions. + /// + /// Use [`Regex::try_search`] with + /// [`util::iter::Searcher`](crate::util::iter::Searcher) if you want to + /// handle these error conditions. 
+ /// + /// # Example + /// + /// ``` + /// use regex_automata::{Match, dfa::regex::Regex}; + /// + /// let re = Regex::new("foo[0-9]+")?; + /// let text = "foo1 foo12 foo123"; + /// let matches: Vec = re.find_iter(text).collect(); + /// assert_eq!(matches, vec![ + /// Match::must(0, 0..4), + /// Match::must(0, 5..10), + /// Match::must(0, 11..17), + /// ]); + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn find_iter<'r, 'h, I: Into>>( + &'r self, + input: I, + ) -> FindMatches<'r, 'h, A> { + let it = iter::Searcher::new(input.into()); + FindMatches { re: self, it } + } +} + +/// Lower level fallible search routines that permit controlling where the +/// search starts and ends in a particular sequence. +impl Regex { + /// Returns the start and end offset of the leftmost match. If no match + /// exists, then `None` is returned. + /// + /// This is like [`Regex::find`] but with two differences: + /// + /// 1. It is not generic over `Into` and instead accepts a + /// `&Input`. This permits reusing the same `Input` for multiple searches + /// without needing to create a new one. This _may_ help with latency. + /// 2. It returns an error if the search could not complete where as + /// [`Regex::find`] will panic. + /// + /// # Errors + /// + /// This routine errors if the search could not complete. This can occur + /// in the following circumstances: + /// + /// * The configuration of the DFA may permit it to "quit" the search. + /// For example, setting quit bytes or enabling heuristic support for + /// Unicode word boundaries. The default configuration does not enable any + /// option that could result in the DFA quitting. + /// * When the provided `Input` configuration is not supported. For + /// example, by providing an unsupported anchor mode. + /// + /// When a search returns an error, callers cannot know whether a match + /// exists or not. 
+ #[inline] + pub fn try_search( + &self, + input: &Input<'_>, + ) -> Result, MatchError> { + let (fwd, rev) = (self.forward(), self.reverse()); + let end = match fwd.try_search_fwd(input)? { + None => return Ok(None), + Some(end) => end, + }; + // This special cases an empty match at the beginning of the search. If + // our end matches our start, then since a reverse DFA can't match past + // the start, it must follow that our starting position is also our end + // position. So short circuit and skip the reverse search. + if input.start() == end.offset() { + return Ok(Some(Match::new( + end.pattern(), + end.offset()..end.offset(), + ))); + } + // We can also skip the reverse search if we know our search was + // anchored. This occurs either when the input config is anchored or + // when we know the regex itself is anchored. In this case, we know the + // start of the match, if one is found, must be the start of the + // search. + if self.is_anchored(input) { + return Ok(Some(Match::new( + end.pattern(), + input.start()..end.offset(), + ))); + } + // N.B. I have tentatively convinced myself that it isn't necessary + // to specify the specific pattern for the reverse search since the + // reverse search will always find the same pattern to match as the + // forward search. But I lack a rigorous proof. Why not just provide + // the pattern anyway? Well, if it is needed, then leaving it out + // gives us a chance to find a witness. (Also, if we don't need to + // specify the pattern, then we don't need to build the reverse DFA + // with 'starts_for_each_pattern' enabled.) + // + // We also need to be careful to disable 'earliest' for the reverse + // search, since it could be enabled for the forward search. In the + // reverse case, to satisfy "leftmost" criteria, we need to match + // as much as we can. We also need to be careful to make the search + // anchored. 
We don't want the reverse search to report any matches + // other than the one beginning at the end of our forward search. + let revsearch = input + .clone() + .span(input.start()..end.offset()) + .anchored(Anchored::Yes) + .earliest(false); + let start = rev + .try_search_rev(&revsearch)? + .expect("reverse search must match if forward search does"); + assert_eq!( + start.pattern(), + end.pattern(), + "forward and reverse search must match same pattern", + ); + assert!(start.offset() <= end.offset()); + Ok(Some(Match::new(end.pattern(), start.offset()..end.offset()))) + } + + /// Returns true if either the given input specifies an anchored search + /// or if the underlying DFA is always anchored. + fn is_anchored(&self, input: &Input<'_>) -> bool { + match input.get_anchored() { + Anchored::No => self.forward().is_always_start_anchored(), + Anchored::Yes | Anchored::Pattern(_) => true, + } + } +} + +/// Non-search APIs for querying information about the regex and setting a +/// prefilter. +impl Regex { + /// Return the underlying DFA responsible for forward matching. + /// + /// This is useful for accessing the underlying DFA and converting it to + /// some other format or size. See the [`Builder::build_from_dfas`] docs + /// for an example of where this might be useful. + pub fn forward(&self) -> &A { + &self.forward + } + + /// Return the underlying DFA responsible for reverse matching. + /// + /// This is useful for accessing the underlying DFA and converting it to + /// some other format or size. See the [`Builder::build_from_dfas`] docs + /// for an example of where this might be useful. + pub fn reverse(&self) -> &A { + &self.reverse + } + + /// Returns the total number of patterns matched by this regex. 
+ /// + /// # Example + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::dfa::regex::Regex; + /// + /// let re = Regex::new_many(&[r"[a-z]+", r"[0-9]+", r"\w+"])?; + /// assert_eq!(3, re.pattern_len()); + /// # Ok::<(), Box>(()) + /// ``` + pub fn pattern_len(&self) -> usize { + assert_eq!(self.forward().pattern_len(), self.reverse().pattern_len()); + self.forward().pattern_len() + } +} + +/// An iterator over all non-overlapping matches for an infallible search. +/// +/// The iterator yields a [`Match`] value until no more matches could be found. +/// If the underlying regex engine returns an error, then a panic occurs. +/// +/// The type parameters are as follows: +/// +/// * `A` represents the type of the underlying DFA that implements the +/// [`Automaton`] trait. +/// +/// The lifetime parameters are as follows: +/// +/// * `'h` represents the lifetime of the haystack being searched. +/// * `'r` represents the lifetime of the regex object itself. +/// +/// This iterator can be created with the [`Regex::find_iter`] method. +#[derive(Debug)] +pub struct FindMatches<'r, 'h, A> { + re: &'r Regex, + it: iter::Searcher<'h>, +} + +impl<'r, 'h, A: Automaton> Iterator for FindMatches<'r, 'h, A> { + type Item = Match; + + #[inline] + fn next(&mut self) -> Option { + let FindMatches { re, ref mut it } = *self; + it.advance(|input| re.try_search(input)) + } +} + +/// A builder for a regex based on deterministic finite automatons. +/// +/// This builder permits configuring options for the syntax of a pattern, the +/// NFA construction, the DFA construction and finally the regex searching +/// itself. This builder is different from a general purpose regex builder in +/// that it permits fine grain configuration of the construction process. The +/// trade off for this is complexity, and the possibility of setting a +/// configuration that might not make sense. 
For example, there are two +/// different UTF-8 modes: +/// +/// * [`syntax::Config::utf8`](crate::util::syntax::Config::utf8) controls +/// whether the pattern itself can contain sub-expressions that match invalid +/// UTF-8. +/// * [`thompson::Config::utf8`](crate::nfa::thompson::Config::utf8) controls +/// how the regex iterators themselves advance the starting position of the +/// next search when a match with zero length is found. +/// +/// Generally speaking, callers will want to either enable all of these or +/// disable all of these. +/// +/// Internally, building a regex requires building two DFAs, where one is +/// responsible for finding the end of a match and the other is responsible +/// for finding the start of a match. If you only need to detect whether +/// something matched, or only the end of a match, then you should use a +/// [`dense::Builder`] to construct a single DFA, which is cheaper than +/// building two DFAs. +/// +/// # Build methods +/// +/// This builder has a few "build" methods. In general, it's the result of +/// combining the following parameters: +/// +/// * Building one or many regexes. +/// * Building a regex with dense or sparse DFAs. +/// +/// The simplest "build" method is [`Builder::build`]. It accepts a single +/// pattern and builds a dense DFA using `usize` for the state identifier +/// representation. +/// +/// The most general "build" method is [`Builder::build_many`], which permits +/// building a regex that searches for multiple patterns simultaneously while +/// using a specific state identifier representation. +/// +/// The most flexible "build" method, but hardest to use, is +/// [`Builder::build_from_dfas`]. This exposes the fact that a [`Regex`] is +/// just a pair of DFAs, and this method allows you to specify those DFAs +/// exactly. +/// +/// # Example +/// +/// This example shows how to disable UTF-8 mode in the syntax and the regex +/// itself. This is generally what you want for matching on arbitrary bytes. 
+/// +/// ``` +/// # if cfg!(miri) { return Ok(()); } // miri takes too long +/// use regex_automata::{ +/// dfa::regex::Regex, nfa::thompson, util::syntax, Match, +/// }; +/// +/// let re = Regex::builder() +/// .syntax(syntax::Config::new().utf8(false)) +/// .thompson(thompson::Config::new().utf8(false)) +/// .build(r"foo(?-u:[^b])ar.*")?; +/// let haystack = b"\xFEfoo\xFFarzz\xE2\x98\xFF\n"; +/// let expected = Some(Match::must(0, 1..9)); +/// let got = re.find(haystack); +/// assert_eq!(expected, got); +/// // Notice that `(?-u:[^b])` matches invalid UTF-8, +/// // but the subsequent `.*` does not! Disabling UTF-8 +/// // on the syntax permits this. +/// assert_eq!(b"foo\xFFarzz", &haystack[got.unwrap().range()]); +/// +/// # Ok::<(), Box>(()) +/// ``` +#[derive(Clone, Debug)] +pub struct Builder { + #[cfg(feature = "dfa-build")] + dfa: dense::Builder, +} + +impl Builder { + /// Create a new regex builder with the default configuration. + pub fn new() -> Builder { + Builder { + #[cfg(feature = "dfa-build")] + dfa: dense::Builder::new(), + } + } + + /// Build a regex from the given pattern. + /// + /// If there was a problem parsing or compiling the pattern, then an error + /// is returned. + #[cfg(all(feature = "syntax", feature = "dfa-build"))] + pub fn build(&self, pattern: &str) -> Result { + self.build_many(&[pattern]) + } + + /// Build a regex from the given pattern using sparse DFAs. + /// + /// If there was a problem parsing or compiling the pattern, then an error + /// is returned. + #[cfg(all(feature = "syntax", feature = "dfa-build"))] + pub fn build_sparse( + &self, + pattern: &str, + ) -> Result>>, BuildError> { + self.build_many_sparse(&[pattern]) + } + + /// Build a regex from the given patterns. 
+ #[cfg(all(feature = "syntax", feature = "dfa-build"))] + pub fn build_many>( + &self, + patterns: &[P], + ) -> Result { + let forward = self.dfa.build_many(patterns)?; + let reverse = self + .dfa + .clone() + .configure( + dense::Config::new() + .prefilter(None) + .specialize_start_states(false) + .start_kind(StartKind::Anchored) + .match_kind(MatchKind::All), + ) + .thompson(crate::nfa::thompson::Config::new().reverse(true)) + .build_many(patterns)?; + Ok(self.build_from_dfas(forward, reverse)) + } + + /// Build a sparse regex from the given patterns. + #[cfg(all(feature = "syntax", feature = "dfa-build"))] + pub fn build_many_sparse>( + &self, + patterns: &[P], + ) -> Result>>, BuildError> { + let re = self.build_many(patterns)?; + let forward = re.forward().to_sparse()?; + let reverse = re.reverse().to_sparse()?; + Ok(self.build_from_dfas(forward, reverse)) + } + + /// Build a regex from its component forward and reverse DFAs. + /// + /// This is useful when deserializing a regex from some arbitrary + /// memory region. This is also useful for building regexes from other + /// types of DFAs. + /// + /// If you're building the DFAs from scratch instead of building new DFAs + /// from other DFAs, then you'll need to make sure that the reverse DFA is + /// configured correctly to match the intended semantics. Namely: + /// + /// * It should be anchored. + /// * It should use [`MatchKind::All`] semantics. + /// * It should match in reverse. + /// * Otherwise, its configuration should match the forward DFA. + /// + /// If these conditions aren't satisfied, then the behavior of searches is + /// unspecified. + /// + /// Note that when using this constructor, no configuration is applied. + /// Since this routine provides the DFAs to the builder, there is no + /// opportunity to apply other configuration options. + /// + /// # Example + /// + /// This example is a bit a contrived. 
The usual use of these methods + /// would involve serializing `initial_re` somewhere and then deserializing + /// it later to build a regex. But in this case, we do everything in + /// memory. + /// + /// ``` + /// use regex_automata::dfa::regex::Regex; + /// + /// let initial_re = Regex::new("foo[0-9]+")?; + /// assert_eq!(true, initial_re.is_match(b"foo123")); + /// + /// let (fwd, rev) = (initial_re.forward(), initial_re.reverse()); + /// let re = Regex::builder().build_from_dfas(fwd, rev); + /// assert_eq!(true, re.is_match(b"foo123")); + /// # Ok::<(), Box>(()) + /// ``` + /// + /// This example shows how to build a `Regex` that uses sparse DFAs instead + /// of dense DFAs without using one of the convenience `build_sparse` + /// routines: + /// + /// ``` + /// use regex_automata::dfa::regex::Regex; + /// + /// let initial_re = Regex::new("foo[0-9]+")?; + /// assert_eq!(true, initial_re.is_match(b"foo123")); + /// + /// let fwd = initial_re.forward().to_sparse()?; + /// let rev = initial_re.reverse().to_sparse()?; + /// let re = Regex::builder().build_from_dfas(fwd, rev); + /// assert_eq!(true, re.is_match(b"foo123")); + /// # Ok::<(), Box>(()) + /// ``` + pub fn build_from_dfas( + &self, + forward: A, + reverse: A, + ) -> Regex { + Regex { forward, reverse } + } + + /// Set the syntax configuration for this builder using + /// [`syntax::Config`](crate::util::syntax::Config). + /// + /// This permits setting things like case insensitivity, Unicode and multi + /// line mode. + #[cfg(all(feature = "syntax", feature = "dfa-build"))] + pub fn syntax( + &mut self, + config: crate::util::syntax::Config, + ) -> &mut Builder { + self.dfa.syntax(config); + self + } + + /// Set the Thompson NFA configuration for this builder using + /// [`nfa::thompson::Config`](crate::nfa::thompson::Config). + /// + /// This permits setting things like whether additional time should be + /// spent shrinking the size of the NFA. 
+ #[cfg(all(feature = "syntax", feature = "dfa-build"))] + pub fn thompson( + &mut self, + config: crate::nfa::thompson::Config, + ) -> &mut Builder { + self.dfa.thompson(config); + self + } + + /// Set the dense DFA compilation configuration for this builder using + /// [`dense::Config`]. + /// + /// This permits setting things like whether the underlying DFAs should + /// be minimized. + #[cfg(feature = "dfa-build")] + pub fn dense(&mut self, config: dense::Config) -> &mut Builder { + self.dfa.configure(config); + self + } +} + +impl Default for Builder { + fn default() -> Builder { + Builder::new() + } +} diff --git a/vendor/regex-automata/src/dfa/remapper.rs b/vendor/regex-automata/src/dfa/remapper.rs new file mode 100644 index 0000000..6e49646 --- /dev/null +++ b/vendor/regex-automata/src/dfa/remapper.rs @@ -0,0 +1,242 @@ +use alloc::vec::Vec; + +use crate::util::primitives::StateID; + +/// Remappable is a tightly coupled abstraction that facilitates remapping +/// state identifiers in DFAs. +/// +/// The main idea behind remapping state IDs is that DFAs often need to check +/// if a certain state is a "special" state of some kind (like a match state) +/// during a search. Since this is extremely perf critical code, we want this +/// check to be as fast as possible. Partitioning state IDs into, for example, +/// into "non-match" and "match" states means one can tell if a state is a +/// match state via a simple comparison of the state ID. +/// +/// The issue is that during the DFA construction process, it's not +/// particularly easy to partition the states. Instead, the simplest thing is +/// to often just do a pass over all of the states and shuffle them into their +/// desired partitionings. To do that, we need a mechanism for swapping states. +/// Hence, this abstraction. +/// +/// Normally, for such little code, I would just duplicate it. But this is a +/// key optimization and the implementation is a bit subtle. 
So the abstraction +/// is basically a ham-fisted attempt at DRY. The only place we use this is in +/// the dense and one-pass DFAs. +/// +/// See also src/dfa/special.rs for a more detailed explanation of how dense +/// DFAs are partitioned. +pub(super) trait Remappable: core::fmt::Debug { + /// Return the total number of states. + fn state_len(&self) -> usize; + /// Return the power-of-2 exponent that yields the stride. The pertinent + /// laws here are, where N=stride2: 2^N=stride and len(alphabet) <= stride. + fn stride2(&self) -> usize; + /// Swap the states pointed to by the given IDs. The underlying finite + /// state machine should be mutated such that all of the transitions in + /// `id1` are now in the memory region where the transitions for `id2` + /// were, and all of the transitions in `id2` are now in the memory region + /// where the transitions for `id1` were. + /// + /// Essentially, this "moves" `id1` to `id2` and `id2` to `id1`. + /// + /// It is expected that, after calling this, the underlying value will be + /// left in an inconsistent state, since any other transitions pointing to, + /// e.g., `id1` need to be updated to point to `id2`, since that's where + /// `id1` moved to. + /// + /// In order to "fix" the underlying inconsistent state, a `Remapper` + /// should be used to guarantee that `remap` is called at the appropriate + /// time. + fn swap_states(&mut self, id1: StateID, id2: StateID); + /// This must remap every single state ID in the underlying value according + /// to the function given. For example, in a DFA, this should remap every + /// transition and every starting state ID. + fn remap(&mut self, map: impl Fn(StateID) -> StateID); +} + +/// Remapper is an abstraction the manages the remapping of state IDs in a +/// finite state machine. This is useful when one wants to shuffle states into +/// different positions in the machine. 
+/// +/// One of the key complexities this manages is the ability to correctly move +/// one state multiple times. +/// +/// Once shuffling is complete, `remap` must be called, which will rewrite +/// all pertinent transitions to updated state IDs. Neglecting to call `remap` +/// will almost certainly result in a corrupt machine. +#[derive(Debug)] +pub(super) struct Remapper { + /// A map from the index of a state to its pre-multiplied identifier. + /// + /// When a state is swapped with another, then their corresponding + /// locations in this map are also swapped. Thus, its new position will + /// still point to its old pre-multiplied StateID. + /// + /// While there is a bit more to it, this then allows us to rewrite the + /// state IDs in a DFA's transition table in a single pass. This is done + /// by iterating over every ID in this map, then iterating over each + /// transition for the state at that ID and re-mapping the transition from + /// `old_id` to `map[dfa.to_index(old_id)]`. That is, we find the position + /// in this map where `old_id` *started*, and set it to where it ended up + /// after all swaps have been completed. + map: Vec, + /// A mapper from state index to state ID (and back). + idxmap: IndexMapper, +} + +impl Remapper { + /// Create a new remapper from the given remappable implementation. The + /// remapper can then be used to swap states. The remappable value given + /// here must the same one given to `swap` and `remap`. + pub(super) fn new(r: &impl Remappable) -> Remapper { + let idxmap = IndexMapper { stride2: r.stride2() }; + let map = (0..r.state_len()).map(|i| idxmap.to_state_id(i)).collect(); + Remapper { map, idxmap } + } + + /// Swap two states. Once this is called, callers must follow through to + /// call `remap`, or else it's possible for the underlying remappable + /// value to be in a corrupt state. 
+ pub(super) fn swap( + &mut self, + r: &mut impl Remappable, + id1: StateID, + id2: StateID, + ) { + if id1 == id2 { + return; + } + r.swap_states(id1, id2); + self.map.swap(self.idxmap.to_index(id1), self.idxmap.to_index(id2)); + } + + /// Complete the remapping process by rewriting all state IDs in the + /// remappable value according to the swaps performed. + pub(super) fn remap(mut self, r: &mut impl Remappable) { + // Update the map to account for states that have been swapped + // multiple times. For example, if (A, C) and (C, G) are swapped, then + // transitions previously pointing to A should now point to G. But if + // we don't update our map, they will erroneously be set to C. All we + // do is follow the swaps in our map until we see our original state + // ID. + // + // The intuition here is to think about how changes are made to the + // map: only through pairwise swaps. That means that starting at any + // given state, it is always possible to find the loop back to that + // state by following the swaps represented in the map (which might be + // 0 swaps). + // + // We are also careful to clone the map before starting in order to + // freeze it. We use the frozen map to find our loops, since we need to + // update our map as well. Without freezing it, our updates could break + // the loops referenced above and produce incorrect results. + let oldmap = self.map.clone(); + for i in 0..r.state_len() { + let cur_id = self.idxmap.to_state_id(i); + let mut new_id = oldmap[i]; + if cur_id == new_id { + continue; + } + loop { + let id = oldmap[self.idxmap.to_index(new_id)]; + if cur_id == id { + self.map[i] = new_id; + break; + } + new_id = id; + } + } + r.remap(|next| self.map[self.idxmap.to_index(next)]); + } +} + +/// A simple type for mapping between state indices and state IDs. +/// +/// The reason why this exists is because state IDs are "premultiplied." 
That +/// is, in order to get to the transitions for a particular state, one need +/// only use the state ID as-is, instead of having to multiple it by transition +/// table's stride. +/// +/// The downside of this is that it's inconvenient to map between state IDs +/// using a dense map, e.g., Vec. That's because state IDs look like +/// `0`, `0+stride`, `0+2*stride`, `0+3*stride`, etc., instead of `0`, `1`, +/// `2`, `3`, etc. +/// +/// Since our state IDs are premultiplied, we can convert back-and-forth +/// between IDs and indices by simply unmultiplying the IDs and multiplying the +/// indices. +#[derive(Debug)] +struct IndexMapper { + /// The power of 2 corresponding to the stride of the corresponding + /// transition table. 'id >> stride2' de-multiplies an ID while 'index << + /// stride2' pre-multiplies an index to an ID. + stride2: usize, +} + +impl IndexMapper { + /// Convert a state ID to a state index. + fn to_index(&self, id: StateID) -> usize { + id.as_usize() >> self.stride2 + } + + /// Convert a state index to a state ID. + fn to_state_id(&self, index: usize) -> StateID { + // CORRECTNESS: If the given index is not valid, then it is not + // required for this to panic or return a valid state ID. We'll "just" + // wind up with panics or silent logic errors at some other point. 
+ StateID::new_unchecked(index << self.stride2) + } +} + +#[cfg(feature = "dfa-build")] +mod dense { + use crate::{dfa::dense::OwnedDFA, util::primitives::StateID}; + + use super::Remappable; + + impl Remappable for OwnedDFA { + fn state_len(&self) -> usize { + OwnedDFA::state_len(self) + } + + fn stride2(&self) -> usize { + OwnedDFA::stride2(self) + } + + fn swap_states(&mut self, id1: StateID, id2: StateID) { + OwnedDFA::swap_states(self, id1, id2) + } + + fn remap(&mut self, map: impl Fn(StateID) -> StateID) { + OwnedDFA::remap(self, map) + } + } +} + +#[cfg(feature = "dfa-onepass")] +mod onepass { + use crate::{dfa::onepass::DFA, util::primitives::StateID}; + + use super::Remappable; + + impl Remappable for DFA { + fn state_len(&self) -> usize { + DFA::state_len(self) + } + + fn stride2(&self) -> usize { + // We don't do pre-multiplication for the one-pass DFA, so + // returning 0 has the effect of making state IDs and state indices + // equivalent. + 0 + } + + fn swap_states(&mut self, id1: StateID, id2: StateID) { + DFA::swap_states(self, id1, id2) + } + + fn remap(&mut self, map: impl Fn(StateID) -> StateID) { + DFA::remap(self, map) + } + } +} diff --git a/vendor/regex-automata/src/dfa/search.rs b/vendor/regex-automata/src/dfa/search.rs new file mode 100644 index 0000000..5a82261 --- /dev/null +++ b/vendor/regex-automata/src/dfa/search.rs @@ -0,0 +1,644 @@ +use crate::{ + dfa::{ + accel, + automaton::{Automaton, OverlappingState}, + }, + util::{ + prefilter::Prefilter, + primitives::StateID, + search::{Anchored, HalfMatch, Input, Span}, + }, + MatchError, +}; + +#[inline(never)] +pub fn find_fwd( + dfa: &A, + input: &Input<'_>, +) -> Result, MatchError> { + if input.is_done() { + return Ok(None); + } + let pre = if input.get_anchored().is_anchored() { + None + } else { + dfa.get_prefilter() + }; + // Searching with a pattern ID is always anchored, so we should never use + // a prefilter. 
+ if pre.is_some() { + if input.get_earliest() { + find_fwd_imp(dfa, input, pre, true) + } else { + find_fwd_imp(dfa, input, pre, false) + } + } else { + if input.get_earliest() { + find_fwd_imp(dfa, input, None, true) + } else { + find_fwd_imp(dfa, input, None, false) + } + } +} + +#[cfg_attr(feature = "perf-inline", inline(always))] +fn find_fwd_imp( + dfa: &A, + input: &Input<'_>, + pre: Option<&'_ Prefilter>, + earliest: bool, +) -> Result, MatchError> { + // See 'prefilter_restart' docs for explanation. + let universal_start = dfa.universal_start_state(Anchored::No).is_some(); + let mut mat = None; + let mut sid = init_fwd(dfa, input)?; + let mut at = input.start(); + // This could just be a closure, but then I think it would be unsound + // because it would need to be safe to invoke. This way, the lack of safety + // is clearer in the code below. + macro_rules! next_unchecked { + ($sid:expr, $at:expr) => {{ + let byte = *input.haystack().get_unchecked($at); + dfa.next_state_unchecked($sid, byte) + }}; + } + + if let Some(ref pre) = pre { + let span = Span::from(at..input.end()); + // If a prefilter doesn't report false positives, then we don't need to + // touch the DFA at all. However, since all matches include the pattern + // ID, and the prefilter infrastructure doesn't report pattern IDs, we + // limit this optimization to cases where there is exactly one pattern. + // In that case, any match must be the 0th pattern. + match pre.find(input.haystack(), span) { + None => return Ok(mat), + Some(ref span) => { + at = span.start; + if !universal_start { + sid = prefilter_restart(dfa, &input, at)?; + } + } + } + } + while at < input.end() { + // SAFETY: There are two safety invariants we need to uphold here in + // the loops below: that 'sid' and 'prev_sid' are valid state IDs + // for this DFA, and that 'at' is a valid index into 'haystack'. 
+ // For the former, we rely on the invariant that next_state* and + // start_state_forward always returns a valid state ID (given a valid + // state ID in the former case). For the latter safety invariant, we + // always guard unchecked access with a check that 'at' is less than + // 'end', where 'end <= haystack.len()'. In the unrolled loop below, we + // ensure that 'at' is always in bounds. + // + // PERF: See a similar comment in src/hybrid/search.rs that justifies + // this extra work to make the search loop fast. The same reasoning and + // benchmarks apply here. + let mut prev_sid; + while at < input.end() { + prev_sid = unsafe { next_unchecked!(sid, at) }; + if dfa.is_special_state(prev_sid) || at + 3 >= input.end() { + core::mem::swap(&mut prev_sid, &mut sid); + break; + } + at += 1; + + sid = unsafe { next_unchecked!(prev_sid, at) }; + if dfa.is_special_state(sid) { + break; + } + at += 1; + + prev_sid = unsafe { next_unchecked!(sid, at) }; + if dfa.is_special_state(prev_sid) { + core::mem::swap(&mut prev_sid, &mut sid); + break; + } + at += 1; + + sid = unsafe { next_unchecked!(prev_sid, at) }; + if dfa.is_special_state(sid) { + break; + } + at += 1; + } + if dfa.is_special_state(sid) { + if dfa.is_start_state(sid) { + if let Some(ref pre) = pre { + let span = Span::from(at..input.end()); + match pre.find(input.haystack(), span) { + None => return Ok(mat), + Some(ref span) => { + // We want to skip any update to 'at' below + // at the end of this iteration and just + // jump immediately back to the next state + // transition at the leading position of the + // candidate match. + // + // ... but only if we actually made progress + // with our prefilter, otherwise if the start + // state has a self-loop, we can get stuck. 
+ if span.start > at { + at = span.start; + if !universal_start { + sid = prefilter_restart(dfa, &input, at)?; + } + continue; + } + } + } + } else if dfa.is_accel_state(sid) { + let needles = dfa.accelerator(sid); + at = accel::find_fwd(needles, input.haystack(), at + 1) + .unwrap_or(input.end()); + continue; + } + } else if dfa.is_match_state(sid) { + let pattern = dfa.match_pattern(sid, 0); + mat = Some(HalfMatch::new(pattern, at)); + if earliest { + return Ok(mat); + } + if dfa.is_accel_state(sid) { + let needles = dfa.accelerator(sid); + at = accel::find_fwd(needles, input.haystack(), at + 1) + .unwrap_or(input.end()); + continue; + } + } else if dfa.is_accel_state(sid) { + let needs = dfa.accelerator(sid); + at = accel::find_fwd(needs, input.haystack(), at + 1) + .unwrap_or(input.end()); + continue; + } else if dfa.is_dead_state(sid) { + return Ok(mat); + } else { + // It's important that this is a debug_assert, since this can + // actually be tripped even if DFA::from_bytes succeeds and + // returns a supposedly valid DFA. + return Err(MatchError::quit(input.haystack()[at], at)); + } + } + at += 1; + } + eoi_fwd(dfa, input, &mut sid, &mut mat)?; + Ok(mat) +} + +#[inline(never)] +pub fn find_rev( + dfa: &A, + input: &Input<'_>, +) -> Result, MatchError> { + if input.is_done() { + return Ok(None); + } + if input.get_earliest() { + find_rev_imp(dfa, input, true) + } else { + find_rev_imp(dfa, input, false) + } +} + +#[cfg_attr(feature = "perf-inline", inline(always))] +fn find_rev_imp( + dfa: &A, + input: &Input<'_>, + earliest: bool, +) -> Result, MatchError> { + let mut mat = None; + let mut sid = init_rev(dfa, input)?; + // In reverse search, the loop below can't handle the case of searching an + // empty slice. Ideally we could write something congruent to the forward + // search, i.e., 'while at >= start', but 'start' might be 0. Since we use + // an unsigned offset, 'at >= 0' is trivially always true. 
We could avoid + // this extra case handling by using a signed offset, but Rust makes it + // annoying to do. So... We just handle the empty case separately. + if input.start() == input.end() { + eoi_rev(dfa, input, &mut sid, &mut mat)?; + return Ok(mat); + } + + let mut at = input.end() - 1; + macro_rules! next_unchecked { + ($sid:expr, $at:expr) => {{ + let byte = *input.haystack().get_unchecked($at); + dfa.next_state_unchecked($sid, byte) + }}; + } + loop { + // SAFETY: See comments in 'find_fwd' for a safety argument. + let mut prev_sid; + while at >= input.start() { + prev_sid = unsafe { next_unchecked!(sid, at) }; + if dfa.is_special_state(prev_sid) + || at <= input.start().saturating_add(3) + { + core::mem::swap(&mut prev_sid, &mut sid); + break; + } + at -= 1; + + sid = unsafe { next_unchecked!(prev_sid, at) }; + if dfa.is_special_state(sid) { + break; + } + at -= 1; + + prev_sid = unsafe { next_unchecked!(sid, at) }; + if dfa.is_special_state(prev_sid) { + core::mem::swap(&mut prev_sid, &mut sid); + break; + } + at -= 1; + + sid = unsafe { next_unchecked!(prev_sid, at) }; + if dfa.is_special_state(sid) { + break; + } + at -= 1; + } + if dfa.is_special_state(sid) { + if dfa.is_start_state(sid) { + if dfa.is_accel_state(sid) { + let needles = dfa.accelerator(sid); + at = accel::find_rev(needles, input.haystack(), at) + .map(|i| i + 1) + .unwrap_or(input.start()); + } + } else if dfa.is_match_state(sid) { + let pattern = dfa.match_pattern(sid, 0); + // Since reverse searches report the beginning of a match + // and the beginning is inclusive (not exclusive like the + // end of a match), we add 1 to make it inclusive. 
+ mat = Some(HalfMatch::new(pattern, at + 1)); + if earliest { + return Ok(mat); + } + if dfa.is_accel_state(sid) { + let needles = dfa.accelerator(sid); + at = accel::find_rev(needles, input.haystack(), at) + .map(|i| i + 1) + .unwrap_or(input.start()); + } + } else if dfa.is_accel_state(sid) { + let needles = dfa.accelerator(sid); + // If the accelerator returns nothing, why don't we quit the + // search? Well, if the accelerator doesn't find anything, that + // doesn't mean we don't have a match. It just means that we + // can't leave the current state given one of the 255 possible + // byte values. However, there might be an EOI transition. So + // we set 'at' to the end of the haystack, which will cause + // this loop to stop and fall down into the EOI transition. + at = accel::find_rev(needles, input.haystack(), at) + .map(|i| i + 1) + .unwrap_or(input.start()); + } else if dfa.is_dead_state(sid) { + return Ok(mat); + } else { + return Err(MatchError::quit(input.haystack()[at], at)); + } + } + if at == input.start() { + break; + } + at -= 1; + } + eoi_rev(dfa, input, &mut sid, &mut mat)?; + Ok(mat) +} + +#[inline(never)] +pub fn find_overlapping_fwd( + dfa: &A, + input: &Input<'_>, + state: &mut OverlappingState, +) -> Result<(), MatchError> { + state.mat = None; + if input.is_done() { + return Ok(()); + } + let pre = if input.get_anchored().is_anchored() { + None + } else { + dfa.get_prefilter() + }; + if pre.is_some() { + find_overlapping_fwd_imp(dfa, input, pre, state) + } else { + find_overlapping_fwd_imp(dfa, input, None, state) + } +} + +#[cfg_attr(feature = "perf-inline", inline(always))] +fn find_overlapping_fwd_imp( + dfa: &A, + input: &Input<'_>, + pre: Option<&'_ Prefilter>, + state: &mut OverlappingState, +) -> Result<(), MatchError> { + // See 'prefilter_restart' docs for explanation. 
+ let universal_start = dfa.universal_start_state(Anchored::No).is_some(); + let mut sid = match state.id { + None => { + state.at = input.start(); + init_fwd(dfa, input)? + } + Some(sid) => { + if let Some(match_index) = state.next_match_index { + let match_len = dfa.match_len(sid); + if match_index < match_len { + state.next_match_index = Some(match_index + 1); + let pattern = dfa.match_pattern(sid, match_index); + state.mat = Some(HalfMatch::new(pattern, state.at)); + return Ok(()); + } + } + // Once we've reported all matches at a given position, we need to + // advance the search to the next position. + state.at += 1; + if state.at > input.end() { + return Ok(()); + } + sid + } + }; + + // NOTE: We don't optimize the crap out of this routine primarily because + // it seems like most find_overlapping searches will have higher match + // counts, and thus, throughput is perhaps not as important. But if you + // have a use case for something faster, feel free to file an issue. + while state.at < input.end() { + sid = dfa.next_state(sid, input.haystack()[state.at]); + if dfa.is_special_state(sid) { + state.id = Some(sid); + if dfa.is_start_state(sid) { + if let Some(ref pre) = pre { + let span = Span::from(state.at..input.end()); + match pre.find(input.haystack(), span) { + None => return Ok(()), + Some(ref span) => { + if span.start > state.at { + state.at = span.start; + if !universal_start { + sid = prefilter_restart( + dfa, &input, state.at, + )?; + } + continue; + } + } + } + } else if dfa.is_accel_state(sid) { + let needles = dfa.accelerator(sid); + state.at = accel::find_fwd( + needles, + input.haystack(), + state.at + 1, + ) + .unwrap_or(input.end()); + continue; + } + } else if dfa.is_match_state(sid) { + state.next_match_index = Some(1); + let pattern = dfa.match_pattern(sid, 0); + state.mat = Some(HalfMatch::new(pattern, state.at)); + return Ok(()); + } else if dfa.is_accel_state(sid) { + let needs = dfa.accelerator(sid); + // If the accelerator returns 
nothing, why don't we quit the + // search? Well, if the accelerator doesn't find anything, that + // doesn't mean we don't have a match. It just means that we + // can't leave the current state given one of the 255 possible + // byte values. However, there might be an EOI transition. So + // we set 'at' to the end of the haystack, which will cause + // this loop to stop and fall down into the EOI transition. + state.at = + accel::find_fwd(needs, input.haystack(), state.at + 1) + .unwrap_or(input.end()); + continue; + } else if dfa.is_dead_state(sid) { + return Ok(()); + } else { + return Err(MatchError::quit( + input.haystack()[state.at], + state.at, + )); + } + } + state.at += 1; + } + + let result = eoi_fwd(dfa, input, &mut sid, &mut state.mat); + state.id = Some(sid); + if state.mat.is_some() { + // '1' is always correct here since if we get to this point, this + // always corresponds to the first (index '0') match discovered at + // this position. So the next match to report at this position (if + // it exists) is at index '1'. + state.next_match_index = Some(1); + } + result +} + +#[inline(never)] +pub(crate) fn find_overlapping_rev( + dfa: &A, + input: &Input<'_>, + state: &mut OverlappingState, +) -> Result<(), MatchError> { + state.mat = None; + if input.is_done() { + return Ok(()); + } + let mut sid = match state.id { + None => { + let sid = init_rev(dfa, input)?; + state.id = Some(sid); + if input.start() == input.end() { + state.rev_eoi = true; + } else { + state.at = input.end() - 1; + } + sid + } + Some(sid) => { + if let Some(match_index) = state.next_match_index { + let match_len = dfa.match_len(sid); + if match_index < match_len { + state.next_match_index = Some(match_index + 1); + let pattern = dfa.match_pattern(sid, match_index); + state.mat = Some(HalfMatch::new(pattern, state.at)); + return Ok(()); + } + } + // Once we've reported all matches at a given position, we need + // to advance the search to the next position. 
However, if we've + // already followed the EOI transition, then we know we're done + // with the search and there cannot be any more matches to report. + if state.rev_eoi { + return Ok(()); + } else if state.at == input.start() { + // At this point, we should follow the EOI transition. This + // will cause us the skip the main loop below and fall through + // to the final 'eoi_rev' transition. + state.rev_eoi = true; + } else { + // We haven't hit the end of the search yet, so move on. + state.at -= 1; + } + sid + } + }; + while !state.rev_eoi { + sid = dfa.next_state(sid, input.haystack()[state.at]); + if dfa.is_special_state(sid) { + state.id = Some(sid); + if dfa.is_start_state(sid) { + if dfa.is_accel_state(sid) { + let needles = dfa.accelerator(sid); + state.at = + accel::find_rev(needles, input.haystack(), state.at) + .map(|i| i + 1) + .unwrap_or(input.start()); + } + } else if dfa.is_match_state(sid) { + state.next_match_index = Some(1); + let pattern = dfa.match_pattern(sid, 0); + state.mat = Some(HalfMatch::new(pattern, state.at + 1)); + return Ok(()); + } else if dfa.is_accel_state(sid) { + let needles = dfa.accelerator(sid); + // If the accelerator returns nothing, why don't we quit the + // search? Well, if the accelerator doesn't find anything, that + // doesn't mean we don't have a match. It just means that we + // can't leave the current state given one of the 255 possible + // byte values. However, there might be an EOI transition. So + // we set 'at' to the end of the haystack, which will cause + // this loop to stop and fall down into the EOI transition. 
+ state.at = + accel::find_rev(needles, input.haystack(), state.at) + .map(|i| i + 1) + .unwrap_or(input.start()); + } else if dfa.is_dead_state(sid) { + return Ok(()); + } else { + return Err(MatchError::quit( + input.haystack()[state.at], + state.at, + )); + } + } + if state.at == input.start() { + break; + } + state.at -= 1; + } + + let result = eoi_rev(dfa, input, &mut sid, &mut state.mat); + state.rev_eoi = true; + state.id = Some(sid); + if state.mat.is_some() { + // '1' is always correct here since if we get to this point, this + // always corresponds to the first (index '0') match discovered at + // this position. So the next match to report at this position (if + // it exists) is at index '1'. + state.next_match_index = Some(1); + } + result +} + +#[cfg_attr(feature = "perf-inline", inline(always))] +fn init_fwd( + dfa: &A, + input: &Input<'_>, +) -> Result { + let sid = dfa.start_state_forward(input)?; + // Start states can never be match states, since all matches are delayed + // by 1 byte. + debug_assert!(!dfa.is_match_state(sid)); + Ok(sid) +} + +#[cfg_attr(feature = "perf-inline", inline(always))] +fn init_rev( + dfa: &A, + input: &Input<'_>, +) -> Result { + let sid = dfa.start_state_reverse(input)?; + // Start states can never be match states, since all matches are delayed + // by 1 byte. 
+ debug_assert!(!dfa.is_match_state(sid)); + Ok(sid) +} + +#[cfg_attr(feature = "perf-inline", inline(always))] +fn eoi_fwd( + dfa: &A, + input: &Input<'_>, + sid: &mut StateID, + mat: &mut Option, +) -> Result<(), MatchError> { + let sp = input.get_span(); + match input.haystack().get(sp.end) { + Some(&b) => { + *sid = dfa.next_state(*sid, b); + if dfa.is_match_state(*sid) { + let pattern = dfa.match_pattern(*sid, 0); + *mat = Some(HalfMatch::new(pattern, sp.end)); + } else if dfa.is_quit_state(*sid) { + return Err(MatchError::quit(b, sp.end)); + } + } + None => { + *sid = dfa.next_eoi_state(*sid); + if dfa.is_match_state(*sid) { + let pattern = dfa.match_pattern(*sid, 0); + *mat = Some(HalfMatch::new(pattern, input.haystack().len())); + } + } + } + Ok(()) +} + +#[cfg_attr(feature = "perf-inline", inline(always))] +fn eoi_rev( + dfa: &A, + input: &Input<'_>, + sid: &mut StateID, + mat: &mut Option, +) -> Result<(), MatchError> { + let sp = input.get_span(); + if sp.start > 0 { + let byte = input.haystack()[sp.start - 1]; + *sid = dfa.next_state(*sid, byte); + if dfa.is_match_state(*sid) { + let pattern = dfa.match_pattern(*sid, 0); + *mat = Some(HalfMatch::new(pattern, sp.start)); + } else if dfa.is_quit_state(*sid) { + return Err(MatchError::quit(byte, sp.start - 1)); + } + } else { + *sid = dfa.next_eoi_state(*sid); + if dfa.is_match_state(*sid) { + let pattern = dfa.match_pattern(*sid, 0); + *mat = Some(HalfMatch::new(pattern, 0)); + } + } + Ok(()) +} + +/// Re-compute the starting state that a DFA should be in after finding a +/// prefilter candidate match at the position `at`. +/// +/// The function with the same name has a bit more docs in hybrid/search.rs. 
+#[cfg_attr(feature = "perf-inline", inline(always))] +fn prefilter_restart( + dfa: &A, + input: &Input<'_>, + at: usize, +) -> Result { + let mut input = input.clone(); + input.set_start(at); + init_fwd(dfa, &input) +} diff --git a/vendor/regex-automata/src/dfa/sparse.rs b/vendor/regex-automata/src/dfa/sparse.rs new file mode 100644 index 0000000..d461e0a --- /dev/null +++ b/vendor/regex-automata/src/dfa/sparse.rs @@ -0,0 +1,2639 @@ +/*! +Types and routines specific to sparse DFAs. + +This module is the home of [`sparse::DFA`](DFA). + +Unlike the [`dense`] module, this module does not contain a builder or +configuration specific for sparse DFAs. Instead, the intended way to build a +sparse DFA is either by using a default configuration with its constructor +[`sparse::DFA::new`](DFA::new), or by first configuring the construction of a +dense DFA with [`dense::Builder`] and then calling [`dense::DFA::to_sparse`]. +For example, this configures a sparse DFA to do an overlapping search: + +``` +use regex_automata::{ + dfa::{Automaton, OverlappingState, dense}, + HalfMatch, Input, MatchKind, +}; + +let dense_re = dense::Builder::new() + .configure(dense::Config::new().match_kind(MatchKind::All)) + .build(r"Samwise|Sam")?; +let sparse_re = dense_re.to_sparse()?; + +// Setup our haystack and initial start state. +let input = Input::new("Samwise"); +let mut state = OverlappingState::start(); + +// First, 'Sam' will match. +sparse_re.try_search_overlapping_fwd(&input, &mut state)?; +assert_eq!(Some(HalfMatch::must(0, 3)), state.get_match()); + +// And now 'Samwise' will match. 
+sparse_re.try_search_overlapping_fwd(&input, &mut state)?; +assert_eq!(Some(HalfMatch::must(0, 7)), state.get_match()); +# Ok::<(), Box>(()) +``` +*/ + +#[cfg(feature = "dfa-build")] +use core::iter; +use core::{ + convert::{TryFrom, TryInto}, + fmt, + mem::size_of, +}; + +#[cfg(feature = "dfa-build")] +use alloc::{vec, vec::Vec}; + +#[cfg(feature = "dfa-build")] +use crate::dfa::dense::{self, BuildError}; +use crate::{ + dfa::{ + automaton::{fmt_state_indicator, Automaton, StartError}, + dense::Flags, + special::Special, + StartKind, DEAD, + }, + util::{ + alphabet::{ByteClasses, ByteSet}, + escape::DebugByte, + int::{Pointer, Usize, U16, U32}, + prefilter::Prefilter, + primitives::{PatternID, StateID}, + search::Anchored, + start::{self, Start, StartByteMap}, + wire::{self, DeserializeError, Endian, SerializeError}, + }, +}; + +const LABEL: &str = "rust-regex-automata-dfa-sparse"; +const VERSION: u32 = 2; + +/// A sparse deterministic finite automaton (DFA) with variable sized states. +/// +/// In contrast to a [dense::DFA], a sparse DFA uses a more space efficient +/// representation for its transitions. Consequently, sparse DFAs may use much +/// less memory than dense DFAs, but this comes at a price. In particular, +/// reading the more space efficient transitions takes more work, and +/// consequently, searching using a sparse DFA is typically slower than a dense +/// DFA. +/// +/// A sparse DFA can be built using the default configuration via the +/// [`DFA::new`] constructor. Otherwise, one can configure various aspects of a +/// dense DFA via [`dense::Builder`], and then convert a dense DFA to a sparse +/// DFA using [`dense::DFA::to_sparse`]. +/// +/// In general, a sparse DFA supports all the same search operations as a dense +/// DFA. +/// +/// Making the choice between a dense and sparse DFA depends on your specific +/// work load. If you can sacrifice a bit of search time performance, then a +/// sparse DFA might be the best choice. 
In particular, while sparse DFAs are +/// probably always slower than dense DFAs, you may find that they are easily +/// fast enough for your purposes! +/// +/// # Type parameters +/// +/// A `DFA` has one type parameter, `T`, which is used to represent the parts +/// of a sparse DFA. `T` is typically a `Vec` or a `&[u8]`. +/// +/// # The `Automaton` trait +/// +/// This type implements the [`Automaton`] trait, which means it can be used +/// for searching. For example: +/// +/// ``` +/// use regex_automata::{dfa::{Automaton, sparse::DFA}, HalfMatch, Input}; +/// +/// let dfa = DFA::new("foo[0-9]+")?; +/// let expected = Some(HalfMatch::must(0, 8)); +/// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345"))?); +/// # Ok::<(), Box>(()) +/// ``` +#[derive(Clone)] +pub struct DFA { + // When compared to a dense DFA, a sparse DFA *looks* a lot simpler + // representation-wise. In reality, it is perhaps more complicated. Namely, + // in a dense DFA, all information needs to be very cheaply accessible + // using only state IDs. In a sparse DFA however, each state uses a + // variable amount of space because each state encodes more information + // than just its transitions. Each state also includes an accelerator if + // one exists, along with the matching pattern IDs if the state is a match + // state. + // + // That is, a lot of the complexity is pushed down into how each state + // itself is represented. + tt: Transitions, + st: StartTable, + special: Special, + pre: Option, + quitset: ByteSet, + flags: Flags, +} + +#[cfg(feature = "dfa-build")] +impl DFA> { + /// Parse the given regular expression using a default configuration and + /// return the corresponding sparse DFA. + /// + /// If you want a non-default configuration, then use the + /// [`dense::Builder`] to set your own configuration, and then call + /// [`dense::DFA::to_sparse`] to create a sparse DFA. 
+ /// + /// # Example + /// + /// ``` + /// use regex_automata::{dfa::{Automaton, sparse}, HalfMatch, Input}; + /// + /// let dfa = sparse::DFA::new("foo[0-9]+bar")?; + /// + /// let expected = Some(HalfMatch::must(0, 11)); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345bar"))?); + /// # Ok::<(), Box>(()) + /// ``` + #[cfg(feature = "syntax")] + pub fn new(pattern: &str) -> Result>, BuildError> { + dense::Builder::new() + .build(pattern) + .and_then(|dense| dense.to_sparse()) + } + + /// Parse the given regular expressions using a default configuration and + /// return the corresponding multi-DFA. + /// + /// If you want a non-default configuration, then use the + /// [`dense::Builder`] to set your own configuration, and then call + /// [`dense::DFA::to_sparse`] to create a sparse DFA. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{dfa::{Automaton, sparse}, HalfMatch, Input}; + /// + /// let dfa = sparse::DFA::new_many(&["[0-9]+", "[a-z]+"])?; + /// let expected = Some(HalfMatch::must(1, 3)); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345bar"))?); + /// # Ok::<(), Box>(()) + /// ``` + #[cfg(feature = "syntax")] + pub fn new_many>( + patterns: &[P], + ) -> Result>, BuildError> { + dense::Builder::new() + .build_many(patterns) + .and_then(|dense| dense.to_sparse()) + } +} + +#[cfg(feature = "dfa-build")] +impl DFA> { + /// Create a new DFA that matches every input. 
+ /// + /// # Example + /// + /// ``` + /// use regex_automata::{ + /// dfa::{Automaton, sparse}, + /// HalfMatch, Input, + /// }; + /// + /// let dfa = sparse::DFA::always_match()?; + /// + /// let expected = Some(HalfMatch::must(0, 0)); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new(""))?); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo"))?); + /// # Ok::<(), Box>(()) + /// ``` + pub fn always_match() -> Result>, BuildError> { + dense::DFA::always_match()?.to_sparse() + } + + /// Create a new sparse DFA that never matches any input. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{dfa::{Automaton, sparse}, Input}; + /// + /// let dfa = sparse::DFA::never_match()?; + /// assert_eq!(None, dfa.try_search_fwd(&Input::new(""))?); + /// assert_eq!(None, dfa.try_search_fwd(&Input::new("foo"))?); + /// # Ok::<(), Box>(()) + /// ``` + pub fn never_match() -> Result>, BuildError> { + dense::DFA::never_match()?.to_sparse() + } + + /// The implementation for constructing a sparse DFA from a dense DFA. + pub(crate) fn from_dense>( + dfa: &dense::DFA, + ) -> Result>, BuildError> { + // In order to build the transition table, we need to be able to write + // state identifiers for each of the "next" transitions in each state. + // Our state identifiers correspond to the byte offset in the + // transition table at which the state is encoded. Therefore, we do not + // actually know what the state identifiers are until we've allocated + // exactly as much space as we need for each state. Thus, construction + // of the transition table happens in two passes. + // + // In the first pass, we fill out the shell of each state, which + // includes the transition length, the input byte ranges and + // zero-filled space for the transitions and accelerators, if present. + // In this first pass, we also build up a map from the state identifier + // index of the dense DFA to the state identifier in this sparse DFA. 
+ // + // In the second pass, we fill in the transitions based on the map + // built in the first pass. + + // The capacity given here reflects a minimum. (Well, the true minimum + // is likely even bigger, but hopefully this saves a few reallocs.) + let mut sparse = Vec::with_capacity(StateID::SIZE * dfa.state_len()); + // This maps state indices from the dense DFA to StateIDs in the sparse + // DFA. We build out this map on the first pass, and then use it in the + // second pass to back-fill our transitions. + let mut remap: Vec = vec![DEAD; dfa.state_len()]; + for state in dfa.states() { + let pos = sparse.len(); + + remap[dfa.to_index(state.id())] = StateID::new(pos) + .map_err(|_| BuildError::too_many_states())?; + // zero-filled space for the transition length + sparse.push(0); + sparse.push(0); + + let mut transition_len = 0; + for (unit1, unit2, _) in state.sparse_transitions() { + match (unit1.as_u8(), unit2.as_u8()) { + (Some(b1), Some(b2)) => { + transition_len += 1; + sparse.push(b1); + sparse.push(b2); + } + (None, None) => {} + (Some(_), None) | (None, Some(_)) => { + // can never occur because sparse_transitions never + // groups EOI with any other transition. + unreachable!() + } + } + } + // Add dummy EOI transition. This is never actually read while + // searching, but having space equivalent to the total number + // of transitions is convenient. Otherwise, we'd need to track + // a different number of transitions for the byte ranges as for + // the 'next' states. + // + // N.B. The loop above is not guaranteed to yield the EOI + // transition, since it may point to a DEAD state. By putting + // it here, we always write the EOI transition, and thus + // guarantee that our transition length is >0. Why do we always + // need the EOI transition? Because in order to implement + // Automaton::next_eoi_state, this lets us just ask for the last + // transition. There are probably other/better ways to do this. 
+ transition_len += 1; + sparse.push(0); + sparse.push(0); + + // Check some assumptions about transition length. + assert_ne!( + transition_len, 0, + "transition length should be non-zero", + ); + assert!( + transition_len <= 257, + "expected transition length {} to be <= 257", + transition_len, + ); + + // Fill in the transition length. + // Since transition length is always <= 257, we use the most + // significant bit to indicate whether this is a match state or + // not. + let ntrans = if dfa.is_match_state(state.id()) { + transition_len | (1 << 15) + } else { + transition_len + }; + wire::NE::write_u16(ntrans, &mut sparse[pos..]); + + // zero-fill the actual transitions. + // Unwraps are OK since transition_length <= 257 and our minimum + // support usize size is 16-bits. + let zeros = usize::try_from(transition_len) + .unwrap() + .checked_mul(StateID::SIZE) + .unwrap(); + sparse.extend(iter::repeat(0).take(zeros)); + + // If this is a match state, write the pattern IDs matched by this + // state. + if dfa.is_match_state(state.id()) { + let plen = dfa.match_pattern_len(state.id()); + // Write the actual pattern IDs with a u32 length prefix. + // First, zero-fill space. + let mut pos = sparse.len(); + // Unwraps are OK since it's guaranteed that plen <= + // PatternID::LIMIT, which is in turn guaranteed to fit into a + // u32. + let zeros = size_of::() + .checked_mul(plen) + .unwrap() + .checked_add(size_of::()) + .unwrap(); + sparse.extend(iter::repeat(0).take(zeros)); + + // Now write the length prefix. + wire::NE::write_u32( + // Will never fail since u32::MAX is invalid pattern ID. + // Thus, the number of pattern IDs is representable by a + // u32. + plen.try_into().expect("pattern ID length fits in u32"), + &mut sparse[pos..], + ); + pos += size_of::(); + + // Now write the pattern IDs. 
+ for &pid in dfa.pattern_id_slice(state.id()) { + pos += wire::write_pattern_id::( + pid, + &mut sparse[pos..], + ); + } + } + + // And now add the accelerator, if one exists. An accelerator is + // at most 4 bytes and at least 1 byte. The first byte is the + // length, N. N bytes follow the length. The set of bytes that + // follow correspond (exhaustively) to the bytes that must be seen + // to leave this state. + let accel = dfa.accelerator(state.id()); + sparse.push(accel.len().try_into().unwrap()); + sparse.extend_from_slice(accel); + } + + let mut new = DFA { + tt: Transitions { + sparse, + classes: dfa.byte_classes().clone(), + state_len: dfa.state_len(), + pattern_len: dfa.pattern_len(), + }, + st: StartTable::from_dense_dfa(dfa, &remap)?, + special: dfa.special().remap(|id| remap[dfa.to_index(id)]), + pre: dfa.get_prefilter().map(|p| p.clone()), + quitset: dfa.quitset().clone(), + flags: dfa.flags().clone(), + }; + // And here's our second pass. Iterate over all of the dense states + // again, and update the transitions in each of the states in the + // sparse DFA. + for old_state in dfa.states() { + let new_id = remap[dfa.to_index(old_state.id())]; + let mut new_state = new.tt.state_mut(new_id); + let sparse = old_state.sparse_transitions(); + for (i, (_, _, next)) in sparse.enumerate() { + let next = remap[dfa.to_index(next)]; + new_state.set_next_at(i, next); + } + } + debug!( + "created sparse DFA, memory usage: {} (dense memory usage: {})", + new.memory_usage(), + dfa.memory_usage(), + ); + Ok(new) + } +} + +impl> DFA { + /// Cheaply return a borrowed version of this sparse DFA. Specifically, the + /// DFA returned always uses `&[u8]` for its transitions. + pub fn as_ref<'a>(&'a self) -> DFA<&'a [u8]> { + DFA { + tt: self.tt.as_ref(), + st: self.st.as_ref(), + special: self.special, + pre: self.pre.clone(), + quitset: self.quitset, + flags: self.flags, + } + } + + /// Return an owned version of this sparse DFA. 
Specifically, the DFA + /// returned always uses `Vec` for its transitions. + /// + /// Effectively, this returns a sparse DFA whose transitions live on the + /// heap. + #[cfg(feature = "alloc")] + pub fn to_owned(&self) -> DFA> { + DFA { + tt: self.tt.to_owned(), + st: self.st.to_owned(), + special: self.special, + pre: self.pre.clone(), + quitset: self.quitset, + flags: self.flags, + } + } + + /// Returns the starting state configuration for this DFA. + /// + /// The default is [`StartKind::Both`], which means the DFA supports both + /// unanchored and anchored searches. However, this can generally lead to + /// bigger DFAs. Therefore, a DFA might be compiled with support for just + /// unanchored or anchored searches. In that case, running a search with + /// an unsupported configuration will panic. + pub fn start_kind(&self) -> StartKind { + self.st.kind + } + + /// Returns true only if this DFA has starting states for each pattern. + /// + /// When a DFA has starting states for each pattern, then a search with the + /// DFA can be configured to only look for anchored matches of a specific + /// pattern. Specifically, APIs like [`Automaton::try_search_fwd`] can + /// accept a [`Anchored::Pattern`] if and only if this method returns true. + /// Otherwise, an error will be returned. + /// + /// Note that if the DFA is empty, this always returns false. + pub fn starts_for_each_pattern(&self) -> bool { + self.st.pattern_len.is_some() + } + + /// Returns the equivalence classes that make up the alphabet for this DFA. + /// + /// Unless [`dense::Config::byte_classes`] was disabled, it is possible + /// that multiple distinct bytes are grouped into the same equivalence + /// class if it is impossible for them to discriminate between a match and + /// a non-match. This has the effect of reducing the overall alphabet size + /// and in turn potentially substantially reducing the size of the DFA's + /// transition table. 
+ /// + /// The downside of using equivalence classes like this is that every state + /// transition will automatically use this map to convert an arbitrary + /// byte to its corresponding equivalence class. In practice this has a + /// negligible impact on performance. + pub fn byte_classes(&self) -> &ByteClasses { + &self.tt.classes + } + + /// Returns the memory usage, in bytes, of this DFA. + /// + /// The memory usage is computed based on the number of bytes used to + /// represent this DFA. + /// + /// This does **not** include the stack size used up by this DFA. To + /// compute that, use `std::mem::size_of::()`. + pub fn memory_usage(&self) -> usize { + self.tt.memory_usage() + self.st.memory_usage() + } +} + +/// Routines for converting a sparse DFA to other representations, such as raw +/// bytes suitable for persistent storage. +impl> DFA { + /// Serialize this DFA as raw bytes to a `Vec` in little endian + /// format. + /// + /// The written bytes are guaranteed to be deserialized correctly and + /// without errors in a semver compatible release of this crate by a + /// `DFA`'s deserialization APIs (assuming all other criteria for the + /// deserialization APIs has been satisfied): + /// + /// * [`DFA::from_bytes`] + /// * [`DFA::from_bytes_unchecked`] + /// + /// Note that unlike a [`dense::DFA`]'s serialization methods, this does + /// not add any initial padding to the returned bytes. Padding isn't + /// required for sparse DFAs since they have no alignment requirements. + /// + /// # Example + /// + /// This example shows how to serialize and deserialize a DFA: + /// + /// ``` + /// use regex_automata::{dfa::{Automaton, sparse::DFA}, HalfMatch, Input}; + /// + /// // Compile our original DFA. + /// let original_dfa = DFA::new("foo[0-9]+")?; + /// + /// // N.B. We use native endianness here to make the example work, but + /// // using to_bytes_little_endian would work on a little endian target. 
+ /// let buf = original_dfa.to_bytes_native_endian(); + /// // Even if buf has initial padding, DFA::from_bytes will automatically + /// // ignore it. + /// let dfa: DFA<&[u8]> = DFA::from_bytes(&buf)?.0; + /// + /// let expected = Some(HalfMatch::must(0, 8)); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345"))?); + /// # Ok::<(), Box>(()) + /// ``` + #[cfg(feature = "dfa-build")] + pub fn to_bytes_little_endian(&self) -> Vec { + self.to_bytes::() + } + + /// Serialize this DFA as raw bytes to a `Vec` in big endian + /// format. + /// + /// The written bytes are guaranteed to be deserialized correctly and + /// without errors in a semver compatible release of this crate by a + /// `DFA`'s deserialization APIs (assuming all other criteria for the + /// deserialization APIs has been satisfied): + /// + /// * [`DFA::from_bytes`] + /// * [`DFA::from_bytes_unchecked`] + /// + /// Note that unlike a [`dense::DFA`]'s serialization methods, this does + /// not add any initial padding to the returned bytes. Padding isn't + /// required for sparse DFAs since they have no alignment requirements. + /// + /// # Example + /// + /// This example shows how to serialize and deserialize a DFA: + /// + /// ``` + /// use regex_automata::{dfa::{Automaton, sparse::DFA}, HalfMatch, Input}; + /// + /// // Compile our original DFA. + /// let original_dfa = DFA::new("foo[0-9]+")?; + /// + /// // N.B. We use native endianness here to make the example work, but + /// // using to_bytes_big_endian would work on a big endian target. + /// let buf = original_dfa.to_bytes_native_endian(); + /// // Even if buf has initial padding, DFA::from_bytes will automatically + /// // ignore it. 
+ /// let dfa: DFA<&[u8]> = DFA::from_bytes(&buf)?.0; + /// + /// let expected = Some(HalfMatch::must(0, 8)); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345"))?); + /// # Ok::<(), Box>(()) + /// ``` + #[cfg(feature = "dfa-build")] + pub fn to_bytes_big_endian(&self) -> Vec { + self.to_bytes::() + } + + /// Serialize this DFA as raw bytes to a `Vec` in native endian + /// format. + /// + /// The written bytes are guaranteed to be deserialized correctly and + /// without errors in a semver compatible release of this crate by a + /// `DFA`'s deserialization APIs (assuming all other criteria for the + /// deserialization APIs has been satisfied): + /// + /// * [`DFA::from_bytes`] + /// * [`DFA::from_bytes_unchecked`] + /// + /// Note that unlike a [`dense::DFA`]'s serialization methods, this does + /// not add any initial padding to the returned bytes. Padding isn't + /// required for sparse DFAs since they have no alignment requirements. + /// + /// Generally speaking, native endian format should only be used when + /// you know that the target you're compiling the DFA for matches the + /// endianness of the target on which you're compiling DFA. For example, + /// if serialization and deserialization happen in the same process or on + /// the same machine. Otherwise, when serializing a DFA for use in a + /// portable environment, you'll almost certainly want to serialize _both_ + /// a little endian and a big endian version and then load the correct one + /// based on the target's configuration. + /// + /// # Example + /// + /// This example shows how to serialize and deserialize a DFA: + /// + /// ``` + /// use regex_automata::{dfa::{Automaton, sparse::DFA}, HalfMatch, Input}; + /// + /// // Compile our original DFA. + /// let original_dfa = DFA::new("foo[0-9]+")?; + /// + /// let buf = original_dfa.to_bytes_native_endian(); + /// // Even if buf has initial padding, DFA::from_bytes will automatically + /// // ignore it. 
+ /// let dfa: DFA<&[u8]> = DFA::from_bytes(&buf)?.0; + /// + /// let expected = Some(HalfMatch::must(0, 8)); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345"))?); + /// # Ok::<(), Box>(()) + /// ``` + #[cfg(feature = "dfa-build")] + pub fn to_bytes_native_endian(&self) -> Vec { + self.to_bytes::() + } + + /// The implementation of the public `to_bytes` serialization methods, + /// which is generic over endianness. + #[cfg(feature = "dfa-build")] + fn to_bytes(&self) -> Vec { + let mut buf = vec![0; self.write_to_len()]; + // This should always succeed since the only possible serialization + // error is providing a buffer that's too small, but we've ensured that + // `buf` is big enough here. + self.write_to::(&mut buf).unwrap(); + buf + } + + /// Serialize this DFA as raw bytes to the given slice, in little endian + /// format. Upon success, the total number of bytes written to `dst` is + /// returned. + /// + /// The written bytes are guaranteed to be deserialized correctly and + /// without errors in a semver compatible release of this crate by a + /// `DFA`'s deserialization APIs (assuming all other criteria for the + /// deserialization APIs has been satisfied): + /// + /// * [`DFA::from_bytes`] + /// * [`DFA::from_bytes_unchecked`] + /// + /// # Errors + /// + /// This returns an error if the given destination slice is not big enough + /// to contain the full serialized DFA. If an error occurs, then nothing + /// is written to `dst`. + /// + /// # Example + /// + /// This example shows how to serialize and deserialize a DFA without + /// dynamic memory allocation. + /// + /// ``` + /// use regex_automata::{dfa::{Automaton, sparse::DFA}, HalfMatch, Input}; + /// + /// // Compile our original DFA. + /// let original_dfa = DFA::new("foo[0-9]+")?; + /// + /// // Create a 4KB buffer on the stack to store our serialized DFA. + /// let mut buf = [0u8; 4 * (1<<10)]; + /// // N.B. 
We use native endianness here to make the example work, but + /// // using write_to_little_endian would work on a little endian target. + /// let written = original_dfa.write_to_native_endian(&mut buf)?; + /// let dfa: DFA<&[u8]> = DFA::from_bytes(&buf[..written])?.0; + /// + /// let expected = Some(HalfMatch::must(0, 8)); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345"))?); + /// # Ok::<(), Box>(()) + /// ``` + pub fn write_to_little_endian( + &self, + dst: &mut [u8], + ) -> Result { + self.write_to::(dst) + } + + /// Serialize this DFA as raw bytes to the given slice, in big endian + /// format. Upon success, the total number of bytes written to `dst` is + /// returned. + /// + /// The written bytes are guaranteed to be deserialized correctly and + /// without errors in a semver compatible release of this crate by a + /// `DFA`'s deserialization APIs (assuming all other criteria for the + /// deserialization APIs has been satisfied): + /// + /// * [`DFA::from_bytes`] + /// * [`DFA::from_bytes_unchecked`] + /// + /// # Errors + /// + /// This returns an error if the given destination slice is not big enough + /// to contain the full serialized DFA. If an error occurs, then nothing + /// is written to `dst`. + /// + /// # Example + /// + /// This example shows how to serialize and deserialize a DFA without + /// dynamic memory allocation. + /// + /// ``` + /// use regex_automata::{dfa::{Automaton, sparse::DFA}, HalfMatch, Input}; + /// + /// // Compile our original DFA. + /// let original_dfa = DFA::new("foo[0-9]+")?; + /// + /// // Create a 4KB buffer on the stack to store our serialized DFA. + /// let mut buf = [0u8; 4 * (1<<10)]; + /// // N.B. We use native endianness here to make the example work, but + /// // using write_to_big_endian would work on a big endian target. 
+ /// let written = original_dfa.write_to_native_endian(&mut buf)?; + /// let dfa: DFA<&[u8]> = DFA::from_bytes(&buf[..written])?.0; + /// + /// let expected = Some(HalfMatch::must(0, 8)); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345"))?); + /// # Ok::<(), Box>(()) + /// ``` + pub fn write_to_big_endian( + &self, + dst: &mut [u8], + ) -> Result { + self.write_to::(dst) + } + + /// Serialize this DFA as raw bytes to the given slice, in native endian + /// format. Upon success, the total number of bytes written to `dst` is + /// returned. + /// + /// The written bytes are guaranteed to be deserialized correctly and + /// without errors in a semver compatible release of this crate by a + /// `DFA`'s deserialization APIs (assuming all other criteria for the + /// deserialization APIs has been satisfied): + /// + /// * [`DFA::from_bytes`] + /// * [`DFA::from_bytes_unchecked`] + /// + /// Generally speaking, native endian format should only be used when + /// you know that the target you're compiling the DFA for matches the + /// endianness of the target on which you're compiling DFA. For example, + /// if serialization and deserialization happen in the same process or on + /// the same machine. Otherwise, when serializing a DFA for use in a + /// portable environment, you'll almost certainly want to serialize _both_ + /// a little endian and a big endian version and then load the correct one + /// based on the target's configuration. + /// + /// # Errors + /// + /// This returns an error if the given destination slice is not big enough + /// to contain the full serialized DFA. If an error occurs, then nothing + /// is written to `dst`. + /// + /// # Example + /// + /// This example shows how to serialize and deserialize a DFA without + /// dynamic memory allocation. + /// + /// ``` + /// use regex_automata::{dfa::{Automaton, sparse::DFA}, HalfMatch, Input}; + /// + /// // Compile our original DFA. 
+ /// let original_dfa = DFA::new("foo[0-9]+")?; + /// + /// // Create a 4KB buffer on the stack to store our serialized DFA. + /// let mut buf = [0u8; 4 * (1<<10)]; + /// let written = original_dfa.write_to_native_endian(&mut buf)?; + /// let dfa: DFA<&[u8]> = DFA::from_bytes(&buf[..written])?.0; + /// + /// let expected = Some(HalfMatch::must(0, 8)); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345"))?); + /// # Ok::<(), Box>(()) + /// ``` + pub fn write_to_native_endian( + &self, + dst: &mut [u8], + ) -> Result { + self.write_to::(dst) + } + + /// The implementation of the public `write_to` serialization methods, + /// which is generic over endianness. + fn write_to( + &self, + dst: &mut [u8], + ) -> Result { + let mut nw = 0; + nw += wire::write_label(LABEL, &mut dst[nw..])?; + nw += wire::write_endianness_check::(&mut dst[nw..])?; + nw += wire::write_version::(VERSION, &mut dst[nw..])?; + nw += { + // Currently unused, intended for future flexibility + E::write_u32(0, &mut dst[nw..]); + size_of::() + }; + nw += self.flags.write_to::(&mut dst[nw..])?; + nw += self.tt.write_to::(&mut dst[nw..])?; + nw += self.st.write_to::(&mut dst[nw..])?; + nw += self.special.write_to::(&mut dst[nw..])?; + nw += self.quitset.write_to::(&mut dst[nw..])?; + Ok(nw) + } + + /// Return the total number of bytes required to serialize this DFA. + /// + /// This is useful for determining the size of the buffer required to pass + /// to one of the serialization routines: + /// + /// * [`DFA::write_to_little_endian`] + /// * [`DFA::write_to_big_endian`] + /// * [`DFA::write_to_native_endian`] + /// + /// Passing a buffer smaller than the size returned by this method will + /// result in a serialization error. + /// + /// # Example + /// + /// This example shows how to dynamically allocate enough room to serialize + /// a sparse DFA. + /// + /// ``` + /// use regex_automata::{dfa::{Automaton, sparse::DFA}, HalfMatch, Input}; + /// + /// // Compile our original DFA. 
+ /// let original_dfa = DFA::new("foo[0-9]+")?; + /// + /// let mut buf = vec![0; original_dfa.write_to_len()]; + /// let written = original_dfa.write_to_native_endian(&mut buf)?; + /// let dfa: DFA<&[u8]> = DFA::from_bytes(&buf[..written])?.0; + /// + /// let expected = Some(HalfMatch::must(0, 8)); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345"))?); + /// # Ok::<(), Box>(()) + /// ``` + pub fn write_to_len(&self) -> usize { + wire::write_label_len(LABEL) + + wire::write_endianness_check_len() + + wire::write_version_len() + + size_of::() // unused, intended for future flexibility + + self.flags.write_to_len() + + self.tt.write_to_len() + + self.st.write_to_len() + + self.special.write_to_len() + + self.quitset.write_to_len() + } +} + +impl<'a> DFA<&'a [u8]> { + /// Safely deserialize a sparse DFA with a specific state identifier + /// representation. Upon success, this returns both the deserialized DFA + /// and the number of bytes read from the given slice. Namely, the contents + /// of the slice beyond the DFA are not read. + /// + /// Deserializing a DFA using this routine will never allocate heap memory. + /// For safety purposes, the DFA's transitions will be verified such that + /// every transition points to a valid state. If this verification is too + /// costly, then a [`DFA::from_bytes_unchecked`] API is provided, which + /// will always execute in constant time. + /// + /// The bytes given must be generated by one of the serialization APIs + /// of a `DFA` using a semver compatible release of this crate. Those + /// include: + /// + /// * [`DFA::to_bytes_little_endian`] + /// * [`DFA::to_bytes_big_endian`] + /// * [`DFA::to_bytes_native_endian`] + /// * [`DFA::write_to_little_endian`] + /// * [`DFA::write_to_big_endian`] + /// * [`DFA::write_to_native_endian`] + /// + /// The `to_bytes` methods allocate and return a `Vec` for you. 
The + /// `write_to` methods do not allocate and write to an existing slice + /// (which may be on the stack). Since deserialization always uses the + /// native endianness of the target platform, the serialization API you use + /// should match the endianness of the target platform. (It's often a good + /// idea to generate serialized DFAs for both forms of endianness and then + /// load the correct one based on endianness.) + /// + /// # Errors + /// + /// Generally speaking, it's easier to state the conditions in which an + /// error is _not_ returned. All of the following must be true: + /// + /// * The bytes given must be produced by one of the serialization APIs + /// on this DFA, as mentioned above. + /// * The endianness of the target platform matches the endianness used to + /// serialized the provided DFA. + /// + /// If any of the above are not true, then an error will be returned. + /// + /// Note that unlike deserializing a [`dense::DFA`], deserializing a sparse + /// DFA has no alignment requirements. That is, an alignment of `1` is + /// valid. + /// + /// # Panics + /// + /// This routine will never panic for any input. + /// + /// # Example + /// + /// This example shows how to serialize a DFA to raw bytes, deserialize it + /// and then use it for searching. + /// + /// ``` + /// use regex_automata::{dfa::{Automaton, sparse::DFA}, HalfMatch, Input}; + /// + /// let initial = DFA::new("foo[0-9]+")?; + /// let bytes = initial.to_bytes_native_endian(); + /// let dfa: DFA<&[u8]> = DFA::from_bytes(&bytes)?.0; + /// + /// let expected = Some(HalfMatch::must(0, 8)); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345"))?); + /// # Ok::<(), Box>(()) + /// ``` + /// + /// # Example: loading a DFA from static memory + /// + /// One use case this library supports is the ability to serialize a + /// DFA to disk and then use `include_bytes!` to store it in a compiled + /// Rust program. 
Those bytes can then be cheaply deserialized into a + /// `DFA` structure at runtime and used for searching without having to + /// re-compile the DFA (which can be quite costly). + /// + /// We can show this in two parts. The first part is serializing the DFA to + /// a file: + /// + /// ```no_run + /// use regex_automata::dfa::sparse::DFA; + /// + /// let dfa = DFA::new("foo[0-9]+")?; + /// + /// // Write a big endian serialized version of this DFA to a file. + /// let bytes = dfa.to_bytes_big_endian(); + /// std::fs::write("foo.bigendian.dfa", &bytes)?; + /// + /// // Do it again, but this time for little endian. + /// let bytes = dfa.to_bytes_little_endian(); + /// std::fs::write("foo.littleendian.dfa", &bytes)?; + /// # Ok::<(), Box>(()) + /// ``` + /// + /// And now the second part is embedding the DFA into the compiled program + /// and deserializing it at runtime on first use. We use conditional + /// compilation to choose the correct endianness. We do not need to employ + /// any special tricks to ensure a proper alignment, since a sparse DFA has + /// no alignment requirements. + /// + /// ```no_run + /// use regex_automata::{ + /// dfa::{Automaton, sparse::DFA}, + /// util::lazy::Lazy, + /// HalfMatch, Input, + /// }; + /// + /// // This crate provides its own "lazy" type, kind of like + /// // lazy_static! or once_cell::sync::Lazy. But it works in no-alloc + /// // no-std environments and let's us write this using completely + /// // safe code. + /// static RE: Lazy> = Lazy::new(|| { + /// # const _: &str = stringify! 
{ + /// #[cfg(target_endian = "big")] + /// static BYTES: &[u8] = include_bytes!("foo.bigendian.dfa"); + /// #[cfg(target_endian = "little")] + /// static BYTES: &[u8] = include_bytes!("foo.littleendian.dfa"); + /// # }; + /// # static BYTES: &[u8] = b""; + /// + /// let (dfa, _) = DFA::from_bytes(BYTES) + /// .expect("serialized DFA should be valid"); + /// dfa + /// }); + /// + /// let expected = Ok(Some(HalfMatch::must(0, 8))); + /// assert_eq!(expected, RE.try_search_fwd(&Input::new("foo12345"))); + /// ``` + /// + /// Alternatively, consider using + /// [`lazy_static`](https://crates.io/crates/lazy_static) + /// or + /// [`once_cell`](https://crates.io/crates/once_cell), + /// which will guarantee safety for you. + pub fn from_bytes( + slice: &'a [u8], + ) -> Result<(DFA<&'a [u8]>, usize), DeserializeError> { + // SAFETY: This is safe because we validate both the sparse transitions + // (by trying to decode every state) and start state ID list below. If + // either validation fails, then we return an error. + let (dfa, nread) = unsafe { DFA::from_bytes_unchecked(slice)? }; + let seen = dfa.tt.validate(&dfa.special)?; + dfa.st.validate(&dfa.special, &seen)?; + // N.B. dfa.special doesn't have a way to do unchecked deserialization, + // so it has already been validated. + Ok((dfa, nread)) + } + + /// Deserialize a DFA with a specific state identifier representation in + /// constant time by omitting the verification of the validity of the + /// sparse transitions. + /// + /// This is just like [`DFA::from_bytes`], except it can potentially return + /// a DFA that exhibits undefined behavior if its transitions contains + /// invalid state identifiers. + /// + /// This routine is useful if you need to deserialize a DFA cheaply and + /// cannot afford the transition validation performed by `from_bytes`. 
+ /// + /// # Safety + /// + /// This routine is not safe because it permits callers to provide + /// arbitrary transitions with possibly incorrect state identifiers. While + /// the various serialization routines will never return an incorrect + /// DFA, there is no guarantee that the bytes provided here are correct. + /// While `from_bytes_unchecked` will still do several forms of basic + /// validation, this routine does not check that the transitions themselves + /// are correct. Given an incorrect transition table, it is possible for + /// the search routines to access out-of-bounds memory because of explicit + /// bounds check elision. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{dfa::{Automaton, sparse::DFA}, HalfMatch, Input}; + /// + /// let initial = DFA::new("foo[0-9]+")?; + /// let bytes = initial.to_bytes_native_endian(); + /// // SAFETY: This is guaranteed to be safe since the bytes given come + /// // directly from a compatible serialization routine. + /// let dfa: DFA<&[u8]> = unsafe { DFA::from_bytes_unchecked(&bytes)?.0 }; + /// + /// let expected = Some(HalfMatch::must(0, 8)); + /// assert_eq!(expected, dfa.try_search_fwd(&Input::new("foo12345"))?); + /// # Ok::<(), Box>(()) + /// ``` + pub unsafe fn from_bytes_unchecked( + slice: &'a [u8], + ) -> Result<(DFA<&'a [u8]>, usize), DeserializeError> { + let mut nr = 0; + + nr += wire::read_label(&slice[nr..], LABEL)?; + nr += wire::read_endianness_check(&slice[nr..])?; + nr += wire::read_version(&slice[nr..], VERSION)?; + + let _unused = wire::try_read_u32(&slice[nr..], "unused space")?; + nr += size_of::(); + + let (flags, nread) = Flags::from_bytes(&slice[nr..])?; + nr += nread; + + let (tt, nread) = Transitions::from_bytes_unchecked(&slice[nr..])?; + nr += nread; + + let (st, nread) = StartTable::from_bytes_unchecked(&slice[nr..])?; + nr += nread; + + let (special, nread) = Special::from_bytes(&slice[nr..])?; + nr += nread; + if special.max.as_usize() >= tt.sparse().len() { + 
return Err(DeserializeError::generic( + "max should not be greater than or equal to sparse bytes", + )); + } + + let (quitset, nread) = ByteSet::from_bytes(&slice[nr..])?; + nr += nread; + + // Prefilters don't support serialization, so they're always absent. + let pre = None; + Ok((DFA { tt, st, special, pre, quitset, flags }, nr)) + } +} + +impl> fmt::Debug for DFA { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + writeln!(f, "sparse::DFA(")?; + for state in self.tt.states() { + fmt_state_indicator(f, self, state.id())?; + writeln!(f, "{:06?}: {:?}", state.id().as_usize(), state)?; + } + writeln!(f, "")?; + for (i, (start_id, anchored, sty)) in self.st.iter().enumerate() { + if i % self.st.stride == 0 { + match anchored { + Anchored::No => writeln!(f, "START-GROUP(unanchored)")?, + Anchored::Yes => writeln!(f, "START-GROUP(anchored)")?, + Anchored::Pattern(pid) => writeln!( + f, + "START_GROUP(pattern: {:?})", + pid.as_usize() + )?, + } + } + writeln!(f, " {:?} => {:06?}", sty, start_id.as_usize())?; + } + writeln!(f, "state length: {:?}", self.tt.state_len)?; + writeln!(f, "pattern length: {:?}", self.pattern_len())?; + writeln!(f, "flags: {:?}", self.flags)?; + writeln!(f, ")")?; + Ok(()) + } +} + +// SAFETY: We assert that our implementation of each method is correct. 
+unsafe impl> Automaton for DFA { + #[inline] + fn is_special_state(&self, id: StateID) -> bool { + self.special.is_special_state(id) + } + + #[inline] + fn is_dead_state(&self, id: StateID) -> bool { + self.special.is_dead_state(id) + } + + #[inline] + fn is_quit_state(&self, id: StateID) -> bool { + self.special.is_quit_state(id) + } + + #[inline] + fn is_match_state(&self, id: StateID) -> bool { + self.special.is_match_state(id) + } + + #[inline] + fn is_start_state(&self, id: StateID) -> bool { + self.special.is_start_state(id) + } + + #[inline] + fn is_accel_state(&self, id: StateID) -> bool { + self.special.is_accel_state(id) + } + + // This is marked as inline to help dramatically boost sparse searching, + // which decodes each state it enters to follow the next transition. + #[cfg_attr(feature = "perf-inline", inline(always))] + fn next_state(&self, current: StateID, input: u8) -> StateID { + let input = self.tt.classes.get(input); + self.tt.state(current).next(input) + } + + #[inline] + unsafe fn next_state_unchecked( + &self, + current: StateID, + input: u8, + ) -> StateID { + self.next_state(current, input) + } + + #[inline] + fn next_eoi_state(&self, current: StateID) -> StateID { + self.tt.state(current).next_eoi() + } + + #[inline] + fn pattern_len(&self) -> usize { + self.tt.pattern_len + } + + #[inline] + fn match_len(&self, id: StateID) -> usize { + self.tt.state(id).pattern_len() + } + + #[inline] + fn match_pattern(&self, id: StateID, match_index: usize) -> PatternID { + // This is an optimization for the very common case of a DFA with a + // single pattern. This conditional avoids a somewhat more costly path + // that finds the pattern ID from the state machine, which requires + // a bit of slicing/pointer-chasing. This optimization tends to only + // matter when matches are frequent. 
+ if self.tt.pattern_len == 1 { + return PatternID::ZERO; + } + self.tt.state(id).pattern_id(match_index) + } + + #[inline] + fn has_empty(&self) -> bool { + self.flags.has_empty + } + + #[inline] + fn is_utf8(&self) -> bool { + self.flags.is_utf8 + } + + #[inline] + fn is_always_start_anchored(&self) -> bool { + self.flags.is_always_start_anchored + } + + #[inline] + fn start_state( + &self, + config: &start::Config, + ) -> Result { + let anchored = config.get_anchored(); + let start = match config.get_look_behind() { + None => Start::Text, + Some(byte) => { + if !self.quitset.is_empty() && self.quitset.contains(byte) { + return Err(StartError::quit(byte)); + } + self.st.start_map.get(byte) + } + }; + self.st.start(anchored, start) + } + + #[inline] + fn universal_start_state(&self, mode: Anchored) -> Option { + match mode { + Anchored::No => self.st.universal_start_unanchored, + Anchored::Yes => self.st.universal_start_anchored, + Anchored::Pattern(_) => None, + } + } + + #[inline] + fn accelerator(&self, id: StateID) -> &[u8] { + self.tt.state(id).accelerator() + } + + #[inline] + fn get_prefilter(&self) -> Option<&Prefilter> { + self.pre.as_ref() + } +} + +/// The transition table portion of a sparse DFA. +/// +/// The transition table is the core part of the DFA in that it describes how +/// to move from one state to another based on the input sequence observed. +/// +/// Unlike a typical dense table based DFA, states in a sparse transition +/// table have variable size. That is, states with more transitions use more +/// space than states with fewer transitions. This means that finding the next +/// transition takes more work than with a dense DFA, but also typically uses +/// much less space. +#[derive(Clone)] +struct Transitions { + /// The raw encoding of each state in this DFA. + /// + /// Each state has the following information: + /// + /// * A set of transitions to subsequent states. Transitions to the dead + /// state are omitted. 
+ /// * If the state can be accelerated, then any additional accelerator + /// information. + /// * If the state is a match state, then the state contains all pattern + /// IDs that match when in that state. + /// + /// To decode a state, use Transitions::state. + /// + /// In practice, T is either Vec or &[u8]. + sparse: T, + /// A set of equivalence classes, where a single equivalence class + /// represents a set of bytes that never discriminate between a match + /// and a non-match in the DFA. Each equivalence class corresponds to a + /// single character in this DFA's alphabet, where the maximum number of + /// characters is 257 (each possible value of a byte plus the special + /// EOI transition). Consequently, the number of equivalence classes + /// corresponds to the number of transitions for each DFA state. Note + /// though that the *space* used by each DFA state in the transition table + /// may be larger. The total space used by each DFA state is known as the + /// stride and is documented above. + /// + /// The only time the number of equivalence classes is fewer than 257 is + /// if the DFA's kind uses byte classes which is the default. Equivalence + /// classes should generally only be disabled when debugging, so that + /// the transitions themselves aren't obscured. Disabling them has no + /// other benefit, since the equivalence class map is always used while + /// searching. In the vast majority of cases, the number of equivalence + /// classes is substantially smaller than 257, particularly when large + /// Unicode classes aren't used. + /// + /// N.B. Equivalence classes aren't particularly useful in a sparse DFA + /// in the current implementation, since equivalence classes generally tend + /// to correspond to continuous ranges of bytes that map to the same + /// transition. So in a sparse DFA, equivalence classes don't really lead + /// to a space savings. 
In the future, it would be good to try and remove + /// them from sparse DFAs entirely, but requires a bit of work since sparse + /// DFAs are built from dense DFAs, which are in turn built on top of + /// equivalence classes. + classes: ByteClasses, + /// The total number of states in this DFA. Note that a DFA always has at + /// least one state---the dead state---even the empty DFA. In particular, + /// the dead state always has ID 0 and is correspondingly always the first + /// state. The dead state is never a match state. + state_len: usize, + /// The total number of unique patterns represented by these match states. + pattern_len: usize, +} + +impl<'a> Transitions<&'a [u8]> { + unsafe fn from_bytes_unchecked( + mut slice: &'a [u8], + ) -> Result<(Transitions<&'a [u8]>, usize), DeserializeError> { + let slice_start = slice.as_ptr().as_usize(); + + let (state_len, nr) = + wire::try_read_u32_as_usize(&slice, "state length")?; + slice = &slice[nr..]; + + let (pattern_len, nr) = + wire::try_read_u32_as_usize(&slice, "pattern length")?; + slice = &slice[nr..]; + + let (classes, nr) = ByteClasses::from_bytes(&slice)?; + slice = &slice[nr..]; + + let (len, nr) = + wire::try_read_u32_as_usize(&slice, "sparse transitions length")?; + slice = &slice[nr..]; + + wire::check_slice_len(slice, len, "sparse states byte length")?; + let sparse = &slice[..len]; + slice = &slice[len..]; + + let trans = Transitions { sparse, classes, state_len, pattern_len }; + Ok((trans, slice.as_ptr().as_usize() - slice_start)) + } +} + +impl> Transitions { + /// Writes a serialized form of this transition table to the buffer given. + /// If the buffer is too small, then an error is returned. To determine + /// how big the buffer must be, use `write_to_len`. 
+ fn write_to( + &self, + mut dst: &mut [u8], + ) -> Result { + let nwrite = self.write_to_len(); + if dst.len() < nwrite { + return Err(SerializeError::buffer_too_small( + "sparse transition table", + )); + } + dst = &mut dst[..nwrite]; + + // write state length + E::write_u32(u32::try_from(self.state_len).unwrap(), dst); + dst = &mut dst[size_of::()..]; + + // write pattern length + E::write_u32(u32::try_from(self.pattern_len).unwrap(), dst); + dst = &mut dst[size_of::()..]; + + // write byte class map + let n = self.classes.write_to(dst)?; + dst = &mut dst[n..]; + + // write number of bytes in sparse transitions + E::write_u32(u32::try_from(self.sparse().len()).unwrap(), dst); + dst = &mut dst[size_of::()..]; + + // write actual transitions + let mut id = DEAD; + while id.as_usize() < self.sparse().len() { + let state = self.state(id); + let n = state.write_to::(&mut dst)?; + dst = &mut dst[n..]; + // The next ID is the offset immediately following `state`. + id = StateID::new(id.as_usize() + state.write_to_len()).unwrap(); + } + Ok(nwrite) + } + + /// Returns the number of bytes the serialized form of this transition + /// table will use. + fn write_to_len(&self) -> usize { + size_of::() // state length + + size_of::() // pattern length + + self.classes.write_to_len() + + size_of::() // sparse transitions length + + self.sparse().len() + } + + /// Validates that every state ID in this transition table is valid. + /// + /// That is, every state ID can be used to correctly index a state in this + /// table. + fn validate(&self, sp: &Special) -> Result { + let mut verified = Seen::new(); + // We need to make sure that we decode the correct number of states. + // Otherwise, an empty set of transitions would validate even if the + // recorded state length is non-empty. + let mut len = 0; + // We can't use the self.states() iterator because it assumes the state + // encodings are valid. It could panic if they aren't. 
+ let mut id = DEAD; + while id.as_usize() < self.sparse().len() { + // Before we even decode the state, we check that the ID itself + // is well formed. That is, if it's a special state then it must + // actually be a quit, dead, accel, match or start state. + if sp.is_special_state(id) { + let is_actually_special = sp.is_dead_state(id) + || sp.is_quit_state(id) + || sp.is_match_state(id) + || sp.is_start_state(id) + || sp.is_accel_state(id); + if !is_actually_special { + // This is kind of a cryptic error message... + return Err(DeserializeError::generic( + "found sparse state tagged as special but \ + wasn't actually special", + )); + } + } + let state = self.try_state(sp, id)?; + verified.insert(id); + // The next ID should be the offset immediately following `state`. + id = StateID::new(wire::add( + id.as_usize(), + state.write_to_len(), + "next state ID offset", + )?) + .map_err(|err| { + DeserializeError::state_id_error(err, "next state ID offset") + })?; + len += 1; + } + // Now that we've checked that all top-level states are correct and + // importantly, collected a set of valid state IDs, we have all the + // information we need to check that all transitions are correct too. + // + // Note that we can't use `valid_ids` to iterate because it will + // be empty in no-std no-alloc contexts. (And yes, that means our + // verification isn't quite as good.) We can use `self.states()` + // though at least, since we know that all states can at least be + // decoded and traversed correctly. + for state in self.states() { + // Check that all transitions in this state are correct. + for i in 0..state.ntrans { + let to = state.next_at(i); + // For no-alloc, we just check that the state can decode. It is + // technically possible that the state ID could still point to + // a non-existent state even if it decodes (fuzzing proved this + // to be true), but it shouldn't result in any memory unsafety + // or panics in non-debug mode. 
+ #[cfg(not(feature = "alloc"))] + { + let _ = self.try_state(sp, to)?; + } + #[cfg(feature = "alloc")] + { + if !verified.contains(&to) { + return Err(DeserializeError::generic( + "found transition that points to a \ + non-existent state", + )); + } + } + } + } + if len != self.state_len { + return Err(DeserializeError::generic( + "mismatching sparse state length", + )); + } + Ok(verified) + } + + /// Converts these transitions to a borrowed value. + fn as_ref(&self) -> Transitions<&'_ [u8]> { + Transitions { + sparse: self.sparse(), + classes: self.classes.clone(), + state_len: self.state_len, + pattern_len: self.pattern_len, + } + } + + /// Converts these transitions to an owned value. + #[cfg(feature = "alloc")] + fn to_owned(&self) -> Transitions> { + Transitions { + sparse: self.sparse().to_vec(), + classes: self.classes.clone(), + state_len: self.state_len, + pattern_len: self.pattern_len, + } + } + + /// Return a convenient representation of the given state. + /// + /// This panics if the state is invalid. + /// + /// This is marked as inline to help dramatically boost sparse searching, + /// which decodes each state it enters to follow the next transition. Other + /// functions involved are also inlined, which should hopefully eliminate + /// a lot of the extraneous decoding that is never needed just to follow + /// the next transition. 
+ #[cfg_attr(feature = "perf-inline", inline(always))] + fn state(&self, id: StateID) -> State<'_> { + let mut state = &self.sparse()[id.as_usize()..]; + let mut ntrans = wire::read_u16(&state).as_usize(); + let is_match = (1 << 15) & ntrans != 0; + ntrans &= !(1 << 15); + state = &state[2..]; + + let (input_ranges, state) = state.split_at(ntrans * 2); + let (next, state) = state.split_at(ntrans * StateID::SIZE); + let (pattern_ids, state) = if is_match { + let npats = wire::read_u32(&state).as_usize(); + state[4..].split_at(npats * 4) + } else { + (&[][..], state) + }; + + let accel_len = usize::from(state[0]); + let accel = &state[1..accel_len + 1]; + State { id, is_match, ntrans, input_ranges, next, pattern_ids, accel } + } + + /// Like `state`, but will return an error if the state encoding is + /// invalid. This is useful for verifying states after deserialization, + /// which is required for a safe deserialization API. + /// + /// Note that this only verifies that this state is decodable and that + /// all of its data is consistent. It does not verify that its state ID + /// transitions point to valid states themselves, nor does it verify that + /// every pattern ID is valid. + fn try_state( + &self, + sp: &Special, + id: StateID, + ) -> Result, DeserializeError> { + if id.as_usize() > self.sparse().len() { + return Err(DeserializeError::generic( + "invalid caller provided sparse state ID", + )); + } + let mut state = &self.sparse()[id.as_usize()..]; + // Encoding format starts with a u16 that stores the total number of + // transitions in this state. 
+ let (mut ntrans, _) = + wire::try_read_u16_as_usize(state, "state transition length")?; + let is_match = ((1 << 15) & ntrans) != 0; + ntrans &= !(1 << 15); + state = &state[2..]; + if ntrans > 257 || ntrans == 0 { + return Err(DeserializeError::generic( + "invalid transition length", + )); + } + if is_match && !sp.is_match_state(id) { + return Err(DeserializeError::generic( + "state marked as match but not in match ID range", + )); + } else if !is_match && sp.is_match_state(id) { + return Err(DeserializeError::generic( + "state in match ID range but not marked as match state", + )); + } + + // Each transition has two pieces: an inclusive range of bytes on which + // it is defined, and the state ID that those bytes transition to. The + // pairs come first, followed by a corresponding sequence of state IDs. + let input_ranges_len = ntrans.checked_mul(2).unwrap(); + wire::check_slice_len(state, input_ranges_len, "sparse byte pairs")?; + let (input_ranges, state) = state.split_at(input_ranges_len); + // Every range should be of the form A-B, where A<=B. + for pair in input_ranges.chunks(2) { + let (start, end) = (pair[0], pair[1]); + if start > end { + return Err(DeserializeError::generic("invalid input range")); + } + } + + // And now extract the corresponding sequence of state IDs. We leave + // this sequence as a &[u8] instead of a &[S] because sparse DFAs do + // not have any alignment requirements. + let next_len = ntrans + .checked_mul(self.id_len()) + .expect("state size * #trans should always fit in a usize"); + wire::check_slice_len(state, next_len, "sparse trans state IDs")?; + let (next, state) = state.split_at(next_len); + // We can at least verify that every state ID is in bounds. 
+ for idbytes in next.chunks(self.id_len()) { + let (id, _) = + wire::read_state_id(idbytes, "sparse state ID in try_state")?; + wire::check_slice_len( + self.sparse(), + id.as_usize(), + "invalid sparse state ID", + )?; + } + + // If this is a match state, then read the pattern IDs for this state. + // Pattern IDs is a u32-length prefixed sequence of native endian + // encoded 32-bit integers. + let (pattern_ids, state) = if is_match { + let (npats, nr) = + wire::try_read_u32_as_usize(state, "pattern ID length")?; + let state = &state[nr..]; + if npats == 0 { + return Err(DeserializeError::generic( + "state marked as a match, but pattern length is zero", + )); + } + + let pattern_ids_len = + wire::mul(npats, 4, "sparse pattern ID byte length")?; + wire::check_slice_len( + state, + pattern_ids_len, + "sparse pattern IDs", + )?; + let (pattern_ids, state) = state.split_at(pattern_ids_len); + for patbytes in pattern_ids.chunks(PatternID::SIZE) { + wire::read_pattern_id( + patbytes, + "sparse pattern ID in try_state", + )?; + } + (pattern_ids, state) + } else { + (&[][..], state) + }; + if is_match && pattern_ids.is_empty() { + return Err(DeserializeError::generic( + "state marked as a match, but has no pattern IDs", + )); + } + if sp.is_match_state(id) && pattern_ids.is_empty() { + return Err(DeserializeError::generic( + "state marked special as a match, but has no pattern IDs", + )); + } + if sp.is_match_state(id) != is_match { + return Err(DeserializeError::generic( + "whether state is a match or not is inconsistent", + )); + } + + // Now read this state's accelerator info. The first byte is the length + // of the accelerator, which is typically 0 (for no acceleration) but + // is no bigger than 3. The length indicates the number of bytes that + // follow, where each byte corresponds to a transition out of this + // state. 
+ if state.is_empty() { + return Err(DeserializeError::generic("no accelerator length")); + } + let (accel_len, state) = (usize::from(state[0]), &state[1..]); + + if accel_len > 3 { + return Err(DeserializeError::generic( + "sparse invalid accelerator length", + )); + } else if accel_len == 0 && sp.is_accel_state(id) { + return Err(DeserializeError::generic( + "got no accelerators in state, but in accelerator ID range", + )); + } else if accel_len > 0 && !sp.is_accel_state(id) { + return Err(DeserializeError::generic( + "state in accelerator ID range, but has no accelerators", + )); + } + + wire::check_slice_len( + state, + accel_len, + "sparse corrupt accelerator length", + )?; + let (accel, _) = (&state[..accel_len], &state[accel_len..]); + + let state = State { + id, + is_match, + ntrans, + input_ranges, + next, + pattern_ids, + accel, + }; + if sp.is_quit_state(state.next_at(state.ntrans - 1)) { + return Err(DeserializeError::generic( + "state with EOI transition to quit state is illegal", + )); + } + Ok(state) + } + + /// Return an iterator over all of the states in this DFA. + /// + /// The iterator returned yields tuples, where the first element is the + /// state ID and the second element is the state itself. + fn states(&self) -> StateIter<'_, T> { + StateIter { trans: self, id: DEAD.as_usize() } + } + + /// Returns the sparse transitions as raw bytes. + fn sparse(&self) -> &[u8] { + self.sparse.as_ref() + } + + /// Returns the number of bytes represented by a single state ID. + fn id_len(&self) -> usize { + StateID::SIZE + } + + /// Return the memory usage, in bytes, of these transitions. + /// + /// This does not include the size of a `Transitions` value itself. + fn memory_usage(&self) -> usize { + self.sparse().len() + } +} + +#[cfg(feature = "dfa-build")] +impl> Transitions { + /// Return a convenient mutable representation of the given state. + /// This panics if the state is invalid. 
+ fn state_mut(&mut self, id: StateID) -> StateMut<'_> { + let mut state = &mut self.sparse_mut()[id.as_usize()..]; + let mut ntrans = wire::read_u16(&state).as_usize(); + let is_match = (1 << 15) & ntrans != 0; + ntrans &= !(1 << 15); + state = &mut state[2..]; + + let (input_ranges, state) = state.split_at_mut(ntrans * 2); + let (next, state) = state.split_at_mut(ntrans * StateID::SIZE); + let (pattern_ids, state) = if is_match { + let npats = wire::read_u32(&state).as_usize(); + state[4..].split_at_mut(npats * 4) + } else { + (&mut [][..], state) + }; + + let accel_len = usize::from(state[0]); + let accel = &mut state[1..accel_len + 1]; + StateMut { + id, + is_match, + ntrans, + input_ranges, + next, + pattern_ids, + accel, + } + } + + /// Returns the sparse transitions as raw mutable bytes. + fn sparse_mut(&mut self) -> &mut [u8] { + self.sparse.as_mut() + } +} + +/// The set of all possible starting states in a DFA. +/// +/// See the eponymous type in the `dense` module for more details. This type +/// is very similar to `dense::StartTable`, except that its underlying +/// representation is `&[u8]` instead of `&[S]`. (The latter would require +/// sparse DFAs to be aligned, which is explicitly something we do not require +/// because we don't really need it.) +#[derive(Clone)] +struct StartTable { + /// The initial start state IDs as a contiguous table of native endian + /// encoded integers, represented by `S`. + /// + /// In practice, T is either Vec or &[u8] and has no alignment + /// requirements. + /// + /// The first `2 * stride` (currently always 8) entries always correspond + /// to the starts states for the entire DFA, with the first 4 entries being + /// for unanchored searches and the second 4 entries being for anchored + /// searches. To keep things simple, we always use 8 entries even if the + /// `StartKind` is not both. 
+ /// + /// After that, there are `stride * patterns` state IDs, where `patterns` + /// may be zero in the case of a DFA with no patterns or in the case where + /// the DFA was built without enabling starting states for each pattern. + table: T, + /// The starting state configuration supported. When 'both', both + /// unanchored and anchored searches work. When 'unanchored', anchored + /// searches panic. When 'anchored', unanchored searches panic. + kind: StartKind, + /// The start state configuration for every possible byte. + start_map: StartByteMap, + /// The number of starting state IDs per pattern. + stride: usize, + /// The total number of patterns for which starting states are encoded. + /// This is `None` for DFAs that were built without start states for each + /// pattern. Thus, one cannot use this field to say how many patterns + /// are in the DFA in all cases. It is specific to how many patterns are + /// represented in this start table. + pattern_len: Option, + /// The universal starting state for unanchored searches. This is only + /// present when the DFA supports unanchored searches and when all starting + /// state IDs for an unanchored search are equivalent. + universal_start_unanchored: Option, + /// The universal starting state for anchored searches. This is only + /// present when the DFA supports anchored searches and when all starting + /// state IDs for an anchored search are equivalent. + universal_start_anchored: Option, +} + +#[cfg(feature = "dfa-build")] +impl StartTable> { + fn new>( + dfa: &dense::DFA, + pattern_len: Option, + ) -> StartTable> { + let stride = Start::len(); + // This is OK since the only way we're here is if a dense DFA could be + // constructed successfully, which uses the same space. 
+ let len = stride + .checked_mul(pattern_len.unwrap_or(0)) + .unwrap() + .checked_add(stride.checked_mul(2).unwrap()) + .unwrap() + .checked_mul(StateID::SIZE) + .unwrap(); + StartTable { + table: vec![0; len], + kind: dfa.start_kind(), + start_map: dfa.start_map().clone(), + stride, + pattern_len, + universal_start_unanchored: dfa + .universal_start_state(Anchored::No), + universal_start_anchored: dfa.universal_start_state(Anchored::Yes), + } + } + + fn from_dense_dfa>( + dfa: &dense::DFA, + remap: &[StateID], + ) -> Result>, BuildError> { + // Unless the DFA has start states compiled for each pattern, then + // as far as the starting state table is concerned, there are zero + // patterns to account for. It will instead only store starting states + // for the entire DFA. + let start_pattern_len = if dfa.starts_for_each_pattern() { + Some(dfa.pattern_len()) + } else { + None + }; + let mut sl = StartTable::new(dfa, start_pattern_len); + for (old_start_id, anchored, sty) in dfa.starts() { + let new_start_id = remap[dfa.to_index(old_start_id)]; + sl.set_start(anchored, sty, new_start_id); + } + Ok(sl) + } +} + +impl<'a> StartTable<&'a [u8]> { + unsafe fn from_bytes_unchecked( + mut slice: &'a [u8], + ) -> Result<(StartTable<&'a [u8]>, usize), DeserializeError> { + let slice_start = slice.as_ptr().as_usize(); + + let (kind, nr) = StartKind::from_bytes(slice)?; + slice = &slice[nr..]; + + let (start_map, nr) = StartByteMap::from_bytes(slice)?; + slice = &slice[nr..]; + + let (stride, nr) = + wire::try_read_u32_as_usize(slice, "sparse start table stride")?; + slice = &slice[nr..]; + if stride != Start::len() { + return Err(DeserializeError::generic( + "invalid sparse starting table stride", + )); + } + + let (maybe_pattern_len, nr) = + wire::try_read_u32_as_usize(slice, "sparse start table patterns")?; + slice = &slice[nr..]; + let pattern_len = if maybe_pattern_len.as_u32() == u32::MAX { + None + } else { + Some(maybe_pattern_len) + }; + if pattern_len.map_or(false, 
|len| len > PatternID::LIMIT) { + return Err(DeserializeError::generic( + "sparse invalid number of patterns", + )); + } + + let (universal_unanchored, nr) = + wire::try_read_u32(slice, "universal unanchored start")?; + slice = &slice[nr..]; + let universal_start_unanchored = if universal_unanchored == u32::MAX { + None + } else { + Some(StateID::try_from(universal_unanchored).map_err(|e| { + DeserializeError::state_id_error( + e, + "universal unanchored start", + ) + })?) + }; + + let (universal_anchored, nr) = + wire::try_read_u32(slice, "universal anchored start")?; + slice = &slice[nr..]; + let universal_start_anchored = if universal_anchored == u32::MAX { + None + } else { + Some(StateID::try_from(universal_anchored).map_err(|e| { + DeserializeError::state_id_error(e, "universal anchored start") + })?) + }; + + let pattern_table_size = wire::mul( + stride, + pattern_len.unwrap_or(0), + "sparse invalid pattern length", + )?; + // Our start states always start with a single stride of start states + // for the entire automaton which permit it to match any pattern. What + // follows it are an optional set of start states for each pattern. 
+ let start_state_len = wire::add( + wire::mul(2, stride, "start state stride too big")?, + pattern_table_size, + "sparse invalid 'any' pattern starts size", + )?; + let table_bytes_len = wire::mul( + start_state_len, + StateID::SIZE, + "sparse pattern table bytes length", + )?; + wire::check_slice_len( + slice, + table_bytes_len, + "sparse start ID table", + )?; + let table = &slice[..table_bytes_len]; + slice = &slice[table_bytes_len..]; + + let sl = StartTable { + table, + kind, + start_map, + stride, + pattern_len, + universal_start_unanchored, + universal_start_anchored, + }; + Ok((sl, slice.as_ptr().as_usize() - slice_start)) + } +} + +impl> StartTable { + fn write_to( + &self, + mut dst: &mut [u8], + ) -> Result { + let nwrite = self.write_to_len(); + if dst.len() < nwrite { + return Err(SerializeError::buffer_too_small( + "sparse starting table ids", + )); + } + dst = &mut dst[..nwrite]; + + // write start kind + let nw = self.kind.write_to::(dst)?; + dst = &mut dst[nw..]; + // write start byte map + let nw = self.start_map.write_to(dst)?; + dst = &mut dst[nw..]; + // write stride + E::write_u32(u32::try_from(self.stride).unwrap(), dst); + dst = &mut dst[size_of::()..]; + // write pattern length + E::write_u32( + u32::try_from(self.pattern_len.unwrap_or(0xFFFF_FFFF)).unwrap(), + dst, + ); + dst = &mut dst[size_of::()..]; + // write universal start unanchored state id, u32::MAX if absent + E::write_u32( + self.universal_start_unanchored + .map_or(u32::MAX, |sid| sid.as_u32()), + dst, + ); + dst = &mut dst[size_of::()..]; + // write universal start anchored state id, u32::MAX if absent + E::write_u32( + self.universal_start_anchored.map_or(u32::MAX, |sid| sid.as_u32()), + dst, + ); + dst = &mut dst[size_of::()..]; + // write start IDs + for (sid, _, _) in self.iter() { + E::write_u32(sid.as_u32(), dst); + dst = &mut dst[StateID::SIZE..]; + } + Ok(nwrite) + } + + /// Returns the number of bytes the serialized form of this transition + /// table will use. 
+ fn write_to_len(&self) -> usize { + self.kind.write_to_len() + + self.start_map.write_to_len() + + size_of::() // stride + + size_of::() // # patterns + + size_of::() // universal unanchored start + + size_of::() // universal anchored start + + self.table().len() + } + + /// Validates that every starting state ID in this table is valid. + /// + /// That is, every starting state ID can be used to correctly decode a + /// state in the DFA's sparse transitions. + fn validate( + &self, + sp: &Special, + seen: &Seen, + ) -> Result<(), DeserializeError> { + for (id, _, _) in self.iter() { + if !seen.contains(&id) { + return Err(DeserializeError::generic( + "found invalid start state ID", + )); + } + if sp.is_match_state(id) { + return Err(DeserializeError::generic( + "start states cannot be match states", + )); + } + } + Ok(()) + } + + /// Converts this start list to a borrowed value. + fn as_ref(&self) -> StartTable<&'_ [u8]> { + StartTable { + table: self.table(), + kind: self.kind, + start_map: self.start_map.clone(), + stride: self.stride, + pattern_len: self.pattern_len, + universal_start_unanchored: self.universal_start_unanchored, + universal_start_anchored: self.universal_start_anchored, + } + } + + /// Converts this start list to an owned value. + #[cfg(feature = "alloc")] + fn to_owned(&self) -> StartTable> { + StartTable { + table: self.table().to_vec(), + kind: self.kind, + start_map: self.start_map.clone(), + stride: self.stride, + pattern_len: self.pattern_len, + universal_start_unanchored: self.universal_start_unanchored, + universal_start_anchored: self.universal_start_anchored, + } + } + + /// Return the start state for the given index and pattern ID. If the + /// pattern ID is None, then the corresponding start state for the entire + /// DFA is returned. If the pattern ID is not None, then the corresponding + /// starting state for the given pattern is returned. 
If this start table + /// does not have individual starting states for each pattern, then this + /// panics. + fn start( + &self, + anchored: Anchored, + start: Start, + ) -> Result { + let start_index = start.as_usize(); + let index = match anchored { + Anchored::No => { + if !self.kind.has_unanchored() { + return Err(StartError::unsupported_anchored(anchored)); + } + start_index + } + Anchored::Yes => { + if !self.kind.has_anchored() { + return Err(StartError::unsupported_anchored(anchored)); + } + self.stride + start_index + } + Anchored::Pattern(pid) => { + let len = match self.pattern_len { + None => { + return Err(StartError::unsupported_anchored(anchored)) + } + Some(len) => len, + }; + if pid.as_usize() >= len { + return Ok(DEAD); + } + (2 * self.stride) + + (self.stride * pid.as_usize()) + + start_index + } + }; + let start = index * StateID::SIZE; + // This OK since we're allowed to assume that the start table contains + // valid StateIDs. + Ok(wire::read_state_id_unchecked(&self.table()[start..]).0) + } + + /// Return an iterator over all start IDs in this table. + fn iter(&self) -> StartStateIter<'_, T> { + StartStateIter { st: self, i: 0 } + } + + /// Returns the total number of start state IDs in this table. + fn len(&self) -> usize { + self.table().len() / StateID::SIZE + } + + /// Returns the table as a raw slice of bytes. + fn table(&self) -> &[u8] { + self.table.as_ref() + } + + /// Return the memory usage, in bytes, of this start list. + /// + /// This does not include the size of a `StartTable` value itself. + fn memory_usage(&self) -> usize { + self.table().len() + } +} + +#[cfg(feature = "dfa-build")] +impl> StartTable { + /// Set the start state for the given index and pattern. + /// + /// If the pattern ID or state ID are not valid, then this will panic. 
+ fn set_start(&mut self, anchored: Anchored, start: Start, id: StateID) { + let start_index = start.as_usize(); + let index = match anchored { + Anchored::No => start_index, + Anchored::Yes => self.stride + start_index, + Anchored::Pattern(pid) => { + let pid = pid.as_usize(); + let len = self + .pattern_len + .expect("start states for each pattern enabled"); + assert!(pid < len, "invalid pattern ID {:?}", pid); + self.stride + .checked_mul(pid) + .unwrap() + .checked_add(self.stride.checked_mul(2).unwrap()) + .unwrap() + .checked_add(start_index) + .unwrap() + } + }; + let start = index * StateID::SIZE; + let end = start + StateID::SIZE; + wire::write_state_id::( + id, + &mut self.table.as_mut()[start..end], + ); + } +} + +/// An iterator over all state state IDs in a sparse DFA. +struct StartStateIter<'a, T> { + st: &'a StartTable, + i: usize, +} + +impl<'a, T: AsRef<[u8]>> Iterator for StartStateIter<'a, T> { + type Item = (StateID, Anchored, Start); + + fn next(&mut self) -> Option<(StateID, Anchored, Start)> { + let i = self.i; + if i >= self.st.len() { + return None; + } + self.i += 1; + + // This unwrap is okay since the stride of any DFA must always match + // the number of start state types. + let start_type = Start::from_usize(i % self.st.stride).unwrap(); + let anchored = if i < self.st.stride { + Anchored::No + } else if i < (2 * self.st.stride) { + Anchored::Yes + } else { + let pid = (i - (2 * self.st.stride)) / self.st.stride; + Anchored::Pattern(PatternID::new(pid).unwrap()) + }; + let start = i * StateID::SIZE; + let end = start + StateID::SIZE; + let bytes = self.st.table()[start..end].try_into().unwrap(); + // This is OK since we're allowed to assume that any IDs in this start + // table are correct and valid for this DFA. 
+ let id = StateID::from_ne_bytes_unchecked(bytes); + Some((id, anchored, start_type)) + } +} + +impl<'a, T> fmt::Debug for StartStateIter<'a, T> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("StartStateIter").field("i", &self.i).finish() + } +} + +/// An iterator over all states in a sparse DFA. +/// +/// This iterator yields tuples, where the first element is the state ID and +/// the second element is the state itself. +struct StateIter<'a, T> { + trans: &'a Transitions, + id: usize, +} + +impl<'a, T: AsRef<[u8]>> Iterator for StateIter<'a, T> { + type Item = State<'a>; + + fn next(&mut self) -> Option> { + if self.id >= self.trans.sparse().len() { + return None; + } + let state = self.trans.state(StateID::new_unchecked(self.id)); + self.id = self.id + state.write_to_len(); + Some(state) + } +} + +impl<'a, T> fmt::Debug for StateIter<'a, T> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("StateIter").field("id", &self.id).finish() + } +} + +/// A representation of a sparse DFA state that can be cheaply materialized +/// from a state identifier. +#[derive(Clone)] +struct State<'a> { + /// The identifier of this state. + id: StateID, + /// Whether this is a match state or not. + is_match: bool, + /// The number of transitions in this state. + ntrans: usize, + /// Pairs of input ranges, where there is one pair for each transition. + /// Each pair specifies an inclusive start and end byte range for the + /// corresponding transition. + input_ranges: &'a [u8], + /// Transitions to the next state. This slice contains native endian + /// encoded state identifiers, with `S` as the representation. Thus, there + /// are `ntrans * size_of::()` bytes in this slice. + next: &'a [u8], + /// If this is a match state, then this contains the pattern IDs that match + /// when the DFA is in this state. + /// + /// This is a contiguous sequence of 32-bit native endian encoded integers. 
+ pattern_ids: &'a [u8], + /// An accelerator for this state, if present. If this state has no + /// accelerator, then this is an empty slice. When non-empty, this slice + /// has length at most 3 and corresponds to the exhaustive set of bytes + /// that must be seen in order to transition out of this state. + accel: &'a [u8], +} + +impl<'a> State<'a> { + /// Searches for the next transition given an input byte. If no such + /// transition could be found, then a dead state is returned. + /// + /// This is marked as inline to help dramatically boost sparse searching, + /// which decodes each state it enters to follow the next transition. + #[cfg_attr(feature = "perf-inline", inline(always))] + fn next(&self, input: u8) -> StateID { + // This straight linear search was observed to be much better than + // binary search on ASCII haystacks, likely because a binary search + // visits the ASCII case last but a linear search sees it first. A + // binary search does do a little better on non-ASCII haystacks, but + // not by much. There might be a better trade off lurking here. + for i in 0..(self.ntrans - 1) { + let (start, end) = self.range(i); + if start <= input && input <= end { + return self.next_at(i); + } + // We could bail early with an extra branch: if input < b1, then + // we know we'll never find a matching transition. Interestingly, + // this extra branch seems to not help performance, or will even + // hurt it. It's likely very dependent on the DFA itself and what + // is being searched. + } + DEAD + } + + /// Returns the next state ID for the special EOI transition. + fn next_eoi(&self) -> StateID { + self.next_at(self.ntrans - 1) + } + + /// Returns the identifier for this state. + fn id(&self) -> StateID { + self.id + } + + /// Returns the inclusive input byte range for the ith transition in this + /// state. 
+ fn range(&self, i: usize) -> (u8, u8) { + (self.input_ranges[i * 2], self.input_ranges[i * 2 + 1]) + } + + /// Returns the next state for the ith transition in this state. + fn next_at(&self, i: usize) -> StateID { + let start = i * StateID::SIZE; + let end = start + StateID::SIZE; + let bytes = self.next[start..end].try_into().unwrap(); + StateID::from_ne_bytes_unchecked(bytes) + } + + /// Returns the pattern ID for the given match index. If the match index + /// is invalid, then this panics. + fn pattern_id(&self, match_index: usize) -> PatternID { + let start = match_index * PatternID::SIZE; + wire::read_pattern_id_unchecked(&self.pattern_ids[start..]).0 + } + + /// Returns the total number of pattern IDs for this state. This is always + /// zero when `is_match` is false. + fn pattern_len(&self) -> usize { + assert_eq!(0, self.pattern_ids.len() % 4); + self.pattern_ids.len() / 4 + } + + /// Return an accelerator for this state. + fn accelerator(&self) -> &'a [u8] { + self.accel + } + + /// Write the raw representation of this state to the given buffer using + /// the given endianness. 
+ fn write_to( + &self, + mut dst: &mut [u8], + ) -> Result { + let nwrite = self.write_to_len(); + if dst.len() < nwrite { + return Err(SerializeError::buffer_too_small( + "sparse state transitions", + )); + } + + let ntrans = + if self.is_match { self.ntrans | (1 << 15) } else { self.ntrans }; + E::write_u16(u16::try_from(ntrans).unwrap(), dst); + dst = &mut dst[size_of::()..]; + + dst[..self.input_ranges.len()].copy_from_slice(self.input_ranges); + dst = &mut dst[self.input_ranges.len()..]; + + for i in 0..self.ntrans { + E::write_u32(self.next_at(i).as_u32(), dst); + dst = &mut dst[StateID::SIZE..]; + } + + if self.is_match { + E::write_u32(u32::try_from(self.pattern_len()).unwrap(), dst); + dst = &mut dst[size_of::()..]; + for i in 0..self.pattern_len() { + let pid = self.pattern_id(i); + E::write_u32(pid.as_u32(), dst); + dst = &mut dst[PatternID::SIZE..]; + } + } + + dst[0] = u8::try_from(self.accel.len()).unwrap(); + dst[1..][..self.accel.len()].copy_from_slice(self.accel); + + Ok(nwrite) + } + + /// Return the total number of bytes that this state consumes in its + /// encoded form. 
+ fn write_to_len(&self) -> usize { + let mut len = 2 + + (self.ntrans * 2) + + (self.ntrans * StateID::SIZE) + + (1 + self.accel.len()); + if self.is_match { + len += size_of::() + self.pattern_ids.len(); + } + len + } +} + +impl<'a> fmt::Debug for State<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let mut printed = false; + for i in 0..(self.ntrans - 1) { + let next = self.next_at(i); + if next == DEAD { + continue; + } + + if printed { + write!(f, ", ")?; + } + let (start, end) = self.range(i); + if start == end { + write!(f, "{:?} => {:?}", DebugByte(start), next.as_usize())?; + } else { + write!( + f, + "{:?}-{:?} => {:?}", + DebugByte(start), + DebugByte(end), + next.as_usize(), + )?; + } + printed = true; + } + let eoi = self.next_at(self.ntrans - 1); + if eoi != DEAD { + if printed { + write!(f, ", ")?; + } + write!(f, "EOI => {:?}", eoi.as_usize())?; + } + Ok(()) + } +} + +/// A representation of a mutable sparse DFA state that can be cheaply +/// materialized from a state identifier. +#[cfg(feature = "dfa-build")] +struct StateMut<'a> { + /// The identifier of this state. + id: StateID, + /// Whether this is a match state or not. + is_match: bool, + /// The number of transitions in this state. + ntrans: usize, + /// Pairs of input ranges, where there is one pair for each transition. + /// Each pair specifies an inclusive start and end byte range for the + /// corresponding transition. + input_ranges: &'a mut [u8], + /// Transitions to the next state. This slice contains native endian + /// encoded state identifiers, with `S` as the representation. Thus, there + /// are `ntrans * size_of::()` bytes in this slice. + next: &'a mut [u8], + /// If this is a match state, then this contains the pattern IDs that match + /// when the DFA is in this state. + /// + /// This is a contiguous sequence of 32-bit native endian encoded integers. + pattern_ids: &'a [u8], + /// An accelerator for this state, if present. 
If this state has no + /// accelerator, then this is an empty slice. When non-empty, this slice + /// has length at most 3 and corresponds to the exhaustive set of bytes + /// that must be seen in order to transition out of this state. + accel: &'a mut [u8], +} + +#[cfg(feature = "dfa-build")] +impl<'a> StateMut<'a> { + /// Sets the ith transition to the given state. + fn set_next_at(&mut self, i: usize, next: StateID) { + let start = i * StateID::SIZE; + let end = start + StateID::SIZE; + wire::write_state_id::(next, &mut self.next[start..end]); + } +} + +#[cfg(feature = "dfa-build")] +impl<'a> fmt::Debug for StateMut<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let state = State { + id: self.id, + is_match: self.is_match, + ntrans: self.ntrans, + input_ranges: self.input_ranges, + next: self.next, + pattern_ids: self.pattern_ids, + accel: self.accel, + }; + fmt::Debug::fmt(&state, f) + } +} + +// In order to validate everything, we not only need to make sure we +// can decode every state, but that every transition in every state +// points to a valid state. There are many duplicative transitions, so +// we record state IDs that we've verified so that we don't redo the +// decoding work. +// +// Except, when in no_std mode, we don't have dynamic memory allocation +// available to us, so we skip this optimization. It's not clear +// whether doing something more clever is worth it just yet. If you're +// profiling this code and need it to run faster, please file an issue. +// +// OK, so we also use this to record the set of valid state IDs. Since +// it is possible for a transition to point to an invalid state ID that +// still (somehow) deserializes to a valid state. So we need to make +// sure our transitions are limited to actually correct state IDs. +// The problem is, I'm not sure how to do this verification step in +// no-std no-alloc mode. I think we'd *have* to store the set of valid +// state IDs in the DFA itself. 
For now, we don't do this verification +// in no-std no-alloc mode. The worst thing that can happen is an +// incorrect result. But no panics or memory safety problems should +// result. Because we still do validate that the state itself is +// "valid" in the sense that everything it points to actually exists. +// +// ---AG +#[derive(Debug)] +struct Seen { + #[cfg(feature = "alloc")] + set: alloc::collections::BTreeSet, + #[cfg(not(feature = "alloc"))] + set: core::marker::PhantomData, +} + +#[cfg(feature = "alloc")] +impl Seen { + fn new() -> Seen { + Seen { set: alloc::collections::BTreeSet::new() } + } + fn insert(&mut self, id: StateID) { + self.set.insert(id); + } + fn contains(&self, id: &StateID) -> bool { + self.set.contains(id) + } +} + +#[cfg(not(feature = "alloc"))] +impl Seen { + fn new() -> Seen { + Seen { set: core::marker::PhantomData } + } + fn insert(&mut self, _id: StateID) {} + fn contains(&self, _id: &StateID) -> bool { + true + } +} + +/* +/// A binary search routine specialized specifically to a sparse DFA state's +/// transitions. Specifically, the transitions are defined as a set of pairs +/// of input bytes that delineate an inclusive range of bytes. If the input +/// byte is in the range, then the corresponding transition is a match. +/// +/// This binary search accepts a slice of these pairs and returns the position +/// of the matching pair (the ith transition), or None if no matching pair +/// could be found. +/// +/// Note that this routine is not currently used since it was observed to +/// either decrease performance when searching ASCII, or did not provide enough +/// of a boost on non-ASCII haystacks to be worth it. However, we leave it here +/// for posterity in case we can find a way to use it. 
+/// +/// In theory, we could use the standard library's search routine if we could +/// cast a `&[u8]` to a `&[(u8, u8)]`, but I don't believe this is currently +/// guaranteed to be safe and is thus UB (since I don't think the in-memory +/// representation of `(u8, u8)` has been nailed down). One could define a +/// repr(C) type, but the casting doesn't seem justified. +#[cfg_attr(feature = "perf-inline", inline(always))] +fn binary_search_ranges(ranges: &[u8], needle: u8) -> Option { + debug_assert!(ranges.len() % 2 == 0, "ranges must have even length"); + debug_assert!(ranges.len() <= 512, "ranges should be short"); + + let (mut left, mut right) = (0, ranges.len() / 2); + while left < right { + let mid = (left + right) / 2; + let (b1, b2) = (ranges[mid * 2], ranges[mid * 2 + 1]); + if needle < b1 { + right = mid; + } else if needle > b2 { + left = mid + 1; + } else { + return Some(mid); + } + } + None +} +*/ + +#[cfg(all(test, feature = "syntax", feature = "dfa-build"))] +mod tests { + use crate::{ + dfa::{dense::DFA, Automaton}, + nfa::thompson, + Input, MatchError, + }; + + // See the analogous test in src/hybrid/dfa.rs and src/dfa/dense.rs. + #[test] + fn heuristic_unicode_forward() { + let dfa = DFA::builder() + .configure(DFA::config().unicode_word_boundary(true)) + .thompson(thompson::Config::new().reverse(true)) + .build(r"\b[0-9]+\b") + .unwrap() + .to_sparse() + .unwrap(); + + let input = Input::new("β123").range(2..); + let expected = MatchError::quit(0xB2, 1); + let got = dfa.try_search_fwd(&input); + assert_eq!(Err(expected), got); + + let input = Input::new("123β").range(..3); + let expected = MatchError::quit(0xCE, 3); + let got = dfa.try_search_fwd(&input); + assert_eq!(Err(expected), got); + } + + // See the analogous test in src/hybrid/dfa.rs and src/dfa/dense.rs. 
+ #[test] + fn heuristic_unicode_reverse() { + let dfa = DFA::builder() + .configure(DFA::config().unicode_word_boundary(true)) + .thompson(thompson::Config::new().reverse(true)) + .build(r"\b[0-9]+\b") + .unwrap() + .to_sparse() + .unwrap(); + + let input = Input::new("β123").range(2..); + let expected = MatchError::quit(0xB2, 1); + let got = dfa.try_search_rev(&input); + assert_eq!(Err(expected), got); + + let input = Input::new("123β").range(..3); + let expected = MatchError::quit(0xCE, 3); + let got = dfa.try_search_rev(&input); + assert_eq!(Err(expected), got); + } +} diff --git a/vendor/regex-automata/src/dfa/special.rs b/vendor/regex-automata/src/dfa/special.rs new file mode 100644 index 0000000..a831df5 --- /dev/null +++ b/vendor/regex-automata/src/dfa/special.rs @@ -0,0 +1,494 @@ +use crate::{ + dfa::DEAD, + util::{ + primitives::StateID, + wire::{self, DeserializeError, Endian, SerializeError}, + }, +}; + +macro_rules! err { + ($msg:expr) => { + return Err(DeserializeError::generic($msg)); + }; +} + +// Special represents the identifiers in a DFA that correspond to "special" +// states. If a state is one or more of the following, then it is considered +// special: +// +// * dead - A non-matching state where all outgoing transitions lead back to +// itself. There is only one of these, regardless of whether minimization +// has run. The dead state always has an ID of 0. i.e., It is always the +// first state in a DFA. +// * quit - A state that is entered whenever a byte is seen that should cause +// a DFA to give up and stop searching. This results in a MatchError::quit +// error being returned at search time. The default configuration for a DFA +// has no quit bytes, which means this state is unreachable by default, +// although it is always present for reasons of implementation simplicity. +// This state is only reachable when the caller configures the DFA to quit +// on certain bytes. 
There is always exactly one of these states and it +// is always the second state. (Its actual ID depends on the size of the +// alphabet in dense DFAs, since state IDs are premultiplied in order to +// allow them to be used directly as indices into the transition table.) +// * match - An accepting state, i.e., indicative of a match. There may be +// zero or more of these states. +// * accelerated - A state where all of its outgoing transitions, except a +// few, loop back to itself. These states are candidates for acceleration +// via memchr during search. There may be zero or more of these states. +// * start - A non-matching state that indicates where the automaton should +// start during a search. There is always at least one starting state and +// all are guaranteed to be non-match states. (A start state cannot be a +// match state because the DFAs in this crate delay all matches by one byte. +// So every search that finds a match must move through one transition to +// some other match state, even when searching an empty string.) +// +// These are not mutually exclusive categories. Namely, the following +// overlappings can occur: +// +// * {dead, start} - If a DFA can never lead to a match and it is minimized, +// then it will typically compile to something where all starting IDs point +// to the DFA's dead state. +// * {match, accelerated} - It is possible for a match state to have the +// majority of its transitions loop back to itself, which means it's +// possible for a match state to be accelerated. +// * {start, accelerated} - Similarly, it is possible for a start state to be +// accelerated. Note that it is possible for an accelerated state to be +// neither a match or a start state. Also note that just because both match +// and start states overlap with accelerated states does not mean that +// match and start states overlap with each other. In fact, they are +// guaranteed not to overlap. 
+// +// As a special mention, every DFA always has a dead and a quit state, even +// though from the perspective of the DFA, they are equivalent. (Indeed, +// minimization special cases them to ensure they don't get merged.) The +// purpose of keeping them distinct is to use the quit state as a sentinel to +// distinguish between whether a search finished successfully without finding +// anything or whether it gave up before finishing. +// +// So the main problem we want to solve here is the *fast* detection of whether +// a state is special or not. And we also want to do this while storing as +// little extra data as possible. AND we want to be able to quickly determine +// which categories a state falls into above if it is special. +// +// We achieve this by essentially shuffling all special states to the beginning +// of a DFA. That is, all special states appear before every other non-special +// state. By representing special states this way, we can determine whether a +// state is special or not by a single comparison, where special.max is the +// identifier of the last special state in the DFA: +// +// if current_state <= special.max: +// ... do something with special state +// +// The only thing left to do is to determine what kind of special state +// it is. Because what we do next depends on that. Since special states +// are typically rare, we can afford to do a bit more extra work, but we'd +// still like this to be as fast as possible. The trick we employ here is to +// continue shuffling states even within the special state range. Such that +// one contiguous region corresponds to match states, another for start states +// and then an overlapping range for accelerated states. 
At a high level, our +// special state detection might look like this (for leftmost searching, where +// we continue searching even after seeing a match): +// +// byte = input[offset] +// current_state = next_state(current_state, byte) +// offset += 1 +// if current_state <= special.max: +// if current_state == 0: +// # We can never leave a dead state, so this always marks the +// # end of our search. +// return last_match +// if current_state == special.quit_id: +// # A quit state means we give up. If the DFA has no quit state, +// # then special.quit_id == 0 == dead, which is handled by the +// # conditional above. +// return Err(MatchError::quit { byte, offset: offset - 1 }) +// if special.min_match <= current_state <= special.max_match: +// last_match = Some(offset) +// if special.min_accel <= current_state <= special.max_accel: +// offset = accelerate(input, offset) +// last_match = Some(offset) +// elif special.min_start <= current_state <= special.max_start: +// offset = prefilter.find(input, offset) +// if special.min_accel <= current_state <= special.max_accel: +// offset = accelerate(input, offset) +// elif special.min_accel <= current_state <= special.max_accel: +// offset = accelerate(input, offset) +// +// There are some small details left out of the logic above. For example, +// in order to accelerate a state, we need to know which bytes to search for. +// This in turn implies some extra data we need to store in the DFA. To keep +// things compact, we would ideally only store +// +// N = special.max_accel - special.min_accel + 1 +// +// items. But state IDs are premultiplied, which means they are not contiguous. +// So in order to take a state ID and index an array of accelerated structures, +// we need to do: +// +// i = (state_id - special.min_accel) / stride +// +// (N.B. 'stride' is always a power of 2, so the above can be implemented via +// '(state_id - special.min_accel) >> stride2', where 'stride2' is x in +// 2^x=stride.) 
+// +// Moreover, some of these specialty categories may be empty. For example, +// DFAs are not required to have any match states or any accelerated states. +// In that case, the lower and upper bounds are both set to 0 (the dead state +// ID) and the first `current_state == 0` check subsumes cases where the +// ranges are empty. +// +// Loop unrolling, if applicable, has also been left out of the logic above. +// +// Graphically, the ranges look like this, where asterisks indicate ranges +// that can be empty. Each 'x' is a state. +// +// quit +// dead| +// || +// xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx +// | | | | start | | +// | |-------------| |-------| | +// | match* | | | | +// | | | | | +// | |----------| | | +// | accel* | | +// | | | +// | | | +// |----------------------------|------------------------ +// special non-special* +#[derive(Clone, Copy, Debug)] +pub(crate) struct Special { + /// The identifier of the last special state in a DFA. A state is special + /// if and only if its identifier is less than or equal to `max`. + pub(crate) max: StateID, + /// The identifier of the quit state in a DFA. (There is no analogous field + /// for the dead state since the dead state's ID is always zero, regardless + /// of state ID size.) + pub(crate) quit_id: StateID, + /// The identifier of the first match state. + pub(crate) min_match: StateID, + /// The identifier of the last match state. + pub(crate) max_match: StateID, + /// The identifier of the first accelerated state. + pub(crate) min_accel: StateID, + /// The identifier of the last accelerated state. + pub(crate) max_accel: StateID, + /// The identifier of the first start state. + pub(crate) min_start: StateID, + /// The identifier of the last start state. + pub(crate) max_start: StateID, +} + +impl Special { + /// Creates a new set of special ranges for a DFA. All ranges are initially + /// set to only contain the dead state. This is interpreted as an empty + /// range. 
+ #[cfg(feature = "dfa-build")] + pub(crate) fn new() -> Special { + Special { + max: DEAD, + quit_id: DEAD, + min_match: DEAD, + max_match: DEAD, + min_accel: DEAD, + max_accel: DEAD, + min_start: DEAD, + max_start: DEAD, + } + } + + /// Remaps all of the special state identifiers using the function given. + #[cfg(feature = "dfa-build")] + pub(crate) fn remap(&self, map: impl Fn(StateID) -> StateID) -> Special { + Special { + max: map(self.max), + quit_id: map(self.quit_id), + min_match: map(self.min_match), + max_match: map(self.max_match), + min_accel: map(self.min_accel), + max_accel: map(self.max_accel), + min_start: map(self.min_start), + max_start: map(self.max_start), + } + } + + /// Deserialize the given bytes into special state ranges. If the slice + /// given is not big enough, then this returns an error. Similarly, if + /// any of the expected invariants around special state ranges aren't + /// upheld, an error is returned. Note that this does not guarantee that + /// the information returned is correct. + /// + /// Upon success, this returns the number of bytes read in addition to the + /// special state IDs themselves. 
+ pub(crate) fn from_bytes( + mut slice: &[u8], + ) -> Result<(Special, usize), DeserializeError> { + wire::check_slice_len(slice, 8 * StateID::SIZE, "special states")?; + + let mut nread = 0; + let mut read_id = |what| -> Result { + let (id, nr) = wire::try_read_state_id(slice, what)?; + nread += nr; + slice = &slice[StateID::SIZE..]; + Ok(id) + }; + + let max = read_id("special max id")?; + let quit_id = read_id("special quit id")?; + let min_match = read_id("special min match id")?; + let max_match = read_id("special max match id")?; + let min_accel = read_id("special min accel id")?; + let max_accel = read_id("special max accel id")?; + let min_start = read_id("special min start id")?; + let max_start = read_id("special max start id")?; + + let special = Special { + max, + quit_id, + min_match, + max_match, + min_accel, + max_accel, + min_start, + max_start, + }; + special.validate()?; + assert_eq!(nread, special.write_to_len()); + Ok((special, nread)) + } + + /// Validate that the information describing special states satisfies + /// all known invariants. + pub(crate) fn validate(&self) -> Result<(), DeserializeError> { + // Check that both ends of the range are DEAD or neither are. + if self.min_match == DEAD && self.max_match != DEAD { + err!("min_match is DEAD, but max_match is not"); + } + if self.min_match != DEAD && self.max_match == DEAD { + err!("max_match is DEAD, but min_match is not"); + } + if self.min_accel == DEAD && self.max_accel != DEAD { + err!("min_accel is DEAD, but max_accel is not"); + } + if self.min_accel != DEAD && self.max_accel == DEAD { + err!("max_accel is DEAD, but min_accel is not"); + } + if self.min_start == DEAD && self.max_start != DEAD { + err!("min_start is DEAD, but max_start is not"); + } + if self.min_start != DEAD && self.max_start == DEAD { + err!("max_start is DEAD, but min_start is not"); + } + + // Check that ranges are well formed. 
+ if self.min_match > self.max_match { + err!("min_match should not be greater than max_match"); + } + if self.min_accel > self.max_accel { + err!("min_accel should not be greater than max_accel"); + } + if self.min_start > self.max_start { + err!("min_start should not be greater than max_start"); + } + + // Check that ranges are ordered with respect to one another. + if self.matches() && self.quit_id >= self.min_match { + err!("quit_id should not be greater than min_match"); + } + if self.accels() && self.quit_id >= self.min_accel { + err!("quit_id should not be greater than min_accel"); + } + if self.starts() && self.quit_id >= self.min_start { + err!("quit_id should not be greater than min_start"); + } + if self.matches() && self.accels() && self.min_accel < self.min_match { + err!("min_match should not be greater than min_accel"); + } + if self.matches() && self.starts() && self.min_start < self.min_match { + err!("min_match should not be greater than min_start"); + } + if self.accels() && self.starts() && self.min_start < self.min_accel { + err!("min_accel should not be greater than min_start"); + } + + // Check that max is at least as big as everything else. + if self.max < self.quit_id { + err!("quit_id should not be greater than max"); + } + if self.max < self.max_match { + err!("max_match should not be greater than max"); + } + if self.max < self.max_accel { + err!("max_accel should not be greater than max"); + } + if self.max < self.max_start { + err!("max_start should not be greater than max"); + } + + Ok(()) + } + + /// Validate that the special state information is compatible with the + /// given state len. + pub(crate) fn validate_state_len( + &self, + len: usize, + stride2: usize, + ) -> Result<(), DeserializeError> { + // We assume that 'validate' has already passed, so we know that 'max' + // is truly the max. So all we need to check is that the max state ID + // is less than the state ID len. 
The max legal value here is len-1, + // which occurs when there are no non-special states. + if (self.max.as_usize() >> stride2) >= len { + err!("max should not be greater than or equal to state length"); + } + Ok(()) + } + + /// Write the IDs and ranges for special states to the given byte buffer. + /// The buffer given must have enough room to store all data, otherwise + /// this will return an error. The number of bytes written is returned + /// on success. The number of bytes written is guaranteed to be a multiple + /// of 8. + pub(crate) fn write_to( + &self, + dst: &mut [u8], + ) -> Result { + use crate::util::wire::write_state_id as write; + + if dst.len() < self.write_to_len() { + return Err(SerializeError::buffer_too_small("special state ids")); + } + + let mut nwrite = 0; + nwrite += write::(self.max, &mut dst[nwrite..]); + nwrite += write::(self.quit_id, &mut dst[nwrite..]); + nwrite += write::(self.min_match, &mut dst[nwrite..]); + nwrite += write::(self.max_match, &mut dst[nwrite..]); + nwrite += write::(self.min_accel, &mut dst[nwrite..]); + nwrite += write::(self.max_accel, &mut dst[nwrite..]); + nwrite += write::(self.min_start, &mut dst[nwrite..]); + nwrite += write::(self.max_start, &mut dst[nwrite..]); + + assert_eq!( + self.write_to_len(), + nwrite, + "expected to write certain number of bytes", + ); + assert_eq!( + nwrite % 8, + 0, + "expected to write multiple of 8 bytes for special states", + ); + Ok(nwrite) + } + + /// Returns the total number of bytes written by `write_to`. + pub(crate) fn write_to_len(&self) -> usize { + 8 * StateID::SIZE + } + + /// Sets the maximum special state ID based on the current values. This + /// should be used once all possible state IDs are set. 
+ #[cfg(feature = "dfa-build")] + pub(crate) fn set_max(&mut self) { + use core::cmp::max; + self.max = max( + self.quit_id, + max(self.max_match, max(self.max_accel, self.max_start)), + ); + } + + /// Sets the maximum special state ID such that starting states are not + /// considered "special." This also marks the min/max starting states as + /// DEAD such that 'is_start_state' always returns false, even if the state + /// is actually a starting state. + /// + /// This is useful when there is no prefilter set. It will avoid + /// ping-ponging between the hot path in the DFA search code and the start + /// state handling code, which is typically only useful for executing a + /// prefilter. + #[cfg(feature = "dfa-build")] + pub(crate) fn set_no_special_start_states(&mut self) { + use core::cmp::max; + self.max = max(self.quit_id, max(self.max_match, self.max_accel)); + self.min_start = DEAD; + self.max_start = DEAD; + } + + /// Returns true if and only if the given state ID is a special state. + #[inline] + pub(crate) fn is_special_state(&self, id: StateID) -> bool { + id <= self.max + } + + /// Returns true if and only if the given state ID is a dead state. + #[inline] + pub(crate) fn is_dead_state(&self, id: StateID) -> bool { + id == DEAD + } + + /// Returns true if and only if the given state ID is a quit state. + #[inline] + pub(crate) fn is_quit_state(&self, id: StateID) -> bool { + !self.is_dead_state(id) && self.quit_id == id + } + + /// Returns true if and only if the given state ID is a match state. + #[inline] + pub(crate) fn is_match_state(&self, id: StateID) -> bool { + !self.is_dead_state(id) && self.min_match <= id && id <= self.max_match + } + + /// Returns true if and only if the given state ID is an accel state. + #[inline] + pub(crate) fn is_accel_state(&self, id: StateID) -> bool { + !self.is_dead_state(id) && self.min_accel <= id && id <= self.max_accel + } + + /// Returns true if and only if the given state ID is a start state. 
+ #[inline] + pub(crate) fn is_start_state(&self, id: StateID) -> bool { + !self.is_dead_state(id) && self.min_start <= id && id <= self.max_start + } + + /// Returns the total number of match states for a dense table based DFA. + #[inline] + pub(crate) fn match_len(&self, stride: usize) -> usize { + if self.matches() { + (self.max_match.as_usize() - self.min_match.as_usize() + stride) + / stride + } else { + 0 + } + } + + /// Returns true if and only if there is at least one match state. + #[inline] + pub(crate) fn matches(&self) -> bool { + self.min_match != DEAD + } + + /// Returns the total number of accel states. + #[cfg(feature = "dfa-build")] + pub(crate) fn accel_len(&self, stride: usize) -> usize { + if self.accels() { + (self.max_accel.as_usize() - self.min_accel.as_usize() + stride) + / stride + } else { + 0 + } + } + + /// Returns true if and only if there is at least one accel state. + #[inline] + pub(crate) fn accels(&self) -> bool { + self.min_accel != DEAD + } + + /// Returns true if and only if there is at least one start state. + #[inline] + pub(crate) fn starts(&self) -> bool { + self.min_start != DEAD + } +} diff --git a/vendor/regex-automata/src/dfa/start.rs b/vendor/regex-automata/src/dfa/start.rs new file mode 100644 index 0000000..fddc702 --- /dev/null +++ b/vendor/regex-automata/src/dfa/start.rs @@ -0,0 +1,74 @@ +use core::mem::size_of; + +use crate::util::wire::{self, DeserializeError, Endian, SerializeError}; + +/// The kind of anchored starting configurations to support in a DFA. +/// +/// Fully compiled DFAs need to be explicitly configured as to which anchored +/// starting configurations to support. The reason for not just supporting +/// everything unconditionally is that it can use more resources (such as +/// memory and build time). The downside of this is that if you try to execute +/// a search using an [`Anchored`](crate::Anchored) mode that is not supported +/// by the DFA, then the search will return an error. 
+#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum StartKind { + /// Support both anchored and unanchored searches. + Both, + /// Support only unanchored searches. Requesting an anchored search will + /// panic. + /// + /// Note that even if an unanchored search is requested, the pattern itself + /// may still be anchored. For example, `^abc` will only match `abc` at the + /// start of a haystack. This will remain true, even if the regex engine + /// only supported unanchored searches. + Unanchored, + /// Support only anchored searches. Requesting an unanchored search will + /// panic. + Anchored, +} + +impl StartKind { + pub(crate) fn from_bytes( + slice: &[u8], + ) -> Result<(StartKind, usize), DeserializeError> { + wire::check_slice_len(slice, size_of::(), "start kind bytes")?; + let (n, nr) = wire::try_read_u32(slice, "start kind integer")?; + match n { + 0 => Ok((StartKind::Both, nr)), + 1 => Ok((StartKind::Unanchored, nr)), + 2 => Ok((StartKind::Anchored, nr)), + _ => Err(DeserializeError::generic("unrecognized start kind")), + } + } + + pub(crate) fn write_to( + &self, + dst: &mut [u8], + ) -> Result { + let nwrite = self.write_to_len(); + if dst.len() < nwrite { + return Err(SerializeError::buffer_too_small("start kind")); + } + let n = match *self { + StartKind::Both => 0, + StartKind::Unanchored => 1, + StartKind::Anchored => 2, + }; + E::write_u32(n, dst); + Ok(nwrite) + } + + pub(crate) fn write_to_len(&self) -> usize { + size_of::() + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn has_unanchored(&self) -> bool { + matches!(*self, StartKind::Both | StartKind::Unanchored) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn has_anchored(&self) -> bool { + matches!(*self, StartKind::Both | StartKind::Anchored) + } +} diff --git a/vendor/regex-automata/src/hybrid/dfa.rs b/vendor/regex-automata/src/hybrid/dfa.rs new file mode 100644 index 0000000..bd9179b --- /dev/null +++ 
b/vendor/regex-automata/src/hybrid/dfa.rs @@ -0,0 +1,4418 @@ +/*! +Types and routines specific to lazy DFAs. + +This module is the home of [`hybrid::dfa::DFA`](DFA). + +This module also contains a [`hybrid::dfa::Builder`](Builder) and a +[`hybrid::dfa::Config`](Config) for configuring and building a lazy DFA. +*/ + +use core::{iter, mem::size_of}; + +use alloc::vec::Vec; + +use crate::{ + hybrid::{ + error::{BuildError, CacheError, StartError}, + id::{LazyStateID, LazyStateIDError}, + search, + }, + nfa::thompson, + util::{ + alphabet::{self, ByteClasses, ByteSet}, + determinize::{self, State, StateBuilderEmpty, StateBuilderNFA}, + empty, + prefilter::Prefilter, + primitives::{PatternID, StateID as NFAStateID}, + search::{ + Anchored, HalfMatch, Input, MatchError, MatchKind, PatternSet, + }, + sparse_set::SparseSets, + start::{self, Start, StartByteMap}, + }, +}; + +/// The minimum number of states that a lazy DFA's cache size must support. +/// +/// This is checked at time of construction to ensure that at least some small +/// number of states can fit in the given capacity allotment. If we can't fit +/// at least this number of states, then the thinking is that it's pretty +/// senseless to use the lazy DFA. More to the point, parts of the code do +/// assume that the cache can fit at least some small number of states. +const MIN_STATES: usize = SENTINEL_STATES + 2; + +/// The number of "sentinel" states that get added to every lazy DFA. +/// +/// These are special states indicating status conditions of a search: unknown, +/// dead and quit. These states in particular also use zero NFA states, so +/// their memory usage is quite small. This is relevant for computing the +/// minimum memory needed for a lazy DFA cache. +const SENTINEL_STATES: usize = 3; + +/// A hybrid NFA/DFA (also called a "lazy DFA") for regex searching. +/// +/// A lazy DFA is a DFA that builds itself at search time. 
It otherwise has +/// very similar characteristics as a [`dense::DFA`](crate::dfa::dense::DFA). +/// Indeed, both support precisely the same regex features with precisely the +/// same semantics. +/// +/// Whereas a `dense::DFA` must be completely built to handle any input before +/// it may be used for search, a lazy DFA starts off effectively empty. During +/// a search, a lazy DFA will build itself depending on whether it has already +/// computed the next transition or not. If it has, then it looks a lot like +/// a `dense::DFA` internally: it does a very fast table based access to find +/// the next transition. Otherwise, if the state hasn't been computed, then it +/// does determinization _for that specific transition_ to compute the next DFA +/// state. +/// +/// The main selling point of a lazy DFA is that, in practice, it has +/// the performance profile of a `dense::DFA` without the weakness of it +/// taking worst case exponential time to build. Indeed, for each byte of +/// input, the lazy DFA will construct at most one new DFA state. Thus, a +/// lazy DFA achieves worst case `O(mn)` time for regex search (where `m ~ +/// pattern.len()` and `n ~ haystack.len()`). +/// +/// The main downsides of a lazy DFA are: +/// +/// 1. It requires mutable "cache" space during search. This is where the +/// transition table, among other things, is stored. +/// 2. In pathological cases (e.g., if the cache is too small), it will run +/// out of room and either require a bigger cache capacity or will repeatedly +/// clear the cache and thus repeatedly regenerate DFA states. Overall, this +/// will tend to be slower than a typical NFA simulation. +/// +/// # Capabilities +/// +/// Like a `dense::DFA`, a single lazy DFA fundamentally supports the following +/// operations: +/// +/// 1. Detection of a match. +/// 2. Location of the end of a match. +/// 3. In the case of a lazy DFA with multiple patterns, which pattern matched +/// is reported as well. 
+/// +/// A notable absence from the above list of capabilities is the location of +/// the *start* of a match. In order to provide both the start and end of +/// a match, *two* lazy DFAs are required. This functionality is provided by a +/// [`Regex`](crate::hybrid::regex::Regex). +/// +/// # Example +/// +/// This shows how to build a lazy DFA with the default configuration and +/// execute a search. Notice how, in contrast to a `dense::DFA`, we must create +/// a cache and pass it to our search routine. +/// +/// ``` +/// use regex_automata::{hybrid::dfa::DFA, HalfMatch, Input}; +/// +/// let dfa = DFA::new("foo[0-9]+")?; +/// let mut cache = dfa.create_cache(); +/// +/// let expected = Some(HalfMatch::must(0, 8)); +/// assert_eq!(expected, dfa.try_search_fwd( +/// &mut cache, &Input::new("foo12345"))?, +/// ); +/// # Ok::<(), Box>(()) +/// ``` +#[derive(Clone, Debug)] +pub struct DFA { + config: Config, + nfa: thompson::NFA, + stride2: usize, + start_map: StartByteMap, + classes: ByteClasses, + quitset: ByteSet, + cache_capacity: usize, +} + +impl DFA { + /// Parse the given regular expression using a default configuration and + /// return the corresponding lazy DFA. + /// + /// If you want a non-default configuration, then use the [`Builder`] to + /// set your own configuration. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{hybrid::dfa::DFA, HalfMatch, Input}; + /// + /// let dfa = DFA::new("foo[0-9]+bar")?; + /// let mut cache = dfa.create_cache(); + /// + /// let expected = HalfMatch::must(0, 11); + /// assert_eq!( + /// Some(expected), + /// dfa.try_search_fwd(&mut cache, &Input::new("foo12345bar"))?, + /// ); + /// # Ok::<(), Box>(()) + /// ``` + #[cfg(feature = "syntax")] + pub fn new(pattern: &str) -> Result { + DFA::builder().build(pattern) + } + + /// Parse the given regular expressions using a default configuration and + /// return the corresponding lazy multi-DFA. 
+ /// + /// If you want a non-default configuration, then use the [`Builder`] to + /// set your own configuration. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{hybrid::dfa::DFA, HalfMatch, Input}; + /// + /// let dfa = DFA::new_many(&["[0-9]+", "[a-z]+"])?; + /// let mut cache = dfa.create_cache(); + /// + /// let expected = HalfMatch::must(1, 3); + /// assert_eq!( + /// Some(expected), + /// dfa.try_search_fwd(&mut cache, &Input::new("foo12345bar"))?, + /// ); + /// # Ok::<(), Box>(()) + /// ``` + #[cfg(feature = "syntax")] + pub fn new_many>(patterns: &[P]) -> Result { + DFA::builder().build_many(patterns) + } + + /// Create a new lazy DFA that matches every input. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{hybrid::dfa::DFA, HalfMatch, Input}; + /// + /// let dfa = DFA::always_match()?; + /// let mut cache = dfa.create_cache(); + /// + /// let expected = HalfMatch::must(0, 0); + /// assert_eq!(Some(expected), dfa.try_search_fwd( + /// &mut cache, &Input::new(""))?, + /// ); + /// assert_eq!(Some(expected), dfa.try_search_fwd( + /// &mut cache, &Input::new("foo"))?, + /// ); + /// # Ok::<(), Box>(()) + /// ``` + pub fn always_match() -> Result { + let nfa = thompson::NFA::always_match(); + Builder::new().build_from_nfa(nfa) + } + + /// Create a new lazy DFA that never matches any input. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{hybrid::dfa::DFA, Input}; + /// + /// let dfa = DFA::never_match()?; + /// let mut cache = dfa.create_cache(); + /// + /// assert_eq!(None, dfa.try_search_fwd(&mut cache, &Input::new(""))?); + /// assert_eq!(None, dfa.try_search_fwd(&mut cache, &Input::new("foo"))?); + /// # Ok::<(), Box>(()) + /// ``` + pub fn never_match() -> Result { + let nfa = thompson::NFA::never_match(); + Builder::new().build_from_nfa(nfa) + } + + /// Return a default configuration for a `DFA`. 
+ /// + /// This is a convenience routine to avoid needing to import the [`Config`] + /// type when customizing the construction of a lazy DFA. + /// + /// # Example + /// + /// This example shows how to build a lazy DFA that heuristically supports + /// Unicode word boundaries. + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{hybrid::dfa::DFA, HalfMatch, MatchError, Input}; + /// + /// let re = DFA::builder() + /// .configure(DFA::config().unicode_word_boundary(true)) + /// .build(r"\b\w+\b")?; + /// let mut cache = re.create_cache(); + /// + /// // Since our haystack is all ASCII, the DFA search sees then and knows + /// // it is legal to interpret Unicode word boundaries as ASCII word + /// // boundaries. + /// let input = Input::new("!!foo!!"); + /// let expected = HalfMatch::must(0, 5); + /// assert_eq!(Some(expected), re.try_search_fwd(&mut cache, &input)?); + /// + /// // But if our haystack contains non-ASCII, then the search will fail + /// // with an error. + /// let input = Input::new("!!βββ!!"); + /// let expected = MatchError::quit(b'\xCE', 2); + /// assert_eq!(Err(expected), re.try_search_fwd(&mut cache, &input)); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn config() -> Config { + Config::new() + } + + /// Return a builder for configuring the construction of a `Regex`. + /// + /// This is a convenience routine to avoid needing to import the + /// [`Builder`] type in common cases. + /// + /// # Example + /// + /// This example shows how to use the builder to disable UTF-8 mode + /// everywhere for lazy DFAs. 
+ /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{hybrid::dfa::DFA, util::syntax, HalfMatch, Input}; + /// + /// let re = DFA::builder() + /// .syntax(syntax::Config::new().utf8(false)) + /// .build(r"foo(?-u:[^b])ar.*")?; + /// let mut cache = re.create_cache(); + /// + /// let input = Input::new(b"\xFEfoo\xFFarzz\xE2\x98\xFF\n"); + /// let expected = Some(HalfMatch::must(0, 9)); + /// let got = re.try_search_fwd(&mut cache, &input)?; + /// assert_eq!(expected, got); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn builder() -> Builder { + Builder::new() + } + + /// Create a new cache for this lazy DFA. + /// + /// The cache returned should only be used for searches for this + /// lazy DFA. If you want to reuse the cache for another DFA, then + /// you must call [`Cache::reset`] with that DFA (or, equivalently, + /// [`DFA::reset_cache`]). + pub fn create_cache(&self) -> Cache { + Cache::new(self) + } + + /// Reset the given cache such that it can be used for searching with the + /// this lazy DFA (and only this DFA). + /// + /// A cache reset permits reusing memory already allocated in this cache + /// with a different lazy DFA. + /// + /// Resetting a cache sets its "clear count" to 0. This is relevant if the + /// lazy DFA has been configured to "give up" after it has cleared the + /// cache a certain number of times. + /// + /// Any lazy state ID generated by the cache prior to resetting it is + /// invalid after the reset. + /// + /// # Example + /// + /// This shows how to re-purpose a cache for use with a different DFA. 
+ /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{hybrid::dfa::DFA, HalfMatch, Input}; + /// + /// let dfa1 = DFA::new(r"\w")?; + /// let dfa2 = DFA::new(r"\W")?; + /// + /// let mut cache = dfa1.create_cache(); + /// assert_eq!( + /// Some(HalfMatch::must(0, 2)), + /// dfa1.try_search_fwd(&mut cache, &Input::new("Δ"))?, + /// ); + /// + /// // Using 'cache' with dfa2 is not allowed. It may result in panics or + /// // incorrect results. In order to re-purpose the cache, we must reset + /// // it with the DFA we'd like to use it with. + /// // + /// // Similarly, after this reset, using the cache with 'dfa1' is also not + /// // allowed. + /// dfa2.reset_cache(&mut cache); + /// assert_eq!( + /// Some(HalfMatch::must(0, 3)), + /// dfa2.try_search_fwd(&mut cache, &Input::new("☃"))?, + /// ); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn reset_cache(&self, cache: &mut Cache) { + Lazy::new(self, cache).reset_cache() + } + + /// Returns the total number of patterns compiled into this lazy DFA. + /// + /// In the case of a DFA that contains no patterns, this returns `0`. 
+ /// + /// # Example + /// + /// This example shows the pattern length for a DFA that never matches: + /// + /// ``` + /// use regex_automata::hybrid::dfa::DFA; + /// + /// let dfa = DFA::never_match()?; + /// assert_eq!(dfa.pattern_len(), 0); + /// # Ok::<(), Box>(()) + /// ``` + /// + /// And another example for a DFA that matches at every position: + /// + /// ``` + /// use regex_automata::hybrid::dfa::DFA; + /// + /// let dfa = DFA::always_match()?; + /// assert_eq!(dfa.pattern_len(), 1); + /// # Ok::<(), Box>(()) + /// ``` + /// + /// And finally, a DFA that was constructed from multiple patterns: + /// + /// ``` + /// use regex_automata::hybrid::dfa::DFA; + /// + /// let dfa = DFA::new_many(&["[0-9]+", "[a-z]+", "[A-Z]+"])?; + /// assert_eq!(dfa.pattern_len(), 3); + /// # Ok::<(), Box>(()) + /// ``` + pub fn pattern_len(&self) -> usize { + self.nfa.pattern_len() + } + + /// Returns the equivalence classes that make up the alphabet for this DFA. + /// + /// Unless [`Config::byte_classes`] was disabled, it is possible that + /// multiple distinct bytes are grouped into the same equivalence class + /// if it is impossible for them to discriminate between a match and a + /// non-match. This has the effect of reducing the overall alphabet size + /// and in turn potentially substantially reducing the size of the DFA's + /// transition table. + /// + /// The downside of using equivalence classes like this is that every state + /// transition will automatically use this map to convert an arbitrary + /// byte to its corresponding equivalence class. In practice this has a + /// negligible impact on performance. + pub fn byte_classes(&self) -> &ByteClasses { + &self.classes + } + + /// Returns this lazy DFA's configuration. + pub fn get_config(&self) -> &Config { + &self.config + } + + /// Returns a reference to the underlying NFA. 
+ pub fn get_nfa(&self) -> &thompson::NFA { + &self.nfa + } + + /// Returns the stride, as a base-2 exponent, required for these + /// equivalence classes. + /// + /// The stride is always the smallest power of 2 that is greater than or + /// equal to the alphabet length. This is done so that converting between + /// state IDs and indices can be done with shifts alone, which is much + /// faster than integer division. + fn stride2(&self) -> usize { + self.stride2 + } + + /// Returns the total stride for every state in this lazy DFA. This + /// corresponds to the total number of transitions used by each state in + /// this DFA's transition table. + fn stride(&self) -> usize { + 1 << self.stride2() + } + + /// Returns the memory usage, in bytes, of this lazy DFA. + /// + /// This does **not** include the stack size used up by this lazy DFA. To + /// compute that, use `std::mem::size_of::()`. This also does not + /// include the size of the `Cache` used. + /// + /// This also does not include any heap memory used by the NFA inside of + /// this hybrid NFA/DFA. This is because the NFA's ownership is shared, and + /// thus not owned by this hybrid NFA/DFA. More practically, several regex + /// engines in this crate embed an NFA, and reporting the NFA's memory + /// usage in all of them would likely result in reporting higher heap + /// memory than is actually used. + pub fn memory_usage(&self) -> usize { + // The only thing that uses heap memory in a DFA is the NFA. But the + // NFA has shared ownership, so reporting its memory as part of the + // hybrid DFA is likely to lead to double-counting the NFA memory + // somehow. In particular, this DFA does not really own an NFA, so + // including it in the DFA's memory usage doesn't seem semantically + // correct. + 0 + } +} + +impl DFA { + /// Executes a forward search and returns the end position of the leftmost + /// match that is found. If no match exists, then `None` is returned. 
+ /// + /// In particular, this method continues searching even after it enters + /// a match state. The search only terminates once it has reached the + /// end of the input or when it has entered a dead or quit state. Upon + /// termination, the position of the last byte seen while still in a match + /// state is returned. + /// + /// # Errors + /// + /// This routine errors if the search could not complete. This can occur + /// in a number of circumstances: + /// + /// * The configuration of the lazy DFA may permit it to "quit" the search. + /// For example, setting quit bytes or enabling heuristic support for + /// Unicode word boundaries. The default configuration does not enable any + /// option that could result in the lazy DFA quitting. + /// * The configuration of the lazy DFA may also permit it to "give up" + /// on a search if it makes ineffective use of its transition table + /// cache. The default configuration does not enable this by default, + /// although it is typically a good idea to. + /// * When the provided `Input` configuration is not supported. For + /// example, by providing an unsupported anchor mode. + /// + /// When a search returns an error, callers cannot know whether a match + /// exists or not. + /// + /// # Example + /// + /// This example shows how to run a basic search. + /// + /// ``` + /// use regex_automata::{hybrid::dfa::DFA, HalfMatch, Input}; + /// + /// let dfa = DFA::new("foo[0-9]+")?; + /// let mut cache = dfa.create_cache(); + /// let expected = HalfMatch::must(0, 8); + /// assert_eq!(Some(expected), dfa.try_search_fwd( + /// &mut cache, &Input::new("foo12345"))?, + /// ); + /// + /// // Even though a match is found after reading the first byte (`a`), + /// // the leftmost first match semantics demand that we find the earliest + /// // match that prefers earlier parts of the pattern over later parts. 
+ /// let dfa = DFA::new("abc|a")?; + /// let mut cache = dfa.create_cache(); + /// let expected = HalfMatch::must(0, 3); + /// assert_eq!(Some(expected), dfa.try_search_fwd( + /// &mut cache, &Input::new("abc"))?, + /// ); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// # Example: specific pattern search + /// + /// This example shows how to build a lazy multi-DFA that permits searching + /// for specific patterns. + /// + /// ``` + /// use regex_automata::{ + /// hybrid::dfa::DFA, + /// Anchored, HalfMatch, PatternID, Input, + /// }; + /// + /// let dfa = DFA::builder() + /// .configure(DFA::config().starts_for_each_pattern(true)) + /// .build_many(&["[a-z0-9]{6}", "[a-z][a-z0-9]{5}"])?; + /// let mut cache = dfa.create_cache(); + /// let haystack = "foo123"; + /// + /// // Since we are using the default leftmost-first match and both + /// // patterns match at the same starting position, only the first pattern + /// // will be returned in this case when doing a search for any of the + /// // patterns. + /// let expected = Some(HalfMatch::must(0, 6)); + /// let got = dfa.try_search_fwd(&mut cache, &Input::new(haystack))?; + /// assert_eq!(expected, got); + /// + /// // But if we want to check whether some other pattern matches, then we + /// // can provide its pattern ID. + /// let expected = Some(HalfMatch::must(1, 6)); + /// let input = Input::new(haystack) + /// .anchored(Anchored::Pattern(PatternID::must(1))); + /// let got = dfa.try_search_fwd(&mut cache, &input)?; + /// assert_eq!(expected, got); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// # Example: specifying the bounds of a search + /// + /// This example shows how providing the bounds of a search can produce + /// different results than simply sub-slicing the haystack. + /// + /// ``` + /// use regex_automata::{hybrid::dfa::DFA, HalfMatch, Input}; + /// + /// // N.B. We disable Unicode here so that we use a simple ASCII word + /// // boundary. 
Alternatively, we could enable heuristic support for + /// // Unicode word boundaries since our haystack is pure ASCII. + /// let dfa = DFA::new(r"(?-u)\b[0-9]{3}\b")?; + /// let mut cache = dfa.create_cache(); + /// let haystack = "foo123bar"; + /// + /// // Since we sub-slice the haystack, the search doesn't know about the + /// // larger context and assumes that `123` is surrounded by word + /// // boundaries. And of course, the match position is reported relative + /// // to the sub-slice as well, which means we get `3` instead of `6`. + /// let expected = Some(HalfMatch::must(0, 3)); + /// let got = dfa.try_search_fwd( + /// &mut cache, + /// &Input::new(&haystack[3..6]), + /// )?; + /// assert_eq!(expected, got); + /// + /// // But if we provide the bounds of the search within the context of the + /// // entire haystack, then the search can take the surrounding context + /// // into account. (And if we did find a match, it would be reported + /// // as a valid offset into `haystack` instead of its sub-slice.) + /// let expected = None; + /// let got = dfa.try_search_fwd( + /// &mut cache, + /// &Input::new(haystack).range(3..6), + /// )?; + /// assert_eq!(expected, got); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn try_search_fwd( + &self, + cache: &mut Cache, + input: &Input<'_>, + ) -> Result, MatchError> { + let utf8empty = self.get_nfa().has_empty() && self.get_nfa().is_utf8(); + let hm = match search::find_fwd(self, cache, input)? { + None => return Ok(None), + Some(hm) if !utf8empty => return Ok(Some(hm)), + Some(hm) => hm, + }; + // We get to this point when we know our DFA can match the empty string + // AND when UTF-8 mode is enabled. In this case, we skip any matches + // whose offset splits a codepoint. Such a match is necessarily a + // zero-width match, because UTF-8 mode requires the underlying NFA + // to be built such that all non-empty matches span valid UTF-8. 
+ // Therefore, any match that ends in the middle of a codepoint cannot + // be part of a span of valid UTF-8 and thus must be an empty match. + // In such cases, we skip it, so as not to report matches that split a + // codepoint. + // + // Note that this is not a checked assumption. Callers *can* provide an + // NFA with UTF-8 mode enabled but produces non-empty matches that span + // invalid UTF-8. But doing so is documented to result in unspecified + // behavior. + empty::skip_splits_fwd(input, hm, hm.offset(), |input| { + let got = search::find_fwd(self, cache, input)?; + Ok(got.map(|hm| (hm, hm.offset()))) + }) + } + + /// Executes a reverse search and returns the start of the position of the + /// leftmost match that is found. If no match exists, then `None` is + /// returned. + /// + /// # Errors + /// + /// This routine errors if the search could not complete. This can occur + /// in a number of circumstances: + /// + /// * The configuration of the lazy DFA may permit it to "quit" the search. + /// For example, setting quit bytes or enabling heuristic support for + /// Unicode word boundaries. The default configuration does not enable any + /// option that could result in the lazy DFA quitting. + /// * The configuration of the lazy DFA may also permit it to "give up" + /// on a search if it makes ineffective use of its transition table + /// cache. The default configuration does not enable this by default, + /// although it is typically a good idea to. + /// * When the provided `Input` configuration is not supported. For + /// example, by providing an unsupported anchor mode. + /// + /// When a search returns an error, callers cannot know whether a match + /// exists or not. + /// + /// # Example + /// + /// This routine is principally useful when used in + /// conjunction with the + /// [`nfa::thompson::Config::reverse`](crate::nfa::thompson::Config::reverse) + /// configuration. 
In general, it's unlikely to be correct to use both + /// `try_search_fwd` and `try_search_rev` with the same DFA since any + /// particular DFA will only support searching in one direction with + /// respect to the pattern. + /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson, + /// hybrid::dfa::DFA, + /// HalfMatch, Input, + /// }; + /// + /// let dfa = DFA::builder() + /// .thompson(thompson::Config::new().reverse(true)) + /// .build("foo[0-9]+")?; + /// let mut cache = dfa.create_cache(); + /// let expected = HalfMatch::must(0, 0); + /// assert_eq!( + /// Some(expected), + /// dfa.try_search_rev(&mut cache, &Input::new("foo12345"))?, + /// ); + /// + /// // Even though a match is found after reading the last byte (`c`), + /// // the leftmost first match semantics demand that we find the earliest + /// // match that prefers earlier parts of the pattern over later parts. + /// let dfa = DFA::builder() + /// .thompson(thompson::Config::new().reverse(true)) + /// .build("abc|c")?; + /// let mut cache = dfa.create_cache(); + /// let expected = HalfMatch::must(0, 0); + /// assert_eq!(Some(expected), dfa.try_search_rev( + /// &mut cache, &Input::new("abc"))?, + /// ); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + /// + /// # Example: UTF-8 mode + /// + /// This example demonstrates that UTF-8 mode applies to reverse + /// DFAs. When UTF-8 mode is enabled in the underlying NFA, then all + /// matches reported must correspond to valid UTF-8 spans. This includes + /// prohibiting zero-width matches that split a codepoint. + /// + /// UTF-8 mode is enabled by default.
Notice below how the only zero-width + /// matches reported are those at UTF-8 boundaries: + /// + /// ``` + /// use regex_automata::{ + /// hybrid::dfa::DFA, + /// nfa::thompson, + /// HalfMatch, Input, MatchKind, + /// }; + /// + /// let dfa = DFA::builder() + /// .thompson(thompson::Config::new().reverse(true)) + /// .build(r"")?; + /// let mut cache = dfa.create_cache(); + /// + /// // Run the reverse DFA to collect all matches. + /// let mut input = Input::new("☃"); + /// let mut matches = vec![]; + /// loop { + /// match dfa.try_search_rev(&mut cache, &input)? { + /// None => break, + /// Some(hm) => { + /// matches.push(hm); + /// if hm.offset() == 0 || input.end() == 0 { + /// break; + /// } else if hm.offset() < input.end() { + /// input.set_end(hm.offset()); + /// } else { + /// // This is only necessary to handle zero-width + /// // matches, which of course occur in this example. + /// // Without this, the search would never advance + /// // backwards beyond the initial match. + /// input.set_end(input.end() - 1); + /// } + /// } + /// } + /// } + /// + /// // No matches split a codepoint. + /// let expected = vec![ + /// HalfMatch::must(0, 3), + /// HalfMatch::must(0, 0), + /// ]; + /// assert_eq!(expected, matches); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// Now let's look at the same example, but with UTF-8 mode on the + /// underlying NFA disabled: + /// + /// ``` + /// use regex_automata::{ + /// hybrid::dfa::DFA, + /// nfa::thompson, + /// HalfMatch, Input, MatchKind, + /// }; + /// + /// let dfa = DFA::builder() + /// .thompson(thompson::Config::new().reverse(true).utf8(false)) + /// .build(r"")?; + /// let mut cache = dfa.create_cache(); + /// + /// // Run the reverse DFA to collect all matches. + /// let mut input = Input::new("☃"); + /// let mut matches = vec![]; + /// loop { + /// match dfa.try_search_rev(&mut cache, &input)? 
{ + /// None => break, + /// Some(hm) => { + /// matches.push(hm); + /// if hm.offset() == 0 || input.end() == 0 { + /// break; + /// } else if hm.offset() < input.end() { + /// input.set_end(hm.offset()); + /// } else { + /// // This is only necessary to handle zero-width + /// // matches, which of course occur in this example. + /// // Without this, the search would never advance + /// // backwards beyond the initial match. + /// input.set_end(input.end() - 1); + /// } + /// } + /// } + /// } + /// + /// // No matches split a codepoint. + /// let expected = vec![ + /// HalfMatch::must(0, 3), + /// HalfMatch::must(0, 2), + /// HalfMatch::must(0, 1), + /// HalfMatch::must(0, 0), + /// ]; + /// assert_eq!(expected, matches); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn try_search_rev( + &self, + cache: &mut Cache, + input: &Input<'_>, + ) -> Result, MatchError> { + let utf8empty = self.get_nfa().has_empty() && self.get_nfa().is_utf8(); + let hm = match search::find_rev(self, cache, input)? { + None => return Ok(None), + Some(hm) if !utf8empty => return Ok(Some(hm)), + Some(hm) => hm, + }; + empty::skip_splits_rev(input, hm, hm.offset(), |input| { + let got = search::find_rev(self, cache, input)?; + Ok(got.map(|hm| (hm, hm.offset()))) + }) + } + + /// Executes an overlapping forward search and returns the end position of + /// matches as they are found. If no match exists, then `None` is returned. + /// + /// This routine is principally only useful when searching for multiple + /// patterns on inputs where multiple patterns may match the same regions + /// of text. In particular, callers must preserve the automaton's search + /// state from prior calls so that the implementation knows where the last + /// match occurred. + /// + /// When using this routine to implement an iterator of overlapping + /// matches, the `start` of the search should remain invariant throughout + /// iteration. 
The `OverlappingState` given to the search will keep track + /// of the current position of the search. (This is because multiple + /// matches may be reported at the same position, so only the search + /// implementation itself knows when to advance the position.) + /// + /// If for some reason you want the search to forget about its previous + /// state and restart the search at a particular position, then setting the + /// state to [`OverlappingState::start`] will accomplish that. + /// + /// # Errors + /// + /// This routine errors if the search could not complete. This can occur + /// in a number of circumstances: + /// + /// * The configuration of the lazy DFA may permit it to "quit" the search. + /// For example, setting quit bytes or enabling heuristic support for + /// Unicode word boundaries. The default configuration does not enable any + /// option that could result in the lazy DFA quitting. + /// * The configuration of the lazy DFA may also permit it to "give up" + /// on a search if it makes ineffective use of its transition table + /// cache. The default configuration does not enable this by default, + /// although it is typically a good idea to. + /// * When the provided `Input` configuration is not supported. For + /// example, by providing an unsupported anchor mode. + /// + /// When a search returns an error, callers cannot know whether a match + /// exists or not. + /// + /// # Example + /// + /// This example shows how to run a basic overlapping search. Notice + /// that we build the automaton with a `MatchKind::All` configuration. + /// Overlapping searches are unlikely to work as one would expect when + /// using the default `MatchKind::LeftmostFirst` match semantics, since + /// leftmost-first matching is fundamentally incompatible with overlapping + /// searches. 
Namely, overlapping searches need to report matches as they + /// are seen, whereas leftmost-first searches will continue searching even + /// after a match has been observed in order to find the conventional end + /// position of the match. More concretely, leftmost-first searches use + /// dead states to terminate a search after a specific match can no longer + /// be extended. Overlapping searches instead do the opposite by continuing + /// the search to find totally new matches (potentially of other patterns). + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{ + /// hybrid::dfa::{DFA, OverlappingState}, + /// HalfMatch, Input, MatchKind, + /// }; + /// + /// let dfa = DFA::builder() + /// .configure(DFA::config().match_kind(MatchKind::All)) + /// .build_many(&[r"\w+$", r"\S+$"])?; + /// let mut cache = dfa.create_cache(); + /// + /// let haystack = "@foo"; + /// let mut state = OverlappingState::start(); + /// + /// let expected = Some(HalfMatch::must(1, 4)); + /// dfa.try_search_overlapping_fwd( + /// &mut cache, &Input::new(haystack), &mut state, + /// )?; + /// assert_eq!(expected, state.get_match()); + /// + /// // The first pattern also matches at the same position, so re-running + /// // the search will yield another match. Notice also that the first + /// // pattern is returned after the second. This is because the second + /// // pattern begins its match before the first, is therefore an earlier + /// // match and is thus reported first.
+ /// let expected = Some(HalfMatch::must(0, 4)); + /// dfa.try_search_overlapping_fwd( + /// &mut cache, &Input::new(haystack), &mut state, + /// )?; + /// assert_eq!(expected, state.get_match()); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn try_search_overlapping_fwd( + &self, + cache: &mut Cache, + input: &Input<'_>, + state: &mut OverlappingState, + ) -> Result<(), MatchError> { + let utf8empty = self.get_nfa().has_empty() && self.get_nfa().is_utf8(); + search::find_overlapping_fwd(self, cache, input, state)?; + match state.get_match() { + None => Ok(()), + Some(_) if !utf8empty => Ok(()), + Some(_) => skip_empty_utf8_splits_overlapping( + input, + state, + |input, state| { + search::find_overlapping_fwd(self, cache, input, state) + }, + ), + } + } + + /// Executes a reverse overlapping search and returns the start of the + /// position of the leftmost match that is found. If no match exists, then + /// `None` is returned. + /// + /// When using this routine to implement an iterator of overlapping + /// matches, the `start` of the search should remain invariant throughout + /// iteration. The `OverlappingState` given to the search will keep track + /// of the current position of the search. (This is because multiple + /// matches may be reported at the same position, so only the search + /// implementation itself knows when to advance the position.) + /// + /// If for some reason you want the search to forget about its previous + /// state and restart the search at a particular position, then setting the + /// state to [`OverlappingState::start`] will accomplish that. + /// + /// # Errors + /// + /// This routine errors if the search could not complete. This can occur + /// in a number of circumstances: + /// + /// * The configuration of the lazy DFA may permit it to "quit" the search. + /// For example, setting quit bytes or enabling heuristic support for + /// Unicode word boundaries. 
The default configuration does not enable any + /// option that could result in the lazy DFA quitting. + /// * The configuration of the lazy DFA may also permit it to "give up" + /// on a search if it makes ineffective use of its transition table + /// cache. The default configuration does not enable this by default, + /// although it is typically a good idea to. + /// * When the provided `Input` configuration is not supported. For + /// example, by providing an unsupported anchor mode. + /// + /// When a search returns an error, callers cannot know whether a match + /// exists or not. + /// + /// # Example: UTF-8 mode + /// + /// This example demonstrates that UTF-8 mode applies to reverse + /// DFAs. When UTF-8 mode is enabled in the underlying NFA, then all + /// matches reported must correspond to valid UTF-8 spans. This includes + /// prohibiting zero-width matches that split a codepoint. + /// + /// UTF-8 mode is enabled by default. Notice below how the only zero-width + /// matches reported are those at UTF-8 boundaries: + /// + /// ``` + /// use regex_automata::{ + /// hybrid::dfa::{DFA, OverlappingState}, + /// nfa::thompson, + /// HalfMatch, Input, MatchKind, + /// }; + /// + /// let dfa = DFA::builder() + /// .configure(DFA::config().match_kind(MatchKind::All)) + /// .thompson(thompson::Config::new().reverse(true)) + /// .build_many(&[r"", r"☃"])?; + /// let mut cache = dfa.create_cache(); + /// + /// // Run the reverse DFA to collect all matches. + /// let input = Input::new("☃"); + /// let mut state = OverlappingState::start(); + /// let mut matches = vec![]; + /// loop { + /// dfa.try_search_overlapping_rev(&mut cache, &input, &mut state)?; + /// match state.get_match() { + /// None => break, + /// Some(hm) => matches.push(hm), + /// } + /// } + /// + /// // No matches split a codepoint.
+ /// let expected = vec![ + /// HalfMatch::must(0, 3), + /// HalfMatch::must(1, 0), + /// HalfMatch::must(0, 0), + /// ]; + /// assert_eq!(expected, matches); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// Now let's look at the same example, but with UTF-8 mode on the + /// underlying NFA disabled: + /// + /// ``` + /// use regex_automata::{ + /// hybrid::dfa::{DFA, OverlappingState}, + /// nfa::thompson, + /// HalfMatch, Input, MatchKind, + /// }; + /// + /// let dfa = DFA::builder() + /// .configure(DFA::config().match_kind(MatchKind::All)) + /// .thompson(thompson::Config::new().reverse(true).utf8(false)) + /// .build_many(&[r"", r"☃"])?; + /// let mut cache = dfa.create_cache(); + /// + /// // Run the reverse DFA to collect all matches. + /// let input = Input::new("☃"); + /// let mut state = OverlappingState::start(); + /// let mut matches = vec![]; + /// loop { + /// dfa.try_search_overlapping_rev(&mut cache, &input, &mut state)?; + /// match state.get_match() { + /// None => break, + /// Some(hm) => matches.push(hm), + /// } + /// } + /// + /// // Now *all* positions match, even within a codepoint, + /// // because we lifted the requirement that matches + /// // correspond to valid UTF-8 spans. 
+ /// let expected = vec![ + /// HalfMatch::must(0, 3), + /// HalfMatch::must(0, 2), + /// HalfMatch::must(0, 1), + /// HalfMatch::must(1, 0), + /// HalfMatch::must(0, 0), + /// ]; + /// assert_eq!(expected, matches); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn try_search_overlapping_rev( + &self, + cache: &mut Cache, + input: &Input<'_>, + state: &mut OverlappingState, + ) -> Result<(), MatchError> { + let utf8empty = self.get_nfa().has_empty() && self.get_nfa().is_utf8(); + search::find_overlapping_rev(self, cache, input, state)?; + match state.get_match() { + None => Ok(()), + Some(_) if !utf8empty => Ok(()), + Some(_) => skip_empty_utf8_splits_overlapping( + input, + state, + |input, state| { + search::find_overlapping_rev(self, cache, input, state) + }, + ), + } + } + + /// Writes the set of patterns that match anywhere in the given search + /// configuration to `patset`. If multiple patterns match at the same + /// position and the underlying DFA supports overlapping matches, then all + /// matching patterns are written to the given set. + /// + /// Unless all of the patterns in this DFA are anchored, then generally + /// speaking, this will visit every byte in the haystack. + /// + /// This search routine *does not* clear the pattern set. This gives some + /// flexibility to the caller (e.g., running multiple searches with the + /// same pattern set), but does make the API bug-prone if you're reusing + /// the same pattern set for multiple searches but intended them to be + /// independent. + /// + /// If a pattern ID matched but the given `PatternSet` does not have + /// sufficient capacity to store it, then it is not inserted and silently + /// dropped. + /// + /// # Errors + /// + /// This routine errors if the search could not complete. This can occur + /// in a number of circumstances: + /// + /// * The configuration of the lazy DFA may permit it to "quit" the search. 
+ /// For example, setting quit bytes or enabling heuristic support for + /// Unicode word boundaries. The default configuration does not enable any + /// option that could result in the lazy DFA quitting. + /// * The configuration of the lazy DFA may also permit it to "give up" + /// on a search if it makes ineffective use of its transition table + /// cache. The default configuration does not enable this by default, + /// although it is typically a good idea to. + /// * When the provided `Input` configuration is not supported. For + /// example, by providing an unsupported anchor mode. + /// + /// When a search returns an error, callers cannot know whether a match + /// exists or not. + /// + /// # Example + /// + /// This example shows how to find all matching patterns in a haystack, + /// even when some patterns match at the same position as other patterns. + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{ + /// hybrid::dfa::DFA, + /// Input, MatchKind, PatternSet, + /// }; + /// + /// let patterns = &[ + /// r"\w+", r"\d+", r"\pL+", r"foo", r"bar", r"barfoo", r"foobar", + /// ]; + /// let dfa = DFA::builder() + /// .configure(DFA::config().match_kind(MatchKind::All)) + /// .build_many(patterns)?; + /// let mut cache = dfa.create_cache(); + /// + /// let input = Input::new("foobar"); + /// let mut patset = PatternSet::new(dfa.pattern_len()); + /// dfa.try_which_overlapping_matches(&mut cache, &input, &mut patset)?; + /// let expected = vec![0, 2, 3, 4, 6]; + /// let got: Vec = patset.iter().map(|p| p.as_usize()).collect(); + /// assert_eq!(expected, got); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn try_which_overlapping_matches( + &self, + cache: &mut Cache, + input: &Input<'_>, + patset: &mut PatternSet, + ) -> Result<(), MatchError> { + let mut state = OverlappingState::start(); + while let Some(m) = { + self.try_search_overlapping_fwd(cache, input, &mut state)?; + state.get_match() 
+ } { + let _ = patset.try_insert(m.pattern()); + // There's nothing left to find, so we can stop. Or the caller + // asked us to. + if patset.is_full() || input.get_earliest() { + break; + } + } + Ok(()) + } +} + +impl DFA { + /// Transitions from the current state to the next state, given the next + /// byte of input. + /// + /// The given cache is used to either reuse pre-computed state + /// transitions, or to store this newly computed transition for future + /// reuse. Thus, this routine guarantees that it will never return a state + /// ID that has an "unknown" tag. + /// + /// # State identifier validity + /// + /// The only valid value for `current` is the lazy state ID returned + /// by the most recent call to `next_state`, `next_state_untagged`, + /// `next_state_untagged_unchecked`, `start_state_forward` or + /// `state_state_reverse` for the given `cache`. Any state ID returned from + /// prior calls to these routines (with the same `cache`) is considered + /// invalid (even if it gives an appearance of working). State IDs returned + /// from _any_ prior call for different `cache` values are also always + /// invalid. + /// + /// The returned ID is always a valid ID when `current` refers to a valid + /// ID. Moreover, this routine is defined for all possible values of + /// `input`. + /// + /// These validity rules are not checked, even in debug mode. Callers are + /// required to uphold these rules themselves. + /// + /// Violating these state ID validity rules will not sacrifice memory + /// safety, but _may_ produce an incorrect result or a panic. + /// + /// # Panics + /// + /// If the given ID does not refer to a valid state, then this routine + /// may panic but it also may not panic and instead return an invalid or + /// incorrect ID. + /// + /// # Example + /// + /// This shows a simplistic example for walking a lazy DFA for a given + /// haystack by using the `next_state` method. 
+ /// + /// ``` + /// use regex_automata::{hybrid::dfa::DFA, Input}; + /// + /// let dfa = DFA::new(r"[a-z]+r")?; + /// let mut cache = dfa.create_cache(); + /// let haystack = "bar".as_bytes(); + /// + /// // The start state is determined by inspecting the position and the + /// // initial bytes of the haystack. + /// let mut sid = dfa.start_state_forward( + /// &mut cache, &Input::new(haystack), + /// )?; + /// // Walk all the bytes in the haystack. + /// for &b in haystack { + /// sid = dfa.next_state(&mut cache, sid, b)?; + /// } + /// // Matches are always delayed by 1 byte, so we must explicitly walk the + /// // special "EOI" transition at the end of the search. + /// sid = dfa.next_eoi_state(&mut cache, sid)?; + /// assert!(sid.is_match()); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn next_state( + &self, + cache: &mut Cache, + current: LazyStateID, + input: u8, + ) -> Result { + let class = usize::from(self.classes.get(input)); + let offset = current.as_usize_untagged() + class; + let sid = cache.trans[offset]; + if !sid.is_unknown() { + return Ok(sid); + } + let unit = alphabet::Unit::u8(input); + Lazy::new(self, cache).cache_next_state(current, unit) + } + + /// Transitions from the current state to the next state, given the next + /// byte of input and a state ID that is not tagged. + /// + /// The only reason to use this routine is performance. In particular, the + /// `next_state` method needs to do some additional checks, among them is + /// to account for identifiers to states that are not yet computed. In + /// such a case, the transition is computed on the fly. However, if it is + /// known that the `current` state ID is untagged, then these checks can be + /// omitted. + /// + /// Since this routine does not compute states on the fly, it does not + /// modify the cache and thus cannot return an error. 
Consequently, `cache` + /// does not need to be mutable and it is possible for this routine to + /// return a state ID corresponding to the special "unknown" state. In + /// this case, it is the caller's responsibility to use the prior state + /// ID and `input` with `next_state` in order to force the computation of + /// the unknown transition. Otherwise, trying to use the "unknown" state + /// ID will just result in transitioning back to itself, and thus never + /// terminating. (This is technically a special exemption to the state ID + /// validity rules, but is permissible since this routine is guaranteed to + /// never mutate the given `cache`, and thus the identifier is guaranteed + /// to remain valid.) + /// + /// See [`LazyStateID`] for more details on what it means for a state ID + /// to be tagged. Also, see + /// [`next_state_untagged_unchecked`](DFA::next_state_untagged_unchecked) + /// for this same idea, but with bounds checks forcefully elided. + /// + /// # State identifier validity + /// + /// The only valid value for `current` is an **untagged** lazy + /// state ID returned by the most recent call to `next_state`, + /// `next_state_untagged`, `next_state_untagged_unchecked`, + /// `start_state_forward` or `start_state_reverse` for the given `cache`. + /// Any state ID returned from prior calls to these routines (with the + /// same `cache`) is considered invalid (even if it gives an appearance + /// of working). State IDs returned from _any_ prior call for different + /// `cache` values are also always invalid. + /// + /// The returned ID is always a valid ID when `current` refers to a valid + /// ID, although it may be tagged. Moreover, this routine is defined for + /// all possible values of `input`. + /// + /// Not all validity rules are checked, even in debug mode. Callers are + /// required to uphold these rules themselves.
+ /// + /// Violating these state ID validity rules will not sacrifice memory + /// safety, but _may_ produce an incorrect result or a panic. + /// + /// # Panics + /// + /// If the given ID does not refer to a valid state, then this routine + /// may panic but it also may not panic and instead return an invalid or + /// incorrect ID. + /// + /// # Example + /// + /// This shows a simplistic example for walking a lazy DFA for a given + /// haystack by using the `next_state_untagged` method where possible. + /// + /// ``` + /// use regex_automata::{hybrid::dfa::DFA, Input}; + /// + /// let dfa = DFA::new(r"[a-z]+r")?; + /// let mut cache = dfa.create_cache(); + /// let haystack = "bar".as_bytes(); + /// + /// // The start state is determined by inspecting the position and the + /// // initial bytes of the haystack. + /// let mut sid = dfa.start_state_forward( + /// &mut cache, &Input::new(haystack), + /// )?; + /// // Walk all the bytes in the haystack. + /// let mut at = 0; + /// while at < haystack.len() { + /// if sid.is_tagged() { + /// sid = dfa.next_state(&mut cache, sid, haystack[at])?; + /// } else { + /// let mut prev_sid = sid; + /// // We attempt to chew through as much as we can while moving + /// // through untagged state IDs. Thus, the transition function + /// // does less work on average per byte. (Unrolling this loop + /// // may help even more.) + /// while at < haystack.len() { + /// prev_sid = sid; + /// sid = dfa.next_state_untagged( + /// &mut cache, sid, haystack[at], + /// ); + /// at += 1; + /// if sid.is_tagged() { + /// break; + /// } + /// } + /// // We must ensure that we never proceed to the next iteration + /// // with an unknown state ID. If we don't account for this + /// // case, then search isn't guaranteed to terminate since all + /// // transitions on unknown states loop back to itself. 
+ /// if sid.is_unknown() { + /// sid = dfa.next_state( + /// &mut cache, prev_sid, haystack[at - 1], + /// )?; + /// } + /// } + /// } + /// // Matches are always delayed by 1 byte, so we must explicitly walk the + /// // special "EOI" transition at the end of the search. + /// sid = dfa.next_eoi_state(&mut cache, sid)?; + /// assert!(sid.is_match()); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn next_state_untagged( + &self, + cache: &Cache, + current: LazyStateID, + input: u8, + ) -> LazyStateID { + debug_assert!(!current.is_tagged()); + let class = usize::from(self.classes.get(input)); + let offset = current.as_usize_unchecked() + class; + cache.trans[offset] + } + + /// Transitions from the current state to the next state, eliding bounds + /// checks, given the next byte of input and a state ID that is not tagged. + /// + /// The only reason to use this routine is performance. In particular, the + /// `next_state` method needs to do some additional checks, among them is + /// to account for identifiers to states that are not yet computed. In + /// such a case, the transition is computed on the fly. However, if it is + /// known that the `current` state ID is untagged, then these checks can be + /// omitted. + /// + /// Since this routine does not compute states on the fly, it does not + /// modify the cache and thus cannot return an error. Consequently, `cache` + /// does not need to be mutable and it is possible for this routine to + /// return a state ID corresponding to the special "unknown" state. In + /// this case, it is the caller's responsibility to use the prior state + /// ID and `input` with `next_state` in order to force the computation of + /// the unknown transition. Otherwise, trying to use the "unknown" state + /// ID will just result in transitioning back to itself, and thus never + /// terminating. 
(This is technically a special exemption to the state ID + /// validity rules, but is permissible since this routine is guarateed to + /// never mutate the given `cache`, and thus the identifier is guaranteed + /// to remain valid.) + /// + /// See [`LazyStateID`] for more details on what it means for a state ID + /// to be tagged. Also, see + /// [`next_state_untagged`](DFA::next_state_untagged) + /// for this same idea, but with memory safety guaranteed by retaining + /// bounds checks. + /// + /// # State identifier validity + /// + /// The only valid value for `current` is an **untagged** lazy + /// state ID returned by the most recent call to `next_state`, + /// `next_state_untagged`, `next_state_untagged_unchecked`, + /// `start_state_forward` or `state_state_reverse` for the given `cache`. + /// Any state ID returned from prior calls to these routines (with the + /// same `cache`) is considered invalid (even if it gives an appearance + /// of working). State IDs returned from _any_ prior call for different + /// `cache` values are also always invalid. + /// + /// The returned ID is always a valid ID when `current` refers to a valid + /// ID, although it may be tagged. Moreover, this routine is defined for + /// all possible values of `input`. + /// + /// Not all validity rules are checked, even in debug mode. Callers are + /// required to uphold these rules themselves. + /// + /// Violating these state ID validity rules will not sacrifice memory + /// safety, but _may_ produce an incorrect result or a panic. + /// + /// # Safety + /// + /// Callers of this method must guarantee that `current` refers to a valid + /// state ID according to the rules described above. If `current` is not a + /// valid state ID for this automaton, then calling this routine may result + /// in undefined behavior. + /// + /// If `current` is valid, then the ID returned is valid for all possible + /// values of `input`. 
+ #[inline] + pub unsafe fn next_state_untagged_unchecked( + &self, + cache: &Cache, + current: LazyStateID, + input: u8, + ) -> LazyStateID { + debug_assert!(!current.is_tagged()); + let class = usize::from(self.classes.get(input)); + let offset = current.as_usize_unchecked() + class; + *cache.trans.get_unchecked(offset) + } + + /// Transitions from the current state to the next state for the special + /// EOI symbol. + /// + /// The given cache is used to either reuse pre-computed state + /// transitions, or to store this newly computed transition for future + /// reuse. Thus, this routine guarantees that it will never return a state + /// ID that has an "unknown" tag. + /// + /// This routine must be called at the end of every search in a correct + /// implementation of search. Namely, lazy DFAs in this crate delay matches + /// by one byte in order to support look-around operators. Thus, after + /// reaching the end of a haystack, a search implementation must follow one + /// last EOI transition. + /// + /// It is best to think of EOI as an additional symbol in the alphabet of a + /// DFA that is distinct from every other symbol. That is, the alphabet of + /// lazy DFAs in this crate has a logical size of 257 instead of 256, where + /// 256 corresponds to every possible inhabitant of `u8`. (In practice, the + /// physical alphabet size may be smaller because of alphabet compression + /// via equivalence classes, but EOI is always represented somehow in the + /// alphabet.) + /// + /// # State identifier validity + /// + /// The only valid value for `current` is the lazy state ID returned + /// by the most recent call to `next_state`, `next_state_untagged`, + /// `next_state_untagged_unchecked`, `start_state_forward` or + /// `state_state_reverse` for the given `cache`. Any state ID returned from + /// prior calls to these routines (with the same `cache`) is considered + /// invalid (even if it gives an appearance of working). 
State IDs returned + /// from _any_ prior call for different `cache` values are also always + /// invalid. + /// + /// The returned ID is always a valid ID when `current` refers to a valid + /// ID. + /// + /// These validity rules are not checked, even in debug mode. Callers are + /// required to uphold these rules themselves. + /// + /// Violating these state ID validity rules will not sacrifice memory + /// safety, but _may_ produce an incorrect result or a panic. + /// + /// # Panics + /// + /// If the given ID does not refer to a valid state, then this routine + /// may panic but it also may not panic and instead return an invalid or + /// incorrect ID. + /// + /// # Example + /// + /// This shows a simplistic example for walking a DFA for a given haystack, + /// and then finishing the search with the final EOI transition. + /// + /// ``` + /// use regex_automata::{hybrid::dfa::DFA, Input}; + /// + /// let dfa = DFA::new(r"[a-z]+r")?; + /// let mut cache = dfa.create_cache(); + /// let haystack = "bar".as_bytes(); + /// + /// // The start state is determined by inspecting the position and the + /// // initial bytes of the haystack. + /// let mut sid = dfa.start_state_forward( + /// &mut cache, &Input::new(haystack), + /// )?; + /// // Walk all the bytes in the haystack. + /// for &b in haystack { + /// sid = dfa.next_state(&mut cache, sid, b)?; + /// } + /// // Matches are always delayed by 1 byte, so we must explicitly walk + /// // the special "EOI" transition at the end of the search. Without this + /// // final transition, the assert below will fail since the DFA will not + /// // have entered a match state yet! 
+ /// sid = dfa.next_eoi_state(&mut cache, sid)?; + /// assert!(sid.is_match()); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn next_eoi_state( + &self, + cache: &mut Cache, + current: LazyStateID, + ) -> Result { + let eoi = self.classes.eoi().as_usize(); + let offset = current.as_usize_untagged() + eoi; + let sid = cache.trans[offset]; + if !sid.is_unknown() { + return Ok(sid); + } + let unit = self.classes.eoi(); + Lazy::new(self, cache).cache_next_state(current, unit) + } + + /// Return the ID of the start state for this lazy DFA for the given + /// starting configuration. + /// + /// Unlike typical DFA implementations, the start state for DFAs in this + /// crate is dependent on a few different factors: + /// + /// * The [`Anchored`] mode of the search. Unanchored, anchored and + /// anchored searches for a specific [`PatternID`] all use different start + /// states. + /// * Whether a "look-behind" byte exists. For example, the `^` anchor + /// matches if and only if there is no look-behind byte. + /// * The specific value of that look-behind byte. For example, a `(?m:^)` + /// assertion only matches when there is either no look-behind byte, or + /// when the look-behind byte is a line terminator. + /// + /// The [starting configuration](start::Config) provides the above + /// information. + /// + /// This routine can be used for either forward or reverse searches. + /// Although, as a convenience, if you have an [`Input`], then it + /// may be more succinct to use [`DFA::start_state_forward`] or + /// [`DFA::start_state_reverse`]. Note, for example, that the convenience + /// routines return a [`MatchError`] on failure where as this routine + /// returns a [`StartError`]. + /// + /// # Errors + /// + /// This may return a [`StartError`] if the search needs to give up when + /// determining the start state (for example, if it sees a "quit" byte + /// or if the cache has become inefficient). 
This can also return an + /// error if the given configuration contains an unsupported [`Anchored`] + /// configuration. + #[cfg_attr(feature = "perf-inline", inline(always))] + pub fn start_state( + &self, + cache: &mut Cache, + config: &start::Config, + ) -> Result { + let lazy = LazyRef::new(self, cache); + let anchored = config.get_anchored(); + let start = match config.get_look_behind() { + None => Start::Text, + Some(byte) => { + if !self.quitset.is_empty() && self.quitset.contains(byte) { + return Err(StartError::quit(byte)); + } + self.start_map.get(byte) + } + }; + let start_id = lazy.get_cached_start_id(anchored, start)?; + if !start_id.is_unknown() { + return Ok(start_id); + } + Lazy::new(self, cache).cache_start_group(anchored, start) + } + + /// Return the ID of the start state for this lazy DFA when executing a + /// forward search. + /// + /// This is a convenience routine for calling [`DFA::start_state`] that + /// converts the given [`Input`] to a [start configuration](start::Config). + /// Additionally, if an error occurs, it is converted from a [`StartError`] + /// to a [`MatchError`] using the offset information in the given + /// [`Input`]. + /// + /// # Errors + /// + /// This may return a [`MatchError`] if the search needs to give up when + /// determining the start state (for example, if it sees a "quit" byte or + /// if the cache has become inefficient). This can also return an error if + /// the given `Input` contains an unsupported [`Anchored`] configuration. + #[cfg_attr(feature = "perf-inline", inline(always))] + pub fn start_state_forward( + &self, + cache: &mut Cache, + input: &Input<'_>, + ) -> Result { + let config = start::Config::from_input_forward(input); + self.start_state(cache, &config).map_err(|err| match err { + StartError::Cache { .. 
} => MatchError::gave_up(input.start()), + StartError::Quit { byte } => { + let offset = input + .start() + .checked_sub(1) + .expect("no quit in start without look-behind"); + MatchError::quit(byte, offset) + } + StartError::UnsupportedAnchored { mode } => { + MatchError::unsupported_anchored(mode) + } + }) + } + + /// Return the ID of the start state for this lazy DFA when executing a + /// reverse search. + /// + /// This is a convenience routine for calling [`DFA::start_state`] that + /// converts the given [`Input`] to a [start configuration](start::Config). + /// Additionally, if an error occurs, it is converted from a [`StartError`] + /// to a [`MatchError`] using the offset information in the given + /// [`Input`]. + /// + /// # Errors + /// + /// This may return a [`MatchError`] if the search needs to give up when + /// determining the start state (for example, if it sees a "quit" byte or + /// if the cache has become inefficient). This can also return an error if + /// the given `Input` contains an unsupported [`Anchored`] configuration. + #[cfg_attr(feature = "perf-inline", inline(always))] + pub fn start_state_reverse( + &self, + cache: &mut Cache, + input: &Input<'_>, + ) -> Result { + let config = start::Config::from_input_reverse(input); + self.start_state(cache, &config).map_err(|err| match err { + StartError::Cache { .. } => MatchError::gave_up(input.end()), + StartError::Quit { byte } => { + let offset = input.end(); + MatchError::quit(byte, offset) + } + StartError::UnsupportedAnchored { mode } => { + MatchError::unsupported_anchored(mode) + } + }) + } + + /// Returns the total number of patterns that match in this state. + /// + /// If the lazy DFA was compiled with one pattern, then this must + /// necessarily always return `1` for all match states. + /// + /// A lazy DFA guarantees that [`DFA::match_pattern`] can be called with + /// indices up to (but not including) the length returned by this routine + /// without panicking. 
+ /// + /// # Panics + /// + /// If the given state is not a match state, then this may either panic + /// or return an incorrect result. + /// + /// # Example + /// + /// This example shows a simple instance of implementing overlapping + /// matches. In particular, it shows not only how to determine how many + /// patterns have matched in a particular state, but also how to access + /// which specific patterns have matched. + /// + /// Notice that we must use [`MatchKind::All`] when building the DFA. If we + /// used [`MatchKind::LeftmostFirst`] instead, then the DFA would not be + /// constructed in a way that supports overlapping matches. (It would only + /// report a single pattern that matches at any particular point in time.) + /// + /// Another thing to take note of is the patterns used and the order in + /// which the pattern IDs are reported. In the example below, pattern `3` + /// is yielded first. Why? Because it corresponds to the match that + /// appears first. Namely, the `@` symbol is part of `\S+` but not part + /// of any of the other patterns. Since the `\S+` pattern has a match that + /// starts to the left of any other pattern, its ID is returned before any + /// other. + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{hybrid::dfa::DFA, Input, MatchKind}; + /// + /// let dfa = DFA::builder() + /// .configure(DFA::config().match_kind(MatchKind::All)) + /// .build_many(&[ + /// r"\w+", r"[a-z]+", r"[A-Z]+", r"\S+", + /// ])?; + /// let mut cache = dfa.create_cache(); + /// let haystack = "@bar".as_bytes(); + /// + /// // The start state is determined by inspecting the position and the + /// // initial bytes of the haystack. + /// let mut sid = dfa.start_state_forward( + /// &mut cache, &Input::new(haystack), + /// )?; + /// // Walk all the bytes in the haystack. 
+ /// for &b in haystack { + /// sid = dfa.next_state(&mut cache, sid, b)?; + /// } + /// sid = dfa.next_eoi_state(&mut cache, sid)?; + /// + /// assert!(sid.is_match()); + /// assert_eq!(dfa.match_len(&mut cache, sid), 3); + /// // The following calls are guaranteed to not panic since `match_len` + /// // returned `3` above. + /// assert_eq!(dfa.match_pattern(&mut cache, sid, 0).as_usize(), 3); + /// assert_eq!(dfa.match_pattern(&mut cache, sid, 1).as_usize(), 0); + /// assert_eq!(dfa.match_pattern(&mut cache, sid, 2).as_usize(), 1); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn match_len(&self, cache: &Cache, id: LazyStateID) -> usize { + assert!(id.is_match()); + LazyRef::new(self, cache).get_cached_state(id).match_len() + } + + /// Returns the pattern ID corresponding to the given match index in the + /// given state. + /// + /// See [`DFA::match_len`] for an example of how to use this method + /// correctly. Note that if you know your lazy DFA is configured with a + /// single pattern, then this routine is never necessary since it will + /// always return a pattern ID of `0` for an index of `0` when `id` + /// corresponds to a match state. + /// + /// Typically, this routine is used when implementing an overlapping + /// search, as the example for `DFA::match_len` does. + /// + /// # Panics + /// + /// If the state ID is not a match state or if the match index is out + /// of bounds for the given state, then this routine may either panic + /// or produce an incorrect result. If the state ID is correct and the + /// match index is correct, then this routine always produces a valid + /// `PatternID`. + #[inline] + pub fn match_pattern( + &self, + cache: &Cache, + id: LazyStateID, + match_index: usize, + ) -> PatternID { + // This is an optimization for the very common case of a DFA with a + // single pattern. 
This conditional avoids a somewhat more costly path + // that finds the pattern ID from the corresponding `State`, which + // requires a bit of slicing/pointer-chasing. This optimization tends + // to only matter when matches are frequent. + if self.pattern_len() == 1 { + return PatternID::ZERO; + } + LazyRef::new(self, cache) + .get_cached_state(id) + .match_pattern(match_index) + } +} + +/// A cache represents a partially computed DFA. +/// +/// A cache is the key component that differentiates a classical DFA and a +/// hybrid NFA/DFA (also called a "lazy DFA"). Where a classical DFA builds a +/// complete transition table that can handle all possible inputs, a hybrid +/// NFA/DFA starts with an empty transition table and builds only the parts +/// required during search. The parts that are built are stored in a cache. For +/// this reason, a cache is a required parameter for nearly every operation on +/// a [`DFA`]. +/// +/// Caches can be created from their corresponding DFA via +/// [`DFA::create_cache`]. A cache can only be used with either the DFA that +/// created it, or the DFA that was most recently used to reset it with +/// [`Cache::reset`]. Using a cache with any other DFA may result in panics +/// or incorrect results. +#[derive(Clone, Debug)] +pub struct Cache { + // N.B. If you're looking to understand how determinization works, it + // is probably simpler to first grok src/dfa/determinize.rs, since that + // doesn't have the "laziness" component. + /// The transition table. + /// + /// Given a `current` LazyStateID and an `input` byte, the next state can + /// be computed via `trans[untagged(current) + equiv_class(input)]`. Notice + /// that no multiplication is used. That's because state identifiers are + /// "premultiplied." + /// + /// Note that the next state may be the "unknown" state. In this case, the + /// next state is not known and determinization for `current` on `input` + /// must be performed. 
+ trans: Vec, + /// The starting states for this DFA. + /// + /// These are computed lazily. Initially, these are all set to "unknown" + /// lazy state IDs. + /// + /// When 'starts_for_each_pattern' is disabled (the default), then the size + /// of this is constrained to the possible starting configurations based + /// on the search parameters. (At time of writing, that's 4.) However, + /// when starting states for each pattern is enabled, then there are N + /// additional groups of starting states, where each group reflects the + /// different possible configurations and N is the number of patterns. + starts: Vec, + /// A sequence of NFA/DFA powerset states that have been computed for this + /// lazy DFA. This sequence is indexable by untagged LazyStateIDs. (Every + /// tagged LazyStateID can be used to index this sequence by converting it + /// to its untagged form.) + states: Vec, + /// A map from states to their corresponding IDs. This map may be accessed + /// via the raw byte representation of a state, which means that a `State` + /// does not need to be allocated to determine whether it already exists + /// in this map. Indeed, the existence of such a state is what determines + /// whether we allocate a new `State` or not. + /// + /// The higher level idea here is that we do just enough determinization + /// for a state to check whether we've already computed it. If we have, + /// then we can save a little (albeit not much) work. The real savings is + /// in memory usage. If we never checked for trivially duplicate states, + /// then our memory usage would explode to unreasonable levels. + states_to_id: StateMap, + /// Sparse sets used to track which NFA states have been visited during + /// various traversals. + sparses: SparseSets, + /// Scratch space for traversing the NFA graph. (We use space on the heap + /// instead of the call stack.) + stack: Vec, + /// Scratch space for building a NFA/DFA powerset state. 
This is used to + /// help amortize allocation since not every powerset state generated is + /// added to the cache. In particular, if it already exists in the cache, + /// then there is no need to allocate a new `State` for it. + scratch_state_builder: StateBuilderEmpty, + /// A simple abstraction for handling the saving of at most a single state + /// across a cache clearing. This is required for correctness. Namely, if + /// adding a new state after clearing the cache fails, then the caller + /// must retain the ability to continue using the state ID given. The + /// state corresponding to the state ID is what we preserve across cache + /// clearings. + state_saver: StateSaver, + /// The memory usage, in bytes, used by 'states' and 'states_to_id'. We + /// track this as new states are added since states use a variable amount + /// of heap. Tracking this as we add states makes it possible to compute + /// the total amount of memory used by the determinizer in constant time. + memory_usage_state: usize, + /// The number of times the cache has been cleared. When a minimum cache + /// clear count is set, then the cache will return an error instead of + /// clearing the cache if the count has been exceeded. + clear_count: usize, + /// The total number of bytes searched since the last time this cache was + /// cleared, not including the current search. + /// + /// This can be added to the length of the current search to get the true + /// total number of bytes searched. + /// + /// This is generally only non-zero when the + /// `Cache::search_{start,update,finish}` APIs are used to track search + /// progress. + bytes_searched: usize, + /// The progress of the current search. + /// + /// This is only non-`None` when callers utlize the `Cache::search_start`, + /// `Cache::search_update` and `Cache::search_finish` APIs. + /// + /// The purpose of recording search progress is to be able to make a + /// determination about the efficiency of the cache. 
Namely, by keeping + /// track of the + progress: Option, +} + +impl Cache { + /// Create a new cache for the given lazy DFA. + /// + /// The cache returned should only be used for searches for the given DFA. + /// If you want to reuse the cache for another DFA, then you must call + /// [`Cache::reset`] with that DFA. + pub fn new(dfa: &DFA) -> Cache { + let mut cache = Cache { + trans: alloc::vec![], + starts: alloc::vec![], + states: alloc::vec![], + states_to_id: StateMap::new(), + sparses: SparseSets::new(dfa.get_nfa().states().len()), + stack: alloc::vec![], + scratch_state_builder: StateBuilderEmpty::new(), + state_saver: StateSaver::none(), + memory_usage_state: 0, + clear_count: 0, + bytes_searched: 0, + progress: None, + }; + debug!("pre-init lazy DFA cache size: {}", cache.memory_usage()); + Lazy { dfa, cache: &mut cache }.init_cache(); + debug!("post-init lazy DFA cache size: {}", cache.memory_usage()); + cache + } + + /// Reset this cache such that it can be used for searching with the given + /// lazy DFA (and only that DFA). + /// + /// A cache reset permits reusing memory already allocated in this cache + /// with a different lazy DFA. + /// + /// Resetting a cache sets its "clear count" to 0. This is relevant if the + /// lazy DFA has been configured to "give up" after it has cleared the + /// cache a certain number of times. + /// + /// Any lazy state ID generated by the cache prior to resetting it is + /// invalid after the reset. + /// + /// # Example + /// + /// This shows how to re-purpose a cache for use with a different DFA. 
+ /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{hybrid::dfa::DFA, HalfMatch, Input}; + /// + /// let dfa1 = DFA::new(r"\w")?; + /// let dfa2 = DFA::new(r"\W")?; + /// + /// let mut cache = dfa1.create_cache(); + /// assert_eq!( + /// Some(HalfMatch::must(0, 2)), + /// dfa1.try_search_fwd(&mut cache, &Input::new("Δ"))?, + /// ); + /// + /// // Using 'cache' with dfa2 is not allowed. It may result in panics or + /// // incorrect results. In order to re-purpose the cache, we must reset + /// // it with the DFA we'd like to use it with. + /// // + /// // Similarly, after this reset, using the cache with 'dfa1' is also not + /// // allowed. + /// cache.reset(&dfa2); + /// assert_eq!( + /// Some(HalfMatch::must(0, 3)), + /// dfa2.try_search_fwd(&mut cache, &Input::new("☃"))?, + /// ); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn reset(&mut self, dfa: &DFA) { + Lazy::new(dfa, self).reset_cache() + } + + /// Initializes a new search starting at the given position. + /// + /// If a previous search was unfinished, then it is finished automatically + /// and a new search is begun. + /// + /// Note that keeping track of search progress is _not necessary_ + /// for correct implementations of search using a lazy DFA. Keeping + /// track of search progress is only necessary if you want the + /// [`Config::minimum_bytes_per_state`] configuration knob to work. + #[inline] + pub fn search_start(&mut self, at: usize) { + // If a previous search wasn't marked as finished, then finish it + // now automatically. + if let Some(p) = self.progress.take() { + self.bytes_searched += p.len(); + } + self.progress = Some(SearchProgress { start: at, at }); + } + + /// Updates the current search to indicate that it has search to the + /// current position. + /// + /// No special care needs to be taken for reverse searches. Namely, the + /// position given may be _less than_ the starting position of the search. 
+ /// + /// # Panics + /// + /// This panics if no search has been started by [`Cache::search_start`]. + #[inline] + pub fn search_update(&mut self, at: usize) { + let p = + self.progress.as_mut().expect("no in-progress search to update"); + p.at = at; + } + + /// Indicates that a search has finished at the given position. + /// + /// # Panics + /// + /// This panics if no search has been started by [`Cache::search_start`]. + #[inline] + pub fn search_finish(&mut self, at: usize) { + let mut p = + self.progress.take().expect("no in-progress search to finish"); + p.at = at; + self.bytes_searched += p.len(); + } + + /// Returns the total number of bytes that have been searched since this + /// cache was last cleared. + /// + /// This is useful for determining the efficiency of the cache. For + /// example, the lazy DFA uses this value in conjunction with the + /// [`Config::minimum_bytes_per_state`] knob to help determine whether it + /// should quit searching. + /// + /// This always returns `0` if search progress isn't being tracked. Note + /// that the lazy DFA search routines in this crate always track search + /// progress. + pub fn search_total_len(&self) -> usize { + self.bytes_searched + self.progress.as_ref().map_or(0, |p| p.len()) + } + + /// Returns the total number of times this cache has been cleared since it + /// was either created or last reset. + /// + /// This is useful for informational purposes or if you want to change + /// search strategies based on the number of times the cache has been + /// cleared. + pub fn clear_count(&self) -> usize { + self.clear_count + } + + /// Returns the heap memory usage, in bytes, of this cache. + /// + /// This does **not** include the stack size used up by this cache. To + /// compute that, use `std::mem::size_of::()`. 
+ pub fn memory_usage(&self) -> usize { + const ID_SIZE: usize = size_of::(); + const STATE_SIZE: usize = size_of::(); + + // NOTE: If you make changes to the below, then + // 'minimum_cache_capacity' should be updated correspondingly. + + self.trans.len() * ID_SIZE + + self.starts.len() * ID_SIZE + + self.states.len() * STATE_SIZE + // Maps likely use more memory than this, but it's probably close. + + self.states_to_id.len() * (STATE_SIZE + ID_SIZE) + + self.sparses.memory_usage() + + self.stack.capacity() * ID_SIZE + + self.scratch_state_builder.capacity() + // Heap memory used by 'State' in both 'states' and 'states_to_id'. + + self.memory_usage_state + } +} + +/// Keeps track of the progress of the current search. +/// +/// This is updated via the `Cache::search_{start,update,finish}` APIs to +/// record how many bytes have been searched. This permits computing a +/// heuristic that represents the efficiency of a cache, and thus helps inform +/// whether the lazy DFA should give up or not. +#[derive(Clone, Debug)] +struct SearchProgress { + start: usize, + at: usize, +} + +impl SearchProgress { + /// Returns the length, in bytes, of this search so far. + /// + /// This automatically handles the case of a reverse search, where `at` + /// is likely to be less than `start`. + fn len(&self) -> usize { + if self.start <= self.at { + self.at - self.start + } else { + self.start - self.at + } + } +} + +/// A map from states to state identifiers. When using std, we use a standard +/// hashmap, since it's a bit faster for this use case. (Other maps, like +/// one's based on FNV, have not yet been benchmarked.) +/// +/// The main purpose of this map is to reuse states where possible. This won't +/// fully minimize the DFA, but it works well in a lot of cases. 
+#[cfg(feature = "std")] +type StateMap = std::collections::HashMap; +#[cfg(not(feature = "std"))] +type StateMap = alloc::collections::BTreeMap; + +/// A type that groups methods that require the base NFA/DFA and writable +/// access to the cache. +#[derive(Debug)] +struct Lazy<'i, 'c> { + dfa: &'i DFA, + cache: &'c mut Cache, +} + +impl<'i, 'c> Lazy<'i, 'c> { + /// Creates a new 'Lazy' wrapper for a DFA and its corresponding cache. + fn new(dfa: &'i DFA, cache: &'c mut Cache) -> Lazy<'i, 'c> { + Lazy { dfa, cache } + } + + /// Return an immutable view by downgrading a writable cache to a read-only + /// cache. + fn as_ref<'a>(&'a self) -> LazyRef<'i, 'a> { + LazyRef::new(self.dfa, self.cache) + } + + /// This is marked as 'inline(never)' to avoid bloating methods on 'DFA' + /// like 'next_state' and 'next_eoi_state' that are called in critical + /// areas. The idea is to let the optimizer focus on the other areas of + /// those methods as the hot path. + /// + /// Here's an example that justifies 'inline(never)' + /// + /// ```ignore + /// regex-cli find match hybrid \ + /// --cache-capacity 100000000 \ + /// -p '\pL{100}' + /// all-codepoints-utf8-100x + /// ``` + /// + /// Where 'all-codepoints-utf8-100x' is the UTF-8 encoding of every + /// codepoint, in sequence, repeated 100 times. + /// + /// With 'inline(never)' hyperfine reports 1.1s per run. With + /// 'inline(always)', hyperfine reports 1.23s. So that's a 10% improvement. 
+ #[cold] + #[inline(never)] + fn cache_next_state( + &mut self, + mut current: LazyStateID, + unit: alphabet::Unit, + ) -> Result { + let stride2 = self.dfa.stride2(); + let empty_builder = self.get_state_builder(); + let builder = determinize::next( + self.dfa.get_nfa(), + self.dfa.get_config().get_match_kind(), + &mut self.cache.sparses, + &mut self.cache.stack, + &self.cache.states[current.as_usize_untagged() >> stride2], + unit, + empty_builder, + ); + let save_state = !self.as_ref().state_builder_fits_in_cache(&builder); + if save_state { + self.save_state(current); + } + let next = self.add_builder_state(builder, |sid| sid)?; + if save_state { + current = self.saved_state_id(); + } + // This is the payoff. The next time 'next_state' is called with this + // state and alphabet unit, it will find this transition and avoid + // having to re-determinize this transition. (If 'current' was saved + // above, then its ID has been refreshed in case the cache was cleared.) + self.set_transition(current, unit, next); + Ok(next) + } + + /// Compute and cache the starting state for the given pattern ID (if + /// present) and the starting configuration. + /// + /// This returns an error if a pattern ID is given and the DFA isn't + /// configured to build anchored start states for each pattern. + /// + /// This will never return an unknown lazy state ID. + /// + /// If caching this state would otherwise result in a cache that has been + /// cleared too many times, then an error is returned.
+ #[cold] + #[inline(never)] + fn cache_start_group( + &mut self, + anchored: Anchored, + start: Start, + ) -> Result { + let nfa_start_id = match anchored { + Anchored::No => self.dfa.get_nfa().start_unanchored(), + Anchored::Yes => self.dfa.get_nfa().start_anchored(), + Anchored::Pattern(pid) => { + if !self.dfa.get_config().get_starts_for_each_pattern() { + return Err(StartError::unsupported_anchored(anchored)); + } + match self.dfa.get_nfa().start_pattern(pid) { + None => return Ok(self.as_ref().dead_id()), + Some(sid) => sid, + } + } + }; + + let id = self + .cache_start_one(nfa_start_id, start) + .map_err(StartError::cache)?; + self.set_start_state(anchored, start, id); + Ok(id) + } + + /// Compute and cache the starting state for the given NFA state ID and the + /// starting configuration. The NFA state ID might be one of the following: + /// + /// 1) An unanchored start state to match any pattern. + /// 2) An anchored start state to match any pattern. + /// 3) An anchored start state for a particular pattern. + /// + /// This will never return an unknown lazy state ID. + /// + /// If caching this state would otherwise result in a cache that has been + /// cleared too many times, then an error is returned. + /// + /// When 'specialize_start_states' is enabled, a newly allocated ID is + /// tagged as a start state.
+ fn cache_start_one( + &mut self, + nfa_start_id: NFAStateID, + start: Start, + ) -> Result { + let mut builder_matches = self.get_state_builder().into_matches(); + determinize::set_lookbehind_from_start( + self.dfa.get_nfa(), + &start, + &mut builder_matches, + ); + self.cache.sparses.set1.clear(); + determinize::epsilon_closure( + self.dfa.get_nfa(), + nfa_start_id, + builder_matches.look_have(), + &mut self.cache.stack, + &mut self.cache.sparses.set1, + ); + let mut builder = builder_matches.into_nfa(); + determinize::add_nfa_states( + &self.dfa.get_nfa(), + &self.cache.sparses.set1, + &mut builder, + ); + let tag_starts = self.dfa.get_config().get_specialize_start_states(); + self.add_builder_state(builder, |id| { + if tag_starts { + id.to_start() + } else { + id + } + }) + } + + /// Either add the given builder state to this cache, or return an ID to an + /// equivalent state already in this cache. + /// + /// In the case where no equivalent state exists, the idmap function given + /// may be used to transform the identifier allocated. This is useful if + /// the caller needs to tag the ID with additional information. + /// + /// This will never return an unknown lazy state ID. + /// + /// If caching this state would otherwise result in a cache that has been + /// cleared too many times, then an error is returned. + fn add_builder_state( + &mut self, + builder: StateBuilderNFA, + idmap: impl Fn(LazyStateID) -> LazyStateID, + ) -> Result { + if let Some(&cached_id) = + self.cache.states_to_id.get(builder.as_bytes()) + { + // Since we have a cached state, put the constructed state's + // memory back into our scratch space, so that it can be reused. + self.put_state_builder(builder); + return Ok(cached_id); + } + // No equivalent state is cached, so add a new one. The builder's + // memory is recycled whether or not adding the state succeeded. + let result = self.add_state(builder.to_state(), idmap); + self.put_state_builder(builder); + result + } + + /// Allocate a new state ID and add the given state to this cache.
+ /// + /// The idmap function given may be used to transform the identifier + /// allocated. This is useful if the caller needs to tag the ID with + /// additional information. + /// + /// This will never return an unknown lazy state ID. + /// + /// If caching this state would otherwise result in a cache that has been + /// cleared too many times, then an error is returned. + fn add_state( + &mut self, + state: State, + idmap: impl Fn(LazyStateID) -> LazyStateID, + ) -> Result { + if !self.as_ref().state_fits_in_cache(&state) { + self.try_clear_cache()?; + } + // It's important for this to come second, since the above may clear + // the cache. If we clear the cache after ID generation, then the ID + // is likely bunk since it would have been generated based on a larger + // transition table. + let mut id = idmap(self.next_state_id()?); + if state.is_match() { + id = id.to_match(); + } + // Add room in the transition table. Since this is a fresh state, all + // of its transitions are unknown. + self.cache.trans.extend( + iter::repeat(self.as_ref().unknown_id()).take(self.dfa.stride()), + ); + // When we add a sentinel state, we never want to set any quit + // transitions. Technically, this is harmless, since sentinel states + // have all of their transitions set to loop back to themselves. But + // when creating sentinel states before the quit sentinel state, + // this will try to call 'set_transition' on a state ID that doesn't + // actually exist yet, which isn't allowed. So we just skip doing so + // entirely. + if !self.dfa.quitset.is_empty() && !self.as_ref().is_sentinel(id) { + let quit_id = self.as_ref().quit_id(); + for b in self.dfa.quitset.iter() { + self.set_transition(id, alphabet::Unit::u8(b), quit_id); + } + } + // The state is recorded twice: by index in 'states' and as a key in + // 'states_to_id' (so that equivalent states can be found and reused). + self.cache.memory_usage_state += state.memory_usage(); + self.cache.states.push(state.clone()); + self.cache.states_to_id.insert(state, id); + Ok(id) + } + + /// Allocate a new state ID.
+ /// + /// This will never return an unknown lazy state ID. + /// + /// The ID is derived from the current length of the transition table. If + /// that length cannot be used as an ID, then the cache is cleared first. + /// + /// If caching this state would otherwise result in a cache that has been + /// cleared too many times, then an error is returned. + fn next_state_id(&mut self) -> Result { + let sid = match LazyStateID::new(self.cache.trans.len()) { + Ok(sid) => sid, + Err(_) => { + self.try_clear_cache()?; + // This has to pass since we check that ID capacity at + // construction time can fit at least MIN_STATES states. + LazyStateID::new(self.cache.trans.len()).unwrap() + } + }; + Ok(sid) + } + + /// Attempt to clear the cache used by this lazy DFA. + /// + /// If clearing the cache exceeds the minimum number of required cache + /// clearings, then this will return a cache error. In this case, + /// callers should bubble this up as the cache can't be used until it is + /// reset. Implementations of search should convert this error into a + /// [`MatchError::gave_up`]. + /// + /// When a minimum number of bytes searched per state is also configured, + /// the error is only returned if the number of bytes searched is less + /// than that minimum multiplied by the number of cached states. + /// + /// If 'self.state_saver' is set to save a state, then this state is + /// persisted through cache clearing. Otherwise, the cache is returned to + /// its state after initialization with two exceptions: its clear count + /// is incremented and some of its memory likely has additional capacity. + /// That is, clearing a cache does _not_ release memory. + /// + /// Otherwise, any lazy state ID generated by the cache prior to resetting + /// it is invalid after the reset. + fn try_clear_cache(&mut self) -> Result<(), CacheError> { + let c = self.dfa.get_config(); + if let Some(min_count) = c.get_minimum_cache_clear_count() { + if self.cache.clear_count >= min_count { + if let Some(min_bytes_per) = c.get_minimum_bytes_per_state() { + let len = self.cache.search_total_len(); + let min_bytes = + min_bytes_per.saturating_mul(self.cache.states.len()); + // If we've searched 0 bytes then probably something has + // gone wrong and the lazy DFA search implementation isn't + // correctly updating the search progress state.
+ if len == 0 { + trace!( + "number of bytes searched is 0, but \ + a minimum bytes per state searched ({}) is \ + enabled, maybe Cache::search_update \ + is not being used?", + min_bytes_per, + ); + } + if len < min_bytes { + trace!( + "lazy DFA cache has been cleared {} times, \ + which exceeds the limit of {}, \ + AND its bytes searched per state is less \ + than the configured minimum of {}, \ + therefore lazy DFA is giving up \ + (bytes searched since cache clear = {}, \ + number of states = {})", + self.cache.clear_count, + min_count, + min_bytes_per, + len, + self.cache.states.len(), + ); + return Err(CacheError::bad_efficiency()); + } else { + trace!( + "lazy DFA cache has been cleared {} times, \ + which exceeds the limit of {}, \ + AND its bytes searched per state is greater \ + than the configured minimum of {}, \ + therefore lazy DFA is continuing! \ + (bytes searched since cache clear = {}, \ + number of states = {})", + self.cache.clear_count, + min_count, + min_bytes_per, + len, + self.cache.states.len(), + ); + } + } else { + trace!( + "lazy DFA cache has been cleared {} times, \ + which exceeds the limit of {}, \ + since there is no configured bytes per state \ + minimum, lazy DFA is giving up", + self.cache.clear_count, + min_count, + ); + return Err(CacheError::too_many_cache_clears()); + } + } + } + self.clear_cache(); + Ok(()) + } + + /// Clears _and_ resets the cache. Resetting the cache means that no + /// states are persisted, the clear count is reset to 0 and any recorded + /// search progress is discarded. No heap memory is released. + /// + /// Note that the caller may reset a cache with a different DFA than what + /// it was created from. In which case, the cache can now be used with the + /// new DFA (and not the old DFA). + fn reset_cache(&mut self) { + self.cache.state_saver = StateSaver::none(); + self.clear_cache(); + // If a new DFA is used, it might have a different number of NFA + // states, so we need to make sure our sparse sets have the appropriate + // size.
+ self.cache.sparses.resize(self.dfa.get_nfa().states().len()); + self.cache.clear_count = 0; + self.cache.progress = None; + } + + /// Clear the cache used by this lazy DFA. + /// + /// If 'self.state_saver' is set to save a state, then this state is + /// persisted through cache clearing. Otherwise, the cache is returned to + /// its state after initialization with two exceptions: its clear count + /// is incremented and some of its memory likely has additional capacity. + /// That is, clearing a cache does _not_ release memory. + /// + /// Otherwise, any lazy state ID generated by the cache prior to resetting + /// it is invalid after the reset. + fn clear_cache(&mut self) { + self.cache.trans.clear(); + self.cache.starts.clear(); + self.cache.states.clear(); + self.cache.states_to_id.clear(); + self.cache.memory_usage_state = 0; + self.cache.clear_count += 1; + self.cache.bytes_searched = 0; + if let Some(ref mut progress) = self.cache.progress { + progress.start = progress.at; + } + trace!( + "lazy DFA cache has been cleared (count: {})", + self.cache.clear_count + ); + self.init_cache(); + // If the state we want to save is one of the sentinel + // (unknown/dead/quit) states, then 'init_cache' adds those back, and + // their identifier values remain invariant. So there's no need to add + // it again. (And indeed, doing so would be incorrect!) + if let Some((old_id, state)) = self.cache.state_saver.take_to_save() { + // If the state is one of the special sentinel states, then it is + // automatically added by cache initialization and its ID always + // remains the same. With that said, this should never occur since + // the sentinel states are all loop states back to themselves. So + // we should never be in a position where we're attempting to save + // a sentinel state since we never compute transitions out of a + // sentinel state.
+ assert!( + !self.as_ref().is_sentinel(old_id), + "cannot save sentinel state" + ); + let new_id = self + .add_state(state, |id| { + if old_id.is_start() { + // We don't need to consult the + // 'specialize_start_states' config knob here, because + // if it's disabled, old_id.is_start() will never + // return true. + id.to_start() + } else { + id + } + }) + // The 'expect' here is OK because lazy DFA creation ensures that + // we have room in the cache to add MIN_STATES states. Since + // 'init_cache' above adds 3, this adds a 4th. + .expect("adding one state after cache clear must work"); + self.cache.state_saver = StateSaver::Saved(new_id); + } + } + + /// Initialize this cache from emptiness to a place where it can be used + /// for search. + /// + /// This is called both at cache creation time and after the cache has been + /// cleared. + /// + /// Primarily, this adds the three sentinel states and allocates some + /// initial memory. + fn init_cache(&mut self) { + // Why multiply by 2 here? Because we make room for both the unanchored + // and anchored start states. Unanchored is first and then anchored. + let mut starts_len = Start::len().checked_mul(2).unwrap(); + // ... but if we also want start states for every pattern, we make room + // for that too. + if self.dfa.get_config().get_starts_for_each_pattern() { + starts_len += Start::len() * self.dfa.pattern_len(); + } + self.cache + .starts + .extend(iter::repeat(self.as_ref().unknown_id()).take(starts_len)); + // This is the set of NFA states that corresponds to each of our three + // sentinel states: the empty set. + let dead = State::dead(); + // This sets up some states that we use as sentinels that are present + // in every DFA.
While it would be technically possible to implement + // this DFA without explicitly putting these states in the transition + // table, this is convenient to do to make `next_state` correct for all + // valid state IDs without needing explicit conditionals to special + // case these sentinel states. + // + // All three of these states are "dead" states. That is, all of + // them transition only to themselves. So once you enter one of + // these states, it's impossible to leave them. Thus, any correct + // search routine must explicitly check for these state types. (Sans + // `unknown`, since that is only used internally to represent missing + // states.) + let unk_id = + self.add_state(dead.clone(), |id| id.to_unknown()).unwrap(); + let dead_id = self.add_state(dead.clone(), |id| id.to_dead()).unwrap(); + let quit_id = self.add_state(dead.clone(), |id| id.to_quit()).unwrap(); + // Sanity check: the IDs just allocated must match the IDs computed by + // 'unknown_id', 'dead_id' and 'quit_id'. + assert_eq!(unk_id, self.as_ref().unknown_id()); + assert_eq!(dead_id, self.as_ref().dead_id()); + assert_eq!(quit_id, self.as_ref().quit_id()); + // The idea here is that if you start in an unknown/dead/quit state and + // try to transition on them, then you should end up where you started. + self.set_all_transitions(unk_id, unk_id); + self.set_all_transitions(dead_id, dead_id); + self.set_all_transitions(quit_id, quit_id); + // All of these states are technically equivalent from the FSM + // perspective, so putting all three of them in the cache isn't + // possible. (They are distinct merely because we use their + // identifiers as sentinels to mean something, as indicated by the + // names.) Moreover, we wouldn't want to do that. Unknown and quit + // states are special in that they are artificial constructions of + // this implementation. But dead states are a natural part of + // determinization. When you reach a point in the NFA where you cannot + // go anywhere else, a dead state will naturally arise and we MUST + // reuse the canonical dead state that we've created here. Why?
Because + // it is the state ID that tells the search routine whether a state is + // dead or not, and thus, whether to stop the search. Having a bunch of + // distinct dead states would be quite wasteful! + self.cache.states_to_id.insert(dead, dead_id); + } + + /// Save the state corresponding to the ID given such that the state + /// persists through a cache clearing. + /// + /// While the state may persist, the ID may not. In order to discover the + /// new state ID, one must call 'saved_state_id' after a cache clearing. + fn save_state(&mut self, id: LazyStateID) { + let state = self.as_ref().get_cached_state(id).clone(); + self.cache.state_saver = StateSaver::ToSave { id, state }; + } + + /// Returns the updated lazy state ID for a state that was persisted + /// through a cache clearing. + /// + /// It is only correct to call this routine when both a state has been + /// saved and the cache has just been cleared. Otherwise, this panics. + fn saved_state_id(&mut self) -> LazyStateID { + self.cache + .state_saver + .take_saved() + .expect("state saver does not have saved state ID") + } + + /// Set all transitions on the state 'from' to 'to'. + fn set_all_transitions(&mut self, from: LazyStateID, to: LazyStateID) { + for unit in self.dfa.classes.representatives(..) { + self.set_transition(from, unit, to); + } + } + + /// Set the transition on 'from' for 'unit' to 'to'. + /// + /// This panics if either 'from' or 'to' is invalid. + /// + /// All unit values are OK. + fn set_transition( + &mut self, + from: LazyStateID, + unit: alphabet::Unit, + to: LazyStateID, + ) { + assert!(self.as_ref().is_valid(from), "invalid 'from' id: {:?}", from); + assert!(self.as_ref().is_valid(to), "invalid 'to' id: {:?}", to); + // The untagged ID of a state is the offset of its first transition in + // 'trans', so adding the unit's equivalence class yields the slot. + let offset = + from.as_usize_untagged() + self.dfa.classes.get_by_unit(unit); + self.cache.trans[offset] = to; + } + + /// Set the start ID for the given pattern ID (if given) and starting + /// configuration to the ID given.
+ /// + /// This panics if 'id' is not valid or if a pattern ID is given and + /// 'starts_for_each_pattern' is not enabled. + fn set_start_state( + &mut self, + anchored: Anchored, + start: Start, + id: LazyStateID, + ) { + assert!(self.as_ref().is_valid(id)); + let start_index = start.as_usize(); + let index = match anchored { + Anchored::No => start_index, + Anchored::Yes => Start::len() + start_index, + Anchored::Pattern(pid) => { + assert!( + self.dfa.get_config().get_starts_for_each_pattern(), + "attempted to search for a specific pattern \ + without enabling starts_for_each_pattern", + ); + let pid = pid.as_usize(); + (2 * Start::len()) + (Start::len() * pid) + start_index + } + }; + self.cache.starts[index] = id; + } + + /// Returns a state builder from this DFA that might have existing + /// capacity. This helps avoid allocs in cases where a state is built that + /// turns out to already be cached. + /// + /// Callers must put the state builder back with 'put_state_builder', + /// otherwise the allocation reuse won't work. + fn get_state_builder(&mut self) -> StateBuilderEmpty { + core::mem::replace( + &mut self.cache.scratch_state_builder, + StateBuilderEmpty::new(), + ) + } + + /// Puts the given state builder back into this DFA for reuse. + /// + /// Note that building a 'State' from a builder always creates a new alloc, + /// so callers should always put the builder back. + fn put_state_builder(&mut self, builder: StateBuilderNFA) { + let _ = core::mem::replace( + &mut self.cache.scratch_state_builder, + builder.clear(), + ); + } +} + +/// A type that groups methods that require the base NFA/DFA and read-only +/// access to the cache. +#[derive(Debug)] +struct LazyRef<'i, 'c> { + dfa: &'i DFA, + cache: &'c Cache, +} + +impl<'i, 'c> LazyRef<'i, 'c> { + /// Creates a new 'LazyRef' wrapper for a DFA and its corresponding cache.
+ fn new(dfa: &'i DFA, cache: &'c Cache) -> LazyRef<'i, 'c> { + LazyRef { dfa, cache } + } + + /// Return the ID of the start state for the given configuration. + /// + /// If the start state has not yet been computed, then this returns an + /// unknown lazy state ID. + /// + /// An error is returned if a pattern ID is given but + /// 'starts_for_each_pattern' is not enabled. A pattern ID that is out of + /// range yields the dead state ID. + #[cfg_attr(feature = "perf-inline", inline(always))] + fn get_cached_start_id( + &self, + anchored: Anchored, + start: Start, + ) -> Result { + let start_index = start.as_usize(); + let index = match anchored { + Anchored::No => start_index, + Anchored::Yes => Start::len() + start_index, + Anchored::Pattern(pid) => { + if !self.dfa.get_config().get_starts_for_each_pattern() { + return Err(StartError::unsupported_anchored(anchored)); + } + if pid.as_usize() >= self.dfa.pattern_len() { + return Ok(self.dead_id()); + } + (2 * Start::len()) + + (Start::len() * pid.as_usize()) + + start_index + } + }; + Ok(self.cache.starts[index]) + } + + /// Return the cached NFA/DFA powerset state for the given ID. + /// + /// This panics if the given ID does not address a valid state. + fn get_cached_state(&self, sid: LazyStateID) -> &State { + let index = sid.as_usize_untagged() >> self.dfa.stride2(); + &self.cache.states[index] + } + + /// Returns true if and only if the given ID corresponds to a "sentinel" + /// state. + /// + /// A sentinel state is a state that signifies a special condition of + /// search, and where every transition maps back to itself. See LazyStateID + /// for more details. Note that start and match states are _not_ sentinels + /// since they may otherwise be real states with non-trivial transitions. + /// The purpose of sentinel states is purely to indicate something. Their + /// transitions are not meant to be followed. + fn is_sentinel(&self, id: LazyStateID) -> bool { + id == self.unknown_id() || id == self.dead_id() || id == self.quit_id() + } + + /// Returns the ID of the unknown state for this lazy DFA.
+ fn unknown_id(&self) -> LazyStateID { + // This unwrap is OK since 0 is always a valid state ID. + LazyStateID::new(0).unwrap().to_unknown() + } + + /// Returns the ID of the dead state for this lazy DFA. + fn dead_id(&self) -> LazyStateID { + // This unwrap is OK since the maximum value here is 1 * 512 = 512, + // which is <= 2047 (the maximum state ID on 16-bit systems). Where + // 512 is the worst case for our equivalence classes (every byte is a + // distinct class). + LazyStateID::new(1 << self.dfa.stride2()).unwrap().to_dead() + } + + /// Returns the ID of the quit state for this lazy DFA. + fn quit_id(&self) -> LazyStateID { + // This unwrap is OK since the maximum value here is 2 * 512 = 1024, + // which is <= 2047 (the maximum state ID on 16-bit systems). Where + // 512 is the worst case for our equivalence classes (every byte is a + // distinct class). + LazyStateID::new(2 << self.dfa.stride2()).unwrap().to_quit() + } + + /// Returns true if and only if the given ID is valid. + /// + /// An ID is valid if it is both a valid index into the transition table + /// and is a multiple of the DFA's stride. + fn is_valid(&self, id: LazyStateID) -> bool { + let id = id.as_usize_untagged(); + id < self.cache.trans.len() && id % self.dfa.stride() == 0 + } + + /// Returns true if adding the state given would fit in this cache. That + /// is, if the cache's current memory usage plus the cost of one more state + /// does not exceed the DFA's cache capacity. + fn state_fits_in_cache(&self, state: &State) -> bool { + let needed = self.cache.memory_usage() + + self.memory_usage_for_one_more_state(state.memory_usage()); + trace!( + "lazy DFA cache capacity check: {:?} ?<=? {:?}", + needed, + self.dfa.cache_capacity + ); + needed <= self.dfa.cache_capacity + } + + /// Returns true if adding the state to be built by the given builder would + /// fit in this cache.
+ fn state_builder_fits_in_cache(&self, state: &StateBuilderNFA) -> bool { + let needed = self.cache.memory_usage() + + self.memory_usage_for_one_more_state(state.as_bytes().len()); + needed <= self.dfa.cache_capacity + } + + /// Returns the additional memory usage, in bytes, required to add one more + /// state to this cache. The given size should be the heap size, in bytes, + /// that would be used by the new state being added. + fn memory_usage_for_one_more_state( + &self, + state_heap_size: usize, + ) -> usize { + const ID_SIZE: usize = size_of::(); + const STATE_SIZE: usize = size_of::(); + + self.dfa.stride() * ID_SIZE // additional space needed in trans table + + STATE_SIZE // space in cache.states + + (STATE_SIZE + ID_SIZE) // space in cache.states_to_id + + state_heap_size // heap memory used by state itself + } +} + +/// A simple type that encapsulates the saving of a state ID through a cache +/// clearing. +/// +/// A state ID can be marked for saving with ToSave, while a state ID can be +/// saved itself with Saved. +#[derive(Clone, Debug)] +enum StateSaver { + /// An empty state saver. In this case, no states (other than the special + /// sentinel states) are preserved after clearing the cache. + None, + /// An ID of a state (and the state itself) that should be preserved after + /// the lazy DFA's cache has been cleared. After clearing, the updated ID + /// is stored in 'Saved' since it may have changed. + ToSave { id: LazyStateID, state: State }, + /// An ID of a state that has been persisted through a lazy DFA + /// cache clearing. The ID recorded here corresponds to an ID that was + /// once marked as ToSave. The IDs are likely not equivalent even though + /// the states they point to are. + Saved(LazyStateID), +} + +impl StateSaver { + /// Create an empty state saver.
+ fn none() -> StateSaver { + StateSaver::None + } + + /// Replace this state saver with an empty saver, and if this saver is a + /// request to save a state, return that request. + fn take_to_save(&mut self) -> Option<(LazyStateID, State)> { + match core::mem::replace(self, StateSaver::None) { + // Only a pending request carries a state that still needs saving. + StateSaver::None | StateSaver::Saved(_) => None, + StateSaver::ToSave { id, state } => Some((id, state)), + } + } + + /// Replace this state saver with an empty saver, and if this saver is a + /// saved state (or a request to save a state), return that state's ID. + /// + /// The idea here is that a request to save a state isn't necessarily + /// honored because it might not be needed. e.g., Some higher level code + /// might request a state to be saved on the off chance that the cache gets + /// cleared when a new state is added at a lower level. But if that new + /// state is never added, then the cache is never cleared and the state and + /// its ID remain unchanged. + fn take_saved(&mut self) -> Option { + match core::mem::replace(self, StateSaver::None) { + StateSaver::None => None, + StateSaver::Saved(id) | StateSaver::ToSave { id, .. } => Some(id), + } + } +} + +/// The configuration used for building a lazy DFA. +/// +/// As a convenience, [`DFA::config`] is an alias for [`Config::new`]. The +/// advantage of the former is that it often lets you avoid importing the +/// `Config` type directly. +/// +/// A lazy DFA configuration is a simple data object that is typically used +/// with [`Builder::configure`]. +/// +/// The default configuration guarantees that a search will never return a +/// "gave up" or "quit" error, although it is possible for a search to fail +/// if [`Config::starts_for_each_pattern`] wasn't enabled (which it is not by +/// default) and an [`Anchored::Pattern`] mode is requested via [`Input`].
+#[derive(Clone, Debug, Default)] +pub struct Config { + // As with other configuration types in this crate, we put all our knobs + // in options so that we can distinguish between "default" and "not set." + // This makes it possible to easily combine multiple configurations + // without default values overwriting explicitly specified values. See the + // 'overwrite' method. + // + // For docs on the fields below, see the corresponding method setters. + match_kind: Option, + pre: Option>, + starts_for_each_pattern: Option, + byte_classes: Option, + unicode_word_boundary: Option, + quitset: Option, + specialize_start_states: Option, + cache_capacity: Option, + skip_cache_capacity_check: Option, + minimum_cache_clear_count: Option>, + minimum_bytes_per_state: Option>, +} + +impl Config { + /// Return a new default lazy DFA builder configuration. + pub fn new() -> Config { + Config::default() + } + + /// Set the desired match semantics. + /// + /// The default is [`MatchKind::LeftmostFirst`], which corresponds to the + /// match semantics of Perl-like regex engines. That is, when multiple + /// patterns would match at the same leftmost position, the pattern that + /// appears first in the concrete syntax is chosen. + /// + /// Currently, the only other kind of match semantics supported is + /// [`MatchKind::All`]. This corresponds to classical DFA construction + /// where all possible matches are added to the lazy DFA. + /// + /// Typically, `All` is used when one wants to execute an overlapping + /// search and `LeftmostFirst` otherwise. In particular, it rarely makes + /// sense to use `All` with the various "leftmost" find routines, since the + /// leftmost routines depend on the `LeftmostFirst` automata construction + /// strategy. Specifically, `LeftmostFirst` adds dead states to the + /// lazy DFA as a way to terminate the search and report a match. + /// `LeftmostFirst` also supports non-greedy matches using this strategy + /// whereas `All` does not.
+ /// + /// # Example: overlapping search + /// + /// This example shows the typical use of `MatchKind::All`, which is to + /// report overlapping matches. + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{ + /// hybrid::dfa::{DFA, OverlappingState}, + /// HalfMatch, Input, MatchKind, + /// }; + /// + /// let dfa = DFA::builder() + /// .configure(DFA::config().match_kind(MatchKind::All)) + /// .build_many(&[r"\w+$", r"\S+$"])?; + /// let mut cache = dfa.create_cache(); + /// let haystack = "@foo"; + /// let mut state = OverlappingState::start(); + /// + /// let expected = Some(HalfMatch::must(1, 4)); + /// dfa.try_search_overlapping_fwd( + /// &mut cache, &Input::new(haystack), &mut state, + /// )?; + /// assert_eq!(expected, state.get_match()); + /// + /// // The first pattern also matches at the same position, so re-running + /// // the search will yield another match. Notice also that the first + /// // pattern is returned after the second. This is because the second + /// // pattern begins its match before the first, is therefore an earlier + /// // match and is thus reported first. + /// let expected = Some(HalfMatch::must(0, 4)); + /// dfa.try_search_overlapping_fwd( + /// &mut cache, &Input::new(haystack), &mut state, + /// )?; + /// assert_eq!(expected, state.get_match()); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + /// + /// # Example: reverse automaton to find start of match + /// + /// Another example for using `MatchKind::All` is for constructing a + /// reverse automaton to find the start of a match. `All` semantics are + /// used for this in order to find the longest possible match, which + /// corresponds to the leftmost starting position. + /// + /// Note that if you need the starting position then + /// [`hybrid::regex::Regex`](crate::hybrid::regex::Regex) will handle this + /// for you, so it's usually not necessary to do this yourself.
+ /// + /// ``` + /// use regex_automata::{ + /// hybrid::dfa::DFA, + /// nfa::thompson::NFA, + /// Anchored, HalfMatch, Input, MatchKind, + /// }; + /// + /// let input = Input::new("123foobar456"); + /// let pattern = r"[a-z]+r"; + /// + /// let dfa_fwd = DFA::new(pattern)?; + /// let dfa_rev = DFA::builder() + /// .thompson(NFA::config().reverse(true)) + /// .configure(DFA::config().match_kind(MatchKind::All)) + /// .build(pattern)?; + /// let mut cache_fwd = dfa_fwd.create_cache(); + /// let mut cache_rev = dfa_rev.create_cache(); + /// + /// let expected_fwd = HalfMatch::must(0, 9); + /// let expected_rev = HalfMatch::must(0, 3); + /// let got_fwd = dfa_fwd.try_search_fwd(&mut cache_fwd, &input)?.unwrap(); + /// // Here we don't specify the pattern to search for since there's only + /// // one pattern and we're doing a leftmost search. But if this were an + /// // overlapping search, you'd need to specify the pattern that matched + /// // in the forward direction. (Otherwise, you might wind up finding the + /// // starting position of a match of some other pattern.) That in turn + /// // requires building the reverse automaton with starts_for_each_pattern + /// // enabled. + /// let input = input + /// .clone() + /// .range(..got_fwd.offset()) + /// .anchored(Anchored::Yes); + /// let got_rev = dfa_rev.try_search_rev(&mut cache_rev, &input)?.unwrap(); + /// assert_eq!(expected_fwd, got_fwd); + /// assert_eq!(expected_rev, got_rev); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + pub fn match_kind(mut self, kind: MatchKind) -> Config { + self.match_kind = Some(kind); + self + } + + /// Set a prefilter to be used whenever a start state is entered. + /// + /// A [`Prefilter`] in this context is meant to accelerate searches by + /// looking for literal prefixes that every match for the corresponding + /// pattern (or patterns) must start with. Once a prefilter produces a + /// match, the underlying search routine continues on to try and confirm + /// the match.
+ /// + /// Be warned that setting a prefilter does not guarantee that the search + /// will be faster. While it's usually a good bet, if the prefilter + /// produces a lot of false positive candidates (i.e., positions matched + /// by the prefilter but not by the regex), then the overall result can + /// be slower than if you had just executed the regex engine without any + /// prefilters. + /// + /// Note that unless [`Config::specialize_start_states`] has been + /// explicitly set, then setting this will also enable (when `pre` is + /// `Some`) or disable (when `pre` is `None`) start state specialization. + /// This occurs because without start state specialization, a prefilter + /// is likely to be less effective. And without a prefilter, start state + /// specialization is usually pointless. + /// + /// By default no prefilter is set. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{ + /// hybrid::dfa::DFA, + /// util::prefilter::Prefilter, + /// Input, HalfMatch, MatchKind, + /// }; + /// + /// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["foo", "bar"]); + /// let re = DFA::builder() + /// .configure(DFA::config().prefilter(pre)) + /// .build(r"(foo|bar)[a-z]+")?; + /// let mut cache = re.create_cache(); + /// let input = Input::new("foo1 barfox bar"); + /// assert_eq!( + /// Some(HalfMatch::must(0, 11)), + /// re.try_search_fwd(&mut cache, &input)?, + /// ); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + /// + /// Be warned though that an incorrect prefilter can lead to incorrect + /// results!
+ /// + /// ``` + /// use regex_automata::{ + /// hybrid::dfa::DFA, + /// util::prefilter::Prefilter, + /// Input, HalfMatch, MatchKind, + /// }; + /// + /// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["foo", "car"]); + /// let re = DFA::builder() + /// .configure(DFA::config().prefilter(pre)) + /// .build(r"(foo|bar)[a-z]+")?; + /// let mut cache = re.create_cache(); + /// let input = Input::new("foo1 barfox bar"); + /// assert_eq!( + /// // No match reported even though there clearly is one! + /// None, + /// re.try_search_fwd(&mut cache, &input)?, + /// ); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + pub fn prefilter(mut self, pre: Option<Prefilter>) -> Config { + self.pre = Some(pre); + if self.specialize_start_states.is_none() { + self.specialize_start_states = + Some(self.get_prefilter().is_some()); + } + self + } + + /// Whether to compile a separate start state for each pattern in the + /// lazy DFA. + /// + /// When enabled, a separate **anchored** start state is added for each + /// pattern in the lazy DFA. When this start state is used, then the DFA + /// will only search for matches for the pattern specified, even if there + /// are other patterns in the DFA. + /// + /// The main downside of this option is that it can potentially increase + /// the size of the DFA and/or increase the time it takes to build the + /// DFA at search time. However, since this is configuration for a lazy + /// DFA, these states aren't actually built unless they're used. Enabling + /// this isn't necessarily free, however, as it may result in higher cache + /// usage. + /// + /// There are a few reasons one might want to enable this (it's disabled + /// by default): + /// + /// 1. When looking for the start of an overlapping match (using a reverse + /// DFA), doing it correctly requires starting the reverse search using the + /// starting state of the pattern that matched in the forward direction.
+ /// Indeed, when building a [`Regex`](crate::hybrid::regex::Regex), it + /// will automatically enable this option when building the reverse DFA + /// internally. + /// 2. When you want to use a DFA with multiple patterns to both search + /// for matches of any pattern or to search for anchored matches of one + /// particular pattern while using the same DFA. (Otherwise, you would need + /// to compile a new DFA for each pattern.) + /// + /// By default this is disabled. + /// + /// # Example + /// + /// This example shows how to use this option to permit the same lazy DFA + /// to run both general searches for any pattern and anchored searches for + /// a specific pattern. + /// + /// ``` + /// use regex_automata::{ + /// hybrid::dfa::DFA, + /// Anchored, HalfMatch, Input, PatternID, + /// }; + /// + /// let dfa = DFA::builder() + /// .configure(DFA::config().starts_for_each_pattern(true)) + /// .build_many(&[r"[a-z0-9]{6}", r"[a-z][a-z0-9]{5}"])?; + /// let mut cache = dfa.create_cache(); + /// let haystack = "bar foo123"; + /// + /// // Here's a normal unanchored search that looks for any pattern. + /// let expected = HalfMatch::must(0, 10); + /// let input = Input::new(haystack); + /// assert_eq!(Some(expected), dfa.try_search_fwd(&mut cache, &input)?); + /// // We can also do a normal anchored search for any pattern. Since it's + /// // an anchored search, we position the start of the search where we + /// // know the match will begin. + /// let expected = HalfMatch::must(0, 10); + /// let input = Input::new(haystack).range(4..); + /// assert_eq!(Some(expected), dfa.try_search_fwd(&mut cache, &input)?); + /// // Since we compiled anchored start states for each pattern, we can + /// // also look for matches of other patterns explicitly, even if a + /// // different pattern would have normally matched. + /// let expected = HalfMatch::must(1, 10); + /// let input = Input::new(haystack) + /// .range(4..) 
+ /// .anchored(Anchored::Pattern(PatternID::must(1))); + /// assert_eq!(Some(expected), dfa.try_search_fwd(&mut cache, &input)?); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + pub fn starts_for_each_pattern(mut self, yes: bool) -> Config { + self.starts_for_each_pattern = Some(yes); + self + } + + /// Whether to attempt to shrink the size of the lazy DFA's alphabet or + /// not. + /// + /// This option is enabled by default and should never be disabled unless + /// one is debugging the lazy DFA. + /// + /// When enabled, the lazy DFA will use a map from all possible bytes + /// to their corresponding equivalence class. Each equivalence class + /// represents a set of bytes that does not discriminate between a match + /// and a non-match in the DFA. For example, the pattern `[ab]+` has at + /// least two equivalence classes: a set containing `a` and `b` and a set + /// containing every byte except for `a` and `b`. `a` and `b` are in the + /// same equivalence classes because they never discriminate between a + /// match and a non-match. + /// + /// The advantage of this map is that the size of the transition table + /// can be reduced drastically from `#states * 256 * sizeof(LazyStateID)` + /// to `#states * k * sizeof(LazyStateID)` where `k` is the number of + /// equivalence classes (rounded up to the nearest power of 2). As a + /// result, total space usage can decrease substantially. Moreover, since a + /// smaller alphabet is used, DFA compilation during search becomes faster + /// as well since it will potentially be able to reuse a single transition + /// for multiple bytes. + /// + /// **WARNING:** This is only useful for debugging lazy DFAs. Disabling + /// this does not yield any speed advantages. Namely, even when this is + /// disabled, a byte class map is still used while searching. The only + /// difference is that every byte will be forced into its own distinct + /// equivalence class.
This is useful for debugging the actual generated + /// transitions because it lets one see the transitions defined on actual + /// bytes instead of the equivalence classes. + pub fn byte_classes(mut self, yes: bool) -> Config { + self.byte_classes = Some(yes); + self + } + + /// Heuristically enable Unicode word boundaries. + /// + /// When set, this will attempt to implement Unicode word boundaries as if + /// they were ASCII word boundaries. This only works when the search input + /// is ASCII only. If a non-ASCII byte is observed while searching, then a + /// [`MatchError::quit`] error is returned. + /// + /// A possible alternative to enabling this option is to simply use an + /// ASCII word boundary, e.g., via `(?-u:\b)`. The main reason to use this + /// option is if you absolutely need Unicode support. This option lets one + /// use a fast search implementation (a DFA) for some potentially very + /// common cases, while providing the option to fall back to some other + /// regex engine to handle the general case when an error is returned. + /// + /// If the pattern provided has no Unicode word boundary in it, then this + /// option has no effect. (That is, quitting on a non-ASCII byte only + /// occurs when this option is enabled _and_ a Unicode word boundary is + /// present in the pattern.) + /// + /// This is almost equivalent to setting all non-ASCII bytes to be quit + /// bytes. The only difference is that this will cause non-ASCII bytes to + /// be quit bytes _only_ when a Unicode word boundary is present in the + /// pattern. + /// + /// When enabling this option, callers _must_ be prepared to + /// handle a [`MatchError`] error during search. When using a + /// [`Regex`](crate::hybrid::regex::Regex), this corresponds to using the + /// `try_` suite of methods. Alternatively, if callers can guarantee that + /// their input is ASCII only, then a [`MatchError::quit`] error will never + /// be returned while searching. 
+ /// + /// This is disabled by default. + /// + /// # Example + /// + /// This example shows how to heuristically enable Unicode word boundaries + /// in a pattern. It also shows what happens when a search comes across a + /// non-ASCII byte. + /// + /// ``` + /// use regex_automata::{ + /// hybrid::dfa::DFA, + /// HalfMatch, Input, MatchError, + /// }; + /// + /// let dfa = DFA::builder() + /// .configure(DFA::config().unicode_word_boundary(true)) + /// .build(r"\b[0-9]+\b")?; + /// let mut cache = dfa.create_cache(); + /// + /// // The match occurs before the search ever observes the snowman + /// // character, so no error occurs. + /// let haystack = "foo 123  ☃"; + /// let expected = Some(HalfMatch::must(0, 7)); + /// let got = dfa.try_search_fwd(&mut cache, &Input::new(haystack))?; + /// assert_eq!(expected, got); + /// + /// // Notice that this search fails, even though the snowman character + /// // occurs after the ending match offset. This is because search + /// // routines read one byte past the end of the search to account for + /// // look-around, and indeed, this is required here to determine whether + /// // the trailing \b matches. + /// let haystack = "foo 123 ☃"; + /// let expected = MatchError::quit(0xE2, 8); + /// let got = dfa.try_search_fwd(&mut cache, &Input::new(haystack)); + /// assert_eq!(Err(expected), got); + /// + /// // Another example is executing a search where the span of the haystack + /// // we specify is all ASCII, but there is non-ASCII just before it. This + /// // correctly also reports an error. + /// let input = Input::new("β123").range(2..); + /// let expected = MatchError::quit(0xB2, 1); + /// let got = dfa.try_search_fwd(&mut cache, &input); + /// assert_eq!(Err(expected), got); + /// + /// // And similarly for the trailing word boundary.
+ /// let input = Input::new("123β").range(..3); + /// let expected = MatchError::quit(0xCE, 3); + /// let got = dfa.try_search_fwd(&mut cache, &input); + /// assert_eq!(Err(expected), got); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + pub fn unicode_word_boundary(mut self, yes: bool) -> Config { + // We have a separate option for this instead of just setting the + // appropriate quit bytes here because we don't want to set quit bytes + // for every regex. We only want to set them when the regex contains a + // Unicode word boundary. + self.unicode_word_boundary = Some(yes); + self + } + + /// Add a "quit" byte to the lazy DFA. + /// + /// When a quit byte is seen during search time, then search will return a + /// [`MatchError::quit`] error indicating the offset at which the search + /// stopped. + /// + /// A quit byte will always overrule any other aspects of a regex. For + /// example, if the `x` byte is added as a quit byte and the regex `\w` is + /// used, then observing `x` will cause the search to quit immediately + /// despite the fact that `x` is in the `\w` class. + /// + /// This mechanism is primarily useful for heuristically enabling certain + /// features like Unicode word boundaries in a DFA. Namely, if the input + /// to search is ASCII, then a Unicode word boundary can be implemented + /// via an ASCII word boundary with no change in semantics. Thus, a DFA + /// can attempt to match a Unicode word boundary but give up as soon as it + /// observes a non-ASCII byte. Indeed, if callers set all non-ASCII bytes + /// to be quit bytes, then Unicode word boundaries will be permitted when + /// building lazy DFAs. Of course, callers should enable + /// [`Config::unicode_word_boundary`] if they want this behavior instead. + /// (The advantage being that non-ASCII quit bytes will only be added if a + /// Unicode word boundary is in the pattern.)
+ /// + /// When enabling this option, callers _must_ be prepared to + /// handle a [`MatchError`] error during search. When using a + /// [`Regex`](crate::hybrid::regex::Regex), this corresponds to using the + /// `try_` suite of methods. + /// + /// By default, there are no quit bytes set. + /// + /// # Panics + /// + /// This panics if heuristic Unicode word boundaries are enabled and any + /// non-ASCII byte is removed from the set of quit bytes. Namely, enabling + /// Unicode word boundaries requires setting every non-ASCII byte to a quit + /// byte. So if the caller attempts to undo any of that, then this will + /// panic. + /// + /// # Example + /// + /// This example shows how to cause a search to terminate if it sees a + /// `\n` byte. This could be useful if, for example, you wanted to prevent + /// a user supplied pattern from matching across a line boundary. + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{hybrid::dfa::DFA, MatchError, Input}; + /// + /// let dfa = DFA::builder() + /// .configure(DFA::config().quit(b'\n', true)) + /// .build(r"foo\p{any}+bar")?; + /// let mut cache = dfa.create_cache(); + /// + /// let haystack = "foo\nbar"; + /// // Normally this would produce a match, since \p{any} contains '\n'. + /// // But since we instructed the automaton to enter a quit state if a + /// // '\n' is observed, this produces a match error instead. 
+ /// let expected = MatchError::quit(b'\n', 3); + /// let got = dfa.try_search_fwd( + /// &mut cache, + /// &Input::new(haystack), + /// ).unwrap_err(); + /// assert_eq!(expected, got); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + pub fn quit(mut self, byte: u8, yes: bool) -> Config { + if self.get_unicode_word_boundary() && !byte.is_ascii() && !yes { + panic!( + "cannot set non-ASCII byte to be non-quit when \ + Unicode word boundaries are enabled" + ); + } + // Lazily create the quit set on first use and update it in place. + let quitset = self.quitset.get_or_insert_with(ByteSet::empty); + if yes { + quitset.add(byte); + } else { + quitset.remove(byte); + } + self + } + + /// Enable specializing start states in the lazy DFA. + /// + /// When start states are specialized, an implementor of a search routine + /// using a lazy DFA can tell when the search has entered a starting state. + /// When start states aren't specialized, then it is impossible to know + /// whether the search has entered a start state. + /// + /// Ideally, this option wouldn't need to exist and we could always + /// specialize start states. The problem is that start states can be quite + /// active. This in turn means that an efficient search routine is likely + /// to ping-pong between a heavily optimized hot loop that handles most + /// states and to a less optimized specialized handling of start states. + /// This causes branches to get heavily mispredicted and overall can + /// materially decrease throughput. Therefore, specializing start states + /// should only be enabled when it is needed. + /// + /// Knowing whether a search is in a start state is typically useful when a + /// prefilter is active for the search. A prefilter is typically only run + /// when in a start state and a prefilter can greatly accelerate a search. + /// Therefore, the possible cost of specializing start states is worth it + /// in this case.
Otherwise, if you have no prefilter, there is likely no + /// reason to specialize start states. + /// + /// This is disabled by default, but note that it is automatically + /// enabled (or disabled) if [`Config::prefilter`] is set. Namely, unless + /// `specialize_start_states` has already been set, [`Config::prefilter`] + /// will automatically enable or disable it based on whether a prefilter + /// is present or not, respectively. This is done because a prefilter's + /// effectiveness is rooted in being executed whenever the DFA is in a + /// start state, and that's only possible to do when they are specialized. + /// + /// Note that it is plausibly reasonable to _disable_ this option + /// explicitly while _enabling_ a prefilter. In that case, a prefilter + /// will still be run at the beginning of a search, but never again. This + /// in theory could strike a good balance if you're in a situation where a + /// prefilter is likely to produce many false positive candidates. + /// + /// # Example + /// + /// This example shows how to enable start state specialization and then + /// shows how to check whether a state is a start state or not. + /// + /// ``` + /// use regex_automata::{hybrid::dfa::DFA, MatchError, Input}; + /// + /// let dfa = DFA::builder() + /// .configure(DFA::config().specialize_start_states(true)) + /// .build(r"[a-z]+")?; + /// let mut cache = dfa.create_cache(); + /// + /// let haystack = "123 foobar 4567".as_bytes(); + /// let sid = dfa.start_state_forward(&mut cache, &Input::new(haystack))?; + /// // The ID returned by 'start_state_forward' will always be tagged as + /// // a start state when start state specialization is enabled. + /// assert!(sid.is_tagged()); + /// assert!(sid.is_start()); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + /// + /// Compare the above with the default lazy DFA configuration where + /// start states are _not_ specialized. In this case, the start state + /// is not tagged and `sid.is_start()` returns false.
+ /// + /// ``` + /// use regex_automata::{hybrid::dfa::DFA, MatchError, Input}; + /// + /// let dfa = DFA::new(r"[a-z]+")?; + /// let mut cache = dfa.create_cache(); + /// + /// let haystack = "123 foobar 4567".as_bytes(); + /// let sid = dfa.start_state_forward(&mut cache, &Input::new(haystack))?; + /// // Start states are not tagged in the default configuration! + /// assert!(!sid.is_tagged()); + /// assert!(!sid.is_start()); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + pub fn specialize_start_states(mut self, yes: bool) -> Config { + self.specialize_start_states = Some(yes); + self + } + + /// Sets the maximum amount of heap memory, in bytes, to allocate to the + /// cache for use during a lazy DFA search. If the lazy DFA would otherwise + /// use more heap memory, then, depending on other configuration knobs, + /// either stop the search and return an error or clear the cache and + /// continue the search. + /// + /// The default cache capacity is some "reasonable" number that will + /// accommodate most regular expressions. You may find that if you need + /// to build a large DFA then it may be necessary to increase the cache + /// capacity. + /// + /// Note that while building a lazy DFA will do a "minimum" check to ensure + /// the capacity is big enough, this is more or less about correctness. + /// If the cache is bigger than the minimum but still "too small," then the + /// lazy DFA could wind up spending a lot of time clearing the cache and + /// recomputing transitions, thus negating the performance benefits of a + /// lazy DFA. Thus, setting the cache capacity is mostly an experimental + /// endeavor. For most common patterns, however, the default should be + /// sufficient. + /// + /// For more details on how the lazy DFA's cache is used, see the + /// documentation for [`Cache`]. + /// + /// # Example + /// + /// This example shows what happens if the configured cache capacity is + /// too small.
In such cases, one can override the cache capacity to make + /// it bigger. Alternatively, one might want to use less memory by setting + /// a smaller cache capacity. + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{hybrid::dfa::DFA, HalfMatch, Input}; + /// + /// let pattern = r"\p{L}{1000}"; + /// + /// // The default cache capacity is likely too small to deal with regexes + /// // that are very large. Large repetitions of large Unicode character + /// // classes are a common way to make very large regexes. + /// let _ = DFA::new(pattern).unwrap_err(); + /// // Bump up the capacity to something bigger. + /// let dfa = DFA::builder() + /// .configure(DFA::config().cache_capacity(100 * (1<<20))) // 100 MB + /// .build(pattern)?; + /// let mut cache = dfa.create_cache(); + /// + /// let haystack = "ͰͲͶͿΆΈΉΊΌΎΏΑΒΓΔΕΖΗΘΙ".repeat(50); + /// let expected = Some(HalfMatch::must(0, 2000)); + /// let got = dfa.try_search_fwd(&mut cache, &Input::new(&haystack))?; + /// assert_eq!(expected, got); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + pub fn cache_capacity(mut self, bytes: usize) -> Config { + self.cache_capacity = Some(bytes); + self + } + + /// Configures construction of a lazy DFA to use the minimum cache capacity + /// if the configured capacity is otherwise too small for the provided NFA. + /// + /// This is useful if you never want lazy DFA construction to fail because + /// of a capacity that is too small. + /// + /// In general, this option is typically not a good idea. In particular, + /// while a minimum cache capacity does permit the lazy DFA to function + /// where it otherwise couldn't, it's plausible that it may not function + /// well if it's constantly running out of room. In that case, the speed + /// advantages of the lazy DFA may be negated.
On the other hand, the + /// "minimum" cache capacity computed may not be completely accurate and + /// could actually be bigger than what is really necessary. Therefore, it + /// is plausible that using the minimum cache capacity could still result + /// in very good performance. + /// + /// This is disabled by default. + /// + /// # Example + /// + /// This example shows what happens if the configured cache capacity is + /// too small. In such cases, one could override the capacity explicitly. + /// An alternative, demonstrated here, lets us force construction to use + /// the minimum cache capacity if the configured capacity is otherwise + /// too small. + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{hybrid::dfa::DFA, HalfMatch, Input}; + /// + /// let pattern = r"\p{L}{1000}"; + /// + /// // The default cache capacity is likely too small to deal with regexes + /// // that are very large. Large repetitions of large Unicode character + /// // classes are a common way to make very large regexes. + /// let _ = DFA::new(pattern).unwrap_err(); + /// // Configure construction such that it automatically selects the minimum + /// // cache capacity if it would otherwise be too small. + /// let dfa = DFA::builder() + /// .configure(DFA::config().skip_cache_capacity_check(true)) + /// .build(pattern)?; + /// let mut cache = dfa.create_cache(); + /// + /// let haystack = "ͰͲͶͿΆΈΉΊΌΎΏΑΒΓΔΕΖΗΘΙ".repeat(50); + /// let expected = Some(HalfMatch::must(0, 2000)); + /// let got = dfa.try_search_fwd(&mut cache, &Input::new(&haystack))?; + /// assert_eq!(expected, got); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + pub fn skip_cache_capacity_check(mut self, yes: bool) -> Config { + self.skip_cache_capacity_check = Some(yes); + self + } + + /// Configure a lazy DFA search to quit after a certain number of cache + /// clearings.
+ /// + /// When a minimum is set, then a lazy DFA search will *possibly* "give + /// up" after the minimum number of cache clearings has occurred. This is + /// typically useful in scenarios where callers want to detect whether the + /// lazy DFA search is "efficient" or not. If the cache is cleared too many + /// times, this is a good indicator that it is not efficient, and thus, the + /// caller may wish to use some other regex engine. + /// + /// Note that the number of times a cache is cleared is a property of + /// the cache itself. Thus, if a cache is used in a subsequent search + /// with a similarly configured lazy DFA, then it could cause the + /// search to "give up" if the cache needed to be cleared, depending + /// on its internal count and configured minimum. The cache clear + /// count can only be reset to `0` via [`DFA::reset_cache`] (or + /// [`Regex::reset_cache`](crate::hybrid::regex::Regex::reset_cache) if + /// you're using the `Regex` API). + /// + /// By default, no minimum is configured. Thus, a lazy DFA search will + /// never give up due to cache clearings. If you do set this option, you + /// might consider also setting [`Config::minimum_bytes_per_state`] in + /// order for the lazy DFA to take efficiency into account before giving + /// up. + /// + /// # Example + /// + /// This example uses a somewhat pathological configuration to demonstrate + /// the _possible_ behavior of cache clearing and how it might result + /// in a search that returns an error. + /// + /// It is important to note that the precise mechanics of how and when + /// a cache gets cleared is an implementation detail. + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{hybrid::dfa::DFA, Input, MatchError, MatchErrorKind}; + /// + /// // This is a carefully chosen regex. The idea is to pick one + /// // that requires some decent number of states (hence the bounded + /// // repetition). 
But we specifically choose to create a class with an + /// // ASCII letter and a non-ASCII letter so that we can check that no new + /// // states are created once the cache is full. Namely, if we fill up the + /// // cache on a haystack of 'a's, then in order to match one 'β', a new + /// // state will need to be created since a 'β' is encoded with multiple + /// // bytes. Since there's no room for this state, the search should quit + /// // at the very first position. + /// let pattern = r"[aβ]{100}"; + /// let dfa = DFA::builder() + /// .configure( + /// // Configure it so that we have the minimum cache capacity + /// // possible. And that if any clearings occur, the search quits. + /// DFA::config() + /// .skip_cache_capacity_check(true) + /// .cache_capacity(0) + /// .minimum_cache_clear_count(Some(0)), + /// ) + /// .build(pattern)?; + /// let mut cache = dfa.create_cache(); + /// + /// // Our search will give up before reaching the end! + /// let haystack = "a".repeat(101).into_bytes(); + /// let result = dfa.try_search_fwd(&mut cache, &Input::new(&haystack)); + /// assert!(matches!( + /// *result.unwrap_err().kind(), + /// MatchErrorKind::GaveUp { .. }, + /// )); + /// + /// // Now that we know the cache is full, if we search a haystack that we + /// // know will require creating at least one new state, it should not + /// // be able to make much progress. + /// let haystack = "β".repeat(101).into_bytes(); + /// let result = dfa.try_search_fwd(&mut cache, &Input::new(&haystack)); + /// assert!(matches!( + /// *result.unwrap_err().kind(), + /// MatchErrorKind::GaveUp { .. }, + /// )); + /// + /// // If we reset the cache, then we should be able to create more states + /// // and make more progress with searching for betas. 
+ /// cache.reset(&dfa); + /// let haystack = "β".repeat(101).into_bytes(); + /// let result = dfa.try_search_fwd(&mut cache, &Input::new(&haystack)); + /// assert!(matches!( + /// *result.unwrap_err().kind(), + /// MatchErrorKind::GaveUp { .. }, + /// )); + /// + /// // ... switching back to ASCII still makes progress since it just needs + /// // to set transitions on existing states! + /// let haystack = "a".repeat(101).into_bytes(); + /// let result = dfa.try_search_fwd(&mut cache, &Input::new(&haystack)); + /// assert!(matches!( + /// *result.unwrap_err().kind(), + /// MatchErrorKind::GaveUp { .. }, + /// )); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + pub fn minimum_cache_clear_count(mut self, min: Option<usize>) -> Config { + self.minimum_cache_clear_count = Some(min); + self + } + + /// Configure a lazy DFA search to quit only when its efficiency drops + /// below the given minimum. + /// + /// The efficiency of the cache is determined by the number of DFA states + /// compiled per byte of haystack searched. For example, if the efficiency + /// is 2, then it means the lazy DFA is creating a new DFA state after + /// searching approximately 2 bytes in a haystack. Generally speaking, 2 + /// is quite bad and it's likely that even a slower regex engine like the + /// [`PikeVM`](crate::nfa::thompson::pikevm::PikeVM) would be faster. + /// + /// This has no effect if [`Config::minimum_cache_clear_count`] is not set. + /// Namely, this option only kicks in when the cache has been cleared more + /// than the minimum number. If no minimum is set, then the cache is simply + /// cleared whenever it fills up and it is impossible for the lazy DFA to + /// quit due to ineffective use of the cache. + /// + /// In general, if one is setting [`Config::minimum_cache_clear_count`], + /// then one should probably also set this knob as well. The reason is + /// that the absolute number of times the cache is cleared is generally + /// not a great predictor of efficiency.
For example, if a new DFA state + /// is created for every 1,000 bytes searched, then it wouldn't be hard + /// for the cache to get cleared more than `N` times and then cause the + /// lazy DFA to quit. But a new DFA state every 1,000 bytes is likely quite + /// good from a performance perspective, and it's likely that the lazy + /// DFA should continue searching, even if it requires clearing the cache + /// occasionally. + /// + /// Finally, note that if you're implementing your own lazy DFA search + /// routine and also want this efficiency check to work correctly, then + /// you'll need to use the following routines to record search progress: + /// + /// * Call [`Cache::search_start`] at the beginning of every search. + /// * Call [`Cache::search_update`] whenever [`DFA::next_state`] is + /// called. + /// * Call [`Cache::search_finish`] before completing a search. (It is + /// not strictly necessary to call this when an error is returned, as + /// `Cache::search_start` will automatically finish the previous search + /// for you. But calling it where possible before returning helps improve + /// the accuracy of how many bytes have actually been searched.) + pub fn minimum_bytes_per_state(mut self, min: Option) -> Config { + self.minimum_bytes_per_state = Some(min); + self + } + + /// Returns the match semantics set in this configuration. + pub fn get_match_kind(&self) -> MatchKind { + self.match_kind.unwrap_or(MatchKind::LeftmostFirst) + } + + /// Returns the prefilter set in this configuration, if one at all. + pub fn get_prefilter(&self) -> Option<&Prefilter> { + self.pre.as_ref().unwrap_or(&None).as_ref() + } + + /// Returns whether this configuration has enabled anchored starting states + /// for every pattern in the DFA. + pub fn get_starts_for_each_pattern(&self) -> bool { + self.starts_for_each_pattern.unwrap_or(false) + } + + /// Returns whether this configuration has enabled byte classes or not. 
+ /// This is typically a debugging oriented option, as disabling it confers + /// no speed benefit. + pub fn get_byte_classes(&self) -> bool { + self.byte_classes.unwrap_or(true) + } + + /// Returns whether this configuration has enabled heuristic Unicode word + /// boundary support. When enabled, it is possible for a search to return + /// an error. + pub fn get_unicode_word_boundary(&self) -> bool { + self.unicode_word_boundary.unwrap_or(false) + } + + /// Returns whether this configuration will instruct the lazy DFA to enter + /// a quit state whenever the given byte is seen during a search. When at + /// least one byte has this enabled, it is possible for a search to return + /// an error. + pub fn get_quit(&self, byte: u8) -> bool { + self.quitset.map_or(false, |q| q.contains(byte)) + } + + /// Returns whether this configuration will instruct the lazy DFA to + /// "specialize" start states. When enabled, the lazy DFA will tag start + /// states so that search routines using the lazy DFA can detect when + /// it's in a start state and do some kind of optimization (like run a + /// prefilter). + pub fn get_specialize_start_states(&self) -> bool { + self.specialize_start_states.unwrap_or(false) + } + + /// Returns the cache capacity set on this configuration. + pub fn get_cache_capacity(&self) -> usize { + self.cache_capacity.unwrap_or(2 * (1 << 20)) + } + + /// Returns whether the cache capacity check should be skipped. + pub fn get_skip_cache_capacity_check(&self) -> bool { + self.skip_cache_capacity_check.unwrap_or(false) + } + + /// Returns, if set, the minimum number of times the cache must be cleared + /// before a lazy DFA search can give up. When no minimum is set, then a + /// search will never quit and will always clear the cache whenever it + /// fills up. 
+ pub fn get_minimum_cache_clear_count(&self) -> Option { + self.minimum_cache_clear_count.unwrap_or(None) + } + + /// Returns, if set, the minimum number of bytes per state that need to be + /// processed in order for the lazy DFA to keep going. If the minimum falls + /// below this number (and the cache has been cleared a minimum number of + /// times), then the lazy DFA will return a "gave up" error. + pub fn get_minimum_bytes_per_state(&self) -> Option { + self.minimum_bytes_per_state.unwrap_or(None) + } + + /// Returns the minimum lazy DFA cache capacity required for the given NFA. + /// + /// The cache capacity required for a particular NFA may change without + /// notice. Callers should not rely on it being stable. + /// + /// This is useful for informational purposes, but can also be useful for + /// other reasons. For example, if one wants to check the minimum cache + /// capacity themselves or if one wants to set the capacity based on the + /// minimum. + /// + /// This may return an error if this configuration does not support all of + /// the instructions used in the given NFA. For example, if the NFA has a + /// Unicode word boundary but this configuration does not enable heuristic + /// support for Unicode word boundaries. + pub fn get_minimum_cache_capacity( + &self, + nfa: &thompson::NFA, + ) -> Result { + let quitset = self.quit_set_from_nfa(nfa)?; + let classes = self.byte_classes_from_nfa(nfa, &quitset); + let starts = self.get_starts_for_each_pattern(); + Ok(minimum_cache_capacity(nfa, &classes, starts)) + } + + /// Returns the byte class map used during search from the given NFA. + /// + /// If byte classes are disabled on this configuration, then a map is + /// returned that puts each byte in its own equivalent class. 
+ fn byte_classes_from_nfa( + &self, + nfa: &thompson::NFA, + quit: &ByteSet, + ) -> ByteClasses { + if !self.get_byte_classes() { + // The lazy DFA will always use the equivalence class map, but + // enabling this option is useful for debugging. Namely, this will + // cause all transitions to be defined over their actual bytes + // instead of an opaque equivalence class identifier. The former is + // much easier to grok as a human. + ByteClasses::singletons() + } else { + let mut set = nfa.byte_class_set().clone(); + // It is important to distinguish any "quit" bytes from all other + // bytes. Otherwise, a non-quit byte may end up in the same class + // as a quit byte, and thus cause the DFA stop when it shouldn't. + // + // Test case: + // + // regex-cli find match hybrid --unicode-word-boundary \ + // -p '^#' -p '\b10\.55\.182\.100\b' -y @conn.json.1000x.log + if !quit.is_empty() { + set.add_set(&quit); + } + set.byte_classes() + } + } + + /// Return the quit set for this configuration and the given NFA. + /// + /// This may return an error if the NFA is incompatible with this + /// configuration's quit set. For example, if the NFA has a Unicode word + /// boundary and the quit set doesn't include non-ASCII bytes. + fn quit_set_from_nfa( + &self, + nfa: &thompson::NFA, + ) -> Result { + let mut quit = self.quitset.unwrap_or(ByteSet::empty()); + if nfa.look_set_any().contains_word_unicode() { + if self.get_unicode_word_boundary() { + for b in 0x80..=0xFF { + quit.add(b); + } + } else { + // If heuristic support for Unicode word boundaries wasn't + // enabled, then we can still check if our quit set is correct. + // If the caller set their quit bytes in a way that causes the + // DFA to quit on at least all non-ASCII bytes, then that's all + // we need for heuristic support to work. 
+ if !quit.contains_range(0x80, 0xFF) { + return Err( + BuildError::unsupported_dfa_word_boundary_unicode(), + ); + } + } + } + Ok(quit) + } + + /// Overwrite the default configuration such that the options in `o` are + /// always used. If an option in `o` is not set, then the corresponding + /// option in `self` is used. If it's not set in `self` either, then it + /// remains not set. + fn overwrite(&self, o: Config) -> Config { + Config { + match_kind: o.match_kind.or(self.match_kind), + pre: o.pre.or_else(|| self.pre.clone()), + starts_for_each_pattern: o + .starts_for_each_pattern + .or(self.starts_for_each_pattern), + byte_classes: o.byte_classes.or(self.byte_classes), + unicode_word_boundary: o + .unicode_word_boundary + .or(self.unicode_word_boundary), + quitset: o.quitset.or(self.quitset), + specialize_start_states: o + .specialize_start_states + .or(self.specialize_start_states), + cache_capacity: o.cache_capacity.or(self.cache_capacity), + skip_cache_capacity_check: o + .skip_cache_capacity_check + .or(self.skip_cache_capacity_check), + minimum_cache_clear_count: o + .minimum_cache_clear_count + .or(self.minimum_cache_clear_count), + minimum_bytes_per_state: o + .minimum_bytes_per_state + .or(self.minimum_bytes_per_state), + } + } +} + +/// A builder for constructing a lazy deterministic finite automaton from +/// regular expressions. +/// +/// As a convenience, [`DFA::builder`] is an alias for [`Builder::new`]. The +/// advantage of the former is that it often lets you avoid importing the +/// `Builder` type directly. +/// +/// This builder provides two main things: +/// +/// 1. It provides a few different `build` routines for actually constructing +/// a DFA from different kinds of inputs. The most convenient is +/// [`Builder::build`], which builds a DFA directly from a pattern string. The +/// most flexible is [`Builder::build_from_nfa`], which builds a DFA straight +/// from an NFA. +/// 2. The builder permits configuring a number of things. 
+/// [`Builder::configure`] is used with [`Config`] to configure aspects of +/// the DFA and the construction process itself. [`Builder::syntax`] and +/// [`Builder::thompson`] permit configuring the regex parser and Thompson NFA +/// construction, respectively. The syntax and thompson configurations only +/// apply when building from a pattern string. +/// +/// This builder always constructs a *single* lazy DFA. As such, this builder +/// can only be used to construct regexes that either detect the presence +/// of a match or find the end location of a match. A single DFA cannot +/// produce both the start and end of a match. For that information, use a +/// [`Regex`](crate::hybrid::regex::Regex), which can be similarly configured +/// using [`regex::Builder`](crate::hybrid::regex::Builder). The main reason +/// to use a DFA directly is if the end location of a match is enough for your +/// use case. Namely, a `Regex` will construct two lazy DFAs instead of one, +/// since a second reverse DFA is needed to find the start of a match. +/// +/// # Example +/// +/// This example shows how to build a lazy DFA that uses a tiny cache capacity +/// and completely disables Unicode. That is: +/// +/// * Things such as `\w`, `.` and `\b` are no longer Unicode-aware. `\w` +/// and `\b` are ASCII-only while `.` matches any byte except for `\n` +/// (instead of any UTF-8 encoding of a Unicode scalar value except for +/// `\n`). Things that are Unicode only, such as `\pL`, are not allowed. +/// * The pattern itself is permitted to match invalid UTF-8. For example, +/// things like `[^a]` that match any byte except for `a` are permitted. 
+/// +/// ``` +/// use regex_automata::{ +/// hybrid::dfa::DFA, +/// nfa::thompson, +/// util::syntax, +/// HalfMatch, Input, +/// }; +/// +/// let dfa = DFA::builder() +/// .configure(DFA::config().cache_capacity(5_000)) +/// .thompson(thompson::Config::new().utf8(false)) +/// .syntax(syntax::Config::new().unicode(false).utf8(false)) +/// .build(r"foo[^b]ar.*")?; +/// let mut cache = dfa.create_cache(); +/// +/// let haystack = b"\xFEfoo\xFFar\xE2\x98\xFF\n"; +/// let expected = Some(HalfMatch::must(0, 10)); +/// let got = dfa.try_search_fwd(&mut cache, &Input::new(haystack))?; +/// assert_eq!(expected, got); +/// +/// # Ok::<(), Box>(()) +/// ``` +#[derive(Clone, Debug)] +pub struct Builder { + config: Config, + #[cfg(feature = "syntax")] + thompson: thompson::Compiler, +} + +impl Builder { + /// Create a new lazy DFA builder with the default configuration. + pub fn new() -> Builder { + Builder { + config: Config::default(), + #[cfg(feature = "syntax")] + thompson: thompson::Compiler::new(), + } + } + + /// Build a lazy DFA from the given pattern. + /// + /// If there was a problem parsing or compiling the pattern, then an error + /// is returned. + #[cfg(feature = "syntax")] + pub fn build(&self, pattern: &str) -> Result { + self.build_many(&[pattern]) + } + + /// Build a lazy DFA from the given patterns. + /// + /// When matches are returned, the pattern ID corresponds to the index of + /// the pattern in the slice given. + #[cfg(feature = "syntax")] + pub fn build_many>( + &self, + patterns: &[P], + ) -> Result { + let nfa = self + .thompson + .clone() + // We can always forcefully disable captures because DFAs do not + // support them. + .configure( + thompson::Config::new() + .which_captures(thompson::WhichCaptures::None), + ) + .build_many(patterns) + .map_err(BuildError::nfa)?; + self.build_from_nfa(nfa) + } + + /// Build a DFA from the given NFA. + /// + /// Note that this requires owning a `thompson::NFA`. 
While this may force + /// you to clone the NFA, such a clone is not a deep clone. Namely, NFAs + /// are defined internally to support shared ownership such that cloning is + /// very cheap. + /// + /// # Example + /// + /// This example shows how to build a lazy DFA if you already have an NFA + /// in hand. + /// + /// ``` + /// use regex_automata::{ + /// hybrid::dfa::DFA, + /// nfa::thompson, + /// HalfMatch, Input, + /// }; + /// + /// let haystack = "foo123bar"; + /// + /// // This shows how to set non-default options for building an NFA. + /// let nfa = thompson::Compiler::new() + /// .configure(thompson::Config::new().shrink(true)) + /// .build(r"[0-9]+")?; + /// let dfa = DFA::builder().build_from_nfa(nfa)?; + /// let mut cache = dfa.create_cache(); + /// let expected = Some(HalfMatch::must(0, 6)); + /// let got = dfa.try_search_fwd(&mut cache, &Input::new(haystack))?; + /// assert_eq!(expected, got); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn build_from_nfa( + &self, + nfa: thompson::NFA, + ) -> Result { + let quitset = self.config.quit_set_from_nfa(&nfa)?; + let classes = self.config.byte_classes_from_nfa(&nfa, &quitset); + // Check that we can fit at least a few states into our cache, + // otherwise it's pretty senseless to use the lazy DFA. This does have + // a possible failure mode though. This assumes the maximum size of a + // state in powerset space (so, the total number of NFA states), which + // may never actually materialize, and could be quite a bit larger + // than the actual biggest state. If this turns out to be a problem, + // we could expose a knob that disables this check. But if so, we have + // to be careful not to panic in other areas of the code (the cache + // clearing and init code) that tend to assume some minimum useful + // cache capacity. 
+ let min_cache = minimum_cache_capacity( + &nfa, + &classes, + self.config.get_starts_for_each_pattern(), + ); + let mut cache_capacity = self.config.get_cache_capacity(); + if cache_capacity < min_cache { + // When the caller has asked us to skip the cache capacity check, + // then we simply force the cache capacity to its minimum amount + // and mush on. + if self.config.get_skip_cache_capacity_check() { + debug!( + "given capacity ({}) is too small, \ + since skip_cache_capacity_check is enabled, \ + setting cache capacity to minimum ({})", + cache_capacity, min_cache, + ); + cache_capacity = min_cache; + } else { + return Err(BuildError::insufficient_cache_capacity( + min_cache, + cache_capacity, + )); + } + } + // We also need to check that we can fit at least some small number + // of states in our state ID space. This is unlikely to trigger in + // >=32-bit systems, but 16-bit systems have a pretty small state ID + // space since a number of bits are used up as sentinels. + if let Err(err) = minimum_lazy_state_id(&classes) { + return Err(BuildError::insufficient_state_id_capacity(err)); + } + let stride2 = classes.stride2(); + let start_map = StartByteMap::new(nfa.look_matcher()); + Ok(DFA { + config: self.config.clone(), + nfa, + stride2, + start_map, + classes, + quitset, + cache_capacity, + }) + } + + /// Apply the given lazy DFA configuration options to this builder. + pub fn configure(&mut self, config: Config) -> &mut Builder { + self.config = self.config.overwrite(config); + self + } + + /// Set the syntax configuration for this builder using + /// [`syntax::Config`](crate::util::syntax::Config). + /// + /// This permits setting things like case insensitivity, Unicode and multi + /// line mode. + /// + /// These settings only apply when constructing a lazy DFA directly from a + /// pattern. 
+ #[cfg(feature = "syntax")] + pub fn syntax( + &mut self, + config: crate::util::syntax::Config, + ) -> &mut Builder { + self.thompson.syntax(config); + self + } + + /// Set the Thompson NFA configuration for this builder using + /// [`nfa::thompson::Config`](crate::nfa::thompson::Config). + /// + /// This permits setting things like whether the DFA should match the regex + /// in reverse or if additional time should be spent shrinking the size of + /// the NFA. + /// + /// These settings only apply when constructing a DFA directly from a + /// pattern. + #[cfg(feature = "syntax")] + pub fn thompson(&mut self, config: thompson::Config) -> &mut Builder { + self.thompson.configure(config); + self + } +} + +/// Represents the current state of an overlapping search. +/// +/// This is used for overlapping searches since they need to know something +/// about the previous search. For example, when multiple patterns match at the +/// same position, this state tracks the last reported pattern so that the next +/// search knows whether to report another matching pattern or continue with +/// the search at the next position. Additionally, it also tracks which state +/// the last search call terminated in. +/// +/// This type provides little introspection capabilities. The only thing a +/// caller can do is construct it and pass it around to permit search routines +/// to use it to track state, and also ask whether a match has been found. +/// +/// Callers should always provide a fresh state constructed via +/// [`OverlappingState::start`] when starting a new search. Reusing state from +/// a previous search may result in incorrect results. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct OverlappingState { + /// The match reported by the most recent overlapping search to use this + /// state. + /// + /// If a search does not find any matches, then it is expected to clear + /// this value. 
+ pub(crate) mat: Option, + /// The state ID of the state at which the search was in when the call + /// terminated. When this is a match state, `last_match` must be set to a + /// non-None value. + /// + /// A `None` value indicates the start state of the corresponding + /// automaton. We cannot use the actual ID, since any one automaton may + /// have many start states, and which one is in use depends on several + /// search-time factors. + pub(crate) id: Option, + /// The position of the search. + /// + /// When `id` is None (i.e., we are starting a search), this is set to + /// the beginning of the search as given by the caller regardless of its + /// current value. Subsequent calls to an overlapping search pick up at + /// this offset. + pub(crate) at: usize, + /// The index into the matching patterns of the next match to report if the + /// current state is a match state. Note that this may be 1 greater than + /// the total number of matches to report for the current match state. (In + /// which case, no more matches should be reported at the current position + /// and the search should advance to the next position.) + pub(crate) next_match_index: Option, + /// This is set to true when a reverse overlapping search has entered its + /// EOI transitions. + /// + /// This isn't used in a forward search because it knows to stop once the + /// position exceeds the end of the search range. In a reverse search, + /// since we use unsigned offsets, we don't "know" once we've gone past + /// `0`. So the only way to detect it is with this extra flag. The reverse + /// overlapping search knows to terminate specifically after it has + /// reported all matches after following the EOI transition. + pub(crate) rev_eoi: bool, +} + +impl OverlappingState { + /// Create a new overlapping state that begins at the start state of any + /// automaton. 
+ pub fn start() -> OverlappingState { + OverlappingState { + mat: None, + id: None, + at: 0, + next_match_index: None, + rev_eoi: false, + } + } + + /// Return the match result of the most recent search to execute with this + /// state. + /// + /// A searches will clear this result automatically, such that if no + /// match is found, this will correctly report `None`. + pub fn get_match(&self) -> Option { + self.mat + } +} + +/// Runs the given overlapping `search` function (forwards or backwards) until +/// a match is found whose offset does not split a codepoint. +/// +/// This is *not* always correct to call. It should only be called when the +/// underlying NFA has UTF-8 mode enabled *and* it can produce zero-width +/// matches. Calling this when both of those things aren't true might result +/// in legitimate matches getting skipped. +#[cold] +#[inline(never)] +fn skip_empty_utf8_splits_overlapping( + input: &Input<'_>, + state: &mut OverlappingState, + mut search: F, +) -> Result<(), MatchError> +where + F: FnMut(&Input<'_>, &mut OverlappingState) -> Result<(), MatchError>, +{ + // Note that this routine works for forwards and reverse searches + // even though there's no code here to handle those cases. That's + // because overlapping searches drive themselves to completion via + // `OverlappingState`. So all we have to do is push it until no matches are + // found. + + let mut hm = match state.get_match() { + None => return Ok(()), + Some(hm) => hm, + }; + if input.get_anchored().is_anchored() { + if !input.is_char_boundary(hm.offset()) { + state.mat = None; + } + return Ok(()); + } + while !input.is_char_boundary(hm.offset()) { + search(input, state)?; + hm = match state.get_match() { + None => return Ok(()), + Some(hm) => hm, + }; + } + Ok(()) +} + +/// Based on the minimum number of states required for a useful lazy DFA cache, +/// this returns the minimum lazy state ID that must be representable. 
+/// +/// It's not likely for this to have any impact 32-bit systems (or higher), but +/// on 16-bit systems, the lazy state ID space is quite constrained and thus +/// may be insufficient if our MIN_STATES value is (for some reason) too high. +fn minimum_lazy_state_id( + classes: &ByteClasses, +) -> Result { + let stride = 1 << classes.stride2(); + let min_state_index = MIN_STATES.checked_sub(1).unwrap(); + LazyStateID::new(min_state_index * stride) +} + +/// Based on the minimum number of states required for a useful lazy DFA cache, +/// this returns a heuristic minimum number of bytes of heap space required. +/// +/// This is a "heuristic" because the minimum it returns is likely bigger than +/// the true minimum. Namely, it assumes that each powerset NFA/DFA state uses +/// the maximum number of NFA states (all of them). This is likely bigger +/// than what is required in practice. Computing the true minimum effectively +/// requires determinization, which is probably too much work to do for a +/// simple check like this. +/// +/// One of the issues with this approach IMO is that it requires that this +/// be in sync with the calculation above for computing how much heap memory +/// the DFA cache uses. If we get it wrong, it's possible for example for the +/// minimum to be smaller than the computed heap memory, and thus, it may be +/// the case that we can't add the required minimum number of states. That in +/// turn will make lazy DFA panic because we assume that we can add at least a +/// minimum number of states. +/// +/// Another approach would be to always allow the minimum number of states to +/// be added to the lazy DFA cache, even if it exceeds the configured cache +/// limit. This does mean that the limit isn't really a limit in all cases, +/// which is unfortunate. But it does at least guarantee that the lazy DFA can +/// always make progress, even if it is slow. 
(This approach is very similar to +/// enabling the 'skip_cache_capacity_check' config knob, except it wouldn't +/// rely on cache size calculation. Instead, it would just always permit a +/// minimum number of states to be added.) +fn minimum_cache_capacity( + nfa: &thompson::NFA, + classes: &ByteClasses, + starts_for_each_pattern: bool, +) -> usize { + const ID_SIZE: usize = size_of::(); + const STATE_SIZE: usize = size_of::(); + + let stride = 1 << classes.stride2(); + let states_len = nfa.states().len(); + let sparses = 2 * states_len * NFAStateID::SIZE; + let trans = MIN_STATES * stride * ID_SIZE; + + let mut starts = Start::len() * ID_SIZE; + if starts_for_each_pattern { + starts += (Start::len() * nfa.pattern_len()) * ID_SIZE; + } + + // The min number of states HAS to be at least 4: we have 3 sentinel states + // and then we need space for one more when we save a state after clearing + // the cache. We also need space for one more, otherwise we get stuck in a + // loop where we try to add a 5th state, which gets rejected, which clears + // the cache, which adds back a saved state (4th total state) which then + // tries to add the 5th state again. + assert!(MIN_STATES >= 5, "minimum number of states has to be at least 5"); + // The minimum number of non-sentinel states. We consider this separately + // because sentinel states are much smaller in that they contain no NFA + // states. Given our aggressive calculation here, it's worth being more + // precise with the number of states we need. + let non_sentinel = MIN_STATES.checked_sub(SENTINEL_STATES).unwrap(); + + // Every `State` has 5 bytes for flags, 4 bytes (max) for the number of + // patterns, followed by 32-bit encodings of patterns and then delta + // varint encodings of NFA state IDs. We use the worst case (which isn't + // technically possible) of 5 bytes for each NFA state ID. + // + // HOWEVER, three of the states needed by a lazy DFA are just the sentinel + // unknown, dead and quit states. 
Those states have a known size and it is + // small. + let dead_state_size = State::dead().memory_usage(); + let max_state_size = 5 + 4 + (nfa.pattern_len() * 4) + (states_len * 5); + let states = (SENTINEL_STATES * (STATE_SIZE + dead_state_size)) + + (non_sentinel * (STATE_SIZE + max_state_size)); + // NOTE: We don't double count heap memory used by State for this map since + // we use reference counting to avoid doubling memory usage. (This tends to + // be where most memory is allocated in the cache.) + let states_to_sid = (MIN_STATES * STATE_SIZE) + (MIN_STATES * ID_SIZE); + let stack = states_len * NFAStateID::SIZE; + let scratch_state_builder = max_state_size; + + trans + + starts + + states + + states_to_sid + + sparses + + stack + + scratch_state_builder +} + +#[cfg(all(test, feature = "syntax"))] +mod tests { + use super::*; + + // Tests that we handle heuristic Unicode word boundary support in reverse + // DFAs in the specific case of contextual searches. + // + // I wrote this test when I discovered a bug in how heuristic word + // boundaries were handled. Namely, that the starting state selection + // didn't consider the DFA's quit byte set when looking at the byte + // immediately before the start of the search (or immediately after the + // end of the search in the case of a reverse search). As a result, it was + // possible for '\bfoo\b' to match 'β123' because the trailing \xB2 byte + // in the 'β' codepoint would be treated as a non-word character. But of + // course, this search should trigger the DFA to quit, since there is a + // non-ASCII byte in consideration. + // + // Thus, I fixed 'start_state_{forward,reverse}' to check the quit byte set + // if it wasn't empty. The forward case is tested in the doc test for the + // Config::unicode_word_boundary API. We test the reverse case here, which + // is sufficiently niche that it doesn't really belong in a doc test. 
+ #[test] + fn heuristic_unicode_reverse() { + let dfa = DFA::builder() + .configure(DFA::config().unicode_word_boundary(true)) + .thompson(thompson::Config::new().reverse(true)) + .build(r"\b[0-9]+\b") + .unwrap(); + let mut cache = dfa.create_cache(); + + let input = Input::new("β123").range(2..); + let expected = MatchError::quit(0xB2, 1); + let got = dfa.try_search_rev(&mut cache, &input); + assert_eq!(Err(expected), got); + + let input = Input::new("123β").range(..3); + let expected = MatchError::quit(0xCE, 3); + let got = dfa.try_search_rev(&mut cache, &input); + assert_eq!(Err(expected), got); + } +} diff --git a/vendor/regex-automata/src/hybrid/error.rs b/vendor/regex-automata/src/hybrid/error.rs new file mode 100644 index 0000000..d134e7e --- /dev/null +++ b/vendor/regex-automata/src/hybrid/error.rs @@ -0,0 +1,242 @@ +use crate::{hybrid::id::LazyStateIDError, nfa, util::search::Anchored}; + +/// An error that occurs when initial construction of a lazy DFA fails. +/// +/// A build error can occur when insufficient cache capacity is configured or +/// if something about the NFA is unsupported. (For example, if one attempts +/// to build a lazy DFA without heuristic Unicode support but with an NFA that +/// contains a Unicode word boundary.) +/// +/// This error does not provide many introspection capabilities. There are +/// generally only two things you can do with it: +/// +/// * Obtain a human readable message via its `std::fmt::Display` impl. +/// * Access an underlying +/// [`nfa::thompson::BuildError`](crate::nfa::thompson::BuildError) +/// type from its `source` method via the `std::error::Error` trait. This error +/// only occurs when using convenience routines for building a lazy DFA +/// directly from a pattern string. +/// +/// When the `std` feature is enabled, this implements the `std::error::Error` +/// trait. 
+#[derive(Clone, Debug)] +pub struct BuildError { + kind: BuildErrorKind, +} + +#[derive(Clone, Debug)] +enum BuildErrorKind { + NFA(nfa::thompson::BuildError), + InsufficientCacheCapacity { minimum: usize, given: usize }, + InsufficientStateIDCapacity { err: LazyStateIDError }, + Unsupported(&'static str), +} + +impl BuildError { + pub(crate) fn nfa(err: nfa::thompson::BuildError) -> BuildError { + BuildError { kind: BuildErrorKind::NFA(err) } + } + + pub(crate) fn insufficient_cache_capacity( + minimum: usize, + given: usize, + ) -> BuildError { + BuildError { + kind: BuildErrorKind::InsufficientCacheCapacity { minimum, given }, + } + } + + pub(crate) fn insufficient_state_id_capacity( + err: LazyStateIDError, + ) -> BuildError { + BuildError { + kind: BuildErrorKind::InsufficientStateIDCapacity { err }, + } + } + + pub(crate) fn unsupported_dfa_word_boundary_unicode() -> BuildError { + let msg = "cannot build lazy DFAs for regexes with Unicode word \ + boundaries; switch to ASCII word boundaries, or \ + heuristically enable Unicode word boundaries or use a \ + different regex engine"; + BuildError { kind: BuildErrorKind::Unsupported(msg) } + } +} + +#[cfg(feature = "std")] +impl std::error::Error for BuildError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self.kind { + BuildErrorKind::NFA(ref err) => Some(err), + _ => None, + } + } +} + +impl core::fmt::Display for BuildError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match self.kind { + BuildErrorKind::NFA(_) => write!(f, "error building NFA"), + BuildErrorKind::InsufficientCacheCapacity { minimum, given } => { + write!( + f, + "given cache capacity ({}) is smaller than \ + minimum required ({})", + given, minimum, + ) + } + BuildErrorKind::InsufficientStateIDCapacity { ref err } => { + err.fmt(f) + } + BuildErrorKind::Unsupported(ref msg) => { + write!(f, "unsupported regex feature for DFAs: {}", msg) + } + } + } +} + +/// An error that can 
occur when computing the start state for a search. +/// +/// Computing a start state can fail for a few reasons, either +/// based on incorrect configuration or even based on whether +/// the look-behind byte triggers a quit state. Typically +/// one does not need to handle this error if you're using +/// [`DFA::start_state_forward`](crate::hybrid::dfa::DFA::start_state_forward) +/// (or its reverse counterpart), as that routine automatically converts +/// `StartError` to a [`MatchError`](crate::MatchError) for you. +/// +/// This error may be returned by the +/// [`DFA::start_state`](crate::hybrid::dfa::DFA::start_state) routine. +/// +/// This error implements the `std::error::Error` trait when the `std` feature +/// is enabled. +/// +/// This error is marked as non-exhaustive. New variants may be added in a +/// semver compatible release. +#[non_exhaustive] +#[derive(Clone, Debug)] +pub enum StartError { + /// An error that occurs when cache inefficiency has dropped below the + /// configured heuristic thresholds. + Cache { + /// The underlying cache error that occurred. + err: CacheError, + }, + /// An error that occurs when a starting configuration's look-behind byte + /// is in this DFA's quit set. + Quit { + /// The quit byte that was found. + byte: u8, + }, + /// An error that occurs when the caller requests an anchored mode that + /// isn't supported by the DFA. + UnsupportedAnchored { + /// The anchored mode given that is unsupported. 
+ mode: Anchored, + }, +} + +impl StartError { + pub(crate) fn cache(err: CacheError) -> StartError { + StartError::Cache { err } + } + + pub(crate) fn quit(byte: u8) -> StartError { + StartError::Quit { byte } + } + + pub(crate) fn unsupported_anchored(mode: Anchored) -> StartError { + StartError::UnsupportedAnchored { mode } + } +} + +#[cfg(feature = "std")] +impl std::error::Error for StartError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match *self { + StartError::Cache { ref err } => Some(err), + _ => None, + } + } +} + +impl core::fmt::Display for StartError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match *self { + StartError::Cache { .. } => write!( + f, + "error computing start state because of cache inefficiency" + ), + StartError::Quit { byte } => write!( + f, + "error computing start state because the look-behind byte \ + {:?} triggered a quit state", + crate::util::escape::DebugByte(byte), + ), + StartError::UnsupportedAnchored { mode: Anchored::Yes } => { + write!( + f, + "error computing start state because \ + anchored searches are not supported or enabled" + ) + } + StartError::UnsupportedAnchored { mode: Anchored::No } => { + write!( + f, + "error computing start state because \ + unanchored searches are not supported or enabled" + ) + } + StartError::UnsupportedAnchored { + mode: Anchored::Pattern(pid), + } => { + write!( + f, + "error computing start state because \ + anchored searches for a specific pattern ({}) \ + are not supported or enabled", + pid.as_usize(), + ) + } + } + } +} + +/// An error that occurs when cache usage has become inefficient. +/// +/// One of the weaknesses of a lazy DFA is that it may need to clear its +/// cache repeatedly if it's not big enough. If this happens too much, then it +/// can slow searching down significantly. A mitigation to this is to use +/// heuristics to detect whether the cache is being used efficiently or not. 
+/// If not, then a lazy DFA can return a `CacheError`. +/// +/// The default configuration of a lazy DFA in this crate is +/// set such that a `CacheError` will never occur. Instead, +/// callers must opt into this behavior with settings like +/// [`dfa::Config::minimum_cache_clear_count`](crate::hybrid::dfa::Config::minimum_cache_clear_count) +/// and +/// [`dfa::Config::minimum_bytes_per_state`](crate::hybrid::dfa::Config::minimum_bytes_per_state). +/// +/// When the `std` feature is enabled, this implements the `std::error::Error` +/// trait. +#[derive(Clone, Debug)] +pub struct CacheError(()); + +impl CacheError { + pub(crate) fn too_many_cache_clears() -> CacheError { + CacheError(()) + } + + pub(crate) fn bad_efficiency() -> CacheError { + CacheError(()) + } +} + +#[cfg(feature = "std")] +impl std::error::Error for CacheError {} + +impl core::fmt::Display for CacheError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, "lazy DFA cache has been cleared too many times") + } +} diff --git a/vendor/regex-automata/src/hybrid/id.rs b/vendor/regex-automata/src/hybrid/id.rs new file mode 100644 index 0000000..662e3c9 --- /dev/null +++ b/vendor/regex-automata/src/hybrid/id.rs @@ -0,0 +1,354 @@ +/// A state identifier specifically tailored for lazy DFAs. +/// +/// A lazy state ID logically represents a pointer to a DFA state. In practice, +/// by limiting the number of DFA states it can address, it reserves some +/// bits of its representation to encode some additional information. That +/// additional information is called a "tag." That tag is used to record +/// whether the state it points to is an unknown, dead, quit, start or match +/// state. +/// +/// When implementing a low level search routine with a lazy DFA, it is +/// necessary to query the type of the current state to know what to do: +/// +/// * **Unknown** - The state has not yet been computed. 
The +/// parameters used to get this state ID must be re-passed to +/// [`DFA::next_state`](crate::hybrid::dfa::DFA::next_state), which will never +/// return an unknown state ID. +/// * **Dead** - A dead state only has transitions to itself. It indicates that +/// the search cannot do anything else and should stop with whatever result it +/// has. +/// * **Quit** - A quit state indicates that the automaton could not answer +/// whether a match exists or not. Correct search implementations must return a +/// [`MatchError::quit`](crate::MatchError::quit) when a DFA enters a quit +/// state. +/// * **Start** - A start state is a state in which a search can begin. +/// Lazy DFAs usually have more than one start state. Branching on +/// this isn't required for correctness, but a common optimization is +/// to run a prefilter when a search enters a start state. Note that +/// start states are *not* tagged automatically, and one must enable the +/// [`Config::specialize_start_states`](crate::hybrid::dfa::Config::specialize_start_states) +/// setting for start states to be tagged. The reason for this is +/// that a DFA search loop is usually written to execute a prefilter once it +/// enters a start state. But if there is no prefilter, this handling can be +/// quite disastrous as the DFA may ping-pong between the special handling code +/// and a possible optimized hot path for handling untagged states. When start +/// states aren't specialized, then they are untagged and remain in the hot +/// path. +/// * **Match** - A match state indicates that a match has been found. +/// Depending on the semantics of your search implementation, it may either +/// continue until the end of the haystack or a dead state, or it might quit +/// and return the match immediately. +/// +/// As an optimization, the [`is_tagged`](LazyStateID::is_tagged) predicate +/// can be used to determine if a tag exists at all.
This is useful to avoid +/// branching on all of the above types for every byte searched. +/// +/// # Example +/// +/// This example shows how `LazyStateID` can be used to implement a correct +/// search routine with minimal branching. In particular, this search routine +/// implements "leftmost" matching, which means that it doesn't immediately +/// stop once a match is found. Instead, it continues until it reaches a dead +/// state. +/// +/// Notice also how a correct search implementation deals with +/// [`CacheError`](crate::hybrid::CacheError)s returned by some of +/// the lazy DFA routines. When a `CacheError` occurs, it returns +/// [`MatchError::gave_up`](crate::MatchError::gave_up). +/// +/// ``` +/// use regex_automata::{ +/// hybrid::dfa::{Cache, DFA}, +/// HalfMatch, MatchError, Input, +/// }; +/// +/// fn find_leftmost_first( +/// dfa: &DFA, +/// cache: &mut Cache, +/// haystack: &[u8], +/// ) -> Result, MatchError> { +/// // The start state is determined by inspecting the position and the +/// // initial bytes of the haystack. Note that start states can never +/// // be match states (since DFAs in this crate delay matches by 1 +/// // byte), so we don't need to check if the start state is a match. +/// let mut sid = dfa.start_state_forward( +/// cache, +/// &Input::new(haystack), +/// )?; +/// let mut last_match = None; +/// // Walk all the bytes in the haystack. We can quit early if we see +/// // a dead or a quit state. The former means the automaton will +/// // never transition to any other state. The latter means that the +/// // automaton entered a condition in which its search failed. 
+/// for (i, &b) in haystack.iter().enumerate() { +/// sid = dfa +/// .next_state(cache, sid, b) +/// .map_err(|_| MatchError::gave_up(i))?; +/// if sid.is_tagged() { +/// if sid.is_match() { +/// last_match = Some(HalfMatch::new( +/// dfa.match_pattern(cache, sid, 0), +/// i, +/// )); +/// } else if sid.is_dead() { +/// return Ok(last_match); +/// } else if sid.is_quit() { +/// // It is possible to enter into a quit state after +/// // observing a match has occurred. In that case, we +/// // should return the match instead of an error. +/// if last_match.is_some() { +/// return Ok(last_match); +/// } +/// return Err(MatchError::quit(b, i)); +/// } +/// // Implementors may also want to check for start states and +/// // handle them differently for performance reasons. But it is +/// // not necessary for correctness. Note that in order to check +/// // for start states, you'll need to enable the +/// // 'specialize_start_states' config knob, otherwise start +/// // states will not be tagged. +/// } +/// } +/// // Matches are always delayed by 1 byte, so we must explicitly walk +/// // the special "EOI" transition at the end of the search. +/// sid = dfa +/// .next_eoi_state(cache, sid) +/// .map_err(|_| MatchError::gave_up(haystack.len()))?; +/// if sid.is_match() { +/// last_match = Some(HalfMatch::new( +/// dfa.match_pattern(cache, sid, 0), +/// haystack.len(), +/// )); +/// } +/// Ok(last_match) +/// } +/// +/// // We use a greedy '+' operator to show how the search doesn't just stop +/// // once a match is detected. It continues extending the match. Using +/// // '[a-z]+?' would also work as expected and stop the search early. +/// // Greediness is built into the automaton. 
+/// let dfa = DFA::new(r"[a-z]+")?; +/// let mut cache = dfa.create_cache(); +/// let haystack = "123 foobar 4567".as_bytes(); +/// let mat = find_leftmost_first(&dfa, &mut cache, haystack)?.unwrap(); +/// assert_eq!(mat.pattern().as_usize(), 0); +/// assert_eq!(mat.offset(), 10); +/// +/// // Here's another example that tests our handling of the special +/// // EOI transition. This will fail to find a match if we don't call +/// // 'next_eoi_state' at the end of the search since the match isn't found +/// // until the final byte in the haystack. +/// let dfa = DFA::new(r"[0-9]{4}")?; +/// let mut cache = dfa.create_cache(); +/// let haystack = "123 foobar 4567".as_bytes(); +/// let mat = find_leftmost_first(&dfa, &mut cache, haystack)?.unwrap(); +/// assert_eq!(mat.pattern().as_usize(), 0); +/// assert_eq!(mat.offset(), 15); +/// +/// // And note that our search implementation above automatically works +/// // with multi-DFAs. Namely, `dfa.match_pattern(match_state, 0)` selects +/// // the appropriate pattern ID for us. 
+/// let dfa = DFA::new_many(&[r"[a-z]+", r"[0-9]+"])?; +/// let mut cache = dfa.create_cache(); +/// let haystack = "123 foobar 4567".as_bytes(); +/// let mat = find_leftmost_first(&dfa, &mut cache, haystack)?.unwrap(); +/// assert_eq!(mat.pattern().as_usize(), 1); +/// assert_eq!(mat.offset(), 3); +/// let mat = find_leftmost_first(&dfa, &mut cache, &haystack[3..])?.unwrap(); +/// assert_eq!(mat.pattern().as_usize(), 0); +/// assert_eq!(mat.offset(), 7); +/// let mat = find_leftmost_first(&dfa, &mut cache, &haystack[10..])?.unwrap(); +/// assert_eq!(mat.pattern().as_usize(), 1); +/// assert_eq!(mat.offset(), 5); +/// +/// # Ok::<(), Box>(()) +/// ``` +#[derive( + Clone, Copy, Debug, Default, Eq, Hash, PartialEq, PartialOrd, Ord, +)] +pub struct LazyStateID(u32); + +impl LazyStateID { + #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] + const MAX_BIT: usize = 31; + + #[cfg(target_pointer_width = "16")] + const MAX_BIT: usize = 15; + + const MASK_UNKNOWN: usize = 1 << (LazyStateID::MAX_BIT); + const MASK_DEAD: usize = 1 << (LazyStateID::MAX_BIT - 1); + const MASK_QUIT: usize = 1 << (LazyStateID::MAX_BIT - 2); + const MASK_START: usize = 1 << (LazyStateID::MAX_BIT - 3); + const MASK_MATCH: usize = 1 << (LazyStateID::MAX_BIT - 4); + const MAX: usize = LazyStateID::MASK_MATCH - 1; + + /// Create a new lazy state ID. + /// + /// If the given identifier exceeds [`LazyStateID::MAX`], then this returns + /// an error. + #[inline] + pub(crate) fn new(id: usize) -> Result { + if id > LazyStateID::MAX { + let attempted = u64::try_from(id).unwrap(); + return Err(LazyStateIDError { attempted }); + } + Ok(LazyStateID::new_unchecked(id)) + } + + /// Create a new lazy state ID without checking whether the given value + /// exceeds [`LazyStateID::MAX`]. + /// + /// While this is unchecked, providing an incorrect value must never + /// sacrifice memory safety. 
+ #[inline] + const fn new_unchecked(id: usize) -> LazyStateID { + // FIXME: Use as_u32() once const functions in traits are stable. + LazyStateID(id as u32) + } + + /// Return this lazy state ID as an untagged `usize`. + /// + /// If this lazy state ID is tagged, then the usize returned is the state + /// ID without the tag. If the ID was not tagged, then the usize returned + /// is equivalent to the state ID. + #[inline] + pub(crate) fn as_usize_untagged(&self) -> usize { + self.as_usize_unchecked() & LazyStateID::MAX + } + + /// Return this lazy state ID as its raw internal `usize` value, which may + /// be tagged (and thus greater than LazyStateID::MAX). + #[inline] + pub(crate) const fn as_usize_unchecked(&self) -> usize { + // FIXME: Use as_usize() once const functions in traits are stable. + self.0 as usize + } + + #[inline] + pub(crate) const fn to_unknown(&self) -> LazyStateID { + LazyStateID::new_unchecked( + self.as_usize_unchecked() | LazyStateID::MASK_UNKNOWN, + ) + } + + #[inline] + pub(crate) const fn to_dead(&self) -> LazyStateID { + LazyStateID::new_unchecked( + self.as_usize_unchecked() | LazyStateID::MASK_DEAD, + ) + } + + #[inline] + pub(crate) const fn to_quit(&self) -> LazyStateID { + LazyStateID::new_unchecked( + self.as_usize_unchecked() | LazyStateID::MASK_QUIT, + ) + } + + /// Return this lazy state ID as a state ID that is tagged as a start + /// state. + #[inline] + pub(crate) const fn to_start(&self) -> LazyStateID { + LazyStateID::new_unchecked( + self.as_usize_unchecked() | LazyStateID::MASK_START, + ) + } + + /// Return this lazy state ID as a lazy state ID that is tagged as a match + /// state. + #[inline] + pub(crate) const fn to_match(&self) -> LazyStateID { + LazyStateID::new_unchecked( + self.as_usize_unchecked() | LazyStateID::MASK_MATCH, + ) + } + + /// Return true if and only if this lazy state ID is tagged. 
+ /// + /// When a lazy state ID is tagged, then one can conclude that it is one + /// of a match, start, dead, quit or unknown state. + #[inline] + pub const fn is_tagged(&self) -> bool { + self.as_usize_unchecked() > LazyStateID::MAX + } + + /// Return true if and only if this represents a lazy state ID that is + /// "unknown." That is, the state has not yet been created. When a caller + /// sees this state ID, it generally means that a state has to be computed + /// in order to proceed. + #[inline] + pub const fn is_unknown(&self) -> bool { + self.as_usize_unchecked() & LazyStateID::MASK_UNKNOWN > 0 + } + + /// Return true if and only if this represents a dead state. A dead state + /// is a state that can never transition to any other state except the + /// dead state. When a dead state is seen, it generally indicates that a + /// search should stop. + #[inline] + pub const fn is_dead(&self) -> bool { + self.as_usize_unchecked() & LazyStateID::MASK_DEAD > 0 + } + + /// Return true if and only if this represents a quit state. A quit state + /// is a state that is representationally equivalent to a dead state, + /// except it indicates the automaton has reached a point at which it can + /// no longer determine whether a match exists or not. In general, this + /// indicates an error during search and the caller must either pass this + /// error up or use a different search technique. + #[inline] + pub const fn is_quit(&self) -> bool { + self.as_usize_unchecked() & LazyStateID::MASK_QUIT > 0 + } + + /// Return true if and only if this lazy state ID has been tagged as a + /// start state. + /// + /// Note that if + /// [`Config::specialize_start_states`](crate::hybrid::dfa::Config::specialize_start_states) is + /// disabled (which is the default), then this will always return false + /// since start states won't be tagged.
+ #[inline] + pub const fn is_start(&self) -> bool { + self.as_usize_unchecked() & LazyStateID::MASK_START > 0 + } + + /// Return true if and only if this lazy state ID has been tagged as a + /// match state. + #[inline] + pub const fn is_match(&self) -> bool { + self.as_usize_unchecked() & LazyStateID::MASK_MATCH > 0 + } +} + +/// This error occurs when a lazy state ID could not be constructed. +/// +/// This occurs when given an integer exceeding the maximum lazy state ID +/// value. +/// +/// When the `std` feature is enabled, this implements the `Error` trait. +#[derive(Clone, Debug, Eq, PartialEq)] +pub(crate) struct LazyStateIDError { + attempted: u64, +} + +impl LazyStateIDError { + /// Returns the value that failed to construct a lazy state ID. + pub(crate) fn attempted(&self) -> u64 { + self.attempted + } +} + +#[cfg(feature = "std")] +impl std::error::Error for LazyStateIDError {} + +impl core::fmt::Display for LazyStateIDError { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + write!( + f, + "failed to create LazyStateID from {:?}, which exceeds {:?}", + self.attempted(), + LazyStateID::MAX, + ) + } +} diff --git a/vendor/regex-automata/src/hybrid/mod.rs b/vendor/regex-automata/src/hybrid/mod.rs new file mode 100644 index 0000000..2feb839 --- /dev/null +++ b/vendor/regex-automata/src/hybrid/mod.rs @@ -0,0 +1,144 @@ +/*! +A module for building and searching with lazy deterministic finite automata +(DFAs). + +Like other modules in this crate, lazy DFAs support a rich regex syntax with +Unicode features. The key feature of a lazy DFA is that it builds itself +incrementally during search, and never uses more than a configured capacity of +memory. Thus, when searching with a lazy DFA, one must supply a mutable "cache" +in which the actual DFA's transition table is stored. + +If you're looking for fully compiled DFAs, then please see the top-level +[`dfa` module](crate::dfa).
+ +# Overview + +This section gives a brief overview of the primary types in this module: + +* A [`regex::Regex`] provides a way to search for matches of a regular +expression using lazy DFAs. This includes iterating over matches with both the +start and end positions of each match. +* A [`dfa::DFA`] provides direct low level access to a lazy DFA. + +# Example: basic regex searching + +This example shows how to compile a regex using the default configuration +and then use it to find matches in a byte string: + +``` +use regex_automata::{hybrid::regex::Regex, Match}; + +let re = Regex::new(r"[0-9]{4}-[0-9]{2}-[0-9]{2}")?; +let mut cache = re.create_cache(); + +let haystack = "2018-12-24 2016-10-08"; +let matches: Vec = re.find_iter(&mut cache, haystack).collect(); +assert_eq!(matches, vec![ + Match::must(0, 0..10), + Match::must(0, 11..21), +]); +# Ok::<(), Box>(()) +``` + +# Example: searching with multiple regexes + +The lazy DFAs in this module all fully support searching with multiple regexes +simultaneously. You can use this support with standard leftmost-first style +searching to find non-overlapping matches: + +``` +# if cfg!(miri) { return Ok(()); } // miri takes too long +use regex_automata::{hybrid::regex::Regex, Match}; + +let re = Regex::new_many(&[r"\w+", r"\S+"])?; +let mut cache = re.create_cache(); + +let haystack = "@foo bar"; +let matches: Vec = re.find_iter(&mut cache, haystack).collect(); +assert_eq!(matches, vec![ + Match::must(1, 0..4), + Match::must(0, 5..8), +]); +# Ok::<(), Box>(()) +``` + +# When should I use this? + +Generally speaking, if you can abide the use of mutable state during search, +and you don't need things like capturing groups or Unicode word boundary +support in non-ASCII text, then a lazy DFA is likely a robust choice with +respect to both search speed and memory usage. Note however that its speed +may be worse than a general purpose regex engine if you don't select a good +[prefilter](crate::util::prefilter). 
+ +If you know ahead of time that your pattern would result in a very large DFA +if it were fully compiled, it may be better to use an NFA simulation instead +of a lazy DFA. Either that, or increase the cache capacity of your lazy DFA +to something that is big enough to hold the state machine (likely through +experimentation). The issue here is that if the cache is too small, then it +could wind up being reset too frequently and this might decrease searching +speed significantly. + +# Differences with fully compiled DFAs + +A [`hybrid::regex::Regex`](crate::hybrid::regex::Regex) and a +[`dfa::regex::Regex`](crate::dfa::regex::Regex) both have the same capabilities +(and similarly for their underlying DFAs), but they achieve them through +different means. The main difference is that a hybrid or "lazy" regex builds +its DFA lazily during search, whereas a fully compiled regex will build its +DFA at construction time. While building a DFA at search time might sound like +it's slow, in practice most bytes seen during a search +reuse pre-built parts of the DFA, and thus a lazy DFA can be almost as fast as a fully +compiled DFA. The main downside is that searching requires mutable space to +store the DFA, and, in the worst case, a search can result in a new state being +created for each byte seen, which would make searching quite a bit slower. + +A fully compiled DFA never has to worry about searches being slower once +it's built. (Aside from, say, the transition table being so large that it +is subject to harsh CPU cache effects.) However, of course, building a full +DFA can be quite time consuming and memory hungry, particularly when large +Unicode character classes are used, since these tend to translate into very large +DFAs. + +A lazy DFA strikes a nice balance _in practice_, particularly in the +presence of Unicode mode, by only building what is needed.
It avoids the +worst case exponential time complexity of DFA compilation by guaranteeing that +it will only build at most one state per byte searched. While the worst +case here can lead to a very high constant, it will never be exponential. + +# Syntax + +This module supports the same syntax as the `regex` crate, since they share the +same parser. You can find an exhaustive list of supported syntax in the +[documentation for the `regex` crate](https://docs.rs/regex/1/regex/#syntax). + +There are two things that are not supported by the lazy DFAs in this module: + +* Capturing groups. The DFAs (and [`Regex`](regex::Regex)es built on top +of them) can only find the offsets of an entire match, but cannot resolve +the offsets of each capturing group. This is because DFAs do not have the +expressive power necessary. Note that it is okay to build a lazy DFA from an +NFA that contains capture groups. The capture groups will simply be ignored. +* Unicode word boundaries. These present particularly difficult challenges for +DFA construction and would result in an explosion in the number of states. +One can enable [`dfa::Config::unicode_word_boundary`] though, which provides +heuristic support for Unicode word boundaries that only works on ASCII text. +Otherwise, one can use `(?-u:\b)` for an ASCII word boundary, which will work +on any input. + +There are no plans to lift either of these limitations. + +Note that these restrictions are identical to the restrictions on fully +compiled DFAs. +*/ + +pub use self::{ + error::{BuildError, CacheError, StartError}, + id::LazyStateID, +}; + +pub mod dfa; +mod error; +mod id; +pub mod regex; +mod search; diff --git a/vendor/regex-automata/src/hybrid/regex.rs b/vendor/regex-automata/src/hybrid/regex.rs new file mode 100644 index 0000000..b3b1fe3 --- /dev/null +++ b/vendor/regex-automata/src/hybrid/regex.rs @@ -0,0 +1,895 @@ +/*! +A lazy DFA backed `Regex`. + +This module provides a [`Regex`] backed by a lazy DFA. 
A `Regex` implements +convenience routines you might have come to expect, such as finding a match +and iterating over all non-overlapping matches. This `Regex` type is limited +in its capabilities to what a lazy DFA can provide. Therefore, APIs involving +capturing groups, for example, are not provided. + +Internally, a `Regex` is composed of two DFAs. One is a "forward" DFA that +finds the end offset of a match, whereas the other is a "reverse" DFA that +finds the start offset of a match. + +See the [parent module](crate::hybrid) for examples. +*/ + +use crate::{ + hybrid::{ + dfa::{self, DFA}, + error::BuildError, + }, + nfa::thompson, + util::{ + iter, + search::{Anchored, Input, Match, MatchError, MatchKind}, + }, +}; + +/// A regular expression that uses hybrid NFA/DFAs (also called "lazy DFAs") +/// for searching. +/// +/// A regular expression is composed of two lazy DFAs, a "forward" DFA and a +/// "reverse" DFA. The forward DFA is responsible for detecting the end of +/// a match while the reverse DFA is responsible for detecting the start +/// of a match. Thus, in order to find the bounds of any given match, a +/// forward search must first be run followed by a reverse search. A match +/// found by the forward DFA guarantees that the reverse DFA will also find +/// a match. +/// +/// # Fallibility +/// +/// Most of the search routines defined on this type will _panic_ when the +/// underlying search fails. This might be because the DFA gave up because it +/// saw a quit byte, whether configured explicitly or via heuristic Unicode +/// word boundary support, although neither is enabled by default. It might +/// also fail if the underlying DFA determines it isn't making effective use of +/// the cache (which also never happens by default). Or it might fail because +/// an invalid `Input` configuration is given, for example, with an unsupported +/// [`Anchored`] mode.
+/// +/// If you need to handle these error cases instead of allowing them to trigger +/// a panic, then the lower level [`Regex::try_search`] provides a fallible API +/// that never panics. +/// +/// # Example +/// +/// This example shows how to cause a search to terminate if it sees a +/// `\n` byte, and handle the error returned. This could be useful if, for +/// example, you wanted to prevent a user supplied pattern from matching +/// across a line boundary. +/// +/// ``` +/// # if cfg!(miri) { return Ok(()); } // miri takes too long +/// use regex_automata::{hybrid::{dfa, regex::Regex}, Input, MatchError}; +/// +/// let re = Regex::builder() +/// .dfa(dfa::Config::new().quit(b'\n', true)) +/// .build(r"foo\p{any}+bar")?; +/// let mut cache = re.create_cache(); +/// +/// let input = Input::new("foo\nbar"); +/// // Normally this would produce a match, since \p{any} contains '\n'. +/// // But since we instructed the automaton to enter a quit state if a +/// // '\n' is observed, this produces a match error instead. +/// let expected = MatchError::quit(b'\n', 3); +/// let got = re.try_search(&mut cache, &input).unwrap_err(); +/// assert_eq!(expected, got); +/// +/// # Ok::<(), Box>(()) +/// ``` +#[derive(Debug)] +pub struct Regex { + /// The forward lazy DFA. This can only find the end of a match. + forward: DFA, + /// The reverse lazy DFA. This can only find the start of a match. + /// + /// This is built with 'all' match semantics (instead of leftmost-first) + /// so that it always finds the longest possible match (which corresponds + /// to the leftmost starting position). It is also compiled as an anchored + /// matcher and has 'starts_for_each_pattern' enabled. Including starting + /// states for each pattern is necessary to ensure that we only look for + /// matches of a pattern that matched in the forward direction. Otherwise, + /// we might wind up finding the "leftmost" starting position of a totally + /// different pattern! 
+ reverse: DFA, +} + +/// Convenience routines for regex and cache construction. +impl Regex { + /// Parse the given regular expression using the default configuration and + /// return the corresponding regex. + /// + /// If you want a non-default configuration, then use the [`Builder`] to + /// set your own configuration. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{hybrid::regex::Regex, Match}; + /// + /// let re = Regex::new("foo[0-9]+bar")?; + /// let mut cache = re.create_cache(); + /// assert_eq!( + /// Some(Match::must(0, 3..14)), + /// re.find(&mut cache, "zzzfoo12345barzzz"), + /// ); + /// # Ok::<(), Box>(()) + /// ``` + #[cfg(feature = "syntax")] + pub fn new(pattern: &str) -> Result { + Regex::builder().build(pattern) + } + + /// Like `new`, but parses multiple patterns into a single "multi regex." + /// This similarly uses the default regex configuration. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{hybrid::regex::Regex, Match}; + /// + /// let re = Regex::new_many(&["[a-z]+", "[0-9]+"])?; + /// let mut cache = re.create_cache(); + /// + /// let mut it = re.find_iter(&mut cache, "abc 1 foo 4567 0 quux"); + /// assert_eq!(Some(Match::must(0, 0..3)), it.next()); + /// assert_eq!(Some(Match::must(1, 4..5)), it.next()); + /// assert_eq!(Some(Match::must(0, 6..9)), it.next()); + /// assert_eq!(Some(Match::must(1, 10..14)), it.next()); + /// assert_eq!(Some(Match::must(1, 15..16)), it.next()); + /// assert_eq!(Some(Match::must(0, 17..21)), it.next()); + /// assert_eq!(None, it.next()); + /// # Ok::<(), Box>(()) + /// ``` + #[cfg(feature = "syntax")] + pub fn new_many>( + patterns: &[P], + ) -> Result { + Regex::builder().build_many(patterns) + } + + /// Return a builder for configuring the construction of a `Regex`. + /// + /// This is a convenience routine to avoid needing to import the + /// [`Builder`] type in common cases. 
+ /// + /// # Example + /// + /// This example shows how to use the builder to disable UTF-8 mode + /// everywhere. + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{ + /// hybrid::regex::Regex, nfa::thompson, util::syntax, Match, + /// }; + /// + /// let re = Regex::builder() + /// .syntax(syntax::Config::new().utf8(false)) + /// .thompson(thompson::Config::new().utf8(false)) + /// .build(r"foo(?-u:[^b])ar.*")?; + /// let mut cache = re.create_cache(); + /// + /// let haystack = b"\xFEfoo\xFFarzz\xE2\x98\xFF\n"; + /// let expected = Some(Match::must(0, 1..9)); + /// let got = re.find(&mut cache, haystack); + /// assert_eq!(expected, got); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + pub fn builder() -> Builder { + Builder::new() + } + + /// Create a new cache for this `Regex`. + /// + /// The cache returned should only be used for searches for this + /// `Regex`. If you want to reuse the cache for another `Regex`, then + /// you must call [`Cache::reset`] with that `Regex` (or, equivalently, + /// [`Regex::reset_cache`]). + pub fn create_cache(&self) -> Cache { + Cache::new(self) + } + + /// Reset the given cache such that it can be used for searching with + /// this `Regex` (and only this `Regex`). + /// + /// A cache reset permits reusing memory already allocated in this cache + /// with a different `Regex`. + /// + /// Resetting a cache sets its "clear count" to 0. This is relevant if the + /// `Regex` has been configured to "give up" after it has cleared the cache + /// a certain number of times. + /// + /// # Example + /// + /// This shows how to re-purpose a cache for use with a different `Regex`.
+ /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{hybrid::regex::Regex, Match}; + /// + /// let re1 = Regex::new(r"\w")?; + /// let re2 = Regex::new(r"\W")?; + /// + /// let mut cache = re1.create_cache(); + /// assert_eq!( + /// Some(Match::must(0, 0..2)), + /// re1.find(&mut cache, "Δ"), + /// ); + /// + /// // Using 'cache' with re2 is not allowed. It may result in panics or + /// // incorrect results. In order to re-purpose the cache, we must reset + /// // it with the Regex we'd like to use it with. + /// // + /// // Similarly, after this reset, using the cache with 're1' is also not + /// // allowed. + /// re2.reset_cache(&mut cache); + /// assert_eq!( + /// Some(Match::must(0, 0..3)), + /// re2.find(&mut cache, "☃"), + /// ); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn reset_cache(&self, cache: &mut Cache) { + self.forward().reset_cache(&mut cache.forward); + self.reverse().reset_cache(&mut cache.reverse); + } +} + +/// Standard infallible search routines for finding and iterating over matches. +impl Regex { + /// Returns true if and only if this regex matches the given haystack. + /// + /// This routine may short circuit if it knows that scanning future input + /// will never lead to a different result. In particular, if the underlying + /// DFA enters a match state or a dead state, then this routine will return + /// `true` or `false`, respectively, without inspecting any future input. + /// + /// # Panics + /// + /// This routine panics if the search could not complete. This can occur + /// in a number of circumstances: + /// + /// * The configuration of the lazy DFA may permit it to "quit" the search. + /// For example, setting quit bytes or enabling heuristic support for + /// Unicode word boundaries. The default configuration does not enable any + /// option that could result in the lazy DFA quitting. 
+ /// * The configuration of the lazy DFA may also permit it to "give up" + /// on a search if it makes ineffective use of its transition table + /// cache. The default configuration does not enable this by default, + /// although it is typically a good idea to. + /// * When the provided `Input` configuration is not supported. For + /// example, by providing an unsupported anchor mode. + /// + /// When a search panics, callers cannot know whether a match exists or + /// not. + /// + /// Use [`Regex::try_search`] if you want to handle these error conditions. + /// + /// # Example + /// + /// ``` + /// use regex_automata::hybrid::regex::Regex; + /// + /// let re = Regex::new("foo[0-9]+bar")?; + /// let mut cache = re.create_cache(); + /// + /// assert!(re.is_match(&mut cache, "foo12345bar")); + /// assert!(!re.is_match(&mut cache, "foobar")); + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn is_match<'h, I: Into>>( + &self, + cache: &mut Cache, + input: I, + ) -> bool { + // Not only can we do an "earliest" search, but we can avoid doing a + // reverse scan too. + self.forward() + .try_search_fwd(&mut cache.forward, &input.into().earliest(true)) + .unwrap() + .is_some() + } + + /// Returns the start and end offset of the leftmost match. If no match + /// exists, then `None` is returned. + /// + /// # Panics + /// + /// This routine panics if the search could not complete. This can occur + /// in a number of circumstances: + /// + /// * The configuration of the lazy DFA may permit it to "quit" the search. + /// For example, setting quit bytes or enabling heuristic support for + /// Unicode word boundaries. The default configuration does not enable any + /// option that could result in the lazy DFA quitting. + /// * The configuration of the lazy DFA may also permit it to "give up" + /// on a search if it makes ineffective use of its transition table + /// cache. 
The default configuration does not enable this by default, + /// although it is typically a good idea to. + /// * When the provided `Input` configuration is not supported. For + /// example, by providing an unsupported anchor mode. + /// + /// When a search panics, callers cannot know whether a match exists or + /// not. + /// + /// Use [`Regex::try_search`] if you want to handle these error conditions. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{Match, hybrid::regex::Regex}; + /// + /// let re = Regex::new("foo[0-9]+")?; + /// let mut cache = re.create_cache(); + /// assert_eq!( + /// Some(Match::must(0, 3..11)), + /// re.find(&mut cache, "zzzfoo12345zzz"), + /// ); + /// + /// // Even though a match is found after reading the first byte (`a`), + /// // the default leftmost-first match semantics demand that we find the + /// // earliest match that prefers earlier parts of the pattern over latter + /// // parts. + /// let re = Regex::new("abc|a")?; + /// let mut cache = re.create_cache(); + /// assert_eq!(Some(Match::must(0, 0..3)), re.find(&mut cache, "abc")); + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn find<'h, I: Into>>( + &self, + cache: &mut Cache, + input: I, + ) -> Option { + self.try_search(cache, &input.into()).unwrap() + } + + /// Returns an iterator over all non-overlapping leftmost matches in the + /// given bytes. If no match exists, then the iterator yields no elements. + /// + /// # Panics + /// + /// This routine panics if the search could not complete. This can occur + /// in a number of circumstances: + /// + /// * The configuration of the lazy DFA may permit it to "quit" the search. + /// For example, setting quit bytes or enabling heuristic support for + /// Unicode word boundaries. The default configuration does not enable any + /// option that could result in the lazy DFA quitting. 
+ /// * The configuration of the lazy DFA may also permit it to "give up" + /// on a search if it makes ineffective use of its transition table + /// cache. The default configuration does not enable this by default, + /// although it is typically a good idea to. + /// * When the provided `Input` configuration is not supported. For + /// example, by providing an unsupported anchor mode. + /// + /// When a search panics, callers cannot know whether a match exists or + /// not. + /// + /// The above conditions also apply to the iterator returned as well. For + /// example, if the lazy DFA gives up or quits during a search using this + /// method, then a panic will occur during iteration. + /// + /// Use [`Regex::try_search`] with [`util::iter::Searcher`](iter::Searcher) + /// if you want to handle these error conditions. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{hybrid::regex::Regex, Match}; + /// + /// let re = Regex::new("foo[0-9]+")?; + /// let mut cache = re.create_cache(); + /// + /// let text = "foo1 foo12 foo123"; + /// let matches: Vec = re.find_iter(&mut cache, text).collect(); + /// assert_eq!(matches, vec![ + /// Match::must(0, 0..4), + /// Match::must(0, 5..10), + /// Match::must(0, 11..17), + /// ]); + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn find_iter<'r, 'c, 'h, I: Into>>( + &'r self, + cache: &'c mut Cache, + input: I, + ) -> FindMatches<'r, 'c, 'h> { + let it = iter::Searcher::new(input.into()); + FindMatches { re: self, cache, it } + } +} + +/// Lower level "search" primitives that accept a `&Input` for cheap reuse +/// and return an error if one occurs instead of panicking. +impl Regex { + /// Returns the start and end offset of the leftmost match. If no match + /// exists, then `None` is returned. + /// + /// This is like [`Regex::find`] but with two differences: + /// + /// 1. It is not generic over `Into` and instead accepts a + /// `&Input`. 
This permits reusing the same `Input` for multiple searches + /// without needing to create a new one. This _may_ help with latency. + /// 2. It returns an error if the search could not complete where as + /// [`Regex::find`] will panic. + /// + /// # Errors + /// + /// This routine errors if the search could not complete. This can occur + /// in a number of circumstances: + /// + /// * The configuration of the lazy DFA may permit it to "quit" the search. + /// For example, setting quit bytes or enabling heuristic support for + /// Unicode word boundaries. The default configuration does not enable any + /// option that could result in the lazy DFA quitting. + /// * The configuration of the lazy DFA may also permit it to "give up" + /// on a search if it makes ineffective use of its transition table + /// cache. The default configuration does not enable this by default, + /// although it is typically a good idea to. + /// * When the provided `Input` configuration is not supported. For + /// example, by providing an unsupported anchor mode. + /// + /// When a search returns an error, callers cannot know whether a match + /// exists or not. + #[inline] + pub fn try_search( + &self, + cache: &mut Cache, + input: &Input<'_>, + ) -> Result, MatchError> { + let (fcache, rcache) = (&mut cache.forward, &mut cache.reverse); + let end = match self.forward().try_search_fwd(fcache, input)? { + None => return Ok(None), + Some(end) => end, + }; + // This special cases an empty match at the beginning of the search. If + // our end matches our start, then since a reverse DFA can't match past + // the start, it must follow that our starting position is also our end + // position. So short circuit and skip the reverse search. + if input.start() == end.offset() { + return Ok(Some(Match::new( + end.pattern(), + end.offset()..end.offset(), + ))); + } + // We can also skip the reverse search if we know our search was + // anchored. 
This occurs either when the input config is anchored or + // when we know the regex itself is anchored. In this case, we know the + // start of the match, if one is found, must be the start of the + // search. + if self.is_anchored(input) { + return Ok(Some(Match::new( + end.pattern(), + input.start()..end.offset(), + ))); + } + // N.B. I have tentatively convinced myself that it isn't necessary + // to specify the specific pattern for the reverse search since the + // reverse search will always find the same pattern to match as the + // forward search. But I lack a rigorous proof. Why not just provide + // the pattern anyway? Well, if it is needed, then leaving it out + // gives us a chance to find a witness. (Also, if we don't need to + // specify the pattern, then we don't need to build the reverse DFA + // with 'starts_for_each_pattern' enabled. It doesn't matter too much + // for the lazy DFA, but does make the overall DFA bigger.) + // + // We also need to be careful to disable 'earliest' for the reverse + // search, since it could be enabled for the forward search. In the + // reverse case, to satisfy "leftmost" criteria, we need to match as + // much as we can. We also need to be careful to make the search + // anchored. We don't want the reverse search to report any matches + // other than the one beginning at the end of our forward search. + let revsearch = input + .clone() + .span(input.start()..end.offset()) + .anchored(Anchored::Yes) + .earliest(false); + let start = self + .reverse() + .try_search_rev(rcache, &revsearch)? + .expect("reverse search must match if forward search does"); + debug_assert_eq!( + start.pattern(), + end.pattern(), + "forward and reverse search must match same pattern", + ); + debug_assert!(start.offset() <= end.offset()); + Ok(Some(Match::new(end.pattern(), start.offset()..end.offset()))) + } + + /// Returns true if either the given input specifies an anchored search + /// or if the underlying NFA is always anchored. 
+ fn is_anchored(&self, input: &Input<'_>) -> bool { + match input.get_anchored() { + Anchored::No => { + self.forward().get_nfa().is_always_start_anchored() + } + Anchored::Yes | Anchored::Pattern(_) => true, + } + } +} + +/// Non-search APIs for querying information about the regex and setting a +/// prefilter. +impl Regex { + /// Return the underlying lazy DFA responsible for forward matching. + /// + /// This is useful for accessing the underlying lazy DFA and using it + /// directly if the situation calls for it. + pub fn forward(&self) -> &DFA { + &self.forward + } + + /// Return the underlying lazy DFA responsible for reverse matching. + /// + /// This is useful for accessing the underlying lazy DFA and using it + /// directly if the situation calls for it. + pub fn reverse(&self) -> &DFA { + &self.reverse + } + + /// Returns the total number of patterns matched by this regex. + /// + /// # Example + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::hybrid::regex::Regex; + /// + /// let re = Regex::new_many(&[r"[a-z]+", r"[0-9]+", r"\w+"])?; + /// assert_eq!(3, re.pattern_len()); + /// # Ok::<(), Box>(()) + /// ``` + pub fn pattern_len(&self) -> usize { + assert_eq!(self.forward().pattern_len(), self.reverse().pattern_len()); + self.forward().pattern_len() + } +} + +/// An iterator over all non-overlapping matches for an infallible search. +/// +/// The iterator yields a [`Match`] value until no more matches could be found. +/// If the underlying regex engine returns an error, then a panic occurs. +/// +/// The lifetime parameters are as follows: +/// +/// * `'r` represents the lifetime of the regex object. +/// * `'h` represents the lifetime of the haystack being searched. +/// * `'c` represents the lifetime of the regex cache. +/// +/// This iterator can be created with the [`Regex::find_iter`] method. 
+#[derive(Debug)] +pub struct FindMatches<'r, 'c, 'h> { + re: &'r Regex, + cache: &'c mut Cache, + it: iter::Searcher<'h>, +} + +impl<'r, 'c, 'h> Iterator for FindMatches<'r, 'c, 'h> { + type Item = Match; + + #[inline] + fn next(&mut self) -> Option { + let FindMatches { re, ref mut cache, ref mut it } = *self; + it.advance(|input| re.try_search(cache, input)) + } +} + +/// A cache represents a partially computed forward and reverse DFA. +/// +/// A cache is the key component that differentiates a classical DFA and a +/// hybrid NFA/DFA (also called a "lazy DFA"). Where a classical DFA builds a +/// complete transition table that can handle all possible inputs, a hybrid +/// NFA/DFA starts with an empty transition table and builds only the parts +/// required during search. The parts that are built are stored in a cache. For +/// this reason, a cache is a required parameter for nearly every operation on +/// a [`Regex`]. +/// +/// Caches can be created from their corresponding `Regex` via +/// [`Regex::create_cache`]. A cache can only be used with either the `Regex` +/// that created it, or the `Regex` that was most recently used to reset it +/// with [`Cache::reset`]. Using a cache with any other `Regex` may result in +/// panics or incorrect results. +#[derive(Debug, Clone)] +pub struct Cache { + forward: dfa::Cache, + reverse: dfa::Cache, +} + +impl Cache { + /// Create a new cache for the given `Regex`. + /// + /// The cache returned should only be used for searches for the given + /// `Regex`. If you want to reuse the cache for another `Regex`, then you + /// must call [`Cache::reset`] with that `Regex`. + pub fn new(re: &Regex) -> Cache { + let forward = dfa::Cache::new(re.forward()); + let reverse = dfa::Cache::new(re.reverse()); + Cache { forward, reverse } + } + + /// Reset this cache such that it can be used for searching with the given + /// `Regex` (and only that `Regex`). 
+ /// + /// A cache reset permits reusing memory already allocated in this cache + /// with a different `Regex`. + /// + /// Resetting a cache sets its "clear count" to 0. This is relevant if the + /// `Regex` has been configured to "give up" after it has cleared the cache + /// a certain number of times. + /// + /// # Example + /// + /// This shows how to re-purpose a cache for use with a different `Regex`. + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{hybrid::regex::Regex, Match}; + /// + /// let re1 = Regex::new(r"\w")?; + /// let re2 = Regex::new(r"\W")?; + /// + /// let mut cache = re1.create_cache(); + /// assert_eq!( + /// Some(Match::must(0, 0..2)), + /// re1.find(&mut cache, "Δ"), + /// ); + /// + /// // Using 'cache' with re2 is not allowed. It may result in panics or + /// // incorrect results. In order to re-purpose the cache, we must reset + /// // it with the Regex we'd like to use it with. + /// // + /// // Similarly, after this reset, using the cache with 're1' is also not + /// // allowed. + /// cache.reset(&re2); + /// assert_eq!( + /// Some(Match::must(0, 0..3)), + /// re2.find(&mut cache, "☃"), + /// ); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn reset(&mut self, re: &Regex) { + self.forward.reset(re.forward()); + self.reverse.reset(re.reverse()); + } + + /// Return a reference to the forward cache. + pub fn forward(&mut self) -> &dfa::Cache { + &self.forward + } + + /// Return a reference to the reverse cache. + pub fn reverse(&mut self) -> &dfa::Cache { + &self.reverse + } + + /// Return a mutable reference to the forward cache. + /// + /// If you need mutable references to both the forward and reverse caches, + /// then use [`Cache::as_parts_mut`]. + pub fn forward_mut(&mut self) -> &mut dfa::Cache { + &mut self.forward + } + + /// Return a mutable reference to the reverse cache. 
+ /// + /// If you need mutable references to both the forward and reverse caches, + /// then use [`Cache::as_parts_mut`]. + pub fn reverse_mut(&mut self) -> &mut dfa::Cache { + &mut self.reverse + } + + /// Return references to the forward and reverse caches, respectively. + pub fn as_parts(&self) -> (&dfa::Cache, &dfa::Cache) { + (&self.forward, &self.reverse) + } + + /// Return mutable references to the forward and reverse caches, + /// respectively. + pub fn as_parts_mut(&mut self) -> (&mut dfa::Cache, &mut dfa::Cache) { + (&mut self.forward, &mut self.reverse) + } + + /// Returns the heap memory usage, in bytes, as a sum of the forward and + /// reverse lazy DFA caches. + /// + /// This does **not** include the stack size used up by this cache. To + /// compute that, use `std::mem::size_of::()`. + pub fn memory_usage(&self) -> usize { + self.forward.memory_usage() + self.reverse.memory_usage() + } +} + +/// A builder for a regex based on a hybrid NFA/DFA. +/// +/// This builder permits configuring options for the syntax of a pattern, the +/// NFA construction, the lazy DFA construction and finally the regex searching +/// itself. This builder is different from a general purpose regex builder +/// in that it permits fine grain configuration of the construction process. +/// The trade off for this is complexity, and the possibility of setting a +/// configuration that might not make sense. For example, there are two +/// different UTF-8 modes: +/// +/// * [`syntax::Config::utf8`](crate::util::syntax::Config::utf8) controls +/// whether the pattern itself can contain sub-expressions that match invalid +/// UTF-8. +/// * [`thompson::Config::utf8`] controls how the regex iterators themselves +/// advance the starting position of the next search when a match with zero +/// length is found. +/// +/// Generally speaking, callers will want to either enable all of these or +/// disable all of these. 
+/// +/// Internally, building a regex requires building two hybrid NFA/DFAs, +/// where one is responsible for finding the end of a match and the other is +/// responsible for finding the start of a match. If you only need to detect +/// whether something matched, or only the end of a match, then you should use +/// a [`dfa::Builder`] to construct a single hybrid NFA/DFA, which is cheaper +/// than building two of them. +/// +/// # Example +/// +/// This example shows how to disable UTF-8 mode in the syntax and the regex +/// itself. This is generally what you want for matching on arbitrary bytes. +/// +/// ``` +/// # if cfg!(miri) { return Ok(()); } // miri takes too long +/// use regex_automata::{ +/// hybrid::regex::Regex, nfa::thompson, util::syntax, Match, +/// }; +/// +/// let re = Regex::builder() +/// .syntax(syntax::Config::new().utf8(false)) +/// .thompson(thompson::Config::new().utf8(false)) +/// .build(r"foo(?-u:[^b])ar.*")?; +/// let mut cache = re.create_cache(); +/// +/// let haystack = b"\xFEfoo\xFFarzz\xE2\x98\xFF\n"; +/// let expected = Some(Match::must(0, 1..9)); +/// let got = re.find(&mut cache, haystack); +/// assert_eq!(expected, got); +/// // Notice that `(?-u:[^b])` matches invalid UTF-8, +/// // but the subsequent `.*` does not! Disabling UTF-8 +/// // on the syntax permits this. +/// assert_eq!(b"foo\xFFarzz", &haystack[got.unwrap().range()]); +/// +/// # Ok::<(), Box>(()) +/// ``` +#[derive(Clone, Debug)] +pub struct Builder { + dfa: dfa::Builder, +} + +impl Builder { + /// Create a new regex builder with the default configuration. + pub fn new() -> Builder { + Builder { dfa: DFA::builder() } + } + + /// Build a regex from the given pattern. + /// + /// If there was a problem parsing or compiling the pattern, then an error + /// is returned. + #[cfg(feature = "syntax")] + pub fn build(&self, pattern: &str) -> Result { + self.build_many(&[pattern]) + } + + /// Build a regex from the given patterns. 
+ #[cfg(feature = "syntax")] + pub fn build_many>( + &self, + patterns: &[P], + ) -> Result { + let forward = self.dfa.build_many(patterns)?; + let reverse = self + .dfa + .clone() + .configure( + DFA::config() + .prefilter(None) + .specialize_start_states(false) + .match_kind(MatchKind::All), + ) + .thompson(thompson::Config::new().reverse(true)) + .build_many(patterns)?; + Ok(self.build_from_dfas(forward, reverse)) + } + + /// Build a regex from its component forward and reverse hybrid NFA/DFAs. + /// + /// This is useful when you've built a forward and reverse lazy DFA + /// separately, and want to combine them into a single regex. Once build, + /// the individual DFAs given can still be accessed via [`Regex::forward`] + /// and [`Regex::reverse`]. + /// + /// It is important that the reverse lazy DFA be compiled under the + /// following conditions: + /// + /// * It should use [`MatchKind::All`] semantics. + /// * It should match in reverse. + /// * Otherwise, its configuration should match the forward DFA. + /// + /// If these conditions aren't satisfied, then the behavior of searches is + /// unspecified. + /// + /// Note that when using this constructor, no configuration is applied. + /// Since this routine provides the DFAs to the builder, there is no + /// opportunity to apply other configuration options. + /// + /// # Example + /// + /// This shows how to build individual lazy forward and reverse DFAs, and + /// then combine them into a single `Regex`. 
+ /// + /// ``` + /// use regex_automata::{ + /// hybrid::{dfa::DFA, regex::Regex}, + /// nfa::thompson, + /// MatchKind, + /// }; + /// + /// let fwd = DFA::new(r"foo[0-9]+")?; + /// let rev = DFA::builder() + /// .configure(DFA::config().match_kind(MatchKind::All)) + /// .thompson(thompson::Config::new().reverse(true)) + /// .build(r"foo[0-9]+")?; + /// + /// let re = Regex::builder().build_from_dfas(fwd, rev); + /// let mut cache = re.create_cache(); + /// assert_eq!(true, re.is_match(&mut cache, "foo123")); + /// # Ok::<(), Box>(()) + /// ``` + pub fn build_from_dfas(&self, forward: DFA, reverse: DFA) -> Regex { + Regex { forward, reverse } + } + + /// Set the syntax configuration for this builder using + /// [`syntax::Config`](crate::util::syntax::Config). + /// + /// This permits setting things like case insensitivity, Unicode and multi + /// line mode. + #[cfg(feature = "syntax")] + pub fn syntax( + &mut self, + config: crate::util::syntax::Config, + ) -> &mut Builder { + self.dfa.syntax(config); + self + } + + /// Set the Thompson NFA configuration for this builder using + /// [`nfa::thompson::Config`](thompson::Config). + /// + /// This permits setting things like whether additional time should be + /// spent shrinking the size of the NFA. + #[cfg(feature = "syntax")] + pub fn thompson(&mut self, config: thompson::Config) -> &mut Builder { + self.dfa.thompson(config); + self + } + + /// Set the lazy DFA compilation configuration for this builder using + /// [`dfa::Config`]. + /// + /// This permits setting things like whether Unicode word boundaries should + /// be heuristically supported or settings how the behavior of the cache. 
+ pub fn dfa(&mut self, config: dfa::Config) -> &mut Builder { + self.dfa.configure(config); + self + } +} + +impl Default for Builder { + fn default() -> Builder { + Builder::new() + } +} diff --git a/vendor/regex-automata/src/hybrid/search.rs b/vendor/regex-automata/src/hybrid/search.rs new file mode 100644 index 0000000..1f4a505 --- /dev/null +++ b/vendor/regex-automata/src/hybrid/search.rs @@ -0,0 +1,802 @@ +use crate::{ + hybrid::{ + dfa::{Cache, OverlappingState, DFA}, + id::LazyStateID, + }, + util::{ + prefilter::Prefilter, + search::{HalfMatch, Input, MatchError, Span}, + }, +}; + +#[inline(never)] +pub(crate) fn find_fwd( + dfa: &DFA, + cache: &mut Cache, + input: &Input<'_>, +) -> Result, MatchError> { + if input.is_done() { + return Ok(None); + } + let pre = if input.get_anchored().is_anchored() { + None + } else { + dfa.get_config().get_prefilter() + }; + // So what we do here is specialize four different versions of 'find_fwd': + // one for each of the combinations for 'has prefilter' and 'is earliest + // search'. The reason for doing this is that both of these things require + // branches and special handling in some code that can be very hot, + // and shaving off as much as we can when we don't need it tends to be + // beneficial in ad hoc benchmarks. To see these differences, you often + // need a query with a high match count. In other words, specializing these + // four routines *tends* to help latency more than throughput. 
+ if pre.is_some() { + if input.get_earliest() { + find_fwd_imp(dfa, cache, input, pre, true) + } else { + find_fwd_imp(dfa, cache, input, pre, false) + } + } else { + if input.get_earliest() { + find_fwd_imp(dfa, cache, input, None, true) + } else { + find_fwd_imp(dfa, cache, input, None, false) + } + } +} + +#[cfg_attr(feature = "perf-inline", inline(always))] +fn find_fwd_imp( + dfa: &DFA, + cache: &mut Cache, + input: &Input<'_>, + pre: Option<&'_ Prefilter>, + earliest: bool, +) -> Result, MatchError> { + // See 'prefilter_restart' docs for explanation. + let universal_start = dfa.get_nfa().look_set_prefix_any().is_empty(); + let mut mat = None; + let mut sid = init_fwd(dfa, cache, input)?; + let mut at = input.start(); + // This could just be a closure, but then I think it would be unsound + // because it would need to be safe to invoke. This way, the lack of safety + // is clearer in the code below. + macro_rules! next_unchecked { + ($sid:expr, $at:expr) => {{ + let byte = *input.haystack().get_unchecked($at); + dfa.next_state_untagged_unchecked(cache, $sid, byte) + }}; + } + + if let Some(ref pre) = pre { + let span = Span::from(at..input.end()); + match pre.find(input.haystack(), span) { + None => return Ok(mat), + Some(ref span) => { + at = span.start; + if !universal_start { + sid = prefilter_restart(dfa, cache, &input, at)?; + } + } + } + } + cache.search_start(at); + while at < input.end() { + if sid.is_tagged() { + cache.search_update(at); + sid = dfa + .next_state(cache, sid, input.haystack()[at]) + .map_err(|_| gave_up(at))?; + } else { + // SAFETY: There are two safety invariants we need to uphold + // here in the loops below: that 'sid' and 'prev_sid' are valid + // state IDs for this DFA, and that 'at' is a valid index into + // 'haystack'. 
For the former, we rely on the invariant that + // next_state* and start_state_forward always returns a valid state + // ID (given a valid state ID in the former case), and that we are + // only at this place in the code if 'sid' is untagged. Moreover, + // every call to next_state_untagged_unchecked below is guarded by + // a check that sid is untagged. For the latter safety invariant, + // we always guard unchecked access with a check that 'at' is less + // than 'end', where 'end <= haystack.len()'. In the unrolled loop + // below, we ensure that 'at' is always in bounds. + // + // PERF: For justification of omitting bounds checks, it gives us a + // ~10% bump in search time. This was used for a benchmark: + // + // regex-cli find half hybrid -p '(?m)^.+$' -UBb bigfile + // + // PERF: For justification for the loop unrolling, we use a few + // different tests: + // + // regex-cli find half hybrid -p '\w{50}' -UBb bigfile + // regex-cli find half hybrid -p '(?m)^.+$' -UBb bigfile + // regex-cli find half hybrid -p 'ZQZQZQZQ' -UBb bigfile + // + // And there are three different configurations: + // + // nounroll: this entire 'else' block vanishes and we just + // always use 'dfa.next_state(..)'. + // unroll1: just the outer loop below + // unroll2: just the inner loop below + // unroll3: both the outer and inner loops below + // + // This results in a matrix of timings for each of the above + // regexes with each of the above unrolling configurations: + // + // '\w{50}' '(?m)^.+$' 'ZQZQZQZQ' + // nounroll 1.51s 2.34s 1.51s + // unroll1 1.53s 2.32s 1.56s + // unroll2 2.22s 1.50s 0.61s + // unroll3 1.67s 1.45s 0.61s + // + // Ideally we'd be able to find a configuration that yields the + // best time for all regexes, but alas we settle for unroll3 that + // gives us *almost* the best for '\w{50}' and the best for the + // other two regexes. + // + // So what exactly is going on here? 
The first unrolling (grouping + // together runs of untagged transitions) specifically targets + // our choice of representation. The second unrolling (grouping + // together runs of self-transitions) specifically targets a common + // DFA topology. Let's dig in a little bit by looking at our + // regexes: + // + // '\w{50}': This regex spends a lot of time outside of the DFA's + // start state matching some part of the '\w' repetition. This + // means that it's a bit of a worst case for loop unrolling that + // targets self-transitions since the self-transitions in '\w{50}' + // are not particularly active for this haystack. However, the + // first unrolling (grouping together untagged transitions) + // does apply quite well here since very few transitions hit + // match/dead/quit/unknown states. It is however worth mentioning + // that if start states are configured to be tagged (which you + // typically want to do if you have a prefilter), then this regex + // actually slows way down because it is constantly ping-ponging + // out of the unrolled loop and into the handling of a tagged start + // state below. But when start states aren't tagged, the unrolled + // loop stays hot. (This is why it's imperative that start state + // tagging be disabled when there isn't a prefilter!) + // + // '(?m)^.+$': There are two important aspects of this regex: 1) + // on this haystack, its match count is very high, much higher + // than the other two regex and 2) it spends the vast majority + // of its time matching '.+'. Since Unicode mode is disabled, + // this corresponds to repeatedly following self transitions for + // the vast majority of the input. This does benefit from the + // untagged unrolling since most of the transitions will be to + // untagged states, but the untagged unrolling does more work than + // what is actually required. Namely, it has to keep track of the + // previous and next state IDs, which I guess requires a bit more + // shuffling. 
This is supported by the fact that nounroll+unroll1 + // are both slower than unroll2+unroll3, where the latter has a + // loop unrolling that specifically targets self-transitions. + // + // 'ZQZQZQZQ': This one is very similar to '(?m)^.+$' because it + // spends the vast majority of its time in self-transitions for + // the (implicit) unanchored prefix. The main difference with + // '(?m)^.+$' is that it has a much lower match count. So there + // isn't much time spent in the overhead of reporting matches. This + // is the primary explainer in the perf difference here. We include + // this regex and the former to make sure we have comparison points + // with high and low match counts. + // + // NOTE: I used 'OpenSubtitles2018.raw.sample.en' for 'bigfile'. + // + // NOTE: In a follow-up, it turns out that the "inner" loop + // mentioned above was a pretty big pessimization in some other + // cases. Namely, it resulted in too much ping-ponging into and out + // of the loop, which resulted in nearly ~2x regressions in search + // time when compared to the originally lazy DFA in the regex crate. + // So I've removed the second loop unrolling that targets the + // self-transition case. + let mut prev_sid = sid; + while at < input.end() { + prev_sid = unsafe { next_unchecked!(sid, at) }; + if prev_sid.is_tagged() || at + 3 >= input.end() { + core::mem::swap(&mut prev_sid, &mut sid); + break; + } + at += 1; + + sid = unsafe { next_unchecked!(prev_sid, at) }; + if sid.is_tagged() { + break; + } + at += 1; + + prev_sid = unsafe { next_unchecked!(sid, at) }; + if prev_sid.is_tagged() { + core::mem::swap(&mut prev_sid, &mut sid); + break; + } + at += 1; + + sid = unsafe { next_unchecked!(prev_sid, at) }; + if sid.is_tagged() { + break; + } + at += 1; + } + // If we quit out of the code above with an unknown state ID at + // any point, then we need to re-compute that transition using + // 'next_state', which will do NFA powerset construction for us. 
+ if sid.is_unknown() { + cache.search_update(at); + sid = dfa + .next_state(cache, prev_sid, input.haystack()[at]) + .map_err(|_| gave_up(at))?; + } + } + if sid.is_tagged() { + if sid.is_start() { + if let Some(ref pre) = pre { + let span = Span::from(at..input.end()); + match pre.find(input.haystack(), span) { + None => { + cache.search_finish(span.end); + return Ok(mat); + } + Some(ref span) => { + // We want to skip any update to 'at' below + // at the end of this iteration and just + // jump immediately back to the next state + // transition at the leading position of the + // candidate match. + // + // ... but only if we actually made progress + // with our prefilter, otherwise if the start + // state has a self-loop, we can get stuck. + if span.start > at { + at = span.start; + if !universal_start { + sid = prefilter_restart( + dfa, cache, &input, at, + )?; + } + continue; + } + } + } + } + } else if sid.is_match() { + let pattern = dfa.match_pattern(cache, sid, 0); + // Since slice ranges are inclusive at the beginning and + // exclusive at the end, and since forward searches report + // the end, we can return 'at' as-is. This only works because + // matches are delayed by 1 byte. So by the time we observe a + // match, 'at' has already been set to 1 byte past the actual + // match location, which is precisely the exclusive ending + // bound of the match. 
+ mat = Some(HalfMatch::new(pattern, at)); + if earliest { + cache.search_finish(at); + return Ok(mat); + } + } else if sid.is_dead() { + cache.search_finish(at); + return Ok(mat); + } else if sid.is_quit() { + cache.search_finish(at); + return Err(MatchError::quit(input.haystack()[at], at)); + } else { + debug_assert!(sid.is_unknown()); + unreachable!("sid being unknown is a bug"); + } + } + at += 1; + } + eoi_fwd(dfa, cache, input, &mut sid, &mut mat)?; + cache.search_finish(input.end()); + Ok(mat) +} + +#[inline(never)] +pub(crate) fn find_rev( + dfa: &DFA, + cache: &mut Cache, + input: &Input<'_>, +) -> Result, MatchError> { + if input.is_done() { + return Ok(None); + } + if input.get_earliest() { + find_rev_imp(dfa, cache, input, true) + } else { + find_rev_imp(dfa, cache, input, false) + } +} + +#[cfg_attr(feature = "perf-inline", inline(always))] +fn find_rev_imp( + dfa: &DFA, + cache: &mut Cache, + input: &Input<'_>, + earliest: bool, +) -> Result, MatchError> { + let mut mat = None; + let mut sid = init_rev(dfa, cache, input)?; + // In reverse search, the loop below can't handle the case of searching an + // empty slice. Ideally we could write something congruent to the forward + // search, i.e., 'while at >= start', but 'start' might be 0. Since we use + // an unsigned offset, 'at >= 0' is trivially always true. We could avoid + // this extra case handling by using a signed offset, but Rust makes it + // annoying to do. So... We just handle the empty case separately. + if input.start() == input.end() { + eoi_rev(dfa, cache, input, &mut sid, &mut mat)?; + return Ok(mat); + } + + let mut at = input.end() - 1; + macro_rules! 
next_unchecked { + ($sid:expr, $at:expr) => {{ + let byte = *input.haystack().get_unchecked($at); + dfa.next_state_untagged_unchecked(cache, $sid, byte) + }}; + } + cache.search_start(at); + loop { + if sid.is_tagged() { + cache.search_update(at); + sid = dfa + .next_state(cache, sid, input.haystack()[at]) + .map_err(|_| gave_up(at))?; + } else { + // SAFETY: See comments in 'find_fwd' for a safety argument. + // + // PERF: The comments in 'find_fwd' also provide a justification + // from a performance perspective as to 1) why we elide bounds + // checks and 2) why we do a specialized version of unrolling + // below. The reverse search does have a slightly different + // consideration in that most reverse searches tend to be + // anchored and on shorter haystacks. However, this still makes a + // difference. Take this command for example: + // + // regex-cli find match hybrid -p '(?m)^.+$' -UBb bigfile + // + // (Notice that we use 'find hybrid regex', not 'find hybrid dfa' + // like in the justification for the forward direction. The 'regex' + // sub-command will find start-of-match and thus run the reverse + // direction.) + // + // Without unrolling below, the above command takes around 3.76s. + // But with the unrolling below, we get down to 2.55s. If we keep + // the unrolling but add in bounds checks, then we get 2.86s. + // + // NOTE: I used 'OpenSubtitles2018.raw.sample.en' for 'bigfile'. 
+ let mut prev_sid = sid; + while at >= input.start() { + prev_sid = unsafe { next_unchecked!(sid, at) }; + if prev_sid.is_tagged() + || at <= input.start().saturating_add(3) + { + core::mem::swap(&mut prev_sid, &mut sid); + break; + } + at -= 1; + + sid = unsafe { next_unchecked!(prev_sid, at) }; + if sid.is_tagged() { + break; + } + at -= 1; + + prev_sid = unsafe { next_unchecked!(sid, at) }; + if prev_sid.is_tagged() { + core::mem::swap(&mut prev_sid, &mut sid); + break; + } + at -= 1; + + sid = unsafe { next_unchecked!(prev_sid, at) }; + if sid.is_tagged() { + break; + } + at -= 1; + } + // If we quit out of the code above with an unknown state ID at + // any point, then we need to re-compute that transition using + // 'next_state', which will do NFA powerset construction for us. + if sid.is_unknown() { + cache.search_update(at); + sid = dfa + .next_state(cache, prev_sid, input.haystack()[at]) + .map_err(|_| gave_up(at))?; + } + } + if sid.is_tagged() { + if sid.is_start() { + // do nothing + } else if sid.is_match() { + let pattern = dfa.match_pattern(cache, sid, 0); + // Since reverse searches report the beginning of a match + // and the beginning is inclusive (not exclusive like the + // end of a match), we add 1 to make it inclusive. 
+ mat = Some(HalfMatch::new(pattern, at + 1)); + if earliest { + cache.search_finish(at); + return Ok(mat); + } + } else if sid.is_dead() { + cache.search_finish(at); + return Ok(mat); + } else if sid.is_quit() { + cache.search_finish(at); + return Err(MatchError::quit(input.haystack()[at], at)); + } else { + debug_assert!(sid.is_unknown()); + unreachable!("sid being unknown is a bug"); + } + } + if at == input.start() { + break; + } + at -= 1; + } + cache.search_finish(input.start()); + eoi_rev(dfa, cache, input, &mut sid, &mut mat)?; + Ok(mat) +} + +#[inline(never)] +pub(crate) fn find_overlapping_fwd( + dfa: &DFA, + cache: &mut Cache, + input: &Input<'_>, + state: &mut OverlappingState, +) -> Result<(), MatchError> { + state.mat = None; + if input.is_done() { + return Ok(()); + } + let pre = if input.get_anchored().is_anchored() { + None + } else { + dfa.get_config().get_prefilter() + }; + if pre.is_some() { + find_overlapping_fwd_imp(dfa, cache, input, pre, state) + } else { + find_overlapping_fwd_imp(dfa, cache, input, None, state) + } +} + +#[cfg_attr(feature = "perf-inline", inline(always))] +fn find_overlapping_fwd_imp( + dfa: &DFA, + cache: &mut Cache, + input: &Input<'_>, + pre: Option<&'_ Prefilter>, + state: &mut OverlappingState, +) -> Result<(), MatchError> { + // See 'prefilter_restart' docs for explanation. + let universal_start = dfa.get_nfa().look_set_prefix_any().is_empty(); + let mut sid = match state.id { + None => { + state.at = input.start(); + init_fwd(dfa, cache, input)? + } + Some(sid) => { + if let Some(match_index) = state.next_match_index { + let match_len = dfa.match_len(cache, sid); + if match_index < match_len { + state.next_match_index = Some(match_index + 1); + let pattern = dfa.match_pattern(cache, sid, match_index); + state.mat = Some(HalfMatch::new(pattern, state.at)); + return Ok(()); + } + } + // Once we've reported all matches at a given position, we need to + // advance the search to the next position. 
+ state.at += 1; + if state.at > input.end() { + return Ok(()); + } + sid + } + }; + + // NOTE: We don't optimize the crap out of this routine primarily because + // it seems like most overlapping searches will have higher match counts, + // and thus, throughput is perhaps not as important. But if you have a use + // case for something faster, feel free to file an issue. + cache.search_start(state.at); + while state.at < input.end() { + sid = dfa + .next_state(cache, sid, input.haystack()[state.at]) + .map_err(|_| gave_up(state.at))?; + if sid.is_tagged() { + state.id = Some(sid); + if sid.is_start() { + if let Some(ref pre) = pre { + let span = Span::from(state.at..input.end()); + match pre.find(input.haystack(), span) { + None => return Ok(()), + Some(ref span) => { + if span.start > state.at { + state.at = span.start; + if !universal_start { + sid = prefilter_restart( + dfa, cache, &input, state.at, + )?; + } + continue; + } + } + } + } + } else if sid.is_match() { + state.next_match_index = Some(1); + let pattern = dfa.match_pattern(cache, sid, 0); + state.mat = Some(HalfMatch::new(pattern, state.at)); + cache.search_finish(state.at); + return Ok(()); + } else if sid.is_dead() { + cache.search_finish(state.at); + return Ok(()); + } else if sid.is_quit() { + cache.search_finish(state.at); + return Err(MatchError::quit( + input.haystack()[state.at], + state.at, + )); + } else { + debug_assert!(sid.is_unknown()); + unreachable!("sid being unknown is a bug"); + } + } + state.at += 1; + cache.search_update(state.at); + } + + let result = eoi_fwd(dfa, cache, input, &mut sid, &mut state.mat); + state.id = Some(sid); + if state.mat.is_some() { + // '1' is always correct here since if we get to this point, this + // always corresponds to the first (index '0') match discovered at + // this position. So the next match to report at this position (if + // it exists) is at index '1'. 
+ state.next_match_index = Some(1); + } + cache.search_finish(input.end()); + result +} + +#[inline(never)] +pub(crate) fn find_overlapping_rev( + dfa: &DFA, + cache: &mut Cache, + input: &Input<'_>, + state: &mut OverlappingState, +) -> Result<(), MatchError> { + state.mat = None; + if input.is_done() { + return Ok(()); + } + let mut sid = match state.id { + None => { + let sid = init_rev(dfa, cache, input)?; + state.id = Some(sid); + if input.start() == input.end() { + state.rev_eoi = true; + } else { + state.at = input.end() - 1; + } + sid + } + Some(sid) => { + if let Some(match_index) = state.next_match_index { + let match_len = dfa.match_len(cache, sid); + if match_index < match_len { + state.next_match_index = Some(match_index + 1); + let pattern = dfa.match_pattern(cache, sid, match_index); + state.mat = Some(HalfMatch::new(pattern, state.at)); + return Ok(()); + } + } + // Once we've reported all matches at a given position, we need + // to advance the search to the next position. However, if we've + // already followed the EOI transition, then we know we're done + // with the search and there cannot be any more matches to report. + if state.rev_eoi { + return Ok(()); + } else if state.at == input.start() { + // At this point, we should follow the EOI transition. This + // will cause us the skip the main loop below and fall through + // to the final 'eoi_rev' transition. + state.rev_eoi = true; + } else { + // We haven't hit the end of the search yet, so move on. 
+ state.at -= 1; + } + sid + } + }; + cache.search_start(state.at); + while !state.rev_eoi { + sid = dfa + .next_state(cache, sid, input.haystack()[state.at]) + .map_err(|_| gave_up(state.at))?; + if sid.is_tagged() { + state.id = Some(sid); + if sid.is_start() { + // do nothing + } else if sid.is_match() { + state.next_match_index = Some(1); + let pattern = dfa.match_pattern(cache, sid, 0); + state.mat = Some(HalfMatch::new(pattern, state.at + 1)); + cache.search_finish(state.at); + return Ok(()); + } else if sid.is_dead() { + cache.search_finish(state.at); + return Ok(()); + } else if sid.is_quit() { + cache.search_finish(state.at); + return Err(MatchError::quit( + input.haystack()[state.at], + state.at, + )); + } else { + debug_assert!(sid.is_unknown()); + unreachable!("sid being unknown is a bug"); + } + } + if state.at == input.start() { + break; + } + state.at -= 1; + cache.search_update(state.at); + } + + let result = eoi_rev(dfa, cache, input, &mut sid, &mut state.mat); + state.rev_eoi = true; + state.id = Some(sid); + if state.mat.is_some() { + // '1' is always correct here since if we get to this point, this + // always corresponds to the first (index '0') match discovered at + // this position. So the next match to report at this position (if + // it exists) is at index '1'. + state.next_match_index = Some(1); + } + cache.search_finish(input.start()); + result +} + +#[cfg_attr(feature = "perf-inline", inline(always))] +fn init_fwd( + dfa: &DFA, + cache: &mut Cache, + input: &Input<'_>, +) -> Result { + let sid = dfa.start_state_forward(cache, input)?; + // Start states can never be match states, since all matches are delayed + // by 1 byte. 
+ debug_assert!(!sid.is_match()); + Ok(sid) +} + +#[cfg_attr(feature = "perf-inline", inline(always))] +fn init_rev( + dfa: &DFA, + cache: &mut Cache, + input: &Input<'_>, +) -> Result { + let sid = dfa.start_state_reverse(cache, input)?; + // Start states can never be match states, since all matches are delayed + // by 1 byte. + debug_assert!(!sid.is_match()); + Ok(sid) +} + +#[cfg_attr(feature = "perf-inline", inline(always))] +fn eoi_fwd( + dfa: &DFA, + cache: &mut Cache, + input: &Input<'_>, + sid: &mut LazyStateID, + mat: &mut Option, +) -> Result<(), MatchError> { + let sp = input.get_span(); + match input.haystack().get(sp.end) { + Some(&b) => { + *sid = + dfa.next_state(cache, *sid, b).map_err(|_| gave_up(sp.end))?; + if sid.is_match() { + let pattern = dfa.match_pattern(cache, *sid, 0); + *mat = Some(HalfMatch::new(pattern, sp.end)); + } else if sid.is_quit() { + return Err(MatchError::quit(b, sp.end)); + } + } + None => { + *sid = dfa + .next_eoi_state(cache, *sid) + .map_err(|_| gave_up(input.haystack().len()))?; + if sid.is_match() { + let pattern = dfa.match_pattern(cache, *sid, 0); + *mat = Some(HalfMatch::new(pattern, input.haystack().len())); + } + // N.B. We don't have to check 'is_quit' here because the EOI + // transition can never lead to a quit state. 
+ debug_assert!(!sid.is_quit()); + } + } + Ok(()) +} + +#[cfg_attr(feature = "perf-inline", inline(always))] +fn eoi_rev( + dfa: &DFA, + cache: &mut Cache, + input: &Input<'_>, + sid: &mut LazyStateID, + mat: &mut Option, +) -> Result<(), MatchError> { + let sp = input.get_span(); + if sp.start > 0 { + let byte = input.haystack()[sp.start - 1]; + *sid = dfa + .next_state(cache, *sid, byte) + .map_err(|_| gave_up(sp.start))?; + if sid.is_match() { + let pattern = dfa.match_pattern(cache, *sid, 0); + *mat = Some(HalfMatch::new(pattern, sp.start)); + } else if sid.is_quit() { + return Err(MatchError::quit(byte, sp.start - 1)); + } + } else { + *sid = + dfa.next_eoi_state(cache, *sid).map_err(|_| gave_up(sp.start))?; + if sid.is_match() { + let pattern = dfa.match_pattern(cache, *sid, 0); + *mat = Some(HalfMatch::new(pattern, 0)); + } + // N.B. We don't have to check 'is_quit' here because the EOI + // transition can never lead to a quit state. + debug_assert!(!sid.is_quit()); + } + Ok(()) +} + +/// Re-compute the starting state that a DFA should be in after finding a +/// prefilter candidate match at the position `at`. +/// +/// It is always correct to call this, but not always necessary. Namely, +/// whenever the DFA has a universal start state, the DFA can remain in the +/// start state that it was in when it ran the prefilter. Why? Because in that +/// case, there is only one start state. +/// +/// When does a DFA have a universal start state? In precisely cases where +/// it has no look-around assertions in its prefix. So for example, `\bfoo` +/// does not have a universal start state because the start state depends on +/// whether the byte immediately before the start position is a word byte or +/// not. However, `foo\b` does have a universal start state because the word +/// boundary does not appear in the pattern's prefix. +/// +/// So... 
most cases don't need this, but when a pattern doesn't have a +/// universal start state, then after a prefilter candidate has been found, the +/// current state *must* be re-litigated as if computing the start state at the +/// beginning of the search because it might change. That is, not all start +/// states are created equal. +/// +/// Why avoid it? Because while it's not super expensive, it isn't a trivial +/// operation to compute the start state. It is much better to avoid it and +/// just state in the current state if you know it to be correct. +#[cfg_attr(feature = "perf-inline", inline(always))] +fn prefilter_restart( + dfa: &DFA, + cache: &mut Cache, + input: &Input<'_>, + at: usize, +) -> Result { + let mut input = input.clone(); + input.set_start(at); + init_fwd(dfa, cache, &input) +} + +/// A convenience routine for constructing a "gave up" match error. +#[cfg_attr(feature = "perf-inline", inline(always))] +fn gave_up(offset: usize) -> MatchError { + MatchError::gave_up(offset) +} diff --git a/vendor/regex-automata/src/lib.rs b/vendor/regex-automata/src/lib.rs new file mode 100644 index 0000000..62260a5 --- /dev/null +++ b/vendor/regex-automata/src/lib.rs @@ -0,0 +1,648 @@ +/*! +This crate exposes a variety of regex engines used by the `regex` crate. +It provides a vast, sprawling and "expert" level API to each regex engine. +The regex engines provided by this crate focus heavily on finite automata +implementations and specifically guarantee worst case `O(m * n)` time +complexity for all searches. (Where `m ~ len(regex)` and `n ~ len(haystack)`.) + +The primary goal of this crate is to serve as an implementation detail for the +`regex` crate. A secondary goal is to make its internals available for use by +others. + +# Table of contents + +* [Should I be using this crate?](#should-i-be-using-this-crate) gives some +reasons for and against using this crate. +* [Examples](#examples) provides a small selection of things you can do with +this crate. 
+* [Available regex engines](#available-regex-engines) provides a hyperlinked +list of all regex engines in this crate. +* [API themes](#api-themes) discusses common elements used throughout this +crate. +* [Crate features](#crate-features) documents the extensive list of Cargo +features available. + +# Should I be using this crate? + +If you find yourself here because you just want to use regexes, then you should +first check out whether the [`regex` crate](https://docs.rs/regex) meets +your needs. It provides a streamlined and difficult-to-misuse API for regex +searching. + +If you're here because there is something specific you want to do that can't +be easily done with `regex` crate, then you are perhaps in the right place. +It's most likely that the first stop you'll want to make is to explore the +[`meta` regex APIs](meta). Namely, the `regex` crate is just a light wrapper +over a [`meta::Regex`], so its API will probably be the easiest to transition +to. In contrast to the `regex` crate, the `meta::Regex` API supports more +search parameters and does multi-pattern searches. However, it isn't quite as +ergonomic. + +Otherwise, the following is an inexhaustive list of reasons to use this crate: + +* You want to analyze or use a [Thompson `NFA`](nfa::thompson::NFA) directly. +* You want more powerful multi-pattern search than what is provided by +`RegexSet` in the `regex` crate. All regex engines in this crate support +multi-pattern searches. +* You want to use one of the `regex` crate's internal engines directly because +of some interesting configuration that isn't possible via the `regex` crate. +For example, a [lazy DFA's configuration](hybrid::dfa::Config) exposes a +dizzying number of options for controlling its execution. +* You want to use the lower level search APIs. For example, both the [lazy +DFA](hybrid::dfa) and [fully compiled DFAs](dfa) support searching by exploring +the automaton one state at a time. 
This might be useful, for example, for +stream searches or searches of strings stored non-contiguously in memory. +* You want to build a fully compiled DFA and then [use zero-copy +deserialization](dfa::dense::DFA::from_bytes) to load it into memory and use +it for searching. This use case is supported in core-only no-std/no-alloc +environments. +* You want to run [anchored searches](Input::anchored) without using the `^` +anchor in your regex pattern. +* You need to work around contention issues with +sharing a regex across multiple threads. The +[`meta::Regex::search_with`](meta::Regex::search_with) API permits bypassing +any kind of synchronization at all by requiring the caller to provide the +mutable scratch space needed during a search. +* You want to build your own regex engine on top of the `regex` crate's +infrastructure. + +# Examples + +This section tries to identify a few interesting things you can do with this +crate and demonstrates them. + +### Multi-pattern searches with capture groups + +One of the more frustrating limitations of `RegexSet` in the `regex` crate +(at the time of writing) is that it doesn't report match positions. With this +crate, multi-pattern support was intentionally designed in from the beginning, +which means it works in all regex engines and even for capture groups as well. + +This example shows how to search for matches of multiple regexes, where each +regex uses the same capture group names to parse different key-value formats. + +``` +use regex_automata::{meta::Regex, PatternID}; + +let re = Regex::new_many(&[ + r#"(?m)^(?<key>[[:word:]]+)=(?<val>[[:word:]]+)$"#, + r#"(?m)^(?<key>[[:word:]]+)="(?<val>[^"]+)"$"#, + r#"(?m)^(?<key>[[:word:]]+)='(?<val>[^']+)'$"#, + r#"(?m)^(?<key>[[:word:]]+):\s*(?<val>[[:word:]]+)$"#, +])?; +let hay = r#" +best_album="Blow Your Face Out" +best_quote='"then as it was, then again it will be"' +best_year=1973 +best_simpsons_episode: HOMR +"#; +let mut kvs = vec![]; +for caps in re.captures_iter(hay) { + // N.B.
One could use capture indices '1' and '2' here + // as well. Capture indices are local to each pattern. + // (Just like names are.) + let key = &hay[caps.get_group_by_name("key").unwrap()]; + let val = &hay[caps.get_group_by_name("val").unwrap()]; + kvs.push((key, val)); +} +assert_eq!(kvs, vec![ + ("best_album", "Blow Your Face Out"), + ("best_quote", "\"then as it was, then again it will be\""), + ("best_year", "1973"), + ("best_simpsons_episode", "HOMR"), +]); + +# Ok::<(), Box<dyn std::error::Error>>(()) +``` + +### Build a full DFA and walk it manually + +One of the regex engines in this crate is a fully compiled DFA. It takes worst +case exponential time to build, but once built, it can be easily explored and +used for searches. Here's a simple example that uses its lower level APIs to +implement a simple anchored search by hand. + +``` +use regex_automata::{dfa::{Automaton, dense}, Input}; + +let dfa = dense::DFA::new(r"(?-u)\b[A-Z]\w+z\b")?; +let haystack = "Quartz"; + +// The start state is determined by inspecting the position and the +// initial bytes of the haystack. +let mut state = dfa.start_state_forward(&Input::new(haystack))?; +// Walk all the bytes in the haystack. +for &b in haystack.as_bytes().iter() { + state = dfa.next_state(state, b); +} +// DFAs in this crate require an explicit +// end-of-input transition if a search reaches +// the end of a haystack. +state = dfa.next_eoi_state(state); +assert!(dfa.is_match_state(state)); + +# Ok::<(), Box<dyn std::error::Error>>(()) +``` + +Or do the same with a lazy DFA that avoids exponential worst case compile time, +but requires mutable scratch space to lazily build the DFA during the search. + +``` +use regex_automata::{hybrid::dfa::DFA, Input}; + +let dfa = DFA::new(r"(?-u)\b[A-Z]\w+z\b")?; +let mut cache = dfa.create_cache(); +let hay = "Quartz"; + +// The start state is determined by inspecting the position and the +// initial bytes of the haystack.
+let mut state = dfa.start_state_forward(&mut cache, &Input::new(hay))?; +// Walk all the bytes in the haystack. +for &b in hay.as_bytes().iter() { + state = dfa.next_state(&mut cache, state, b)?; +} +// DFAs in this crate require an explicit +// end-of-input transition if a search reaches +// the end of a haystack. +state = dfa.next_eoi_state(&mut cache, state)?; +assert!(state.is_match()); + +# Ok::<(), Box<dyn std::error::Error>>(()) +``` + +### Find all overlapping matches + +This example shows how to build a DFA and use it to find all possible matches, +including overlapping matches. A similar example will work with a lazy DFA as +well. This also works with multiple patterns and will report all matches at the +same position where multiple patterns match. + +``` +use regex_automata::{ + dfa::{dense, Automaton, OverlappingState}, + Input, MatchKind, +}; + +let dfa = dense::DFA::builder() + .configure(dense::DFA::config().match_kind(MatchKind::All)) + .build(r"(?-u)\w{3,}")?; +let input = Input::new("homer marge bart lisa maggie"); +let mut state = OverlappingState::start(); + +let mut matches = vec![]; +while let Some(hm) = { + dfa.try_search_overlapping_fwd(&input, &mut state)?; + state.get_match() +} { + matches.push(hm.offset()); +} +assert_eq!(matches, vec![ + 3, 4, 5, // hom, home, homer + 9, 10, 11, // mar, marg, marge + 15, 16, // bar, bart + 20, 21, // lis, lisa + 25, 26, 27, 28, // mag, magg, maggi, maggie +]); + +# Ok::<(), Box<dyn std::error::Error>>(()) +``` + +# Available regex engines + +The following is a complete list of all regex engines provided by this crate, +along with a very brief description of it and why you might want to use it. + +* [`dfa::regex::Regex`] is a regex engine that works on top of either +[dense](dfa::dense) or [sparse](dfa::sparse) fully compiled DFAs. You might +use a DFA if you need the fastest possible regex engine in this crate and can +afford the exorbitant memory usage usually required by DFAs.
Low level APIs on +fully compiled DFAs are provided by the [`Automaton` trait](dfa::Automaton). +Fully compiled dense DFAs can handle all regexes except for searching a regex +with a Unicode word boundary on non-ASCII haystacks. A fully compiled DFA based +regex can only report the start and end of each match. +* [`hybrid::regex::Regex`] is a regex engine that works on top of a lazily +built DFA. Its performance profile is very similar to that of fully compiled +DFAs, but can be slower in some pathological cases. Fully compiled DFAs are +also amenable to more optimizations, such as state acceleration, that aren't +available in a lazy DFA. You might use this lazy DFA if you can't abide the +worst case exponential compile time of a full DFA, but still want the DFA +search performance in the vast majority of cases. A lazy DFA based regex can +only report the start and end of each match. +* [`dfa::onepass::DFA`] is a regex engine that is implemented as a DFA, but +can report the matches of each capture group in addition to the start and end +of each match. The catch is that it only works on a somewhat small subset of +regexes known as "one-pass." You'll want to use this for cases when you need +capture group matches and the regex is one-pass since it is likely to be faster +than any alternative. A one-pass DFA can handle all types of regexes, but does +have some reasonable limits on the number of capture groups it can handle. +* [`nfa::thompson::backtrack::BoundedBacktracker`] is a regex engine that uses +backtracking, but keeps track of the work it has done to avoid catastrophic +backtracking. Like the one-pass DFA, it provides the matches of each capture +group. It retains the `O(m * n)` worst case time bound. This tends to be slower +than the one-pass DFA regex engine, but faster than the PikeVM. It can handle +all types of regexes, but usually only works well with small haystacks and +small regexes due to the memory required to avoid redoing work. 
+* [`nfa::thompson::pikevm::PikeVM`] is a regex engine that can handle all +regexes, of all sizes and provides capture group matches. It tends to be a tool +of last resort because it is also usually the slowest regex engine. +* [`meta::Regex`] is the meta regex engine that combines *all* of the above +engines into one. The reason for this is that each of the engines above has +its own caveats such as, "only handles a subset of regexes" or "is generally +slow." The meta regex engine accounts for all of these caveats and composes +the engines in a way that attempts to mitigate each engine's weaknesses while +emphasizing its strengths. For example, it will attempt to run a lazy DFA even +if it might fail. In which case, it will restart the search with a likely +slower but more capable regex engine. The meta regex engine is what you should +default to. Use one of the above engines directly only if you have a specific +reason to. + +# API themes + +While each regex engine has its own APIs and configuration options, there are +some general themes followed by all of them. + +### The `Input` abstraction + +Most search routines in this crate accept anything that implements +`Into<Input>`. Both `&str` and `&[u8]` haystacks satisfy this constraint, which +means that things like `engine.search("foo")` will work as you would expect. + +By virtue of accepting an `Into<Input>` though, callers can provide more than +just a haystack. Indeed, the [`Input`] type has more details, but briefly, +callers can use it to configure various aspects of the search: + +* The span of the haystack to search via [`Input::span`] or [`Input::range`], +which might be a substring of the haystack. +* Whether to run an anchored search or not via [`Input::anchored`]. This +permits one to require matches to start at the same offset that the search +started. +* Whether to ask the regex engine to stop as soon as a match is seen via +[`Input::earliest`].
This can be used to find the offset of a match as soon +as it is known without waiting for the full leftmost-first match to be found. +This can also be used to avoid the worst case `O(m * n^2)` time complexity +of iteration. + +Some lower level search routines accept an `&Input` for performance reasons. +In which case, `&Input::new("haystack")` can be used for a simple search. + +### Error reporting + +Most, but not all, regex engines in this crate can fail to execute a search. +When a search fails, callers cannot determine whether or not a match exists. +That is, the result is indeterminate. + +Search failure, in all cases in this crate, is represented by a [`MatchError`]. +Routines that can fail start with the `try_` prefix in their name. For example, +[`hybrid::regex::Regex::try_search`] can fail for a number of reasons. +Conversely, routines that either can't fail or can panic on failure lack the +`try_` prefix. For example, [`hybrid::regex::Regex::find`] will panic in +cases where [`hybrid::regex::Regex::try_search`] would return an error, and +[`meta::Regex::find`] will never panic. Therefore, callers need to pay close +attention to the panicking conditions in the documentation. + +In most cases, the reasons that a search fails are either predictable or +configurable, albeit at some additional cost. + +An example of predictable failure is +[`BoundedBacktracker::try_search`](nfa::thompson::backtrack::BoundedBacktracker::try_search). +Namely, it fails whenever the multiplication of the haystack, the regex and some +constant exceeds the +[configured visited capacity](nfa::thompson::backtrack::Config::visited_capacity). +Callers can predict the failure in terms of haystack length via the +[`BoundedBacktracker::max_haystack_len`](nfa::thompson::backtrack::BoundedBacktracker::max_haystack_len) +method. 
While this form of failure is technically avoidable by increasing the +visited capacity, it isn't practical to do so for all inputs because the +memory usage required for larger haystacks becomes impractically large. So in +practice, if one is using the bounded backtracker, you really do have to deal +with the failure. + +An example of configurable failure happens when one enables heuristic support +for Unicode word boundaries in a DFA. Namely, since the DFAs in this crate +(except for the one-pass DFA) do not support Unicode word boundaries on +non-ASCII haystacks, building a DFA from an NFA that contains a Unicode word +boundary will itself fail. However, one can configure DFAs to still be built in +this case by +[configuring heuristic support for Unicode word boundaries](hybrid::dfa::Config::unicode_word_boundary). +If the NFA the DFA is built from contains a Unicode word boundary, then the +DFA will still be built, but special transitions will be added to every state +that cause the DFA to fail if any non-ASCII byte is seen. This failure happens +at search time and it requires the caller to opt into this. + +There are other ways for regex engines to fail in this crate, but the above +two should represent the general theme of failures one can find. Dealing +with these failures is, in part, one of the responsibilities of the [meta regex +engine](meta). Notice, for example, that the meta regex engine exposes an API +that never returns an error nor panics. It carefully manages all of the ways +in which the regex engines can fail and either avoids the predictable ones +entirely (e.g., the bounded backtracker) or reacts to configured failures by +falling back to a different engine (e.g., the lazy DFA quitting because it saw +a non-ASCII byte). + +### Configuration and Builders + +Most of the regex engines in this crate come with two types to facilitate +building the regex engine: a `Config` and a `Builder`.
A `Config` is usually +specific to that particular regex engine, but other objects such as parsing and +NFA compilation have `Config` types too. A `Builder` is the thing responsible +for taking inputs (either pattern strings or already-parsed patterns or even +NFAs directly) and turning them into an actual regex engine that can be used +for searching. + +The main reason why building a regex engine is a bit complicated is because +of the desire to permit composition with de-coupled components. For example, +you might want to [manually construct a Thompson NFA](nfa::thompson::Builder) +and then build a regex engine from it without ever using a regex parser +at all. On the other hand, you might also want to build a regex engine directly +from the concrete syntax. This demonstrates why regex engine construction is +so flexible: it needs to support not just convenient construction, but also +construction from parts built elsewhere. + +This is also in turn why there are many different `Config` structs in this +crate. Let's look more closely at an example: [`hybrid::regex::Builder`]. It +accepts three different `Config` types for configuring construction of a lazy +DFA regex: + +* [`hybrid::regex::Builder::syntax`] accepts a +[`util::syntax::Config`] for configuring the options found in the +[`regex-syntax`](regex_syntax) crate. For example, whether to match +case insensitively. +* [`hybrid::regex::Builder::thompson`] accepts a [`nfa::thompson::Config`] for +configuring construction of a [Thompson NFA](nfa::thompson::NFA). For example, +whether to build an NFA that matches the reverse language described by the +regex. +* [`hybrid::regex::Builder::dfa`] accepts a [`hybrid::dfa::Config`] for +configuring construction of the pair of underlying lazy DFAs that make up the +lazy DFA regex engine. For example, changing the capacity of the cache used to +store the transition table.
+ +The lazy DFA regex engine uses all three of those configuration objects for +methods like [`hybrid::regex::Builder::build`], which accepts a pattern +string containing the concrete syntax of your regex. It uses the syntax +configuration to parse it into an AST and translate it into an HIR. Then the +NFA configuration when compiling the HIR into an NFA. And then finally the DFA +configuration when lazily determinizing the NFA into a DFA. + +Notice though that the builder also has a +[`hybrid::regex::Builder::build_from_dfas`] constructor. This permits callers +to build the underlying pair of lazy DFAs themselves (one for the forward +searching to find the end of a match and one for the reverse searching to find +the start of a match), and then build the regex engine from them. The lazy +DFAs, in turn, have their own builder that permits [construction directly from +a Thompson NFA](hybrid::dfa::Builder::build_from_nfa). Continuing down the +rabbit hole, a Thompson NFA has its own compiler that permits [construction +directly from an HIR](nfa::thompson::Compiler::build_from_hir). The lazy DFA +regex engine builder lets you follow this rabbit hole all the way down, but +also provides convenience routines that do it for you when you don't need +precise control over every component. + +The [meta regex engine](meta) is a good example of something that utilizes the +full flexibility of these builders. It often needs not only precise control +over each component, but also shares them across multiple regex engines. +(Most sharing is done by internal reference accounting. For example, an +[`NFA`](nfa::thompson::NFA) is reference counted internally which makes cloning +cheap.) + +### Size limits + +Unlike the `regex` crate, the `regex-automata` crate specifically does not +enable any size limits by default. That means users of this crate need to +be quite careful when using untrusted patterns. 
Namely, because bounded +repetitions can grow exponentially by stacking them, it is possible to build a +very large internal regex object from just a small pattern string. For example, +the NFA built from the pattern `a{10}{10}{10}{10}{10}{10}{10}` is over 240MB. + +There are multiple size limit options in this crate. If one or more size limits +are relevant for the object you're building, they will be configurable via +methods on a corresponding `Config` type. + +# Crate features + +This crate has a dizzying number of features. The main idea is to be able to +control how much stuff you pull in for your specific use case, since the full +crate is quite large and can dramatically increase compile times and binary +size. + +The most barebones but useful configuration is to disable all default features +and enable only `dfa-search`. This will bring in just the DFA deserialization +and search routines without any dependency on `std` or `alloc`. This does +require generating and serializing a DFA, and then storing it somewhere, but +it permits regex searches in freestanding or embedded environments. + +Because there are so many features, they are split into a few groups. + +The default set of features is: `std`, `syntax`, `perf`, `unicode`, `meta`, +`nfa`, `dfa` and `hybrid`. Basically, the default is to enable everything +except for development related features like `logging`. + +### Ecosystem features + +* **std** - Enables use of the standard library. In terms of APIs, this usually +just means that error types implement the `std::error::Error` trait. Otherwise, +`std` sometimes enables the code to be faster, for example, using a `HashMap` +instead of a `BTreeMap`. (The `std` feature matters more for dependencies like +`aho-corasick` and `memchr`, where `std` is required to enable certain classes +of SIMD optimizations.) Enabling `std` automatically enables `alloc`. +* **alloc** - Enables use of the `alloc` library. This is required for most +APIs in this crate. 
The main exception is deserializing and searching with +fully compiled DFAs. +* **logging** - Adds a dependency on the `log` crate and makes this crate emit +log messages of varying degrees of utility. The log messages are especially +useful in trying to understand what the meta regex engine is doing. + +### Performance features + +* **perf** - Enables all of the below features. +* **perf-inline** - When enabled, `inline(always)` is used in (many) strategic +locations to help performance at the expense of longer compile times and +increased binary size. +* **perf-literal** - Enables all literal related optimizations. + * **perf-literal-substring** - Enables all single substring literal + optimizations. This includes adding a dependency on the `memchr` crate. + * **perf-literal-multisubstring** - Enables all multiple substring literal + optimizations. This includes adding a dependency on the `aho-corasick` + crate. + +### Unicode features + +* **unicode** - + Enables all Unicode features. This feature is enabled by default, and will + always cover all Unicode features, even if more are added in the future. +* **unicode-age** - + Provide the data for the + [Unicode `Age` property](https://www.unicode.org/reports/tr44/tr44-24.html#Character_Age). + This makes it possible to use classes like `\p{Age:6.0}` to refer to all + codepoints first introduced in Unicode 6.0 +* **unicode-bool** - + Provide the data for numerous Unicode boolean properties. The full list + is not included here, but contains properties like `Alphabetic`, `Emoji`, + `Lowercase`, `Math`, `Uppercase` and `White_Space`. +* **unicode-case** - + Provide the data for case insensitive matching using + [Unicode's "simple loose matches" specification](https://www.unicode.org/reports/tr18/#Simple_Loose_Matches). +* **unicode-gencat** - + Provide the data for + [Unicode general categories](https://www.unicode.org/reports/tr44/tr44-24.html#General_Category_Values). 
+ This includes, but is not limited to, `Decimal_Number`, `Letter`, + `Math_Symbol`, `Number` and `Punctuation`. +* **unicode-perl** - + Provide the data for supporting the Unicode-aware Perl character classes, + corresponding to `\w`, `\s` and `\d`. This is also necessary for using + Unicode-aware word boundary assertions. Note that if this feature is + disabled, the `\s` and `\d` character classes are still available if the + `unicode-bool` and `unicode-gencat` features are enabled, respectively. +* **unicode-script** - + Provide the data for + [Unicode scripts and script extensions](https://www.unicode.org/reports/tr24/). + This includes, but is not limited to, `Arabic`, `Cyrillic`, `Hebrew`, + `Latin` and `Thai`. +* **unicode-segment** - + Provide the data necessary to provide the properties used to implement the + [Unicode text segmentation algorithms](https://www.unicode.org/reports/tr29/). + This enables using classes like `\p{gcb=Extend}`, `\p{wb=Katakana}` and + `\p{sb=ATerm}`. +* **unicode-word-boundary** - + Enables support for Unicode word boundaries, i.e., `\b`, in regexes. When + this and `unicode-perl` are enabled, then data tables from `regex-syntax` are + used to implement Unicode word boundaries. However, if `regex-syntax` isn't + enabled as a dependency then one can still enable this feature. It will + cause `regex-automata` to bundle its own data table that would otherwise be + redundant with `regex-syntax`'s table. + +### Regex engine features + +* **syntax** - Enables a dependency on `regex-syntax`. This makes APIs +for building regex engines from pattern strings available. Without the +`regex-syntax` dependency, the only way to build a regex engine is generally +to deserialize a previously built DFA or to hand assemble an NFA using its +[builder API](nfa::thompson::Builder). Once you have an NFA, you can build any +of the regex engines in this crate. The `syntax` feature also enables `alloc`. +* **meta** - Enables the meta regex engine. 
This also enables the `syntax` and +`nfa-pikevm` features, as both are the minimal requirements needed. The meta +regex engine benefits from enabling any of the other regex engines and will +use them automatically when appropriate. +* **nfa** - Enables all NFA related features below. + * **nfa-thompson** - Enables the Thompson NFA APIs. This enables `alloc`. + * **nfa-pikevm** - Enables the PikeVM regex engine. This enables + `nfa-thompson`. + * **nfa-backtrack** - Enables the bounded backtracker regex engine. This + enables `nfa-thompson`. +* **dfa** - Enables all DFA related features below. + * **dfa-build** - Enables APIs for determinizing DFAs from NFAs. This + enables `nfa-thompson` and `dfa-search`. + * **dfa-search** - Enables APIs for searching with DFAs. + * **dfa-onepass** - Enables the one-pass DFA API. This enables + `nfa-thompson`. +* **hybrid** - Enables the hybrid NFA/DFA or "lazy DFA" regex engine. This +enables `alloc` and `nfa-thompson`. + +*/ + +// We are no_std. +#![no_std] +// All APIs need docs! +#![deny(missing_docs)] +// Some intra-doc links are broken when certain features are disabled, so we +// only bleat about it when most (all?) features are enabled. But when we do, +// we block the build. Links need to work. +#![cfg_attr( + all( + feature = "std", + feature = "nfa", + feature = "dfa", + feature = "hybrid" + ), + deny(rustdoc::broken_intra_doc_links) +)] +// Broken rustdoc links are very easy to come by when you start disabling +// features. Namely, features tend to change imports, and imports change what's +// available to link to. +// +// Basically, we just don't support rustdoc for anything other than the maximal +// feature configuration. Other configurations will work, they just won't be +// perfect. +// +// So here, we specifically allow them so we don't even get warned about them. 
+#![cfg_attr( + not(all( + feature = "std", + feature = "nfa", + feature = "dfa", + feature = "hybrid" + )), + allow(rustdoc::broken_intra_doc_links) +)] +// Kinda similar, but eliminating all of the dead code and unused import +// warnings for every feature combo is a fool's errand. Instead, we just +// suppress those, but still let them through in a common configuration when we +// build most of everything. +// +// This does actually suggest that when features are disabled, we are actually +// compiling more code than we need to be. And this is perhaps not so great +// because disabling features is usually done in order to reduce compile times +// by reducing the amount of code one compiles... However, usually, most of the +// time this dead code is a relatively small amount from the 'util' module. +// But... I confess... There isn't a ton of visibility on this. +// +// I'm happy to try to address this in a different way, but "let's annotate +// every function in 'util' with some non-local combination of features" just +// cannot be the way forward. +#![cfg_attr( + not(all( + feature = "std", + feature = "nfa", + feature = "dfa", + feature = "hybrid", + feature = "perf-literal-substring", + feature = "perf-literal-multisubstring", + )), + allow(dead_code, unused_imports, unused_variables) +)] +// We generally want all types to impl Debug. +#![warn(missing_debug_implementations)] +// No clue why this thing is still unstable because it's pretty amazing. This +// adds Cargo feature annotations to items in the rustdoc output. Which is +// sadly hugely beneficial for this crate due to the number of features. +#![cfg_attr(docsrs, feature(doc_auto_cfg))] + +// I have literally never tested this crate on 16-bit, so it is quite +// suspicious to advertise support for it. But... the regex crate, at time +// of writing, at least claims to support it by not doing any conditional +// compilation based on the target pointer width. 
So I guess I remain +// consistent with that here. +// +// If you are here because you're on a 16-bit system and you were somehow using +// the regex crate previously, please file an issue. Please be prepared to +// provide some kind of reproduction or carve out some path to getting 16-bit +// working in CI. (Via qemu?) +#[cfg(not(any( + target_pointer_width = "16", + target_pointer_width = "32", + target_pointer_width = "64" +)))] +compile_error!("not supported on non-{16,32,64}, please file an issue"); + +#[cfg(any(test, feature = "std"))] +extern crate std; + +#[cfg(feature = "alloc")] +extern crate alloc; + +#[cfg(doctest)] +doc_comment::doctest!("../README.md"); + +#[doc(inline)] +pub use crate::util::primitives::PatternID; +pub use crate::util::search::*; + +#[macro_use] +mod macros; + +#[cfg(any(feature = "dfa-search", feature = "dfa-onepass"))] +pub mod dfa; +#[cfg(feature = "hybrid")] +pub mod hybrid; +#[cfg(feature = "meta")] +pub mod meta; +#[cfg(feature = "nfa-thompson")] +pub mod nfa; +pub mod util; diff --git a/vendor/regex-automata/src/macros.rs b/vendor/regex-automata/src/macros.rs new file mode 100644 index 0000000..31b4ca3 --- /dev/null +++ b/vendor/regex-automata/src/macros.rs @@ -0,0 +1,20 @@ +// Some feature combinations result in some of these macros never being used. +// Which is fine. Just squash the warnings. +#![allow(unused_macros)] + +macro_rules! log { + ($($tt:tt)*) => { + #[cfg(feature = "logging")] + { + $($tt)* + } + } +} + +macro_rules! debug { + ($($tt:tt)*) => { log!(log::debug!($($tt)*)) } +} + +macro_rules! 
trace { + ($($tt:tt)*) => { log!(log::trace!($($tt)*)) } +} diff --git a/vendor/regex-automata/src/meta/error.rs b/vendor/regex-automata/src/meta/error.rs new file mode 100644 index 0000000..ea9a316 --- /dev/null +++ b/vendor/regex-automata/src/meta/error.rs @@ -0,0 +1,241 @@ +use regex_syntax::{ast, hir}; + +use crate::{nfa, util::search::MatchError, PatternID}; + +/// An error that occurs when construction of a `Regex` fails. +/// +/// A build error is generally a result of one of two possible failure +/// modes. First is a parse or syntax error in the concrete syntax of a +/// pattern. Second is that the construction of the underlying regex matcher +/// fails, usually because it gets too big with respect to limits like +/// [`Config::nfa_size_limit`](crate::meta::Config::nfa_size_limit). +/// +/// This error provides very little introspection capabilities. You can: +/// +/// * Ask for the [`PatternID`] of the pattern that caused an error, if one +/// is available. This is available for things like syntax errors, but not for +/// cases where build limits are exceeded. +/// * Ask for the underlying syntax error, but only if the error is a syntax +/// error. +/// * Ask for a human readable message corresponding to the underlying error. +/// * The `BuildError::source` method (from the `std::error::Error` +/// trait implementation) may be used to query for an underlying error if one +/// exists. There are no API guarantees about which error is returned. +/// +/// When the `std` feature is enabled, this implements `std::error::Error`. +#[derive(Clone, Debug)] +pub struct BuildError { + kind: BuildErrorKind, +} + +#[derive(Clone, Debug)] +enum BuildErrorKind { + Syntax { pid: PatternID, err: regex_syntax::Error }, + NFA(nfa::thompson::BuildError), +} + +impl BuildError { + /// If it is known which pattern ID caused this build error to occur, then + /// this method returns it. + /// + /// Some errors are not associated with a particular pattern. 
However, any + /// errors that occur as part of parsing a pattern are guaranteed to be + /// associated with a pattern ID. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{meta::Regex, PatternID}; + /// + /// let err = Regex::new_many(&["a", "b", r"\p{Foo}", "c"]).unwrap_err(); + /// assert_eq!(Some(PatternID::must(2)), err.pattern()); + /// ``` + pub fn pattern(&self) -> Option { + match self.kind { + BuildErrorKind::Syntax { pid, .. } => Some(pid), + _ => None, + } + } + + /// If this error occurred because the regex exceeded the configured size + /// limit before being built, then this returns the configured size limit. + /// + /// The limit returned is what was configured, and corresponds to the + /// maximum amount of heap usage in bytes. + pub fn size_limit(&self) -> Option { + match self.kind { + BuildErrorKind::NFA(ref err) => err.size_limit(), + _ => None, + } + } + + /// If this error corresponds to a syntax error, then a reference to it is + /// returned by this method. + pub fn syntax_error(&self) -> Option<®ex_syntax::Error> { + match self.kind { + BuildErrorKind::Syntax { ref err, .. } => Some(err), + _ => None, + } + } + + pub(crate) fn ast(pid: PatternID, err: ast::Error) -> BuildError { + let err = regex_syntax::Error::from(err); + BuildError { kind: BuildErrorKind::Syntax { pid, err } } + } + + pub(crate) fn hir(pid: PatternID, err: hir::Error) -> BuildError { + let err = regex_syntax::Error::from(err); + BuildError { kind: BuildErrorKind::Syntax { pid, err } } + } + + pub(crate) fn nfa(err: nfa::thompson::BuildError) -> BuildError { + BuildError { kind: BuildErrorKind::NFA(err) } + } +} + +#[cfg(feature = "std")] +impl std::error::Error for BuildError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self.kind { + BuildErrorKind::Syntax { ref err, .. 
} => Some(err), + BuildErrorKind::NFA(ref err) => Some(err), + } + } +} + +impl core::fmt::Display for BuildError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match self.kind { + BuildErrorKind::Syntax { pid, .. } => { + write!(f, "error parsing pattern {}", pid.as_usize()) + } + BuildErrorKind::NFA(_) => write!(f, "error building NFA"), + } + } +} + +/// An error that occurs when a search should be retried. +/// +/// This retry error distinguishes between two different failure modes. +/// +/// The first is one where potential quadratic behavior has been detected. +/// In this case, whatever optimization that led to this behavior should be +/// stopped, and the next best strategy should be used. +/// +/// The second indicates that the underlying regex engine has failed for some +/// reason. This usually occurs because either a lazy DFA's cache has become +/// ineffective or because a non-ASCII byte has been seen *and* a Unicode word +/// boundary was used in one of the patterns. In this failure case, a different +/// regex engine that won't fail in these ways (PikeVM, backtracker or the +/// one-pass DFA) should be used. +/// +/// This is an internal error only and should never bleed into the public +/// API. +#[derive(Debug)] +pub(crate) enum RetryError { + Quadratic(RetryQuadraticError), + Fail(RetryFailError), +} + +#[cfg(feature = "std")] +impl std::error::Error for RetryError {} + +impl core::fmt::Display for RetryError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match *self { + RetryError::Quadratic(ref err) => err.fmt(f), + RetryError::Fail(ref err) => err.fmt(f), + } + } +} + +impl From for RetryError { + fn from(merr: MatchError) -> RetryError { + RetryError::Fail(RetryFailError::from(merr)) + } +} + +/// An error that occurs when potential quadratic behavior has been detected +/// when applying either the "reverse suffix" or "reverse inner" optimizations. 
+/// +/// When this error occurs, callers should abandon the "reverse" optimization +/// and use a normal forward search. +#[derive(Debug)] +pub(crate) struct RetryQuadraticError(()); + +impl RetryQuadraticError { + pub(crate) fn new() -> RetryQuadraticError { + RetryQuadraticError(()) + } +} + +#[cfg(feature = "std")] +impl std::error::Error for RetryQuadraticError {} + +impl core::fmt::Display for RetryQuadraticError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, "regex engine gave up to avoid quadratic behavior") + } +} + +impl From for RetryError { + fn from(err: RetryQuadraticError) -> RetryError { + RetryError::Quadratic(err) + } +} + +/// An error that occurs when a regex engine "gives up" for some reason before +/// finishing a search. Usually this occurs because of heuristic Unicode word +/// boundary support or because of ineffective cache usage in the lazy DFA. +/// +/// When this error occurs, callers should retry the regex search with a +/// different regex engine. +/// +/// Note that this has convenient `From` impls that will automatically +/// convert a `MatchError` into this error. This works because the meta +/// regex engine internals guarantee that errors like `HaystackTooLong` and +/// `UnsupportedAnchored` will never occur. The only errors left are `Quit` and +/// `GaveUp`, which both correspond to this "failure" error. 
+#[derive(Debug)] +pub(crate) struct RetryFailError { + offset: usize, +} + +impl RetryFailError { + pub(crate) fn from_offset(offset: usize) -> RetryFailError { + RetryFailError { offset } + } +} + +#[cfg(feature = "std")] +impl std::error::Error for RetryFailError {} + +impl core::fmt::Display for RetryFailError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, "regex engine failed at offset {:?}", self.offset) + } +} + +impl From for RetryError { + fn from(err: RetryFailError) -> RetryError { + RetryError::Fail(err) + } +} + +impl From for RetryFailError { + fn from(merr: MatchError) -> RetryFailError { + use crate::util::search::MatchErrorKind::*; + + match *merr.kind() { + Quit { offset, .. } => RetryFailError::from_offset(offset), + GaveUp { offset } => RetryFailError::from_offset(offset), + // These can never occur because we avoid them by construction + // or with higher level control flow logic. For example, the + // backtracker's wrapper will never hand out a backtracker engine + // when the haystack would be too long. + HaystackTooLong { .. } | UnsupportedAnchored { .. } => { + unreachable!("found impossible error in meta engine: {}", merr) + } + } + } +} diff --git a/vendor/regex-automata/src/meta/limited.rs b/vendor/regex-automata/src/meta/limited.rs new file mode 100644 index 0000000..5653adc --- /dev/null +++ b/vendor/regex-automata/src/meta/limited.rs @@ -0,0 +1,255 @@ +/*! +This module defines two bespoke reverse DFA searching routines. (One for the +lazy DFA and one for the fully compiled DFA.) These routines differ from the +usual ones by permitting the caller to specify a minimum starting position. +That is, the search will begin at `input.end()` and will usually stop at +`input.start()`, unless `min_start > input.start()`, in which case, the search +will stop at `min_start`. 
+ +In other words, this lets you say, "no, the search must not extend past this +point, even if it's within the bounds of the given `Input`." And if the search +*does* want to go past that point, it stops and returns a "may be quadratic" +error, which indicates that the caller should retry using some other technique. + +These routines specifically exist to protect against quadratic behavior when +employing the "reverse suffix" and "reverse inner" optimizations. Without the +backstop these routines provide, it is possible for parts of the haystack to +get re-scanned over and over again. The backstop not only prevents this, but +*tells you when it is happening* so that you can change the strategy. + +Why can't we just use the normal search routines? We could use the normal +search routines and just set the start bound on the provided `Input` to our +`min_start` position. The problem here is that it's impossible to distinguish +between "no match because we reached the end of input" and "determined there +was no match well before the end of input." The former case is what we care +about with respect to quadratic behavior. The latter case is totally fine. + +Why don't we modify the normal search routines to report the position at which +the search stops? I considered this, and I still wonder if it is indeed the +right thing to do. However, I think the straight-forward thing to do there +would be to complicate the return type signature of almost every search routine +in this crate, which I really do not want to do. It therefore might make more +sense to provide a richer way for search routines to report meta data, but that +was beyond my bandwidth to work on at the time of writing. + +See the 'opt/reverse-inner' and 'opt/reverse-suffix' benchmarks in rebar for a +real demonstration of how quadratic behavior is mitigated. 
+*/ + +use crate::{ + meta::error::{RetryError, RetryQuadraticError}, + HalfMatch, Input, MatchError, +}; + +#[cfg(feature = "dfa-build")] +pub(crate) fn dfa_try_search_half_rev( + dfa: &crate::dfa::dense::DFA>, + input: &Input<'_>, + min_start: usize, +) -> Result, RetryError> { + use crate::dfa::Automaton; + + let mut mat = None; + let mut sid = dfa.start_state_reverse(input)?; + if input.start() == input.end() { + dfa_eoi_rev(dfa, input, &mut sid, &mut mat)?; + return Ok(mat); + } + let mut at = input.end() - 1; + loop { + sid = dfa.next_state(sid, input.haystack()[at]); + if dfa.is_special_state(sid) { + if dfa.is_match_state(sid) { + let pattern = dfa.match_pattern(sid, 0); + // Since reverse searches report the beginning of a + // match and the beginning is inclusive (not exclusive + // like the end of a match), we add 1 to make it + // inclusive. + mat = Some(HalfMatch::new(pattern, at + 1)); + } else if dfa.is_dead_state(sid) { + return Ok(mat); + } else if dfa.is_quit_state(sid) { + return Err(MatchError::quit(input.haystack()[at], at).into()); + } + } + if at == input.start() { + break; + } + at -= 1; + if at < min_start { + trace!( + "reached position {} which is before the previous literal \ + match, quitting to avoid quadratic behavior", + at, + ); + return Err(RetryError::Quadratic(RetryQuadraticError::new())); + } + } + let was_dead = dfa.is_dead_state(sid); + dfa_eoi_rev(dfa, input, &mut sid, &mut mat)?; + // If we reach the beginning of the search and we could otherwise still + // potentially keep matching if there was more to match, then we actually + // return an error to indicate giving up on this optimization. Why? Because + // we can't prove that the real match begins at where we would report it. + // + // This only happens when all of the following are true: + // + // 1) We reach the starting point of our search span. + // 2) The match we found is before the starting point. + // 3) The FSM reports we could possibly find a longer match. 
+ // + // We need (1) because otherwise the search stopped before the starting + // point and there is no possible way to find a more leftmost position. + // + // We need (2) because if the match found has an offset equal to the minimum + // possible offset, then there is no possible more leftmost match. + // + // We need (3) because if the FSM couldn't continue anyway (i.e., it's in + // a dead state), then we know we couldn't find anything more leftmost + // than what we have. (We have to check the state we were in prior to the + // EOI transition since the EOI transition will usually bring us to a dead + // state by virtue of it represents the end-of-input.) + if at == input.start() + && mat.map_or(false, |m| m.offset() > input.start()) + && !was_dead + { + trace!( + "reached beginning of search at offset {} without hitting \ + a dead state, quitting to avoid potential false positive match", + at, + ); + return Err(RetryError::Quadratic(RetryQuadraticError::new())); + } + Ok(mat) +} + +#[cfg(feature = "hybrid")] +pub(crate) fn hybrid_try_search_half_rev( + dfa: &crate::hybrid::dfa::DFA, + cache: &mut crate::hybrid::dfa::Cache, + input: &Input<'_>, + min_start: usize, +) -> Result, RetryError> { + let mut mat = None; + let mut sid = dfa.start_state_reverse(cache, input)?; + if input.start() == input.end() { + hybrid_eoi_rev(dfa, cache, input, &mut sid, &mut mat)?; + return Ok(mat); + } + let mut at = input.end() - 1; + loop { + sid = dfa + .next_state(cache, sid, input.haystack()[at]) + .map_err(|_| MatchError::gave_up(at))?; + if sid.is_tagged() { + if sid.is_match() { + let pattern = dfa.match_pattern(cache, sid, 0); + // Since reverse searches report the beginning of a + // match and the beginning is inclusive (not exclusive + // like the end of a match), we add 1 to make it + // inclusive. 
+ mat = Some(HalfMatch::new(pattern, at + 1)); + } else if sid.is_dead() { + return Ok(mat); + } else if sid.is_quit() { + return Err(MatchError::quit(input.haystack()[at], at).into()); + } + } + if at == input.start() { + break; + } + at -= 1; + if at < min_start { + trace!( + "reached position {} which is before the previous literal \ + match, quitting to avoid quadratic behavior", + at, + ); + return Err(RetryError::Quadratic(RetryQuadraticError::new())); + } + } + let was_dead = sid.is_dead(); + hybrid_eoi_rev(dfa, cache, input, &mut sid, &mut mat)?; + // See the comments in the full DFA routine above for why we need this. + if at == input.start() + && mat.map_or(false, |m| m.offset() > input.start()) + && !was_dead + { + trace!( + "reached beginning of search at offset {} without hitting \ + a dead state, quitting to avoid potential false positive match", + at, + ); + return Err(RetryError::Quadratic(RetryQuadraticError::new())); + } + Ok(mat) +} + +#[cfg(feature = "dfa-build")] +#[cfg_attr(feature = "perf-inline", inline(always))] +fn dfa_eoi_rev( + dfa: &crate::dfa::dense::DFA>, + input: &Input<'_>, + sid: &mut crate::util::primitives::StateID, + mat: &mut Option, +) -> Result<(), MatchError> { + use crate::dfa::Automaton; + + let sp = input.get_span(); + if sp.start > 0 { + let byte = input.haystack()[sp.start - 1]; + *sid = dfa.next_state(*sid, byte); + if dfa.is_match_state(*sid) { + let pattern = dfa.match_pattern(*sid, 0); + *mat = Some(HalfMatch::new(pattern, sp.start)); + } else if dfa.is_quit_state(*sid) { + return Err(MatchError::quit(byte, sp.start - 1)); + } + } else { + *sid = dfa.next_eoi_state(*sid); + if dfa.is_match_state(*sid) { + let pattern = dfa.match_pattern(*sid, 0); + *mat = Some(HalfMatch::new(pattern, 0)); + } + // N.B. We don't have to check 'is_quit' here because the EOI + // transition can never lead to a quit state. 
+ debug_assert!(!dfa.is_quit_state(*sid)); + } + Ok(()) +} + +#[cfg(feature = "hybrid")] +#[cfg_attr(feature = "perf-inline", inline(always))] +fn hybrid_eoi_rev( + dfa: &crate::hybrid::dfa::DFA, + cache: &mut crate::hybrid::dfa::Cache, + input: &Input<'_>, + sid: &mut crate::hybrid::LazyStateID, + mat: &mut Option, +) -> Result<(), MatchError> { + let sp = input.get_span(); + if sp.start > 0 { + let byte = input.haystack()[sp.start - 1]; + *sid = dfa + .next_state(cache, *sid, byte) + .map_err(|_| MatchError::gave_up(sp.start))?; + if sid.is_match() { + let pattern = dfa.match_pattern(cache, *sid, 0); + *mat = Some(HalfMatch::new(pattern, sp.start)); + } else if sid.is_quit() { + return Err(MatchError::quit(byte, sp.start - 1)); + } + } else { + *sid = dfa + .next_eoi_state(cache, *sid) + .map_err(|_| MatchError::gave_up(sp.start))?; + if sid.is_match() { + let pattern = dfa.match_pattern(cache, *sid, 0); + *mat = Some(HalfMatch::new(pattern, 0)); + } + // N.B. We don't have to check 'is_quit' here because the EOI + // transition can never lead to a quit state. + debug_assert!(!sid.is_quit()); + } + Ok(()) +} diff --git a/vendor/regex-automata/src/meta/literal.rs b/vendor/regex-automata/src/meta/literal.rs new file mode 100644 index 0000000..a68b93b --- /dev/null +++ b/vendor/regex-automata/src/meta/literal.rs @@ -0,0 +1,81 @@ +use alloc::{vec, vec::Vec}; + +use regex_syntax::hir::Hir; + +use crate::{meta::regex::RegexInfo, util::search::MatchKind}; + +/// Pull out an alternation of literals from the given sequence of HIR +/// expressions. +/// +/// There are numerous ways for this to fail. Generally, this only applies +/// to regexes of the form 'foo|bar|baz|...|quux'. It can also fail if there +/// are "too few" alternates, in which case, the regex engine is likely faster. +/// +/// And currently, this only returns something when 'hirs.len() == 1'. 
+pub(crate) fn alternation_literals( + info: &RegexInfo, + hirs: &[&Hir], +) -> Option>> { + use regex_syntax::hir::{HirKind, Literal}; + + // Might as well skip the work below if we know we can't build an + // Aho-Corasick searcher. + if !cfg!(feature = "perf-literal-multisubstring") { + return None; + } + // This is pretty hacky, but basically, if `is_alternation_literal` is + // true, then we can make several assumptions about the structure of our + // HIR. This is what justifies the `unreachable!` statements below. + if hirs.len() != 1 + || !info.props()[0].look_set().is_empty() + || info.props()[0].explicit_captures_len() > 0 + || !info.props()[0].is_alternation_literal() + || info.config().get_match_kind() != MatchKind::LeftmostFirst + { + return None; + } + let hir = &hirs[0]; + let alts = match *hir.kind() { + HirKind::Alternation(ref alts) => alts, + _ => return None, // one literal isn't worth it + }; + + let mut lits = vec![]; + for alt in alts { + let mut lit = vec![]; + match *alt.kind() { + HirKind::Literal(Literal(ref bytes)) => { + lit.extend_from_slice(bytes) + } + HirKind::Concat(ref exprs) => { + for e in exprs { + match *e.kind() { + HirKind::Literal(Literal(ref bytes)) => { + lit.extend_from_slice(bytes); + } + _ => unreachable!("expected literal, got {:?}", e), + } + } + } + _ => unreachable!("expected literal or concat, got {:?}", alt), + } + lits.push(lit); + } + // Why do this? Well, when the number of literals is small, it's likely + // that we'll use the lazy DFA which is in turn likely to be faster than + // Aho-Corasick in such cases. Primarily because Aho-Corasick doesn't have + // a "lazy DFA" but either a contiguous NFA or a full DFA. We rarely use + // the latter because it is so hungry (in time and space), and the former + // is decently fast, but not as fast as a well oiled lazy DFA. + // + // However, once the number starts getting large, the lazy DFA is likely + // to start thrashing because of the modest default cache size. 
When + // exactly does this happen? Dunno. But at whatever point that is (we make + // a guess below based on ad hoc benchmarking), we'll want to cut over to + // Aho-Corasick, where even the contiguous NFA is likely to do much better. + if lits.len() < 3000 { + debug!("skipping Aho-Corasick because there are too few literals"); + return None; + } + Some(lits) +} diff --git a/vendor/regex-automata/src/meta/mod.rs b/vendor/regex-automata/src/meta/mod.rs new file mode 100644 index 0000000..01f430f --- /dev/null +++ b/vendor/regex-automata/src/meta/mod.rs @@ -0,0 +1,62 @@ +/*! +Provides a regex matcher that composes several other regex matchers +automatically. + +This module is home to a meta [`Regex`], which provides a convenient high +level API for executing regular expressions in linear time. + +# Comparison with the `regex` crate + +A meta `Regex` is the implementation used directly by the `regex` crate. +Indeed, the `regex` crate API is essentially just a light wrapper over a meta +`Regex`. This means that if you need the full flexibility offered by this +API, then you should be able to switch to using this API directly without +any changes in match semantics or syntax. However, there are some API level +differences: + +* The `regex` crate API returns match objects that include references to the +haystack itself, which in turn makes it easy to access the matching strings +without having to slice the haystack yourself. In contrast, a meta `Regex` +returns match objects that only have offsets in them. +* At time of writing, a meta `Regex` doesn't have some of the convenience +routines that the `regex` crate has, such as replacements. Note though that +[`Captures::interpolate_string`](crate::util::captures::Captures::interpolate_string) +will handle the replacement string interpolation for you. +* A meta `Regex` supports the [`Input`](crate::Input) abstraction, which +provides a way to configure a search in more ways than is supported by the +`regex` crate. 
For example, [`Input::anchored`](crate::Input::anchored) can +be used to run an anchored search, regardless of whether the pattern is itself +anchored with a `^`. +* A meta `Regex` supports multi-pattern searching everywhere. +Indeed, every [`Match`](crate::Match) returned by the search APIs +include a [`PatternID`](crate::PatternID) indicating which pattern +matched. In the single pattern case, all matches correspond to +[`PatternID::ZERO`](crate::PatternID::ZERO). In contrast, the `regex` crate +has distinct `Regex` and a `RegexSet` APIs. The former only supports a single +pattern, while the latter supports multiple patterns but cannot report the +offsets of a match. +* A meta `Regex` provides the explicit capability of bypassing its internal +memory pool for automatically acquiring mutable scratch space required by its +internal regex engines. Namely, a [`Cache`] can be explicitly provided to lower +level routines such as [`Regex::search_with`]. + +*/ + +pub use self::{ + error::BuildError, + regex::{ + Builder, Cache, CapturesMatches, Config, FindMatches, Regex, Split, + SplitN, + }, +}; + +mod error; +#[cfg(any(feature = "dfa-build", feature = "hybrid"))] +mod limited; +mod literal; +mod regex; +mod reverse_inner; +#[cfg(any(feature = "dfa-build", feature = "hybrid"))] +mod stopat; +mod strategy; +mod wrappers; diff --git a/vendor/regex-automata/src/meta/regex.rs b/vendor/regex-automata/src/meta/regex.rs new file mode 100644 index 0000000..a06d2bb --- /dev/null +++ b/vendor/regex-automata/src/meta/regex.rs @@ -0,0 +1,3649 @@ +use core::{ + borrow::Borrow, + panic::{RefUnwindSafe, UnwindSafe}, +}; + +use alloc::{boxed::Box, sync::Arc, vec, vec::Vec}; + +use regex_syntax::{ + ast, + hir::{self, Hir}, +}; + +use crate::{ + meta::{ + error::BuildError, + strategy::{self, Strategy}, + wrappers, + }, + nfa::thompson::WhichCaptures, + util::{ + captures::{Captures, GroupInfo}, + iter, + pool::{Pool, PoolGuard}, + prefilter::Prefilter, + primitives::{NonMaxUsize, 
PatternID}, + search::{HalfMatch, Input, Match, MatchKind, PatternSet, Span}, + }, +}; + +/// A type alias for our pool of meta::Cache that fixes the type parameters to +/// what we use for the meta regex below. +type CachePool = Pool; + +/// Same as above, but for the guard returned by a pool. +type CachePoolGuard<'a> = PoolGuard<'a, Cache, CachePoolFn>; + +/// The type of the closure we use to create new caches. We need to spell out +/// all of the marker traits or else we risk leaking !MARKER impls. +type CachePoolFn = + Box Cache + Send + Sync + UnwindSafe + RefUnwindSafe>; + +/// A regex matcher that works by composing several other regex matchers +/// automatically. +/// +/// In effect, a meta regex papers over a lot of the quirks or performance +/// problems in each of the regex engines in this crate. Its goal is to provide +/// an infallible and simple API that "just does the right thing" in the common +/// case. +/// +/// A meta regex is the implementation of a `Regex` in the `regex` crate. +/// Indeed, the `regex` crate API is essentially just a light wrapper over +/// this type. This includes the `regex` crate's `RegexSet` API! +/// +/// # Composition +/// +/// This is called a "meta" matcher precisely because it uses other regex +/// matchers to provide a convenient high level regex API. Here are some +/// examples of how other regex matchers are composed: +/// +/// * When calling [`Regex::captures`], instead of immediately +/// running a slower but more capable regex engine like the +/// [`PikeVM`](crate::nfa::thompson::pikevm::PikeVM), the meta regex engine +/// will usually first look for the bounds of a match with a higher throughput +/// regex engine like a [lazy DFA](crate::hybrid). Only when a match is found +/// is a slower engine like `PikeVM` used to find the matching span for each +/// capture group. 
+/// * While higher throughout engines like the lazy DFA cannot handle +/// Unicode word boundaries in general, they can still be used on pure ASCII +/// haystacks by pretending that Unicode word boundaries are just plain ASCII +/// word boundaries. However, if a haystack is not ASCII, the meta regex engine +/// will automatically switch to a (possibly slower) regex engine that supports +/// Unicode word boundaries in general. +/// * In some cases where a regex pattern is just a simple literal or a small +/// set of literals, an actual regex engine won't be used at all. Instead, +/// substring or multi-substring search algorithms will be employed. +/// +/// There are many other forms of composition happening too, but the above +/// should give a general idea. In particular, it may perhaps be surprising +/// that *multiple* regex engines might get executed for a single search. That +/// is, the decision of what regex engine to use is not _just_ based on the +/// pattern, but also based on the dynamic execution of the search itself. +/// +/// The primary reason for this composition is performance. The fundamental +/// tension is that the faster engines tend to be less capable, and the more +/// capable engines tend to be slower. +/// +/// Note that the forms of composition that are allowed are determined by +/// compile time crate features and configuration. For example, if the `hybrid` +/// feature isn't enabled, or if [`Config::hybrid`] has been disabled, then the +/// meta regex engine will never use a lazy DFA. +/// +/// # Synchronization and cloning +/// +/// Most of the regex engines in this crate require some kind of mutable +/// "scratch" space to read and write from while performing a search. Since +/// a meta regex composes these regex engines, a meta regex also requires +/// mutable scratch space. This scratch space is called a [`Cache`]. 
+/// +/// Most regex engines _also_ usually have a read-only component, typically +/// a [Thompson `NFA`](crate::nfa::thompson::NFA). +/// +/// In order to make the `Regex` API convenient, most of the routines hide +/// the fact that a `Cache` is needed at all. To achieve this, a [memory +/// pool](crate::util::pool::Pool) is used internally to retrieve `Cache` +/// values in a thread safe way that also permits reuse. This in turn implies +/// that every such search call requires some form of synchronization. Usually +/// this synchronization is fast enough to not notice, but in some cases, it +/// can be a bottleneck. This typically occurs when all of the following are +/// true: +/// +/// * The same `Regex` is shared across multiple threads simultaneously, +/// usually via a [`util::lazy::Lazy`](crate::util::lazy::Lazy) or something +/// similar from the `once_cell` or `lazy_static` crates. +/// * The primary unit of work in each thread is a regex search. +/// * Searches are run on very short haystacks. +/// +/// This particular case can lead to high contention on the pool used by a +/// `Regex` internally, which can in turn increase latency to a noticeable +/// effect. This cost can be mitigated in one of the following ways: +/// +/// * Use a distinct copy of a `Regex` in each thread, usually by cloning it. +/// Cloning a `Regex` _does not_ do a deep copy of its read-only component. +/// But it does lead to each `Regex` having its own memory pool, which in +/// turn eliminates the problem of contention. In general, this technique should +/// not result in any additional memory usage when compared to sharing the same +/// `Regex` across multiple threads simultaneously. +/// * Use lower level APIs, like [`Regex::search_with`], which permit passing +/// a `Cache` explicitly. In this case, it is up to you to determine how best +/// to provide a `Cache`. For example, you might put a `Cache` in thread-local +/// storage if your use case allows for it. 
+/// +/// Overall, this is an issue that happens rarely in practice, but it can +/// happen. +/// +/// # Warning: spin-locks may be used in alloc-only mode +/// +/// When this crate is built without the `std` feature and the high level APIs +/// on a `Regex` are used, then a spin-lock will be used to synchronize access +/// to an internal pool of `Cache` values. This may be undesirable because +/// a spin-lock is [effectively impossible to implement correctly in user +/// space][spinlocks-are-bad]. That is, more concretely, the spin-lock could +/// result in a deadlock. +/// +/// [spinlocks-are-bad]: https://matklad.github.io/2020/01/02/spinlocks-considered-harmful.html +/// +/// If one wants to avoid the use of spin-locks when the `std` feature is +/// disabled, then you must use APIs that accept a `Cache` value explicitly. +/// For example, [`Regex::search_with`]. +/// +/// # Example +/// +/// ``` +/// use regex_automata::meta::Regex; +/// +/// let re = Regex::new(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}$")?; +/// assert!(re.is_match("2010-03-14")); +/// +/// # Ok::<(), Box>(()) +/// ``` +/// +/// # Example: anchored search +/// +/// This example shows how to use [`Input::anchored`] to run an anchored +/// search, even when the regex pattern itself isn't anchored. An anchored +/// search guarantees that if a match is found, then the start offset of the +/// match corresponds to the offset at which the search was started. +/// +/// ``` +/// use regex_automata::{meta::Regex, Anchored, Input, Match}; +/// +/// let re = Regex::new(r"\bfoo\b")?; +/// let input = Input::new("xx foo xx").range(3..).anchored(Anchored::Yes); +/// // The offsets are in terms of the original haystack. +/// assert_eq!(Some(Match::must(0, 3..6)), re.find(input)); +/// +/// // Notice that no match occurs here, because \b still takes the +/// // surrounding context into account, even if it means looking back +/// // before the start of your search. 
+/// let hay = "xxfoo xx"; +/// let input = Input::new(hay).range(2..).anchored(Anchored::Yes); +/// assert_eq!(None, re.find(input)); +/// // Indeed, you cannot achieve the above by simply slicing the +/// // haystack itself, since the regex engine can't see the +/// // surrounding context. This is why 'Input' permits setting +/// // the bounds of a search! +/// let input = Input::new(&hay[2..]).anchored(Anchored::Yes); +/// // WRONG! +/// assert_eq!(Some(Match::must(0, 0..3)), re.find(input)); +/// +/// # Ok::<(), Box>(()) +/// ``` +/// +/// # Example: earliest search +/// +/// This example shows how to use [`Input::earliest`] to run a search that +/// might stop before finding the typical leftmost match. +/// +/// ``` +/// use regex_automata::{meta::Regex, Anchored, Input, Match}; +/// +/// let re = Regex::new(r"[a-z]{3}|b")?; +/// let input = Input::new("abc").earliest(true); +/// assert_eq!(Some(Match::must(0, 1..2)), re.find(input)); +/// +/// // Note that "earliest" isn't really a match semantic unto itself. +/// // Instead, it is merely an instruction to whatever regex engine +/// // gets used internally to quit as soon as it can. For example, +/// // this regex uses a different search technique, and winds up +/// // producing a different (but valid) match! 
+/// let re = Regex::new(r"abc|b")?; +/// let input = Input::new("abc").earliest(true); +/// assert_eq!(Some(Match::must(0, 0..3)), re.find(input)); +/// +/// # Ok::<(), Box>(()) +/// ``` +/// +/// # Example: change the line terminator +/// +/// This example shows how to enable multi-line mode by default and change +/// the line terminator to the NUL byte: +/// +/// ``` +/// use regex_automata::{meta::Regex, util::syntax, Match}; +/// +/// let re = Regex::builder() +/// .syntax(syntax::Config::new().multi_line(true)) +/// .configure(Regex::config().line_terminator(b'\x00')) +/// .build(r"^foo$")?; +/// let hay = "\x00foo\x00"; +/// assert_eq!(Some(Match::must(0, 1..4)), re.find(hay)); +/// +/// # Ok::<(), Box>(()) +/// ``` +#[derive(Debug)] +pub struct Regex { + /// The actual regex implementation. + imp: Arc, + /// A thread safe pool of caches. + /// + /// For the higher level search APIs, a `Cache` is automatically plucked + /// from this pool before running a search. The lower level `with` methods + /// permit the caller to provide their own cache, thereby bypassing + /// accesses to this pool. + /// + /// Note that we put this outside the `Arc` so that cloning a `Regex` + /// results in creating a fresh `CachePool`. This in turn permits callers + /// to clone regexes into separate threads where each such regex gets + /// the pool's "thread owner" optimization. Otherwise, if one shares the + /// `Regex` directly, then the pool will go through a slower mutex path for + /// all threads except for the "owner." + pool: CachePool, +} + +/// The internal implementation of `Regex`, split out so that it can be wrapped +/// in an `Arc`. +#[derive(Debug)] +struct RegexI { + /// The core matching engine. + /// + /// Why is this reference counted when RegexI is already wrapped in an Arc? + /// Well, we need to capture this in a closure to our `Pool` below in order + /// to create new `Cache` values when needed. 
So since it needs to be in + /// two places, we make it reference counted. + /// + /// We make `RegexI` itself reference counted too so that `Regex` itself + /// stays extremely small and very cheap to clone. + strat: Arc, + /// Metadata about the regexes driving the strategy. The metadata is also + /// usually stored inside the strategy too, but we put it here as well + /// so that we can get quick access to it (without virtual calls) before + /// executing the regex engine. For example, we use this metadata to + /// detect a subset of cases where we know a match is impossible, and can + /// thus avoid calling into the strategy at all. + /// + /// Since `RegexInfo` is stored in multiple places, it is also reference + /// counted. + info: RegexInfo, +} + +/// Convenience constructors for a `Regex` using the default configuration. +impl Regex { + /// Builds a `Regex` from a single pattern string using the default + /// configuration. + /// + /// If there was a problem parsing the pattern or a problem turning it into + /// a regex matcher, then an error is returned. + /// + /// If you want to change the configuration of a `Regex`, use a [`Builder`] + /// with a [`Config`]. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{meta::Regex, Match}; + /// + /// let re = Regex::new(r"(?Rm)^foo$")?; + /// let hay = "\r\nfoo\r\n"; + /// assert_eq!(Some(Match::must(0, 2..5)), re.find(hay)); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn new(pattern: &str) -> Result { + Regex::builder().build(pattern) + } + + /// Builds a `Regex` from many pattern strings using the default + /// configuration. + /// + /// If there was a problem parsing any of the patterns or a problem turning + /// them into a regex matcher, then an error is returned. + /// + /// If you want to change the configuration of a `Regex`, use a [`Builder`] + /// with a [`Config`]. 
+ /// + /// # Example: simple lexer + /// + /// This simplistic example leverages the multi-pattern support to build a + /// simple little lexer. The pattern ID in the match tells you which regex + /// matched, which in turn might be used to map back to the "type" of the + /// token returned by the lexer. + /// + /// ``` + /// use regex_automata::{meta::Regex, Match}; + /// + /// let re = Regex::new_many(&[ + /// r"[[:space:]]", + /// r"[A-Za-z0-9][A-Za-z0-9_]+", + /// r"->", + /// r".", + /// ])?; + /// let haystack = "fn is_boss(bruce: i32, springsteen: String) -> bool;"; + /// let matches: Vec = re.find_iter(haystack).collect(); + /// assert_eq!(matches, vec![ + /// Match::must(1, 0..2), // 'fn' + /// Match::must(0, 2..3), // ' ' + /// Match::must(1, 3..10), // 'is_boss' + /// Match::must(3, 10..11), // '(' + /// Match::must(1, 11..16), // 'bruce' + /// Match::must(3, 16..17), // ':' + /// Match::must(0, 17..18), // ' ' + /// Match::must(1, 18..21), // 'i32' + /// Match::must(3, 21..22), // ',' + /// Match::must(0, 22..23), // ' ' + /// Match::must(1, 23..34), // 'springsteen' + /// Match::must(3, 34..35), // ':' + /// Match::must(0, 35..36), // ' ' + /// Match::must(1, 36..42), // 'String' + /// Match::must(3, 42..43), // ')' + /// Match::must(0, 43..44), // ' ' + /// Match::must(2, 44..46), // '->' + /// Match::must(0, 46..47), // ' ' + /// Match::must(1, 47..51), // 'bool' + /// Match::must(3, 51..52), // ';' + /// ]); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// One can write a lexer like the above using a regex like + /// `(?P[[:space:]])|(?P[A-Za-z0-9][A-Za-z0-9_]+)|...`, + /// but then you need to ask whether capture group matched to determine + /// which branch in the regex matched, and thus, which token the match + /// corresponds to. In contrast, the above example includes the pattern ID + /// in the match. There's no need to use capture groups at all. 
+ /// + /// # Example: finding the pattern that caused an error + /// + /// When a syntax error occurs, it is possible to ask which pattern + /// caused the syntax error. + /// + /// ``` + /// use regex_automata::{meta::Regex, PatternID}; + /// + /// let err = Regex::new_many(&["a", "b", r"\p{Foo}", "c"]).unwrap_err(); + /// assert_eq!(Some(PatternID::must(2)), err.pattern()); + /// ``` + /// + /// # Example: zero patterns is valid + /// + /// Building a regex with zero patterns results in a regex that never + /// matches anything. Because this routine is generic, passing an empty + /// slice usually requires a turbo-fish (or something else to help type + /// inference). + /// + /// ``` + /// use regex_automata::{meta::Regex, util::syntax, Match}; + /// + /// let re = Regex::new_many::<&str>(&[])?; + /// assert_eq!(None, re.find("")); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn new_many>( + patterns: &[P], + ) -> Result { + Regex::builder().build_many(patterns) + } + + /// Return a default configuration for a `Regex`. + /// + /// This is a convenience routine to avoid needing to import the [`Config`] + /// type when customizing the construction of a `Regex`. + /// + /// # Example: lower the NFA size limit + /// + /// In some cases, the default size limit might be too big. The size limit + /// can be lowered, which will prevent large regex patterns from compiling. + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::meta::Regex; + /// + /// let result = Regex::builder() + /// .configure(Regex::config().nfa_size_limit(Some(20 * (1<<10)))) + /// // Not even 20KB is enough to build a single large Unicode class! + /// .build(r"\pL"); + /// assert!(result.is_err()); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn config() -> Config { + Config::new() + } + + /// Return a builder for configuring the construction of a `Regex`. 
+ /// + /// This is a convenience routine to avoid needing to import the + /// [`Builder`] type in common cases. + /// + /// # Example: change the line terminator + /// + /// This example shows how to enable multi-line mode by default and change + /// the line terminator to the NUL byte: + /// + /// ``` + /// use regex_automata::{meta::Regex, util::syntax, Match}; + /// + /// let re = Regex::builder() + /// .syntax(syntax::Config::new().multi_line(true)) + /// .configure(Regex::config().line_terminator(b'\x00')) + /// .build(r"^foo$")?; + /// let hay = "\x00foo\x00"; + /// assert_eq!(Some(Match::must(0, 1..4)), re.find(hay)); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn builder() -> Builder { + Builder::new() + } +} + +/// High level convenience routines for using a regex to search a haystack. +impl Regex { + /// Returns true if and only if this regex matches the given haystack. + /// + /// This routine may short circuit if it knows that scanning future input + /// will never lead to a different result. (Consider how this might make + /// a difference given the regex `a+` on the haystack `aaaaaaaaaaaaaaa`. + /// This routine _may_ stop after it sees the first `a`, but routines like + /// `find` need to continue searching because `+` is greedy by default.) + /// + /// # Example + /// + /// ``` + /// use regex_automata::meta::Regex; + /// + /// let re = Regex::new("foo[0-9]+bar")?; + /// + /// assert!(re.is_match("foo12345bar")); + /// assert!(!re.is_match("foobar")); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// # Example: consistency with search APIs + /// + /// `is_match` is guaranteed to return `true` whenever `find` returns a + /// match. This includes searches that are executed entirely within a + /// codepoint: + /// + /// ``` + /// use regex_automata::{meta::Regex, Input}; + /// + /// let re = Regex::new("a*")?; + /// + /// // This doesn't match because the default configuration bans empty + /// // matches from splitting a codepoint. 
+ /// assert!(!re.is_match(Input::new("☃").span(1..2))); + /// assert_eq!(None, re.find(Input::new("☃").span(1..2))); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// Notice that when UTF-8 mode is disabled, then the above reports a + /// match because the restriction against zero-width matches that split a + /// codepoint has been lifted: + /// + /// ``` + /// use regex_automata::{meta::Regex, Input, Match}; + /// + /// let re = Regex::builder() + /// .configure(Regex::config().utf8_empty(false)) + /// .build("a*")?; + /// + /// assert!(re.is_match(Input::new("☃").span(1..2))); + /// assert_eq!( + /// Some(Match::must(0, 1..1)), + /// re.find(Input::new("☃").span(1..2)), + /// ); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// A similar idea applies when using line anchors with CRLF mode enabled, + /// which prevents them from matching between a `\r` and a `\n`. + /// + /// ``` + /// use regex_automata::{meta::Regex, Input, Match}; + /// + /// let re = Regex::new(r"(?Rm:$)")?; + /// assert!(!re.is_match(Input::new("\r\n").span(1..1))); + /// // A regular line anchor, which only considers \n as a + /// // line terminator, will match. + /// let re = Regex::new(r"(?m:$)")?; + /// assert!(re.is_match(Input::new("\r\n").span(1..1))); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn is_match<'h, I: Into>>(&self, input: I) -> bool { + let input = input.into().earliest(true); + if self.imp.info.is_impossible(&input) { + return false; + } + let mut guard = self.pool.get(); + let result = self.imp.strat.is_match(&mut guard, &input); + // See 'Regex::search' for why we put the guard back explicitly. + PoolGuard::put(guard); + result + } + + /// Executes a leftmost search and returns the first match that is found, + /// if one exists. 
+ /// + /// # Example + /// + /// ``` + /// use regex_automata::{meta::Regex, Match}; + /// + /// let re = Regex::new("foo[0-9]+")?; + /// assert_eq!(Some(Match::must(0, 0..8)), re.find("foo12345")); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn find<'h, I: Into>>(&self, input: I) -> Option { + self.search(&input.into()) + } + + /// Executes a leftmost forward search and writes the spans of capturing + /// groups that participated in a match into the provided [`Captures`] + /// value. If no match was found, then [`Captures::is_match`] is guaranteed + /// to return `false`. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{meta::Regex, Span}; + /// + /// let re = Regex::new(r"^([0-9]{4})-([0-9]{2})-([0-9]{2})$")?; + /// let mut caps = re.create_captures(); + /// + /// re.captures("2010-03-14", &mut caps); + /// assert!(caps.is_match()); + /// assert_eq!(Some(Span::from(0..4)), caps.get_group(1)); + /// assert_eq!(Some(Span::from(5..7)), caps.get_group(2)); + /// assert_eq!(Some(Span::from(8..10)), caps.get_group(3)); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn captures<'h, I: Into>>( + &self, + input: I, + caps: &mut Captures, + ) { + self.search_captures(&input.into(), caps) + } + + /// Returns an iterator over all non-overlapping leftmost matches in + /// the given haystack. If no match exists, then the iterator yields no + /// elements. 
+ /// + /// # Example + /// + /// ``` + /// use regex_automata::{meta::Regex, Match}; + /// + /// let re = Regex::new("foo[0-9]+")?; + /// let haystack = "foo1 foo12 foo123"; + /// let matches: Vec = re.find_iter(haystack).collect(); + /// assert_eq!(matches, vec![ + /// Match::must(0, 0..4), + /// Match::must(0, 5..10), + /// Match::must(0, 11..17), + /// ]); + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn find_iter<'r, 'h, I: Into>>( + &'r self, + input: I, + ) -> FindMatches<'r, 'h> { + let cache = self.pool.get(); + let it = iter::Searcher::new(input.into()); + FindMatches { re: self, cache, it } + } + + /// Returns an iterator over all non-overlapping `Captures` values. If no + /// match exists, then the iterator yields no elements. + /// + /// This yields the same matches as [`Regex::find_iter`], but it includes + /// the spans of all capturing groups that participate in each match. + /// + /// **Tip:** See [`util::iter::Searcher`](crate::util::iter::Searcher) for + /// how to correctly iterate over all matches in a haystack while avoiding + /// the creation of a new `Captures` value for every match. (Which you are + /// forced to do with an `Iterator`.) + /// + /// # Example + /// + /// ``` + /// use regex_automata::{meta::Regex, Span}; + /// + /// let re = Regex::new("foo(?P[0-9]+)")?; + /// + /// let haystack = "foo1 foo12 foo123"; + /// let matches: Vec = re + /// .captures_iter(haystack) + /// // The unwrap is OK since 'numbers' matches if the pattern matches. 
+ /// .map(|caps| caps.get_group_by_name("numbers").unwrap()) + /// .collect(); + /// assert_eq!(matches, vec![ + /// Span::from(3..4), + /// Span::from(8..10), + /// Span::from(14..17), + /// ]); + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn captures_iter<'r, 'h, I: Into>>( + &'r self, + input: I, + ) -> CapturesMatches<'r, 'h> { + let cache = self.pool.get(); + let caps = self.create_captures(); + let it = iter::Searcher::new(input.into()); + CapturesMatches { re: self, cache, caps, it } + } + + /// Returns an iterator of spans of the haystack given, delimited by a + /// match of the regex. Namely, each element of the iterator corresponds to + /// a part of the haystack that *isn't* matched by the regular expression. + /// + /// # Example + /// + /// To split a string delimited by arbitrary amounts of spaces or tabs: + /// + /// ``` + /// use regex_automata::meta::Regex; + /// + /// let re = Regex::new(r"[ \t]+")?; + /// let hay = "a b \t c\td e"; + /// let fields: Vec<&str> = re.split(hay).map(|span| &hay[span]).collect(); + /// assert_eq!(fields, vec!["a", "b", "c", "d", "e"]); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// # Example: more cases + /// + /// Basic usage: + /// + /// ``` + /// use regex_automata::meta::Regex; + /// + /// let re = Regex::new(r" ")?; + /// let hay = "Mary had a little lamb"; + /// let got: Vec<&str> = re.split(hay).map(|sp| &hay[sp]).collect(); + /// assert_eq!(got, vec!["Mary", "had", "a", "little", "lamb"]); + /// + /// let re = Regex::new(r"X")?; + /// let hay = ""; + /// let got: Vec<&str> = re.split(hay).map(|sp| &hay[sp]).collect(); + /// assert_eq!(got, vec![""]); + /// + /// let re = Regex::new(r"X")?; + /// let hay = "lionXXtigerXleopard"; + /// let got: Vec<&str> = re.split(hay).map(|sp| &hay[sp]).collect(); + /// assert_eq!(got, vec!["lion", "", "tiger", "leopard"]); + /// + /// let re = Regex::new(r"::")?; + /// let hay = "lion::tiger::leopard"; + /// let got: Vec<&str> = re.split(hay).map(|sp| 
&hay[sp]).collect(); + /// assert_eq!(got, vec!["lion", "tiger", "leopard"]); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// If a haystack contains multiple contiguous matches, you will end up + /// with empty spans yielded by the iterator: + /// + /// ``` + /// use regex_automata::meta::Regex; + /// + /// let re = Regex::new(r"X")?; + /// let hay = "XXXXaXXbXc"; + /// let got: Vec<&str> = re.split(hay).map(|sp| &hay[sp]).collect(); + /// assert_eq!(got, vec!["", "", "", "", "a", "", "b", "c"]); + /// + /// let re = Regex::new(r"/")?; + /// let hay = "(///)"; + /// let got: Vec<&str> = re.split(hay).map(|sp| &hay[sp]).collect(); + /// assert_eq!(got, vec!["(", "", "", ")"]); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// Separators at the start or end of a haystack are neighbored by empty + /// spans. + /// + /// ``` + /// use regex_automata::meta::Regex; + /// + /// let re = Regex::new(r"0")?; + /// let hay = "010"; + /// let got: Vec<&str> = re.split(hay).map(|sp| &hay[sp]).collect(); + /// assert_eq!(got, vec!["", "1", ""]); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// When the empty string is used as a regex, it splits at every valid + /// UTF-8 boundary by default (which includes the beginning and end of the + /// haystack): + /// + /// ``` + /// use regex_automata::meta::Regex; + /// + /// let re = Regex::new(r"")?; + /// let hay = "rust"; + /// let got: Vec<&str> = re.split(hay).map(|sp| &hay[sp]).collect(); + /// assert_eq!(got, vec!["", "r", "u", "s", "t", ""]); + /// + /// // Splitting by an empty string is UTF-8 aware by default! + /// let re = Regex::new(r"")?; + /// let hay = "☃"; + /// let got: Vec<&str> = re.split(hay).map(|sp| &hay[sp]).collect(); + /// assert_eq!(got, vec!["", "☃", ""]); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// But note that UTF-8 mode for empty strings can be disabled, which will + /// then result in a match at every byte offset in the haystack, + /// including between every UTF-8 code unit. 
+ /// + /// ``` + /// use regex_automata::meta::Regex; + /// + /// let re = Regex::builder() + /// .configure(Regex::config().utf8_empty(false)) + /// .build(r"")?; + /// let hay = "☃".as_bytes(); + /// let got: Vec<&[u8]> = re.split(hay).map(|sp| &hay[sp]).collect(); + /// assert_eq!(got, vec![ + /// // Writing byte string slices is just brutal. The problem is that + /// // b"foo" has type &[u8; 3] instead of &[u8]. + /// &[][..], &[b'\xE2'][..], &[b'\x98'][..], &[b'\x83'][..], &[][..], + /// ]); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// Contiguous separators (commonly shows up with whitespace), can lead to + /// possibly surprising behavior. For example, this code is correct: + /// + /// ``` + /// use regex_automata::meta::Regex; + /// + /// let re = Regex::new(r" ")?; + /// let hay = " a b c"; + /// let got: Vec<&str> = re.split(hay).map(|sp| &hay[sp]).collect(); + /// assert_eq!(got, vec!["", "", "", "", "a", "", "b", "c"]); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// It does *not* give you `["a", "b", "c"]`. For that behavior, you'd want + /// to match contiguous space characters: + /// + /// ``` + /// use regex_automata::meta::Regex; + /// + /// let re = Regex::new(r" +")?; + /// let hay = " a b c"; + /// let got: Vec<&str> = re.split(hay).map(|sp| &hay[sp]).collect(); + /// // N.B. This does still include a leading empty span because ' +' + /// // matches at the beginning of the haystack. + /// assert_eq!(got, vec!["", "a", "b", "c"]); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn split<'r, 'h, I: Into>>( + &'r self, + input: I, + ) -> Split<'r, 'h> { + Split { finder: self.find_iter(input), last: 0 } + } + + /// Returns an iterator of at most `limit` spans of the haystack given, + /// delimited by a match of the regex. (A `limit` of `0` will return no + /// spans.) Namely, each element of the iterator corresponds to a part + /// of the haystack that *isn't* matched by the regular expression. 
The + /// remainder of the haystack that is not split will be the last element in + /// the iterator. + /// + /// # Example + /// + /// Get the first two words in some haystack: + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::meta::Regex; + /// + /// let re = Regex::new(r"\W+").unwrap(); + /// let hay = "Hey! How are you?"; + /// let fields: Vec<&str> = + /// re.splitn(hay, 3).map(|span| &hay[span]).collect(); + /// assert_eq!(fields, vec!["Hey", "How", "are you?"]); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// # Examples: more cases + /// + /// ``` + /// use regex_automata::meta::Regex; + /// + /// let re = Regex::new(r" ")?; + /// let hay = "Mary had a little lamb"; + /// let got: Vec<&str> = re.splitn(hay, 3).map(|sp| &hay[sp]).collect(); + /// assert_eq!(got, vec!["Mary", "had", "a little lamb"]); + /// + /// let re = Regex::new(r"X")?; + /// let hay = ""; + /// let got: Vec<&str> = re.splitn(hay, 3).map(|sp| &hay[sp]).collect(); + /// assert_eq!(got, vec![""]); + /// + /// let re = Regex::new(r"X")?; + /// let hay = "lionXXtigerXleopard"; + /// let got: Vec<&str> = re.splitn(hay, 3).map(|sp| &hay[sp]).collect(); + /// assert_eq!(got, vec!["lion", "", "tigerXleopard"]); + /// + /// let re = Regex::new(r"::")?; + /// let hay = "lion::tiger::leopard"; + /// let got: Vec<&str> = re.splitn(hay, 2).map(|sp| &hay[sp]).collect(); + /// assert_eq!(got, vec!["lion", "tiger::leopard"]); + /// + /// let re = Regex::new(r"X")?; + /// let hay = "abcXdef"; + /// let got: Vec<&str> = re.splitn(hay, 1).map(|sp| &hay[sp]).collect(); + /// assert_eq!(got, vec!["abcXdef"]); + /// + /// let re = Regex::new(r"X")?; + /// let hay = "abcdef"; + /// let got: Vec<&str> = re.splitn(hay, 2).map(|sp| &hay[sp]).collect(); + /// assert_eq!(got, vec!["abcdef"]); + /// + /// let re = Regex::new(r"X")?; + /// let hay = "abcXdef"; + /// let got: Vec<&str> = re.splitn(hay, 0).map(|sp| &hay[sp]).collect(); + /// 
assert!(got.is_empty()); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn splitn<'r, 'h, I: Into>>( + &'r self, + input: I, + limit: usize, + ) -> SplitN<'r, 'h> { + SplitN { splits: self.split(input), limit } + } +} + +/// Lower level search routines that give more control. +impl Regex { + /// Returns the start and end offset of the leftmost match. If no match + /// exists, then `None` is returned. + /// + /// This is like [`Regex::find`], but it accepts a concrete `&Input` + /// instead of an `Into`. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{meta::Regex, Input, Match}; + /// + /// let re = Regex::new(r"Samwise|Sam")?; + /// let input = Input::new( + /// "one of the chief characters, Samwise the Brave", + /// ); + /// assert_eq!(Some(Match::must(0, 29..36)), re.search(&input)); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn search(&self, input: &Input<'_>) -> Option { + if self.imp.info.is_impossible(input) { + return None; + } + let mut guard = self.pool.get(); + let result = self.imp.strat.search(&mut guard, input); + // We do this dance with the guard and explicitly put it back in the + // pool because it seems to result in better codegen. If we let the + // guard's Drop impl put it back in the pool, then functions like + // ptr::drop_in_place get called and they *don't* get inlined. This + // isn't usually a big deal, but in latency sensitive benchmarks the + // extra function call can matter. + // + // I used `rebar measure -f '^grep/every-line$' -e meta` to measure + // the effects here. + // + // Note that this doesn't eliminate the latency effects of using the + // pool. There is still some (minor) cost for the "thread owner" of the + // pool. (i.e., The thread that first calls a regex search routine.) + // However, for other threads using the regex, the pool access can be + // quite expensive as it goes through a mutex.
Callers can avoid this + // by either cloning the Regex (which creates a distinct copy of the + // pool), or callers can use the lower level APIs that accept a 'Cache' + // directly and do their own handling. + PoolGuard::put(guard); + result + } + + /// Returns the end offset of the leftmost match. If no match exists, then + /// `None` is returned. + /// + /// This is distinct from [`Regex::search`] in that it only returns the end + /// of a match and not the start of the match. Depending on a variety of + /// implementation details, this _may_ permit the regex engine to do less + /// overall work. For example, if a DFA is being used to execute a search, + /// then the start of a match usually requires running a separate DFA in + /// reverse to find the start of a match. If one only needs the end of + /// a match, then the separate reverse scan to find the start of a match + /// can be skipped. (Note that the reverse scan is avoided even when using + /// `Regex::search` when possible, for example, in the case of an anchored + /// search.) + /// + /// # Example + /// + /// ``` + /// use regex_automata::{meta::Regex, Input, HalfMatch}; + /// + /// let re = Regex::new(r"Samwise|Sam")?; + /// let input = Input::new( + /// "one of the chief characters, Samwise the Brave", + /// ); + /// assert_eq!(Some(HalfMatch::must(0, 36)), re.search_half(&input)); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn search_half(&self, input: &Input<'_>) -> Option { + if self.imp.info.is_impossible(input) { + return None; + } + let mut guard = self.pool.get(); + let result = self.imp.strat.search_half(&mut guard, input); + // See 'Regex::search' for why we put the guard back explicitly. + PoolGuard::put(guard); + result + } + + /// Executes a leftmost forward search and writes the spans of capturing + /// groups that participated in a match into the provided [`Captures`] + /// value.
If no match was found, then [`Captures::is_match`] is guaranteed + /// to return `false`. + /// + /// This is like [`Regex::captures`], but it accepts a concrete `&Input` + /// instead of an `Into`. + /// + /// # Example: specific pattern search + /// + /// This example shows how to build a multi-pattern `Regex` that permits + /// searching for specific patterns. + /// + /// ``` + /// use regex_automata::{ + /// meta::Regex, + /// Anchored, Match, PatternID, Input, + /// }; + /// + /// let re = Regex::new_many(&["[a-z0-9]{6}", "[a-z][a-z0-9]{5}"])?; + /// let mut caps = re.create_captures(); + /// let haystack = "foo123"; + /// + /// // Since we are using the default leftmost-first match and both + /// // patterns match at the same starting position, only the first pattern + /// // will be returned in this case when doing a search for any of the + /// // patterns. + /// let expected = Some(Match::must(0, 0..6)); + /// re.search_captures(&Input::new(haystack), &mut caps); + /// assert_eq!(expected, caps.get_match()); + /// + /// // But if we want to check whether some other pattern matches, then we + /// // can provide its pattern ID. + /// let expected = Some(Match::must(1, 0..6)); + /// let input = Input::new(haystack) + /// .anchored(Anchored::Pattern(PatternID::must(1))); + /// re.search_captures(&input, &mut caps); + /// assert_eq!(expected, caps.get_match()); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// # Example: specifying the bounds of a search + /// + /// This example shows how providing the bounds of a search can produce + /// different results than simply sub-slicing the haystack. 
+ /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{meta::Regex, Match, Input}; + /// + /// let re = Regex::new(r"\b[0-9]{3}\b")?; + /// let mut caps = re.create_captures(); + /// let haystack = "foo123bar"; + /// + /// // Since we sub-slice the haystack, the search doesn't know about + /// // the larger context and assumes that `123` is surrounded by word + /// // boundaries. And of course, the match position is reported relative + /// // to the sub-slice as well, which means we get `0..3` instead of + /// // `3..6`. + /// let expected = Some(Match::must(0, 0..3)); + /// let input = Input::new(&haystack[3..6]); + /// re.search_captures(&input, &mut caps); + /// assert_eq!(expected, caps.get_match()); + /// + /// // But if we provide the bounds of the search within the context of the + /// // entire haystack, then the search can take the surrounding context + /// // into account. (And if we did find a match, it would be reported + /// // as a valid offset into `haystack` instead of its sub-slice.) + /// let expected = None; + /// let input = Input::new(haystack).range(3..6); + /// re.search_captures(&input, &mut caps); + /// assert_eq!(expected, caps.get_match()); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn search_captures(&self, input: &Input<'_>, caps: &mut Captures) { + caps.set_pattern(None); + let pid = self.search_slots(input, caps.slots_mut()); + caps.set_pattern(pid); + } + + /// Executes a leftmost forward search and writes the spans of capturing + /// groups that participated in a match into the provided `slots`, and + /// returns the matching pattern ID. The contents of the slots for patterns + /// other than the matching pattern are unspecified. If no match was found, + /// then `None` is returned and the contents of `slots` are unspecified. + /// + /// This is like [`Regex::search_captures`], but it accepts a raw slots + /// slice instead of a `Captures` value.
This is useful in contexts where you + /// don't want or need to allocate a `Captures`. + /// + /// It is legal to pass _any_ number of slots to this routine. If the regex + /// engine would otherwise write a slot offset that doesn't fit in the + /// provided slice, then it is simply skipped. In general though, there are + /// usually three slice lengths you might want to use: + /// + /// * An empty slice, if you only care about which pattern matched. + /// * A slice with [`pattern_len() * 2`](Regex::pattern_len) slots, if you + /// only care about the overall match spans for each matching pattern. + /// * A slice with + /// [`slot_len()`](crate::util::captures::GroupInfo::slot_len) slots, which + /// permits recording match offsets for every capturing group in every + /// pattern. + /// + /// # Example + /// + /// This example shows how to find the overall match offsets in a + /// multi-pattern search without allocating a `Captures` value. Indeed, we + /// can put our slots right on the stack. + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{meta::Regex, PatternID, Input}; + /// + /// let re = Regex::new_many(&[ + /// r"\pL+", + /// r"\d+", + /// ])?; + /// let input = Input::new("!@#123"); + /// + /// // We only care about the overall match offsets here, so we just + /// // allocate two slots for each pattern. Each slot records the start + /// // and end of the match. + /// let mut slots = [None; 4]; + /// let pid = re.search_slots(&input, &mut slots); + /// assert_eq!(Some(PatternID::must(1)), pid); + /// + /// // The overall match offsets are always at 'pid * 2' and 'pid * 2 + 1'. + /// // See 'GroupInfo' for more details on the mapping between groups and + /// // slot indices. 
+ /// let slot_start = pid.unwrap().as_usize() * 2; + /// let slot_end = slot_start + 1; + /// assert_eq!(Some(3), slots[slot_start].map(|s| s.get())); + /// assert_eq!(Some(6), slots[slot_end].map(|s| s.get())); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn search_slots( + &self, + input: &Input<'_>, + slots: &mut [Option], + ) -> Option { + if self.imp.info.is_impossible(input) { + return None; + } + let mut guard = self.pool.get(); + let result = self.imp.strat.search_slots(&mut guard, input, slots); + // See 'Regex::search' for why we put the guard back explicitly. + PoolGuard::put(guard); + result + } + + /// Writes the set of patterns that match anywhere in the given search + /// configuration to `patset`. If multiple patterns match at the same + /// position and this `Regex` was configured with [`MatchKind::All`] + /// semantics, then all matching patterns are written to the given set. + /// + /// Unless all of the patterns in this `Regex` are anchored, then generally + /// speaking, this will scan the entire haystack. + /// + /// This search routine *does not* clear the pattern set. This gives some + /// flexibility to the caller (e.g., running multiple searches with the + /// same pattern set), but does make the API bug-prone if you're reusing + /// the same pattern set for multiple searches but intended them to be + /// independent. + /// + /// If a pattern ID matched but the given `PatternSet` does not have + /// sufficient capacity to store it, then it is not inserted and silently + /// dropped. + /// + /// # Example + /// + /// This example shows how to find all matching patterns in a haystack, + /// even when some patterns match at the same position as other patterns. + /// It is important that we configure the `Regex` with [`MatchKind::All`] + /// semantics here, or else overlapping matches will not be reported. 
+ /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{meta::Regex, Input, MatchKind, PatternSet}; + /// + /// let patterns = &[ + /// r"\w+", r"\d+", r"\pL+", r"foo", r"bar", r"barfoo", r"foobar", + /// ]; + /// let re = Regex::builder() + /// .configure(Regex::config().match_kind(MatchKind::All)) + /// .build_many(patterns)?; + /// + /// let input = Input::new("foobar"); + /// let mut patset = PatternSet::new(re.pattern_len()); + /// re.which_overlapping_matches(&input, &mut patset); + /// let expected = vec![0, 2, 3, 4, 6]; + /// let got: Vec = patset.iter().map(|p| p.as_usize()).collect(); + /// assert_eq!(expected, got); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn which_overlapping_matches( + &self, + input: &Input<'_>, + patset: &mut PatternSet, + ) { + if self.imp.info.is_impossible(input) { + return; + } + let mut guard = self.pool.get(); + let result = self + .imp + .strat + .which_overlapping_matches(&mut guard, input, patset); + // See 'Regex::search' for why we put the guard back explicitly. + PoolGuard::put(guard); + result + } +} + +/// Lower level search routines that give more control, and require the caller +/// to provide an explicit [`Cache`] parameter. +impl Regex { + /// This is like [`Regex::search`], but requires the caller to + /// explicitly pass a [`Cache`]. + /// + /// # Why pass a `Cache` explicitly? + /// + /// Passing a `Cache` explicitly will bypass the use of an internal memory + /// pool used by `Regex` to get a `Cache` for a search. The use of this + /// pool can be slower in some cases when a `Regex` is used from multiple + /// threads simultaneously. Typically, performance only becomes an issue + /// when there is heavy contention, which in turn usually only occurs + /// when each thread's primary unit of work is a regex search on a small + /// haystack. 
+ /// + /// # Example + /// + /// ``` + /// use regex_automata::{meta::Regex, Input, Match}; + /// + /// let re = Regex::new(r"Samwise|Sam")?; + /// let mut cache = re.create_cache(); + /// let input = Input::new( + /// "one of the chief characters, Samwise the Brave", + /// ); + /// assert_eq!( + /// Some(Match::must(0, 29..36)), + /// re.search_with(&mut cache, &input), + /// ); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn search_with( + &self, + cache: &mut Cache, + input: &Input<'_>, + ) -> Option { + if self.imp.info.is_impossible(input) { + return None; + } + self.imp.strat.search(cache, input) + } + + /// This is like [`Regex::search_half`], but requires the caller to + /// explicitly pass a [`Cache`]. + /// + /// # Why pass a `Cache` explicitly? + /// + /// Passing a `Cache` explicitly will bypass the use of an internal memory + /// pool used by `Regex` to get a `Cache` for a search. The use of this + /// pool can be slower in some cases when a `Regex` is used from multiple + /// threads simultaneously. Typically, performance only becomes an issue + /// when there is heavy contention, which in turn usually only occurs + /// when each thread's primary unit of work is a regex search on a small + /// haystack. 
+ /// + /// # Example + /// + /// ``` + /// use regex_automata::{meta::Regex, Input, HalfMatch}; + /// + /// let re = Regex::new(r"Samwise|Sam")?; + /// let mut cache = re.create_cache(); + /// let input = Input::new( + /// "one of the chief characters, Samwise the Brave", + /// ); + /// assert_eq!( + /// Some(HalfMatch::must(0, 36)), + /// re.search_half_with(&mut cache, &input), + /// ); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn search_half_with( + &self, + cache: &mut Cache, + input: &Input<'_>, + ) -> Option { + if self.imp.info.is_impossible(input) { + return None; + } + self.imp.strat.search_half(cache, input) + } + + /// This is like [`Regex::search_captures`], but requires the caller to + /// explicitly pass a [`Cache`]. + /// + /// # Why pass a `Cache` explicitly? + /// + /// Passing a `Cache` explicitly will bypass the use of an internal memory + /// pool used by `Regex` to get a `Cache` for a search. The use of this + /// pool can be slower in some cases when a `Regex` is used from multiple + /// threads simultaneously. Typically, performance only becomes an issue + /// when there is heavy contention, which in turn usually only occurs + /// when each thread's primary unit of work is a regex search on a small + /// haystack. + /// + /// # Example: specific pattern search + /// + /// This example shows how to build a multi-pattern `Regex` that permits + /// searching for specific patterns. + /// + /// ``` + /// use regex_automata::{ + /// meta::Regex, + /// Anchored, Match, PatternID, Input, + /// }; + /// + /// let re = Regex::new_many(&["[a-z0-9]{6}", "[a-z][a-z0-9]{5}"])?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// let haystack = "foo123"; + /// + /// // Since we are using the default leftmost-first match and both + /// // patterns match at the same starting position, only the first pattern + /// // will be returned in this case when doing a search for any of the + /// // patterns. 
+ /// let expected = Some(Match::must(0, 0..6)); + /// re.search_captures_with(&mut cache, &Input::new(haystack), &mut caps); + /// assert_eq!(expected, caps.get_match()); + /// + /// // But if we want to check whether some other pattern matches, then we + /// // can provide its pattern ID. + /// let expected = Some(Match::must(1, 0..6)); + /// let input = Input::new(haystack) + /// .anchored(Anchored::Pattern(PatternID::must(1))); + /// re.search_captures_with(&mut cache, &input, &mut caps); + /// assert_eq!(expected, caps.get_match()); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// # Example: specifying the bounds of a search + /// + /// This example shows how providing the bounds of a search can produce + /// different results than simply sub-slicing the haystack. + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{meta::Regex, Match, Input}; + /// + /// let re = Regex::new(r"\b[0-9]{3}\b")?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// let haystack = "foo123bar"; + /// + /// // Since we sub-slice the haystack, the search doesn't know about + /// // the larger context and assumes that `123` is surrounded by word + /// // boundaries. And of course, the match position is reported relative + /// // to the sub-slice as well, which means we get `0..3` instead of + /// // `3..6`. + /// let expected = Some(Match::must(0, 0..3)); + /// let input = Input::new(&haystack[3..6]); + /// re.search_captures_with(&mut cache, &input, &mut caps); + /// assert_eq!(expected, caps.get_match()); + /// + /// // But if we provide the bounds of the search within the context of the + /// // entire haystack, then the search can take the surrounding context + /// // into account. (And if we did find a match, it would be reported + /// // as a valid offset into `haystack` instead of its sub-slice.) 
+ /// let expected = None; + /// let input = Input::new(haystack).range(3..6); + /// re.search_captures_with(&mut cache, &input, &mut caps); + /// assert_eq!(expected, caps.get_match()); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn search_captures_with( + &self, + cache: &mut Cache, + input: &Input<'_>, + caps: &mut Captures, + ) { + caps.set_pattern(None); + let pid = self.search_slots_with(cache, input, caps.slots_mut()); + caps.set_pattern(pid); + } + + /// This is like [`Regex::search_slots`], but requires the caller to + /// explicitly pass a [`Cache`]. + /// + /// # Why pass a `Cache` explicitly? + /// + /// Passing a `Cache` explicitly will bypass the use of an internal memory + /// pool used by `Regex` to get a `Cache` for a search. The use of this + /// pool can be slower in some cases when a `Regex` is used from multiple + /// threads simultaneously. Typically, performance only becomes an issue + /// when there is heavy contention, which in turn usually only occurs + /// when each thread's primary unit of work is a regex search on a small + /// haystack. + /// + /// # Example + /// + /// This example shows how to find the overall match offsets in a + /// multi-pattern search without allocating a `Captures` value. Indeed, we + /// can put our slots right on the stack. + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{meta::Regex, PatternID, Input}; + /// + /// let re = Regex::new_many(&[ + /// r"\pL+", + /// r"\d+", + /// ])?; + /// let mut cache = re.create_cache(); + /// let input = Input::new("!@#123"); + /// + /// // We only care about the overall match offsets here, so we just + /// // allocate two slots for each pattern. Each slot records the start + /// // and end of the match. 
+ /// let mut slots = [None; 4]; + /// let pid = re.search_slots_with(&mut cache, &input, &mut slots); + /// assert_eq!(Some(PatternID::must(1)), pid); + /// + /// // The overall match offsets are always at 'pid * 2' and 'pid * 2 + 1'. + /// // See 'GroupInfo' for more details on the mapping between groups and + /// // slot indices. + /// let slot_start = pid.unwrap().as_usize() * 2; + /// let slot_end = slot_start + 1; + /// assert_eq!(Some(3), slots[slot_start].map(|s| s.get())); + /// assert_eq!(Some(6), slots[slot_end].map(|s| s.get())); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn search_slots_with( + &self, + cache: &mut Cache, + input: &Input<'_>, + slots: &mut [Option], + ) -> Option { + if self.imp.info.is_impossible(input) { + return None; + } + self.imp.strat.search_slots(cache, input, slots) + } + + /// This is like [`Regex::which_overlapping_matches`], but requires the + /// caller to explicitly pass a [`Cache`]. + /// + /// # Why pass a `Cache` explicitly? + /// + /// Passing a `Cache` explicitly will bypass the use of an internal memory + /// pool used by `Regex` to get a `Cache` for a search. The use of this + /// pool can be slower in some cases when a `Regex` is used from multiple + /// threads simultaneously. Typically, performance only becomes an issue + /// when there is heavy contention, which in turn usually only occurs + /// when each thread's primary unit of work is a regex search on a small + /// haystack.
+ /// + /// # Example + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{meta::Regex, Input, MatchKind, PatternSet}; + /// + /// let patterns = &[ + /// r"\w+", r"\d+", r"\pL+", r"foo", r"bar", r"barfoo", r"foobar", + /// ]; + /// let re = Regex::builder() + /// .configure(Regex::config().match_kind(MatchKind::All)) + /// .build_many(patterns)?; + /// let mut cache = re.create_cache(); + /// + /// let input = Input::new("foobar"); + /// let mut patset = PatternSet::new(re.pattern_len()); + /// re.which_overlapping_matches_with(&mut cache, &input, &mut patset); + /// let expected = vec![0, 2, 3, 4, 6]; + /// let got: Vec = patset.iter().map(|p| p.as_usize()).collect(); + /// assert_eq!(expected, got); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn which_overlapping_matches_with( + &self, + cache: &mut Cache, + input: &Input<'_>, + patset: &mut PatternSet, + ) { + if self.imp.info.is_impossible(input) { + return; + } + self.imp.strat.which_overlapping_matches(cache, input, patset) + } +} + +/// Various non-search routines for querying properties of a `Regex` and +/// convenience routines for creating [`Captures`] and [`Cache`] values. +impl Regex { + /// Creates a new object for recording capture group offsets. This is used + /// in search APIs like [`Regex::captures`] and [`Regex::search_captures`]. + /// + /// This is a convenience routine for + /// `Captures::all(re.group_info().clone())`. Callers may build other types + /// of `Captures` values that record less information (and thus require + /// less work from the regex engine) using [`Captures::matches`] and + /// [`Captures::empty`]. 
+ /// + /// # Example + /// + /// This shows some alternatives to [`Regex::create_captures`]: + /// + /// ``` + /// use regex_automata::{ + /// meta::Regex, + /// util::captures::Captures, + /// Match, PatternID, Span, + /// }; + /// + /// let re = Regex::new(r"(?[A-Z][a-z]+) (?[A-Z][a-z]+)")?; + /// + /// // This is equivalent to Regex::create_captures. It stores matching + /// // offsets for all groups in the regex. + /// let mut all = Captures::all(re.group_info().clone()); + /// re.captures("Bruce Springsteen", &mut all); + /// assert_eq!(Some(Match::must(0, 0..17)), all.get_match()); + /// assert_eq!(Some(Span::from(0..5)), all.get_group_by_name("first")); + /// assert_eq!(Some(Span::from(6..17)), all.get_group_by_name("last")); + /// + /// // In this version, we only care about the implicit groups, which + /// // means offsets for the explicit groups will be unavailable. It can + /// // sometimes be faster to ask for fewer groups, since the underlying + /// // regex engine needs to do less work to keep track of them. + /// let mut matches = Captures::matches(re.group_info().clone()); + /// re.captures("Bruce Springsteen", &mut matches); + /// // We still get the overall match info. + /// assert_eq!(Some(Match::must(0, 0..17)), matches.get_match()); + /// // But now the explicit groups are unavailable. + /// assert_eq!(None, matches.get_group_by_name("first")); + /// assert_eq!(None, matches.get_group_by_name("last")); + /// + /// // Finally, in this version, we don't ask to keep track of offsets for + /// // *any* groups. All we get back is whether a match occurred, and if + /// // so, the ID of the pattern that matched. + /// let mut empty = Captures::empty(re.group_info().clone()); + /// re.captures("Bruce Springsteen", &mut empty); + /// // it's a match! + /// assert!(empty.is_match()); + /// // for pattern ID 0 + /// assert_eq!(Some(PatternID::ZERO), empty.pattern()); + /// // Match offsets are unavailable. 
+ /// assert_eq!(None, empty.get_match()); + /// // And of course, explicit groups are unavailable too. + /// assert_eq!(None, empty.get_group_by_name("first")); + /// assert_eq!(None, empty.get_group_by_name("last")); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn create_captures(&self) -> Captures { + Captures::all(self.group_info().clone()) + } + + /// Creates a new cache for use with lower level search APIs like + /// [`Regex::search_with`]. + /// + /// The cache returned should only be used for searches for this `Regex`. + /// If you want to reuse the cache for another `Regex`, then you must call + /// [`Cache::reset`] with that `Regex`. + /// + /// This is a convenience routine for [`Cache::new`]. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{meta::Regex, Input, Match}; + /// + /// let re = Regex::new(r"(?-u)m\w+\s+m\w+")?; + /// let mut cache = re.create_cache(); + /// let input = Input::new("crazy janey and her mission man"); + /// assert_eq!( + /// Some(Match::must(0, 20..31)), + /// re.search_with(&mut cache, &input), + /// ); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn create_cache(&self) -> Cache { + self.imp.strat.create_cache() + } + + /// Returns the total number of patterns in this regex. + /// + /// The standard [`Regex::new`] constructor always results in a `Regex` + /// with a single pattern, but [`Regex::new_many`] permits building a + /// multi-pattern regex. + /// + /// A `Regex` guarantees that the maximum possible `PatternID` returned in + /// any match is `Regex::pattern_len() - 1`. In the case where the number + /// of patterns is `0`, a match is impossible. 
+ /// + /// # Example + /// + /// ``` + /// use regex_automata::meta::Regex; + /// + /// let re = Regex::new(r"(?m)^[a-z]$")?; + /// assert_eq!(1, re.pattern_len()); + /// + /// let re = Regex::new_many::<&str>(&[])?; + /// assert_eq!(0, re.pattern_len()); + /// + /// let re = Regex::new_many(&["a", "b", "c"])?; + /// assert_eq!(3, re.pattern_len()); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn pattern_len(&self) -> usize { + self.imp.info.pattern_len() + } + + /// Returns the total number of capturing groups. + /// + /// This includes the implicit capturing group corresponding to the + /// entire match. Therefore, the minimum value returned is `1`. + /// + /// # Example + /// + /// This shows a few patterns and how many capture groups they have. + /// + /// ``` + /// use regex_automata::meta::Regex; + /// + /// let len = |pattern| { + /// Regex::new(pattern).map(|re| re.captures_len()) + /// }; + /// + /// assert_eq!(1, len("a")?); + /// assert_eq!(2, len("(a)")?); + /// assert_eq!(3, len("(a)|(b)")?); + /// assert_eq!(5, len("(a)(b)|(c)(d)")?); + /// assert_eq!(2, len("(a)|b")?); + /// assert_eq!(2, len("a|(b)")?); + /// assert_eq!(2, len("(b)*")?); + /// assert_eq!(2, len("(b)+")?); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// # Example: multiple patterns + /// + /// This routine also works for multiple patterns. The total number is + /// the sum of the capture groups of each pattern. 
+ /// + /// ``` + /// use regex_automata::meta::Regex; + /// + /// let len = |patterns| { + /// Regex::new_many(patterns).map(|re| re.captures_len()) + /// }; + /// + /// assert_eq!(2, len(&["a", "b"])?); + /// assert_eq!(4, len(&["(a)", "(b)"])?); + /// assert_eq!(6, len(&["(a)|(b)", "(c)|(d)"])?); + /// assert_eq!(8, len(&["(a)(b)|(c)(d)", "(x)(y)"])?); + /// assert_eq!(3, len(&["(a)", "b"])?); + /// assert_eq!(3, len(&["a", "(b)"])?); + /// assert_eq!(4, len(&["(a)", "(b)*"])?); + /// assert_eq!(4, len(&["(a)+", "(b)+"])?); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn captures_len(&self) -> usize { + self.imp + .info + .props_union() + .explicit_captures_len() + .saturating_add(self.pattern_len()) + } + + /// Returns the total number of capturing groups that appear in every + /// possible match. + /// + /// If the number of capture groups can vary depending on the match, then + /// this returns `None`. That is, a value is only returned when the number + /// of matching groups is invariant or "static." + /// + /// Note that like [`Regex::captures_len`], this **does** include the + /// implicit capturing group corresponding to the entire match. Therefore, + /// when a non-None value is returned, it is guaranteed to be at least `1`. + /// Stated differently, a return value of `Some(0)` is impossible. + /// + /// # Example + /// + /// This shows a few cases where a static number of capture groups is + /// available and a few cases where it is not. 
+ /// + /// ``` + /// use regex_automata::meta::Regex; + /// + /// let len = |pattern| { + /// Regex::new(pattern).map(|re| re.static_captures_len()) + /// }; + /// + /// assert_eq!(Some(1), len("a")?); + /// assert_eq!(Some(2), len("(a)")?); + /// assert_eq!(Some(2), len("(a)|(b)")?); + /// assert_eq!(Some(3), len("(a)(b)|(c)(d)")?); + /// assert_eq!(None, len("(a)|b")?); + /// assert_eq!(None, len("a|(b)")?); + /// assert_eq!(None, len("(b)*")?); + /// assert_eq!(Some(2), len("(b)+")?); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// # Example: multiple patterns + /// + /// This property extends to regexes with multiple patterns as well. In + /// order for there to be a static number of capture groups in this case, + /// every pattern must have the same static number. + /// + /// ``` + /// use regex_automata::meta::Regex; + /// + /// let len = |patterns| { + /// Regex::new_many(patterns).map(|re| re.static_captures_len()) + /// }; + /// + /// assert_eq!(Some(1), len(&["a", "b"])?); + /// assert_eq!(Some(2), len(&["(a)", "(b)"])?); + /// assert_eq!(Some(2), len(&["(a)|(b)", "(c)|(d)"])?); + /// assert_eq!(Some(3), len(&["(a)(b)|(c)(d)", "(x)(y)"])?); + /// assert_eq!(None, len(&["(a)", "b"])?); + /// assert_eq!(None, len(&["a", "(b)"])?); + /// assert_eq!(None, len(&["(a)", "(b)*"])?); + /// assert_eq!(Some(2), len(&["(a)+", "(b)+"])?); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn static_captures_len(&self) -> Option { + self.imp + .info + .props_union() + .static_explicit_captures_len() + .map(|len| len.saturating_add(1)) + } + + /// Return information about the capture groups in this `Regex`. + /// + /// A `GroupInfo` is an immutable object that can be cheaply cloned. It + /// is responsible for maintaining a mapping between the capture groups + /// in the concrete syntax of zero or more regex patterns and their + /// internal representation used by some of the regex matchers.
It is also + /// responsible for maintaining a mapping between the name of each group + /// (if one exists) and its corresponding group index. + /// + /// A `GroupInfo` is ultimately what is used to build a [`Captures`] value, + /// which is some mutable space where group offsets are stored as a result + /// of a search. + /// + /// # Example + /// + /// This shows some alternatives to [`Regex::create_captures`]: + /// + /// ``` + /// use regex_automata::{ + /// meta::Regex, + /// util::captures::Captures, + /// Match, PatternID, Span, + /// }; + /// + /// let re = Regex::new(r"(?[A-Z][a-z]+) (?[A-Z][a-z]+)")?; + /// + /// // This is equivalent to Regex::create_captures. It stores matching + /// // offsets for all groups in the regex. + /// let mut all = Captures::all(re.group_info().clone()); + /// re.captures("Bruce Springsteen", &mut all); + /// assert_eq!(Some(Match::must(0, 0..17)), all.get_match()); + /// assert_eq!(Some(Span::from(0..5)), all.get_group_by_name("first")); + /// assert_eq!(Some(Span::from(6..17)), all.get_group_by_name("last")); + /// + /// // In this version, we only care about the implicit groups, which + /// // means offsets for the explicit groups will be unavailable. It can + /// // sometimes be faster to ask for fewer groups, since the underlying + /// // regex engine needs to do less work to keep track of them. + /// let mut matches = Captures::matches(re.group_info().clone()); + /// re.captures("Bruce Springsteen", &mut matches); + /// // We still get the overall match info. + /// assert_eq!(Some(Match::must(0, 0..17)), matches.get_match()); + /// // But now the explicit groups are unavailable. + /// assert_eq!(None, matches.get_group_by_name("first")); + /// assert_eq!(None, matches.get_group_by_name("last")); + /// + /// // Finally, in this version, we don't ask to keep track of offsets for + /// // *any* groups. All we get back is whether a match occurred, and if + /// // so, the ID of the pattern that matched. 
+ /// let mut empty = Captures::empty(re.group_info().clone()); + /// re.captures("Bruce Springsteen", &mut empty); + /// // it's a match! + /// assert!(empty.is_match()); + /// // for pattern ID 0 + /// assert_eq!(Some(PatternID::ZERO), empty.pattern()); + /// // Match offsets are unavailable. + /// assert_eq!(None, empty.get_match()); + /// // And of course, explicit groups are unavailable too. + /// assert_eq!(None, empty.get_group_by_name("first")); + /// assert_eq!(None, empty.get_group_by_name("last")); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn group_info(&self) -> &GroupInfo { + self.imp.strat.group_info() + } + + /// Returns the configuration object used to build this `Regex`. + /// + /// If no configuration object was explicitly passed, then the + /// configuration returned represents the default. + #[inline] + pub fn get_config(&self) -> &Config { + self.imp.info.config() + } + + /// Returns true if this regex has a high chance of being "accelerated." + /// + /// The precise meaning of "accelerated" is specifically left unspecified, + /// but the general meaning is that the search is a high likelihood of + /// running faster than than a character-at-a-time loop inside a standard + /// regex engine. + /// + /// When a regex is accelerated, it is only a *probabilistic* claim. That + /// is, just because the regex is believed to be accelerated, that doesn't + /// mean it will definitely execute searches very fast. Similarly, if a + /// regex is *not* accelerated, that is also a probabilistic claim. That + /// is, a regex for which `is_accelerated` returns `false` could still run + /// searches more quickly than a regex for which `is_accelerated` returns + /// `true`. + /// + /// Whether a regex is marked as accelerated or not is dependent on + /// implementations details that may change in a semver compatible release. + /// That is, a regex that is accelerated in a `x.y.1` release might not be + /// accelerated in a `x.y.2` release. 
+ /// + /// Basically, the value of acceleration boils down to a hedge: a hodge + /// podge of internal heuristics combine to make a probabilistic guess + /// that this regex search may run "fast." The value in knowing this from + /// a caller's perspective is that it may act as a signal that no further + /// work should be done to accelerate a search. For example, a grep-like + /// tool might try to do some extra work extracting literals from a regex + /// to create its own heuristic acceleration strategies. But it might + /// choose to defer to this crate's acceleration strategy if one exists. + /// This routine permits querying whether such a strategy is active for a + /// particular regex. + /// + /// # Example + /// + /// ``` + /// use regex_automata::meta::Regex; + /// + /// // A simple literal is very likely to be accelerated. + /// let re = Regex::new(r"foo")?; + /// assert!(re.is_accelerated()); + /// + /// // A regex with no literals is likely to not be accelerated. + /// let re = Regex::new(r"\w")?; + /// assert!(!re.is_accelerated()); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn is_accelerated(&self) -> bool { + self.imp.strat.is_accelerated() + } + + /// Return the total approximate heap memory, in bytes, used by this `Regex`. + /// + /// Note that currently, there is no high level configuration for setting + /// a limit on the specific value returned by this routine. Instead, the + /// following routines can be used to control heap memory at a bit of a + /// lower level: + /// + /// * [`Config::nfa_size_limit`] controls how big _any_ of the NFAs are + /// allowed to be. + /// * [`Config::onepass_size_limit`] controls how big the one-pass DFA is + /// allowed to be. + /// * [`Config::hybrid_cache_capacity`] controls how much memory the lazy + /// DFA is permitted to allocate to store its transition table. + /// * [`Config::dfa_size_limit`] controls how big a fully compiled DFA is + /// allowed to be. 
+ /// * [`Config::dfa_state_limit`] controls the conditions under which the + /// meta regex engine will even attempt to build a fully compiled DFA. + #[inline] + pub fn memory_usage(&self) -> usize { + self.imp.strat.memory_usage() + } +} + +impl Clone for Regex { + fn clone(&self) -> Regex { + let imp = Arc::clone(&self.imp); + let pool = { + let strat = Arc::clone(&imp.strat); + let create: CachePoolFn = Box::new(move || strat.create_cache()); + Pool::new(create) + }; + Regex { imp, pool } + } +} + +#[derive(Clone, Debug)] +pub(crate) struct RegexInfo(Arc); + +#[derive(Clone, Debug)] +struct RegexInfoI { + config: Config, + props: Vec, + props_union: hir::Properties, +} + +impl RegexInfo { + fn new(config: Config, hirs: &[&Hir]) -> RegexInfo { + // Collect all of the properties from each of the HIRs, and also + // union them into one big set of properties representing all HIRs + // as if they were in one big alternation. + let mut props = vec![]; + for hir in hirs.iter() { + props.push(hir.properties().clone()); + } + let props_union = hir::Properties::union(&props); + + RegexInfo(Arc::new(RegexInfoI { config, props, props_union })) + } + + pub(crate) fn config(&self) -> &Config { + &self.0.config + } + + pub(crate) fn props(&self) -> &[hir::Properties] { + &self.0.props + } + + pub(crate) fn props_union(&self) -> &hir::Properties { + &self.0.props_union + } + + pub(crate) fn pattern_len(&self) -> usize { + self.props().len() + } + + pub(crate) fn memory_usage(&self) -> usize { + self.props().iter().map(|p| p.memory_usage()).sum::() + + self.props_union().memory_usage() + } + + /// Returns true when the search is guaranteed to be anchored. That is, + /// when a match is reported, its offset is guaranteed to correspond to + /// the start of the search. + /// + /// This includes returning true when `input` _isn't_ anchored but the + /// underlying regex is. 
+ #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn is_anchored_start(&self, input: &Input<'_>) -> bool { + input.get_anchored().is_anchored() || self.is_always_anchored_start() + } + + /// Returns true when this regex is always anchored to the start of a + /// search. And in particular, that regardless of an `Input` configuration, + /// if any match is reported it must start at `0`. + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn is_always_anchored_start(&self) -> bool { + use regex_syntax::hir::Look; + self.props_union().look_set_prefix().contains(Look::Start) + } + + /// Returns true when this regex is always anchored to the end of a + /// search. And in particular, that regardless of an `Input` configuration, + /// if any match is reported it must end at the end of the haystack. + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn is_always_anchored_end(&self) -> bool { + use regex_syntax::hir::Look; + self.props_union().look_set_suffix().contains(Look::End) + } + + /// Returns true if and only if it is known that a match is impossible + /// for the given input. This is useful for short-circuiting and avoiding + /// running the regex engine if it's known no match can be reported. + /// + /// Note that this doesn't necessarily detect every possible case. For + /// example, when `pattern_len() == 0`, a match is impossible, but that + /// case is so rare that it's fine to be handled by the regex engine + /// itself. That is, it's not worth the cost of adding it here in order to + /// make it a little faster. The reason is that this is called for every + /// search. so there is some cost to adding checks here. Arguably, some of + /// the checks that are here already probably shouldn't be here... 
+ #[cfg_attr(feature = "perf-inline", inline(always))] + fn is_impossible(&self, input: &Input<'_>) -> bool { + // The underlying regex is anchored, so if we don't start the search + // at position 0, a match is impossible, because the anchor can only + // match at position 0. + if input.start() > 0 && self.is_always_anchored_start() { + return true; + } + // Same idea, but for the end anchor. + if input.end() < input.haystack().len() + && self.is_always_anchored_end() + { + return true; + } + // If the haystack is smaller than the minimum length required, then + // we know there can be no match. + let minlen = match self.props_union().minimum_len() { + None => return false, + Some(minlen) => minlen, + }; + if input.get_span().len() < minlen { + return true; + } + // Same idea as minimum, but for maximum. This is trickier. We can + // only apply the maximum when we know the entire span that we're + // searching *has* to match according to the regex (and possibly the + // input configuration). If we know there is too much for the regex + // to match, we can bail early. + // + // I don't think we can apply the maximum otherwise unfortunately. + if self.is_anchored_start(input) && self.is_always_anchored_end() { + let maxlen = match self.props_union().maximum_len() { + None => return false, + Some(maxlen) => maxlen, + }; + if input.get_span().len() > maxlen { + return true; + } + } + false + } +} + +/// An iterator over all non-overlapping matches. +/// +/// The iterator yields a [`Match`] value until no more matches could be found. +/// +/// The lifetime parameters are as follows: +/// +/// * `'r` represents the lifetime of the `Regex` that produced this iterator. +/// * `'h` represents the lifetime of the haystack being searched. +/// +/// This iterator can be created with the [`Regex::find_iter`] method. 
+#[derive(Debug)] +pub struct FindMatches<'r, 'h> { + re: &'r Regex, + cache: CachePoolGuard<'r>, + it: iter::Searcher<'h>, +} + +impl<'r, 'h> FindMatches<'r, 'h> { + /// Returns the `Regex` value that created this iterator. + #[inline] + pub fn regex(&self) -> &'r Regex { + self.re + } + + /// Returns the current `Input` associated with this iterator. + /// + /// The `start` position on the given `Input` may change during iteration, + /// but all other values are guaranteed to remain invariant. + #[inline] + pub fn input<'s>(&'s self) -> &'s Input<'h> { + self.it.input() + } +} + +impl<'r, 'h> Iterator for FindMatches<'r, 'h> { + type Item = Match; + + #[inline] + fn next(&mut self) -> Option { + let FindMatches { re, ref mut cache, ref mut it } = *self; + it.advance(|input| Ok(re.search_with(cache, input))) + } + + #[inline] + fn count(self) -> usize { + // If all we care about is a count of matches, then we only need to + // find the end position of each match. This can give us a 2x perf + // boost in some cases, because it avoids needing to do a reverse scan + // to find the start of a match. + let FindMatches { re, mut cache, it } = self; + // This does the deref for PoolGuard once instead of every iter. + let cache = &mut *cache; + it.into_half_matches_iter( + |input| Ok(re.search_half_with(cache, input)), + ) + .count() + } +} + +impl<'r, 'h> core::iter::FusedIterator for FindMatches<'r, 'h> {} + +/// An iterator over all non-overlapping leftmost matches with their capturing +/// groups. +/// +/// The iterator yields a [`Captures`] value until no more matches could be +/// found. +/// +/// The lifetime parameters are as follows: +/// +/// * `'r` represents the lifetime of the `Regex` that produced this iterator. +/// * `'h` represents the lifetime of the haystack being searched. +/// +/// This iterator can be created with the [`Regex::captures_iter`] method. 
+#[derive(Debug)] +pub struct CapturesMatches<'r, 'h> { + re: &'r Regex, + cache: CachePoolGuard<'r>, + caps: Captures, + it: iter::Searcher<'h>, +} + +impl<'r, 'h> CapturesMatches<'r, 'h> { + /// Returns the `Regex` value that created this iterator. + #[inline] + pub fn regex(&self) -> &'r Regex { + self.re + } + + /// Returns the current `Input` associated with this iterator. + /// + /// The `start` position on the given `Input` may change during iteration, + /// but all other values are guaranteed to remain invariant. + #[inline] + pub fn input<'s>(&'s self) -> &'s Input<'h> { + self.it.input() + } +} + +impl<'r, 'h> Iterator for CapturesMatches<'r, 'h> { + type Item = Captures; + + #[inline] + fn next(&mut self) -> Option { + // Splitting 'self' apart seems necessary to appease borrowck. + let CapturesMatches { re, ref mut cache, ref mut caps, ref mut it } = + *self; + let _ = it.advance(|input| { + re.search_captures_with(cache, input, caps); + Ok(caps.get_match()) + }); + if caps.is_match() { + Some(caps.clone()) + } else { + None + } + } + + #[inline] + fn count(self) -> usize { + let CapturesMatches { re, mut cache, it, .. } = self; + // This does the deref for PoolGuard once instead of every iter. + let cache = &mut *cache; + it.into_half_matches_iter( + |input| Ok(re.search_half_with(cache, input)), + ) + .count() + } +} + +impl<'r, 'h> core::iter::FusedIterator for CapturesMatches<'r, 'h> {} + +/// Yields all substrings delimited by a regular expression match. +/// +/// The spans correspond to the offsets between matches. +/// +/// The lifetime parameters are as follows: +/// +/// * `'r` represents the lifetime of the `Regex` that produced this iterator. +/// * `'h` represents the lifetime of the haystack being searched. +/// +/// This iterator can be created with the [`Regex::split`] method. 
+#[derive(Debug)] +pub struct Split<'r, 'h> { + finder: FindMatches<'r, 'h>, + last: usize, +} + +impl<'r, 'h> Split<'r, 'h> { + /// Returns the current `Input` associated with this iterator. + /// + /// The `start` position on the given `Input` may change during iteration, + /// but all other values are guaranteed to remain invariant. + #[inline] + pub fn input<'s>(&'s self) -> &'s Input<'h> { + self.finder.input() + } +} + +impl<'r, 'h> Iterator for Split<'r, 'h> { + type Item = Span; + + fn next(&mut self) -> Option { + match self.finder.next() { + None => { + let len = self.finder.it.input().haystack().len(); + if self.last > len { + None + } else { + let span = Span::from(self.last..len); + self.last = len + 1; // Next call will return None + Some(span) + } + } + Some(m) => { + let span = Span::from(self.last..m.start()); + self.last = m.end(); + Some(span) + } + } + } +} + +impl<'r, 'h> core::iter::FusedIterator for Split<'r, 'h> {} + +/// Yields at most `N` spans delimited by a regular expression match. +/// +/// The spans correspond to the offsets between matches. The last span will be +/// whatever remains after splitting. +/// +/// The lifetime parameters are as follows: +/// +/// * `'r` represents the lifetime of the `Regex` that produced this iterator. +/// * `'h` represents the lifetime of the haystack being searched. +/// +/// This iterator can be created with the [`Regex::splitn`] method. +#[derive(Debug)] +pub struct SplitN<'r, 'h> { + splits: Split<'r, 'h>, + limit: usize, +} + +impl<'r, 'h> SplitN<'r, 'h> { + /// Returns the current `Input` associated with this iterator. + /// + /// The `start` position on the given `Input` may change during iteration, + /// but all other values are guaranteed to remain invariant. 
+ #[inline] + pub fn input<'s>(&'s self) -> &'s Input<'h> { + self.splits.input() + } +} + +impl<'r, 'h> Iterator for SplitN<'r, 'h> { + type Item = Span; + + fn next(&mut self) -> Option { + if self.limit == 0 { + return None; + } + + self.limit -= 1; + if self.limit > 0 { + return self.splits.next(); + } + + let len = self.splits.finder.it.input().haystack().len(); + if self.splits.last > len { + // We've already returned all substrings. + None + } else { + // self.n == 0, so future calls will return None immediately + Some(Span::from(self.splits.last..len)) + } + } + + fn size_hint(&self) -> (usize, Option) { + (0, Some(self.limit)) + } +} + +impl<'r, 'h> core::iter::FusedIterator for SplitN<'r, 'h> {} + +/// Represents mutable scratch space used by regex engines during a search. +/// +/// Most of the regex engines in this crate require some kind of +/// mutable state in order to execute a search. This mutable state is +/// explicitly separated from the the core regex object (such as a +/// [`thompson::NFA`](crate::nfa::thompson::NFA)) so that the read-only regex +/// object can be shared across multiple threads simultaneously without any +/// synchronization. Conversely, a `Cache` must either be duplicated if using +/// the same `Regex` from multiple threads, or else there must be some kind of +/// synchronization that guarantees exclusive access while it's in use by one +/// thread. +/// +/// A `Regex` attempts to do this synchronization for you by using a thread +/// pool internally. Its size scales roughly with the number of simultaneous +/// regex searches. +/// +/// For cases where one does not want to rely on a `Regex`'s internal thread +/// pool, lower level routines such as [`Regex::search_with`] are provided +/// that permit callers to pass a `Cache` into the search routine explicitly. +/// +/// General advice is that the thread pool is often more than good enough. 
+/// However, it may be possible to observe the effects of its latency, +/// especially when searching many small haystacks from many threads +/// simultaneously. +/// +/// Caches can be created from their corresponding `Regex` via +/// [`Regex::create_cache`]. A cache can only be used with either the `Regex` +/// that created it, or the `Regex` that was most recently used to reset it +/// with [`Cache::reset`]. Using a cache with any other `Regex` may result in +/// panics or incorrect results. +/// +/// # Example +/// +/// ``` +/// use regex_automata::{meta::Regex, Input, Match}; +/// +/// let re = Regex::new(r"(?-u)m\w+\s+m\w+")?; +/// let mut cache = re.create_cache(); +/// let input = Input::new("crazy janey and her mission man"); +/// assert_eq!( +/// Some(Match::must(0, 20..31)), +/// re.search_with(&mut cache, &input), +/// ); +/// +/// # Ok::<(), Box>(()) +/// ``` +#[derive(Debug, Clone)] +pub struct Cache { + pub(crate) capmatches: Captures, + pub(crate) pikevm: wrappers::PikeVMCache, + pub(crate) backtrack: wrappers::BoundedBacktrackerCache, + pub(crate) onepass: wrappers::OnePassCache, + pub(crate) hybrid: wrappers::HybridCache, + pub(crate) revhybrid: wrappers::ReverseHybridCache, +} + +impl Cache { + /// Creates a new `Cache` for use with this regex. + /// + /// The cache returned should only be used for searches for the given + /// `Regex`. If you want to reuse the cache for another `Regex`, then you + /// must call [`Cache::reset`] with that `Regex`. + pub fn new(re: &Regex) -> Cache { + re.create_cache() + } + + /// Reset this cache such that it can be used for searching with the given + /// `Regex` (and only that `Regex`). + /// + /// A cache reset permits potentially reusing memory already allocated in + /// this cache with a different `Regex`. + /// + /// # Example + /// + /// This shows how to re-purpose a cache for use with a different `Regex`. 
+ /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{meta::Regex, Match, Input}; + /// + /// let re1 = Regex::new(r"\w")?; + /// let re2 = Regex::new(r"\W")?; + /// + /// let mut cache = re1.create_cache(); + /// assert_eq!( + /// Some(Match::must(0, 0..2)), + /// re1.search_with(&mut cache, &Input::new("Δ")), + /// ); + /// + /// // Using 'cache' with re2 is not allowed. It may result in panics or + /// // incorrect results. In order to re-purpose the cache, we must reset + /// // it with the Regex we'd like to use it with. + /// // + /// // Similarly, after this reset, using the cache with 're1' is also not + /// // allowed. + /// cache.reset(&re2); + /// assert_eq!( + /// Some(Match::must(0, 0..3)), + /// re2.search_with(&mut cache, &Input::new("☃")), + /// ); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn reset(&mut self, re: &Regex) { + re.imp.strat.reset_cache(self) + } + + /// Returns the heap memory usage, in bytes, of this cache. + /// + /// This does **not** include the stack size used up by this cache. To + /// compute that, use `std::mem::size_of::()`. + pub fn memory_usage(&self) -> usize { + let mut bytes = 0; + bytes += self.pikevm.memory_usage(); + bytes += self.backtrack.memory_usage(); + bytes += self.onepass.memory_usage(); + bytes += self.hybrid.memory_usage(); + bytes += self.revhybrid.memory_usage(); + bytes + } +} + +/// An object describing the configuration of a `Regex`. +/// +/// This configuration only includes options for the +/// non-syntax behavior of a `Regex`, and can be applied via the +/// [`Builder::configure`] method. For configuring the syntax options, see +/// [`util::syntax::Config`](crate::util::syntax::Config). +/// +/// # Example: lower the NFA size limit +/// +/// In some cases, the default size limit might be too big. The size limit can +/// be lowered, which will prevent large regex patterns from compiling. 
+/// +/// ``` +/// # if cfg!(miri) { return Ok(()); } // miri takes too long +/// use regex_automata::meta::Regex; +/// +/// let result = Regex::builder() +/// .configure(Regex::config().nfa_size_limit(Some(20 * (1<<10)))) +/// // Not even 20KB is enough to build a single large Unicode class! +/// .build(r"\pL"); +/// assert!(result.is_err()); +/// +/// # Ok::<(), Box>(()) +/// ``` +#[derive(Clone, Debug, Default)] +pub struct Config { + // As with other configuration types in this crate, we put all our knobs + // in options so that we can distinguish between "default" and "not set." + // This makes it possible to easily combine multiple configurations + // without default values overwriting explicitly specified values. See the + // 'overwrite' method. + // + // For docs on the fields below, see the corresponding method setters. + match_kind: Option, + utf8_empty: Option, + autopre: Option, + pre: Option>, + which_captures: Option, + nfa_size_limit: Option>, + onepass_size_limit: Option>, + hybrid_cache_capacity: Option, + hybrid: Option, + dfa: Option, + dfa_size_limit: Option>, + dfa_state_limit: Option>, + onepass: Option, + backtrack: Option, + byte_classes: Option, + line_terminator: Option, +} + +impl Config { + /// Create a new configuration object for a `Regex`. + pub fn new() -> Config { + Config::default() + } + + /// Set the match semantics for a `Regex`. + /// + /// The default value is [`MatchKind::LeftmostFirst`]. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{meta::Regex, Match, MatchKind}; + /// + /// // By default, leftmost-first semantics are used, which + /// // disambiguates matches at the same position by selecting + /// // the one that corresponds earlier in the pattern. + /// let re = Regex::new("sam|samwise")?; + /// assert_eq!(Some(Match::must(0, 0..3)), re.find("samwise")); + /// + /// // But with 'all' semantics, match priority is ignored + /// // and all match states are included. 
When coupled with + /// // a leftmost search, the search will report the last + /// // possible match. + /// let re = Regex::builder() + /// .configure(Regex::config().match_kind(MatchKind::All)) + /// .build("sam|samwise")?; + /// assert_eq!(Some(Match::must(0, 0..7)), re.find("samwise")); + /// // Beware that this can lead to skipping matches! + /// // Usually 'all' is used for anchored reverse searches + /// // only, or for overlapping searches. + /// assert_eq!(Some(Match::must(0, 4..11)), re.find("sam samwise")); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn match_kind(self, kind: MatchKind) -> Config { + Config { match_kind: Some(kind), ..self } + } + + /// Toggles whether empty matches are permitted to occur between the code + /// units of a UTF-8 encoded codepoint. + /// + /// This should generally be enabled when search a `&str` or anything that + /// you otherwise know is valid UTF-8. It should be disabled in all other + /// cases. Namely, if the haystack is not valid UTF-8 and this is enabled, + /// then behavior is unspecified. + /// + /// By default, this is enabled. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{meta::Regex, Match}; + /// + /// let re = Regex::new("")?; + /// let got: Vec = re.find_iter("☃").collect(); + /// // Matches only occur at the beginning and end of the snowman. + /// assert_eq!(got, vec![ + /// Match::must(0, 0..0), + /// Match::must(0, 3..3), + /// ]); + /// + /// let re = Regex::builder() + /// .configure(Regex::config().utf8_empty(false)) + /// .build("")?; + /// let got: Vec = re.find_iter("☃").collect(); + /// // Matches now occur at every position! + /// assert_eq!(got, vec![ + /// Match::must(0, 0..0), + /// Match::must(0, 1..1), + /// Match::must(0, 2..2), + /// Match::must(0, 3..3), + /// ]); + /// + /// Ok::<(), Box>(()) + /// ``` + pub fn utf8_empty(self, yes: bool) -> Config { + Config { utf8_empty: Some(yes), ..self } + } + + /// Toggles whether automatic prefilter support is enabled. 
+ /// + /// If this is disabled and [`Config::prefilter`] is not set, then the + /// meta regex engine will not use any prefilters. This can sometimes + /// be beneficial in cases where you know (or have measured) that the + /// prefilter leads to overall worse search performance. + /// + /// By default, this is enabled. + /// + /// # Example + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{meta::Regex, Match}; + /// + /// let re = Regex::builder() + /// .configure(Regex::config().auto_prefilter(false)) + /// .build(r"Bruce \w+")?; + /// let hay = "Hello Bruce Springsteen!"; + /// assert_eq!(Some(Match::must(0, 6..23)), re.find(hay)); + /// + /// Ok::<(), Box>(()) + /// ``` + pub fn auto_prefilter(self, yes: bool) -> Config { + Config { autopre: Some(yes), ..self } + } + + /// Overrides and sets the prefilter to use inside a `Regex`. + /// + /// This permits one to forcefully set a prefilter in cases where the + /// caller knows better than whatever the automatic prefilter logic is + /// capable of. + /// + /// By default, this is set to `None` and an automatic prefilter will be + /// used if one could be built. (Assuming [`Config::auto_prefilter`] is + /// enabled, which it is by default.) + /// + /// # Example + /// + /// This example shows how to set your own prefilter. In the case of a + /// pattern like `Bruce \w+`, the automatic prefilter is likely to be + /// constructed in a way that it will look for occurrences of `Bruce `. + /// In most cases, this is the best choice. But in some cases, it may be + /// the case that running `memchr` on `B` is the best choice. One can + /// achieve that behavior by overriding the automatic prefilter logic + /// and providing a prefilter that just matches `B`. 
+ /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{ + /// meta::Regex, + /// util::prefilter::Prefilter, + /// Match, MatchKind, + /// }; + /// + /// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["B"]) + /// .expect("a prefilter"); + /// let re = Regex::builder() + /// .configure(Regex::config().prefilter(Some(pre))) + /// .build(r"Bruce \w+")?; + /// let hay = "Hello Bruce Springsteen!"; + /// assert_eq!(Some(Match::must(0, 6..23)), re.find(hay)); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// # Example: incorrect prefilters can lead to incorrect results! + /// + /// Be warned that setting an incorrect prefilter can lead to missed + /// matches. So if you use this option, ensure your prefilter can _never_ + /// report false negatives. (A false positive is, on the other hand, quite + /// okay and generally unavoidable.) + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{ + /// meta::Regex, + /// util::prefilter::Prefilter, + /// Match, MatchKind, + /// }; + /// + /// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["Z"]) + /// .expect("a prefilter"); + /// let re = Regex::builder() + /// .configure(Regex::config().prefilter(Some(pre))) + /// .build(r"Bruce \w+")?; + /// let hay = "Hello Bruce Springsteen!"; + /// // Oops! No match found, but there should be one! + /// assert_eq!(None, re.find(hay)); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn prefilter(self, pre: Option) -> Config { + Config { pre: Some(pre), ..self } + } + + /// Configures what kinds of groups are compiled as "capturing" in the + /// underlying regex engine. + /// + /// This is set to [`WhichCaptures::All`] by default. Callers may wish to + /// use [`WhichCaptures::Implicit`] in cases where one wants avoid the + /// overhead of capture states for explicit groups. 
+ /// + /// Note that another approach to avoiding the overhead of capture groups + /// is by using non-capturing groups in the regex pattern. That is, + /// `(?:a)` instead of `(a)`. This option is useful when you can't control + /// the concrete syntax but know that you don't need the underlying capture + /// states. For example, using `WhichCaptures::Implicit` will behave as if + /// all explicit capturing groups in the pattern were non-capturing. + /// + /// Setting this to `WhichCaptures::None` is usually not the right thing to + /// do. When no capture states are compiled, some regex engines (such as + /// the `PikeVM`) won't be able to report match offsets. This will manifest + /// as no match being found. + /// + /// # Example + /// + /// This example demonstrates how the results of capture groups can change + /// based on this option. First we show the default (all capture groups in + /// the pattern are capturing): + /// + /// ``` + /// use regex_automata::{meta::Regex, Match, Span}; + /// + /// let re = Regex::new(r"foo([0-9]+)bar")?; + /// let hay = "foo123bar"; + /// + /// let mut caps = re.create_captures(); + /// re.captures(hay, &mut caps); + /// assert_eq!(Some(Span::from(0..9)), caps.get_group(0)); + /// assert_eq!(Some(Span::from(3..6)), caps.get_group(1)); + /// + /// Ok::<(), Box>(()) + /// ``` + /// + /// And now we show the behavior when we only include implicit capture + /// groups. In this case, we can only find the overall match span, but the + /// spans of any other explicit group don't exist because they are treated + /// as non-capturing. (In effect, when `WhichCaptures::Implicit` is used, + /// there is no real point in using [`Regex::captures`] since it will never + /// be able to report more information than [`Regex::find`].) 
+ /// + /// ``` + /// use regex_automata::{ + /// meta::Regex, + /// nfa::thompson::WhichCaptures, + /// Match, + /// Span, + /// }; + /// + /// let re = Regex::builder() + /// .configure(Regex::config().which_captures(WhichCaptures::Implicit)) + /// .build(r"foo([0-9]+)bar")?; + /// let hay = "foo123bar"; + /// + /// let mut caps = re.create_captures(); + /// re.captures(hay, &mut caps); + /// assert_eq!(Some(Span::from(0..9)), caps.get_group(0)); + /// assert_eq!(None, caps.get_group(1)); + /// + /// Ok::<(), Box>(()) + /// ``` + pub fn which_captures(mut self, which_captures: WhichCaptures) -> Config { + self.which_captures = Some(which_captures); + self + } + + /// Sets the size limit, in bytes, to enforce on the construction of every + /// NFA build by the meta regex engine. + /// + /// Setting it to `None` disables the limit. This is not recommended if + /// you're compiling untrusted patterns. + /// + /// Note that this limit is applied to _each_ NFA built, and if any of + /// them exceed the limit, then construction will fail. This limit does + /// _not_ correspond to the total memory used by all NFAs in the meta regex + /// engine. + /// + /// This defaults to some reasonable number that permits most reasonable + /// patterns. + /// + /// # Example + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::meta::Regex; + /// + /// let result = Regex::builder() + /// .configure(Regex::config().nfa_size_limit(Some(20 * (1<<10)))) + /// // Not even 20KB is enough to build a single large Unicode class! + /// .build(r"\pL"); + /// assert!(result.is_err()); + /// + /// // But notice that building such a regex with the exact same limit + /// // can succeed depending on other aspects of the configuration. For + /// // example, a single *forward* NFA will (at time of writing) fit into + /// // the 20KB limit, but a *reverse* NFA of the same pattern will not. 
+ /// // So if one configures a meta regex such that a reverse NFA is never + /// // needed and thus never built, then the 20KB limit will be enough for + /// // a pattern like \pL! + /// let result = Regex::builder() + /// .configure(Regex::config() + /// .nfa_size_limit(Some(20 * (1<<10))) + /// // The DFAs are the only thing that (currently) need a reverse + /// // NFA. So if both are disabled, the meta regex engine will + /// // skip building the reverse NFA. Note that this isn't an API + /// // guarantee. A future semver compatible version may introduce + /// // new use cases for a reverse NFA. + /// .hybrid(false) + /// .dfa(false) + /// ) + /// // Not even 20KB is enough to build a single large Unicode class! + /// .build(r"\pL"); + /// assert!(result.is_ok()); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn nfa_size_limit(self, limit: Option) -> Config { + Config { nfa_size_limit: Some(limit), ..self } + } + + /// Sets the size limit, in bytes, for the one-pass DFA. + /// + /// Setting it to `None` disables the limit. Disabling the limit is + /// strongly discouraged when compiling untrusted patterns. Even if the + /// patterns are trusted, it still may not be a good idea, since a one-pass + /// DFA can use a lot of memory. With that said, as the size of a regex + /// increases, the likelihood of it being one-pass likely decreases. + /// + /// This defaults to some reasonable number that permits most reasonable + /// one-pass patterns. + /// + /// # Example + /// + /// This shows how to set the one-pass DFA size limit. Note that since + /// a one-pass DFA is an optional component of the meta regex engine, + /// this size limit only impacts what is built internally and will never + /// determine whether a `Regex` itself fails to build. 
+ /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::meta::Regex; + /// + /// let result = Regex::builder() + /// .configure(Regex::config().onepass_size_limit(Some(2 * (1<<20)))) + /// .build(r"\pL{5}"); + /// assert!(result.is_ok()); + /// # Ok::<(), Box>(()) + /// ``` + pub fn onepass_size_limit(self, limit: Option) -> Config { + Config { onepass_size_limit: Some(limit), ..self } + } + + /// Set the cache capacity, in bytes, for the lazy DFA. + /// + /// The cache capacity of the lazy DFA determines approximately how much + /// heap memory it is allowed to use to store its state transitions. The + /// state transitions are computed at search time, and if the cache fills + /// up it, it is cleared. At this point, any previously generated state + /// transitions are lost and are re-generated if they're needed again. + /// + /// This sort of cache filling and clearing works quite well _so long as + /// cache clearing happens infrequently_. If it happens too often, then the + /// meta regex engine will stop using the lazy DFA and switch over to a + /// different regex engine. + /// + /// In cases where the cache is cleared too often, it may be possible to + /// give the cache more space and reduce (or eliminate) how often it is + /// cleared. Similarly, sometimes a regex is so big that the lazy DFA isn't + /// used at all if its cache capacity isn't big enough. + /// + /// The capacity set here is a _limit_ on how much memory is used. The + /// actual memory used is only allocated as it's needed. + /// + /// Determining the right value for this is a little tricky and will likely + /// required some profiling. Enabling the `logging` feature and setting the + /// log level to `trace` will also tell you how often the cache is being + /// cleared. 
+ /// + /// # Example + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::meta::Regex; + /// + /// let result = Regex::builder() + /// .configure(Regex::config().hybrid_cache_capacity(20 * (1<<20))) + /// .build(r"\pL{5}"); + /// assert!(result.is_ok()); + /// # Ok::<(), Box>(()) + /// ``` + pub fn hybrid_cache_capacity(self, limit: usize) -> Config { + Config { hybrid_cache_capacity: Some(limit), ..self } + } + + /// Sets the size limit, in bytes, for heap memory used for a fully + /// compiled DFA. + /// + /// **NOTE:** If you increase this, you'll likely also need to increase + /// [`Config::dfa_state_limit`]. + /// + /// In contrast to the lazy DFA, building a full DFA requires computing + /// all of its state transitions up front. This can be a very expensive + /// process, and runs in worst case `2^n` time and space (where `n` is + /// proportional to the size of the regex). However, a full DFA unlocks + /// some additional optimization opportunities. + /// + /// Because full DFAs can be so expensive, the default limits for them are + /// incredibly small. Generally speaking, if your regex is moderately big + /// or if you're using Unicode features (`\w` is Unicode-aware by default + /// for example), then you can expect that the meta regex engine won't even + /// attempt to build a DFA for it. + /// + /// If this and [`Config::dfa_state_limit`] are set to `None`, then the + /// meta regex will not use any sort of limits when deciding whether to + /// build a DFA. This in turn makes construction of a `Regex` take + /// worst case exponential time and space. Even short patterns can result + /// in huge space blow ups. So it is strongly recommended to keep some kind + /// of limit set! + /// + /// The default is set to a small number that permits some simple regexes + /// to get compiled into DFAs in reasonable time. 
+ /// + /// # Example + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::meta::Regex; + /// + /// let result = Regex::builder() + /// // 100MB is much bigger than the default. + /// .configure(Regex::config() + /// .dfa_size_limit(Some(100 * (1<<20))) + /// // We don't care about size too much here, so just + /// // remove the NFA state limit altogether. + /// .dfa_state_limit(None)) + /// .build(r"\pL{5}"); + /// assert!(result.is_ok()); + /// # Ok::<(), Box>(()) + /// ``` + pub fn dfa_size_limit(self, limit: Option) -> Config { + Config { dfa_size_limit: Some(limit), ..self } + } + + /// Sets a limit on the total number of NFA states, beyond which, a full + /// DFA is not attempted to be compiled. + /// + /// This limit works in concert with [`Config::dfa_size_limit`]. Namely, + /// where as `Config::dfa_size_limit` is applied by attempting to construct + /// a DFA, this limit is used to avoid the attempt in the first place. This + /// is useful to avoid hefty initialization costs associated with building + /// a DFA for cases where it is obvious the DFA will ultimately be too big. + /// + /// By default, this is set to a very small number. + /// + /// # Example + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::meta::Regex; + /// + /// let result = Regex::builder() + /// .configure(Regex::config() + /// // Sometimes the default state limit rejects DFAs even + /// // if they would fit in the size limit. Here, we disable + /// // the check on the number of NFA states and just rely on + /// // the size limit. + /// .dfa_state_limit(None)) + /// .build(r"(?-u)\w{30}"); + /// assert!(result.is_ok()); + /// # Ok::<(), Box>(()) + /// ``` + pub fn dfa_state_limit(self, limit: Option) -> Config { + Config { dfa_state_limit: Some(limit), ..self } + } + + /// Whether to attempt to shrink the size of the alphabet for the regex + /// pattern or not. 
When enabled, the alphabet is shrunk into a set of + /// equivalence classes, where every byte in the same equivalence class + /// cannot discriminate between a match or non-match. + /// + /// **WARNING:** This is only useful for debugging DFAs. Disabling this + /// does not yield any speed advantages. Indeed, disabling it can result + /// in much higher memory usage. Disabling byte classes is useful for + /// debugging the actual generated transitions because it lets one see the + /// transitions defined on actual bytes instead of the equivalence classes. + /// + /// This option is enabled by default and should never be disabled unless + /// one is debugging the meta regex engine's internals. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{meta::Regex, Match}; + /// + /// let re = Regex::builder() + /// .configure(Regex::config().byte_classes(false)) + /// .build(r"[a-z]+")?; + /// let hay = "!!quux!!"; + /// assert_eq!(Some(Match::must(0, 2..6)), re.find(hay)); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn byte_classes(self, yes: bool) -> Config { + Config { byte_classes: Some(yes), ..self } + } + + /// Set the line terminator to be used by the `^` and `$` anchors in + /// multi-line mode. + /// + /// This option has no effect when CRLF mode is enabled. That is, + /// regardless of this setting, `(?Rm:^)` and `(?Rm:$)` will always treat + /// `\r` and `\n` as line terminators (and will never match between a `\r` + /// and a `\n`). + /// + /// By default, `\n` is the line terminator. + /// + /// **Warning**: This does not change the behavior of `.`. To do that, + /// you'll need to configure the syntax option + /// [`syntax::Config::line_terminator`](crate::util::syntax::Config::line_terminator) + /// in addition to this. Otherwise, `.` will continue to match any + /// character other than `\n`. 
+ /// + /// # Example + /// + /// ``` + /// use regex_automata::{meta::Regex, util::syntax, Match}; + /// + /// let re = Regex::builder() + /// .syntax(syntax::Config::new().multi_line(true)) + /// .configure(Regex::config().line_terminator(b'\x00')) + /// .build(r"^foo$")?; + /// let hay = "\x00foo\x00"; + /// assert_eq!(Some(Match::must(0, 1..4)), re.find(hay)); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn line_terminator(self, byte: u8) -> Config { + Config { line_terminator: Some(byte), ..self } + } + + /// Toggle whether the hybrid NFA/DFA (also known as the "lazy DFA") should + /// be available for use by the meta regex engine. + /// + /// Enabling this does not necessarily mean that the lazy DFA will + /// definitely be used. It just means that it will be _available_ for use + /// if the meta regex engine thinks it will be useful. + /// + /// When the `hybrid` crate feature is enabled, then this is enabled by + /// default. Otherwise, if the crate feature is disabled, then this is + /// always disabled, regardless of its setting by the caller. + pub fn hybrid(self, yes: bool) -> Config { + Config { hybrid: Some(yes), ..self } + } + + /// Toggle whether a fully compiled DFA should be available for use by the + /// meta regex engine. + /// + /// Enabling this does not necessarily mean that a DFA will definitely be + /// used. It just means that it will be _available_ for use if the meta + /// regex engine thinks it will be useful. + /// + /// When the `dfa-build` crate feature is enabled, then this is enabled by + /// default. Otherwise, if the crate feature is disabled, then this is + /// always disabled, regardless of its setting by the caller. + pub fn dfa(self, yes: bool) -> Config { + Config { dfa: Some(yes), ..self } + } + + /// Toggle whether a one-pass DFA should be available for use by the meta + /// regex engine. + /// + /// Enabling this does not necessarily mean that a one-pass DFA will + /// definitely be used. 
It just means that it will be _available_ for + /// use if the meta regex engine thinks it will be useful. (Indeed, a + /// one-pass DFA can only be used when the regex is one-pass. See the + /// [`dfa::onepass`](crate::dfa::onepass) module for more details.) + /// + /// When the `dfa-onepass` crate feature is enabled, then this is enabled + /// by default. Otherwise, if the crate feature is disabled, then this is + /// always disabled, regardless of its setting by the caller. + pub fn onepass(self, yes: bool) -> Config { + Config { onepass: Some(yes), ..self } + } + + /// Toggle whether a bounded backtracking regex engine should be available + /// for use by the meta regex engine. + /// + /// Enabling this does not necessarily mean that a bounded backtracker will + /// definitely be used. It just means that it will be _available_ for use + /// if the meta regex engine thinks it will be useful. + /// + /// When the `nfa-backtrack` crate feature is enabled, then this is enabled + /// by default. Otherwise, if the crate feature is disabled, then this is + /// always disabled, regardless of its setting by the caller. + pub fn backtrack(self, yes: bool) -> Config { + Config { backtrack: Some(yes), ..self } + } + + /// Returns the match kind on this configuration, as set by + /// [`Config::match_kind`]. + /// + /// If it was not explicitly set, then a default value is returned. + pub fn get_match_kind(&self) -> MatchKind { + self.match_kind.unwrap_or(MatchKind::LeftmostFirst) + } + + /// Returns whether empty matches must fall on valid UTF-8 boundaries, as + /// set by [`Config::utf8_empty`]. + /// + /// If it was not explicitly set, then a default value is returned. + pub fn get_utf8_empty(&self) -> bool { + self.utf8_empty.unwrap_or(true) + } + + /// Returns whether automatic prefilters are enabled, as set by + /// [`Config::auto_prefilter`]. + /// + /// If it was not explicitly set, then a default value is returned. 
+ pub fn get_auto_prefilter(&self) -> bool { + self.autopre.unwrap_or(true) + } + + /// Returns a manually set prefilter, if one was set by + /// [`Config::prefilter`]. + /// + /// If it was not explicitly set, then a default value is returned. + pub fn get_prefilter(&self) -> Option<&Prefilter> { + self.pre.as_ref().unwrap_or(&None).as_ref() + } + + /// Returns the capture configuration, as set by + /// [`Config::which_captures`]. + /// + /// If it was not explicitly set, then a default value is returned. + pub fn get_which_captures(&self) -> WhichCaptures { + self.which_captures.unwrap_or(WhichCaptures::All) + } + + /// Returns NFA size limit, as set by [`Config::nfa_size_limit`]. + /// + /// If it was not explicitly set, then a default value is returned. + pub fn get_nfa_size_limit(&self) -> Option { + self.nfa_size_limit.unwrap_or(Some(10 * (1 << 20))) + } + + /// Returns one-pass DFA size limit, as set by + /// [`Config::onepass_size_limit`]. + /// + /// If it was not explicitly set, then a default value is returned. + pub fn get_onepass_size_limit(&self) -> Option { + self.onepass_size_limit.unwrap_or(Some(1 * (1 << 20))) + } + + /// Returns hybrid NFA/DFA cache capacity, as set by + /// [`Config::hybrid_cache_capacity`]. + /// + /// If it was not explicitly set, then a default value is returned. + pub fn get_hybrid_cache_capacity(&self) -> usize { + self.hybrid_cache_capacity.unwrap_or(2 * (1 << 20)) + } + + /// Returns DFA size limit, as set by [`Config::dfa_size_limit`]. + /// + /// If it was not explicitly set, then a default value is returned. + pub fn get_dfa_size_limit(&self) -> Option { + // The default for this is VERY small because building a full DFA is + // ridiculously costly. But for regexes that are very small, it can be + // beneficial to use a full DFA. In particular, a full DFA can enable + // additional optimizations via something called "accelerated" states. 
+ // Namely, when there's a state with only a few outgoing transitions, + // we can temporary suspend walking the transition table and use memchr + // for just those outgoing transitions to skip ahead very quickly. + // + // Generally speaking, if Unicode is enabled in your regex and you're + // using some kind of Unicode feature, then it's going to blow this + // size limit. Moreover, Unicode tends to defeat the "accelerated" + // state optimization too, so it's a double whammy. + // + // We also use a limit on the number of NFA states to avoid even + // starting the DFA construction process. Namely, DFA construction + // itself could make lots of initial allocs proportional to the size + // of the NFA, and if the NFA is large, it doesn't make sense to pay + // that cost if we know it's likely to be blown by a large margin. + self.dfa_size_limit.unwrap_or(Some(40 * (1 << 10))) + } + + /// Returns DFA size limit in terms of the number of states in the NFA, as + /// set by [`Config::dfa_state_limit`]. + /// + /// If it was not explicitly set, then a default value is returned. + pub fn get_dfa_state_limit(&self) -> Option { + // Again, as with the size limit, we keep this very small. + self.dfa_state_limit.unwrap_or(Some(30)) + } + + /// Returns whether byte classes are enabled, as set by + /// [`Config::byte_classes`]. + /// + /// If it was not explicitly set, then a default value is returned. + pub fn get_byte_classes(&self) -> bool { + self.byte_classes.unwrap_or(true) + } + + /// Returns the line terminator for this configuration, as set by + /// [`Config::line_terminator`]. + /// + /// If it was not explicitly set, then a default value is returned. + pub fn get_line_terminator(&self) -> u8 { + self.line_terminator.unwrap_or(b'\n') + } + + /// Returns whether the hybrid NFA/DFA regex engine may be used, as set by + /// [`Config::hybrid`]. + /// + /// If it was not explicitly set, then a default value is returned. 
+ pub fn get_hybrid(&self) -> bool { + #[cfg(feature = "hybrid")] + { + self.hybrid.unwrap_or(true) + } + #[cfg(not(feature = "hybrid"))] + { + false + } + } + + /// Returns whether the DFA regex engine may be used, as set by + /// [`Config::dfa`]. + /// + /// If it was not explicitly set, then a default value is returned. + pub fn get_dfa(&self) -> bool { + #[cfg(feature = "dfa-build")] + { + self.dfa.unwrap_or(true) + } + #[cfg(not(feature = "dfa-build"))] + { + false + } + } + + /// Returns whether the one-pass DFA regex engine may be used, as set by + /// [`Config::onepass`]. + /// + /// If it was not explicitly set, then a default value is returned. + pub fn get_onepass(&self) -> bool { + #[cfg(feature = "dfa-onepass")] + { + self.onepass.unwrap_or(true) + } + #[cfg(not(feature = "dfa-onepass"))] + { + false + } + } + + /// Returns whether the bounded backtracking regex engine may be used, as + /// set by [`Config::backtrack`]. + /// + /// If it was not explicitly set, then a default value is returned. + pub fn get_backtrack(&self) -> bool { + #[cfg(feature = "nfa-backtrack")] + { + self.backtrack.unwrap_or(true) + } + #[cfg(not(feature = "nfa-backtrack"))] + { + false + } + } + + /// Overwrite the default configuration such that the options in `o` are + /// always used. If an option in `o` is not set, then the corresponding + /// option in `self` is used. If it's not set in `self` either, then it + /// remains not set. 
+ pub(crate) fn overwrite(&self, o: Config) -> Config { + Config { + match_kind: o.match_kind.or(self.match_kind), + utf8_empty: o.utf8_empty.or(self.utf8_empty), + autopre: o.autopre.or(self.autopre), + pre: o.pre.or_else(|| self.pre.clone()), + which_captures: o.which_captures.or(self.which_captures), + nfa_size_limit: o.nfa_size_limit.or(self.nfa_size_limit), + onepass_size_limit: o + .onepass_size_limit + .or(self.onepass_size_limit), + hybrid_cache_capacity: o + .hybrid_cache_capacity + .or(self.hybrid_cache_capacity), + hybrid: o.hybrid.or(self.hybrid), + dfa: o.dfa.or(self.dfa), + dfa_size_limit: o.dfa_size_limit.or(self.dfa_size_limit), + dfa_state_limit: o.dfa_state_limit.or(self.dfa_state_limit), + onepass: o.onepass.or(self.onepass), + backtrack: o.backtrack.or(self.backtrack), + byte_classes: o.byte_classes.or(self.byte_classes), + line_terminator: o.line_terminator.or(self.line_terminator), + } + } +} + +/// A builder for configuring and constructing a `Regex`. +/// +/// The builder permits configuring two different aspects of a `Regex`: +/// +/// * [`Builder::configure`] will set high-level configuration options as +/// described by a [`Config`]. +/// * [`Builder::syntax`] will set the syntax level configuration options +/// as described by a [`util::syntax::Config`](crate::util::syntax::Config). +/// This only applies when building a `Regex` from pattern strings. +/// +/// Once configured, the builder can then be used to construct a `Regex` from +/// one of 4 different inputs: +/// +/// * [`Builder::build`] creates a regex from a single pattern string. +/// * [`Builder::build_many`] creates a regex from many pattern strings. +/// * [`Builder::build_from_hir`] creates a regex from a +/// [`regex-syntax::Hir`](Hir) expression. +/// * [`Builder::build_many_from_hir`] creates a regex from many +/// [`regex-syntax::Hir`](Hir) expressions. 
+/// +/// The latter two methods in particular provide a way to construct a fully +/// feature regular expression matcher directly from an `Hir` expression +/// without having to first convert it to a string. (This is in contrast to the +/// top-level `regex` crate which intentionally provides no such API in order +/// to avoid making `regex-syntax` a public dependency.) +/// +/// As a convenience, this builder may be created via [`Regex::builder`], which +/// may help avoid an extra import. +/// +/// # Example: change the line terminator +/// +/// This example shows how to enable multi-line mode by default and change the +/// line terminator to the NUL byte: +/// +/// ``` +/// use regex_automata::{meta::Regex, util::syntax, Match}; +/// +/// let re = Regex::builder() +/// .syntax(syntax::Config::new().multi_line(true)) +/// .configure(Regex::config().line_terminator(b'\x00')) +/// .build(r"^foo$")?; +/// let hay = "\x00foo\x00"; +/// assert_eq!(Some(Match::must(0, 1..4)), re.find(hay)); +/// +/// # Ok::<(), Box>(()) +/// ``` +/// +/// # Example: disable UTF-8 requirement +/// +/// By default, regex patterns are required to match UTF-8. This includes +/// regex patterns that can produce matches of length zero. In the case of an +/// empty match, by default, matches will not appear between the code units of +/// a UTF-8 encoded codepoint. +/// +/// However, it can be useful to disable this requirement, particularly if +/// you're searching things like `&[u8]` that are not known to be valid UTF-8. +/// +/// ``` +/// use regex_automata::{meta::Regex, util::syntax, Match}; +/// +/// let mut builder = Regex::builder(); +/// // Disables the requirement that non-empty matches match UTF-8. +/// builder.syntax(syntax::Config::new().utf8(false)); +/// // Disables the requirement that empty matches match UTF-8 boundaries. 
+/// builder.configure(Regex::config().utf8_empty(false)); +/// +/// // We can match raw bytes via \xZZ syntax, but we need to disable +/// // Unicode mode to do that. We could disable it everywhere, or just +/// // selectively, as shown here. +/// let re = builder.build(r"(?-u:\xFF)foo(?-u:\xFF)")?; +/// let hay = b"\xFFfoo\xFF"; +/// assert_eq!(Some(Match::must(0, 0..5)), re.find(hay)); +/// +/// // We can also match between code units. +/// let re = builder.build(r"")?; +/// let hay = "☃"; +/// assert_eq!(re.find_iter(hay).collect::>(), vec![ +/// Match::must(0, 0..0), +/// Match::must(0, 1..1), +/// Match::must(0, 2..2), +/// Match::must(0, 3..3), +/// ]); +/// +/// # Ok::<(), Box>(()) +/// ``` +#[derive(Clone, Debug)] +pub struct Builder { + config: Config, + ast: ast::parse::ParserBuilder, + hir: hir::translate::TranslatorBuilder, +} + +impl Builder { + /// Creates a new builder for configuring and constructing a [`Regex`]. + pub fn new() -> Builder { + Builder { + config: Config::default(), + ast: ast::parse::ParserBuilder::new(), + hir: hir::translate::TranslatorBuilder::new(), + } + } + + /// Builds a `Regex` from a single pattern string. + /// + /// If there was a problem parsing the pattern or a problem turning it into + /// a regex matcher, then an error is returned. + /// + /// # Example + /// + /// This example shows how to configure syntax options. + /// + /// ``` + /// use regex_automata::{meta::Regex, util::syntax, Match}; + /// + /// let re = Regex::builder() + /// .syntax(syntax::Config::new().crlf(true).multi_line(true)) + /// .build(r"^foo$")?; + /// let hay = "\r\nfoo\r\n"; + /// assert_eq!(Some(Match::must(0, 2..5)), re.find(hay)); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn build(&self, pattern: &str) -> Result { + self.build_many(&[pattern]) + } + + /// Builds a `Regex` from many pattern strings. 
+ /// + /// If there was a problem parsing any of the patterns or a problem turning + /// them into a regex matcher, then an error is returned. + /// + /// # Example: finding the pattern that caused an error + /// + /// When a syntax error occurs, it is possible to ask which pattern + /// caused the syntax error. + /// + /// ``` + /// use regex_automata::{meta::Regex, PatternID}; + /// + /// let err = Regex::builder() + /// .build_many(&["a", "b", r"\p{Foo}", "c"]) + /// .unwrap_err(); + /// assert_eq!(Some(PatternID::must(2)), err.pattern()); + /// ``` + /// + /// # Example: zero patterns is valid + /// + /// Building a regex with zero patterns results in a regex that never + /// matches anything. Because this routine is generic, passing an empty + /// slice usually requires a turbo-fish (or something else to help type + /// inference). + /// + /// ``` + /// use regex_automata::{meta::Regex, util::syntax, Match}; + /// + /// let re = Regex::builder() + /// .build_many::<&str>(&[])?; + /// assert_eq!(None, re.find("")); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn build_many>( + &self, + patterns: &[P], + ) -> Result { + use crate::util::primitives::IteratorIndexExt; + log! { + debug!("building meta regex with {} patterns:", patterns.len()); + for (pid, p) in patterns.iter().with_pattern_ids() { + let p = p.as_ref(); + // We might split a grapheme with this truncation logic, but + // that's fine. We at least avoid splitting a codepoint. + let maxoff = p + .char_indices() + .map(|(i, ch)| i + ch.len_utf8()) + .take(1000) + .last() + .unwrap_or(0); + if maxoff < p.len() { + debug!("{:?}: {}[... 
snip ...]", pid, &p[..maxoff]); + } else { + debug!("{:?}: {}", pid, p); + } + } + } + let (mut asts, mut hirs) = (vec![], vec![]); + for (pid, p) in patterns.iter().with_pattern_ids() { + let ast = self + .ast + .build() + .parse(p.as_ref()) + .map_err(|err| BuildError::ast(pid, err))?; + asts.push(ast); + } + for ((pid, p), ast) in + patterns.iter().with_pattern_ids().zip(asts.iter()) + { + let hir = self + .hir + .build() + .translate(p.as_ref(), ast) + .map_err(|err| BuildError::hir(pid, err))?; + hirs.push(hir); + } + self.build_many_from_hir(&hirs) + } + + /// Builds a `Regex` directly from an `Hir` expression. + /// + /// This is useful if you needed to parse a pattern string into an `Hir` + /// for other reasons (such as analysis or transformations). This routine + /// permits building a `Regex` directly from the `Hir` expression instead + /// of first converting the `Hir` back to a pattern string. + /// + /// When using this method, any options set via [`Builder::syntax`] are + /// ignored. Namely, the syntax options only apply when parsing a pattern + /// string, which isn't relevant here. + /// + /// If there was a problem building the underlying regex matcher for the + /// given `Hir`, then an error is returned. + /// + /// # Example + /// + /// This example shows how one can hand-construct an `Hir` expression and + /// build a regex from it without doing any parsing at all. 
+ /// + /// ``` + /// use { + /// regex_automata::{meta::Regex, Match}, + /// regex_syntax::hir::{Hir, Look}, + /// }; + /// + /// // (?Rm)^foo$ + /// let hir = Hir::concat(vec![ + /// Hir::look(Look::StartCRLF), + /// Hir::literal("foo".as_bytes()), + /// Hir::look(Look::EndCRLF), + /// ]); + /// let re = Regex::builder() + /// .build_from_hir(&hir)?; + /// let hay = "\r\nfoo\r\n"; + /// assert_eq!(Some(Match::must(0, 2..5)), re.find(hay)); + /// + /// Ok::<(), Box>(()) + /// ``` + pub fn build_from_hir(&self, hir: &Hir) -> Result { + self.build_many_from_hir(&[hir]) + } + + /// Builds a `Regex` directly from many `Hir` expressions. + /// + /// This is useful if you needed to parse pattern strings into `Hir` + /// expressions for other reasons (such as analysis or transformations). + /// This routine permits building a `Regex` directly from the `Hir` + /// expressions instead of first converting the `Hir` expressions back to + /// pattern strings. + /// + /// When using this method, any options set via [`Builder::syntax`] are + /// ignored. Namely, the syntax options only apply when parsing a pattern + /// string, which isn't relevant here. + /// + /// If there was a problem building the underlying regex matcher for the + /// given `Hir` expressions, then an error is returned. + /// + /// Note that unlike [`Builder::build_many`], this can only fail as a + /// result of building the underlying matcher. In that case, there is + /// no single `Hir` expression that can be isolated as a reason for the + /// failure. So if this routine fails, it's not possible to determine which + /// `Hir` expression caused the failure. + /// + /// # Example + /// + /// This example shows how one can hand-construct multiple `Hir` + /// expressions and build a single regex from them without doing any + /// parsing at all. 
+ /// + /// ``` + /// use { + /// regex_automata::{meta::Regex, Match}, + /// regex_syntax::hir::{Hir, Look}, + /// }; + /// + /// // (?Rm)^foo$ + /// let hir1 = Hir::concat(vec![ + /// Hir::look(Look::StartCRLF), + /// Hir::literal("foo".as_bytes()), + /// Hir::look(Look::EndCRLF), + /// ]); + /// // (?Rm)^bar$ + /// let hir2 = Hir::concat(vec![ + /// Hir::look(Look::StartCRLF), + /// Hir::literal("bar".as_bytes()), + /// Hir::look(Look::EndCRLF), + /// ]); + /// let re = Regex::builder() + /// .build_many_from_hir(&[&hir1, &hir2])?; + /// let hay = "\r\nfoo\r\nbar"; + /// let got: Vec = re.find_iter(hay).collect(); + /// let expected = vec![ + /// Match::must(0, 2..5), + /// Match::must(1, 7..10), + /// ]; + /// assert_eq!(expected, got); + /// + /// Ok::<(), Box>(()) + /// ``` + pub fn build_many_from_hir>( + &self, + hirs: &[H], + ) -> Result { + let config = self.config.clone(); + // We collect the HIRs into a vec so we can write internal routines + // with '&[&Hir]'. i.e., Don't use generics everywhere to keep code + // bloat down.. + let hirs: Vec<&Hir> = hirs.iter().map(|hir| hir.borrow()).collect(); + let info = RegexInfo::new(config, &hirs); + let strat = strategy::new(&info, &hirs)?; + let pool = { + let strat = Arc::clone(&strat); + let create: CachePoolFn = Box::new(move || strat.create_cache()); + Pool::new(create) + }; + Ok(Regex { imp: Arc::new(RegexI { strat, info }), pool }) + } + + /// Configure the behavior of a `Regex`. + /// + /// This configuration controls non-syntax options related to the behavior + /// of a `Regex`. This includes things like whether empty matches can split + /// a codepoint, prefilters, line terminators and a long list of options + /// for configuring which regex engines the meta regex engine will be able + /// to use internally. + /// + /// # Example + /// + /// This example shows how to disable UTF-8 empty mode. This will permit + /// empty matches to occur between the UTF-8 encoding of a codepoint. 
+ /// + /// ``` + /// use regex_automata::{meta::Regex, Match}; + /// + /// let re = Regex::new("")?; + /// let got: Vec = re.find_iter("☃").collect(); + /// // Matches only occur at the beginning and end of the snowman. + /// assert_eq!(got, vec![ + /// Match::must(0, 0..0), + /// Match::must(0, 3..3), + /// ]); + /// + /// let re = Regex::builder() + /// .configure(Regex::config().utf8_empty(false)) + /// .build("")?; + /// let got: Vec = re.find_iter("☃").collect(); + /// // Matches now occur at every position! + /// assert_eq!(got, vec![ + /// Match::must(0, 0..0), + /// Match::must(0, 1..1), + /// Match::must(0, 2..2), + /// Match::must(0, 3..3), + /// ]); + /// + /// Ok::<(), Box>(()) + /// ``` + pub fn configure(&mut self, config: Config) -> &mut Builder { + self.config = self.config.overwrite(config); + self + } + + /// Configure the syntax options when parsing a pattern string while + /// building a `Regex`. + /// + /// These options _only_ apply when [`Builder::build`] or [`Builder::build_many`] + /// are used. The other build methods accept `Hir` values, which have + /// already been parsed. + /// + /// # Example + /// + /// This example shows how to enable case insensitive mode. + /// + /// ``` + /// use regex_automata::{meta::Regex, util::syntax, Match}; + /// + /// let re = Regex::builder() + /// .syntax(syntax::Config::new().case_insensitive(true)) + /// .build(r"δ")?; + /// assert_eq!(Some(Match::must(0, 0..2)), re.find(r"Δ")); + /// + /// Ok::<(), Box>(()) + /// ``` + pub fn syntax( + &mut self, + config: crate::util::syntax::Config, + ) -> &mut Builder { + config.apply_ast(&mut self.ast); + config.apply_hir(&mut self.hir); + self + } +} + +#[cfg(test)] +mod tests { + use super::*; + + // I found this in the course of building out the benchmark suite for + // rebar. 
+ #[test] + fn regression_suffix_literal_count() { + let _ = env_logger::try_init(); + + let re = Regex::new(r"[a-zA-Z]+ing").unwrap(); + assert_eq!(1, re.find_iter("tingling").count()); + } +} diff --git a/vendor/regex-automata/src/meta/reverse_inner.rs b/vendor/regex-automata/src/meta/reverse_inner.rs new file mode 100644 index 0000000..3d78779 --- /dev/null +++ b/vendor/regex-automata/src/meta/reverse_inner.rs @@ -0,0 +1,220 @@ +/*! +A module dedicated to plucking inner literals out of a regex pattern, and +then constructing a prefilter for them. We also include a regex pattern +"prefix" that corresponds to the bits of the regex that need to match before +the literals do. The reverse inner optimization then proceeds by looking for +matches of the inner literal(s), and then doing a reverse search of the prefix +from the start of the literal match to find the overall start position of the +match. + +The essential invariant we want to uphold here is that the literals we return +reflect a set where *at least* one of them must match in order for the overall +regex to match. We also need to maintain the invariant that the regex prefix +returned corresponds to the entirety of the regex up until the literals we +return. + +This somewhat limits what we can do. That is, if we a regex like +`\w+(@!|%%)\w+`, then we can pluck the `{@!, %%}` out and build a prefilter +from it. Then we just need to compile `\w+` in reverse. No fuss no muss. But if +we have a regex like \d+@!|\w+%%`, then we get kind of stymied. Technically, +we could still extract `{@!, %%}`, and it is true that at least of them must +match. But then, what is our regex prefix? Again, in theory, that could be +`\d+|\w+`, but that's not quite right, because the `\d+` only matches when `@!` +matches, and `\w+` only matches when `%%` matches. + +All of that is technically possible to do, but it seemingly requires a lot of +sophistication and machinery. 
Probably the way to tackle that is with some kind +of formalism and approach this problem more generally. + +For now, the code below basically just looks for a top-level concatenation. +And if it can find one, it looks for literals in each of the direct child +sub-expressions of that concatenation. If some good ones are found, we return +those and a concatenation of the Hir expressions seen up to that point. +*/ + +use alloc::vec::Vec; + +use regex_syntax::hir::{self, literal, Hir, HirKind}; + +use crate::{util::prefilter::Prefilter, MatchKind}; + +/// Attempts to extract an "inner" prefilter from the given HIR expressions. If +/// one was found, then a concatenation of the HIR expressions that precede it +/// is returned. +/// +/// The idea here is that the prefilter returned can be used to find candidate +/// matches. And then the HIR returned can be used to build a reverse regex +/// matcher, which will find the start of the candidate match. Finally, the +/// match still has to be confirmed with a normal anchored forward scan to find +/// the end position of the match. +/// +/// Note that this assumes leftmost-first match semantics, so callers must +/// not call this otherwise. +pub(crate) fn extract(hirs: &[&Hir]) -> Option<(Hir, Prefilter)> { + if hirs.len() != 1 { + debug!( + "skipping reverse inner optimization since it only \ + supports 1 pattern, {} were given", + hirs.len(), + ); + return None; + } + let mut concat = match top_concat(hirs[0]) { + Some(concat) => concat, + None => { + debug!( + "skipping reverse inner optimization because a top-level \ + concatenation could not found", + ); + return None; + } + }; + // We skip the first HIR because if it did have a prefix prefilter in it, + // we probably wouldn't be here looking for an inner prefilter. 
+ for i in 1..concat.len() { + let hir = &concat[i]; + let pre = match prefilter(hir) { + None => continue, + Some(pre) => pre, + }; + // Even if we got a prefilter, if it isn't consider "fast," then we + // probably don't want to bother with it. Namely, since the reverse + // inner optimization requires some overhead, it likely only makes + // sense if the prefilter scan itself is (believed) to be much faster + // than the regex engine. + if !pre.is_fast() { + debug!( + "skipping extracted inner prefilter because \ + it probably isn't fast" + ); + continue; + } + let concat_suffix = Hir::concat(concat.split_off(i)); + let concat_prefix = Hir::concat(concat); + // Look for a prefilter again. Why? Because above we only looked for + // a prefilter on the individual 'hir', but we might be able to find + // something better and more discriminatory by looking at the entire + // suffix. We don't do this above to avoid making this loop worst case + // quadratic in the length of 'concat'. + let pre2 = match prefilter(&concat_suffix) { + None => pre, + Some(pre2) => { + if pre2.is_fast() { + pre2 + } else { + pre + } + } + }; + return Some((concat_prefix, pre2)); + } + debug!( + "skipping reverse inner optimization because a top-level \ + sub-expression with a fast prefilter could not be found" + ); + None +} + +/// Attempt to extract a prefilter from an HIR expression. +/// +/// We do a little massaging here to do our best that the prefilter we get out +/// of this is *probably* fast. Basically, the false positive rate has a much +/// higher impact for things like the reverse inner optimization because more +/// work needs to potentially be done for each candidate match. +/// +/// Note that this assumes leftmost-first match semantics, so callers must +/// not call this otherwise. 
+fn prefilter(hir: &Hir) -> Option { + let mut extractor = literal::Extractor::new(); + extractor.kind(literal::ExtractKind::Prefix); + let mut prefixes = extractor.extract(hir); + debug!( + "inner prefixes (len={:?}) extracted before optimization: {:?}", + prefixes.len(), + prefixes + ); + // Since these are inner literals, we know they cannot be exact. But the + // extractor doesn't know this. We mark them as inexact because this might + // impact literal optimization. Namely, optimization weights "all literals + // are exact" as very high, because it presumes that any match results in + // an overall match. But of course, that is not the case here. + // + // In practice, this avoids plucking out a ASCII-only \s as an alternation + // of single-byte whitespace characters. + prefixes.make_inexact(); + prefixes.optimize_for_prefix_by_preference(); + debug!( + "inner prefixes (len={:?}) extracted after optimization: {:?}", + prefixes.len(), + prefixes + ); + prefixes + .literals() + .and_then(|lits| Prefilter::new(MatchKind::LeftmostFirst, lits)) +} + +/// Looks for a "top level" HirKind::Concat item in the given HIR. This will +/// try to return one even if it's embedded in a capturing group, but is +/// otherwise pretty conservative in what is returned. +/// +/// The HIR returned is a complete copy of the concat with all capturing +/// groups removed. In effect, the concat returned is "flattened" with respect +/// to capturing groups. This makes the detection logic above for prefixes +/// a bit simpler, and it works because 1) capturing groups never influence +/// whether a match occurs or not and 2) capturing groups are not used when +/// doing the reverse inner search to find the start of the match. 
+fn top_concat(mut hir: &Hir) -> Option> { + loop { + hir = match hir.kind() { + HirKind::Empty + | HirKind::Literal(_) + | HirKind::Class(_) + | HirKind::Look(_) + | HirKind::Repetition(_) + | HirKind::Alternation(_) => return None, + HirKind::Capture(hir::Capture { ref sub, .. }) => sub, + HirKind::Concat(ref subs) => { + // We are careful to only do the flattening/copy when we know + // we have a "top level" concat we can inspect. This avoids + // doing extra work in cases where we definitely won't use it. + // (This might still be wasted work if we can't go on to find + // some literals to extract.) + let concat = + Hir::concat(subs.iter().map(|h| flatten(h)).collect()); + return match concat.into_kind() { + HirKind::Concat(xs) => Some(xs), + // It is actually possible for this case to occur, because + // 'Hir::concat' might simplify the expression to the point + // that concatenations are actually removed. One wonders + // whether this leads to other cases where we should be + // extracting literals, but in theory, I believe if we do + // get here, then it means that a "real" prefilter failed + // to be extracted and we should probably leave well enough + // alone. (A "real" prefilter is unbothered by "top-level + // concats" and "capturing groups.") + _ => return None, + }; + } + }; + } +} + +/// Returns a copy of the given HIR but with all capturing groups removed. +fn flatten(hir: &Hir) -> Hir { + match hir.kind() { + HirKind::Empty => Hir::empty(), + HirKind::Literal(hir::Literal(ref x)) => Hir::literal(x.clone()), + HirKind::Class(ref x) => Hir::class(x.clone()), + HirKind::Look(ref x) => Hir::look(x.clone()), + HirKind::Repetition(ref x) => Hir::repetition(x.with(flatten(&x.sub))), + // This is the interesting case. We just drop the group information + // entirely and use the child HIR itself. + HirKind::Capture(hir::Capture { ref sub, .. 
}) => flatten(sub), + HirKind::Alternation(ref xs) => { + Hir::alternation(xs.iter().map(|x| flatten(x)).collect()) + } + HirKind::Concat(ref xs) => { + Hir::concat(xs.iter().map(|x| flatten(x)).collect()) + } + } +} diff --git a/vendor/regex-automata/src/meta/stopat.rs b/vendor/regex-automata/src/meta/stopat.rs new file mode 100644 index 0000000..c4dcd79 --- /dev/null +++ b/vendor/regex-automata/src/meta/stopat.rs @@ -0,0 +1,212 @@ +/*! +This module defines two bespoke forward DFA search routines. One for the lazy +DFA and one for the fully compiled DFA. These routines differ from the normal +ones by reporting the position at which the search terminates when a match +*isn't* found. + +This position at which a search terminates is useful in contexts where the meta +regex engine runs optimizations that could go quadratic if we aren't careful. +Namely, a regex search *could* scan to the end of the haystack only to report a +non-match. If the caller doesn't know that the search scanned to the end of the +haystack, it might restart the search at the next literal candidate it finds +and repeat the process. + +Providing the caller with the position at which the search stopped provides a +way for the caller to determine the point at which subsequent scans should not +pass. This is principally used in the "reverse inner" optimization, which works +like this: + +1. Look for a match of an inner literal. Say, 'Z' in '\w+Z\d+'. +2. At the spot where 'Z' matches, do a reverse anchored search from there for +'\w+'. +3. If the reverse search matches, it corresponds to the start position of a +(possible) match. At this point, do a forward anchored search to find the end +position. If an end position is found, then we have a match and we know its +bounds. + +If the forward anchored search in (3) searches the entire rest of the haystack +but reports a non-match, then a naive implementation of the above will continue +back at step 1 looking for more candidates. 
There might still be a match to be +found! It's possible. But we already scanned the whole haystack. So if we keep +repeating the process, then we might wind up taking quadratic time in the size +of the haystack, which is not great. + +So if the forward anchored search in (3) reports the position at which it +stops, then we can detect whether quadratic behavior might be occurring in +steps (1) and (2). For (1), it occurs if the literal candidate found occurs +*before* the end of the previous search in (3), since that means we're now +going to look for another match in a place where the forward search has already +scanned. It is *correct* to do so, but our technique has become inefficient. +For (2), quadratic behavior occurs similarly when its reverse search extends +past the point where the previous forward search in (3) terminated. Indeed, to +implement (2), we use the sibling 'limited' module for ensuring our reverse +scan doesn't go further than we want. + +See the 'opt/reverse-inner' benchmarks in rebar for a real demonstration of +how quadratic behavior is mitigated. 
+*/ + +use crate::{meta::error::RetryFailError, HalfMatch, Input, MatchError}; + +#[cfg(feature = "dfa-build")] +pub(crate) fn dfa_try_search_half_fwd( + dfa: &crate::dfa::dense::DFA>, + input: &Input<'_>, +) -> Result, RetryFailError> { + use crate::dfa::{accel, Automaton}; + + let mut mat = None; + let mut sid = dfa.start_state_forward(input)?; + let mut at = input.start(); + while at < input.end() { + sid = dfa.next_state(sid, input.haystack()[at]); + if dfa.is_special_state(sid) { + if dfa.is_match_state(sid) { + let pattern = dfa.match_pattern(sid, 0); + mat = Some(HalfMatch::new(pattern, at)); + if input.get_earliest() { + return Ok(mat.ok_or(at)); + } + if dfa.is_accel_state(sid) { + let needs = dfa.accelerator(sid); + at = accel::find_fwd(needs, input.haystack(), at) + .unwrap_or(input.end()); + continue; + } + } else if dfa.is_accel_state(sid) { + let needs = dfa.accelerator(sid); + at = accel::find_fwd(needs, input.haystack(), at) + .unwrap_or(input.end()); + continue; + } else if dfa.is_dead_state(sid) { + return Ok(mat.ok_or(at)); + } else if dfa.is_quit_state(sid) { + return Err(MatchError::quit(input.haystack()[at], at).into()); + } else { + // Ideally we wouldn't use a DFA that specialized start states + // and thus 'is_start_state()' could never be true here, but in + // practice we reuse the DFA created for the full regex which + // will specialize start states whenever there is a prefilter. 
+ debug_assert!(dfa.is_start_state(sid)); + } + } + at += 1; + } + dfa_eoi_fwd(dfa, input, &mut sid, &mut mat)?; + Ok(mat.ok_or(at)) +} + +#[cfg(feature = "hybrid")] +pub(crate) fn hybrid_try_search_half_fwd( + dfa: &crate::hybrid::dfa::DFA, + cache: &mut crate::hybrid::dfa::Cache, + input: &Input<'_>, +) -> Result, RetryFailError> { + let mut mat = None; + let mut sid = dfa.start_state_forward(cache, input)?; + let mut at = input.start(); + while at < input.end() { + sid = dfa + .next_state(cache, sid, input.haystack()[at]) + .map_err(|_| MatchError::gave_up(at))?; + if sid.is_tagged() { + if sid.is_match() { + let pattern = dfa.match_pattern(cache, sid, 0); + mat = Some(HalfMatch::new(pattern, at)); + if input.get_earliest() { + return Ok(mat.ok_or(at)); + } + } else if sid.is_dead() { + return Ok(mat.ok_or(at)); + } else if sid.is_quit() { + return Err(MatchError::quit(input.haystack()[at], at).into()); + } else { + // We should NEVER get an unknown state ID back from + // dfa.next_state(). + debug_assert!(!sid.is_unknown()); + // Ideally we wouldn't use a lazy DFA that specialized start + // states and thus 'sid.is_start()' could never be true here, + // but in practice we reuse the lazy DFA created for the full + // regex which will specialize start states whenever there is + // a prefilter. 
+ debug_assert!(sid.is_start()); + } + } + at += 1; + } + hybrid_eoi_fwd(dfa, cache, input, &mut sid, &mut mat)?; + Ok(mat.ok_or(at)) +} + +#[cfg(feature = "dfa-build")] +#[cfg_attr(feature = "perf-inline", inline(always))] +fn dfa_eoi_fwd( + dfa: &crate::dfa::dense::DFA>, + input: &Input<'_>, + sid: &mut crate::util::primitives::StateID, + mat: &mut Option, +) -> Result<(), MatchError> { + use crate::dfa::Automaton; + + let sp = input.get_span(); + match input.haystack().get(sp.end) { + Some(&b) => { + *sid = dfa.next_state(*sid, b); + if dfa.is_match_state(*sid) { + let pattern = dfa.match_pattern(*sid, 0); + *mat = Some(HalfMatch::new(pattern, sp.end)); + } else if dfa.is_quit_state(*sid) { + return Err(MatchError::quit(b, sp.end)); + } + } + None => { + *sid = dfa.next_eoi_state(*sid); + if dfa.is_match_state(*sid) { + let pattern = dfa.match_pattern(*sid, 0); + *mat = Some(HalfMatch::new(pattern, input.haystack().len())); + } + // N.B. We don't have to check 'is_quit' here because the EOI + // transition can never lead to a quit state. 
+ debug_assert!(!dfa.is_quit_state(*sid)); + } + } + Ok(()) +} + +#[cfg(feature = "hybrid")] +#[cfg_attr(feature = "perf-inline", inline(always))] +fn hybrid_eoi_fwd( + dfa: &crate::hybrid::dfa::DFA, + cache: &mut crate::hybrid::dfa::Cache, + input: &Input<'_>, + sid: &mut crate::hybrid::LazyStateID, + mat: &mut Option, +) -> Result<(), MatchError> { + let sp = input.get_span(); + match input.haystack().get(sp.end) { + Some(&b) => { + *sid = dfa + .next_state(cache, *sid, b) + .map_err(|_| MatchError::gave_up(sp.end))?; + if sid.is_match() { + let pattern = dfa.match_pattern(cache, *sid, 0); + *mat = Some(HalfMatch::new(pattern, sp.end)); + } else if sid.is_quit() { + return Err(MatchError::quit(b, sp.end)); + } + } + None => { + *sid = dfa + .next_eoi_state(cache, *sid) + .map_err(|_| MatchError::gave_up(input.haystack().len()))?; + if sid.is_match() { + let pattern = dfa.match_pattern(cache, *sid, 0); + *mat = Some(HalfMatch::new(pattern, input.haystack().len())); + } + // N.B. We don't have to check 'is_quit' here because the EOI + // transition can never lead to a quit state. 
+ debug_assert!(!sid.is_quit()); + } + } + Ok(()) +} diff --git a/vendor/regex-automata/src/meta/strategy.rs b/vendor/regex-automata/src/meta/strategy.rs new file mode 100644 index 0000000..04f2ba3 --- /dev/null +++ b/vendor/regex-automata/src/meta/strategy.rs @@ -0,0 +1,1914 @@ +use core::{ + fmt::Debug, + panic::{RefUnwindSafe, UnwindSafe}, +}; + +use alloc::sync::Arc; + +use regex_syntax::hir::{literal, Hir}; + +use crate::{ + meta::{ + error::{BuildError, RetryError, RetryFailError, RetryQuadraticError}, + regex::{Cache, RegexInfo}, + reverse_inner, wrappers, + }, + nfa::thompson::{self, WhichCaptures, NFA}, + util::{ + captures::{Captures, GroupInfo}, + look::LookMatcher, + prefilter::{self, Prefilter, PrefilterI}, + primitives::{NonMaxUsize, PatternID}, + search::{Anchored, HalfMatch, Input, Match, MatchKind, PatternSet}, + }, +}; + +/// A trait that represents a single meta strategy. Its main utility is in +/// providing a way to do dynamic dispatch over a few choices. +/// +/// Why dynamic dispatch? I actually don't have a super compelling reason, and +/// importantly, I have not benchmarked it with the main alternative: an enum. +/// I went with dynamic dispatch initially because the regex engine search code +/// really can't be inlined into caller code in most cases because it's just +/// too big. In other words, it is already expected that every regex search +/// will entail at least the cost of a function call. +/// +/// I do wonder whether using enums would result in better codegen overall +/// though. It's a worthwhile experiment to try. Probably the most interesting +/// benchmark to run in such a case would be one with a high match count. That +/// is, a benchmark to test the overall latency of a search call. 
+pub(super) trait Strategy: + Debug + Send + Sync + RefUnwindSafe + UnwindSafe + 'static +{ + fn group_info(&self) -> &GroupInfo; + + fn create_cache(&self) -> Cache; + + fn reset_cache(&self, cache: &mut Cache); + + fn is_accelerated(&self) -> bool; + + fn memory_usage(&self) -> usize; + + fn search(&self, cache: &mut Cache, input: &Input<'_>) -> Option; + + fn search_half( + &self, + cache: &mut Cache, + input: &Input<'_>, + ) -> Option; + + fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool; + + fn search_slots( + &self, + cache: &mut Cache, + input: &Input<'_>, + slots: &mut [Option], + ) -> Option; + + fn which_overlapping_matches( + &self, + cache: &mut Cache, + input: &Input<'_>, + patset: &mut PatternSet, + ); +} + +pub(super) fn new( + info: &RegexInfo, + hirs: &[&Hir], +) -> Result, BuildError> { + // At this point, we're committed to a regex engine of some kind. So pull + // out a prefilter if we can, which will feed to each of the constituent + // regex engines. + let pre = if info.is_always_anchored_start() { + // PERF: I'm not sure we necessarily want to do this... We may want to + // run a prefilter for quickly rejecting in some cases. The problem + // is that anchored searches overlap quite a bit with the use case + // of "run a regex on every line to extract data." In that case, the + // regex always matches, so running a prefilter doesn't really help us + // there. The main place where a prefilter helps in an anchored search + // is if the anchored search is not expected to match frequently. That + // is, the prefilter gives us a way to possibly reject a haystack very + // quickly. + // + // Maybe we should do use a prefilter, but only for longer haystacks? + // Or maybe we should only use a prefilter when we think it's "fast"? + // + // Interestingly, I think we currently lack the infrastructure for + // disabling a prefilter based on haystack length. That would probably + // need to be a new 'Input' option. 
(Interestingly, an 'Input' used to + // carry a 'Prefilter' with it, but I moved away from that.) + debug!("skipping literal extraction since regex is anchored"); + None + } else if let Some(pre) = info.config().get_prefilter() { + debug!( + "skipping literal extraction since the caller provided a prefilter" + ); + Some(pre.clone()) + } else if info.config().get_auto_prefilter() { + let kind = info.config().get_match_kind(); + let prefixes = crate::util::prefilter::prefixes(kind, hirs); + // If we can build a full `Strategy` from just the extracted prefixes, + // then we can short-circuit and avoid building a regex engine at all. + if let Some(pre) = Pre::from_prefixes(info, &prefixes) { + debug!( + "found that the regex can be broken down to a literal \ + search, avoiding the regex engine entirely", + ); + return Ok(pre); + } + // This now attempts another short-circuit of the regex engine: if we + // have a huge alternation of just plain literals, then we can just use + // Aho-Corasick for that and avoid the regex engine entirely. + // + // You might think this case would just be handled by + // `Pre::from_prefixes`, but that technique relies on heuristic literal + // extraction from the corresponding `Hir`. That works, but part of + // heuristics limit the size and number of literals returned. This case + // will specifically handle patterns with very large alternations. + // + // One wonders if we should just roll this our heuristic literal + // extraction, and then I think this case could disappear entirely. 
+ if let Some(pre) = Pre::from_alternation_literals(info, hirs) { + debug!( + "found plain alternation of literals, \ + avoiding regex engine entirely and using Aho-Corasick" + ); + return Ok(pre); + } + prefixes.literals().and_then(|strings| { + debug!( + "creating prefilter from {} literals: {:?}", + strings.len(), + strings, + ); + Prefilter::new(kind, strings) + }) + } else { + debug!("skipping literal extraction since prefilters were disabled"); + None + }; + let mut core = Core::new(info.clone(), pre.clone(), hirs)?; + // Now that we have our core regex engines built, there are a few cases + // where we can do a little bit better than just a normal "search forward + // and maybe use a prefilter when in a start state." However, these cases + // may not always work or otherwise build on top of the Core searcher. + // For example, the reverse anchored optimization seems like it might + // always work, but only the DFAs support reverse searching and the DFAs + // might give up or quit for reasons. If we had, e.g., a PikeVM that + // supported reverse searching, then we could avoid building a full Core + // engine for this case. + core = match ReverseAnchored::new(core) { + Err(core) => core, + Ok(ra) => { + debug!("using reverse anchored strategy"); + return Ok(Arc::new(ra)); + } + }; + core = match ReverseSuffix::new(core, hirs) { + Err(core) => core, + Ok(rs) => { + debug!("using reverse suffix strategy"); + return Ok(Arc::new(rs)); + } + }; + core = match ReverseInner::new(core, hirs) { + Err(core) => core, + Ok(ri) => { + debug!("using reverse inner strategy"); + return Ok(Arc::new(ri)); + } + }; + debug!("using core strategy"); + Ok(Arc::new(core)) +} + +#[derive(Clone, Debug)] +struct Pre

{ + pre: P, + group_info: GroupInfo, +} + +impl Pre

{ + fn new(pre: P) -> Arc { + // The only thing we support when we use prefilters directly as a + // strategy is the start and end of the overall match for a single + // pattern. In other words, exactly one implicit capturing group. Which + // is exactly what we use here for a GroupInfo. + let group_info = GroupInfo::new([[None::<&str>]]).unwrap(); + Arc::new(Pre { pre, group_info }) + } +} + +// This is a little weird, but we don't actually care about the type parameter +// here because we're selecting which underlying prefilter to use. So we just +// define it on an arbitrary type. +impl Pre<()> { + /// Given a sequence of prefixes, attempt to return a full `Strategy` using + /// just the prefixes. + /// + /// Basically, this occurs when the prefixes given not just prefixes, + /// but an enumeration of the entire language matched by the regular + /// expression. + /// + /// A number of other conditions need to be true too. For example, there + /// can be only one pattern, the number of explicit capture groups is 0, no + /// look-around assertions and so on. + /// + /// Note that this ignores `Config::get_auto_prefilter` because if this + /// returns something, then it isn't a prefilter but a matcher itself. + /// Therefore, it shouldn't suffer from the problems typical to prefilters + /// (such as a high false positive rate). + fn from_prefixes( + info: &RegexInfo, + prefixes: &literal::Seq, + ) -> Option> { + let kind = info.config().get_match_kind(); + // Check to see if our prefixes are exact, which means we might be + // able to bypass the regex engine entirely and just rely on literal + // searches. + if !prefixes.is_exact() { + return None; + } + // We also require that we have a single regex pattern. Namely, + // we reuse the prefilter infrastructure to implement search and + // prefilters only report spans. Prefilters don't know about pattern + // IDs. 
The multi-regex case isn't a lost cause, we might still use + // Aho-Corasick and we might still just use a regular prefilter, but + // that's done below. + if info.pattern_len() != 1 { + return None; + } + // We can't have any capture groups either. The literal engines don't + // know how to deal with things like '(foo)(bar)'. In that case, a + // prefilter will just be used and then the regex engine will resolve + // the capture groups. + if info.props()[0].explicit_captures_len() != 0 { + return None; + } + // We also require that it has zero look-around assertions. Namely, + // literal extraction treats look-around assertions as if they match + // *every* empty string. But of course, that isn't true. So for + // example, 'foo\bquux' never matches anything, but 'fooquux' is + // extracted from that as an exact literal. Such cases should just run + // the regex engine. 'fooquux' will be used as a normal prefilter, and + // then the regex engine will try to look for an actual match. + if !info.props()[0].look_set().is_empty() { + return None; + } + // Finally, currently, our prefilters are all oriented around + // leftmost-first match semantics, so don't try to use them if the + // caller asked for anything else. + if kind != MatchKind::LeftmostFirst { + return None; + } + // The above seems like a lot of requirements to meet, but it applies + // to a lot of cases. 'foo', '[abc][123]' and 'foo|bar|quux' all meet + // the above criteria, for example. + // + // Note that this is effectively a latency optimization. If we didn't + // do this, then the extracted literals would still get bundled into + // a prefilter, and every regex engine capable of running unanchored + // searches supports prefilters. So this optimization merely sidesteps + // having to run the regex engine at all to confirm the match. Thus, it + // decreases the latency of a match. + + // OK because we know the set is exact and thus finite. 
+ let prefixes = prefixes.literals().unwrap(); + debug!( + "trying to bypass regex engine by creating \ + prefilter from {} literals: {:?}", + prefixes.len(), + prefixes, + ); + let choice = match prefilter::Choice::new(kind, prefixes) { + Some(choice) => choice, + None => { + debug!( + "regex bypass failed because no prefilter could be built" + ); + return None; + } + }; + let strat: Arc = match choice { + prefilter::Choice::Memchr(pre) => Pre::new(pre), + prefilter::Choice::Memchr2(pre) => Pre::new(pre), + prefilter::Choice::Memchr3(pre) => Pre::new(pre), + prefilter::Choice::Memmem(pre) => Pre::new(pre), + prefilter::Choice::Teddy(pre) => Pre::new(pre), + prefilter::Choice::ByteSet(pre) => Pre::new(pre), + prefilter::Choice::AhoCorasick(pre) => Pre::new(pre), + }; + Some(strat) + } + + /// Attempts to extract an alternation of literals, and if it's deemed + /// worth doing, returns an Aho-Corasick prefilter as a strategy. + /// + /// And currently, this only returns something when 'hirs.len() == 1'. This + /// could in theory do something if there are multiple HIRs where all of + /// them are alternation of literals, but I haven't had the time to go down + /// that path yet. + fn from_alternation_literals( + info: &RegexInfo, + hirs: &[&Hir], + ) -> Option> { + use crate::util::prefilter::AhoCorasick; + + let lits = crate::meta::literal::alternation_literals(info, hirs)?; + let ac = AhoCorasick::new(MatchKind::LeftmostFirst, &lits)?; + Some(Pre::new(ac)) + } +} + +// This implements Strategy for anything that implements PrefilterI. +// +// Note that this must only be used for regexes of length 1. Multi-regexes +// don't work here. The prefilter interface only provides the span of a match +// and not the pattern ID. (I did consider making it more expressive, but I +// couldn't figure out how to tie everything together elegantly.) Thus, so long +// as the regex only contains one pattern, we can simply assume that a match +// corresponds to PatternID::ZERO. 
And indeed, that's what we do here. +// +// In practice, since this impl is used to report matches directly and thus +// completely bypasses the regex engine, we only wind up using this under the +// following restrictions: +// +// * There must be only one pattern. As explained above. +// * The literal sequence must be finite and only contain exact literals. +// * There must not be any look-around assertions. If there are, the literals +// extracted might be exact, but a match doesn't necessarily imply an overall +// match. As a trivial example, 'foo\bbar' does not match 'foobar'. +// * The pattern must not have any explicit capturing groups. If it does, the +// caller might expect them to be resolved. e.g., 'foo(bar)'. +// +// So when all of those things are true, we use a prefilter directly as a +// strategy. +// +// In the case where the number of patterns is more than 1, we don't use this +// but do use a special Aho-Corasick strategy if all of the regexes are just +// simple literals or alternations of literals. (We also use the Aho-Corasick +// strategy when len(patterns)==1 if the number of literals is large. In that +// case, literal extraction gives up and will return an infinite set.) +impl Strategy for Pre

{ + #[cfg_attr(feature = "perf-inline", inline(always))] + fn group_info(&self) -> &GroupInfo { + &self.group_info + } + + fn create_cache(&self) -> Cache { + Cache { + capmatches: Captures::all(self.group_info().clone()), + pikevm: wrappers::PikeVMCache::none(), + backtrack: wrappers::BoundedBacktrackerCache::none(), + onepass: wrappers::OnePassCache::none(), + hybrid: wrappers::HybridCache::none(), + revhybrid: wrappers::ReverseHybridCache::none(), + } + } + + fn reset_cache(&self, _cache: &mut Cache) {} + + fn is_accelerated(&self) -> bool { + self.pre.is_fast() + } + + fn memory_usage(&self) -> usize { + self.pre.memory_usage() + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn search(&self, _cache: &mut Cache, input: &Input<'_>) -> Option { + if input.is_done() { + return None; + } + if input.get_anchored().is_anchored() { + return self + .pre + .prefix(input.haystack(), input.get_span()) + .map(|sp| Match::new(PatternID::ZERO, sp)); + } + self.pre + .find(input.haystack(), input.get_span()) + .map(|sp| Match::new(PatternID::ZERO, sp)) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn search_half( + &self, + cache: &mut Cache, + input: &Input<'_>, + ) -> Option { + self.search(cache, input).map(|m| HalfMatch::new(m.pattern(), m.end())) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool { + self.search(cache, input).is_some() + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn search_slots( + &self, + cache: &mut Cache, + input: &Input<'_>, + slots: &mut [Option], + ) -> Option { + let m = self.search(cache, input)?; + if let Some(slot) = slots.get_mut(0) { + *slot = NonMaxUsize::new(m.start()); + } + if let Some(slot) = slots.get_mut(1) { + *slot = NonMaxUsize::new(m.end()); + } + Some(m.pattern()) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn which_overlapping_matches( + &self, + cache: &mut Cache, + input: &Input<'_>, 
+ patset: &mut PatternSet, + ) { + if self.search(cache, input).is_some() { + patset.insert(PatternID::ZERO); + } + } +} + +#[derive(Debug)] +struct Core { + info: RegexInfo, + pre: Option, + nfa: NFA, + nfarev: Option, + pikevm: wrappers::PikeVM, + backtrack: wrappers::BoundedBacktracker, + onepass: wrappers::OnePass, + hybrid: wrappers::Hybrid, + dfa: wrappers::DFA, +} + +impl Core { + fn new( + info: RegexInfo, + pre: Option, + hirs: &[&Hir], + ) -> Result { + let mut lookm = LookMatcher::new(); + lookm.set_line_terminator(info.config().get_line_terminator()); + let thompson_config = thompson::Config::new() + .utf8(info.config().get_utf8_empty()) + .nfa_size_limit(info.config().get_nfa_size_limit()) + .shrink(false) + .which_captures(info.config().get_which_captures()) + .look_matcher(lookm); + let nfa = thompson::Compiler::new() + .configure(thompson_config.clone()) + .build_many_from_hir(hirs) + .map_err(BuildError::nfa)?; + // It's possible for the PikeVM or the BB to fail to build, even though + // at this point, we already have a full NFA in hand. They can fail + // when a Unicode word boundary is used but where Unicode word boundary + // support is disabled at compile time, thus making it impossible to + // match. (Construction can also fail if the NFA was compiled without + // captures, but we always enable that above.) + let pikevm = wrappers::PikeVM::new(&info, pre.clone(), &nfa)?; + let backtrack = + wrappers::BoundedBacktracker::new(&info, pre.clone(), &nfa)?; + // The onepass engine can of course fail to build, but we expect it to + // fail in many cases because it is an optimization that doesn't apply + // to all regexes. The 'OnePass' wrapper encapsulates this failure (and + // logs a message if it occurs). 
+ let onepass = wrappers::OnePass::new(&info, &nfa); + // We try to encapsulate whether a particular regex engine should be + // used within each respective wrapper, but the DFAs need a reverse NFA + // to build itself, and we really do not want to build a reverse NFA if + // we know we aren't going to use the lazy DFA. So we do a config check + // up front, which is in practice the only way we won't try to use the + // DFA. + let (nfarev, hybrid, dfa) = + if !info.config().get_hybrid() && !info.config().get_dfa() { + (None, wrappers::Hybrid::none(), wrappers::DFA::none()) + } else { + // FIXME: Technically, we don't quite yet KNOW that we need + // a reverse NFA. It's possible for the DFAs below to both + // fail to build just based on the forward NFA. In which case, + // building the reverse NFA was totally wasted work. But... + // fixing this requires breaking DFA construction apart into + // two pieces: one for the forward part and another for the + // reverse part. Quite annoying. Making it worse, when building + // both DFAs fails, it's quite likely that the NFA is large and + // that it will take quite some time to build the reverse NFA + // too. So... it's really probably worth it to do this! + let nfarev = thompson::Compiler::new() + // Currently, reverse NFAs don't support capturing groups, + // so we MUST disable them. But even if we didn't have to, + // we would, because nothing in this crate does anything + // useful with capturing groups in reverse. And of course, + // the lazy DFA ignores capturing groups in all cases. 
+ .configure( + thompson_config + .clone() + .which_captures(WhichCaptures::None) + .reverse(true), + ) + .build_many_from_hir(hirs) + .map_err(BuildError::nfa)?; + let dfa = if !info.config().get_dfa() { + wrappers::DFA::none() + } else { + wrappers::DFA::new(&info, pre.clone(), &nfa, &nfarev) + }; + let hybrid = if !info.config().get_hybrid() { + wrappers::Hybrid::none() + } else if dfa.is_some() { + debug!("skipping lazy DFA because we have a full DFA"); + wrappers::Hybrid::none() + } else { + wrappers::Hybrid::new(&info, pre.clone(), &nfa, &nfarev) + }; + (Some(nfarev), hybrid, dfa) + }; + Ok(Core { + info, + pre, + nfa, + nfarev, + pikevm, + backtrack, + onepass, + hybrid, + dfa, + }) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn try_search_mayfail( + &self, + cache: &mut Cache, + input: &Input<'_>, + ) -> Option, RetryFailError>> { + if let Some(e) = self.dfa.get(input) { + trace!("using full DFA for search at {:?}", input.get_span()); + Some(e.try_search(input)) + } else if let Some(e) = self.hybrid.get(input) { + trace!("using lazy DFA for search at {:?}", input.get_span()); + Some(e.try_search(&mut cache.hybrid, input)) + } else { + None + } + } + + fn search_nofail( + &self, + cache: &mut Cache, + input: &Input<'_>, + ) -> Option { + let caps = &mut cache.capmatches; + caps.set_pattern(None); + // We manually inline 'try_search_slots_nofail' here because we need to + // borrow from 'cache.capmatches' in this method, but if we do, then + // we can't pass 'cache' wholesale to to 'try_slots_no_hybrid'. It's a + // classic example of how the borrow checker inhibits decomposition. + // There are of course work-arounds (more types and/or interior + // mutability), but that's more annoying than this IMO. 
+ let pid = if let Some(ref e) = self.onepass.get(input) { + trace!("using OnePass for search at {:?}", input.get_span()); + e.search_slots(&mut cache.onepass, input, caps.slots_mut()) + } else if let Some(ref e) = self.backtrack.get(input) { + trace!( + "using BoundedBacktracker for search at {:?}", + input.get_span() + ); + e.search_slots(&mut cache.backtrack, input, caps.slots_mut()) + } else { + trace!("using PikeVM for search at {:?}", input.get_span()); + let e = self.pikevm.get(); + e.search_slots(&mut cache.pikevm, input, caps.slots_mut()) + }; + caps.set_pattern(pid); + caps.get_match() + } + + fn search_half_nofail( + &self, + cache: &mut Cache, + input: &Input<'_>, + ) -> Option { + // Only the lazy/full DFA returns half-matches, since the DFA requires + // a reverse scan to find the start position. These fallback regex + // engines can find the start and end in a single pass, so we just do + // that and throw away the start offset to conform to the API. + let m = self.search_nofail(cache, input)?; + Some(HalfMatch::new(m.pattern(), m.end())) + } + + fn search_slots_nofail( + &self, + cache: &mut Cache, + input: &Input<'_>, + slots: &mut [Option], + ) -> Option { + if let Some(ref e) = self.onepass.get(input) { + trace!( + "using OnePass for capture search at {:?}", + input.get_span() + ); + e.search_slots(&mut cache.onepass, input, slots) + } else if let Some(ref e) = self.backtrack.get(input) { + trace!( + "using BoundedBacktracker for capture search at {:?}", + input.get_span() + ); + e.search_slots(&mut cache.backtrack, input, slots) + } else { + trace!( + "using PikeVM for capture search at {:?}", + input.get_span() + ); + let e = self.pikevm.get(); + e.search_slots(&mut cache.pikevm, input, slots) + } + } + + fn is_match_nofail(&self, cache: &mut Cache, input: &Input<'_>) -> bool { + if let Some(ref e) = self.onepass.get(input) { + trace!( + "using OnePass for is-match search at {:?}", + input.get_span() + ); + e.search_slots(&mut cache.onepass, 
input, &mut []).is_some() + } else if let Some(ref e) = self.backtrack.get(input) { + trace!( + "using BoundedBacktracker for is-match search at {:?}", + input.get_span() + ); + e.is_match(&mut cache.backtrack, input) + } else { + trace!( + "using PikeVM for is-match search at {:?}", + input.get_span() + ); + let e = self.pikevm.get(); + e.is_match(&mut cache.pikevm, input) + } + } + + fn is_capture_search_needed(&self, slots_len: usize) -> bool { + slots_len > self.nfa.group_info().implicit_slot_len() + } +} + +impl Strategy for Core { + #[cfg_attr(feature = "perf-inline", inline(always))] + fn group_info(&self) -> &GroupInfo { + self.nfa.group_info() + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn create_cache(&self) -> Cache { + Cache { + capmatches: Captures::all(self.group_info().clone()), + pikevm: self.pikevm.create_cache(), + backtrack: self.backtrack.create_cache(), + onepass: self.onepass.create_cache(), + hybrid: self.hybrid.create_cache(), + revhybrid: wrappers::ReverseHybridCache::none(), + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn reset_cache(&self, cache: &mut Cache) { + cache.pikevm.reset(&self.pikevm); + cache.backtrack.reset(&self.backtrack); + cache.onepass.reset(&self.onepass); + cache.hybrid.reset(&self.hybrid); + } + + fn is_accelerated(&self) -> bool { + self.pre.as_ref().map_or(false, |pre| pre.is_fast()) + } + + fn memory_usage(&self) -> usize { + self.info.memory_usage() + + self.pre.as_ref().map_or(0, |pre| pre.memory_usage()) + + self.nfa.memory_usage() + + self.nfarev.as_ref().map_or(0, |nfa| nfa.memory_usage()) + + self.onepass.memory_usage() + + self.dfa.memory_usage() + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn search(&self, cache: &mut Cache, input: &Input<'_>) -> Option { + // We manually inline try_search_mayfail here because letting the + // compiler do it seems to produce pretty crappy codegen. 
+ return if let Some(e) = self.dfa.get(input) { + trace!("using full DFA for full search at {:?}", input.get_span()); + match e.try_search(input) { + Ok(x) => x, + Err(_err) => { + trace!("full DFA search failed: {}", _err); + self.search_nofail(cache, input) + } + } + } else if let Some(e) = self.hybrid.get(input) { + trace!("using lazy DFA for full search at {:?}", input.get_span()); + match e.try_search(&mut cache.hybrid, input) { + Ok(x) => x, + Err(_err) => { + trace!("lazy DFA search failed: {}", _err); + self.search_nofail(cache, input) + } + } + } else { + self.search_nofail(cache, input) + }; + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn search_half( + &self, + cache: &mut Cache, + input: &Input<'_>, + ) -> Option { + // The main difference with 'search' is that if we're using a DFA, we + // can use a single forward scan without needing to run the reverse + // DFA. + if let Some(e) = self.dfa.get(input) { + trace!("using full DFA for half search at {:?}", input.get_span()); + match e.try_search_half_fwd(input) { + Ok(x) => x, + Err(_err) => { + trace!("full DFA half search failed: {}", _err); + self.search_half_nofail(cache, input) + } + } + } else if let Some(e) = self.hybrid.get(input) { + trace!("using lazy DFA for half search at {:?}", input.get_span()); + match e.try_search_half_fwd(&mut cache.hybrid, input) { + Ok(x) => x, + Err(_err) => { + trace!("lazy DFA half search failed: {}", _err); + self.search_half_nofail(cache, input) + } + } + } else { + self.search_half_nofail(cache, input) + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool { + if let Some(e) = self.dfa.get(input) { + trace!( + "using full DFA for is-match search at {:?}", + input.get_span() + ); + match e.try_search_half_fwd(input) { + Ok(x) => x.is_some(), + Err(_err) => { + trace!("full DFA half search failed: {}", _err); + self.is_match_nofail(cache, input) + } + } + } else if let 
Some(e) = self.hybrid.get(input) { + trace!( + "using lazy DFA for is-match search at {:?}", + input.get_span() + ); + match e.try_search_half_fwd(&mut cache.hybrid, input) { + Ok(x) => x.is_some(), + Err(_err) => { + trace!("lazy DFA half search failed: {}", _err); + self.is_match_nofail(cache, input) + } + } + } else { + self.is_match_nofail(cache, input) + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn search_slots( + &self, + cache: &mut Cache, + input: &Input<'_>, + slots: &mut [Option], + ) -> Option { + // Even if the regex has explicit capture groups, if the caller didn't + // provide any explicit slots, then it doesn't make sense to try and do + // extra work to get offsets for those slots. Ideally the caller should + // realize this and not call this routine in the first place, but alas, + // we try to save the caller from themselves if they do. + if !self.is_capture_search_needed(slots.len()) { + trace!("asked for slots unnecessarily, trying fast path"); + let m = self.search(cache, input)?; + copy_match_to_slots(m, slots); + return Some(m.pattern()); + } + // If the onepass DFA is available for this search (which only happens + // when it's anchored), then skip running a fallible DFA. The onepass + // DFA isn't as fast as a full or lazy DFA, but it is typically quite + // a bit faster than the backtracker or the PikeVM. So it isn't as + // advantageous to try and do a full/lazy DFA scan first. + // + // We still theorize that it's better to do a full/lazy DFA scan, even + // when it's anchored, because it's usually much faster and permits us + // to say "no match" much more quickly. This does hurt the case of, + // say, parsing each line in a log file into capture groups, because + // in that case, the line always matches. So the lazy DFA scan is + // usually just wasted work. But, the lazy DFA is usually quite fast + // and doesn't cost too much here. 
+ if self.onepass.get(&input).is_some() { + return self.search_slots_nofail(cache, &input, slots); + } + let m = match self.try_search_mayfail(cache, input) { + Some(Ok(Some(m))) => m, + Some(Ok(None)) => return None, + Some(Err(_err)) => { + trace!("fast capture search failed: {}", _err); + return self.search_slots_nofail(cache, input, slots); + } + None => { + return self.search_slots_nofail(cache, input, slots); + } + }; + // At this point, now that we've found the bounds of the + // match, we need to re-run something that can resolve + // capturing groups. But we only need to run on it on the + // match bounds and not the entire haystack. + trace!( + "match found at {}..{} in capture search, \ + using another engine to find captures", + m.start(), + m.end(), + ); + let input = input + .clone() + .span(m.start()..m.end()) + .anchored(Anchored::Pattern(m.pattern())); + Some( + self.search_slots_nofail(cache, &input, slots) + .expect("should find a match"), + ) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn which_overlapping_matches( + &self, + cache: &mut Cache, + input: &Input<'_>, + patset: &mut PatternSet, + ) { + if let Some(e) = self.dfa.get(input) { + trace!( + "using full DFA for overlapping search at {:?}", + input.get_span() + ); + let _err = match e.try_which_overlapping_matches(input, patset) { + Ok(()) => return, + Err(err) => err, + }; + trace!("fast overlapping search failed: {}", _err); + } else if let Some(e) = self.hybrid.get(input) { + trace!( + "using lazy DFA for overlapping search at {:?}", + input.get_span() + ); + let _err = match e.try_which_overlapping_matches( + &mut cache.hybrid, + input, + patset, + ) { + Ok(()) => { + return; + } + Err(err) => err, + }; + trace!("fast overlapping search failed: {}", _err); + } + trace!( + "using PikeVM for overlapping search at {:?}", + input.get_span() + ); + let e = self.pikevm.get(); + e.which_overlapping_matches(&mut cache.pikevm, input, patset) + } +} + +#[derive(Debug)] 
+struct ReverseAnchored { + core: Core, +} + +impl ReverseAnchored { + fn new(core: Core) -> Result { + if !core.info.is_always_anchored_end() { + debug!( + "skipping reverse anchored optimization because \ + the regex is not always anchored at the end" + ); + return Err(core); + } + // Note that the caller can still request an anchored search even when + // the regex isn't anchored at the start. We detect that case in the + // search routines below and just fallback to the core engine. This + // is fine because both searches are anchored. It's just a matter of + // picking one. Falling back to the core engine is a little simpler, + // since if we used the reverse anchored approach, we'd have to add an + // extra check to ensure the match reported starts at the place where + // the caller requested the search to start. + if core.info.is_always_anchored_start() { + debug!( + "skipping reverse anchored optimization because \ + the regex is also anchored at the start" + ); + return Err(core); + } + // Only DFAs can do reverse searches (currently), so we need one of + // them in order to do this optimization. It's possible (although + // pretty unlikely) that we have neither and need to give up. + if !core.hybrid.is_some() && !core.dfa.is_some() { + debug!( + "skipping reverse anchored optimization because \ + we don't have a lazy DFA or a full DFA" + ); + return Err(core); + } + Ok(ReverseAnchored { core }) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn try_search_half_anchored_rev( + &self, + cache: &mut Cache, + input: &Input<'_>, + ) -> Result, RetryFailError> { + // We of course always want an anchored search. In theory, the + // underlying regex engines should automatically enable anchored + // searches since the regex is itself anchored, but this more clearly + // expresses intent and is always correct. 
+ let input = input.clone().anchored(Anchored::Yes); + if let Some(e) = self.core.dfa.get(&input) { + trace!( + "using full DFA for reverse anchored search at {:?}", + input.get_span() + ); + e.try_search_half_rev(&input) + } else if let Some(e) = self.core.hybrid.get(&input) { + trace!( + "using lazy DFA for reverse anchored search at {:?}", + input.get_span() + ); + e.try_search_half_rev(&mut cache.hybrid, &input) + } else { + unreachable!("ReverseAnchored always has a DFA") + } + } +} + +// Note that in this impl, we don't check that 'input.end() == +// input.haystack().len()'. In particular, when that condition is false, a +// match is always impossible because we know that the regex is always anchored +// at the end (or else 'ReverseAnchored' won't be built). We don't check that +// here because the 'Regex' wrapper actually does that for us in all cases. +// Thus, in this impl, we can actually assume that the end position in 'input' +// is equivalent to the length of the haystack. +impl Strategy for ReverseAnchored { + #[cfg_attr(feature = "perf-inline", inline(always))] + fn group_info(&self) -> &GroupInfo { + self.core.group_info() + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn create_cache(&self) -> Cache { + self.core.create_cache() + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn reset_cache(&self, cache: &mut Cache) { + self.core.reset_cache(cache); + } + + fn is_accelerated(&self) -> bool { + // Since this is anchored at the end, a reverse anchored search is + // almost certainly guaranteed to result in a much faster search than + // a standard forward search. 
+ true + } + + fn memory_usage(&self) -> usize { + self.core.memory_usage() + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn search(&self, cache: &mut Cache, input: &Input<'_>) -> Option { + if input.get_anchored().is_anchored() { + return self.core.search(cache, input); + } + match self.try_search_half_anchored_rev(cache, input) { + Err(_err) => { + trace!("fast reverse anchored search failed: {}", _err); + self.core.search_nofail(cache, input) + } + Ok(None) => None, + Ok(Some(hm)) => { + Some(Match::new(hm.pattern(), hm.offset()..input.end())) + } + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn search_half( + &self, + cache: &mut Cache, + input: &Input<'_>, + ) -> Option { + if input.get_anchored().is_anchored() { + return self.core.search_half(cache, input); + } + match self.try_search_half_anchored_rev(cache, input) { + Err(_err) => { + trace!("fast reverse anchored search failed: {}", _err); + self.core.search_half_nofail(cache, input) + } + Ok(None) => None, + Ok(Some(hm)) => { + // Careful here! 'try_search_half' is a *forward* search that + // only cares about the *end* position of a match. But + // 'hm.offset()' is actually the start of the match. So we + // actually just throw that away here and, since we know we + // have a match, return the only possible position at which a + // match can occur: input.end(). 
+ Some(HalfMatch::new(hm.pattern(), input.end())) + } + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool { + if input.get_anchored().is_anchored() { + return self.core.is_match(cache, input); + } + match self.try_search_half_anchored_rev(cache, input) { + Err(_err) => { + trace!("fast reverse anchored search failed: {}", _err); + self.core.is_match_nofail(cache, input) + } + Ok(None) => false, + Ok(Some(_)) => true, + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn search_slots( + &self, + cache: &mut Cache, + input: &Input<'_>, + slots: &mut [Option], + ) -> Option { + if input.get_anchored().is_anchored() { + return self.core.search_slots(cache, input, slots); + } + match self.try_search_half_anchored_rev(cache, input) { + Err(_err) => { + trace!("fast reverse anchored search failed: {}", _err); + self.core.search_slots_nofail(cache, input, slots) + } + Ok(None) => None, + Ok(Some(hm)) => { + if !self.core.is_capture_search_needed(slots.len()) { + trace!("asked for slots unnecessarily, skipping captures"); + let m = Match::new(hm.pattern(), hm.offset()..input.end()); + copy_match_to_slots(m, slots); + return Some(m.pattern()); + } + let start = hm.offset(); + let input = input + .clone() + .span(start..input.end()) + .anchored(Anchored::Pattern(hm.pattern())); + self.core.search_slots_nofail(cache, &input, slots) + } + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn which_overlapping_matches( + &self, + cache: &mut Cache, + input: &Input<'_>, + patset: &mut PatternSet, + ) { + // It seems like this could probably benefit from a reverse anchored + // optimization, perhaps by doing an overlapping reverse search (which + // the DFAs do support). I haven't given it much thought though, and + // I'm currently focus more on the single pattern case. 
+ self.core.which_overlapping_matches(cache, input, patset) + } +} + +#[derive(Debug)] +struct ReverseSuffix { + core: Core, + pre: Prefilter, +} + +impl ReverseSuffix { + fn new(core: Core, hirs: &[&Hir]) -> Result { + if !core.info.config().get_auto_prefilter() { + debug!( + "skipping reverse suffix optimization because \ + automatic prefilters are disabled" + ); + return Err(core); + } + // Like the reverse inner optimization, we don't do this for regexes + // that are always anchored. It could lead to scanning too much, but + // could say "no match" much more quickly than running the regex + // engine if the initial literal scan doesn't match. With that said, + // the reverse suffix optimization has lower overhead, since it only + // requires a reverse scan after a literal match to confirm or reject + // the match. (Although, in the case of confirmation, it then needs to + // do another forward scan to find the end position.) + // + // Note that the caller can still request an anchored search even + // when the regex isn't anchored. We detect that case in the search + // routines below and just fallback to the core engine. Currently this + // optimization assumes all searches are unanchored, so if we do want + // to enable this optimization for anchored searches, it will need a + // little work to support it. + if core.info.is_always_anchored_start() { + debug!( + "skipping reverse suffix optimization because \ + the regex is always anchored at the start", + ); + return Err(core); + } + // Only DFAs can do reverse searches (currently), so we need one of + // them in order to do this optimization. It's possible (although + // pretty unlikely) that we have neither and need to give up. 
+ if !core.hybrid.is_some() && !core.dfa.is_some() { + debug!( + "skipping reverse suffix optimization because \ + we don't have a lazy DFA or a full DFA" + ); + return Err(core); + } + if core.pre.as_ref().map_or(false, |p| p.is_fast()) { + debug!( + "skipping reverse suffix optimization because \ + we already have a prefilter that we think is fast" + ); + return Err(core); + } + let kind = core.info.config().get_match_kind(); + let suffixes = crate::util::prefilter::suffixes(kind, hirs); + let lcs = match suffixes.longest_common_suffix() { + None => { + debug!( + "skipping reverse suffix optimization because \ + a longest common suffix could not be found", + ); + return Err(core); + } + Some(lcs) if lcs.is_empty() => { + debug!( + "skipping reverse suffix optimization because \ + the longest common suffix is the empty string", + ); + return Err(core); + } + Some(lcs) => lcs, + }; + let pre = match Prefilter::new(kind, &[lcs]) { + Some(pre) => pre, + None => { + debug!( + "skipping reverse suffix optimization because \ + a prefilter could not be constructed from the \ + longest common suffix", + ); + return Err(core); + } + }; + if !pre.is_fast() { + debug!( + "skipping reverse suffix optimization because \ + while we have a suffix prefilter, it is not \ + believed to be 'fast'" + ); + return Err(core); + } + Ok(ReverseSuffix { core, pre }) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn try_search_half_start( + &self, + cache: &mut Cache, + input: &Input<'_>, + ) -> Result, RetryError> { + let mut span = input.get_span(); + let mut min_start = 0; + loop { + let litmatch = match self.pre.find(input.haystack(), span) { + None => return Ok(None), + Some(span) => span, + }; + trace!("reverse suffix scan found suffix match at {:?}", litmatch); + let revinput = input + .clone() + .anchored(Anchored::Yes) + .span(input.start()..litmatch.end); + match self + .try_search_half_rev_limited(cache, &revinput, min_start)? 
+ { + None => { + if span.start >= span.end { + break; + } + span.start = litmatch.start.checked_add(1).unwrap(); + } + Some(hm) => return Ok(Some(hm)), + } + min_start = litmatch.end; + } + Ok(None) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn try_search_half_fwd( + &self, + cache: &mut Cache, + input: &Input<'_>, + ) -> Result, RetryFailError> { + if let Some(e) = self.core.dfa.get(&input) { + trace!( + "using full DFA for forward reverse suffix search at {:?}", + input.get_span() + ); + e.try_search_half_fwd(&input) + } else if let Some(e) = self.core.hybrid.get(&input) { + trace!( + "using lazy DFA for forward reverse suffix search at {:?}", + input.get_span() + ); + e.try_search_half_fwd(&mut cache.hybrid, &input) + } else { + unreachable!("ReverseSuffix always has a DFA") + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn try_search_half_rev_limited( + &self, + cache: &mut Cache, + input: &Input<'_>, + min_start: usize, + ) -> Result, RetryError> { + if let Some(e) = self.core.dfa.get(&input) { + trace!( + "using full DFA for reverse suffix search at {:?}, \ + but will be stopped at {} to avoid quadratic behavior", + input.get_span(), + min_start, + ); + e.try_search_half_rev_limited(&input, min_start) + } else if let Some(e) = self.core.hybrid.get(&input) { + trace!( + "using lazy DFA for reverse suffix search at {:?}, \ + but will be stopped at {} to avoid quadratic behavior", + input.get_span(), + min_start, + ); + e.try_search_half_rev_limited(&mut cache.hybrid, &input, min_start) + } else { + unreachable!("ReverseSuffix always has a DFA") + } + } +} + +impl Strategy for ReverseSuffix { + #[cfg_attr(feature = "perf-inline", inline(always))] + fn group_info(&self) -> &GroupInfo { + self.core.group_info() + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn create_cache(&self) -> Cache { + self.core.create_cache() + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn reset_cache(&self, cache: 
&mut Cache) { + self.core.reset_cache(cache); + } + + fn is_accelerated(&self) -> bool { + self.pre.is_fast() + } + + fn memory_usage(&self) -> usize { + self.core.memory_usage() + self.pre.memory_usage() + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn search(&self, cache: &mut Cache, input: &Input<'_>) -> Option { + if input.get_anchored().is_anchored() { + return self.core.search(cache, input); + } + match self.try_search_half_start(cache, input) { + Err(RetryError::Quadratic(_err)) => { + trace!("reverse suffix optimization failed: {}", _err); + self.core.search(cache, input) + } + Err(RetryError::Fail(_err)) => { + trace!("reverse suffix reverse fast search failed: {}", _err); + self.core.search_nofail(cache, input) + } + Ok(None) => None, + Ok(Some(hm_start)) => { + let fwdinput = input + .clone() + .anchored(Anchored::Pattern(hm_start.pattern())) + .span(hm_start.offset()..input.end()); + match self.try_search_half_fwd(cache, &fwdinput) { + Err(_err) => { + trace!( + "reverse suffix forward fast search failed: {}", + _err + ); + self.core.search_nofail(cache, input) + } + Ok(None) => { + unreachable!( + "suffix match plus reverse match implies \ + there must be a match", + ) + } + Ok(Some(hm_end)) => Some(Match::new( + hm_start.pattern(), + hm_start.offset()..hm_end.offset(), + )), + } + } + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn search_half( + &self, + cache: &mut Cache, + input: &Input<'_>, + ) -> Option { + if input.get_anchored().is_anchored() { + return self.core.search_half(cache, input); + } + match self.try_search_half_start(cache, input) { + Err(RetryError::Quadratic(_err)) => { + trace!("reverse suffix half optimization failed: {}", _err); + self.core.search_half(cache, input) + } + Err(RetryError::Fail(_err)) => { + trace!( + "reverse suffix reverse fast half search failed: {}", + _err + ); + self.core.search_half_nofail(cache, input) + } + Ok(None) => None, + Ok(Some(hm_start)) => { + // This is a bit 
subtle. It is tempting to just stop searching + // at this point and return a half-match with an offset + // corresponding to where the suffix was found. But the suffix + // match does not necessarily correspond to the end of the + // proper leftmost-first match. Consider /[a-z]+ing/ against + // 'tingling'. The first suffix match is the first 'ing', and + // the /[a-z]+/ matches the 't'. So if we stopped here, then + // we'd report 'ting' as the match. But 'tingling' is the + // correct match because of greediness. + let fwdinput = input + .clone() + .anchored(Anchored::Pattern(hm_start.pattern())) + .span(hm_start.offset()..input.end()); + match self.try_search_half_fwd(cache, &fwdinput) { + Err(_err) => { + trace!( + "reverse suffix forward fast search failed: {}", + _err + ); + self.core.search_half_nofail(cache, input) + } + Ok(None) => { + unreachable!( + "suffix match plus reverse match implies \ + there must be a match", + ) + } + Ok(Some(hm_end)) => Some(hm_end), + } + } + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool { + if input.get_anchored().is_anchored() { + return self.core.is_match(cache, input); + } + match self.try_search_half_start(cache, input) { + Err(RetryError::Quadratic(_err)) => { + trace!("reverse suffix half optimization failed: {}", _err); + self.core.is_match_nofail(cache, input) + } + Err(RetryError::Fail(_err)) => { + trace!( + "reverse suffix reverse fast half search failed: {}", + _err + ); + self.core.is_match_nofail(cache, input) + } + Ok(None) => false, + Ok(Some(_)) => true, + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn search_slots( + &self, + cache: &mut Cache, + input: &Input<'_>, + slots: &mut [Option], + ) -> Option { + if input.get_anchored().is_anchored() { + return self.core.search_slots(cache, input, slots); + } + if !self.core.is_capture_search_needed(slots.len()) { + trace!("asked for slots unnecessarily, trying 
fast path"); + let m = self.search(cache, input)?; + copy_match_to_slots(m, slots); + return Some(m.pattern()); + } + let hm_start = match self.try_search_half_start(cache, input) { + Err(RetryError::Quadratic(_err)) => { + trace!( + "reverse suffix captures optimization failed: {}", + _err + ); + return self.core.search_slots(cache, input, slots); + } + Err(RetryError::Fail(_err)) => { + trace!( + "reverse suffix reverse fast captures search failed: {}", + _err + ); + return self.core.search_slots_nofail(cache, input, slots); + } + Ok(None) => return None, + Ok(Some(hm_start)) => hm_start, + }; + trace!( + "match found at {}..{} in capture search, \ + using another engine to find captures", + hm_start.offset(), + input.end(), + ); + let start = hm_start.offset(); + let input = input + .clone() + .span(start..input.end()) + .anchored(Anchored::Pattern(hm_start.pattern())); + self.core.search_slots_nofail(cache, &input, slots) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn which_overlapping_matches( + &self, + cache: &mut Cache, + input: &Input<'_>, + patset: &mut PatternSet, + ) { + self.core.which_overlapping_matches(cache, input, patset) + } +} + +#[derive(Debug)] +struct ReverseInner { + core: Core, + preinner: Prefilter, + nfarev: NFA, + hybrid: wrappers::ReverseHybrid, + dfa: wrappers::ReverseDFA, +} + +impl ReverseInner { + fn new(core: Core, hirs: &[&Hir]) -> Result { + if !core.info.config().get_auto_prefilter() { + debug!( + "skipping reverse inner optimization because \ + automatic prefilters are disabled" + ); + return Err(core); + } + // Currently we hard-code the assumption of leftmost-first match + // semantics. This isn't a huge deal because 'all' semantics tend to + // only be used for forward overlapping searches with multiple regexes, + // and this optimization only supports a single pattern at the moment. 
+ if core.info.config().get_match_kind() != MatchKind::LeftmostFirst { + debug!( + "skipping reverse inner optimization because \ + match kind is {:?} but this only supports leftmost-first", + core.info.config().get_match_kind(), + ); + return Err(core); + } + // It's likely that a reverse inner scan has too much overhead for it + // to be worth it when the regex is anchored at the start. It is + // possible for it to be quite a bit faster if the initial literal + // scan fails to detect a match, in which case, we can say "no match" + // very quickly. But this could be undesirable, e.g., scanning too far + // or when the literal scan matches. If it matches, then confirming the + // match requires a reverse scan followed by a forward scan to confirm + // or reject, which is a fair bit of work. + // + // Note that the caller can still request an anchored search even + // when the regex isn't anchored. We detect that case in the search + // routines below and just fallback to the core engine. Currently this + // optimization assumes all searches are unanchored, so if we do want + // to enable this optimization for anchored searches, it will need a + // little work to support it. + if core.info.is_always_anchored_start() { + debug!( + "skipping reverse inner optimization because \ + the regex is always anchored at the start", + ); + return Err(core); + } + // Only DFAs can do reverse searches (currently), so we need one of + // them in order to do this optimization. It's possible (although + // pretty unlikely) that we have neither and need to give up. 
+ if !core.hybrid.is_some() && !core.dfa.is_some() { + debug!( + "skipping reverse inner optimization because \ + we don't have a lazy DFA or a full DFA" + ); + return Err(core); + } + if core.pre.as_ref().map_or(false, |p| p.is_fast()) { + debug!( + "skipping reverse inner optimization because \ + we already have a prefilter that we think is fast" + ); + return Err(core); + } else if core.pre.is_some() { + debug!( + "core engine has a prefix prefilter, but it is \ + probably not fast, so continuing with attempt to \ + use reverse inner prefilter" + ); + } + let (concat_prefix, preinner) = match reverse_inner::extract(hirs) { + Some(x) => x, + // N.B. the 'extract' function emits debug messages explaining + // why we bailed out here. + None => return Err(core), + }; + debug!("building reverse NFA for prefix before inner literal"); + let mut lookm = LookMatcher::new(); + lookm.set_line_terminator(core.info.config().get_line_terminator()); + let thompson_config = thompson::Config::new() + .reverse(true) + .utf8(core.info.config().get_utf8_empty()) + .nfa_size_limit(core.info.config().get_nfa_size_limit()) + .shrink(false) + .which_captures(WhichCaptures::None) + .look_matcher(lookm); + let result = thompson::Compiler::new() + .configure(thompson_config) + .build_from_hir(&concat_prefix); + let nfarev = match result { + Ok(nfarev) => nfarev, + Err(_err) => { + debug!( + "skipping reverse inner optimization because the \ + reverse NFA failed to build: {}", + _err, + ); + return Err(core); + } + }; + debug!("building reverse DFA for prefix before inner literal"); + let dfa = if !core.info.config().get_dfa() { + wrappers::ReverseDFA::none() + } else { + wrappers::ReverseDFA::new(&core.info, &nfarev) + }; + let hybrid = if !core.info.config().get_hybrid() { + wrappers::ReverseHybrid::none() + } else if dfa.is_some() { + debug!( + "skipping lazy DFA for reverse inner optimization \ + because we have a full DFA" + ); + wrappers::ReverseHybrid::none() + } else { + 
wrappers::ReverseHybrid::new(&core.info, &nfarev) + }; + Ok(ReverseInner { core, preinner, nfarev, hybrid, dfa }) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn try_search_full( + &self, + cache: &mut Cache, + input: &Input<'_>, + ) -> Result, RetryError> { + let mut span = input.get_span(); + let mut min_match_start = 0; + let mut min_pre_start = 0; + loop { + let litmatch = match self.preinner.find(input.haystack(), span) { + None => return Ok(None), + Some(span) => span, + }; + if litmatch.start < min_pre_start { + trace!( + "found inner prefilter match at {:?}, which starts \ + before the end of the last forward scan at {}, \ + quitting to avoid quadratic behavior", + litmatch, + min_pre_start, + ); + return Err(RetryError::Quadratic(RetryQuadraticError::new())); + } + trace!("reverse inner scan found inner match at {:?}", litmatch); + let revinput = input + .clone() + .anchored(Anchored::Yes) + .span(input.start()..litmatch.start); + // Note that in addition to the literal search above scanning past + // our minimum start point, this routine can also return an error + // as a result of detecting possible quadratic behavior if the + // reverse scan goes past the minimum start point. That is, the + // literal search might not, but the reverse regex search for the + // prefix might! + match self.try_search_half_rev_limited( + cache, + &revinput, + min_match_start, + )? { + None => { + if span.start >= span.end { + break; + } + span.start = litmatch.start.checked_add(1).unwrap(); + } + Some(hm_start) => { + let fwdinput = input + .clone() + .anchored(Anchored::Pattern(hm_start.pattern())) + .span(hm_start.offset()..input.end()); + match self.try_search_half_fwd_stopat(cache, &fwdinput)? 
{ + Err(stopat) => { + min_pre_start = stopat; + span.start = + litmatch.start.checked_add(1).unwrap(); + } + Ok(hm_end) => { + return Ok(Some(Match::new( + hm_start.pattern(), + hm_start.offset()..hm_end.offset(), + ))) + } + } + } + } + min_match_start = litmatch.end; + } + Ok(None) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn try_search_half_fwd_stopat( + &self, + cache: &mut Cache, + input: &Input<'_>, + ) -> Result, RetryFailError> { + if let Some(e) = self.core.dfa.get(&input) { + trace!( + "using full DFA for forward reverse inner search at {:?}", + input.get_span() + ); + e.try_search_half_fwd_stopat(&input) + } else if let Some(e) = self.core.hybrid.get(&input) { + trace!( + "using lazy DFA for forward reverse inner search at {:?}", + input.get_span() + ); + e.try_search_half_fwd_stopat(&mut cache.hybrid, &input) + } else { + unreachable!("ReverseInner always has a DFA") + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn try_search_half_rev_limited( + &self, + cache: &mut Cache, + input: &Input<'_>, + min_start: usize, + ) -> Result, RetryError> { + if let Some(e) = self.dfa.get(&input) { + trace!( + "using full DFA for reverse inner search at {:?}, \ + but will be stopped at {} to avoid quadratic behavior", + input.get_span(), + min_start, + ); + e.try_search_half_rev_limited(&input, min_start) + } else if let Some(e) = self.hybrid.get(&input) { + trace!( + "using lazy DFA for reverse inner search at {:?}, \ + but will be stopped at {} to avoid quadratic behavior", + input.get_span(), + min_start, + ); + e.try_search_half_rev_limited( + &mut cache.revhybrid, + &input, + min_start, + ) + } else { + unreachable!("ReverseInner always has a DFA") + } + } +} + +impl Strategy for ReverseInner { + #[cfg_attr(feature = "perf-inline", inline(always))] + fn group_info(&self) -> &GroupInfo { + self.core.group_info() + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn create_cache(&self) -> Cache { + let mut cache 
= self.core.create_cache(); + cache.revhybrid = self.hybrid.create_cache(); + cache + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn reset_cache(&self, cache: &mut Cache) { + self.core.reset_cache(cache); + cache.revhybrid.reset(&self.hybrid); + } + + fn is_accelerated(&self) -> bool { + self.preinner.is_fast() + } + + fn memory_usage(&self) -> usize { + self.core.memory_usage() + + self.preinner.memory_usage() + + self.nfarev.memory_usage() + + self.dfa.memory_usage() + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn search(&self, cache: &mut Cache, input: &Input<'_>) -> Option { + if input.get_anchored().is_anchored() { + return self.core.search(cache, input); + } + match self.try_search_full(cache, input) { + Err(RetryError::Quadratic(_err)) => { + trace!("reverse inner optimization failed: {}", _err); + self.core.search(cache, input) + } + Err(RetryError::Fail(_err)) => { + trace!("reverse inner fast search failed: {}", _err); + self.core.search_nofail(cache, input) + } + Ok(matornot) => matornot, + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn search_half( + &self, + cache: &mut Cache, + input: &Input<'_>, + ) -> Option { + if input.get_anchored().is_anchored() { + return self.core.search_half(cache, input); + } + match self.try_search_full(cache, input) { + Err(RetryError::Quadratic(_err)) => { + trace!("reverse inner half optimization failed: {}", _err); + self.core.search_half(cache, input) + } + Err(RetryError::Fail(_err)) => { + trace!("reverse inner fast half search failed: {}", _err); + self.core.search_half_nofail(cache, input) + } + Ok(None) => None, + Ok(Some(m)) => Some(HalfMatch::new(m.pattern(), m.end())), + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn is_match(&self, cache: &mut Cache, input: &Input<'_>) -> bool { + if input.get_anchored().is_anchored() { + return self.core.is_match(cache, input); + } + match self.try_search_full(cache, input) { + 
Err(RetryError::Quadratic(_err)) => { + trace!("reverse inner half optimization failed: {}", _err); + self.core.is_match_nofail(cache, input) + } + Err(RetryError::Fail(_err)) => { + trace!("reverse inner fast half search failed: {}", _err); + self.core.is_match_nofail(cache, input) + } + Ok(None) => false, + Ok(Some(_)) => true, + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn search_slots( + &self, + cache: &mut Cache, + input: &Input<'_>, + slots: &mut [Option], + ) -> Option { + if input.get_anchored().is_anchored() { + return self.core.search_slots(cache, input, slots); + } + if !self.core.is_capture_search_needed(slots.len()) { + trace!("asked for slots unnecessarily, trying fast path"); + let m = self.search(cache, input)?; + copy_match_to_slots(m, slots); + return Some(m.pattern()); + } + let m = match self.try_search_full(cache, input) { + Err(RetryError::Quadratic(_err)) => { + trace!("reverse inner captures optimization failed: {}", _err); + return self.core.search_slots(cache, input, slots); + } + Err(RetryError::Fail(_err)) => { + trace!("reverse inner fast captures search failed: {}", _err); + return self.core.search_slots_nofail(cache, input, slots); + } + Ok(None) => return None, + Ok(Some(m)) => m, + }; + trace!( + "match found at {}..{} in capture search, \ + using another engine to find captures", + m.start(), + m.end(), + ); + let input = input + .clone() + .span(m.start()..m.end()) + .anchored(Anchored::Pattern(m.pattern())); + self.core.search_slots_nofail(cache, &input, slots) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn which_overlapping_matches( + &self, + cache: &mut Cache, + input: &Input<'_>, + patset: &mut PatternSet, + ) { + self.core.which_overlapping_matches(cache, input, patset) + } +} + +/// Copies the offsets in the given match to the corresponding positions in +/// `slots`. 
+/// +/// In effect, this sets the slots corresponding to the implicit group for the +/// pattern in the given match. If the indices for the corresponding slots do +/// not exist, then no slots are set. +/// +/// This is useful when the caller provides slots (or captures), but you use a +/// regex engine that doesn't operate on slots (like a lazy DFA). This function +/// lets you map the match you get back to the slots provided by the caller. +#[cfg_attr(feature = "perf-inline", inline(always))] +fn copy_match_to_slots(m: Match, slots: &mut [Option]) { + let slot_start = m.pattern().as_usize() * 2; + let slot_end = slot_start + 1; + if let Some(slot) = slots.get_mut(slot_start) { + *slot = NonMaxUsize::new(m.start()); + } + if let Some(slot) = slots.get_mut(slot_end) { + *slot = NonMaxUsize::new(m.end()); + } +} diff --git a/vendor/regex-automata/src/meta/wrappers.rs b/vendor/regex-automata/src/meta/wrappers.rs new file mode 100644 index 0000000..6cb19ba --- /dev/null +++ b/vendor/regex-automata/src/meta/wrappers.rs @@ -0,0 +1,1351 @@ +/*! +This module contains a boat load of wrappers around each of our internal regex +engines. They encapsulate a few things: + +1. The wrappers manage the conditional existence of the regex engine. Namely, +the PikeVM is the only required regex engine. The rest are optional. These +wrappers present a uniform API regardless of which engines are available. And +availability might be determined by compile time features or by dynamic +configuration via `meta::Config`. Encapsulating the conditional compilation +features is in particular a huge simplification for the higher level code that +composes these engines. +2. The wrappers manage construction of each engine, including skipping it if +the engine is unavailable or configured to not be used. +3. The wrappers manage whether an engine *can* be used for a particular +search configuration. 
For example, `BoundedBacktracker::get` only returns a +backtracking engine when the haystack is bigger than the maximum supported +length. The wrappers also sometimes take a position on when an engine *ought* +to be used, but only in cases where the logic is extremely local to the engine +itself. Otherwise, things like "choose between the backtracker and the one-pass +DFA" are managed by the higher level meta strategy code. + +There are also corresponding wrappers for the various `Cache` types for each +regex engine that needs them. If an engine is unavailable or not used, then a +cache for it will *not* actually be allocated. +*/ + +use alloc::vec::Vec; + +use crate::{ + meta::{ + error::{BuildError, RetryError, RetryFailError}, + regex::RegexInfo, + }, + nfa::thompson::{pikevm, NFA}, + util::{prefilter::Prefilter, primitives::NonMaxUsize}, + HalfMatch, Input, Match, MatchKind, PatternID, PatternSet, +}; + +#[cfg(feature = "dfa-build")] +use crate::dfa; +#[cfg(feature = "dfa-onepass")] +use crate::dfa::onepass; +#[cfg(feature = "hybrid")] +use crate::hybrid; +#[cfg(feature = "nfa-backtrack")] +use crate::nfa::thompson::backtrack; + +#[derive(Debug)] +pub(crate) struct PikeVM(PikeVMEngine); + +impl PikeVM { + pub(crate) fn new( + info: &RegexInfo, + pre: Option, + nfa: &NFA, + ) -> Result { + PikeVMEngine::new(info, pre, nfa).map(PikeVM) + } + + pub(crate) fn create_cache(&self) -> PikeVMCache { + PikeVMCache::new(self) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn get(&self) -> &PikeVMEngine { + &self.0 + } +} + +#[derive(Debug)] +pub(crate) struct PikeVMEngine(pikevm::PikeVM); + +impl PikeVMEngine { + pub(crate) fn new( + info: &RegexInfo, + pre: Option, + nfa: &NFA, + ) -> Result { + let pikevm_config = pikevm::Config::new() + .match_kind(info.config().get_match_kind()) + .prefilter(pre); + let engine = pikevm::Builder::new() + .configure(pikevm_config) + .build_from_nfa(nfa.clone()) + .map_err(BuildError::nfa)?; + debug!("PikeVM 
built"); + Ok(PikeVMEngine(engine)) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn is_match( + &self, + cache: &mut PikeVMCache, + input: &Input<'_>, + ) -> bool { + self.0.is_match(cache.0.as_mut().unwrap(), input.clone()) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn search_slots( + &self, + cache: &mut PikeVMCache, + input: &Input<'_>, + slots: &mut [Option], + ) -> Option { + self.0.search_slots(cache.0.as_mut().unwrap(), input, slots) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn which_overlapping_matches( + &self, + cache: &mut PikeVMCache, + input: &Input<'_>, + patset: &mut PatternSet, + ) { + self.0.which_overlapping_matches( + cache.0.as_mut().unwrap(), + input, + patset, + ) + } +} + +#[derive(Clone, Debug)] +pub(crate) struct PikeVMCache(Option); + +impl PikeVMCache { + pub(crate) fn none() -> PikeVMCache { + PikeVMCache(None) + } + + pub(crate) fn new(builder: &PikeVM) -> PikeVMCache { + PikeVMCache(Some(builder.get().0.create_cache())) + } + + pub(crate) fn reset(&mut self, builder: &PikeVM) { + self.0.as_mut().unwrap().reset(&builder.get().0); + } + + pub(crate) fn memory_usage(&self) -> usize { + self.0.as_ref().map_or(0, |c| c.memory_usage()) + } +} + +#[derive(Debug)] +pub(crate) struct BoundedBacktracker(Option); + +impl BoundedBacktracker { + pub(crate) fn new( + info: &RegexInfo, + pre: Option, + nfa: &NFA, + ) -> Result { + BoundedBacktrackerEngine::new(info, pre, nfa).map(BoundedBacktracker) + } + + pub(crate) fn create_cache(&self) -> BoundedBacktrackerCache { + BoundedBacktrackerCache::new(self) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn get( + &self, + input: &Input<'_>, + ) -> Option<&BoundedBacktrackerEngine> { + let engine = self.0.as_ref()?; + // It is difficult to make the backtracker give up early if it is + // guaranteed to eventually wind up in a match state. 
This is because + // of the greedy nature of a backtracker: it just blindly mushes + // forward. Every other regex engine is able to give up more quickly, + // so even if the backtracker might be able to zip through faster than + // (say) the PikeVM, we prefer the theoretical benefit that some other + // engine might be able to scan much less of the haystack than the + // backtracker. + // + // Now, if the haystack is really short already, then we allow the + // backtracker to run. (This hasn't been litigated quantitatively with + // benchmarks. Just a hunch.) + if input.get_earliest() && input.haystack().len() > 128 { + return None; + } + // If the backtracker is just going to return an error because the + // haystack is too long, then obviously do not use it. + if input.get_span().len() > engine.max_haystack_len() { + return None; + } + Some(engine) + } +} + +#[derive(Debug)] +pub(crate) struct BoundedBacktrackerEngine( + #[cfg(feature = "nfa-backtrack")] backtrack::BoundedBacktracker, + #[cfg(not(feature = "nfa-backtrack"))] (), +); + +impl BoundedBacktrackerEngine { + pub(crate) fn new( + info: &RegexInfo, + pre: Option, + nfa: &NFA, + ) -> Result, BuildError> { + #[cfg(feature = "nfa-backtrack")] + { + if !info.config().get_backtrack() + || info.config().get_match_kind() != MatchKind::LeftmostFirst + { + return Ok(None); + } + let backtrack_config = backtrack::Config::new().prefilter(pre); + let engine = backtrack::Builder::new() + .configure(backtrack_config) + .build_from_nfa(nfa.clone()) + .map_err(BuildError::nfa)?; + debug!( + "BoundedBacktracker built (max haystack length: {:?})", + engine.max_haystack_len() + ); + Ok(Some(BoundedBacktrackerEngine(engine))) + } + #[cfg(not(feature = "nfa-backtrack"))] + { + Ok(None) + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn is_match( + &self, + cache: &mut BoundedBacktrackerCache, + input: &Input<'_>, + ) -> bool { + #[cfg(feature = "nfa-backtrack")] + { + // OK because we only 
permit access to this engine when we know + // the haystack is short enough for the backtracker to run without + // reporting an error. + self.0 + .try_is_match(cache.0.as_mut().unwrap(), input.clone()) + .unwrap() + } + #[cfg(not(feature = "nfa-backtrack"))] + { + // Impossible to reach because this engine is never constructed + // if the requisite features aren't enabled. + unreachable!() + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn search_slots( + &self, + cache: &mut BoundedBacktrackerCache, + input: &Input<'_>, + slots: &mut [Option], + ) -> Option { + #[cfg(feature = "nfa-backtrack")] + { + // OK because we only permit access to this engine when we know + // the haystack is short enough for the backtracker to run without + // reporting an error. + self.0 + .try_search_slots(cache.0.as_mut().unwrap(), input, slots) + .unwrap() + } + #[cfg(not(feature = "nfa-backtrack"))] + { + // Impossible to reach because this engine is never constructed + // if the requisite features aren't enabled. + unreachable!() + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn max_haystack_len(&self) -> usize { + #[cfg(feature = "nfa-backtrack")] + { + self.0.max_haystack_len() + } + #[cfg(not(feature = "nfa-backtrack"))] + { + // Impossible to reach because this engine is never constructed + // if the requisite features aren't enabled. 
+ unreachable!() + } + } +} + +#[derive(Clone, Debug)] +pub(crate) struct BoundedBacktrackerCache( + #[cfg(feature = "nfa-backtrack")] Option, + #[cfg(not(feature = "nfa-backtrack"))] (), +); + +impl BoundedBacktrackerCache { + pub(crate) fn none() -> BoundedBacktrackerCache { + #[cfg(feature = "nfa-backtrack")] + { + BoundedBacktrackerCache(None) + } + #[cfg(not(feature = "nfa-backtrack"))] + { + BoundedBacktrackerCache(()) + } + } + + pub(crate) fn new( + builder: &BoundedBacktracker, + ) -> BoundedBacktrackerCache { + #[cfg(feature = "nfa-backtrack")] + { + BoundedBacktrackerCache( + builder.0.as_ref().map(|e| e.0.create_cache()), + ) + } + #[cfg(not(feature = "nfa-backtrack"))] + { + BoundedBacktrackerCache(()) + } + } + + pub(crate) fn reset(&mut self, builder: &BoundedBacktracker) { + #[cfg(feature = "nfa-backtrack")] + if let Some(ref e) = builder.0 { + self.0.as_mut().unwrap().reset(&e.0); + } + } + + pub(crate) fn memory_usage(&self) -> usize { + #[cfg(feature = "nfa-backtrack")] + { + self.0.as_ref().map_or(0, |c| c.memory_usage()) + } + #[cfg(not(feature = "nfa-backtrack"))] + { + 0 + } + } +} + +#[derive(Debug)] +pub(crate) struct OnePass(Option); + +impl OnePass { + pub(crate) fn new(info: &RegexInfo, nfa: &NFA) -> OnePass { + OnePass(OnePassEngine::new(info, nfa)) + } + + pub(crate) fn create_cache(&self) -> OnePassCache { + OnePassCache::new(self) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn get(&self, input: &Input<'_>) -> Option<&OnePassEngine> { + let engine = self.0.as_ref()?; + if !input.get_anchored().is_anchored() + && !engine.get_nfa().is_always_start_anchored() + { + return None; + } + Some(engine) + } + + pub(crate) fn memory_usage(&self) -> usize { + self.0.as_ref().map_or(0, |e| e.memory_usage()) + } +} + +#[derive(Debug)] +pub(crate) struct OnePassEngine( + #[cfg(feature = "dfa-onepass")] onepass::DFA, + #[cfg(not(feature = "dfa-onepass"))] (), +); + +impl OnePassEngine { + pub(crate) fn new(info: 
&RegexInfo, nfa: &NFA) -> Option { + #[cfg(feature = "dfa-onepass")] + { + if !info.config().get_onepass() { + return None; + } + // In order to even attempt building a one-pass DFA, we require + // that we either have at least one explicit capturing group or + // there's a Unicode word boundary somewhere. If we don't have + // either of these things, then the lazy DFA will almost certainly + // be useable and be much faster. The only case where it might + // not is if the lazy DFA isn't utilizing its cache effectively, + // but in those cases, the underlying regex is almost certainly + // not one-pass or is too big to fit within the current one-pass + // implementation limits. + if info.props_union().explicit_captures_len() == 0 + && !info.props_union().look_set().contains_word_unicode() + { + debug!("not building OnePass because it isn't worth it"); + return None; + } + let onepass_config = onepass::Config::new() + .match_kind(info.config().get_match_kind()) + // Like for the lazy DFA, we unconditionally enable this + // because it doesn't cost much and makes the API more + // flexible. + .starts_for_each_pattern(true) + .byte_classes(info.config().get_byte_classes()) + .size_limit(info.config().get_onepass_size_limit()); + let result = onepass::Builder::new() + .configure(onepass_config) + .build_from_nfa(nfa.clone()); + let engine = match result { + Ok(engine) => engine, + Err(_err) => { + debug!("OnePass failed to build: {}", _err); + return None; + } + }; + debug!("OnePass built, {} bytes", engine.memory_usage()); + Some(OnePassEngine(engine)) + } + #[cfg(not(feature = "dfa-onepass"))] + { + None + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn search_slots( + &self, + cache: &mut OnePassCache, + input: &Input<'_>, + slots: &mut [Option], + ) -> Option { + #[cfg(feature = "dfa-onepass")] + { + // OK because we only permit getting a OnePassEngine when we know + // the search is anchored and thus an error cannot occur. 
+ self.0 + .try_search_slots(cache.0.as_mut().unwrap(), input, slots) + .unwrap() + } + #[cfg(not(feature = "dfa-onepass"))] + { + // Impossible to reach because this engine is never constructed + // if the requisite features aren't enabled. + unreachable!() + } + } + + pub(crate) fn memory_usage(&self) -> usize { + #[cfg(feature = "dfa-onepass")] + { + self.0.memory_usage() + } + #[cfg(not(feature = "dfa-onepass"))] + { + // Impossible to reach because this engine is never constructed + // if the requisite features aren't enabled. + unreachable!() + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn get_nfa(&self) -> &NFA { + #[cfg(feature = "dfa-onepass")] + { + self.0.get_nfa() + } + #[cfg(not(feature = "dfa-onepass"))] + { + // Impossible to reach because this engine is never constructed + // if the requisite features aren't enabled. + unreachable!() + } + } +} + +#[derive(Clone, Debug)] +pub(crate) struct OnePassCache( + #[cfg(feature = "dfa-onepass")] Option, + #[cfg(not(feature = "dfa-onepass"))] (), +); + +impl OnePassCache { + pub(crate) fn none() -> OnePassCache { + #[cfg(feature = "dfa-onepass")] + { + OnePassCache(None) + } + #[cfg(not(feature = "dfa-onepass"))] + { + OnePassCache(()) + } + } + + pub(crate) fn new(builder: &OnePass) -> OnePassCache { + #[cfg(feature = "dfa-onepass")] + { + OnePassCache(builder.0.as_ref().map(|e| e.0.create_cache())) + } + #[cfg(not(feature = "dfa-onepass"))] + { + OnePassCache(()) + } + } + + pub(crate) fn reset(&mut self, builder: &OnePass) { + #[cfg(feature = "dfa-onepass")] + if let Some(ref e) = builder.0 { + self.0.as_mut().unwrap().reset(&e.0); + } + } + + pub(crate) fn memory_usage(&self) -> usize { + #[cfg(feature = "dfa-onepass")] + { + self.0.as_ref().map_or(0, |c| c.memory_usage()) + } + #[cfg(not(feature = "dfa-onepass"))] + { + 0 + } + } +} + +#[derive(Debug)] +pub(crate) struct Hybrid(Option); + +impl Hybrid { + pub(crate) fn none() -> Hybrid { + Hybrid(None) + } + + pub(crate) fn new( + 
info: &RegexInfo, + pre: Option, + nfa: &NFA, + nfarev: &NFA, + ) -> Hybrid { + Hybrid(HybridEngine::new(info, pre, nfa, nfarev)) + } + + pub(crate) fn create_cache(&self) -> HybridCache { + HybridCache::new(self) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn get(&self, _input: &Input<'_>) -> Option<&HybridEngine> { + let engine = self.0.as_ref()?; + Some(engine) + } + + pub(crate) fn is_some(&self) -> bool { + self.0.is_some() + } +} + +#[derive(Debug)] +pub(crate) struct HybridEngine( + #[cfg(feature = "hybrid")] hybrid::regex::Regex, + #[cfg(not(feature = "hybrid"))] (), +); + +impl HybridEngine { + pub(crate) fn new( + info: &RegexInfo, + pre: Option, + nfa: &NFA, + nfarev: &NFA, + ) -> Option { + #[cfg(feature = "hybrid")] + { + if !info.config().get_hybrid() { + return None; + } + let dfa_config = hybrid::dfa::Config::new() + .match_kind(info.config().get_match_kind()) + .prefilter(pre.clone()) + // Enabling this is necessary for ensuring we can service any + // kind of 'Input' search without error. For the lazy DFA, + // this is not particularly costly, since the start states are + // generated lazily. + .starts_for_each_pattern(true) + .byte_classes(info.config().get_byte_classes()) + .unicode_word_boundary(true) + .specialize_start_states(pre.is_some()) + .cache_capacity(info.config().get_hybrid_cache_capacity()) + // This makes it possible for building a lazy DFA to + // fail even though the NFA has already been built. Namely, + // if the cache capacity is too small to fit some minimum + // number of states (which is small, like 4 or 5), then the + // DFA will refuse to build. + // + // We shouldn't enable this to make building always work, since + // this could cause the allocation of a cache bigger than the + // provided capacity amount. + // + // This is effectively the only reason why building a lazy DFA + // could fail. If it does, then we simply suppress the error + // and return None. 
+ .skip_cache_capacity_check(false) + // This and enabling heuristic Unicode word boundary support + // above make it so the lazy DFA can quit at match time. + .minimum_cache_clear_count(Some(3)) + .minimum_bytes_per_state(Some(10)); + let result = hybrid::dfa::Builder::new() + .configure(dfa_config.clone()) + .build_from_nfa(nfa.clone()); + let fwd = match result { + Ok(fwd) => fwd, + Err(_err) => { + debug!("forward lazy DFA failed to build: {}", _err); + return None; + } + }; + let result = hybrid::dfa::Builder::new() + .configure( + dfa_config + .clone() + .match_kind(MatchKind::All) + .prefilter(None) + .specialize_start_states(false), + ) + .build_from_nfa(nfarev.clone()); + let rev = match result { + Ok(rev) => rev, + Err(_err) => { + debug!("reverse lazy DFA failed to build: {}", _err); + return None; + } + }; + let engine = + hybrid::regex::Builder::new().build_from_dfas(fwd, rev); + debug!("lazy DFA built"); + Some(HybridEngine(engine)) + } + #[cfg(not(feature = "hybrid"))] + { + None + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn try_search( + &self, + cache: &mut HybridCache, + input: &Input<'_>, + ) -> Result, RetryFailError> { + #[cfg(feature = "hybrid")] + { + let cache = cache.0.as_mut().unwrap(); + self.0.try_search(cache, input).map_err(|e| e.into()) + } + #[cfg(not(feature = "hybrid"))] + { + // Impossible to reach because this engine is never constructed + // if the requisite features aren't enabled. 
+ unreachable!() + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn try_search_half_fwd( + &self, + cache: &mut HybridCache, + input: &Input<'_>, + ) -> Result, RetryFailError> { + #[cfg(feature = "hybrid")] + { + let fwd = self.0.forward(); + let mut fwdcache = cache.0.as_mut().unwrap().as_parts_mut().0; + fwd.try_search_fwd(&mut fwdcache, input).map_err(|e| e.into()) + } + #[cfg(not(feature = "hybrid"))] + { + // Impossible to reach because this engine is never constructed + // if the requisite features aren't enabled. + unreachable!() + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn try_search_half_fwd_stopat( + &self, + cache: &mut HybridCache, + input: &Input<'_>, + ) -> Result, RetryFailError> { + #[cfg(feature = "hybrid")] + { + let dfa = self.0.forward(); + let mut cache = cache.0.as_mut().unwrap().as_parts_mut().0; + crate::meta::stopat::hybrid_try_search_half_fwd( + dfa, &mut cache, input, + ) + } + #[cfg(not(feature = "hybrid"))] + { + // Impossible to reach because this engine is never constructed + // if the requisite features aren't enabled. + unreachable!() + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn try_search_half_rev( + &self, + cache: &mut HybridCache, + input: &Input<'_>, + ) -> Result, RetryFailError> { + #[cfg(feature = "hybrid")] + { + let rev = self.0.reverse(); + let mut revcache = cache.0.as_mut().unwrap().as_parts_mut().1; + rev.try_search_rev(&mut revcache, input).map_err(|e| e.into()) + } + #[cfg(not(feature = "hybrid"))] + { + // Impossible to reach because this engine is never constructed + // if the requisite features aren't enabled. 
+ unreachable!() + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn try_search_half_rev_limited( + &self, + cache: &mut HybridCache, + input: &Input<'_>, + min_start: usize, + ) -> Result, RetryError> { + #[cfg(feature = "hybrid")] + { + let dfa = self.0.reverse(); + let mut cache = cache.0.as_mut().unwrap().as_parts_mut().1; + crate::meta::limited::hybrid_try_search_half_rev( + dfa, &mut cache, input, min_start, + ) + } + #[cfg(not(feature = "hybrid"))] + { + // Impossible to reach because this engine is never constructed + // if the requisite features aren't enabled. + unreachable!() + } + } + + #[inline] + pub(crate) fn try_which_overlapping_matches( + &self, + cache: &mut HybridCache, + input: &Input<'_>, + patset: &mut PatternSet, + ) -> Result<(), RetryFailError> { + #[cfg(feature = "hybrid")] + { + let fwd = self.0.forward(); + let mut fwdcache = cache.0.as_mut().unwrap().as_parts_mut().0; + fwd.try_which_overlapping_matches(&mut fwdcache, input, patset) + .map_err(|e| e.into()) + } + #[cfg(not(feature = "hybrid"))] + { + // Impossible to reach because this engine is never constructed + // if the requisite features aren't enabled. 
+ unreachable!() + } + } +} + +#[derive(Clone, Debug)] +pub(crate) struct HybridCache( + #[cfg(feature = "hybrid")] Option, + #[cfg(not(feature = "hybrid"))] (), +); + +impl HybridCache { + pub(crate) fn none() -> HybridCache { + #[cfg(feature = "hybrid")] + { + HybridCache(None) + } + #[cfg(not(feature = "hybrid"))] + { + HybridCache(()) + } + } + + pub(crate) fn new(builder: &Hybrid) -> HybridCache { + #[cfg(feature = "hybrid")] + { + HybridCache(builder.0.as_ref().map(|e| e.0.create_cache())) + } + #[cfg(not(feature = "hybrid"))] + { + HybridCache(()) + } + } + + pub(crate) fn reset(&mut self, builder: &Hybrid) { + #[cfg(feature = "hybrid")] + if let Some(ref e) = builder.0 { + self.0.as_mut().unwrap().reset(&e.0); + } + } + + pub(crate) fn memory_usage(&self) -> usize { + #[cfg(feature = "hybrid")] + { + self.0.as_ref().map_or(0, |c| c.memory_usage()) + } + #[cfg(not(feature = "hybrid"))] + { + 0 + } + } +} + +#[derive(Debug)] +pub(crate) struct DFA(Option); + +impl DFA { + pub(crate) fn none() -> DFA { + DFA(None) + } + + pub(crate) fn new( + info: &RegexInfo, + pre: Option, + nfa: &NFA, + nfarev: &NFA, + ) -> DFA { + DFA(DFAEngine::new(info, pre, nfa, nfarev)) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn get(&self, _input: &Input<'_>) -> Option<&DFAEngine> { + let engine = self.0.as_ref()?; + Some(engine) + } + + pub(crate) fn is_some(&self) -> bool { + self.0.is_some() + } + + pub(crate) fn memory_usage(&self) -> usize { + self.0.as_ref().map_or(0, |e| e.memory_usage()) + } +} + +#[derive(Debug)] +pub(crate) struct DFAEngine( + #[cfg(feature = "dfa-build")] dfa::regex::Regex, + #[cfg(not(feature = "dfa-build"))] (), +); + +impl DFAEngine { + pub(crate) fn new( + info: &RegexInfo, + pre: Option, + nfa: &NFA, + nfarev: &NFA, + ) -> Option { + #[cfg(feature = "dfa-build")] + { + if !info.config().get_dfa() { + return None; + } + // If our NFA is anything but small, don't even bother with a DFA. 
+ if let Some(state_limit) = info.config().get_dfa_state_limit() { + if nfa.states().len() > state_limit { + debug!( + "skipping full DFA because NFA has {} states, \ + which exceeds the heuristic limit of {}", + nfa.states().len(), + state_limit, + ); + return None; + } + } + // We cut the size limit in four because the total heap used by + // DFA construction is determinization aux memory and the DFA + // itself, and those things are configured independently in the + // lower level DFA builder API. And then split that in two because + // of forward and reverse DFAs. + let size_limit = info.config().get_dfa_size_limit().map(|n| n / 4); + let dfa_config = dfa::dense::Config::new() + .match_kind(info.config().get_match_kind()) + .prefilter(pre.clone()) + // Enabling this is necessary for ensuring we can service any + // kind of 'Input' search without error. For the full DFA, this + // can be quite costly. But since we have such a small bound + // on the size of the DFA, in practice, any multl-regexes are + // probably going to blow the limit anyway. + .starts_for_each_pattern(true) + .byte_classes(info.config().get_byte_classes()) + .unicode_word_boundary(true) + .specialize_start_states(pre.is_some()) + .determinize_size_limit(size_limit) + .dfa_size_limit(size_limit); + let result = dfa::dense::Builder::new() + .configure(dfa_config.clone()) + .build_from_nfa(&nfa); + let fwd = match result { + Ok(fwd) => fwd, + Err(_err) => { + debug!("forward full DFA failed to build: {}", _err); + return None; + } + }; + let result = dfa::dense::Builder::new() + .configure( + dfa_config + .clone() + // We never need unanchored reverse searches, so + // there's no point in building it into the DFA, which + // WILL take more space. (This isn't done for the lazy + // DFA because the DFA is, well, lazy. It doesn't pay + // the cost for supporting unanchored searches unless + // you actually do an unanchored search, which we + // don't.) 
+ .start_kind(dfa::StartKind::Anchored) + .match_kind(MatchKind::All) + .prefilter(None) + .specialize_start_states(false), + ) + .build_from_nfa(&nfarev); + let rev = match result { + Ok(rev) => rev, + Err(_err) => { + debug!("reverse full DFA failed to build: {}", _err); + return None; + } + }; + let engine = dfa::regex::Builder::new().build_from_dfas(fwd, rev); + debug!( + "fully compiled forward and reverse DFAs built, {} bytes", + engine.forward().memory_usage() + + engine.reverse().memory_usage(), + ); + Some(DFAEngine(engine)) + } + #[cfg(not(feature = "dfa-build"))] + { + None + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn try_search( + &self, + input: &Input<'_>, + ) -> Result, RetryFailError> { + #[cfg(feature = "dfa-build")] + { + self.0.try_search(input).map_err(|e| e.into()) + } + #[cfg(not(feature = "dfa-build"))] + { + // Impossible to reach because this engine is never constructed + // if the requisite features aren't enabled. + unreachable!() + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn try_search_half_fwd( + &self, + input: &Input<'_>, + ) -> Result, RetryFailError> { + #[cfg(feature = "dfa-build")] + { + use crate::dfa::Automaton; + self.0.forward().try_search_fwd(input).map_err(|e| e.into()) + } + #[cfg(not(feature = "dfa-build"))] + { + // Impossible to reach because this engine is never constructed + // if the requisite features aren't enabled. + unreachable!() + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn try_search_half_fwd_stopat( + &self, + input: &Input<'_>, + ) -> Result, RetryFailError> { + #[cfg(feature = "dfa-build")] + { + let dfa = self.0.forward(); + crate::meta::stopat::dfa_try_search_half_fwd(dfa, input) + } + #[cfg(not(feature = "dfa-build"))] + { + // Impossible to reach because this engine is never constructed + // if the requisite features aren't enabled. 
+ unreachable!() + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn try_search_half_rev( + &self, + input: &Input<'_>, + ) -> Result, RetryFailError> { + #[cfg(feature = "dfa-build")] + { + use crate::dfa::Automaton; + self.0.reverse().try_search_rev(&input).map_err(|e| e.into()) + } + #[cfg(not(feature = "dfa-build"))] + { + // Impossible to reach because this engine is never constructed + // if the requisite features aren't enabled. + unreachable!() + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn try_search_half_rev_limited( + &self, + input: &Input<'_>, + min_start: usize, + ) -> Result, RetryError> { + #[cfg(feature = "dfa-build")] + { + let dfa = self.0.reverse(); + crate::meta::limited::dfa_try_search_half_rev( + dfa, input, min_start, + ) + } + #[cfg(not(feature = "dfa-build"))] + { + // Impossible to reach because this engine is never constructed + // if the requisite features aren't enabled. + unreachable!() + } + } + + #[inline] + pub(crate) fn try_which_overlapping_matches( + &self, + input: &Input<'_>, + patset: &mut PatternSet, + ) -> Result<(), RetryFailError> { + #[cfg(feature = "dfa-build")] + { + use crate::dfa::Automaton; + self.0 + .forward() + .try_which_overlapping_matches(input, patset) + .map_err(|e| e.into()) + } + #[cfg(not(feature = "dfa-build"))] + { + // Impossible to reach because this engine is never constructed + // if the requisite features aren't enabled. + unreachable!() + } + } + + pub(crate) fn memory_usage(&self) -> usize { + #[cfg(feature = "dfa-build")] + { + self.0.forward().memory_usage() + self.0.reverse().memory_usage() + } + #[cfg(not(feature = "dfa-build"))] + { + // Impossible to reach because this engine is never constructed + // if the requisite features aren't enabled. 
+ unreachable!() + } + } +} + +#[derive(Debug)] +pub(crate) struct ReverseHybrid(Option); + +impl ReverseHybrid { + pub(crate) fn none() -> ReverseHybrid { + ReverseHybrid(None) + } + + pub(crate) fn new(info: &RegexInfo, nfarev: &NFA) -> ReverseHybrid { + ReverseHybrid(ReverseHybridEngine::new(info, nfarev)) + } + + pub(crate) fn create_cache(&self) -> ReverseHybridCache { + ReverseHybridCache::new(self) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn get( + &self, + _input: &Input<'_>, + ) -> Option<&ReverseHybridEngine> { + let engine = self.0.as_ref()?; + Some(engine) + } +} + +#[derive(Debug)] +pub(crate) struct ReverseHybridEngine( + #[cfg(feature = "hybrid")] hybrid::dfa::DFA, + #[cfg(not(feature = "hybrid"))] (), +); + +impl ReverseHybridEngine { + pub(crate) fn new( + info: &RegexInfo, + nfarev: &NFA, + ) -> Option { + #[cfg(feature = "hybrid")] + { + if !info.config().get_hybrid() { + return None; + } + // Since we only use this for reverse searches, we can hard-code + // a number of things like match semantics, prefilters, starts + // for each pattern and so on. 
+ let dfa_config = hybrid::dfa::Config::new() + .match_kind(MatchKind::All) + .prefilter(None) + .starts_for_each_pattern(false) + .byte_classes(info.config().get_byte_classes()) + .unicode_word_boundary(true) + .specialize_start_states(false) + .cache_capacity(info.config().get_hybrid_cache_capacity()) + .skip_cache_capacity_check(false) + .minimum_cache_clear_count(Some(3)) + .minimum_bytes_per_state(Some(10)); + let result = hybrid::dfa::Builder::new() + .configure(dfa_config) + .build_from_nfa(nfarev.clone()); + let rev = match result { + Ok(rev) => rev, + Err(_err) => { + debug!("lazy reverse DFA failed to build: {}", _err); + return None; + } + }; + debug!("lazy reverse DFA built"); + Some(ReverseHybridEngine(rev)) + } + #[cfg(not(feature = "hybrid"))] + { + None + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn try_search_half_rev_limited( + &self, + cache: &mut ReverseHybridCache, + input: &Input<'_>, + min_start: usize, + ) -> Result, RetryError> { + #[cfg(feature = "hybrid")] + { + let dfa = &self.0; + let mut cache = cache.0.as_mut().unwrap(); + crate::meta::limited::hybrid_try_search_half_rev( + dfa, &mut cache, input, min_start, + ) + } + #[cfg(not(feature = "hybrid"))] + { + // Impossible to reach because this engine is never constructed + // if the requisite features aren't enabled. 
+ unreachable!() + } + } +} + +#[derive(Clone, Debug)] +pub(crate) struct ReverseHybridCache( + #[cfg(feature = "hybrid")] Option, + #[cfg(not(feature = "hybrid"))] (), +); + +impl ReverseHybridCache { + pub(crate) fn none() -> ReverseHybridCache { + #[cfg(feature = "hybrid")] + { + ReverseHybridCache(None) + } + #[cfg(not(feature = "hybrid"))] + { + ReverseHybridCache(()) + } + } + + pub(crate) fn new(builder: &ReverseHybrid) -> ReverseHybridCache { + #[cfg(feature = "hybrid")] + { + ReverseHybridCache(builder.0.as_ref().map(|e| e.0.create_cache())) + } + #[cfg(not(feature = "hybrid"))] + { + ReverseHybridCache(()) + } + } + + pub(crate) fn reset(&mut self, builder: &ReverseHybrid) { + #[cfg(feature = "hybrid")] + if let Some(ref e) = builder.0 { + self.0.as_mut().unwrap().reset(&e.0); + } + } + + pub(crate) fn memory_usage(&self) -> usize { + #[cfg(feature = "hybrid")] + { + self.0.as_ref().map_or(0, |c| c.memory_usage()) + } + #[cfg(not(feature = "hybrid"))] + { + 0 + } + } +} + +#[derive(Debug)] +pub(crate) struct ReverseDFA(Option); + +impl ReverseDFA { + pub(crate) fn none() -> ReverseDFA { + ReverseDFA(None) + } + + pub(crate) fn new(info: &RegexInfo, nfarev: &NFA) -> ReverseDFA { + ReverseDFA(ReverseDFAEngine::new(info, nfarev)) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn get(&self, _input: &Input<'_>) -> Option<&ReverseDFAEngine> { + let engine = self.0.as_ref()?; + Some(engine) + } + + pub(crate) fn is_some(&self) -> bool { + self.0.is_some() + } + + pub(crate) fn memory_usage(&self) -> usize { + self.0.as_ref().map_or(0, |e| e.memory_usage()) + } +} + +#[derive(Debug)] +pub(crate) struct ReverseDFAEngine( + #[cfg(feature = "dfa-build")] dfa::dense::DFA>, + #[cfg(not(feature = "dfa-build"))] (), +); + +impl ReverseDFAEngine { + pub(crate) fn new( + info: &RegexInfo, + nfarev: &NFA, + ) -> Option { + #[cfg(feature = "dfa-build")] + { + if !info.config().get_dfa() { + return None; + } + // If our NFA is anything but small, 
don't even bother with a DFA. + if let Some(state_limit) = info.config().get_dfa_state_limit() { + if nfarev.states().len() > state_limit { + debug!( + "skipping full reverse DFA because NFA has {} states, \ + which exceeds the heuristic limit of {}", + nfarev.states().len(), + state_limit, + ); + return None; + } + } + // We cut the size limit in two because the total heap used by DFA + // construction is determinization aux memory and the DFA itself, + // and those things are configured independently in the lower level + // DFA builder API. + let size_limit = info.config().get_dfa_size_limit().map(|n| n / 2); + // Since we only use this for reverse searches, we can hard-code + // a number of things like match semantics, prefilters, starts + // for each pattern and so on. We also disable acceleration since + // it's incompatible with limited searches (which is the only + // operation we support for this kind of engine at the moment). + let dfa_config = dfa::dense::Config::new() + .match_kind(MatchKind::All) + .prefilter(None) + .accelerate(false) + .start_kind(dfa::StartKind::Anchored) + .starts_for_each_pattern(false) + .byte_classes(info.config().get_byte_classes()) + .unicode_word_boundary(true) + .specialize_start_states(false) + .determinize_size_limit(size_limit) + .dfa_size_limit(size_limit); + let result = dfa::dense::Builder::new() + .configure(dfa_config) + .build_from_nfa(&nfarev); + let rev = match result { + Ok(rev) => rev, + Err(_err) => { + debug!("full reverse DFA failed to build: {}", _err); + return None; + } + }; + debug!( + "fully compiled reverse DFA built, {} bytes", + rev.memory_usage() + ); + Some(ReverseDFAEngine(rev)) + } + #[cfg(not(feature = "dfa-build"))] + { + None + } + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn try_search_half_rev_limited( + &self, + input: &Input<'_>, + min_start: usize, + ) -> Result, RetryError> { + #[cfg(feature = "dfa-build")] + { + let dfa = &self.0; + 
crate::meta::limited::dfa_try_search_half_rev( + dfa, input, min_start, + ) + } + #[cfg(not(feature = "dfa-build"))] + { + // Impossible to reach because this engine is never constructed + // if the requisite features aren't enabled. + unreachable!() + } + } + + pub(crate) fn memory_usage(&self) -> usize { + #[cfg(feature = "dfa-build")] + { + self.0.memory_usage() + } + #[cfg(not(feature = "dfa-build"))] + { + // Impossible to reach because this engine is never constructed + // if the requisite features aren't enabled. + unreachable!() + } + } +} diff --git a/vendor/regex-automata/src/nfa/mod.rs b/vendor/regex-automata/src/nfa/mod.rs new file mode 100644 index 0000000..0c36f59 --- /dev/null +++ b/vendor/regex-automata/src/nfa/mod.rs @@ -0,0 +1,55 @@ +/*! +Provides non-deterministic finite automata (NFA) and regex engines that use +them. + +While NFAs and DFAs (deterministic finite automata) have equivalent *theoretical* +power, their usage in practice tends to result in different engineering trade +offs. While this isn't meant to be a comprehensive treatment of the topic, here +are a few key trade offs that are, at minimum, true for this crate: + +* NFAs tend to be represented sparsely where as DFAs are represented densely. +Sparse representations use less memory, but are slower to traverse. Conversely, +dense representations use more memory, but are faster to traverse. (Sometimes +these lines are blurred. For example, an `NFA` might choose to represent a +particular state in a dense fashion, and a DFA can be built using a sparse +representation via [`sparse::DFA`](crate::dfa::sparse::DFA). +* NFAs have espilon transitions and DFAs don't. In practice, this means that +handling a single byte in a haystack with an NFA at search time may require +visiting multiple NFA states. In a DFA, each byte only requires visiting +a single state. 
Stated differently, NFAs require a variable number of CPU +instructions to process one byte in a haystack where as a DFA uses a constant +number of CPU instructions to process one byte. +* NFAs are generally easier to amend with secondary storage. For example, the +[`thompson::pikevm::PikeVM`] uses an NFA to match, but also uses additional +memory beyond the model of a finite state machine to track offsets for matching +capturing groups. Conversely, the most a DFA can do is report the offset (and +pattern ID) at which a match occurred. This is generally why we also compile +DFAs in reverse, so that we can run them after finding the end of a match to +also find the start of a match. +* NFAs take worst case linear time to build, but DFAs take worst case +exponential time to build. The [hybrid NFA/DFA](crate::hybrid) mitigates this +challenge for DFAs in many practical cases. + +There are likely other differences, but the bottom line is that NFAs tend to be +more memory efficient and give easier opportunities for increasing expressive +power, where as DFAs are faster to search with. + +# Why only a Thompson NFA? + +Currently, the only kind of NFA we support in this crate is a [Thompson +NFA](https://en.wikipedia.org/wiki/Thompson%27s_construction). This refers +to a specific construction algorithm that takes the syntax of a regex +pattern and converts it to an NFA. Specifically, it makes gratuitous use of +epsilon transitions in order to keep its structure simple. In exchange, its +construction time is linear in the size of the regex. A Thompson NFA also makes +the guarantee that given any state and a character in a haystack, there is at +most one transition defined for it. (Although there may be many epsilon +transitions.) + +It possible that other types of NFAs will be added in the future, such as a +[Glushkov NFA](https://en.wikipedia.org/wiki/Glushkov%27s_construction_algorithm). +But currently, this crate only provides a Thompson NFA. 
+*/ + +#[cfg(feature = "nfa-thompson")] +pub mod thompson; diff --git a/vendor/regex-automata/src/nfa/thompson/backtrack.rs b/vendor/regex-automata/src/nfa/thompson/backtrack.rs new file mode 100644 index 0000000..df99e45 --- /dev/null +++ b/vendor/regex-automata/src/nfa/thompson/backtrack.rs @@ -0,0 +1,1908 @@ +/*! +An NFA backed bounded backtracker for executing regex searches with capturing +groups. + +This module provides a [`BoundedBacktracker`] that works by simulating an NFA +using the classical backtracking algorithm with a twist: it avoids redoing +work that it has done before and thereby avoids worst case exponential time. +In exchange, it can only be used on "short" haystacks. Its advantage is that +is can be faster than the [`PikeVM`](thompson::pikevm::PikeVM) in many cases +because it does less book-keeping. +*/ + +use alloc::{vec, vec::Vec}; + +use crate::{ + nfa::thompson::{self, BuildError, State, NFA}, + util::{ + captures::Captures, + empty, iter, + prefilter::Prefilter, + primitives::{NonMaxUsize, PatternID, SmallIndex, StateID}, + search::{Anchored, HalfMatch, Input, Match, MatchError, Span}, + }, +}; + +/// Returns the minimum visited capacity for the given haystack. +/// +/// This function can be used as the argument to [`Config::visited_capacity`] +/// in order to guarantee that a backtracking search for the given `input` +/// won't return an error when using a [`BoundedBacktracker`] built from the +/// given `NFA`. +/// +/// This routine exists primarily as a way to test that the bounded backtracker +/// works correctly when its capacity is set to the smallest possible amount. +/// Still, it may be useful in cases where you know you want to use the bounded +/// backtracker for a specific input, and just need to know what visited +/// capacity to provide to make it work. +/// +/// Be warned that this number could be quite large as it is multiplicative in +/// the size the given NFA and haystack. 
+pub fn min_visited_capacity(nfa: &NFA, input: &Input<'_>) -> usize { + div_ceil(nfa.states().len() * (input.get_span().len() + 1), 8) +} + +/// The configuration used for building a bounded backtracker. +/// +/// A bounded backtracker configuration is a simple data object that is +/// typically used with [`Builder::configure`]. +#[derive(Clone, Debug, Default)] +pub struct Config { + pre: Option>, + visited_capacity: Option, +} + +impl Config { + /// Return a new default regex configuration. + pub fn new() -> Config { + Config::default() + } + + /// Set a prefilter to be used whenever a start state is entered. + /// + /// A [`Prefilter`] in this context is meant to accelerate searches by + /// looking for literal prefixes that every match for the corresponding + /// pattern (or patterns) must start with. Once a prefilter produces a + /// match, the underlying search routine continues on to try and confirm + /// the match. + /// + /// Be warned that setting a prefilter does not guarantee that the search + /// will be faster. While it's usually a good bet, if the prefilter + /// produces a lot of false positive candidates (i.e., positions matched + /// by the prefilter but not by the regex), then the overall result can + /// be slower than if you had just executed the regex engine without any + /// prefilters. + /// + /// By default no prefilter is set. 
+ /// + /// # Example + /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::backtrack::BoundedBacktracker, + /// util::prefilter::Prefilter, + /// Input, Match, MatchKind, + /// }; + /// + /// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["foo", "bar"]); + /// let re = BoundedBacktracker::builder() + /// .configure(BoundedBacktracker::config().prefilter(pre)) + /// .build(r"(foo|bar)[a-z]+")?; + /// let mut cache = re.create_cache(); + /// let input = Input::new("foo1 barfox bar"); + /// assert_eq!( + /// Some(Match::must(0, 5..11)), + /// re.try_find(&mut cache, input)?, + /// ); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// Be warned though that an incorrect prefilter can lead to incorrect + /// results! + /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::backtrack::BoundedBacktracker, + /// util::prefilter::Prefilter, + /// Input, HalfMatch, MatchKind, + /// }; + /// + /// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["foo", "car"]); + /// let re = BoundedBacktracker::builder() + /// .configure(BoundedBacktracker::config().prefilter(pre)) + /// .build(r"(foo|bar)[a-z]+")?; + /// let mut cache = re.create_cache(); + /// let input = Input::new("foo1 barfox bar"); + /// // No match reported even though there clearly is one! + /// assert_eq!(None, re.try_find(&mut cache, input)?); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn prefilter(mut self, pre: Option) -> Config { + self.pre = Some(pre); + self + } + + /// Set the visited capacity used to bound backtracking. + /// + /// The visited capacity represents the amount of heap memory (in bytes) to + /// allocate toward tracking which parts of the backtracking search have + /// been done before. The heap memory needed for any particular search is + /// proportional to `haystack.len() * nfa.states().len()`, which an be + /// quite large. Therefore, the bounded backtracker is typically only able + /// to run on shorter haystacks. 
+ /// + /// For a given regex, increasing the visited capacity means that the + /// maximum haystack length that can be searched is increased. The + /// [`BoundedBacktracker::max_haystack_len`] method returns that maximum. + /// + /// The default capacity is a reasonable but empirically chosen size. + /// + /// # Example + /// + /// As with other regex engines, Unicode is what tends to make the bounded + /// backtracker less useful by making the maximum haystack length quite + /// small. If necessary, increasing the visited capacity using this routine + /// will increase the maximum haystack length at the cost of using more + /// memory. + /// + /// Note though that the specific maximum values here are not an API + /// guarantee. The default visited capacity is subject to change and not + /// covered by semver. + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::nfa::thompson::backtrack::BoundedBacktracker; + /// + /// // Unicode inflates the size of the underlying NFA quite a bit, and + /// // thus means that the backtracker can only handle smaller haystacks, + /// // assuming that the visited capacity remains unchanged. + /// let re = BoundedBacktracker::new(r"\w+")?; + /// assert!(re.max_haystack_len() <= 7_000); + /// // But we can increase the visited capacity to handle bigger haystacks! + /// let re = BoundedBacktracker::builder() + /// .configure(BoundedBacktracker::config().visited_capacity(1<<20)) + /// .build(r"\w+")?; + /// assert!(re.max_haystack_len() >= 25_000); + /// assert!(re.max_haystack_len() <= 28_000); + /// # Ok::<(), Box>(()) + /// ``` + pub fn visited_capacity(mut self, capacity: usize) -> Config { + self.visited_capacity = Some(capacity); + self + } + + /// Returns the prefilter set in this configuration, if one at all. + pub fn get_prefilter(&self) -> Option<&Prefilter> { + self.pre.as_ref().unwrap_or(&None).as_ref() + } + + /// Returns the configured visited capacity. 
+ /// + /// Note that the actual capacity used may be slightly bigger than the + /// configured capacity. + pub fn get_visited_capacity(&self) -> usize { + const DEFAULT: usize = 256 * (1 << 10); // 256 KB + self.visited_capacity.unwrap_or(DEFAULT) + } + + /// Overwrite the default configuration such that the options in `o` are + /// always used. If an option in `o` is not set, then the corresponding + /// option in `self` is used. If it's not set in `self` either, then it + /// remains not set. + pub(crate) fn overwrite(&self, o: Config) -> Config { + Config { + pre: o.pre.or_else(|| self.pre.clone()), + visited_capacity: o.visited_capacity.or(self.visited_capacity), + } + } +} + +/// A builder for a bounded backtracker. +/// +/// This builder permits configuring options for the syntax of a pattern, the +/// NFA construction and the `BoundedBacktracker` construction. This builder +/// is different from a general purpose regex builder in that it permits fine +/// grain configuration of the construction process. The trade off for this is +/// complexity, and the possibility of setting a configuration that might not +/// make sense. For example, there are two different UTF-8 modes: +/// +/// * [`syntax::Config::utf8`](crate::util::syntax::Config::utf8) controls +/// whether the pattern itself can contain sub-expressions that match invalid +/// UTF-8. +/// * [`thompson::Config::utf8`] controls how the regex iterators themselves +/// advance the starting position of the next search when a match with zero +/// length is found. +/// +/// Generally speaking, callers will want to either enable all of these or +/// disable all of these. +/// +/// # Example +/// +/// This example shows how to disable UTF-8 mode in the syntax and the regex +/// itself. This is generally what you want for matching on arbitrary bytes. 
+/// +/// ``` +/// use regex_automata::{ +/// nfa::thompson::{self, backtrack::BoundedBacktracker}, +/// util::syntax, +/// Match, +/// }; +/// +/// let re = BoundedBacktracker::builder() +/// .syntax(syntax::Config::new().utf8(false)) +/// .thompson(thompson::Config::new().utf8(false)) +/// .build(r"foo(?-u:[^b])ar.*")?; +/// let mut cache = re.create_cache(); +/// +/// let haystack = b"\xFEfoo\xFFarzz\xE2\x98\xFF\n"; +/// let expected = Some(Ok(Match::must(0, 1..9))); +/// let got = re.try_find_iter(&mut cache, haystack).next(); +/// assert_eq!(expected, got); +/// // Notice that `(?-u:[^b])` matches invalid UTF-8, +/// // but the subsequent `.*` does not! Disabling UTF-8 +/// // on the syntax permits this. +/// // +/// // N.B. This example does not show the impact of +/// // disabling UTF-8 mode on a BoundedBacktracker Config, since that +/// // only impacts regexes that can produce matches of +/// // length 0. +/// assert_eq!(b"foo\xFFarzz", &haystack[got.unwrap()?.range()]); +/// +/// # Ok::<(), Box>(()) +/// ``` +#[derive(Clone, Debug)] +pub struct Builder { + config: Config, + #[cfg(feature = "syntax")] + thompson: thompson::Compiler, +} + +impl Builder { + /// Create a new BoundedBacktracker builder with its default configuration. + pub fn new() -> Builder { + Builder { + config: Config::default(), + #[cfg(feature = "syntax")] + thompson: thompson::Compiler::new(), + } + } + + /// Build a `BoundedBacktracker` from the given pattern. + /// + /// If there was a problem parsing or compiling the pattern, then an error + /// is returned. + #[cfg(feature = "syntax")] + pub fn build( + &self, + pattern: &str, + ) -> Result { + self.build_many(&[pattern]) + } + + /// Build a `BoundedBacktracker` from the given patterns. + #[cfg(feature = "syntax")] + pub fn build_many>( + &self, + patterns: &[P], + ) -> Result { + let nfa = self.thompson.build_many(patterns)?; + self.build_from_nfa(nfa) + } + + /// Build a `BoundedBacktracker` directly from its NFA. 
+ /// + /// Note that when using this method, any configuration that applies to the + /// construction of the NFA itself will of course be ignored, since the NFA + /// given here is already built. + pub fn build_from_nfa( + &self, + nfa: NFA, + ) -> Result { + nfa.look_set_any().available().map_err(BuildError::word)?; + Ok(BoundedBacktracker { config: self.config.clone(), nfa }) + } + + /// Apply the given `BoundedBacktracker` configuration options to this + /// builder. + pub fn configure(&mut self, config: Config) -> &mut Builder { + self.config = self.config.overwrite(config); + self + } + + /// Set the syntax configuration for this builder using + /// [`syntax::Config`](crate::util::syntax::Config). + /// + /// This permits setting things like case insensitivity, Unicode and multi + /// line mode. + /// + /// These settings only apply when constructing a `BoundedBacktracker` + /// directly from a pattern. + #[cfg(feature = "syntax")] + pub fn syntax( + &mut self, + config: crate::util::syntax::Config, + ) -> &mut Builder { + self.thompson.syntax(config); + self + } + + /// Set the Thompson NFA configuration for this builder using + /// [`nfa::thompson::Config`](crate::nfa::thompson::Config). + /// + /// This permits setting things like if additional time should be spent + /// shrinking the size of the NFA. + /// + /// These settings only apply when constructing a `BoundedBacktracker` + /// directly from a pattern. + #[cfg(feature = "syntax")] + pub fn thompson(&mut self, config: thompson::Config) -> &mut Builder { + self.thompson.configure(config); + self + } +} + +/// A backtracking regex engine that bounds its execution to avoid exponential +/// blow-up. +/// +/// This regex engine only implements leftmost-first match semantics and +/// only supports leftmost searches. 
It effectively does the same thing as a +/// [`PikeVM`](thompson::pikevm::PikeVM), but typically does it faster because +/// it doesn't have to worry about copying capturing group spans for most NFA +/// states. Instead, the backtracker can maintain one set of captures (provided +/// by the caller) and never needs to copy them. In exchange, the backtracker +/// bounds itself to ensure it doesn't exhibit worst case exponential time. +/// This results in the backtracker only being able to handle short haystacks +/// given reasonable memory usage. +/// +/// # Searches may return an error! +/// +/// By design, this backtracking regex engine is bounded. This bound is +/// implemented by not visiting any combination of NFA state ID and position +/// in a haystack more than once. Thus, the total memory required to bound +/// backtracking is proportional to `haystack.len() * nfa.states().len()`. +/// This can obviously get quite large, since large haystacks aren't terribly +/// uncommon. To avoid using exorbitant memory, the capacity is bounded by +/// a fixed limit set via [`Config::visited_capacity`]. Thus, if the total +/// capacity required for a particular regex and a haystack exceeds this +/// capacity, then the search routine will return an error. +/// +/// Unlike other regex engines that may return an error at search time (like +/// the DFA or the hybrid NFA/DFA), there is no way to guarantee that a bounded +/// backtracker will work for every haystack. Therefore, this regex engine +/// _only_ exposes fallible search routines to avoid the footgun of panicking +/// when running a search on a haystack that is too big. +/// +/// If one wants to use the fallible search APIs without handling the +/// error, the only way to guarantee an error won't occur from the +/// haystack length is to ensure the haystack length does not exceed +/// [`BoundedBacktracker::max_haystack_len`]. 
+/// +/// # Example: Unicode word boundaries +/// +/// This example shows that the bounded backtracker implements Unicode word +/// boundaries correctly by default. +/// +/// ``` +/// # if cfg!(miri) { return Ok(()); } // miri takes too long +/// use regex_automata::{nfa::thompson::backtrack::BoundedBacktracker, Match}; +/// +/// let re = BoundedBacktracker::new(r"\b\w+\b")?; +/// let mut cache = re.create_cache(); +/// +/// let mut it = re.try_find_iter(&mut cache, "Шерлок Холмс"); +/// assert_eq!(Some(Ok(Match::must(0, 0..12))), it.next()); +/// assert_eq!(Some(Ok(Match::must(0, 13..23))), it.next()); +/// assert_eq!(None, it.next()); +/// # Ok::<(), Box>(()) +/// ``` +/// +/// # Example: multiple regex patterns +/// +/// The bounded backtracker supports searching for multiple patterns +/// simultaneously, just like other regex engines. Note though that because it +/// uses a backtracking strategy, this regex engine is unlikely to scale well +/// as more patterns are added. But then again, as more patterns are added, the +/// maximum haystack length allowed will also shorten (assuming the visited +/// capacity remains invariant). 
+/// +/// ``` +/// use regex_automata::{nfa::thompson::backtrack::BoundedBacktracker, Match}; +/// +/// let re = BoundedBacktracker::new_many(&["[a-z]+", "[0-9]+"])?; +/// let mut cache = re.create_cache(); +/// +/// let mut it = re.try_find_iter(&mut cache, "abc 1 foo 4567 0 quux"); +/// assert_eq!(Some(Ok(Match::must(0, 0..3))), it.next()); +/// assert_eq!(Some(Ok(Match::must(1, 4..5))), it.next()); +/// assert_eq!(Some(Ok(Match::must(0, 6..9))), it.next()); +/// assert_eq!(Some(Ok(Match::must(1, 10..14))), it.next()); +/// assert_eq!(Some(Ok(Match::must(1, 15..16))), it.next()); +/// assert_eq!(Some(Ok(Match::must(0, 17..21))), it.next()); +/// assert_eq!(None, it.next()); +/// # Ok::<(), Box>(()) +/// ``` +#[derive(Clone, Debug)] +pub struct BoundedBacktracker { + config: Config, + nfa: NFA, +} + +impl BoundedBacktracker { + /// Parse the given regular expression using the default configuration and + /// return the corresponding `BoundedBacktracker`. + /// + /// If you want a non-default configuration, then use the [`Builder`] to + /// set your own configuration. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::backtrack::BoundedBacktracker, + /// Match, + /// }; + /// + /// let re = BoundedBacktracker::new("foo[0-9]+bar")?; + /// let mut cache = re.create_cache(); + /// assert_eq!( + /// Some(Ok(Match::must(0, 3..14))), + /// re.try_find_iter(&mut cache, "zzzfoo12345barzzz").next(), + /// ); + /// # Ok::<(), Box>(()) + /// ``` + #[cfg(feature = "syntax")] + pub fn new(pattern: &str) -> Result { + BoundedBacktracker::builder().build(pattern) + } + + /// Like `new`, but parses multiple patterns into a single "multi regex." + /// This similarly uses the default regex configuration. 
+ /// + /// # Example + /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::backtrack::BoundedBacktracker, + /// Match, + /// }; + /// + /// let re = BoundedBacktracker::new_many(&["[a-z]+", "[0-9]+"])?; + /// let mut cache = re.create_cache(); + /// + /// let mut it = re.try_find_iter(&mut cache, "abc 1 foo 4567 0 quux"); + /// assert_eq!(Some(Ok(Match::must(0, 0..3))), it.next()); + /// assert_eq!(Some(Ok(Match::must(1, 4..5))), it.next()); + /// assert_eq!(Some(Ok(Match::must(0, 6..9))), it.next()); + /// assert_eq!(Some(Ok(Match::must(1, 10..14))), it.next()); + /// assert_eq!(Some(Ok(Match::must(1, 15..16))), it.next()); + /// assert_eq!(Some(Ok(Match::must(0, 17..21))), it.next()); + /// assert_eq!(None, it.next()); + /// # Ok::<(), Box>(()) + /// ``` + #[cfg(feature = "syntax")] + pub fn new_many>( + patterns: &[P], + ) -> Result { + BoundedBacktracker::builder().build_many(patterns) + } + + /// # Example + /// + /// This shows how to hand assemble a regular expression via its HIR, + /// compile an NFA from it and build a BoundedBacktracker from the NFA. 
+ /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::{NFA, backtrack::BoundedBacktracker}, + /// Match, + /// }; + /// use regex_syntax::hir::{Hir, Class, ClassBytes, ClassBytesRange}; + /// + /// let hir = Hir::class(Class::Bytes(ClassBytes::new(vec![ + /// ClassBytesRange::new(b'0', b'9'), + /// ClassBytesRange::new(b'A', b'Z'), + /// ClassBytesRange::new(b'_', b'_'), + /// ClassBytesRange::new(b'a', b'z'), + /// ]))); + /// + /// let config = NFA::config().nfa_size_limit(Some(1_000)); + /// let nfa = NFA::compiler().configure(config).build_from_hir(&hir)?; + /// + /// let re = BoundedBacktracker::new_from_nfa(nfa)?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// let expected = Some(Match::must(0, 3..4)); + /// re.try_captures(&mut cache, "!@#A#@!", &mut caps)?; + /// assert_eq!(expected, caps.get_match()); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn new_from_nfa(nfa: NFA) -> Result { + BoundedBacktracker::builder().build_from_nfa(nfa) + } + + /// Create a new `BoundedBacktracker` that matches every input. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::backtrack::BoundedBacktracker, + /// Match, + /// }; + /// + /// let re = BoundedBacktracker::always_match()?; + /// let mut cache = re.create_cache(); + /// + /// let expected = Some(Ok(Match::must(0, 0..0))); + /// assert_eq!(expected, re.try_find_iter(&mut cache, "").next()); + /// assert_eq!(expected, re.try_find_iter(&mut cache, "foo").next()); + /// # Ok::<(), Box>(()) + /// ``` + pub fn always_match() -> Result { + let nfa = thompson::NFA::always_match(); + BoundedBacktracker::new_from_nfa(nfa) + } + + /// Create a new `BoundedBacktracker` that never matches any input. 
+ /// + /// # Example + /// + /// ``` + /// use regex_automata::nfa::thompson::backtrack::BoundedBacktracker; + /// + /// let re = BoundedBacktracker::never_match()?; + /// let mut cache = re.create_cache(); + /// + /// assert_eq!(None, re.try_find_iter(&mut cache, "").next()); + /// assert_eq!(None, re.try_find_iter(&mut cache, "foo").next()); + /// # Ok::<(), Box>(()) + /// ``` + pub fn never_match() -> Result { + let nfa = thompson::NFA::never_match(); + BoundedBacktracker::new_from_nfa(nfa) + } + + /// Return a default configuration for a `BoundedBacktracker`. + /// + /// This is a convenience routine to avoid needing to import the `Config` + /// type when customizing the construction of a `BoundedBacktracker`. + /// + /// # Example + /// + /// This example shows how to disable UTF-8 mode. When UTF-8 mode is + /// disabled, zero-width matches that split a codepoint are allowed. + /// Otherwise they are never reported. + /// + /// In the code below, notice that `""` is permitted to match positions + /// that split the encoding of a codepoint. 
+ /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::{self, backtrack::BoundedBacktracker}, + /// Match, + /// }; + /// + /// let re = BoundedBacktracker::builder() + /// .thompson(thompson::Config::new().utf8(false)) + /// .build(r"")?; + /// let mut cache = re.create_cache(); + /// + /// let haystack = "a☃z"; + /// let mut it = re.try_find_iter(&mut cache, haystack); + /// assert_eq!(Some(Ok(Match::must(0, 0..0))), it.next()); + /// assert_eq!(Some(Ok(Match::must(0, 1..1))), it.next()); + /// assert_eq!(Some(Ok(Match::must(0, 2..2))), it.next()); + /// assert_eq!(Some(Ok(Match::must(0, 3..3))), it.next()); + /// assert_eq!(Some(Ok(Match::must(0, 4..4))), it.next()); + /// assert_eq!(Some(Ok(Match::must(0, 5..5))), it.next()); + /// assert_eq!(None, it.next()); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn config() -> Config { + Config::new() + } + + /// Return a builder for configuring the construction of a + /// `BoundedBacktracker`. + /// + /// This is a convenience routine to avoid needing to import the + /// [`Builder`] type in common cases. + /// + /// # Example + /// + /// This example shows how to use the builder to disable UTF-8 mode + /// everywhere. 
+ /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{ + /// nfa::thompson::{self, backtrack::BoundedBacktracker}, + /// util::syntax, + /// Match, + /// }; + /// + /// let re = BoundedBacktracker::builder() + /// .syntax(syntax::Config::new().utf8(false)) + /// .thompson(thompson::Config::new().utf8(false)) + /// .build(r"foo(?-u:[^b])ar.*")?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// + /// let haystack = b"\xFEfoo\xFFarzz\xE2\x98\xFF\n"; + /// let expected = Some(Match::must(0, 1..9)); + /// re.try_captures(&mut cache, haystack, &mut caps)?; + /// assert_eq!(expected, caps.get_match()); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn builder() -> Builder { + Builder::new() + } + + /// Create a new cache for this regex. + /// + /// The cache returned should only be used for searches for this + /// regex. If you want to reuse the cache for another regex, then you + /// must call [`Cache::reset`] with that regex (or, equivalently, + /// [`BoundedBacktracker::reset_cache`]). + pub fn create_cache(&self) -> Cache { + Cache::new(self) + } + + /// Create a new empty set of capturing groups that is guaranteed to be + /// valid for the search APIs on this `BoundedBacktracker`. + /// + /// A `Captures` value created for a specific `BoundedBacktracker` cannot + /// be used with any other `BoundedBacktracker`. + /// + /// This is a convenience function for [`Captures::all`]. See the + /// [`Captures`] documentation for an explanation of its alternative + /// constructors that permit the `BoundedBacktracker` to do less work + /// during a search, and thus might make it faster. + pub fn create_captures(&self) -> Captures { + Captures::all(self.get_nfa().group_info().clone()) + } + + /// Reset the given cache such that it can be used for searching with + /// this `BoundedBacktracker` (and only this `BoundedBacktracker`).
+ /// + /// A cache reset permits reusing memory already allocated in this cache + /// with a different `BoundedBacktracker`. + /// + /// # Example + /// + /// This shows how to re-purpose a cache for use with a different + /// `BoundedBacktracker`. + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{ + /// nfa::thompson::backtrack::BoundedBacktracker, + /// Match, + /// }; + /// + /// let re1 = BoundedBacktracker::new(r"\w")?; + /// let re2 = BoundedBacktracker::new(r"\W")?; + /// + /// let mut cache = re1.create_cache(); + /// assert_eq!( + /// Some(Ok(Match::must(0, 0..2))), + /// re1.try_find_iter(&mut cache, "Δ").next(), + /// ); + /// + /// // Using 'cache' with re2 is not allowed. It may result in panics or + /// // incorrect results. In order to re-purpose the cache, we must reset + /// // it with the BoundedBacktracker we'd like to use it with. + /// // + /// // Similarly, after this reset, using the cache with 're1' is also not + /// // allowed. + /// cache.reset(&re2); + /// assert_eq!( + /// Some(Ok(Match::must(0, 0..3))), + /// re2.try_find_iter(&mut cache, "☃").next(), + /// ); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn reset_cache(&self, cache: &mut Cache) { + cache.reset(self); + } + + /// Returns the total number of patterns compiled into this + /// `BoundedBacktracker`. + /// + /// In the case of a `BoundedBacktracker` that contains no patterns, this + /// returns `0`. 
+ /// + /// # Example + /// + /// This example shows the pattern length for a `BoundedBacktracker` that + /// never matches: + /// + /// ``` + /// use regex_automata::nfa::thompson::backtrack::BoundedBacktracker; + /// + /// let re = BoundedBacktracker::never_match()?; + /// assert_eq!(re.pattern_len(), 0); + /// # Ok::<(), Box>(()) + /// ``` + /// + /// And another example for a `BoundedBacktracker` that matches at every + /// position: + /// + /// ``` + /// use regex_automata::nfa::thompson::backtrack::BoundedBacktracker; + /// + /// let re = BoundedBacktracker::always_match()?; + /// assert_eq!(re.pattern_len(), 1); + /// # Ok::<(), Box>(()) + /// ``` + /// + /// And finally, a `BoundedBacktracker` that was constructed from multiple + /// patterns: + /// + /// ``` + /// use regex_automata::nfa::thompson::backtrack::BoundedBacktracker; + /// + /// let re = BoundedBacktracker::new_many(&["[0-9]+", "[a-z]+", "[A-Z]+"])?; + /// assert_eq!(re.pattern_len(), 3); + /// # Ok::<(), Box>(()) + /// ``` + pub fn pattern_len(&self) -> usize { + self.nfa.pattern_len() + } + + /// Return the config for this `BoundedBacktracker`. + #[inline] + pub fn get_config(&self) -> &Config { + &self.config + } + + /// Returns a reference to the underlying NFA. + #[inline] + pub fn get_nfa(&self) -> &NFA { + &self.nfa + } + + /// Returns the maximum haystack length supported by this backtracker. + /// + /// This routine is a function of both [`Config::visited_capacity`] and the + /// internal size of the backtracker's NFA. + /// + /// # Example + /// + /// This example shows how the maximum haystack length can vary depending + /// on the size of the regex itself. Note though that the specific maximum + /// values here are not an API guarantee. The default visited capacity is + /// subject to change and not covered by semver. 
+ /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{ + /// nfa::thompson::backtrack::BoundedBacktracker, + /// Match, MatchError, + /// }; + /// + /// // If you're only using ASCII, you get a big budget. + /// let re = BoundedBacktracker::new(r"(?-u)\w+")?; + /// let mut cache = re.create_cache(); + /// assert_eq!(re.max_haystack_len(), 299_592); + /// // Things work up to the max. + /// let mut haystack = "a".repeat(299_592); + /// let expected = Some(Ok(Match::must(0, 0..299_592))); + /// assert_eq!(expected, re.try_find_iter(&mut cache, &haystack).next()); + /// // But you'll get an error if you provide a haystack that's too big. + /// // Notice that we use the 'try_find_iter' routine instead, which + /// // yields Result instead of Match. + /// haystack.push('a'); + /// let expected = Some(Err(MatchError::haystack_too_long(299_593))); + /// assert_eq!(expected, re.try_find_iter(&mut cache, &haystack).next()); + /// + /// // Unicode inflates the size of the underlying NFA quite a bit, and + /// // thus means that the backtracker can only handle smaller haystacks, + /// // assuming that the visited capacity remains unchanged. + /// let re = BoundedBacktracker::new(r"\w+")?; + /// assert!(re.max_haystack_len() <= 7_000); + /// // But we can increase the visited capacity to handle bigger haystacks! + /// let re = BoundedBacktracker::builder() + /// .configure(BoundedBacktracker::config().visited_capacity(1<<20)) + /// .build(r"\w+")?; + /// assert!(re.max_haystack_len() >= 25_000); + /// assert!(re.max_haystack_len() <= 28_000); + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn max_haystack_len(&self) -> usize { + // The capacity given in the config is "bytes of heap memory," but the + // capacity we use here is "number of bits." So convert the capacity in + // bytes to the capacity in bits. 
+ let capacity = 8 * self.get_config().get_visited_capacity(); + let blocks = div_ceil(capacity, Visited::BLOCK_SIZE); + let real_capacity = blocks.saturating_mul(Visited::BLOCK_SIZE); + // It's possible for `real_capacity` to be smaller than the number of + // NFA states for particularly large regexes, so we saturate towards + // zero. + (real_capacity / self.nfa.states().len()).saturating_sub(1) + } +} + +impl BoundedBacktracker { + /// Returns true if and only if this regex matches the given haystack. + /// + /// In the case of a backtracking regex engine, and unlike most other + /// regex engines in this crate, short circuiting isn't practical. However, + /// this routine may still be faster because it instructs backtracking to + /// not keep track of any capturing groups. + /// + /// # Errors + /// + /// This routine only errors if the search could not complete. For this + /// backtracking regex engine, this only occurs when the haystack length + /// exceeds [`BoundedBacktracker::max_haystack_len`]. + /// + /// When a search cannot complete, callers cannot know whether a match + /// exists or not. + /// + /// # Example + /// + /// ``` + /// use regex_automata::nfa::thompson::backtrack::BoundedBacktracker; + /// + /// let re = BoundedBacktracker::new("foo[0-9]+bar")?; + /// let mut cache = re.create_cache(); + /// + /// assert!(re.try_is_match(&mut cache, "foo12345bar")?); + /// assert!(!re.try_is_match(&mut cache, "foobar")?); + /// # Ok::<(), Box>(()) + /// ``` + /// + /// # Example: consistency with search APIs + /// + /// `try_is_match` is guaranteed to return `true` whenever `try_find` returns a + /// match.
This includes searches that are executed entirely within a + /// codepoint: + /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::backtrack::BoundedBacktracker, + /// Input, + /// }; + /// + /// let re = BoundedBacktracker::new("a*")?; + /// let mut cache = re.create_cache(); + /// + /// assert!(!re.try_is_match(&mut cache, Input::new("☃").span(1..2))?); + /// # Ok::<(), Box>(()) + /// ``` + /// + /// Notice that when UTF-8 mode is disabled, then the above reports a + /// match because the restriction against zero-width matches that split a + /// codepoint has been lifted: + /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::{backtrack::BoundedBacktracker, NFA}, + /// Input, + /// }; + /// + /// let re = BoundedBacktracker::builder() + /// .thompson(NFA::config().utf8(false)) + /// .build("a*")?; + /// let mut cache = re.create_cache(); + /// + /// assert!(re.try_is_match(&mut cache, Input::new("☃").span(1..2))?); + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn try_is_match<'h, I: Into>>( + &self, + cache: &mut Cache, + input: I, + ) -> Result { + let input = input.into().earliest(true); + self.try_search_slots(cache, &input, &mut []).map(|pid| pid.is_some()) + } + + /// Executes a leftmost forward search and returns a `Match` if one exists. + /// + /// This routine only includes the overall match span. To get + /// access to the individual spans of each capturing group, use + /// [`BoundedBacktracker::try_captures`]. + /// + /// # Errors + /// + /// This routine only errors if the search could not complete. For this + /// backtracking regex engine, this only occurs when the haystack length + /// exceeds [`BoundedBacktracker::max_haystack_len`]. + /// + /// When a search cannot complete, callers cannot know whether a match + /// exists or not. 
+ /// + /// # Example + /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::backtrack::BoundedBacktracker, + /// Match, + /// }; + /// + /// let re = BoundedBacktracker::new("foo[0-9]+")?; + /// let mut cache = re.create_cache(); + /// let expected = Match::must(0, 0..8); + /// assert_eq!(Some(expected), re.try_find(&mut cache, "foo12345")?); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn try_find<'h, I: Into>>( + &self, + cache: &mut Cache, + input: I, + ) -> Result, MatchError> { + let input = input.into(); + if self.get_nfa().pattern_len() == 1 { + let mut slots = [None, None]; + let pid = match self.try_search_slots(cache, &input, &mut slots)? { + None => return Ok(None), + Some(pid) => pid, + }; + let start = match slots[0] { + None => return Ok(None), + Some(s) => s.get(), + }; + let end = match slots[1] { + None => return Ok(None), + Some(s) => s.get(), + }; + return Ok(Some(Match::new(pid, Span { start, end }))); + } + let ginfo = self.get_nfa().group_info(); + let slots_len = ginfo.implicit_slot_len(); + let mut slots = vec![None; slots_len]; + let pid = match self.try_search_slots(cache, &input, &mut slots)? { + None => return Ok(None), + Some(pid) => pid, + }; + let start = match slots[pid.as_usize() * 2] { + None => return Ok(None), + Some(s) => s.get(), + }; + let end = match slots[pid.as_usize() * 2 + 1] { + None => return Ok(None), + Some(s) => s.get(), + }; + Ok(Some(Match::new(pid, Span { start, end }))) + } + + /// Executes a leftmost forward search and writes the spans of capturing + /// groups that participated in a match into the provided [`Captures`] + /// value. If no match was found, then [`Captures::is_match`] is guaranteed + /// to return `false`. + /// + /// # Errors + /// + /// This routine only errors if the search could not complete. For this + /// backtracking regex engine, this only occurs when the haystack length + /// exceeds [`BoundedBacktracker::max_haystack_len`]. 
+ /// + /// When a search cannot complete, callers cannot know whether a match + /// exists or not. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::backtrack::BoundedBacktracker, + /// Span, + /// }; + /// + /// let re = BoundedBacktracker::new( + /// r"^([0-9]{4})-([0-9]{2})-([0-9]{2})$", + /// )?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// + /// re.try_captures(&mut cache, "2010-03-14", &mut caps)?; + /// assert!(caps.is_match()); + /// assert_eq!(Some(Span::from(0..4)), caps.get_group(1)); + /// assert_eq!(Some(Span::from(5..7)), caps.get_group(2)); + /// assert_eq!(Some(Span::from(8..10)), caps.get_group(3)); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn try_captures<'h, I: Into>>( + &self, + cache: &mut Cache, + input: I, + caps: &mut Captures, + ) -> Result<(), MatchError> { + self.try_search(cache, &input.into(), caps) + } + + /// Returns an iterator over all non-overlapping leftmost matches in the + /// given bytes. If no match exists, then the iterator yields no elements. + /// + /// If the regex engine returns an error at any point, then the iterator + /// will yield that error. 
+ /// + /// # Example + /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::backtrack::BoundedBacktracker, + /// Match, MatchError, + /// }; + /// + /// let re = BoundedBacktracker::new("foo[0-9]+")?; + /// let mut cache = re.create_cache(); + /// + /// let text = "foo1 foo12 foo123"; + /// let result: Result, MatchError> = re + /// .try_find_iter(&mut cache, text) + /// .collect(); + /// let matches = result?; + /// assert_eq!(matches, vec![ + /// Match::must(0, 0..4), + /// Match::must(0, 5..10), + /// Match::must(0, 11..17), + /// ]); + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn try_find_iter<'r, 'c, 'h, I: Into>>( + &'r self, + cache: &'c mut Cache, + input: I, + ) -> TryFindMatches<'r, 'c, 'h> { + let caps = Captures::matches(self.get_nfa().group_info().clone()); + let it = iter::Searcher::new(input.into()); + TryFindMatches { re: self, cache, caps, it } + } + + /// Returns an iterator over all non-overlapping `Captures` values. If no + /// match exists, then the iterator yields no elements. + /// + /// This yields the same matches as [`BoundedBacktracker::try_find_iter`], + /// but it includes the spans of all capturing groups that participate in + /// each match. + /// + /// If the regex engine returns an error at any point, then the iterator + /// will yield that error. + /// + /// **Tip:** See [`util::iter::Searcher`](crate::util::iter::Searcher) for + /// how to correctly iterate over all matches in a haystack while avoiding + /// the creation of a new `Captures` value for every match. (Which you are + /// forced to do with an `Iterator`.) 
+ /// + /// # Example + /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::backtrack::BoundedBacktracker, + /// Span, + /// }; + /// + /// let re = BoundedBacktracker::new("foo(?P[0-9]+)")?; + /// let mut cache = re.create_cache(); + /// + /// let text = "foo1 foo12 foo123"; + /// let mut spans = vec![]; + /// for result in re.try_captures_iter(&mut cache, text) { + /// let caps = result?; + /// // The unwrap is OK since 'numbers' matches if the pattern matches. + /// spans.push(caps.get_group_by_name("numbers").unwrap()); + /// } + /// assert_eq!(spans, vec![ + /// Span::from(3..4), + /// Span::from(8..10), + /// Span::from(14..17), + /// ]); + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn try_captures_iter<'r, 'c, 'h, I: Into>>( + &'r self, + cache: &'c mut Cache, + input: I, + ) -> TryCapturesMatches<'r, 'c, 'h> { + let caps = self.create_captures(); + let it = iter::Searcher::new(input.into()); + TryCapturesMatches { re: self, cache, caps, it } + } +} + +impl BoundedBacktracker { + /// Executes a leftmost forward search and writes the spans of capturing + /// groups that participated in a match into the provided [`Captures`] + /// value. If no match was found, then [`Captures::is_match`] is guaranteed + /// to return `false`. + /// + /// This is like [`BoundedBacktracker::try_captures`], but it accepts a + /// concrete `&Input` instead of an `Into`. + /// + /// # Errors + /// + /// This routine only errors if the search could not complete. For this + /// backtracking regex engine, this only occurs when the haystack length + /// exceeds [`BoundedBacktracker::max_haystack_len`]. + /// + /// When a search cannot complete, callers cannot know whether a match + /// exists or not. + /// + /// # Example: specific pattern search + /// + /// This example shows how to build a multi bounded backtracker that + /// permits searching for specific patterns. 
+ /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::backtrack::BoundedBacktracker, + /// Anchored, Input, Match, PatternID, + /// }; + /// + /// let re = BoundedBacktracker::new_many(&[ + /// "[a-z0-9]{6}", + /// "[a-z][a-z0-9]{5}", + /// ])?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// let haystack = "foo123"; + /// + /// // Since we are using the default leftmost-first match and both + /// // patterns match at the same starting position, only the first pattern + /// // will be returned in this case when doing a search for any of the + /// // patterns. + /// let expected = Some(Match::must(0, 0..6)); + /// re.try_search(&mut cache, &Input::new(haystack), &mut caps)?; + /// assert_eq!(expected, caps.get_match()); + /// + /// // But if we want to check whether some other pattern matches, then we + /// // can provide its pattern ID. + /// let expected = Some(Match::must(1, 0..6)); + /// let input = Input::new(haystack) + /// .anchored(Anchored::Pattern(PatternID::must(1))); + /// re.try_search(&mut cache, &input, &mut caps)?; + /// assert_eq!(expected, caps.get_match()); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// # Example: specifying the bounds of a search + /// + /// This example shows how providing the bounds of a search can produce + /// different results than simply sub-slicing the haystack. + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{ + /// nfa::thompson::backtrack::BoundedBacktracker, + /// Match, Input, + /// }; + /// + /// let re = BoundedBacktracker::new(r"\b[0-9]{3}\b")?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// let haystack = "foo123bar"; + /// + /// // Since we sub-slice the haystack, the search doesn't know about + /// // the larger context and assumes that `123` is surrounded by word + /// // boundaries. 
And of course, the match position is reported relative + /// // to the sub-slice as well, which means we get `0..3` instead of + /// // `3..6`. + /// let expected = Some(Match::must(0, 0..3)); + /// re.try_search(&mut cache, &Input::new(&haystack[3..6]), &mut caps)?; + /// assert_eq!(expected, caps.get_match()); + /// + /// // But if we provide the bounds of the search within the context of the + /// // entire haystack, then the search can take the surrounding context + /// // into account. (And if we did find a match, it would be reported + /// // as a valid offset into `haystack` instead of its sub-slice.) + /// let expected = None; + /// re.try_search( + /// &mut cache, &Input::new(haystack).range(3..6), &mut caps, + /// )?; + /// assert_eq!(expected, caps.get_match()); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn try_search( + &self, + cache: &mut Cache, + input: &Input<'_>, + caps: &mut Captures, + ) -> Result<(), MatchError> { + caps.set_pattern(None); + let pid = self.try_search_slots(cache, input, caps.slots_mut())?; + caps.set_pattern(pid); + Ok(()) + } + + /// Executes a leftmost forward search and writes the spans of capturing + /// groups that participated in a match into the provided `slots`, and + /// returns the matching pattern ID. The contents of the slots for patterns + /// other than the matching pattern are unspecified. If no match was found, + /// then `None` is returned and the contents of all `slots` is unspecified. + /// + /// This is like [`BoundedBacktracker::try_search`], but it accepts a raw + /// slots slice instead of a `Captures` value. This is useful in contexts + /// where you don't want or need to allocate a `Captures`. + /// + /// It is legal to pass _any_ number of slots to this routine. If the regex + /// engine would otherwise write a slot offset that doesn't fit in the + /// provided slice, then it is simply skipped. 
In general though, there are + /// usually three slice lengths you might want to use: + /// + /// * An empty slice, if you only care about which pattern matched. + /// * A slice with + /// [`pattern_len() * 2`](crate::nfa::thompson::NFA::pattern_len) + /// slots, if you only care about the overall match spans for each matching + /// pattern. + /// * A slice with + /// [`slot_len()`](crate::util::captures::GroupInfo::slot_len) slots, which + /// permits recording match offsets for every capturing group in every + /// pattern. + /// + /// # Errors + /// + /// This routine only errors if the search could not complete. For this + /// backtracking regex engine, this only occurs when the haystack length + /// exceeds [`BoundedBacktracker::max_haystack_len`]. + /// + /// When a search cannot complete, callers cannot know whether a match + /// exists or not. + /// + /// # Example + /// + /// This example shows how to find the overall match offsets in a + /// multi-pattern search without allocating a `Captures` value. Indeed, we + /// can put our slots right on the stack. + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{ + /// nfa::thompson::backtrack::BoundedBacktracker, + /// PatternID, Input, + /// }; + /// + /// let re = BoundedBacktracker::new_many(&[ + /// r"\pL+", + /// r"\d+", + /// ])?; + /// let mut cache = re.create_cache(); + /// let input = Input::new("!@#123"); + /// + /// // We only care about the overall match offsets here, so we just + /// // allocate two slots for each pattern. Each slot records the start + /// // and end of the match. + /// let mut slots = [None; 4]; + /// let pid = re.try_search_slots(&mut cache, &input, &mut slots)?; + /// assert_eq!(Some(PatternID::must(1)), pid); + /// + /// // The overall match offsets are always at 'pid * 2' and 'pid * 2 + 1'. + /// // See 'GroupInfo' for more details on the mapping between groups and + /// // slot indices. 
+ /// let slot_start = pid.unwrap().as_usize() * 2; + /// let slot_end = slot_start + 1; + /// assert_eq!(Some(3), slots[slot_start].map(|s| s.get())); + /// assert_eq!(Some(6), slots[slot_end].map(|s| s.get())); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn try_search_slots( + &self, + cache: &mut Cache, + input: &Input<'_>, + slots: &mut [Option], + ) -> Result, MatchError> { + let utf8empty = self.get_nfa().has_empty() && self.get_nfa().is_utf8(); + if !utf8empty { + let maybe_hm = self.try_search_slots_imp(cache, input, slots)?; + return Ok(maybe_hm.map(|hm| hm.pattern())); + } + // See PikeVM::try_search_slots for why we do this. + let min = self.get_nfa().group_info().implicit_slot_len(); + if slots.len() >= min { + let maybe_hm = self.try_search_slots_imp(cache, input, slots)?; + return Ok(maybe_hm.map(|hm| hm.pattern())); + } + if self.get_nfa().pattern_len() == 1 { + let mut enough = [None, None]; + let got = self.try_search_slots_imp(cache, input, &mut enough)?; + // This is OK because we know `enough` is strictly bigger + // than `slots`, otherwise this special case isn't reached. + slots.copy_from_slice(&enough[..slots.len()]); + return Ok(got.map(|hm| hm.pattern())); + } + let mut enough = vec![None; min]; + let got = self.try_search_slots_imp(cache, input, &mut enough)?; + // This is OK because we know `enough` is strictly bigger than + // `slots`, otherwise this special case isn't reached. + slots.copy_from_slice(&enough[..slots.len()]); + Ok(got.map(|hm| hm.pattern())) + } + + /// This is the actual implementation of `try_search_slots` that + /// doesn't account for the special case when 1) the NFA has UTF-8 mode + /// enabled, 2) the NFA can match the empty string and 3) the caller has + /// provided an insufficient number of slots to record match offsets.
+ #[inline(never)] + fn try_search_slots_imp( + &self, + cache: &mut Cache, + input: &Input<'_>, + slots: &mut [Option], + ) -> Result, MatchError> { + let utf8empty = self.get_nfa().has_empty() && self.get_nfa().is_utf8(); + let hm = match self.search_imp(cache, input, slots)? { + None => return Ok(None), + Some(hm) if !utf8empty => return Ok(Some(hm)), + Some(hm) => hm, + }; + empty::skip_splits_fwd(input, hm, hm.offset(), |input| { + Ok(self + .search_imp(cache, input, slots)? + .map(|hm| (hm, hm.offset()))) + }) + } + + /// The implementation of standard leftmost backtracking search. + /// + /// Capturing group spans are written to 'caps', but only if requested. + /// 'caps' can be one of three things: 1) totally empty, in which case, we + /// only report the pattern that matched or 2) only has slots for recording + /// the overall match offsets for any pattern or 3) has all slots available + /// for recording the spans of any groups participating in a match. + fn search_imp( + &self, + cache: &mut Cache, + input: &Input<'_>, + slots: &mut [Option], + ) -> Result, MatchError> { + // Unlike in the PikeVM, we write our capturing group spans directly + // into the caller's captures groups. So we have to make sure we're + // starting with a blank slate first. In the PikeVM, we avoid this + // by construction: the spans that are copied to every slot in the + // 'Captures' value already account for presence/absence. In this + // backtracker, we write directly into the caller provided slots, where + // as in the PikeVM, we write into scratch space first and only copy + // them to the caller provided slots when a match is found. + for slot in slots.iter_mut() { + *slot = None; + } + cache.setup_search(&self, input)?; + if input.is_done() { + return Ok(None); + } + let (anchored, start_id) = match input.get_anchored() { + // Only way we're unanchored is if both the caller asked for an + // unanchored search *and* the pattern is itself not anchored. 
+ Anchored::No => ( + self.nfa.is_always_start_anchored(), + // We always use the anchored starting state here, even if + // doing an unanchored search. The "unanchored" part of it is + // implemented in the loop below, by simply trying the next + // byte offset if the previous backtracking exploration failed. + self.nfa.start_anchored(), + ), + Anchored::Yes => (true, self.nfa.start_anchored()), + Anchored::Pattern(pid) => match self.nfa.start_pattern(pid) { + None => return Ok(None), + Some(sid) => (true, sid), + }, + }; + if anchored { + let at = input.start(); + return Ok(self.backtrack(cache, input, at, start_id, slots)); + } + let pre = self.get_config().get_prefilter(); + let mut at = input.start(); + while at <= input.end() { + if let Some(ref pre) = pre { + let span = Span::from(at..input.end()); + match pre.find(input.haystack(), span) { + None => break, + Some(ref span) => at = span.start, + } + } + if let Some(hm) = self.backtrack(cache, input, at, start_id, slots) + { + return Ok(Some(hm)); + } + at += 1; + } + Ok(None) + } + + /// Look for a match starting at `at` in `input` and write the matching + /// pattern ID and group spans to `caps`. The search uses `start_id` as its + /// starting state in the underlying NFA. + /// + /// If no match was found, then the caller should increment `at` and try + /// at the next position. + #[cfg_attr(feature = "perf-inline", inline(always))] + fn backtrack( + &self, + cache: &mut Cache, + input: &Input<'_>, + at: usize, + start_id: StateID, + slots: &mut [Option], + ) -> Option { + cache.stack.push(Frame::Step { sid: start_id, at }); + while let Some(frame) = cache.stack.pop() { + match frame { + Frame::Step { sid, at } => { + if let Some(hm) = self.step(cache, input, sid, at, slots) { + return Some(hm); + } + } + Frame::RestoreCapture { slot, offset } => { + slots[slot] = offset; + } + } + } + None + } + + // LAMENTATION: The actual backtracking search is implemented in about + // 75 lines below. 
Yet this file is over 2,000 lines long. What have I + // done? + + /// Execute a "step" in the backtracing algorithm. + /// + /// A "step" is somewhat of a misnomer, because this routine keeps going + /// until it either runs out of things to try or fins a match. In the + /// former case, it may have pushed some things on to the backtracking + /// stack, in which case, those will be tried next as part of the + /// 'backtrack' routine above. + #[cfg_attr(feature = "perf-inline", inline(always))] + fn step( + &self, + cache: &mut Cache, + input: &Input<'_>, + mut sid: StateID, + mut at: usize, + slots: &mut [Option], + ) -> Option { + loop { + if !cache.visited.insert(sid, at - input.start()) { + return None; + } + match *self.nfa.state(sid) { + State::ByteRange { ref trans } => { + // Why do we need this? Unlike other regex engines in this + // crate, the backtracker can steam roll ahead in the + // haystack outside of the main loop over the bytes in the + // haystack. While 'trans.matches()' below handles the case + // of 'at' being out of bounds of 'input.haystack()', we + // also need to handle the case of 'at' going out of bounds + // of the span the caller asked to search. + // + // We should perhaps make the 'trans.matches()' API accept + // an '&Input' instead of a '&[u8]'. Or at least, add a new + // API that does it. + if at >= input.end() { + return None; + } + if !trans.matches(input.haystack(), at) { + return None; + } + sid = trans.next; + at += 1; + } + State::Sparse(ref sparse) => { + if at >= input.end() { + return None; + } + sid = sparse.matches(input.haystack(), at)?; + at += 1; + } + State::Dense(ref dense) => { + if at >= input.end() { + return None; + } + sid = dense.matches(input.haystack(), at)?; + at += 1; + } + State::Look { look, next } => { + // OK because we don't permit building a searcher with a + // Unicode word boundary if the requisite Unicode data is + // unavailable. 
+ if !self.nfa.look_matcher().matches_inline( + look, + input.haystack(), + at, + ) { + return None; + } + sid = next; + } + State::Union { ref alternates } => { + sid = match alternates.get(0) { + None => return None, + Some(&sid) => sid, + }; + cache.stack.extend( + alternates[1..] + .iter() + .copied() + .rev() + .map(|sid| Frame::Step { sid, at }), + ); + } + State::BinaryUnion { alt1, alt2 } => { + sid = alt1; + cache.stack.push(Frame::Step { sid: alt2, at }); + } + State::Capture { next, slot, .. } => { + if slot.as_usize() < slots.len() { + cache.stack.push(Frame::RestoreCapture { + slot, + offset: slots[slot], + }); + slots[slot] = NonMaxUsize::new(at); + } + sid = next; + } + State::Fail => return None, + State::Match { pattern_id } => { + return Some(HalfMatch::new(pattern_id, at)); + } + } + } + } +} + +/// An iterator over all non-overlapping matches for a fallible search. +/// +/// The iterator yields a `Result { + re: &'r BoundedBacktracker, + cache: &'c mut Cache, + caps: Captures, + it: iter::Searcher<'h>, +} + +impl<'r, 'c, 'h> Iterator for TryFindMatches<'r, 'c, 'h> { + type Item = Result; + + #[inline] + fn next(&mut self) -> Option> { + // Splitting 'self' apart seems necessary to appease borrowck. + let TryFindMatches { re, ref mut cache, ref mut caps, ref mut it } = + *self; + it.try_advance(|input| { + re.try_search(cache, input, caps)?; + Ok(caps.get_match()) + }) + .transpose() + } +} + +/// An iterator over all non-overlapping leftmost matches, with their capturing +/// groups, for a fallible search. +/// +/// The iterator yields a `Result` value until no more +/// matches could be found. +/// +/// The lifetime parameters are as follows: +/// +/// * `'r` represents the lifetime of the BoundedBacktracker. +/// * `'c` represents the lifetime of the BoundedBacktracker's cache. +/// * `'h` represents the lifetime of the haystack being searched. 
+/// +/// This iterator can be created with the +/// [`BoundedBacktracker::try_captures_iter`] method. +#[derive(Debug)] +pub struct TryCapturesMatches<'r, 'c, 'h> { + re: &'r BoundedBacktracker, + cache: &'c mut Cache, + caps: Captures, + it: iter::Searcher<'h>, +} + +impl<'r, 'c, 'h> Iterator for TryCapturesMatches<'r, 'c, 'h> { + type Item = Result; + + #[inline] + fn next(&mut self) -> Option> { + // Splitting 'self' apart seems necessary to appease borrowck. + let TryCapturesMatches { re, ref mut cache, ref mut caps, ref mut it } = + *self; + let _ = it + .try_advance(|input| { + re.try_search(cache, input, caps)?; + Ok(caps.get_match()) + }) + .transpose()?; + if caps.is_match() { + Some(Ok(caps.clone())) + } else { + None + } + } +} + +/// A cache represents mutable state that a [`BoundedBacktracker`] requires +/// during a search. +/// +/// For a given [`BoundedBacktracker`], its corresponding cache may be created +/// either via [`BoundedBacktracker::create_cache`], or via [`Cache::new`]. +/// They are equivalent in every way, except the former does not require +/// explicitly importing `Cache`. +/// +/// A particular `Cache` is coupled with the [`BoundedBacktracker`] from which +/// it was created. It may only be used with that `BoundedBacktracker`. A cache +/// and its allocations may be re-purposed via [`Cache::reset`], in which case, +/// it can only be used with the new `BoundedBacktracker` (and not the old +/// one). +#[derive(Clone, Debug)] +pub struct Cache { + /// Stack used on the heap for doing backtracking instead of the + /// traditional recursive approach. We don't want recursion because then + /// we're likely to hit a stack overflow for bigger regexes. + stack: Vec, + /// The set of (StateID, HaystackOffset) pairs that have been visited + /// by the backtracker within a single search. If such a pair has been + /// visited, then we avoid doing the work for that pair again. 
This is + /// what "bounds" the backtracking and prevents it from having worst case + /// exponential time. + visited: Visited, +} + +impl Cache { + /// Create a new [`BoundedBacktracker`] cache. + /// + /// A potentially more convenient routine to create a cache is + /// [`BoundedBacktracker::create_cache`], as it does not require also + /// importing the `Cache` type. + /// + /// If you want to reuse the returned `Cache` with some other + /// `BoundedBacktracker`, then you must call [`Cache::reset`] with the + /// desired `BoundedBacktracker`. + pub fn new(re: &BoundedBacktracker) -> Cache { + Cache { stack: vec![], visited: Visited::new(re) } + } + + /// Reset this cache such that it can be used for searching with different + /// [`BoundedBacktracker`]. + /// + /// A cache reset permits reusing memory already allocated in this cache + /// with a different `BoundedBacktracker`. + /// + /// # Example + /// + /// This shows how to re-purpose a cache for use with a different + /// `BoundedBacktracker`. + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{ + /// nfa::thompson::backtrack::BoundedBacktracker, + /// Match, + /// }; + /// + /// let re1 = BoundedBacktracker::new(r"\w")?; + /// let re2 = BoundedBacktracker::new(r"\W")?; + /// + /// let mut cache = re1.create_cache(); + /// assert_eq!( + /// Some(Ok(Match::must(0, 0..2))), + /// re1.try_find_iter(&mut cache, "Δ").next(), + /// ); + /// + /// // Using 'cache' with re2 is not allowed. It may result in panics or + /// // incorrect results. In order to re-purpose the cache, we must reset + /// // it with the BoundedBacktracker we'd like to use it with. + /// // + /// // Similarly, after this reset, using the cache with 're1' is also not + /// // allowed. 
+ /// cache.reset(&re2); + /// assert_eq!( + /// Some(Ok(Match::must(0, 0..3))), + /// re2.try_find_iter(&mut cache, "☃").next(), + /// ); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn reset(&mut self, re: &BoundedBacktracker) { + self.visited.reset(re); + } + + /// Returns the heap memory usage, in bytes, of this cache. + /// + /// This does **not** include the stack size used up by this cache. To + /// compute that, use `std::mem::size_of::()`. + pub fn memory_usage(&self) -> usize { + self.stack.len() * core::mem::size_of::() + + self.visited.memory_usage() + } + + /// Clears this cache. This should be called at the start of every search + /// to ensure we start with a clean slate. + /// + /// This also sets the length of the capturing groups used in the current + /// search. This permits an optimization where by 'SlotTable::for_state' + /// only returns the number of slots equivalent to the number of slots + /// given in the 'Captures' value. This may be less than the total number + /// of possible slots, e.g., when one only wants to track overall match + /// offsets. This in turn permits less copying of capturing group spans + /// in the BoundedBacktracker. + fn setup_search( + &mut self, + re: &BoundedBacktracker, + input: &Input<'_>, + ) -> Result<(), MatchError> { + self.stack.clear(); + self.visited.setup_search(re, input)?; + Ok(()) + } +} + +/// Represents a stack frame on the heap while doing backtracking. +/// +/// Instead of using explicit recursion for backtracking, we use a stack on +/// the heap to keep track of things that we want to explore if the current +/// backtracking branch turns out to not lead to a match. +#[derive(Clone, Debug)] +enum Frame { + /// Look for a match starting at `sid` and the given position in the + /// haystack. + Step { sid: StateID, at: usize }, + /// Reset the given `slot` to the given `offset` (which might be `None`). 
+ /// This effectively gives a "scope" to capturing groups, such that an + /// offset for a particular group only gets returned if the match goes + /// through that capturing group. If backtracking ends up going down a + /// different branch that results in a different offset (or perhaps none at + /// all), then this "restore capture" frame will cause the offset to get + /// reset. + RestoreCapture { slot: SmallIndex, offset: Option }, +} + +/// A bitset that keeps track of whether a particular (StateID, offset) has +/// been considered during backtracking. If it has already been visited, then +/// backtracking skips it. This is what gives backtracking its "bound." +#[derive(Clone, Debug)] +struct Visited { + /// The actual underlying bitset. Each element in the bitset corresponds + /// to a particular (StateID, offset) pair. States correspond to the rows + /// and the offsets correspond to the columns. + /// + /// If our underlying NFA has N states and the haystack we're searching + /// has M bytes, then we have N*(M+1) entries in our bitset table. The + /// M+1 occurs because our matches are delayed by one byte (to support + /// look-around), and so we need to handle the end position itself rather + /// than stopping just before the end. (If there is no end position, then + /// it's treated as "end-of-input," which is matched by things like '$'.) + /// + /// Given BITS=N*(M+1), we wind up with div_ceil(BITS, sizeof(usize)) + /// blocks. + /// + /// We use 'usize' to represent our blocks because it makes some of the + /// arithmetic in 'insert' a bit nicer. For example, if we used 'u32' for + /// our block, we'd either need to cast u32s to usizes or usizes to u32s. + bitset: Vec, + /// The stride represents one plus length of the haystack we're searching + /// (as described above). The stride must be initialized for each search. + stride: usize, +} + +impl Visited { + /// The size of each block, in bits. 
+ const BLOCK_SIZE: usize = 8 * core::mem::size_of::(); + + /// Create a new visited set for the given backtracker. + /// + /// The set is ready to use, but must be setup at the beginning of each + /// search by calling `setup_search`. + fn new(re: &BoundedBacktracker) -> Visited { + let mut visited = Visited { bitset: vec![], stride: 0 }; + visited.reset(re); + visited + } + + /// Insert the given (StateID, offset) pair into this set. If it already + /// exists, then this is a no-op and it returns false. Otherwise this + /// returns true. + fn insert(&mut self, sid: StateID, at: usize) -> bool { + let table_index = sid.as_usize() * self.stride + at; + let block_index = table_index / Visited::BLOCK_SIZE; + let bit = table_index % Visited::BLOCK_SIZE; + let block_with_bit = 1 << bit; + if self.bitset[block_index] & block_with_bit != 0 { + return false; + } + self.bitset[block_index] |= block_with_bit; + true + } + + /// Reset this visited set to work with the given bounded backtracker. + fn reset(&mut self, _: &BoundedBacktracker) { + self.bitset.truncate(0); + } + + /// Setup this visited set to work for a search using the given NFA + /// and input configuration. The NFA must be the same NFA used by the + /// BoundedBacktracker given to Visited::reset. Failing to call this might + /// result in panics or silently incorrect search behavior. + fn setup_search( + &mut self, + re: &BoundedBacktracker, + input: &Input<'_>, + ) -> Result<(), MatchError> { + // Our haystack length is only the length of the span of the entire + // haystack that we'll be searching. + let haylen = input.get_span().len(); + let err = || MatchError::haystack_too_long(haylen); + // Our stride is one more than the length of the input because our main + // search loop includes the position at input.end(). (And it does this + // because matches are delayed by one byte to account for look-around.) 
+ self.stride = haylen + 1; + let needed_capacity = + match re.get_nfa().states().len().checked_mul(self.stride) { + None => return Err(err()), + Some(capacity) => capacity, + }; + let max_capacity = 8 * re.get_config().get_visited_capacity(); + if needed_capacity > max_capacity { + return Err(err()); + } + let needed_blocks = div_ceil(needed_capacity, Visited::BLOCK_SIZE); + self.bitset.truncate(needed_blocks); + for block in self.bitset.iter_mut() { + *block = 0; + } + if needed_blocks > self.bitset.len() { + self.bitset.resize(needed_blocks, 0); + } + Ok(()) + } + + /// Return the heap memory usage, in bytes, of this visited set. + fn memory_usage(&self) -> usize { + self.bitset.len() * core::mem::size_of::() + } +} + +/// Integer division, but rounds up instead of down. +fn div_ceil(lhs: usize, rhs: usize) -> usize { + if lhs % rhs == 0 { + lhs / rhs + } else { + (lhs / rhs) + 1 + } +} + +#[cfg(test)] +mod tests { + use super::*; + + // This is a regression test for the maximum haystack length computation. + // Previously, it assumed that the total capacity of the backtracker's + // bitset would always be greater than the number of NFA states. But there + // is of course no guarantee that this is true. This regression test + // ensures that not only does `max_haystack_len` not panic, but that it + // should return `0`. 
+ #[cfg(feature = "syntax")] + #[test] + fn max_haystack_len_overflow() { + let re = BoundedBacktracker::builder() + .configure(BoundedBacktracker::config().visited_capacity(10)) + .build(r"[0-9A-Za-z]{100}") + .unwrap(); + assert_eq!(0, re.max_haystack_len()); + } +} diff --git a/vendor/regex-automata/src/nfa/thompson/builder.rs b/vendor/regex-automata/src/nfa/thompson/builder.rs new file mode 100644 index 0000000..6b69e87 --- /dev/null +++ b/vendor/regex-automata/src/nfa/thompson/builder.rs @@ -0,0 +1,1337 @@ +use core::mem; + +use alloc::{sync::Arc, vec, vec::Vec}; + +use crate::{ + nfa::thompson::{ + error::BuildError, + nfa::{self, SparseTransitions, Transition, NFA}, + }, + util::{ + look::{Look, LookMatcher}, + primitives::{IteratorIndexExt, PatternID, SmallIndex, StateID}, + }, +}; + +/// An intermediate NFA state used during construction. +/// +/// During construction of an NFA, it is often convenient to work with states +/// that are amenable to mutation and other carry more information than we +/// otherwise need once an NFA has been built. This type represents those +/// needs. +/// +/// Once construction is finished, the builder will convert these states to a +/// [`nfa::thompson::State`](crate::nfa::thompson::State). This conversion not +/// only results in a simpler representation, but in some cases, entire classes +/// of states are completely removed (such as [`State::Empty`]). +#[derive(Clone, Debug, Eq, PartialEq)] +enum State { + /// An empty state whose only purpose is to forward the automaton to + /// another state via an unconditional epsilon transition. + /// + /// Unconditional epsilon transitions are quite useful during the + /// construction of an NFA, as they permit the insertion of no-op + /// placeholders that make it easier to compose NFA sub-graphs. When + /// the Thompson NFA builder produces a final NFA, all unconditional + /// epsilon transitions are removed, and state identifiers are remapped + /// accordingly. 
+ Empty { + /// The next state that this state should transition to. + next: StateID, + }, + /// A state that only transitions to another state if the current input + /// byte is in a particular range of bytes. + ByteRange { trans: Transition }, + /// A state with possibly many transitions, represented in a sparse + /// fashion. Transitions must be ordered lexicographically by input range + /// and be non-overlapping. As such, this may only be used when every + /// transition has equal priority. (In practice, this is only used for + /// encoding large UTF-8 automata.) In contrast, a `Union` state has each + /// alternate in order of priority. Priority is used to implement greedy + /// matching and also alternations themselves, e.g., `abc|a` where `abc` + /// has priority over `a`. + /// + /// To clarify, it is possible to remove `Sparse` and represent all things + /// that `Sparse` is used for via `Union`. But this creates a more bloated + /// NFA with more epsilon transitions than is necessary in the special case + /// of character classes. + Sparse { transitions: Vec }, + /// A conditional epsilon transition satisfied via some sort of + /// look-around. + Look { look: Look, next: StateID }, + /// An empty state that records the start of a capture location. This is an + /// unconditional epsilon transition like `Empty`, except it can be used to + /// record position information for a capture group when using the NFA for + /// search. + CaptureStart { + /// The ID of the pattern that this capture was defined. + pattern_id: PatternID, + /// The capture group index that this capture state corresponds to. + /// The capture group index is always relative to its corresponding + /// pattern. Therefore, in the presence of multiple patterns, both the + /// pattern ID and the capture group index are required to uniquely + /// identify a capturing group. + group_index: SmallIndex, + /// The next state that this state should transition to. 
+ next: StateID, + }, + /// An empty state that records the end of a capture location. This is an + /// unconditional epsilon transition like `Empty`, except it can be used to + /// record position information for a capture group when using the NFA for + /// search. + CaptureEnd { + /// The ID of the pattern that this capture was defined. + pattern_id: PatternID, + /// The capture group index that this capture state corresponds to. + /// The capture group index is always relative to its corresponding + /// pattern. Therefore, in the presence of multiple patterns, both the + /// pattern ID and the capture group index are required to uniquely + /// identify a capturing group. + group_index: SmallIndex, + /// The next state that this state should transition to. + next: StateID, + }, + /// An alternation such that there exists an epsilon transition to all + /// states in `alternates`, where matches found via earlier transitions + /// are preferred over later transitions. + Union { alternates: Vec }, + /// An alternation such that there exists an epsilon transition to all + /// states in `alternates`, where matches found via later transitions are + /// preferred over earlier transitions. + /// + /// This "reverse" state exists for convenience during compilation that + /// permits easy construction of non-greedy combinations of NFA states. At + /// the end of compilation, Union and UnionReverse states are merged into + /// one Union type of state, where the latter has its epsilon transitions + /// reversed to reflect the priority inversion. + /// + /// The "convenience" here arises from the fact that as new states are + /// added to the list of `alternates`, we would like that add operation + /// to be amortized constant time. But if we used a `Union`, we'd need to + /// prepend the state, which takes O(n) time. There are other approaches we + /// could use to solve this, but this seems simple enough. 
+ UnionReverse { alternates: Vec }, + /// A state that cannot be transitioned out of. This is useful for cases + /// where you want to prevent matching from occurring. For example, if your + /// regex parser permits empty character classes, then one could choose a + /// `Fail` state to represent it. + Fail, + /// A match state. There is at most one such occurrence of this state in + /// an NFA for each pattern compiled into the NFA. At time of writing, a + /// match state is always produced for every pattern given, but in theory, + /// if a pattern can never lead to a match, then the match state could be + /// omitted. + /// + /// `pattern_id` refers to the ID of the pattern itself, which corresponds + /// to the pattern's index (starting at 0). + Match { pattern_id: PatternID }, +} + +impl State { + /// If this state is an unconditional epsilon transition, then this returns + /// the target of the transition. + fn goto(&self) -> Option { + match *self { + State::Empty { next } => Some(next), + State::Union { ref alternates } if alternates.len() == 1 => { + Some(alternates[0]) + } + State::UnionReverse { ref alternates } + if alternates.len() == 1 => + { + Some(alternates[0]) + } + _ => None, + } + } + + /// Returns the heap memory usage, in bytes, of this state. + fn memory_usage(&self) -> usize { + match *self { + State::Empty { .. } + | State::ByteRange { .. } + | State::Look { .. } + | State::CaptureStart { .. } + | State::CaptureEnd { .. } + | State::Fail + | State::Match { .. } => 0, + State::Sparse { ref transitions } => { + transitions.len() * mem::size_of::() + } + State::Union { ref alternates } => { + alternates.len() * mem::size_of::() + } + State::UnionReverse { ref alternates } => { + alternates.len() * mem::size_of::() + } + } + } +} + +/// An abstraction for building Thompson NFAs by hand. 
+/// +/// A builder is what a [`thompson::Compiler`](crate::nfa::thompson::Compiler) +/// uses internally to translate a regex's high-level intermediate +/// representation into an [`NFA`]. +/// +/// The primary function of this builder is to abstract away the internal +/// representation of an NFA and make it difficult to produce NFAs are that +/// internally invalid or inconsistent. This builder also provides a way to +/// add "empty" states (which can be thought of as unconditional epsilon +/// transitions), despite the fact that [`thompson::State`](nfa::State) does +/// not have any "empty" representation. The advantage of "empty" states is +/// that they make the code for constructing a Thompson NFA logically simpler. +/// +/// Many of the routines on this builder may panic or return errors. Generally +/// speaking, panics occur when an invalid sequence of method calls were made, +/// where as an error occurs if things get too big. (Where "too big" might mean +/// exhausting identifier space or using up too much heap memory in accordance +/// with the configured [`size_limit`](Builder::set_size_limit).) +/// +/// # Overview +/// +/// ## Adding multiple patterns +/// +/// Each pattern you add to an NFA should correspond to a pair of +/// [`Builder::start_pattern`] and [`Builder::finish_pattern`] calls, with +/// calls inbetween that add NFA states for that pattern. NFA states may be +/// added without first calling `start_pattern`, with the exception of adding +/// capturing states. +/// +/// ## Adding NFA states +/// +/// Here is a very brief overview of each of the methods that add NFA states. +/// Every method adds a single state. +/// +/// * [`add_empty`](Builder::add_empty): Add a state with a single +/// unconditional epsilon transition to another state. +/// * [`add_union`](Builder::add_union): Adds a state with unconditional +/// epsilon transitions to two or more states, with earlier transitions +/// preferred over later ones. 
+/// * [`add_union_reverse`](Builder::add_union_reverse): Adds a state with +/// unconditional epsilon transitions to two or more states, with later +/// transitions preferred over earlier ones. +/// * [`add_range`](Builder::add_range): Adds a state with a single transition +/// to another state that can only be followed if the current input byte is +/// within the range given. +/// * [`add_sparse`](Builder::add_sparse): Adds a state with two or more +/// range transitions to other states, where a transition is only followed +/// if the current input byte is within one of the ranges. All transitions +/// in this state have equal priority, and the corresponding ranges must be +/// non-overlapping. +/// * [`add_look`](Builder::add_look): Adds a state with a single *conditional* +/// epsilon transition to another state, where the condition depends on a +/// limited look-around property. +/// * [`add_capture_start`](Builder::add_capture_start): Adds a state with +/// a single unconditional epsilon transition that also instructs an NFA +/// simulation to record the current input position to a specific location in +/// memory. This is intended to represent the starting location of a capturing +/// group. +/// * [`add_capture_end`](Builder::add_capture_end): Adds a state with +/// a single unconditional epsilon transition that also instructs an NFA +/// simulation to record the current input position to a specific location in +/// memory. This is intended to represent the ending location of a capturing +/// group. +/// * [`add_fail`](Builder::add_fail): Adds a state that never transitions to +/// another state. +/// * [`add_match`](Builder::add_match): Add a state that indicates a match has +/// been found for a particular pattern. A match state is a final state with +/// no outgoing transitions. +/// +/// ## Setting transitions between NFA states +/// +/// The [`Builder::patch`] method creates a transition from one state to the +/// next. 
If the `from` state corresponds to a state that supports multiple +/// outgoing transitions (such as "union"), then this adds the corresponding +/// transition. Otherwise, it sets the single transition. (This routine panics +/// if `from` corresponds to a state added by `add_sparse`, since sparse states +/// need more specialized handling.) +/// +/// # Example +/// +/// This annotated example shows how to hand construct the regex `[a-z]+` +/// (without an unanchored prefix). +/// +/// ``` +/// use regex_automata::{ +/// nfa::thompson::{pikevm::PikeVM, Builder, Transition}, +/// util::primitives::StateID, +/// Match, +/// }; +/// +/// let mut builder = Builder::new(); +/// // Before adding NFA states for our pattern, we need to tell the builder +/// // that we are starting the pattern. +/// builder.start_pattern()?; +/// // Since we use the Pike VM below for searching, we need to add capturing +/// // states. If you're just going to build a DFA from the NFA, then capturing +/// // states do not need to be added. +/// let start = builder.add_capture_start(StateID::ZERO, 0, None)?; +/// let range = builder.add_range(Transition { +/// // We don't know the state ID of the 'next' state yet, so we just fill +/// // in a dummy 'ZERO' value. +/// start: b'a', end: b'z', next: StateID::ZERO, +/// })?; +/// // This state will point back to 'range', but also enable us to move ahead. +/// // That is, this implements the '+' repetition operator. We add 'range' and +/// // then 'end' below to this alternation. +/// let alt = builder.add_union(vec![])?; +/// // The final state before the match state, which serves to capture the +/// // end location of the match. +/// let end = builder.add_capture_end(StateID::ZERO, 0)?; +/// // The match state for our pattern. +/// let mat = builder.add_match()?; +/// // Now we fill in the transitions between states. 
+/// builder.patch(start, range)?; +/// builder.patch(range, alt)?; +/// // If we added 'end' before 'range', then we'd implement non-greedy +/// // matching, i.e., '+?'. +/// builder.patch(alt, range)?; +/// builder.patch(alt, end)?; +/// builder.patch(end, mat)?; +/// // We must explicitly finish pattern and provide the starting state ID for +/// // this particular pattern. +/// builder.finish_pattern(start)?; +/// // Finally, when we build the NFA, we provide the anchored and unanchored +/// // starting state IDs. Since we didn't bother with an unanchored prefix +/// // here, we only support anchored searching. Thus, both starting states are +/// // the same. +/// let nfa = builder.build(start, start)?; +/// +/// // Now build a Pike VM from our NFA, and use it for searching. This shows +/// // how we can use a regex engine without ever worrying about syntax! +/// let re = PikeVM::new_from_nfa(nfa)?; +/// let mut cache = re.create_cache(); +/// let mut caps = re.create_captures(); +/// let expected = Some(Match::must(0, 0..3)); +/// re.captures(&mut cache, "foo0", &mut caps); +/// assert_eq!(expected, caps.get_match()); +/// +/// # Ok::<(), Box>(()) +/// ``` +#[derive(Clone, Debug, Default)] +pub struct Builder { + /// The ID of the pattern that we're currently building. + /// + /// Callers are required to set (and unset) this by calling + /// {start,finish}_pattern. Otherwise, most methods will panic. + pattern_id: Option, + /// A sequence of intermediate NFA states. Once a state is added to this + /// sequence, it is assigned a state ID equivalent to its index. Once a + /// state is added, it is still expected to be mutated, e.g., to set its + /// transition to a state that didn't exist at the time it was added. + states: Vec, + /// The starting states for each individual pattern. Starting at any + /// of these states will result in only an anchored search for the + /// corresponding pattern. The vec is indexed by pattern ID. 
When the NFA + /// contains a single regex, then `start_pattern[0]` and `start_anchored` + /// are always equivalent. + start_pattern: Vec, + /// A map from pattern ID to capture group index to name. (If no name + /// exists, then a None entry is present. Thus, all capturing groups are + /// present in this mapping.) + /// + /// The outer vec is indexed by pattern ID, while the inner vec is indexed + /// by capture index offset for the corresponding pattern. + /// + /// The first capture group for each pattern is always unnamed and is thus + /// always None. + captures: Vec>>>, + /// The combined memory used by each of the 'State's in 'states'. This + /// only includes heap usage by each state, and not the size of the state + /// itself. In other words, this tracks heap memory used that isn't + /// captured via `size_of::() * states.len()`. + memory_states: usize, + /// Whether this NFA only matches UTF-8 and whether regex engines using + /// this NFA for searching should report empty matches that split a + /// codepoint. + utf8: bool, + /// Whether this NFA should be matched in reverse or not. + reverse: bool, + /// The matcher to use for look-around assertions. + look_matcher: LookMatcher, + /// A size limit to respect when building an NFA. If the total heap memory + /// of the intermediate NFA states exceeds (or would exceed) this amount, + /// then an error is returned. + size_limit: Option, +} + +impl Builder { + /// Create a new builder for hand-assembling NFAs. + pub fn new() -> Builder { + Builder::default() + } + + /// Clear this builder. + /// + /// Clearing removes all state associated with building an NFA, but does + /// not reset configuration (such as size limits and whether the NFA + /// should only match UTF-8). After clearing, the builder can be reused to + /// assemble an entirely new NFA. 
+ pub fn clear(&mut self) { + self.pattern_id = None; + self.states.clear(); + self.start_pattern.clear(); + self.captures.clear(); + self.memory_states = 0; + } + + /// Assemble a [`NFA`] from the states added so far. + /// + /// After building an NFA, more states may be added and `build` may be + /// called again. To reuse a builder to produce an entirely new NFA from + /// scratch, call the [`clear`](Builder::clear) method first. + /// + /// `start_anchored` refers to the ID of the starting state that anchored + /// searches should use. That is, searches who matches are limited to the + /// starting position of the search. + /// + /// `start_unanchored` refers to the ID of the starting state that + /// unanchored searches should use. This permits searches to report matches + /// that start after the beginning of the search. In cases where unanchored + /// searches are not supported, the unanchored starting state ID must be + /// the same as the anchored starting state ID. + /// + /// # Errors + /// + /// This returns an error if there was a problem producing the final NFA. + /// In particular, this might include an error if the capturing groups + /// added to this builder violate any of the invariants documented on + /// [`GroupInfo`](crate::util::captures::GroupInfo). + /// + /// # Panics + /// + /// If `start_pattern` was called, then `finish_pattern` must be called + /// before `build`, otherwise this panics. + /// + /// This may panic for other invalid uses of a builder. For example, if + /// a "start capture" state was added without a corresponding "end capture" + /// state. 
+ pub fn build( + &self, + start_anchored: StateID, + start_unanchored: StateID, + ) -> Result { + assert!(self.pattern_id.is_none(), "must call 'finish_pattern' first"); + debug!( + "intermediate NFA compilation via builder is complete, \ + intermediate NFA size: {} states, {} bytes on heap", + self.states.len(), + self.memory_usage(), + ); + + let mut nfa = nfa::Inner::default(); + nfa.set_utf8(self.utf8); + nfa.set_reverse(self.reverse); + nfa.set_look_matcher(self.look_matcher.clone()); + // A set of compiler internal state IDs that correspond to states + // that are exclusively epsilon transitions, i.e., goto instructions, + // combined with the state that they point to. This is used to + // record said states while transforming the compiler's internal NFA + // representation to the external form. + let mut empties = vec![]; + // A map used to re-map state IDs when translating this builder's + // internal NFA state representation to the final NFA representation. + let mut remap = vec![]; + remap.resize(self.states.len(), StateID::ZERO); + + nfa.set_starts(start_anchored, start_unanchored, &self.start_pattern); + nfa.set_captures(&self.captures).map_err(BuildError::captures)?; + // The idea here is to convert our intermediate states to their final + // form. The only real complexity here is the process of converting + // transitions, which are expressed in terms of state IDs. The new + // set of states will be smaller because of partial epsilon removal, + // so the state IDs will not be the same. + for (sid, state) in self.states.iter().with_state_ids() { + match *state { + State::Empty { next } => { + // Since we're removing empty states, we need to handle + // them later since we don't yet know which new state this + // empty state will be mapped to. 
+ empties.push((sid, next)); + } + State::ByteRange { trans } => { + remap[sid] = nfa.add(nfa::State::ByteRange { trans }); + } + State::Sparse { ref transitions } => { + remap[sid] = match transitions.len() { + 0 => nfa.add(nfa::State::Fail), + 1 => nfa.add(nfa::State::ByteRange { + trans: transitions[0], + }), + _ => { + let transitions = + transitions.to_vec().into_boxed_slice(); + let sparse = SparseTransitions { transitions }; + nfa.add(nfa::State::Sparse(sparse)) + } + } + } + State::Look { look, next } => { + remap[sid] = nfa.add(nfa::State::Look { look, next }); + } + State::CaptureStart { pattern_id, group_index, next } => { + // We can't remove this empty state because of the side + // effect of capturing an offset for this capture slot. + let slot = nfa + .group_info() + .slot(pattern_id, group_index.as_usize()) + .expect("invalid capture index"); + let slot = + SmallIndex::new(slot).expect("a small enough slot"); + remap[sid] = nfa.add(nfa::State::Capture { + next, + pattern_id, + group_index, + slot, + }); + } + State::CaptureEnd { pattern_id, group_index, next } => { + // We can't remove this empty state because of the side + // effect of capturing an offset for this capture slot. + // Also, this always succeeds because we check that all + // slot indices are valid for all capture indices when they + // are initially added. 
+ let slot = nfa + .group_info() + .slot(pattern_id, group_index.as_usize()) + .expect("invalid capture index") + .checked_add(1) + .unwrap(); + let slot = + SmallIndex::new(slot).expect("a small enough slot"); + remap[sid] = nfa.add(nfa::State::Capture { + next, + pattern_id, + group_index, + slot, + }); + } + State::Union { ref alternates } => { + if alternates.is_empty() { + remap[sid] = nfa.add(nfa::State::Fail); + } else if alternates.len() == 1 { + empties.push((sid, alternates[0])); + remap[sid] = alternates[0]; + } else if alternates.len() == 2 { + remap[sid] = nfa.add(nfa::State::BinaryUnion { + alt1: alternates[0], + alt2: alternates[1], + }); + } else { + let alternates = + alternates.to_vec().into_boxed_slice(); + remap[sid] = nfa.add(nfa::State::Union { alternates }); + } + } + State::UnionReverse { ref alternates } => { + if alternates.is_empty() { + remap[sid] = nfa.add(nfa::State::Fail); + } else if alternates.len() == 1 { + empties.push((sid, alternates[0])); + remap[sid] = alternates[0]; + } else if alternates.len() == 2 { + remap[sid] = nfa.add(nfa::State::BinaryUnion { + alt1: alternates[1], + alt2: alternates[0], + }); + } else { + let mut alternates = + alternates.to_vec().into_boxed_slice(); + alternates.reverse(); + remap[sid] = nfa.add(nfa::State::Union { alternates }); + } + } + State::Fail => { + remap[sid] = nfa.add(nfa::State::Fail); + } + State::Match { pattern_id } => { + remap[sid] = nfa.add(nfa::State::Match { pattern_id }); + } + } + } + // Some of the new states still point to empty state IDs, so we need to + // follow each of them and remap the empty state IDs to their non-empty + // state IDs. + // + // We also keep track of which states we've already mapped. This helps + // avoid quadratic behavior in a long chain of empty states. For + // example, in 'a{0}{50000}'. 
+ let mut remapped = vec![false; self.states.len()]; + for &(empty_id, empty_next) in empties.iter() { + if remapped[empty_id] { + continue; + } + // empty states can point to other empty states, forming a chain. + // So we must follow the chain until the end, which must end at + // a non-empty state, and therefore, a state that is correctly + // remapped. We are guaranteed to terminate because our compiler + // never builds a loop among only empty states. + let mut new_next = empty_next; + while let Some(next) = self.states[new_next].goto() { + new_next = next; + } + remap[empty_id] = remap[new_next]; + remapped[empty_id] = true; + + // Now that we've remapped the main 'empty_id' above, we re-follow + // the chain from above and remap every empty state we found along + // the way to our ultimate non-empty target. We are careful to set + // 'remapped' to true for each such state. We thus will not need + // to re-compute this chain for any subsequent empty states in + // 'empties' that are part of this chain. + let mut next2 = empty_next; + while let Some(next) = self.states[next2].goto() { + remap[next2] = remap[new_next]; + remapped[next2] = true; + next2 = next; + } + } + // Finally remap all of the state IDs. + nfa.remap(&remap); + let final_nfa = nfa.into_nfa(); + debug!( + "NFA compilation via builder complete, \ + final NFA size: {} states, {} bytes on heap, \ + has empty? {:?}, utf8? {:?}", + final_nfa.states().len(), + final_nfa.memory_usage(), + final_nfa.has_empty(), + final_nfa.is_utf8(), + ); + Ok(final_nfa) + } + + /// Start the assembly of a pattern in this NFA. + /// + /// Upon success, this returns the identifier for the new pattern. + /// Identifiers start at `0` and are incremented by 1 for each new pattern. + /// + /// It is necessary to call this routine before adding capturing states. + /// Otherwise, any other NFA state may be added before starting a pattern. 
+ /// + /// # Errors + /// + /// If the pattern identifier space is exhausted, then this returns an + /// error. + /// + /// # Panics + /// + /// If this is called while assembling another pattern (i.e., before + /// `finish_pattern` is called), then this panics. + pub fn start_pattern(&mut self) -> Result { + assert!(self.pattern_id.is_none(), "must call 'finish_pattern' first"); + + let proposed = self.start_pattern.len(); + let pid = PatternID::new(proposed) + .map_err(|_| BuildError::too_many_patterns(proposed))?; + self.pattern_id = Some(pid); + // This gets filled in when 'finish_pattern' is called. + self.start_pattern.push(StateID::ZERO); + Ok(pid) + } + + /// Finish the assembly of a pattern in this NFA. + /// + /// Upon success, this returns the identifier for the new pattern. + /// Identifiers start at `0` and are incremented by 1 for each new + /// pattern. This is the same identifier returned by the corresponding + /// `start_pattern` call. + /// + /// Note that `start_pattern` and `finish_pattern` pairs cannot be + /// interleaved or nested. A correct `finish_pattern` call _always_ + /// corresponds to the most recently called `start_pattern` routine. + /// + /// # Errors + /// + /// This currently never returns an error, but this is subject to change. + /// + /// # Panics + /// + /// If this is called without a corresponding `start_pattern` call, then + /// this panics. + pub fn finish_pattern( + &mut self, + start_id: StateID, + ) -> Result { + let pid = self.current_pattern_id(); + self.start_pattern[pid] = start_id; + self.pattern_id = None; + Ok(pid) + } + + /// Returns the pattern identifier of the current pattern. + /// + /// # Panics + /// + /// If this doesn't occur after a `start_pattern` call and before the + /// corresponding `finish_pattern` call, then this panics. 
+ pub fn current_pattern_id(&self) -> PatternID { + self.pattern_id.expect("must call 'start_pattern' first") + } + + /// Returns the number of patterns added to this builder so far. + /// + /// This only includes patterns that have had `finish_pattern` called + /// for them. + pub fn pattern_len(&self) -> usize { + self.start_pattern.len() + } + + /// Add an "empty" NFA state. + /// + /// An "empty" NFA state is a state with a single unconditional epsilon + /// transition to another NFA state. Such empty states are removed before + /// building the final [`NFA`] (which has no such "empty" states), but they + /// can be quite useful in the construction process of an NFA. + /// + /// # Errors + /// + /// This returns an error if the state identifier space is exhausted, or if + /// the configured heap size limit has been exceeded. + pub fn add_empty(&mut self) -> Result { + self.add(State::Empty { next: StateID::ZERO }) + } + + /// Add a "union" NFA state. + /// + /// A "union" NFA state that contains zero or more unconditional epsilon + /// transitions to other NFA states. The order of these transitions + /// reflects a priority order where earlier transitions are preferred over + /// later transitions. + /// + /// Callers may provide an empty set of alternates to this method call, and + /// then later add transitions via `patch`. At final build time, a "union" + /// state with no alternates is converted to a "fail" state, and a "union" + /// state with exactly one alternate is treated as if it were an "empty" + /// state. + /// + /// # Errors + /// + /// This returns an error if the state identifier space is exhausted, or if + /// the configured heap size limit has been exceeded. + pub fn add_union( + &mut self, + alternates: Vec, + ) -> Result { + self.add(State::Union { alternates }) + } + + /// Add a "reverse union" NFA state. + /// + /// A "reverse union" NFA state contains zero or more unconditional epsilon + /// transitions to other NFA states. 
The order of these transitions + /// reflects a priority order where later transitions are preferred + /// over earlier transitions. This is an inverted priority order when + /// compared to `add_union`. This is useful, for example, for implementing + /// non-greedy repetition operators. + /// + /// Callers may provide an empty set of alternates to this method call, and + /// then later add transitions via `patch`. At final build time, a "reverse + /// union" state with no alternates is converted to a "fail" state, and a + /// "reverse union" state with exactly one alternate is treated as if it + /// were an "empty" state. + /// + /// # Errors + /// + /// This returns an error if the state identifier space is exhausted, or if + /// the configured heap size limit has been exceeded. + pub fn add_union_reverse( + &mut self, + alternates: Vec, + ) -> Result { + self.add(State::UnionReverse { alternates }) + } + + /// Add a "range" NFA state. + /// + /// A "range" NFA state is a state with one outgoing transition to another + /// state, where that transition may only be followed if the current input + /// byte falls between a range of bytes given. + /// + /// # Errors + /// + /// This returns an error if the state identifier space is exhausted, or if + /// the configured heap size limit has been exceeded. + pub fn add_range( + &mut self, + trans: Transition, + ) -> Result { + self.add(State::ByteRange { trans }) + } + + /// Add a "sparse" NFA state. + /// + /// A "sparse" NFA state contains zero or more outgoing transitions, where + /// the transition to be followed (if any) is chosen based on whether the + /// current input byte falls in the range of one such transition. The + /// transitions given *must* be non-overlapping and in ascending order. (A + /// "sparse" state with no transitions is equivalent to a "fail" state.) 
+ /// + /// A "sparse" state is like adding a "union" state and pointing it at a + /// bunch of "range" states, except that the different alternates have + /// equal priority. + /// + /// Note that a "sparse" state is the only state that cannot be patched. + /// This is because a "sparse" state has many transitions, each of which + /// may point to a different NFA state. Moreover, adding more such + /// transitions requires more than just an NFA state ID to point to. It + /// also requires a byte range. The `patch` routine does not support the + /// additional information required. Therefore, callers must ensure that + /// all outgoing transitions for this state are included when `add_sparse` + /// is called. There is no way to add more later. + /// + /// # Errors + /// + /// This returns an error if the state identifier space is exhausted, or if + /// the configured heap size limit has been exceeded. + /// + /// # Panics + /// + /// This routine _may_ panic if the transitions given overlap or are not + /// in ascending order. + pub fn add_sparse( + &mut self, + transitions: Vec, + ) -> Result { + self.add(State::Sparse { transitions }) + } + + /// Add a "look" NFA state. + /// + /// A "look" NFA state corresponds to a state with exactly one + /// *conditional* epsilon transition to another NFA state. Namely, it + /// represents one of a small set of simplistic look-around operators. + /// + /// Callers may provide a "dummy" state ID (typically [`StateID::ZERO`]), + /// and then change it later with [`patch`](Builder::patch). + /// + /// # Errors + /// + /// This returns an error if the state identifier space is exhausted, or if + /// the configured heap size limit has been exceeded. + pub fn add_look( + &mut self, + next: StateID, + look: Look, + ) -> Result { + self.add(State::Look { look, next }) + } + + /// Add a "start capture" NFA state. 
+ /// + /// A "start capture" NFA state corresponds to a state with exactly one + /// outgoing unconditional epsilon transition to another state. Unlike + /// "empty" states, a "start capture" state also carries with it an + /// instruction for saving the current position of input to a particular + /// location in memory. NFA simulations, like the Pike VM, may use this + /// information to report the match locations of capturing groups in a + /// regex pattern. + /// + /// If the corresponding capturing group has a name, then callers should + /// include it here. + /// + /// Callers may provide a "dummy" state ID (typically [`StateID::ZERO`]), + /// and then change it later with [`patch`](Builder::patch). + /// + /// Note that unlike `start_pattern`/`finish_pattern`, capturing start and + /// end states may be interleaved. Indeed, it is typical for many "start + /// capture" NFA states to appear before the first "end capture" state. + /// + /// # Errors + /// + /// This returns an error if the state identifier space is exhausted, or if + /// the configured heap size limit has been exceeded or if the given + /// capture index overflows `usize`. + /// + /// While the above are the only conditions in which this routine can + /// currently return an error, it is possible to call this method with an + /// inputs that results in the final `build()` step failing to produce an + /// NFA. For example, if one adds two distinct capturing groups with the + /// same name, then that will result in `build()` failing with an error. + /// + /// See the [`GroupInfo`](crate::util::captures::GroupInfo) type for + /// more information on what qualifies as valid capturing groups. + /// + /// # Example + /// + /// This example shows that an error occurs when one tries to add multiple + /// capturing groups with the same name to the same pattern. 
+ /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::Builder, + /// util::primitives::StateID, + /// }; + /// + /// let name = Some(std::sync::Arc::from("foo")); + /// let mut builder = Builder::new(); + /// builder.start_pattern()?; + /// // 0th capture group should always be unnamed. + /// let start = builder.add_capture_start(StateID::ZERO, 0, None)?; + /// // OK + /// builder.add_capture_start(StateID::ZERO, 1, name.clone())?; + /// // This is not OK, but 'add_capture_start' still succeeds. We don't + /// // get an error until we call 'build' below. Without this call, the + /// // call to 'build' below would succeed. + /// builder.add_capture_start(StateID::ZERO, 2, name.clone())?; + /// // Finish our pattern so we can try to build the NFA. + /// builder.finish_pattern(start)?; + /// let result = builder.build(start, start); + /// assert!(result.is_err()); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// However, adding multiple capturing groups with the same name to + /// distinct patterns is okay: + /// + /// ``` + /// use std::sync::Arc; + /// + /// use regex_automata::{ + /// nfa::thompson::{pikevm::PikeVM, Builder, Transition}, + /// util::{ + /// captures::Captures, + /// primitives::{PatternID, StateID}, + /// }, + /// Span, + /// }; + /// + /// // Hand-compile the patterns '(?P[a-z])' and '(?P[A-Z])'. + /// let mut builder = Builder::new(); + /// // We compile them to support an unanchored search, which requires + /// // adding an implicit '(?s-u:.)*?' prefix before adding either pattern. + /// let unanchored_prefix = builder.add_union_reverse(vec![])?; + /// let any = builder.add_range(Transition { + /// start: b'\x00', end: b'\xFF', next: StateID::ZERO, + /// })?; + /// builder.patch(unanchored_prefix, any)?; + /// builder.patch(any, unanchored_prefix)?; + /// + /// // Compile an alternation that permits matching multiple patterns. 
+ /// let alt = builder.add_union(vec![])?; + /// builder.patch(unanchored_prefix, alt)?; + /// + /// // Compile '(?P[a-z]+)'. + /// builder.start_pattern()?; + /// let start0 = builder.add_capture_start(StateID::ZERO, 0, None)?; + /// // N.B. 0th capture group must always be unnamed. + /// let foo_start0 = builder.add_capture_start( + /// StateID::ZERO, 1, Some(Arc::from("foo")), + /// )?; + /// let lowercase = builder.add_range(Transition { + /// start: b'a', end: b'z', next: StateID::ZERO, + /// })?; + /// let foo_end0 = builder.add_capture_end(StateID::ZERO, 1)?; + /// let end0 = builder.add_capture_end(StateID::ZERO, 0)?; + /// let match0 = builder.add_match()?; + /// builder.patch(start0, foo_start0)?; + /// builder.patch(foo_start0, lowercase)?; + /// builder.patch(lowercase, foo_end0)?; + /// builder.patch(foo_end0, end0)?; + /// builder.patch(end0, match0)?; + /// builder.finish_pattern(start0)?; + /// + /// // Compile '(?P[A-Z]+)'. + /// builder.start_pattern()?; + /// let start1 = builder.add_capture_start(StateID::ZERO, 0, None)?; + /// // N.B. 0th capture group must always be unnamed. + /// let foo_start1 = builder.add_capture_start( + /// StateID::ZERO, 1, Some(Arc::from("foo")), + /// )?; + /// let uppercase = builder.add_range(Transition { + /// start: b'A', end: b'Z', next: StateID::ZERO, + /// })?; + /// let foo_end1 = builder.add_capture_end(StateID::ZERO, 1)?; + /// let end1 = builder.add_capture_end(StateID::ZERO, 0)?; + /// let match1 = builder.add_match()?; + /// builder.patch(start1, foo_start1)?; + /// builder.patch(foo_start1, uppercase)?; + /// builder.patch(uppercase, foo_end1)?; + /// builder.patch(foo_end1, end1)?; + /// builder.patch(end1, match1)?; + /// builder.finish_pattern(start1)?; + /// + /// // Now add the patterns to our alternation that we started above. + /// builder.patch(alt, start0)?; + /// builder.patch(alt, start1)?; + /// + /// // Finally build the NFA. 
The first argument is the anchored starting + /// // state (the pattern alternation) where as the second is the + /// // unanchored starting state (the unanchored prefix). + /// let nfa = builder.build(alt, unanchored_prefix)?; + /// + /// // Now build a Pike VM from our NFA and access the 'foo' capture + /// // group regardless of which pattern matched, since it is defined + /// // for both patterns. + /// let vm = PikeVM::new_from_nfa(nfa)?; + /// let mut cache = vm.create_cache(); + /// let caps: Vec = + /// vm.captures_iter(&mut cache, "0123aAaAA").collect(); + /// assert_eq!(5, caps.len()); + /// + /// assert_eq!(Some(PatternID::must(0)), caps[0].pattern()); + /// assert_eq!(Some(Span::from(4..5)), caps[0].get_group_by_name("foo")); + /// + /// assert_eq!(Some(PatternID::must(1)), caps[1].pattern()); + /// assert_eq!(Some(Span::from(5..6)), caps[1].get_group_by_name("foo")); + /// + /// assert_eq!(Some(PatternID::must(0)), caps[2].pattern()); + /// assert_eq!(Some(Span::from(6..7)), caps[2].get_group_by_name("foo")); + /// + /// assert_eq!(Some(PatternID::must(1)), caps[3].pattern()); + /// assert_eq!(Some(Span::from(7..8)), caps[3].get_group_by_name("foo")); + /// + /// assert_eq!(Some(PatternID::must(1)), caps[4].pattern()); + /// assert_eq!(Some(Span::from(8..9)), caps[4].get_group_by_name("foo")); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn add_capture_start( + &mut self, + next: StateID, + group_index: u32, + name: Option>, + ) -> Result { + let pid = self.current_pattern_id(); + let group_index = match SmallIndex::try_from(group_index) { + Err(_) => { + return Err(BuildError::invalid_capture_index(group_index)) + } + Ok(group_index) => group_index, + }; + // Make sure we have space to insert our (pid,index)|-->name mapping. 
+ if pid.as_usize() >= self.captures.len() { + for _ in 0..=(pid.as_usize() - self.captures.len()) { + self.captures.push(vec![]); + } + } + // In the case where 'group_index < self.captures[pid].len()', it means + // that we are adding a duplicate capture group. This is somewhat + // weird, but permissible because the capture group itself can be + // repeated in the syntax. For example, '([a-z]){4}' will produce 4 + // capture groups. In practice, only the last will be set at search + // time when a match occurs. For duplicates, we don't need to push + // anything other than a CaptureStart NFA state. + if group_index.as_usize() >= self.captures[pid].len() { + // For discontiguous indices, push placeholders for earlier capture + // groups that weren't explicitly added. + for _ in 0..(group_index.as_usize() - self.captures[pid].len()) { + self.captures[pid].push(None); + } + self.captures[pid].push(name); + } + self.add(State::CaptureStart { pattern_id: pid, group_index, next }) + } + + /// Add a "end capture" NFA state. + /// + /// A "end capture" NFA state corresponds to a state with exactly one + /// outgoing unconditional epsilon transition to another state. Unlike + /// "empty" states, a "end capture" state also carries with it an + /// instruction for saving the current position of input to a particular + /// location in memory. NFA simulations, like the Pike VM, may use this + /// information to report the match locations of capturing groups in a + /// + /// Callers may provide a "dummy" state ID (typically [`StateID::ZERO`]), + /// and then change it later with [`patch`](Builder::patch). + /// + /// Note that unlike `start_pattern`/`finish_pattern`, capturing start and + /// end states may be interleaved. Indeed, it is typical for many "start + /// capture" NFA states to appear before the first "end capture" state. 
+ /// + /// # Errors + /// + /// This returns an error if the state identifier space is exhausted, or if + /// the configured heap size limit has been exceeded or if the given + /// capture index overflows `usize`. + /// + /// While the above are the only conditions in which this routine can + /// currently return an error, it is possible to call this method with an + /// inputs that results in the final `build()` step failing to produce an + /// NFA. For example, if one adds two distinct capturing groups with the + /// same name, then that will result in `build()` failing with an error. + /// + /// See the [`GroupInfo`](crate::util::captures::GroupInfo) type for + /// more information on what qualifies as valid capturing groups. + pub fn add_capture_end( + &mut self, + next: StateID, + group_index: u32, + ) -> Result { + let pid = self.current_pattern_id(); + let group_index = match SmallIndex::try_from(group_index) { + Err(_) => { + return Err(BuildError::invalid_capture_index(group_index)) + } + Ok(group_index) => group_index, + }; + self.add(State::CaptureEnd { pattern_id: pid, group_index, next }) + } + + /// Adds a "fail" NFA state. + /// + /// A "fail" state is simply a state that has no outgoing transitions. It + /// acts as a way to cause a search to stop without reporting a match. + /// For example, one way to represent an NFA with zero patterns is with a + /// single "fail" state. + /// + /// # Errors + /// + /// This returns an error if the state identifier space is exhausted, or if + /// the configured heap size limit has been exceeded. + pub fn add_fail(&mut self) -> Result { + self.add(State::Fail) + } + + /// Adds a "match" NFA state. + /// + /// A "match" state has no outgoing transitions (just like a "fail" + /// state), but it has special significance in that if a search enters + /// this state, then a match has been found. The match state that is added + /// automatically has the current pattern ID associated with it. 
This is + /// used to report the matching pattern ID at search time. + /// + /// # Errors + /// + /// This returns an error if the state identifier space is exhausted, or if + /// the configured heap size limit has been exceeded. + /// + /// # Panics + /// + /// This must be called after a `start_pattern` call but before the + /// corresponding `finish_pattern` call. Otherwise, it panics. + pub fn add_match(&mut self) -> Result { + let pattern_id = self.current_pattern_id(); + let sid = self.add(State::Match { pattern_id })?; + Ok(sid) + } + + /// The common implementation of "add a state." It handles the common + /// error cases of state ID exhausting (by owning state ID allocation) and + /// whether the size limit has been exceeded. + fn add(&mut self, state: State) -> Result { + let id = StateID::new(self.states.len()) + .map_err(|_| BuildError::too_many_states(self.states.len()))?; + self.memory_states += state.memory_usage(); + self.states.push(state); + self.check_size_limit()?; + Ok(id) + } + + /// Add a transition from one state to another. + /// + /// This routine is called "patch" since it is very common to add the + /// states you want, typically with "dummy" state ID transitions, and then + /// "patch" in the real state IDs later. This is because you don't always + /// know all of the necessary state IDs to add because they might not + /// exist yet. + /// + /// # Errors + /// + /// This may error if patching leads to an increase in heap usage beyond + /// the configured size limit. Heap usage only grows when patching adds a + /// new transition (as in the case of a "union" state). + /// + /// # Panics + /// + /// This panics if `from` corresponds to a "sparse" state. When "sparse" + /// states are added, there is no way to patch them after-the-fact. (If you + /// have a use case where this would be helpful, please file an issue. It + /// will likely require a new API.) 
+ pub fn patch( + &mut self, + from: StateID, + to: StateID, + ) -> Result<(), BuildError> { + let old_memory_states = self.memory_states; + match self.states[from] { + State::Empty { ref mut next } => { + *next = to; + } + State::ByteRange { ref mut trans } => { + trans.next = to; + } + State::Sparse { .. } => { + panic!("cannot patch from a sparse NFA state") + } + State::Look { ref mut next, .. } => { + *next = to; + } + State::Union { ref mut alternates } => { + alternates.push(to); + self.memory_states += mem::size_of::(); + } + State::UnionReverse { ref mut alternates } => { + alternates.push(to); + self.memory_states += mem::size_of::(); + } + State::CaptureStart { ref mut next, .. } => { + *next = to; + } + State::CaptureEnd { ref mut next, .. } => { + *next = to; + } + State::Fail => {} + State::Match { .. } => {} + } + if old_memory_states != self.memory_states { + self.check_size_limit()?; + } + Ok(()) + } + + /// Set whether the NFA produced by this builder should only match UTF-8. + /// + /// This should be set when both of the following are true: + /// + /// 1. The caller guarantees that the NFA created by this build will only + /// report non-empty matches with spans that are valid UTF-8. + /// 2. The caller desires regex engines using this NFA to avoid reporting + /// empty matches with a span that splits a valid UTF-8 encoded codepoint. + /// + /// Property (1) is not checked. Instead, this requires the caller to + /// promise that it is true. Property (2) corresponds to the behavior of + /// regex engines using the NFA created by this builder. Namely, there + /// is no way in the NFA's graph itself to say that empty matches found + /// by, for example, the regex `a*` will fall on valid UTF-8 boundaries. + /// Instead, this option is used to communicate the UTF-8 semantic to regex + /// engines that will typically implement it as a post-processing step by + /// filtering out empty matches that don't fall on UTF-8 boundaries. 
+ /// + /// If you're building an NFA from an HIR (and not using a + /// [`thompson::Compiler`](crate::nfa::thompson::Compiler)), then you can + /// use the [`syntax::Config::utf8`](crate::util::syntax::Config::utf8) + /// option to guarantee that if the HIR detects a non-empty match, then it + /// is guaranteed to be valid UTF-8. + /// + /// Note that property (2) does *not* specify the behavior of executing + /// a search on a haystack that is not valid UTF-8. Therefore, if you're + /// *not* running this NFA on strings that are guaranteed to be valid + /// UTF-8, you almost certainly do not want to enable this option. + /// Similarly, if you are running the NFA on strings that *are* guaranteed + /// to be valid UTF-8, then you almost certainly want to enable this option + /// unless you can guarantee that your NFA will never produce a zero-width + /// match. + /// + /// It is disabled by default. + pub fn set_utf8(&mut self, yes: bool) { + self.utf8 = yes; + } + + /// Returns whether UTF-8 mode is enabled for this builder. + /// + /// See [`Builder::set_utf8`] for more details about what "UTF-8 mode" is. + pub fn get_utf8(&self) -> bool { + self.utf8 + } + + /// Sets whether the NFA produced by this builder should be matched in + /// reverse or not. Generally speaking, when enabled, the NFA produced + /// should be matched by moving backwards through a haystack, from a higher + /// memory address to a lower memory address. + /// + /// See also [`NFA::is_reverse`] for more details. + /// + /// This is disabled by default, which means NFAs are by default matched + /// in the forward direction. + pub fn set_reverse(&mut self, yes: bool) { + self.reverse = yes; + } + + /// Returns whether reverse mode is enabled for this builder. + /// + /// See [`Builder::set_reverse`] for more details about what "reverse mode" + /// is. + pub fn get_reverse(&self) -> bool { + self.reverse + } + + /// Sets the look-around matcher that should be used for the resulting NFA. 
+ /// + /// A look-around matcher can be used to configure how look-around + /// assertions are matched. For example, a matcher might carry + /// configuration that changes the line terminator used for `(?m:^)` and + /// `(?m:$)` assertions. + pub fn set_look_matcher(&mut self, m: LookMatcher) { + self.look_matcher = m; + } + + /// Returns the look-around matcher used for this builder. + /// + /// If a matcher was not explicitly set, then `LookMatcher::default()` is + /// returned. + pub fn get_look_matcher(&self) -> &LookMatcher { + &self.look_matcher + } + + /// Set the size limit on this builder. + /// + /// Setting the size limit will also check whether the NFA built so far + /// fits within the given size limit. If it doesn't, then an error is + /// returned. + /// + /// By default, there is no configured size limit. + pub fn set_size_limit( + &mut self, + limit: Option, + ) -> Result<(), BuildError> { + self.size_limit = limit; + self.check_size_limit() + } + + /// Return the currently configured size limit. + /// + /// By default, this returns `None`, which corresponds to no configured + /// size limit. + pub fn get_size_limit(&self) -> Option { + self.size_limit + } + + /// Returns the heap memory usage, in bytes, used by the NFA states added + /// so far. + /// + /// Note that this is an approximation of how big the final NFA will be. + /// In practice, the final NFA will likely be a bit smaller because of + /// its simpler state representation. (For example, using things like + /// `Box<[StateID]>` instead of `Vec`.) + pub fn memory_usage(&self) -> usize { + self.states.len() * mem::size_of::() + self.memory_states + } + + fn check_size_limit(&self) -> Result<(), BuildError> { + if let Some(limit) = self.size_limit { + if self.memory_usage() > limit { + return Err(BuildError::exceeded_size_limit(limit)); + } + } + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + // This asserts that a builder state doesn't have its size changed. 
It is + // *really* easy to accidentally increase the size, and thus potentially + // dramatically increase the memory usage of NFA builder. + // + // This assert doesn't mean we absolutely cannot increase the size of a + // builder state. We can. It's just here to make sure we do it knowingly + // and intentionally. + // + // A builder state is unfortunately a little bigger than an NFA state, + // since we really want to support adding things to a pre-existing state. + // i.e., We use Vec instead of Box<[thing]>. So we end up using an + // extra 8 bytes per state. Sad, but at least it gets freed once the NFA + // is built. + #[test] + fn state_has_small_size() { + #[cfg(target_pointer_width = "64")] + assert_eq!(32, core::mem::size_of::()); + #[cfg(target_pointer_width = "32")] + assert_eq!(16, core::mem::size_of::()); + } +} diff --git a/vendor/regex-automata/src/nfa/thompson/compiler.rs b/vendor/regex-automata/src/nfa/thompson/compiler.rs new file mode 100644 index 0000000..2d21729 --- /dev/null +++ b/vendor/regex-automata/src/nfa/thompson/compiler.rs @@ -0,0 +1,2265 @@ +use core::{borrow::Borrow, cell::RefCell}; + +use alloc::{sync::Arc, vec, vec::Vec}; + +use regex_syntax::{ + hir::{self, Hir}, + utf8::{Utf8Range, Utf8Sequences}, + ParserBuilder, +}; + +use crate::{ + nfa::thompson::{ + builder::Builder, + error::BuildError, + literal_trie::LiteralTrie, + map::{Utf8BoundedMap, Utf8SuffixKey, Utf8SuffixMap}, + nfa::{Transition, NFA}, + range_trie::RangeTrie, + }, + util::{ + look::{Look, LookMatcher}, + primitives::{PatternID, StateID}, + }, +}; + +/// The configuration used for a Thompson NFA compiler. +#[derive(Clone, Debug, Default)] +pub struct Config { + utf8: Option, + reverse: Option, + nfa_size_limit: Option>, + shrink: Option, + which_captures: Option, + look_matcher: Option, + #[cfg(test)] + unanchored_prefix: Option, +} + +impl Config { + /// Return a new default Thompson NFA compiler configuration. 
+ pub fn new() -> Config { + Config::default() + } + + /// Whether to enable UTF-8 mode during search or not. + /// + /// A regex engine is said to be in UTF-8 mode when it guarantees that + /// all matches returned by it have spans consisting of only valid UTF-8. + /// That is, it is impossible for a match span to be returned that + /// contains any invalid UTF-8. + /// + /// UTF-8 mode generally consists of two things: + /// + /// 1. Whether the NFA's states are constructed such that all paths to a + /// match state that consume at least one byte always correspond to valid + /// UTF-8. + /// 2. Whether all paths to a match state that do _not_ consume any bytes + /// should always correspond to valid UTF-8 boundaries. + /// + /// (1) is a guarantee made by whoever constructs the NFA. + /// If you're parsing a regex from its concrete syntax, then + /// [`syntax::Config::utf8`](crate::util::syntax::Config::utf8) can make + /// this guarantee for you. It does it by returning an error if the regex + /// pattern could ever report a non-empty match span that contains invalid + /// UTF-8. So long as `syntax::Config::utf8` mode is enabled and your regex + /// successfully parses, then you're guaranteed that the corresponding NFA + /// will only ever report non-empty match spans containing valid UTF-8. + /// + /// (2) is a trickier guarantee because it cannot be enforced by the NFA + /// state graph itself. Consider, for example, the regex `a*`. It matches + /// the empty strings in `☃` at positions `0`, `1`, `2` and `3`, where + /// positions `1` and `2` occur within the UTF-8 encoding of a codepoint, + /// and thus correspond to invalid UTF-8 boundaries. Therefore, this + /// guarantee must be made at a higher level than the NFA state graph + /// itself. This crate deals with this case in each regex engine.
Namely, + /// when a zero-width match that splits a codepoint is found and UTF-8 + /// mode enabled, then it is ignored and the engine moves on looking for + /// the next match. + /// + /// Thus, UTF-8 mode is both a promise that the NFA built only reports + /// non-empty matches that are valid UTF-8, and an *instruction* to regex + /// engines that empty matches that split codepoints should be banned. + /// + /// Because UTF-8 mode is fundamentally about avoiding invalid UTF-8 spans, + /// it only makes sense to enable this option when you *know* your haystack + /// is valid UTF-8. (For example, a `&str`.) Enabling UTF-8 mode and + /// searching a haystack that contains invalid UTF-8 leads to **unspecified + /// behavior**. + /// + /// Therefore, it may make sense to enable `syntax::Config::utf8` while + /// simultaneously *disabling* this option. That would ensure all non-empty + /// match spans are valid UTF-8, but that empty match spans may still split + /// a codepoint or match at other places that aren't valid UTF-8. + /// + /// In general, this mode is only relevant if your regex can match the + /// empty string. Most regexes don't. + /// + /// This is enabled by default. + /// + /// # Example + /// + /// This example shows how UTF-8 mode can impact the match spans that may + /// be reported in certain cases. + /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::{self, pikevm::PikeVM}, + /// Match, Input, + /// }; + /// + /// let re = PikeVM::new("")?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// + /// // UTF-8 mode is enabled by default. + /// let mut input = Input::new("☃"); + /// re.search(&mut cache, &input, &mut caps); + /// assert_eq!(Some(Match::must(0, 0..0)), caps.get_match()); + /// + /// // Even though an empty regex matches at 1..1, our next match is + /// // 3..3 because 1..1 and 2..2 split the snowman codepoint (which is + /// // three bytes long). 
+ /// input.set_start(1); + /// re.search(&mut cache, &input, &mut caps); + /// assert_eq!(Some(Match::must(0, 3..3)), caps.get_match()); + /// + /// // But if we disable UTF-8, then we'll get matches at 1..1 and 2..2: + /// let re = PikeVM::builder() + /// .thompson(thompson::Config::new().utf8(false)) + /// .build("")?; + /// re.search(&mut cache, &input, &mut caps); + /// assert_eq!(Some(Match::must(0, 1..1)), caps.get_match()); + /// + /// input.set_start(2); + /// re.search(&mut cache, &input, &mut caps); + /// assert_eq!(Some(Match::must(0, 2..2)), caps.get_match()); + /// + /// input.set_start(3); + /// re.search(&mut cache, &input, &mut caps); + /// assert_eq!(Some(Match::must(0, 3..3)), caps.get_match()); + /// + /// input.set_start(4); + /// re.search(&mut cache, &input, &mut caps); + /// assert_eq!(None, caps.get_match()); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn utf8(mut self, yes: bool) -> Config { + self.utf8 = Some(yes); + self + } + + /// Reverse the NFA. + /// + /// A NFA reversal is performed by reversing all of the concatenated + /// sub-expressions in the original pattern, recursively. (Look around + /// operators are also inverted.) The resulting NFA can be used to match + /// the pattern starting from the end of a string instead of the beginning + /// of a string. + /// + /// Reversing the NFA is useful for building a reverse DFA, which is most + /// useful for finding the start of a match after its ending position has + /// been found. NFA execution engines typically do not work on reverse + /// NFAs. For example, currently, the Pike VM reports the starting location + /// of matches without a reverse NFA. + /// + /// Currently, enabling this setting requires disabling the + /// [`captures`](Config::captures) setting. If both are enabled, then the + /// compiler will return an error. It is expected that this limitation will + /// be lifted in the future. + /// + /// This is disabled by default. 
+ /// + /// # Example + /// + /// This example shows how to build a DFA from a reverse NFA, and then use + /// the DFA to search backwards. + /// + /// ``` + /// use regex_automata::{ + /// dfa::{self, Automaton}, + /// nfa::thompson::{NFA, WhichCaptures}, + /// HalfMatch, Input, + /// }; + /// + /// let dfa = dfa::dense::Builder::new() + /// .thompson(NFA::config() + /// .which_captures(WhichCaptures::None) + /// .reverse(true) + /// ) + /// .build("baz[0-9]+")?; + /// let expected = Some(HalfMatch::must(0, 3)); + /// assert_eq!( + /// expected, + /// dfa.try_search_rev(&Input::new("foobaz12345bar"))?, + /// ); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn reverse(mut self, yes: bool) -> Config { + self.reverse = Some(yes); + self + } + + /// Sets an approximate size limit on the total heap used by the NFA being + /// compiled. + /// + /// This permits imposing constraints on the size of a compiled NFA. This + /// may be useful in contexts where the regex pattern is untrusted and one + /// wants to avoid using too much memory. + /// + /// This size limit does not apply to auxiliary heap used during + /// compilation that is not part of the built NFA. + /// + /// Note that this size limit is applied during compilation in order for + /// the limit to prevent too much heap from being used. However, the + /// implementation may use an intermediate NFA representation that is + /// otherwise slightly bigger than the final public form. Since the size + /// limit may be applied to an intermediate representation, there is not + /// necessarily a precise correspondence between the configured size limit + /// and the heap usage of the final NFA. + /// + /// There is no size limit by default. + /// + /// # Example + /// + /// This example demonstrates how Unicode mode can greatly increase the + /// size of the NFA. 
+ /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::nfa::thompson::NFA; + /// + /// // 300KB isn't enough! + /// NFA::compiler() + /// .configure(NFA::config().nfa_size_limit(Some(300_000))) + /// .build(r"\w{20}") + /// .unwrap_err(); + /// + /// // ... but 400KB probably is. + /// let nfa = NFA::compiler() + /// .configure(NFA::config().nfa_size_limit(Some(400_000))) + /// .build(r"\w{20}")?; + /// + /// assert_eq!(nfa.pattern_len(), 1); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn nfa_size_limit(mut self, bytes: Option) -> Config { + self.nfa_size_limit = Some(bytes); + self + } + + /// Apply best effort heuristics to shrink the NFA at the expense of more + /// time/memory. + /// + /// Generally speaking, if one is using an NFA to compile a DFA, then the + /// extra time used to shrink the NFA will be more than made up for during + /// DFA construction (potentially by a lot). In other words, enabling this + /// can substantially decrease the overall amount of time it takes to build + /// a DFA. + /// + /// A reason to keep this disabled is if you want to compile an NFA and + /// start using it as quickly as possible without needing to build a DFA, + /// and you don't mind using a bit of extra memory for the NFA. e.g., for + /// an NFA simulation or for a lazy DFA. + /// + /// NFA shrinking is currently most useful when compiling a reverse + /// NFA with large Unicode character classes. In particular, it trades + /// additional CPU time during NFA compilation in favor of generating fewer + /// NFA states. + /// + /// This is disabled by default because it can increase compile times + /// quite a bit if you aren't building a full DFA. + /// + /// # Example + /// + /// This example shows that NFA shrinking can lead to substantial space + /// savings in some cases. Notice that, as noted above, we build a reverse + /// DFA and use a pattern with a large Unicode character class. 
+ /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::nfa::thompson::{NFA, WhichCaptures}; + /// + /// // Currently we have to disable captures when enabling reverse NFA. + /// let config = NFA::config() + /// .which_captures(WhichCaptures::None) + /// .reverse(true); + /// let not_shrunk = NFA::compiler() + /// .configure(config.clone().shrink(false)) + /// .build(r"\w")?; + /// let shrunk = NFA::compiler() + /// .configure(config.clone().shrink(true)) + /// .build(r"\w")?; + /// + /// // While a specific shrink factor is not guaranteed, the savings can be + /// // considerable in some cases. + /// assert!(shrunk.states().len() * 2 < not_shrunk.states().len()); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn shrink(mut self, yes: bool) -> Config { + self.shrink = Some(yes); + self + } + + /// Whether to include 'Capture' states in the NFA. + /// + /// Currently, enabling this setting requires disabling the + /// [`reverse`](Config::reverse) setting. If both are enabled, then the + /// compiler will return an error. It is expected that this limitation will + /// be lifted in the future. + /// + /// This is enabled by default. + /// + /// # Example + /// + /// This example demonstrates that some regex engines, like the Pike VM, + /// require capturing states to be present in the NFA to report match + /// offsets. + /// + /// (Note that since this method is deprecated, the example below uses + /// [`Config::which_captures`] to disable capture states.) 
+ /// + /// ``` + /// use regex_automata::nfa::thompson::{ + /// pikevm::PikeVM, + /// NFA, + /// WhichCaptures, + /// }; + /// + /// let re = PikeVM::builder() + /// .thompson(NFA::config().which_captures(WhichCaptures::None)) + /// .build(r"[a-z]+")?; + /// let mut cache = re.create_cache(); + /// + /// assert!(re.is_match(&mut cache, "abc")); + /// assert_eq!(None, re.find(&mut cache, "abc")); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[deprecated(since = "0.3.5", note = "use which_captures instead")] + pub fn captures(self, yes: bool) -> Config { + self.which_captures(if yes { + WhichCaptures::All + } else { + WhichCaptures::None + }) + } + + /// Configures what kinds of capture groups are compiled into + /// [`State::Capture`](crate::nfa::thompson::State::Capture) states in a + /// Thompson NFA. + /// + /// Currently, using any option except for [`WhichCaptures::None`] requires + /// disabling the [`reverse`](Config::reverse) setting. If both are + /// enabled, then the compiler will return an error. It is expected that + /// this limitation will be lifted in the future. + /// + /// This is set to [`WhichCaptures::All`] by default. Callers may wish to + /// use [`WhichCaptures::Implicit`] in cases where one wants to avoid the + /// overhead of capture states for explicit groups. Usually this occurs + /// when one wants to use the `PikeVM` only for determining the overall + /// match. Otherwise, the `PikeVM` could use much more memory than is + /// necessary. + /// + /// # Example + /// + /// This example demonstrates that some regex engines, like the Pike VM, + /// require capturing states to be present in the NFA to report match + /// offsets.
+ /// + /// ``` + /// use regex_automata::nfa::thompson::{ + /// pikevm::PikeVM, + /// NFA, + /// WhichCaptures, + /// }; + /// + /// let re = PikeVM::builder() + /// .thompson(NFA::config().which_captures(WhichCaptures::None)) + /// .build(r"[a-z]+")?; + /// let mut cache = re.create_cache(); + /// + /// assert!(re.is_match(&mut cache, "abc")); + /// assert_eq!(None, re.find(&mut cache, "abc")); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// The same applies to the bounded backtracker: + /// + /// ``` + /// use regex_automata::nfa::thompson::{ + /// backtrack::BoundedBacktracker, + /// NFA, + /// WhichCaptures, + /// }; + /// + /// let re = BoundedBacktracker::builder() + /// .thompson(NFA::config().which_captures(WhichCaptures::None)) + /// .build(r"[a-z]+")?; + /// let mut cache = re.create_cache(); + /// + /// assert!(re.try_is_match(&mut cache, "abc")?); + /// assert_eq!(None, re.try_find(&mut cache, "abc")?); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn which_captures(mut self, which_captures: WhichCaptures) -> Config { + self.which_captures = Some(which_captures); + self + } + + /// Sets the look-around matcher that should be used with this NFA. + /// + /// A look-around matcher determines how to match look-around assertions. + /// In particular, some assertions are configurable. For example, the + /// `(?m:^)` and `(?m:$)` assertions can have their line terminator changed + /// from the default of `\n` to any other byte. + /// + /// # Example + /// + /// This shows how to change the line terminator for multi-line assertions. 
+ /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::{self, pikevm::PikeVM}, + /// util::look::LookMatcher, + /// Match, Input, + /// }; + /// + /// let mut lookm = LookMatcher::new(); + /// lookm.set_line_terminator(b'\x00'); + /// + /// let re = PikeVM::builder() + /// .thompson(thompson::Config::new().look_matcher(lookm)) + /// .build(r"(?m)^[a-z]+$")?; + /// let mut cache = re.create_cache(); + /// + /// // Multi-line assertions now use NUL as a terminator. + /// assert_eq!( + /// Some(Match::must(0, 1..4)), + /// re.find(&mut cache, b"\x00abc\x00"), + /// ); + /// // ... and \n is no longer recognized as a terminator. + /// assert_eq!( + /// None, + /// re.find(&mut cache, b"\nabc\n"), + /// ); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn look_matcher(mut self, m: LookMatcher) -> Config { + self.look_matcher = Some(m); + self + } + + /// Whether to compile an unanchored prefix into this NFA. + /// + /// This is enabled by default. It is made available for tests only to make + /// it easier to unit test the output of the compiler. + #[cfg(test)] + fn unanchored_prefix(mut self, yes: bool) -> Config { + self.unanchored_prefix = Some(yes); + self + } + + /// Returns whether this configuration has enabled UTF-8 mode. + pub fn get_utf8(&self) -> bool { + self.utf8.unwrap_or(true) + } + + /// Returns whether this configuration has enabled reverse NFA compilation. + pub fn get_reverse(&self) -> bool { + self.reverse.unwrap_or(false) + } + + /// Return the configured NFA size limit, if it exists, in the number of + /// bytes of heap used. + pub fn get_nfa_size_limit(&self) -> Option { + self.nfa_size_limit.unwrap_or(None) + } + + /// Return whether NFA shrinking is enabled. + pub fn get_shrink(&self) -> bool { + self.shrink.unwrap_or(false) + } + + /// Return whether NFA compilation is configured to produce capture states. 
+ #[deprecated(since = "0.3.5", note = "use get_which_captures instead")] + pub fn get_captures(&self) -> bool { + self.get_which_captures().is_any() + } + + /// Return what kinds of capture states will be compiled into an NFA. + pub fn get_which_captures(&self) -> WhichCaptures { + self.which_captures.unwrap_or(WhichCaptures::All) + } + + /// Return the look-around matcher for this NFA. + pub fn get_look_matcher(&self) -> LookMatcher { + self.look_matcher.clone().unwrap_or(LookMatcher::default()) + } + + /// Return whether NFA compilation is configured to include an unanchored + /// prefix. + /// + /// This is always true when not in test mode. + fn get_unanchored_prefix(&self) -> bool { + #[cfg(test)] + { + self.unanchored_prefix.unwrap_or(true) + } + #[cfg(not(test))] + { + true + } + } + + /// Overwrite the default configuration such that the options in `o` are + /// always used. If an option in `o` is not set, then the corresponding + /// option in `self` is used. If it's not set in `self` either, then it + /// remains not set. + pub(crate) fn overwrite(&self, o: Config) -> Config { + Config { + utf8: o.utf8.or(self.utf8), + reverse: o.reverse.or(self.reverse), + nfa_size_limit: o.nfa_size_limit.or(self.nfa_size_limit), + shrink: o.shrink.or(self.shrink), + which_captures: o.which_captures.or(self.which_captures), + look_matcher: o.look_matcher.or_else(|| self.look_matcher.clone()), + #[cfg(test)] + unanchored_prefix: o.unanchored_prefix.or(self.unanchored_prefix), + } + } +} + +/// A configuration indicating which kinds of +/// [`State::Capture`](crate::nfa::thompson::State::Capture) states to include. +/// +/// This configuration can be used with [`Config::which_captures`] to control +/// which capture states are compiled into a Thompson NFA. +/// +/// The default configuration is [`WhichCaptures::All`].
+#[derive(Clone, Copy, Debug)] +pub enum WhichCaptures { + /// All capture states, including those corresponding to both implicit and + /// explicit capture groups, are included in the Thompson NFA. + All, + /// Only capture states corresponding to implicit capture groups are + /// included. Implicit capture groups appear in every pattern implicitly + /// and correspond to the overall match of a pattern. + /// + /// This is useful when one only cares about the overall match of a + /// pattern. By excluding capture states from explicit capture groups, + /// one might be able to reduce the memory usage of a multi-pattern regex + /// substantially if it was otherwise written to have many explicit capture + /// groups. + Implicit, + /// No capture states are compiled into the Thompson NFA. + /// + /// This is useful when capture states are either not needed (for example, + /// if one is only trying to build a DFA) or if they aren't supported (for + /// example, a reverse NFA). + None, +} + +impl Default for WhichCaptures { + fn default() -> WhichCaptures { + WhichCaptures::All + } +} + +impl WhichCaptures { + /// Returns true if this configuration indicates that no capture states + /// should be produced in an NFA. + pub fn is_none(&self) -> bool { + matches!(*self, WhichCaptures::None) + } + + /// Returns true if this configuration indicates that some capture states + /// should be added to an NFA. Note that this might only include capture + /// states for implicit capture groups. + pub fn is_any(&self) -> bool { + !self.is_none() + } +} + +/* +This compiler below uses Thompson's construction algorithm. The compiler takes +a regex-syntax::Hir as input and emits an NFA graph as output. The NFA graph +is structured in a way that permits it to be executed by a virtual machine and +also used to efficiently build a DFA. + +The compiler deals with a slightly expanded set of NFA states than what is +in a final NFA (as exhibited by builder::State and nfa::State). 
Notably a +compiler state includes an empty node that has exactly one unconditional +epsilon transition to the next state. In other words, it's a "goto" instruction +if one views Thompson's NFA as a set of bytecode instructions. These goto +instructions are removed in a subsequent phase before returning the NFA to the +caller. The purpose of these empty nodes is that they make the construction +algorithm substantially simpler to implement. We remove them before returning +to the caller because they can represent substantial overhead when traversing +the NFA graph (either while searching using the NFA directly or while building +a DFA). + +In the future, it would be nice to provide a Glushkov compiler as well, as it +would work well as a bit-parallel NFA for smaller regexes. But the Thompson +construction is one I'm more familiar with and seems more straight-forward to +deal with when it comes to large Unicode character classes. + +Internally, the compiler uses interior mutability to improve composition in the +face of the borrow checker. In particular, we'd really like to be able to write +things like this: + + self.c_concat(exprs.iter().map(|e| self.c(e))) + +Which elegantly uses iterators to build up a sequence of compiled regex +sub-expressions and then hands it off to the concatenating compiler routine. +Without interior mutability, the borrow checker won't let us borrow `self` +mutably both inside and outside the closure at the same time. +*/ + +/// A builder for compiling an NFA from a regex's high-level intermediate +/// representation (HIR). +/// +/// This compiler provides a way to translate a parsed regex pattern into an +/// NFA state graph. The NFA state graph can either be used directly to execute +/// a search (e.g., with a Pike VM), or it can be further used to build a DFA. +/// +/// This compiler provides APIs both for compiling regex patterns directly from +/// their concrete syntax, or via a [`regex_syntax::hir::Hir`]. 
+/// +/// This compiler has various options that may be configured via +/// [`thompson::Config`](Config). +/// +/// Note that a compiler is not the same as a [`thompson::Builder`](Builder). +/// A `Builder` provides a lower level API that is uncoupled from a regex +/// pattern's concrete syntax or even its HIR. Instead, it permits stitching +/// together an NFA by hand. See its docs for examples. +/// +/// # Example: compilation from concrete syntax +/// +/// This shows how to compile an NFA from a pattern string while setting a size +/// limit on how big the NFA is allowed to be (in terms of bytes of heap used). +/// +/// ``` +/// use regex_automata::{ +/// nfa::thompson::{NFA, pikevm::PikeVM}, +/// Match, +/// }; +/// +/// let config = NFA::config().nfa_size_limit(Some(1_000)); +/// let nfa = NFA::compiler().configure(config).build(r"(?-u)\w")?; +/// +/// let re = PikeVM::new_from_nfa(nfa)?; +/// let mut cache = re.create_cache(); +/// let mut caps = re.create_captures(); +/// let expected = Some(Match::must(0, 3..4)); +/// re.captures(&mut cache, "!@#A#@!", &mut caps); +/// assert_eq!(expected, caps.get_match()); +/// +/// # Ok::<(), Box>(()) +/// ``` +/// +/// # Example: compilation from HIR +/// +/// This shows how to hand assemble a regular expression via its HIR, and then +/// compile an NFA directly from it. 
+/// +/// ``` +/// use regex_automata::{nfa::thompson::{NFA, pikevm::PikeVM}, Match}; +/// use regex_syntax::hir::{Hir, Class, ClassBytes, ClassBytesRange}; +/// +/// let hir = Hir::class(Class::Bytes(ClassBytes::new(vec![ +/// ClassBytesRange::new(b'0', b'9'), +/// ClassBytesRange::new(b'A', b'Z'), +/// ClassBytesRange::new(b'_', b'_'), +/// ClassBytesRange::new(b'a', b'z'), +/// ]))); +/// +/// let config = NFA::config().nfa_size_limit(Some(1_000)); +/// let nfa = NFA::compiler().configure(config).build_from_hir(&hir)?; +/// +/// let re = PikeVM::new_from_nfa(nfa)?; +/// let mut cache = re.create_cache(); +/// let mut caps = re.create_captures(); +/// let expected = Some(Match::must(0, 3..4)); +/// re.captures(&mut cache, "!@#A#@!", &mut caps); +/// assert_eq!(expected, caps.get_match()); +/// +/// # Ok::<(), Box>(()) +/// ``` +#[derive(Clone, Debug)] +pub struct Compiler { + /// A regex parser, used when compiling an NFA directly from a pattern + /// string. + parser: ParserBuilder, + /// The compiler configuration. + config: Config, + /// The builder for actually constructing an NFA. This provides a + /// convenient abstraction for writing a compiler. + builder: RefCell, + /// State used for compiling character classes to UTF-8 byte automata. + /// State is not retained between character class compilations. This just + /// serves to amortize allocation to the extent possible. + utf8_state: RefCell, + /// State used for arranging character classes in reverse into a trie. + trie_state: RefCell, + /// State used for caching common suffixes when compiling reverse UTF-8 + /// automata (for Unicode character classes). + utf8_suffix: RefCell, +} + +impl Compiler { + /// Create a new NFA builder with its default configuration. 
+ pub fn new() -> Compiler { + Compiler { + parser: ParserBuilder::new(), + config: Config::default(), + builder: RefCell::new(Builder::new()), + utf8_state: RefCell::new(Utf8State::new()), + trie_state: RefCell::new(RangeTrie::new()), + utf8_suffix: RefCell::new(Utf8SuffixMap::new(1000)), + } + } + + /// Compile the given regular expression pattern into an NFA. + /// + /// If there was a problem parsing the regex, then that error is returned. + /// + /// Otherwise, if there was a problem building the NFA, then an error is + /// returned. The only error that can occur is if the compiled regex would + /// exceed the size limits configured on this builder, or if any part of + /// the NFA would exceed the integer representations used. (For example, + /// too many states might plausibly occur on a 16-bit target.) + /// + /// # Example + /// + /// ``` + /// use regex_automata::{nfa::thompson::{NFA, pikevm::PikeVM}, Match}; + /// + /// let config = NFA::config().nfa_size_limit(Some(1_000)); + /// let nfa = NFA::compiler().configure(config).build(r"(?-u)\w")?; + /// + /// let re = PikeVM::new_from_nfa(nfa)?; + /// let mut cache = re.create_cache(); + /// let mut caps = re.create_captures(); + /// let expected = Some(Match::must(0, 3..4)); + /// re.captures(&mut cache, "!@#A#@!", &mut caps); + /// assert_eq!(expected, caps.get_match()); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn build(&self, pattern: &str) -> Result { + self.build_many(&[pattern]) + } + + /// Compile the given regular expression patterns into a single NFA. + /// + /// When matches are returned, the pattern ID corresponds to the index of + /// the pattern in the slice given. 
+ /// + /// # Example + /// + /// ``` + /// use regex_automata::{nfa::thompson::{NFA, pikevm::PikeVM}, Match}; + /// + /// let config = NFA::config().nfa_size_limit(Some(1_000)); + /// let nfa = NFA::compiler().configure(config).build_many(&[ + /// r"(?-u)\s", + /// r"(?-u)\w", + /// ])?; + /// + /// let re = PikeVM::new_from_nfa(nfa)?; + /// let mut cache = re.create_cache(); + /// let mut caps = re.create_captures(); + /// let expected = Some(Match::must(1, 1..2)); + /// re.captures(&mut cache, "!A! !A!", &mut caps); + /// assert_eq!(expected, caps.get_match()); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn build_many>( + &self, + patterns: &[P], + ) -> Result { + let mut hirs = vec![]; + for p in patterns { + hirs.push( + self.parser + .build() + .parse(p.as_ref()) + .map_err(BuildError::syntax)?, + ); + debug!("parsed: {:?}", p.as_ref()); + } + self.build_many_from_hir(&hirs) + } + + /// Compile the given high level intermediate representation of a regular + /// expression into an NFA. + /// + /// If there was a problem building the NFA, then an error is returned. The + /// only error that can occur is if the compiled regex would exceed the + /// size limits configured on this builder, or if any part of the NFA would + /// exceed the integer representations used. (For example, too many states + /// might plausibly occur on a 16-bit target.) 
+ /// + /// # Example + /// + /// ``` + /// use regex_automata::{nfa::thompson::{NFA, pikevm::PikeVM}, Match}; + /// use regex_syntax::hir::{Hir, Class, ClassBytes, ClassBytesRange}; + /// + /// let hir = Hir::class(Class::Bytes(ClassBytes::new(vec![ + /// ClassBytesRange::new(b'0', b'9'), + /// ClassBytesRange::new(b'A', b'Z'), + /// ClassBytesRange::new(b'_', b'_'), + /// ClassBytesRange::new(b'a', b'z'), + /// ]))); + /// + /// let config = NFA::config().nfa_size_limit(Some(1_000)); + /// let nfa = NFA::compiler().configure(config).build_from_hir(&hir)?; + /// + /// let re = PikeVM::new_from_nfa(nfa)?; + /// let mut cache = re.create_cache(); + /// let mut caps = re.create_captures(); + /// let expected = Some(Match::must(0, 3..4)); + /// re.captures(&mut cache, "!@#A#@!", &mut caps); + /// assert_eq!(expected, caps.get_match()); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn build_from_hir(&self, expr: &Hir) -> Result { + self.build_many_from_hir(&[expr]) + } + + /// Compile the given high level intermediate representations of regular + /// expressions into a single NFA. + /// + /// When matches are returned, the pattern ID corresponds to the index of + /// the pattern in the slice given. 
+ /// + /// # Example + /// + /// ``` + /// use regex_automata::{nfa::thompson::{NFA, pikevm::PikeVM}, Match}; + /// use regex_syntax::hir::{Hir, Class, ClassBytes, ClassBytesRange}; + /// + /// let hirs = &[ + /// Hir::class(Class::Bytes(ClassBytes::new(vec![ + /// ClassBytesRange::new(b'\t', b'\r'), + /// ClassBytesRange::new(b' ', b' '), + /// ]))), + /// Hir::class(Class::Bytes(ClassBytes::new(vec![ + /// ClassBytesRange::new(b'0', b'9'), + /// ClassBytesRange::new(b'A', b'Z'), + /// ClassBytesRange::new(b'_', b'_'), + /// ClassBytesRange::new(b'a', b'z'), + /// ]))), + /// ]; + /// + /// let config = NFA::config().nfa_size_limit(Some(1_000)); + /// let nfa = NFA::compiler().configure(config).build_many_from_hir(hirs)?; + /// + /// let re = PikeVM::new_from_nfa(nfa)?; + /// let mut cache = re.create_cache(); + /// let mut caps = re.create_captures(); + /// let expected = Some(Match::must(1, 1..2)); + /// re.captures(&mut cache, "!A! !A!", &mut caps); + /// assert_eq!(expected, caps.get_match()); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn build_many_from_hir>( + &self, + exprs: &[H], + ) -> Result { + self.compile(exprs) + } + + /// Apply the given NFA configuration options to this builder. + /// + /// # Example + /// + /// ``` + /// use regex_automata::nfa::thompson::NFA; + /// + /// let config = NFA::config().nfa_size_limit(Some(1_000)); + /// let nfa = NFA::compiler().configure(config).build(r"(?-u)\w")?; + /// assert_eq!(nfa.pattern_len(), 1); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn configure(&mut self, config: Config) -> &mut Compiler { + self.config = self.config.overwrite(config); + self + } + + /// Set the syntax configuration for this builder using + /// [`syntax::Config`](crate::util::syntax::Config). + /// + /// This permits setting things like case insensitivity, Unicode and multi + /// line mode. + /// + /// This syntax configuration only applies when an NFA is built directly + /// from a pattern string. 
If an NFA is built from an HIR, then all syntax + /// settings are ignored. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{nfa::thompson::NFA, util::syntax}; + /// + /// let syntax_config = syntax::Config::new().unicode(false); + /// let nfa = NFA::compiler().syntax(syntax_config).build(r"\w")?; + /// // If Unicode were enabled, the number of states would be much bigger. + /// assert!(nfa.states().len() < 15); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn syntax( + &mut self, + config: crate::util::syntax::Config, + ) -> &mut Compiler { + config.apply(&mut self.parser); + self + } +} + +impl Compiler { + /// Compile the sequence of HIR expressions given. Pattern IDs are + /// allocated starting from 0, in correspondence with the slice given. + /// + /// It is legal to provide an empty slice. In that case, the NFA returned + /// has no patterns and will never match anything. + fn compile>(&self, exprs: &[H]) -> Result { + if exprs.len() > PatternID::LIMIT { + return Err(BuildError::too_many_patterns(exprs.len())); + } + if self.config.get_reverse() + && self.config.get_which_captures().is_any() + { + return Err(BuildError::unsupported_captures()); + } + + self.builder.borrow_mut().clear(); + self.builder.borrow_mut().set_utf8(self.config.get_utf8()); + self.builder.borrow_mut().set_reverse(self.config.get_reverse()); + self.builder + .borrow_mut() + .set_look_matcher(self.config.get_look_matcher()); + self.builder + .borrow_mut() + .set_size_limit(self.config.get_nfa_size_limit())?; + + // We always add an unanchored prefix unless we were specifically told + // not to (for tests only), or if we know that the regex is anchored + // for all matches. When an unanchored prefix is not added, then the + // NFA's anchored and unanchored start states are equivalent. 
+ let all_anchored = exprs.iter().all(|e| { + e.borrow() + .properties() + .look_set_prefix() + .contains(hir::Look::Start) + }); + let anchored = !self.config.get_unanchored_prefix() || all_anchored; + let unanchored_prefix = if anchored { + self.c_empty()? + } else { + self.c_at_least(&Hir::dot(hir::Dot::AnyByte), false, 0)? + }; + + let compiled = self.c_alt_iter(exprs.iter().map(|e| { + let _ = self.start_pattern()?; + let one = self.c_cap(0, None, e.borrow())?; + let match_state_id = self.add_match()?; + self.patch(one.end, match_state_id)?; + let _ = self.finish_pattern(one.start)?; + Ok(ThompsonRef { start: one.start, end: match_state_id }) + }))?; + self.patch(unanchored_prefix.end, compiled.start)?; + let nfa = self + .builder + .borrow_mut() + .build(compiled.start, unanchored_prefix.start)?; + + debug!("HIR-to-NFA compilation complete, config: {:?}", self.config); + Ok(nfa) + } + + /// Compile an arbitrary HIR expression. + fn c(&self, expr: &Hir) -> Result { + use regex_syntax::hir::{Class, HirKind::*}; + + match *expr.kind() { + Empty => self.c_empty(), + Literal(hir::Literal(ref bytes)) => self.c_literal(bytes), + Class(Class::Bytes(ref c)) => self.c_byte_class(c), + Class(Class::Unicode(ref c)) => self.c_unicode_class(c), + Look(ref look) => self.c_look(look), + Repetition(ref rep) => self.c_repetition(rep), + Capture(ref c) => self.c_cap(c.index, c.name.as_deref(), &c.sub), + Concat(ref es) => self.c_concat(es.iter().map(|e| self.c(e))), + Alternation(ref es) => self.c_alt_slice(es), + } + } + + /// Compile a concatenation of the sub-expressions yielded by the given + /// iterator. If the iterator yields no elements, then this compiles down + /// to an "empty" state that always matches. + /// + /// If the compiler is in reverse mode, then the expressions given are + /// automatically compiled in reverse. 
+ fn c_concat(&self, mut it: I) -> Result + where + I: DoubleEndedIterator>, + { + let first = if self.is_reverse() { it.next_back() } else { it.next() }; + let ThompsonRef { start, mut end } = match first { + Some(result) => result?, + None => return self.c_empty(), + }; + loop { + let next = + if self.is_reverse() { it.next_back() } else { it.next() }; + let compiled = match next { + Some(result) => result?, + None => break, + }; + self.patch(end, compiled.start)?; + end = compiled.end; + } + Ok(ThompsonRef { start, end }) + } + + /// Compile an alternation of the given HIR values. + /// + /// This is like 'c_alt_iter', but it accepts a slice of HIR values instead + /// of an iterator of compiled NFA subgraphs. The point of accepting a + /// slice here is that it opens up some optimization opportunities. For + /// example, if all of the HIR values are literals, then this routine might + /// re-shuffle them to make NFA epsilon closures substantially faster. + fn c_alt_slice(&self, exprs: &[Hir]) -> Result { + // self.c_alt_iter(exprs.iter().map(|e| self.c(e))) + let literal_count = exprs + .iter() + .filter(|e| { + matches!(*e.kind(), hir::HirKind::Literal(hir::Literal(_))) + }) + .count(); + if literal_count <= 1 || literal_count < exprs.len() { + return self.c_alt_iter(exprs.iter().map(|e| self.c(e))); + } + + let mut trie = if self.is_reverse() { + LiteralTrie::reverse() + } else { + LiteralTrie::forward() + }; + for expr in exprs.iter() { + let literal = match *expr.kind() { + hir::HirKind::Literal(hir::Literal(ref bytes)) => bytes, + _ => unreachable!(), + }; + trie.add(literal)?; + } + trie.compile(&mut self.builder.borrow_mut()) + } + + /// Compile an alternation, where each element yielded by the given + /// iterator represents an item in the alternation. If the iterator yields + /// no elements, then this compiles down to a "fail" state. 
+ /// + /// In an alternation, expressions appearing earlier are "preferred" at + /// match time over expressions appearing later. At least, this is true + /// when using "leftmost first" match semantics. (If "leftmost longest" are + /// ever added in the future, then this preference order of priority would + /// not apply in that mode.) + fn c_alt_iter(&self, mut it: I) -> Result + where + I: Iterator>, + { + let first = match it.next() { + None => return self.c_fail(), + Some(result) => result?, + }; + let second = match it.next() { + None => return Ok(first), + Some(result) => result?, + }; + + let union = self.add_union()?; + let end = self.add_empty()?; + self.patch(union, first.start)?; + self.patch(first.end, end)?; + self.patch(union, second.start)?; + self.patch(second.end, end)?; + for result in it { + let compiled = result?; + self.patch(union, compiled.start)?; + self.patch(compiled.end, end)?; + } + Ok(ThompsonRef { start: union, end }) + } + + /// Compile the given capture sub-expression. `expr` should be the + /// sub-expression contained inside the capture. If "capture" states are + /// enabled, then they are added as appropriate. + /// + /// This accepts the pieces of a capture instead of a `hir::Capture` so + /// that it's easy to manufacture a "fake" group when necessary, e.g., for + /// adding the entire pattern as if it were a group in order to create + /// appropriate "capture" states in the NFA. + fn c_cap( + &self, + index: u32, + name: Option<&str>, + expr: &Hir, + ) -> Result { + match self.config.get_which_captures() { + // No capture states means we always skip them. + WhichCaptures::None => return self.c(expr), + // Implicit captures states means we only add when index==0 since + // index==0 implies the group is implicit. 
+ WhichCaptures::Implicit if index > 0 => return self.c(expr), + _ => {} + } + + let start = self.add_capture_start(index, name)?; + let inner = self.c(expr)?; + let end = self.add_capture_end(index)?; + self.patch(start, inner.start)?; + self.patch(inner.end, end)?; + Ok(ThompsonRef { start, end }) + } + + /// Compile the given repetition expression. This handles all types of + /// repetitions and greediness. + fn c_repetition( + &self, + rep: &hir::Repetition, + ) -> Result { + match (rep.min, rep.max) { + (0, Some(1)) => self.c_zero_or_one(&rep.sub, rep.greedy), + (min, None) => self.c_at_least(&rep.sub, rep.greedy, min), + (min, Some(max)) if min == max => self.c_exactly(&rep.sub, min), + (min, Some(max)) => self.c_bounded(&rep.sub, rep.greedy, min, max), + } + } + + /// Compile the given expression such that it matches at least `min` times, + /// but no more than `max` times. + /// + /// When `greedy` is true, then the preference is for the expression to + /// match as much as possible. Otherwise, it will match as little as + /// possible. + fn c_bounded( + &self, + expr: &Hir, + greedy: bool, + min: u32, + max: u32, + ) -> Result { + let prefix = self.c_exactly(expr, min)?; + if min == max { + return Ok(prefix); + } + + // It is tempting here to compile the rest here as a concatenation + // of zero-or-one matches. i.e., for `a{2,5}`, compile it as if it + // were `aaa?a?a?`. The problem here is that it leads to this program: + // + // >000000: 61 => 01 + // 000001: 61 => 02 + // 000002: union(03, 04) + // 000003: 61 => 04 + // 000004: union(05, 06) + // 000005: 61 => 06 + // 000006: union(07, 08) + // 000007: 61 => 08 + // 000008: MATCH + // + // And effectively, once you hit state 2, the epsilon closure will + // include states 3, 5, 6, 7 and 8, which is quite a bit. 
It is better + // to instead compile it like so: + // + // >000000: 61 => 01 + // 000001: 61 => 02 + // 000002: union(03, 08) + // 000003: 61 => 04 + // 000004: union(05, 08) + // 000005: 61 => 06 + // 000006: union(07, 08) + // 000007: 61 => 08 + // 000008: MATCH + // + // So that the epsilon closure of state 2 is now just 3 and 8. + let empty = self.add_empty()?; + let mut prev_end = prefix.end; + for _ in min..max { + let union = if greedy { + self.add_union() + } else { + self.add_union_reverse() + }?; + let compiled = self.c(expr)?; + self.patch(prev_end, union)?; + self.patch(union, compiled.start)?; + self.patch(union, empty)?; + prev_end = compiled.end; + } + self.patch(prev_end, empty)?; + Ok(ThompsonRef { start: prefix.start, end: empty }) + } + + /// Compile the given expression such that it may be matched `n` or more + /// times, where `n` can be any integer. (Although a particularly large + /// integer is likely to run afoul of any configured size limits.) + /// + /// When `greedy` is true, then the preference is for the expression to + /// match as much as possible. Otherwise, it will match as little as + /// possible. + fn c_at_least( + &self, + expr: &Hir, + greedy: bool, + n: u32, + ) -> Result { + if n == 0 { + // When the expression cannot match the empty string, then we + // can get away with something much simpler: just one 'alt' + // instruction that optionally repeats itself. But if the expr + // can match the empty string... see below. + if expr.properties().minimum_len().map_or(false, |len| len > 0) { + let union = if greedy { + self.add_union() + } else { + self.add_union_reverse() + }?; + let compiled = self.c(expr)?; + self.patch(union, compiled.start)?; + self.patch(compiled.end, union)?; + return Ok(ThompsonRef { start: union, end: union }); + } + + // What's going on here? Shouldn't x* be simpler than this? 
It + // turns out that when implementing leftmost-first (Perl-like) + // match semantics, x* results in an incorrect preference order + // when computing the transitive closure of states if and only if + // 'x' can match the empty string. So instead, we compile x* as + // (x+)?, which preserves the correct preference order. + // + // See: https://github.com/rust-lang/regex/issues/779 + let compiled = self.c(expr)?; + let plus = if greedy { + self.add_union() + } else { + self.add_union_reverse() + }?; + self.patch(compiled.end, plus)?; + self.patch(plus, compiled.start)?; + + let question = if greedy { + self.add_union() + } else { + self.add_union_reverse() + }?; + let empty = self.add_empty()?; + self.patch(question, compiled.start)?; + self.patch(question, empty)?; + self.patch(plus, empty)?; + Ok(ThompsonRef { start: question, end: empty }) + } else if n == 1 { + let compiled = self.c(expr)?; + let union = if greedy { + self.add_union() + } else { + self.add_union_reverse() + }?; + self.patch(compiled.end, union)?; + self.patch(union, compiled.start)?; + Ok(ThompsonRef { start: compiled.start, end: union }) + } else { + let prefix = self.c_exactly(expr, n - 1)?; + let last = self.c(expr)?; + let union = if greedy { + self.add_union() + } else { + self.add_union_reverse() + }?; + self.patch(prefix.end, last.start)?; + self.patch(last.end, union)?; + self.patch(union, last.start)?; + Ok(ThompsonRef { start: prefix.start, end: union }) + } + } + + /// Compile the given expression such that it may be matched zero or one + /// times. + /// + /// When `greedy` is true, then the preference is for the expression to + /// match as much as possible. Otherwise, it will match as little as + /// possible. 
+ fn c_zero_or_one( + &self, + expr: &Hir, + greedy: bool, + ) -> Result { + let union = + if greedy { self.add_union() } else { self.add_union_reverse() }?; + let compiled = self.c(expr)?; + let empty = self.add_empty()?; + self.patch(union, compiled.start)?; + self.patch(union, empty)?; + self.patch(compiled.end, empty)?; + Ok(ThompsonRef { start: union, end: empty }) + } + + /// Compile the given HIR expression exactly `n` times. + fn c_exactly( + &self, + expr: &Hir, + n: u32, + ) -> Result { + let it = (0..n).map(|_| self.c(expr)); + self.c_concat(it) + } + + /// Compile the given byte oriented character class. + /// + /// This uses "sparse" states to represent an alternation between ranges in + /// this character class. We can use "sparse" states instead of stitching + /// together a "union" state because all ranges in a character class have + /// equal priority *and* are non-overlapping (thus, only one can match, so + /// there's never a question of priority in the first place). This saves a + /// fair bit of overhead when traversing an NFA. + /// + /// This routine compiles an empty character class into a "fail" state. + fn c_byte_class( + &self, + cls: &hir::ClassBytes, + ) -> Result { + let end = self.add_empty()?; + let mut trans = Vec::with_capacity(cls.ranges().len()); + for r in cls.iter() { + trans.push(Transition { + start: r.start(), + end: r.end(), + next: end, + }); + } + Ok(ThompsonRef { start: self.add_sparse(trans)?, end }) + } + + /// Compile the given Unicode character class. + /// + /// This routine specifically tries to use various types of compression, + /// since UTF-8 automata of large classes can get quite large. The specific + /// type of compression used depends on forward vs reverse compilation, and + /// whether NFA shrinking is enabled or not. + /// + /// Aside from repetitions causing lots of repeat group, this is like the + /// single most expensive part of regex compilation. 
Therefore, a large part + /// of the expense of compilation may be reduce by disabling Unicode in the + /// pattern. + /// + /// This routine compiles an empty character class into a "fail" state. + fn c_unicode_class( + &self, + cls: &hir::ClassUnicode, + ) -> Result { + // If all we have are ASCII ranges wrapped in a Unicode package, then + // there is zero reason to bring out the big guns. We can fit all ASCII + // ranges within a single sparse state. + if cls.is_ascii() { + let end = self.add_empty()?; + let mut trans = Vec::with_capacity(cls.ranges().len()); + for r in cls.iter() { + // The unwraps below are OK because we've verified that this + // class only contains ASCII codepoints. + trans.push(Transition { + // FIXME(1.59): use the 'TryFrom for u8' impl. + start: u8::try_from(u32::from(r.start())).unwrap(), + end: u8::try_from(u32::from(r.end())).unwrap(), + next: end, + }); + } + Ok(ThompsonRef { start: self.add_sparse(trans)?, end }) + } else if self.is_reverse() { + if !self.config.get_shrink() { + // When we don't want to spend the extra time shrinking, we + // compile the UTF-8 automaton in reverse using something like + // the "naive" approach, but will attempt to re-use common + // suffixes. + self.c_unicode_class_reverse_with_suffix(cls) + } else { + // When we want to shrink our NFA for reverse UTF-8 automata, + // we cannot feed UTF-8 sequences directly to the UTF-8 + // compiler, since the UTF-8 compiler requires all sequences + // to be lexicographically sorted. Instead, we organize our + // sequences into a range trie, which can then output our + // sequences in the correct order. Unfortunately, building the + // range trie is fairly expensive (but not nearly as expensive + // as building a DFA). Hence the reason why the 'shrink' option + // exists, so that this path can be toggled off. For example, + // we might want to turn this off if we know we won't be + // compiling a DFA. 
+ let mut trie = self.trie_state.borrow_mut(); + trie.clear(); + + for rng in cls.iter() { + for mut seq in Utf8Sequences::new(rng.start(), rng.end()) { + seq.reverse(); + trie.insert(seq.as_slice()); + } + } + let mut builder = self.builder.borrow_mut(); + let mut utf8_state = self.utf8_state.borrow_mut(); + let mut utf8c = + Utf8Compiler::new(&mut *builder, &mut *utf8_state)?; + trie.iter(|seq| { + utf8c.add(&seq)?; + Ok(()) + })?; + utf8c.finish() + } + } else { + // In the forward direction, we always shrink our UTF-8 automata + // because we can stream it right into the UTF-8 compiler. There + // is almost no downside (in either memory or time) to using this + // approach. + let mut builder = self.builder.borrow_mut(); + let mut utf8_state = self.utf8_state.borrow_mut(); + let mut utf8c = + Utf8Compiler::new(&mut *builder, &mut *utf8_state)?; + for rng in cls.iter() { + for seq in Utf8Sequences::new(rng.start(), rng.end()) { + utf8c.add(seq.as_slice())?; + } + } + utf8c.finish() + } + + // For reference, the code below is the "naive" version of compiling a + // UTF-8 automaton. It is deliciously simple (and works for both the + // forward and reverse cases), but will unfortunately produce very + // large NFAs. When compiling a forward automaton, the size difference + // can sometimes be an order of magnitude. For example, the '\w' regex + // will generate about ~3000 NFA states using the naive approach below, + // but only 283 states when using the approach above. This is because + // the approach above actually compiles a *minimal* (or near minimal, + // because of the bounded hashmap for reusing equivalent states) UTF-8 + // automaton. + // + // The code below is kept as a reference point in order to make it + // easier to understand the higher level goal here. Although, it will + // almost certainly bit-rot, so keep that in mind. 
Also, if you try to + // use it, some of the tests in this module will fail because they look + // for terser byte code produce by the more optimized handling above. + // But the integration test suite should still pass. + // + // One good example of the substantial difference this can make is to + // compare and contrast performance of the Pike VM when the code below + // is active vs the code above. Here's an example to try: + // + // regex-cli find match pikevm -b -p '(?m)^\w{20}' non-ascii-file + // + // With Unicode classes generated below, this search takes about 45s on + // my machine. But with the compressed version above, the search takes + // only around 1.4s. The NFA is also 20% smaller. This is in part due + // to the compression, but also because of the utilization of 'sparse' + // NFA states. They lead to much less state shuffling during the NFA + // search. + /* + let it = cls + .iter() + .flat_map(|rng| Utf8Sequences::new(rng.start(), rng.end())) + .map(|seq| { + let it = seq + .as_slice() + .iter() + .map(|rng| self.c_range(rng.start, rng.end)); + self.c_concat(it) + }); + self.c_alt_iter(it) + */ + } + + /// Compile the given Unicode character class in reverse with suffix + /// caching. + /// + /// This is a "quick" way to compile large Unicode classes into reverse + /// UTF-8 automata while doing a small amount of compression on that + /// automata by reusing common suffixes. + /// + /// A more comprehensive compression scheme can be accomplished by using + /// a range trie to efficiently sort a reverse sequence of UTF-8 byte + /// rqanges, and then use Daciuk's algorithm via `Utf8Compiler`. + /// + /// This is the technique used when "NFA shrinking" is disabled. + /// + /// (This also tries to use "sparse" states where possible, just like + /// `c_byte_class` does.) + fn c_unicode_class_reverse_with_suffix( + &self, + cls: &hir::ClassUnicode, + ) -> Result { + // N.B. 
It would likely be better to cache common *prefixes* in the + // reverse direction, but it's not quite clear how to do that. The + // advantage of caching suffixes is that it does give us a win, and + // has a very small additional overhead. + let mut cache = self.utf8_suffix.borrow_mut(); + cache.clear(); + + let union = self.add_union()?; + let alt_end = self.add_empty()?; + for urng in cls.iter() { + for seq in Utf8Sequences::new(urng.start(), urng.end()) { + let mut end = alt_end; + for brng in seq.as_slice() { + let key = Utf8SuffixKey { + from: end, + start: brng.start, + end: brng.end, + }; + let hash = cache.hash(&key); + if let Some(id) = cache.get(&key, hash) { + end = id; + continue; + } + + let compiled = self.c_range(brng.start, brng.end)?; + self.patch(compiled.end, end)?; + end = compiled.start; + cache.set(key, hash, end); + } + self.patch(union, end)?; + } + } + Ok(ThompsonRef { start: union, end: alt_end }) + } + + /// Compile the given HIR look-around assertion to an NFA look-around + /// assertion. 
+ fn c_look(&self, anchor: &hir::Look) -> Result { + let look = match *anchor { + hir::Look::Start => Look::Start, + hir::Look::End => Look::End, + hir::Look::StartLF => Look::StartLF, + hir::Look::EndLF => Look::EndLF, + hir::Look::StartCRLF => Look::StartCRLF, + hir::Look::EndCRLF => Look::EndCRLF, + hir::Look::WordAscii => Look::WordAscii, + hir::Look::WordAsciiNegate => Look::WordAsciiNegate, + hir::Look::WordUnicode => Look::WordUnicode, + hir::Look::WordUnicodeNegate => Look::WordUnicodeNegate, + hir::Look::WordStartAscii => Look::WordStartAscii, + hir::Look::WordEndAscii => Look::WordEndAscii, + hir::Look::WordStartUnicode => Look::WordStartUnicode, + hir::Look::WordEndUnicode => Look::WordEndUnicode, + hir::Look::WordStartHalfAscii => Look::WordStartHalfAscii, + hir::Look::WordEndHalfAscii => Look::WordEndHalfAscii, + hir::Look::WordStartHalfUnicode => Look::WordStartHalfUnicode, + hir::Look::WordEndHalfUnicode => Look::WordEndHalfUnicode, + }; + let id = self.add_look(look)?; + Ok(ThompsonRef { start: id, end: id }) + } + + /// Compile the given byte string to a concatenation of bytes. + fn c_literal(&self, bytes: &[u8]) -> Result { + self.c_concat(bytes.iter().copied().map(|b| self.c_range(b, b))) + } + + /// Compile a "range" state with one transition that may only be followed + /// if the input byte is in the (inclusive) range given. + /// + /// Both the `start` and `end` locations point to the state created. + /// Callers will likely want to keep the `start`, but patch the `end` to + /// point to some other state. + fn c_range(&self, start: u8, end: u8) -> Result { + let id = self.add_range(start, end)?; + Ok(ThompsonRef { start: id, end: id }) + } + + /// Compile an "empty" state with one unconditional epsilon transition. + /// + /// Both the `start` and `end` locations point to the state created. + /// Callers will likely want to keep the `start`, but patch the `end` to + /// point to some other state. 
+ fn c_empty(&self) -> Result { + let id = self.add_empty()?; + Ok(ThompsonRef { start: id, end: id }) + } + + /// Compile a "fail" state that can never have any outgoing transitions. + fn c_fail(&self) -> Result { + let id = self.add_fail()?; + Ok(ThompsonRef { start: id, end: id }) + } + + // The below helpers are meant to be simple wrappers around the + // corresponding Builder methods. For the most part, they let us write + // 'self.add_foo()' instead of 'self.builder.borrow_mut().add_foo()', where + // the latter is a mouthful. Some of the methods do inject a little bit + // of extra logic. e.g., Flipping look-around operators when compiling in + // reverse mode. + + fn patch(&self, from: StateID, to: StateID) -> Result<(), BuildError> { + self.builder.borrow_mut().patch(from, to) + } + + fn start_pattern(&self) -> Result { + self.builder.borrow_mut().start_pattern() + } + + fn finish_pattern( + &self, + start_id: StateID, + ) -> Result { + self.builder.borrow_mut().finish_pattern(start_id) + } + + fn add_empty(&self) -> Result { + self.builder.borrow_mut().add_empty() + } + + fn add_range(&self, start: u8, end: u8) -> Result { + self.builder.borrow_mut().add_range(Transition { + start, + end, + next: StateID::ZERO, + }) + } + + fn add_sparse( + &self, + ranges: Vec, + ) -> Result { + self.builder.borrow_mut().add_sparse(ranges) + } + + fn add_look(&self, mut look: Look) -> Result { + if self.is_reverse() { + look = look.reversed(); + } + self.builder.borrow_mut().add_look(StateID::ZERO, look) + } + + fn add_union(&self) -> Result { + self.builder.borrow_mut().add_union(vec![]) + } + + fn add_union_reverse(&self) -> Result { + self.builder.borrow_mut().add_union_reverse(vec![]) + } + + fn add_capture_start( + &self, + capture_index: u32, + name: Option<&str>, + ) -> Result { + let name = name.map(|n| Arc::from(n)); + self.builder.borrow_mut().add_capture_start( + StateID::ZERO, + capture_index, + name, + ) + } + + fn add_capture_end( + &self, + capture_index: 
u32, + ) -> Result { + self.builder.borrow_mut().add_capture_end(StateID::ZERO, capture_index) + } + + fn add_fail(&self) -> Result { + self.builder.borrow_mut().add_fail() + } + + fn add_match(&self) -> Result { + self.builder.borrow_mut().add_match() + } + + fn is_reverse(&self) -> bool { + self.config.get_reverse() + } +} + +/// A value that represents the result of compiling a sub-expression of a +/// regex's HIR. Specifically, this represents a sub-graph of the NFA that +/// has an initial state at `start` and a final state at `end`. +#[derive(Clone, Copy, Debug)] +pub(crate) struct ThompsonRef { + pub(crate) start: StateID, + pub(crate) end: StateID, +} + +/// A UTF-8 compiler based on Daciuk's algorithm for compilining minimal DFAs +/// from a lexicographically sorted sequence of strings in linear time. +/// +/// The trick here is that any Unicode codepoint range can be converted to +/// a sequence of byte ranges that form a UTF-8 automaton. Connecting them +/// together via an alternation is trivial, and indeed, it works. However, +/// there is a lot of redundant structure in many UTF-8 automatons. Since our +/// UTF-8 ranges are in lexicographic order, we can use Daciuk's algorithm +/// to build nearly minimal DFAs in linear time. (They are guaranteed to be +/// minimal because we use a bounded cache of previously build DFA states.) +/// +/// The drawback is that this sadly doesn't work for reverse automata, since +/// the ranges are no longer in lexicographic order. For that, we invented the +/// range trie (which gets its own module). Once a range trie is built, we then +/// use this same Utf8Compiler to build a reverse UTF-8 automaton. +/// +/// The high level idea is described here: +/// https://blog.burntsushi.net/transducers/#finite-state-machines-as-data-structures +/// +/// There is also another implementation of this in the `fst` crate. 
+#[derive(Debug)] +struct Utf8Compiler<'a> { + builder: &'a mut Builder, + state: &'a mut Utf8State, + target: StateID, +} + +#[derive(Clone, Debug)] +struct Utf8State { + compiled: Utf8BoundedMap, + uncompiled: Vec, +} + +#[derive(Clone, Debug)] +struct Utf8Node { + trans: Vec, + last: Option, +} + +#[derive(Clone, Debug)] +struct Utf8LastTransition { + start: u8, + end: u8, +} + +impl Utf8State { + fn new() -> Utf8State { + Utf8State { compiled: Utf8BoundedMap::new(10_000), uncompiled: vec![] } + } + + fn clear(&mut self) { + self.compiled.clear(); + self.uncompiled.clear(); + } +} + +impl<'a> Utf8Compiler<'a> { + fn new( + builder: &'a mut Builder, + state: &'a mut Utf8State, + ) -> Result, BuildError> { + let target = builder.add_empty()?; + state.clear(); + let mut utf8c = Utf8Compiler { builder, state, target }; + utf8c.add_empty(); + Ok(utf8c) + } + + fn finish(&mut self) -> Result { + self.compile_from(0)?; + let node = self.pop_root(); + let start = self.compile(node)?; + Ok(ThompsonRef { start, end: self.target }) + } + + fn add(&mut self, ranges: &[Utf8Range]) -> Result<(), BuildError> { + let prefix_len = ranges + .iter() + .zip(&self.state.uncompiled) + .take_while(|&(range, node)| { + node.last.as_ref().map_or(false, |t| { + (t.start, t.end) == (range.start, range.end) + }) + }) + .count(); + assert!(prefix_len < ranges.len()); + self.compile_from(prefix_len)?; + self.add_suffix(&ranges[prefix_len..]); + Ok(()) + } + + fn compile_from(&mut self, from: usize) -> Result<(), BuildError> { + let mut next = self.target; + while from + 1 < self.state.uncompiled.len() { + let node = self.pop_freeze(next); + next = self.compile(node)?; + } + self.top_last_freeze(next); + Ok(()) + } + + fn compile( + &mut self, + node: Vec, + ) -> Result { + let hash = self.state.compiled.hash(&node); + if let Some(id) = self.state.compiled.get(&node, hash) { + return Ok(id); + } + let id = self.builder.add_sparse(node.clone())?; + self.state.compiled.set(node, hash, id); + 
Ok(id) + } + + fn add_suffix(&mut self, ranges: &[Utf8Range]) { + assert!(!ranges.is_empty()); + let last = self + .state + .uncompiled + .len() + .checked_sub(1) + .expect("non-empty nodes"); + assert!(self.state.uncompiled[last].last.is_none()); + self.state.uncompiled[last].last = Some(Utf8LastTransition { + start: ranges[0].start, + end: ranges[0].end, + }); + for r in &ranges[1..] { + self.state.uncompiled.push(Utf8Node { + trans: vec![], + last: Some(Utf8LastTransition { start: r.start, end: r.end }), + }); + } + } + + fn add_empty(&mut self) { + self.state.uncompiled.push(Utf8Node { trans: vec![], last: None }); + } + + fn pop_freeze(&mut self, next: StateID) -> Vec { + let mut uncompiled = self.state.uncompiled.pop().unwrap(); + uncompiled.set_last_transition(next); + uncompiled.trans + } + + fn pop_root(&mut self) -> Vec { + assert_eq!(self.state.uncompiled.len(), 1); + assert!(self.state.uncompiled[0].last.is_none()); + self.state.uncompiled.pop().expect("non-empty nodes").trans + } + + fn top_last_freeze(&mut self, next: StateID) { + let last = self + .state + .uncompiled + .len() + .checked_sub(1) + .expect("non-empty nodes"); + self.state.uncompiled[last].set_last_transition(next); + } +} + +impl Utf8Node { + fn set_last_transition(&mut self, next: StateID) { + if let Some(last) = self.last.take() { + self.trans.push(Transition { + start: last.start, + end: last.end, + next, + }); + } + } +} + +#[cfg(test)] +mod tests { + use alloc::{vec, vec::Vec}; + + use crate::{ + nfa::thompson::{SparseTransitions, State, Transition, NFA}, + util::primitives::{PatternID, SmallIndex, StateID}, + }; + + use super::*; + + fn build(pattern: &str) -> NFA { + NFA::compiler() + .configure( + NFA::config() + .which_captures(WhichCaptures::None) + .unanchored_prefix(false), + ) + .build(pattern) + .unwrap() + } + + fn pid(id: usize) -> PatternID { + PatternID::new(id).unwrap() + } + + fn sid(id: usize) -> StateID { + StateID::new(id).unwrap() + } + + fn s_byte(byte: u8, 
next: usize) -> State { + let next = sid(next); + let trans = Transition { start: byte, end: byte, next }; + State::ByteRange { trans } + } + + fn s_range(start: u8, end: u8, next: usize) -> State { + let next = sid(next); + let trans = Transition { start, end, next }; + State::ByteRange { trans } + } + + fn s_sparse(transitions: &[(u8, u8, usize)]) -> State { + let transitions = transitions + .iter() + .map(|&(start, end, next)| Transition { + start, + end, + next: sid(next), + }) + .collect(); + State::Sparse(SparseTransitions { transitions }) + } + + fn s_bin_union(alt1: usize, alt2: usize) -> State { + State::BinaryUnion { alt1: sid(alt1), alt2: sid(alt2) } + } + + fn s_union(alts: &[usize]) -> State { + State::Union { + alternates: alts + .iter() + .map(|&id| sid(id)) + .collect::>() + .into_boxed_slice(), + } + } + + fn s_cap(next: usize, pattern: usize, index: usize, slot: usize) -> State { + State::Capture { + next: sid(next), + pattern_id: pid(pattern), + group_index: SmallIndex::new(index).unwrap(), + slot: SmallIndex::new(slot).unwrap(), + } + } + + fn s_fail() -> State { + State::Fail + } + + fn s_match(id: usize) -> State { + State::Match { pattern_id: pid(id) } + } + + // Test that building an unanchored NFA has an appropriate `(?s:.)*?` + // prefix. 
+ #[test] + fn compile_unanchored_prefix() { + let nfa = NFA::compiler() + .configure(NFA::config().which_captures(WhichCaptures::None)) + .build(r"a") + .unwrap(); + assert_eq!( + nfa.states(), + &[ + s_bin_union(2, 1), + s_range(0, 255, 0), + s_byte(b'a', 3), + s_match(0), + ] + ); + } + + #[test] + fn compile_empty() { + assert_eq!(build("").states(), &[s_match(0),]); + } + + #[test] + fn compile_literal() { + assert_eq!(build("a").states(), &[s_byte(b'a', 1), s_match(0),]); + assert_eq!( + build("ab").states(), + &[s_byte(b'a', 1), s_byte(b'b', 2), s_match(0),] + ); + assert_eq!( + build("☃").states(), + &[s_byte(0xE2, 1), s_byte(0x98, 2), s_byte(0x83, 3), s_match(0)] + ); + + // Check that non-UTF-8 literals work. + let nfa = NFA::compiler() + .configure( + NFA::config() + .which_captures(WhichCaptures::None) + .unanchored_prefix(false), + ) + .syntax(crate::util::syntax::Config::new().utf8(false)) + .build(r"(?-u)\xFF") + .unwrap(); + assert_eq!(nfa.states(), &[s_byte(b'\xFF', 1), s_match(0),]); + } + + #[test] + fn compile_class_ascii() { + assert_eq!( + build(r"[a-z]").states(), + &[s_range(b'a', b'z', 1), s_match(0),] + ); + assert_eq!( + build(r"[x-za-c]").states(), + &[s_sparse(&[(b'a', b'c', 1), (b'x', b'z', 1)]), s_match(0)] + ); + } + + #[test] + #[cfg(not(miri))] + fn compile_class_unicode() { + assert_eq!( + build(r"[\u03B1-\u03B4]").states(), + &[s_range(0xB1, 0xB4, 2), s_byte(0xCE, 0), s_match(0)] + ); + assert_eq!( + build(r"[\u03B1-\u03B4\u{1F919}-\u{1F91E}]").states(), + &[ + s_range(0xB1, 0xB4, 5), + s_range(0x99, 0x9E, 5), + s_byte(0xA4, 1), + s_byte(0x9F, 2), + s_sparse(&[(0xCE, 0xCE, 0), (0xF0, 0xF0, 3)]), + s_match(0), + ] + ); + assert_eq!( + build(r"[a-z☃]").states(), + &[ + s_byte(0x83, 3), + s_byte(0x98, 0), + s_sparse(&[(b'a', b'z', 3), (0xE2, 0xE2, 1)]), + s_match(0), + ] + ); + } + + #[test] + fn compile_repetition() { + assert_eq!( + build(r"a?").states(), + &[s_bin_union(1, 2), s_byte(b'a', 2), s_match(0),] + ); + assert_eq!( + 
build(r"a??").states(), + &[s_bin_union(2, 1), s_byte(b'a', 2), s_match(0),] + ); + } + + #[test] + fn compile_group() { + assert_eq!( + build(r"ab+").states(), + &[s_byte(b'a', 1), s_byte(b'b', 2), s_bin_union(1, 3), s_match(0)] + ); + assert_eq!( + build(r"(ab)").states(), + &[s_byte(b'a', 1), s_byte(b'b', 2), s_match(0)] + ); + assert_eq!( + build(r"(ab)+").states(), + &[s_byte(b'a', 1), s_byte(b'b', 2), s_bin_union(0, 3), s_match(0)] + ); + } + + #[test] + fn compile_alternation() { + assert_eq!( + build(r"a|b").states(), + &[s_range(b'a', b'b', 1), s_match(0)] + ); + assert_eq!( + build(r"ab|cd").states(), + &[ + s_byte(b'b', 3), + s_byte(b'd', 3), + s_sparse(&[(b'a', b'a', 0), (b'c', b'c', 1)]), + s_match(0) + ], + ); + assert_eq!( + build(r"|b").states(), + &[s_byte(b'b', 2), s_bin_union(2, 0), s_match(0)] + ); + assert_eq!( + build(r"a|").states(), + &[s_byte(b'a', 2), s_bin_union(0, 2), s_match(0)] + ); + } + + // This tests the use of a non-binary union, i.e., a state with more than + // 2 unconditional epsilon transitions. The only place they tend to appear + // is in reverse NFAs when shrinking is disabled. Otherwise, 'binary-union' + // and 'sparse' tend to cover all other cases of alternation. 
+ #[test] + fn compile_non_binary_union() { + let nfa = NFA::compiler() + .configure( + NFA::config() + .which_captures(WhichCaptures::None) + .reverse(true) + .shrink(false) + .unanchored_prefix(false), + ) + .build(r"[\u1000\u2000\u3000]") + .unwrap(); + assert_eq!( + nfa.states(), + &[ + s_union(&[3, 6, 9]), + s_byte(0xE1, 10), + s_byte(0x80, 1), + s_byte(0x80, 2), + s_byte(0xE2, 10), + s_byte(0x80, 4), + s_byte(0x80, 5), + s_byte(0xE3, 10), + s_byte(0x80, 7), + s_byte(0x80, 8), + s_match(0), + ] + ); + } + + #[test] + fn compile_many_start_pattern() { + let nfa = NFA::compiler() + .configure( + NFA::config() + .which_captures(WhichCaptures::None) + .unanchored_prefix(false), + ) + .build_many(&["a", "b"]) + .unwrap(); + assert_eq!( + nfa.states(), + &[ + s_byte(b'a', 1), + s_match(0), + s_byte(b'b', 3), + s_match(1), + s_bin_union(0, 2), + ] + ); + assert_eq!(nfa.start_anchored().as_usize(), 4); + assert_eq!(nfa.start_unanchored().as_usize(), 4); + // Test that the start states for each individual pattern are correct. + assert_eq!(nfa.start_pattern(pid(0)).unwrap(), sid(0)); + assert_eq!(nfa.start_pattern(pid(1)).unwrap(), sid(2)); + } + + // This tests that our compiler can handle an empty character class. At the + // time of writing, the regex parser forbids it, so the only way to test it + // is to provide a hand written HIR. + #[test] + fn empty_class_bytes() { + use regex_syntax::hir::{Class, ClassBytes, Hir}; + + let hir = Hir::class(Class::Bytes(ClassBytes::new(vec![]))); + let config = NFA::config() + .which_captures(WhichCaptures::None) + .unanchored_prefix(false); + let nfa = + NFA::compiler().configure(config).build_from_hir(&hir).unwrap(); + assert_eq!(nfa.states(), &[s_fail(), s_match(0)]); + } + + // Like empty_class_bytes, but for a Unicode class. 
+ #[test] + fn empty_class_unicode() { + use regex_syntax::hir::{Class, ClassUnicode, Hir}; + + let hir = Hir::class(Class::Unicode(ClassUnicode::new(vec![]))); + let config = NFA::config() + .which_captures(WhichCaptures::None) + .unanchored_prefix(false); + let nfa = + NFA::compiler().configure(config).build_from_hir(&hir).unwrap(); + assert_eq!(nfa.states(), &[s_fail(), s_match(0)]); + } + + #[test] + fn compile_captures_all() { + let nfa = NFA::compiler() + .configure( + NFA::config() + .unanchored_prefix(false) + .which_captures(WhichCaptures::All), + ) + .build("a(b)c") + .unwrap(); + assert_eq!( + nfa.states(), + &[ + s_cap(1, 0, 0, 0), + s_byte(b'a', 2), + s_cap(3, 0, 1, 2), + s_byte(b'b', 4), + s_cap(5, 0, 1, 3), + s_byte(b'c', 6), + s_cap(7, 0, 0, 1), + s_match(0) + ] + ); + let ginfo = nfa.group_info(); + assert_eq!(2, ginfo.all_group_len()); + } + + #[test] + fn compile_captures_implicit() { + let nfa = NFA::compiler() + .configure( + NFA::config() + .unanchored_prefix(false) + .which_captures(WhichCaptures::Implicit), + ) + .build("a(b)c") + .unwrap(); + assert_eq!( + nfa.states(), + &[ + s_cap(1, 0, 0, 0), + s_byte(b'a', 2), + s_byte(b'b', 3), + s_byte(b'c', 4), + s_cap(5, 0, 0, 1), + s_match(0) + ] + ); + let ginfo = nfa.group_info(); + assert_eq!(1, ginfo.all_group_len()); + } + + #[test] + fn compile_captures_none() { + let nfa = NFA::compiler() + .configure( + NFA::config() + .unanchored_prefix(false) + .which_captures(WhichCaptures::None), + ) + .build("a(b)c") + .unwrap(); + assert_eq!( + nfa.states(), + &[s_byte(b'a', 1), s_byte(b'b', 2), s_byte(b'c', 3), s_match(0)] + ); + let ginfo = nfa.group_info(); + assert_eq!(0, ginfo.all_group_len()); + } +} diff --git a/vendor/regex-automata/src/nfa/thompson/error.rs b/vendor/regex-automata/src/nfa/thompson/error.rs new file mode 100644 index 0000000..3c2fa8a --- /dev/null +++ b/vendor/regex-automata/src/nfa/thompson/error.rs @@ -0,0 +1,185 @@ +use crate::util::{ + captures, look, + 
primitives::{PatternID, StateID}, +}; + +/// An error that can occur during the construction of a Thompson NFA. +/// +/// This error does not provide many introspection capabilities. There are +/// generally only two things you can do with it: +/// +/// * Obtain a human readable message via its `std::fmt::Display` impl. +/// * Access an underlying [`regex_syntax::Error`] type from its `source` +/// method via the `std::error::Error` trait. This error only occurs when using +/// convenience routines for building an NFA directly from a pattern string. +/// +/// Otherwise, errors typically occur when a limit has been breached. For +/// example, if the total heap usage of the compiled NFA exceeds the limit +/// set by [`Config::nfa_size_limit`](crate::nfa::thompson::Config), then +/// building the NFA will fail. +#[derive(Clone, Debug)] +pub struct BuildError { + kind: BuildErrorKind, +} + +/// The kind of error that occurred during the construction of a Thompson NFA. +#[derive(Clone, Debug)] +enum BuildErrorKind { + /// An error that occurred while parsing a regular expression. Note that + /// this error may be printed over multiple lines, and is generally + /// intended to be end user readable on its own. + #[cfg(feature = "syntax")] + Syntax(regex_syntax::Error), + /// An error that occurs if the capturing groups provided to an NFA builder + /// do not satisfy the documented invariants. For example, things like + /// too many groups, missing groups, having the first (zeroth) group be + /// named or duplicate group names within the same pattern. + Captures(captures::GroupInfoError), + /// An error that occurs when an NFA contains a Unicode word boundary, but + /// where the crate was compiled without the necessary data for dealing + /// with Unicode word boundaries. + Word(look::UnicodeWordBoundaryError), + /// An error that occurs if too many patterns were given to the NFA + /// compiler. 
+ TooManyPatterns { + /// The number of patterns given, which exceeds the limit. + given: usize, + /// The limit on the number of patterns. + limit: usize, + }, + /// An error that occurs if too many states are produced while building an NFA. + TooManyStates { + /// The minimum number of states that are desired, which exceeds the + /// limit. + given: usize, + /// The limit on the number of states. + limit: usize, + }, + /// An error that occurs when NFA compilation exceeds a configured heap + /// limit. + ExceededSizeLimit { + /// The configured limit, in bytes. + limit: usize, + }, + /// An error that occurs when an invalid capture group index is added to + /// the NFA. An "invalid" index can be one that would otherwise overflow + /// a `usize` on the current target. + InvalidCaptureIndex { + /// The invalid index that was given. + index: u32, + }, + /// An error that occurs when one tries to build a reverse NFA with + /// captures enabled. Currently, this isn't supported, but we probably + /// should support it at some point. + #[cfg(feature = "syntax")] + UnsupportedCaptures, +} + +impl BuildError { + /// If this error occurred because the NFA exceeded the configured size + /// limit before being built, then this returns the configured size limit. + /// + /// The limit returned is what was configured, and corresponds to the + /// maximum amount of heap usage in bytes. 
+ pub fn size_limit(&self) -> Option { + match self.kind { + BuildErrorKind::ExceededSizeLimit { limit } => Some(limit), + _ => None, + } + } + + fn kind(&self) -> &BuildErrorKind { + &self.kind + } + + #[cfg(feature = "syntax")] + pub(crate) fn syntax(err: regex_syntax::Error) -> BuildError { + BuildError { kind: BuildErrorKind::Syntax(err) } + } + + pub(crate) fn captures(err: captures::GroupInfoError) -> BuildError { + BuildError { kind: BuildErrorKind::Captures(err) } + } + + pub(crate) fn word(err: look::UnicodeWordBoundaryError) -> BuildError { + BuildError { kind: BuildErrorKind::Word(err) } + } + + pub(crate) fn too_many_patterns(given: usize) -> BuildError { + let limit = PatternID::LIMIT; + BuildError { kind: BuildErrorKind::TooManyPatterns { given, limit } } + } + + pub(crate) fn too_many_states(given: usize) -> BuildError { + let limit = StateID::LIMIT; + BuildError { kind: BuildErrorKind::TooManyStates { given, limit } } + } + + pub(crate) fn exceeded_size_limit(limit: usize) -> BuildError { + BuildError { kind: BuildErrorKind::ExceededSizeLimit { limit } } + } + + pub(crate) fn invalid_capture_index(index: u32) -> BuildError { + BuildError { kind: BuildErrorKind::InvalidCaptureIndex { index } } + } + + #[cfg(feature = "syntax")] + pub(crate) fn unsupported_captures() -> BuildError { + BuildError { kind: BuildErrorKind::UnsupportedCaptures } + } +} + +#[cfg(feature = "std")] +impl std::error::Error for BuildError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self.kind() { + #[cfg(feature = "syntax")] + BuildErrorKind::Syntax(ref err) => Some(err), + BuildErrorKind::Captures(ref err) => Some(err), + _ => None, + } + } +} + +impl core::fmt::Display for BuildError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match self.kind() { + #[cfg(feature = "syntax")] + BuildErrorKind::Syntax(_) => write!(f, "error parsing regex"), + BuildErrorKind::Captures(_) => { + write!(f, "error with capture 
groups") + } + BuildErrorKind::Word(_) => { + write!(f, "NFA contains Unicode word boundary") + } + BuildErrorKind::TooManyPatterns { given, limit } => write!( + f, + "attempted to compile {} patterns, \ + which exceeds the limit of {}", + given, limit, + ), + BuildErrorKind::TooManyStates { given, limit } => write!( + f, + "attempted to compile {} NFA states, \ + which exceeds the limit of {}", + given, limit, + ), + BuildErrorKind::ExceededSizeLimit { limit } => write!( + f, + "heap usage during NFA compilation exceeded limit of {}", + limit, + ), + BuildErrorKind::InvalidCaptureIndex { index } => write!( + f, + "capture group index {} is invalid (too big or discontinuous)", + index, + ), + #[cfg(feature = "syntax")] + BuildErrorKind::UnsupportedCaptures => write!( + f, + "currently captures must be disabled when compiling \ + a reverse NFA", + ), + } + } +} diff --git a/vendor/regex-automata/src/nfa/thompson/literal_trie.rs b/vendor/regex-automata/src/nfa/thompson/literal_trie.rs new file mode 100644 index 0000000..7ed129a --- /dev/null +++ b/vendor/regex-automata/src/nfa/thompson/literal_trie.rs @@ -0,0 +1,528 @@ +use core::mem; + +use alloc::{vec, vec::Vec}; + +use crate::{ + nfa::thompson::{self, compiler::ThompsonRef, BuildError, Builder}, + util::primitives::{IteratorIndexExt, StateID}, +}; + +/// A trie that preserves leftmost-first match semantics. +/// +/// This is a purpose-built data structure for optimizing 'lit1|lit2|..|litN' +/// patterns. It can *only* handle alternations of literals, which makes it +/// somewhat restricted in its scope, but literal alternations are fairly +/// common. +/// +/// At a 5,000 foot level, the main idea of this trie is make an alternation of +/// literals look more like a DFA than an NFA via epsilon removal. +/// +/// More precisely, the main issue is in how alternations are compiled into +/// a Thompson NFA. 
Namely, each alternation gets a single NFA "union" state +/// with an epsilon transition for every branch of the alternation pointing to +/// an NFA state corresponding to the start of that branch. The main problem +/// with this representation is the cost of computing an epsilon closure. Once +/// you hit the alternation's start state, it acts as a sort of "clog" that +/// requires you to traverse all of the epsilon transitions to compute the full +/// closure. +/// +/// Fixing such clogs in the general case is pretty tricky without going +/// to a DFA (or perhaps a Glushkov NFA, but that comes with other problems). +/// But at least in the case of an alternation of literals, we can convert +/// that to a prefix trie without too much cost. In theory, that's all you +/// really need to do: build the trie and then compile it to a Thompson NFA. +/// For example, if you have the pattern 'bar|baz|foo', then using a trie, it +/// is transformed to something like 'b(a(r|z))|foo'. This reduces the clog by +/// reducing the number of epsilon transitions out of the alternation's start +/// state from 3 to 2 (it actually gets down to 1 when you use a sparse state, +/// which we do below). It's a small effect here, but when your alternation is +/// huge, the savings are also huge. +/// +/// And that is... essentially what a LiteralTrie does. But there is one +/// hiccup. Consider a regex like 'sam|samwise'. How does a prefix trie compile +/// that when leftmost-first semantics are used? If 'sam|samwise' was the +/// entire regex, then you could just drop the 'samwise' branch entirely since +/// it is impossible to match ('sam' will always take priority, and since it +/// is a prefix of 'samwise', 'samwise' will never match). But what about the +/// regex '\b(sam|samwise)\b'? In that case, you can't remove 'samwise' because +/// it might match when 'sam' doesn't fall on a word boundary. 
+/// +/// The main idea is that 'sam|samwise' can be translated to 'sam(?:|wise)', +/// which is a precisely equivalent regex that also gets rid of the clog. +/// +/// Another example is 'zapper|z|zap'. That gets translated to +/// 'z(?:apper||ap)'. +/// +/// We accomplish this by giving each state in the trie multiple "chunks" of +/// transitions. Each chunk barrier represents a match. The idea is that once +/// you know a match occurs, none of the transitions after the match can be +/// re-ordered and mixed in with the transitions before the match. Otherwise, +/// the match semantics could be changed. +/// +/// See the 'State' data type for a bit more detail. +/// +/// Future work: +/// +/// * In theory, it would be nice to generalize the idea of removing clogs and +/// apply it to the NFA graph itself. Then this could in theory work for +/// case insensitive alternations of literals, or even just alternations where +/// each branch starts with a non-epsilon transition. +/// * Could we instead use the Aho-Corasick algorithm here? The aho-corasick +/// crate deals with leftmost-first matches correctly, but I think this implies +/// encoding failure transitions into a Thompson NFA somehow. Which seems fine, +/// because failure transitions are just unconditional epsilon transitions? +/// * Or perhaps even better, could we use an aho_corasick::AhoCorasick +/// directly? At time of writing, 0.7 is the current version of the +/// aho-corasick crate, and that definitely cannot be used as-is. But if we +/// expose the underlying finite state machine API, then could we use it? That +/// would be super. If we could figure that out, it might also lend itself to +/// more general composition of finite state machines. +#[derive(Clone)] +pub(crate) struct LiteralTrie { + /// The set of trie states. Each state contains one or more chunks, where + /// each chunk is a sparse set of transitions to other states. 
A leaf state + /// is always a match state that contains only empty chunks (i.e., no + /// transitions). + states: Vec, + /// Whether to add literals in reverse to the trie. Useful when building + /// a reverse NFA automaton. + rev: bool, +} + +impl LiteralTrie { + /// Create a new literal trie that adds literals in the forward direction. + pub(crate) fn forward() -> LiteralTrie { + let root = State::default(); + LiteralTrie { states: vec![root], rev: false } + } + + /// Create a new literal trie that adds literals in reverse. + pub(crate) fn reverse() -> LiteralTrie { + let root = State::default(); + LiteralTrie { states: vec![root], rev: true } + } + + /// Add the given literal to this trie. + /// + /// If the literal could not be added because the `StateID` space was + /// exhausted, then an error is returned. If an error returns, the trie + /// is in an unspecified state. + pub(crate) fn add(&mut self, bytes: &[u8]) -> Result<(), BuildError> { + let mut prev = StateID::ZERO; + let mut it = bytes.iter().copied(); + while let Some(b) = if self.rev { it.next_back() } else { it.next() } { + prev = self.get_or_add_state(prev, b)?; + } + self.states[prev].add_match(); + Ok(()) + } + + /// If the given transition is defined, then return the next state ID. + /// Otherwise, add the transition to `from` and point it to a new state. + /// + /// If a new state ID could not be allocated, then an error is returned. + fn get_or_add_state( + &mut self, + from: StateID, + byte: u8, + ) -> Result { + let active = self.states[from].active_chunk(); + match active.binary_search_by_key(&byte, |t| t.byte) { + Ok(i) => Ok(active[i].next), + Err(i) => { + // Add a new state and get its ID. + let next = StateID::new(self.states.len()).map_err(|_| { + BuildError::too_many_states(self.states.len()) + })?; + self.states.push(State::default()); + // Offset our position to account for all transitions and not + // just the ones in the active chunk. 
+ let i = self.states[from].active_chunk_start() + i; + let t = Transition { byte, next }; + self.states[from].transitions.insert(i, t); + Ok(next) + } + } + } + + /// Compile this literal trie to the NFA builder given. + /// + /// This forwards any errors that may occur while using the given builder. + pub(crate) fn compile( + &self, + builder: &mut Builder, + ) -> Result { + // Compilation proceeds via depth-first traversal of the trie. + // + // This is overall pretty brutal. The recursive version of this is + // deliciously simple. (See 'compile_to_hir' below for what it might + // look like.) But recursion on a trie means your call stack grows + // in accordance with the longest literal, which just does not seem + // appropriate. So we push the call stack to the heap. But as a result, + // the trie traversal becomes pretty brutal because we essentially + // have to encode the state of a double for-loop into an explicit call + // frame. If someone can simplify this without using recursion, that'd + // be great. + + // 'end' is our match state for this trie, but represented in the + // NFA. Any time we see a match in the trie, we insert a transition + // from the current state we're in to 'end'. + let end = builder.add_empty()?; + let mut stack = vec![]; + let mut f = Frame::new(&self.states[StateID::ZERO]); + loop { + if let Some(t) = f.transitions.next() { + if self.states[t.next].is_leaf() { + f.sparse.push(thompson::Transition { + start: t.byte, + end: t.byte, + next: end, + }); + } else { + f.sparse.push(thompson::Transition { + start: t.byte, + end: t.byte, + // This is a little funny, but when the frame we create + // below completes, it will pop this parent frame off + // and modify this transition to point to the correct + // state. + next: StateID::ZERO, + }); + stack.push(f); + f = Frame::new(&self.states[t.next]); + } + continue; + } + // At this point, we have visited all transitions in f.chunk, so + // add it as a sparse NFA state. 
Unless the chunk was empty, in + // which case, we don't do anything. + if !f.sparse.is_empty() { + let chunk_id = if f.sparse.len() == 1 { + builder.add_range(f.sparse.pop().unwrap())? + } else { + let sparse = mem::replace(&mut f.sparse, vec![]); + builder.add_sparse(sparse)? + }; + f.union.push(chunk_id); + } + // Now we need to look to see if there are other chunks to visit. + if let Some(chunk) = f.chunks.next() { + // If we're here, it means we're on the second (or greater) + // chunk, which implies there is a match at this point. So + // connect this state to the final end state. + f.union.push(end); + // Advance to the next chunk. + f.transitions = chunk.iter(); + continue; + } + // Now that we are out of chunks, we have completely visited + // this state. So turn our union of chunks into an NFA union + // state, and add that union state to the parent state's current + // sparse state. (If there is no parent, we're done.) + let start = builder.add_union(f.union)?; + match stack.pop() { + None => { + return Ok(ThompsonRef { start, end }); + } + Some(mut parent) => { + // OK because the only way a frame gets pushed on to the + // stack (aside from the root) is when a transition has + // been added to 'sparse'. + parent.sparse.last_mut().unwrap().next = start; + f = parent; + } + } + } + } + + /// Converts this trie to an equivalent HIR expression. + /// + /// We don't actually use this, but it's useful for tests. In particular, + /// it provides a (somewhat) human readable representation of the trie + /// itself. + #[cfg(test)] + fn compile_to_hir(&self) -> regex_syntax::hir::Hir { + self.compile_state_to_hir(StateID::ZERO) + } + + /// The recursive implementation of 'to_hir'. + /// + /// Notice how simple this is compared to 'compile' above. 'compile' could + /// be similarly simple, but we opt to not use recursion in order to avoid + /// overflowing the stack in the case of a longer literal. 
+ #[cfg(test)] + fn compile_state_to_hir(&self, sid: StateID) -> regex_syntax::hir::Hir { + use regex_syntax::hir::Hir; + + let mut alt = vec![]; + for (i, chunk) in self.states[sid].chunks().enumerate() { + if i > 0 { + alt.push(Hir::empty()); + } + if chunk.is_empty() { + continue; + } + let mut chunk_alt = vec![]; + for t in chunk.iter() { + chunk_alt.push(Hir::concat(vec![ + Hir::literal(vec![t.byte]), + self.compile_state_to_hir(t.next), + ])); + } + alt.push(Hir::alternation(chunk_alt)); + } + Hir::alternation(alt) + } +} + +impl core::fmt::Debug for LiteralTrie { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + writeln!(f, "LiteralTrie(")?; + for (sid, state) in self.states.iter().with_state_ids() { + writeln!(f, "{:06?}: {:?}", sid.as_usize(), state)?; + } + writeln!(f, ")")?; + Ok(()) + } +} + +/// An explicit stack frame used for traversing the trie without using +/// recursion. +/// +/// Each frame is tied to the traversal of a single trie state. The frame is +/// dropped once the entire state (and all of its children) have been visited. +/// The "output" of compiling a state is the 'union' vector, which is in turn +/// converted to an NFA union state. Each branch of the union corresponds to a +/// chunk in the trie state. +/// +/// 'sparse' corresponds to the set of transitions for a particular chunk in a +/// trie state. It is ultimately converted to an NFA sparse state. The 'sparse' +/// field, after being converted to a sparse NFA state, is reused for any +/// subsequent chunks in the trie state, if any exist. +#[derive(Debug)] +struct Frame<'a> { + /// The remaining chunks to visit for a trie state. + chunks: StateChunksIter<'a>, + /// The transitions of the current chunk that we're iterating over. Since + /// every trie state has at least one chunk, every frame is initialized + /// with the first chunk's transitions ready to be consumed. 
+ transitions: core::slice::Iter<'a, Transition>, + /// The NFA state IDs pointing to the start of each chunk compiled by + /// this trie state. This ultimately gets converted to an NFA union once + /// the entire trie state (and all of its children) have been compiled. + /// The order of these matters for leftmost-first match semantics, since + /// earlier matches in the union are preferred over later ones. + union: Vec, + /// The actual NFA transitions for a single chunk in a trie state. This + /// gets converted to an NFA sparse state, and its corresponding NFA state + /// ID should get added to 'union'. + sparse: Vec, +} + +impl<'a> Frame<'a> { + /// Create a new stack frame for trie traversal. This initializes the + /// 'transitions' iterator to the transitions for the first chunk, with the + /// 'chunks' iterator being every chunk after the first one. + fn new(state: &'a State) -> Frame<'a> { + let mut chunks = state.chunks(); + // every state has at least 1 chunk + let chunk = chunks.next().unwrap(); + let transitions = chunk.iter(); + Frame { chunks, transitions, union: vec![], sparse: vec![] } + } +} + +/// A state in a trie. +/// +/// This uses a sparse representation. Since we don't use literal tries +/// for searching (and compilation ultimately requires visiting every +/// transition anyway), we use a sparse representation for transitions. This +/// means we save on memory, at the expense of 'LiteralTrie::add' being perhaps +/// a bit slower. +/// +/// While 'transitions' is pretty standard as far as tries go, the 'chunks' +/// piece here is more unusual. In effect, 'chunks' defines a partitioning +/// of 'transitions', where each chunk corresponds to a distinct set of +/// transitions. The key invariant is that a transition in one chunk cannot +/// be moved to another chunk. This is the secret sauce that preserves +/// leftmost-first match semantics. +/// +/// A new chunk is added whenever we mark a state as a match state. 
Once a +/// new chunk is added, the old active chunk is frozen and is never mutated +/// again. The new chunk becomes the active chunk, which is defined as +/// '&transitions[chunks.last().map_or(0, |c| c.1)..]'. Thus, a state where +/// 'chunks' is empty actually contains one chunk. Thus, every state contains +/// at least one (possibly empty) chunk. +/// +/// A "leaf" state is a state that has no outgoing transitions (so +/// 'transitions' is empty). Note that there is no way for a leaf state to be a +/// non-matching state. (Although while building the trie, within 'add', a leaf +/// state may exist while not containing any matches. But this invariant is +/// only broken within 'add'. Once 'add' returns, the invariant is upheld.) +#[derive(Clone, Default)] +struct State { + transitions: Vec, + chunks: Vec<(usize, usize)>, +} + +impl State { + /// Mark this state as a match state and freeze the active chunk such that + /// it can not be further mutated. + fn add_match(&mut self) { + // This is not strictly necessary, but there's no point in recording + // another match by adding another chunk if the state has no + // transitions. Note though that we only skip this if we already know + // this is a match state, which is only true if 'chunks' is not empty. + // Basically, if we didn't do this, nothing semantically would change, + // but we'd end up pushing another chunk and potentially triggering an + // alloc. + if self.transitions.is_empty() && !self.chunks.is_empty() { + return; + } + let chunk_start = self.active_chunk_start(); + let chunk_end = self.transitions.len(); + self.chunks.push((chunk_start, chunk_end)); + } + + /// Returns true if and only if this state is a leaf state. That is, a + /// state that has no outgoing transitions. + fn is_leaf(&self) -> bool { + self.transitions.is_empty() + } + + /// Returns an iterator over all of the chunks (including the currently + /// active chunk) in this state. 
Since the active chunk is included, the + /// iterator is guaranteed to always yield at least one chunk (although the + /// chunk may be empty). + fn chunks(&self) -> StateChunksIter<'_> { + StateChunksIter { + transitions: &*self.transitions, + chunks: self.chunks.iter(), + active: Some(self.active_chunk()), + } + } + + /// Returns the active chunk as a slice of transitions. + fn active_chunk(&self) -> &[Transition] { + let start = self.active_chunk_start(); + &self.transitions[start..] + } + + /// Returns the index into 'transitions' where the active chunk starts. + fn active_chunk_start(&self) -> usize { + self.chunks.last().map_or(0, |&(_, end)| end) + } +} + +impl core::fmt::Debug for State { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + let mut spacing = " "; + for (i, chunk) in self.chunks().enumerate() { + if i > 0 { + write!(f, "{}MATCH", spacing)?; + } + spacing = ""; + for (j, t) in chunk.iter().enumerate() { + spacing = " "; + if j == 0 && i > 0 { + write!(f, " ")?; + } else if j > 0 { + write!(f, ", ")?; + } + write!(f, "{:?}", t)?; + } + } + Ok(()) + } +} + +/// An iterator over all of the chunks in a state, including the active chunk. +/// +/// This iterator is created by `State::chunks`. We name this iterator so that +/// we can include it in the `Frame` type for non-recursive trie traversal. +#[derive(Debug)] +struct StateChunksIter<'a> { + transitions: &'a [Transition], + chunks: core::slice::Iter<'a, (usize, usize)>, + active: Option<&'a [Transition]>, +} + +impl<'a> Iterator for StateChunksIter<'a> { + type Item = &'a [Transition]; + + fn next(&mut self) -> Option<&'a [Transition]> { + if let Some(&(start, end)) = self.chunks.next() { + return Some(&self.transitions[start..end]); + } + if let Some(chunk) = self.active.take() { + return Some(chunk); + } + None + } +} + +/// A single transition in a trie to another state. 
+#[derive(Clone, Copy)] +struct Transition { + byte: u8, + next: StateID, +} + +impl core::fmt::Debug for Transition { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + write!( + f, + "{:?} => {}", + crate::util::escape::DebugByte(self.byte), + self.next.as_usize() + ) + } +} + +#[cfg(test)] +mod tests { + use bstr::B; + use regex_syntax::hir::Hir; + + use super::*; + + #[test] + fn zap() { + let mut trie = LiteralTrie::forward(); + trie.add(b"zapper").unwrap(); + trie.add(b"z").unwrap(); + trie.add(b"zap").unwrap(); + + let got = trie.compile_to_hir(); + let expected = Hir::concat(vec![ + Hir::literal(B("z")), + Hir::alternation(vec![ + Hir::literal(B("apper")), + Hir::empty(), + Hir::literal(B("ap")), + ]), + ]); + assert_eq!(expected, got); + } + + #[test] + fn maker() { + let mut trie = LiteralTrie::forward(); + trie.add(b"make").unwrap(); + trie.add(b"maple").unwrap(); + trie.add(b"maker").unwrap(); + + let got = trie.compile_to_hir(); + let expected = Hir::concat(vec![ + Hir::literal(B("ma")), + Hir::alternation(vec![ + Hir::concat(vec![ + Hir::literal(B("ke")), + Hir::alternation(vec![Hir::empty(), Hir::literal(B("r"))]), + ]), + Hir::literal(B("ple")), + ]), + ]); + assert_eq!(expected, got); + } +} diff --git a/vendor/regex-automata/src/nfa/thompson/map.rs b/vendor/regex-automata/src/nfa/thompson/map.rs new file mode 100644 index 0000000..7f074a3 --- /dev/null +++ b/vendor/regex-automata/src/nfa/thompson/map.rs @@ -0,0 +1,296 @@ +// This module contains a couple simple and purpose built hash maps. The key +// trade off they make is that they serve as caches rather than true maps. That +// is, inserting a new entry may cause eviction of another entry. This gives +// us two things. First, there's less overhead associated with inserts and +// lookups. Secondly, it lets us control our memory usage. +// +// These maps are used in some fairly hot code when generating NFA states for +// large Unicode character classes. 
+// +// Instead of exposing a rich hashmap entry API, we just permit the caller to +// produce a hash of the key directly. The hash can then be reused for both +// lookups and insertions at the cost of leaking abstraction a bit. But these +// are for internal use only, so it's fine. +// +// The Utf8BoundedMap is used for Daciuk's algorithm for constructing a +// (almost) minimal DFA for large Unicode character classes in linear time. +// (Daciuk's algorithm is always used when compiling forward NFAs. For reverse +// NFAs, it's only used when the compiler is configured to 'shrink' the NFA, +// since there's a bit more expense in the reverse direction.) +// +// The Utf8SuffixMap is used when compiling large Unicode character classes for +// reverse NFAs when 'shrink' is disabled. Specifically, it augments the naive +// construction of UTF-8 automata by caching common suffixes. This doesn't +// get the same space savings as Daciuk's algorithm, but it's basically as +// fast as the naive approach and typically winds up using less memory (since +// it generates smaller NFAs) despite the presence of the cache. +// +// These maps effectively represent caching mechanisms for sparse and +// byte-range NFA states, respectively. The former represents a single NFA +// state with many transitions of equivalent priority while the latter +// represents a single NFA state with a single transition. (Neither state ever +// has or is an epsilon transition.) Thus, they have different key types. It's +// likely we could make one generic map, but the machinery didn't seem worth +// it. They are simple enough. 
+ +use alloc::{vec, vec::Vec}; + +use crate::{ + nfa::thompson::Transition, + util::{ + int::{Usize, U64}, + primitives::StateID, + }, +}; + +// Basic FNV-1a hash constants as described in: +// https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function +const PRIME: u64 = 1099511628211; +const INIT: u64 = 14695981039346656037; + +/// A bounded hash map where the key is a sequence of NFA transitions and the +/// value is a pre-existing NFA state ID. +/// +/// std's hashmap can be used for this, however, this map has two important +/// advantages. Firstly, it has lower overhead. Secondly, it permits us to +/// control our memory usage by limited the number of slots. In general, the +/// cost here is that this map acts as a cache. That is, inserting a new entry +/// may remove an old entry. We are okay with this, since it does not impact +/// correctness in the cases where it is used. The only effect that dropping +/// states from the cache has is that the resulting NFA generated may be bigger +/// than it otherwise would be. +/// +/// This improves benchmarks that compile large Unicode character classes, +/// since it makes the generation of (almost) minimal UTF-8 automaton faster. +/// Specifically, one could observe the difference with std's hashmap via +/// something like the following benchmark: +/// +/// hyperfine "regex-cli debug thompson -qr --captures none '\w{90} ecurB'" +/// +/// But to observe that difference, you'd have to modify the code to use +/// std's hashmap. +/// +/// It is quite possible that there is a better way to approach this problem. +/// For example, if there happens to be a very common state that collides with +/// a lot of less frequent states, then we could wind up with very poor caching +/// behavior. Alas, the effectiveness of this cache has not been measured. +/// Instead, ad hoc experiments suggest that it is "good enough." 
Additional +/// smarts (such as an LRU eviction policy) have to be weighed against the +/// amount of extra time they cost. +#[derive(Clone, Debug)] +pub struct Utf8BoundedMap { + /// The current version of this map. Only entries with matching versions + /// are considered during lookups. If an entry is found with a mismatched + /// version, then the map behaves as if the entry does not exist. + /// + /// This makes it possible to clear the map by simply incrementing the + /// version number instead of actually deallocating any storage. + version: u16, + /// The total number of entries this map can store. + capacity: usize, + /// The actual entries, keyed by hash. Collisions between different states + /// result in the old state being dropped. + map: Vec, +} + +/// An entry in this map. +#[derive(Clone, Debug, Default)] +struct Utf8BoundedEntry { + /// The version of the map used to produce this entry. If this entry's + /// version does not match the current version of the map, then the map + /// should behave as if this entry does not exist. + version: u16, + /// The key, which is a sorted sequence of non-overlapping NFA transitions. + key: Vec, + /// The state ID corresponding to the state containing the transitions in + /// this entry. + val: StateID, +} + +impl Utf8BoundedMap { + /// Create a new bounded map with the given capacity. The map will never + /// grow beyond the given size. + /// + /// Note that this does not allocate. Instead, callers must call `clear` + /// before using this map. `clear` will allocate space if necessary. + /// + /// This avoids the need to pay for the allocation of this map when + /// compiling regexes that lack large Unicode character classes. + pub fn new(capacity: usize) -> Utf8BoundedMap { + assert!(capacity > 0); + Utf8BoundedMap { version: 0, capacity, map: vec![] } + } + + /// Clear this map of all entries, but permit the reuse of allocation + /// if possible. + /// + /// This must be called before the map can be used. 
+ pub fn clear(&mut self) { + if self.map.is_empty() { + self.map = vec![Utf8BoundedEntry::default(); self.capacity]; + } else { + self.version = self.version.wrapping_add(1); + // If we loop back to version 0, then we forcefully clear the + // entire map. Otherwise, it might be possible to incorrectly + // match entries used to generate other NFAs. + if self.version == 0 { + self.map = vec![Utf8BoundedEntry::default(); self.capacity]; + } + } + } + + /// Return a hash of the given transitions. + pub fn hash(&self, key: &[Transition]) -> usize { + let mut h = INIT; + for t in key { + h = (h ^ u64::from(t.start)).wrapping_mul(PRIME); + h = (h ^ u64::from(t.end)).wrapping_mul(PRIME); + h = (h ^ t.next.as_u64()).wrapping_mul(PRIME); + } + (h % self.map.len().as_u64()).as_usize() + } + + /// Retrieve the cached state ID corresponding to the given key. The hash + /// given must have been computed with `hash` using the same key value. + /// + /// If there is no cached state with the given transitions, then None is + /// returned. + pub fn get(&mut self, key: &[Transition], hash: usize) -> Option { + let entry = &self.map[hash]; + if entry.version != self.version { + return None; + } + // There may be a hash collision, so we need to confirm real equality. + if entry.key != key { + return None; + } + Some(entry.val) + } + + /// Add a cached state to this map with the given key. Callers should + /// ensure that `state_id` points to a state that contains precisely the + /// NFA transitions given. + /// + /// `hash` must have been computed using the `hash` method with the same + /// key. + pub fn set( + &mut self, + key: Vec, + hash: usize, + state_id: StateID, + ) { + self.map[hash] = + Utf8BoundedEntry { version: self.version, key, val: state_id }; + } +} + +/// A cache of suffixes used to modestly compress UTF-8 automata for large +/// Unicode character classes. +#[derive(Clone, Debug)] +pub struct Utf8SuffixMap { + /// The current version of this map. 
Only entries with matching versions + /// are considered during lookups. If an entry is found with a mismatched + /// version, then the map behaves as if the entry does not exist. + version: u16, + /// The total number of entries this map can store. + capacity: usize, + /// The actual entries, keyed by hash. Collisions between different states + /// result in the old state being dropped. + map: Vec, +} + +/// A key that uniquely identifies an NFA state. It is a triple that represents +/// a transition from one state for a particular byte range. +#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub struct Utf8SuffixKey { + pub from: StateID, + pub start: u8, + pub end: u8, +} + +/// An entry in this map. +#[derive(Clone, Debug, Default)] +struct Utf8SuffixEntry { + /// The version of the map used to produce this entry. If this entry's + /// version does not match the current version of the map, then the map + /// should behave as if this entry does not exist. + version: u16, + /// The key, which consists of a transition in a particular state. + key: Utf8SuffixKey, + /// The identifier that the transition in the key maps to. + val: StateID, +} + +impl Utf8SuffixMap { + /// Create a new bounded map with the given capacity. The map will never + /// grow beyond the given size. + /// + /// Note that this does not allocate. Instead, callers must call `clear` + /// before using this map. `clear` will allocate space if necessary. + /// + /// This avoids the need to pay for the allocation of this map when + /// compiling regexes that lack large Unicode character classes. + pub fn new(capacity: usize) -> Utf8SuffixMap { + assert!(capacity > 0); + Utf8SuffixMap { version: 0, capacity, map: vec![] } + } + + /// Clear this map of all entries, but permit the reuse of allocation + /// if possible. + /// + /// This must be called before the map can be used. 
+ pub fn clear(&mut self) { + if self.map.is_empty() { + self.map = vec![Utf8SuffixEntry::default(); self.capacity]; + } else { + self.version = self.version.wrapping_add(1); + if self.version == 0 { + self.map = vec![Utf8SuffixEntry::default(); self.capacity]; + } + } + } + + /// Return a hash of the given transition. + pub fn hash(&self, key: &Utf8SuffixKey) -> usize { + // Basic FNV-1a hash as described: + // https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function + const PRIME: u64 = 1099511628211; + const INIT: u64 = 14695981039346656037; + + let mut h = INIT; + h = (h ^ key.from.as_u64()).wrapping_mul(PRIME); + h = (h ^ u64::from(key.start)).wrapping_mul(PRIME); + h = (h ^ u64::from(key.end)).wrapping_mul(PRIME); + (h % self.map.len().as_u64()).as_usize() + } + + /// Retrieve the cached state ID corresponding to the given key. The hash + /// given must have been computed with `hash` using the same key value. + /// + /// If there is no cached state with the given key, then None is returned. + pub fn get( + &mut self, + key: &Utf8SuffixKey, + hash: usize, + ) -> Option { + let entry = &self.map[hash]; + if entry.version != self.version { + return None; + } + if key != &entry.key { + return None; + } + Some(entry.val) + } + + /// Add a cached state to this map with the given key. Callers should + /// ensure that `state_id` points to a state that contains precisely the + /// NFA transition given. + /// + /// `hash` must have been computed using the `hash` method with the same + /// key. + pub fn set(&mut self, key: Utf8SuffixKey, hash: usize, state_id: StateID) { + self.map[hash] = + Utf8SuffixEntry { version: self.version, key, val: state_id }; + } +} diff --git a/vendor/regex-automata/src/nfa/thompson/mod.rs b/vendor/regex-automata/src/nfa/thompson/mod.rs new file mode 100644 index 0000000..cf42673 --- /dev/null +++ b/vendor/regex-automata/src/nfa/thompson/mod.rs @@ -0,0 +1,81 @@ +/*! 
+Defines a Thompson NFA and provides the [`PikeVM`](pikevm::PikeVM) and +[`BoundedBacktracker`](backtrack::BoundedBacktracker) regex engines. + +A Thompson NFA (non-deterministic finite automaton) is arguably _the_ central +data type in this library. It is the result of what is commonly referred to as +"regex compilation." That is, turning a regex pattern from its concrete syntax +string into something that can run a search looks roughly like this: + +* A `&str` is parsed into a [`regex-syntax::ast::Ast`](regex_syntax::ast::Ast). +* An `Ast` is translated into a [`regex-syntax::hir::Hir`](regex_syntax::hir::Hir). +* An `Hir` is compiled into a [`NFA`]. +* The `NFA` is then used to build one of a few different regex engines: + * An `NFA` is used directly in the `PikeVM` and `BoundedBacktracker` engines. + * An `NFA` is used by a [hybrid NFA/DFA](crate::hybrid) to build out a DFA's + transition table at search time. + * An `NFA`, assuming it is one-pass, is used to build a full + [one-pass DFA](crate::dfa::onepass) ahead of time. + * An `NFA` is used to build a [full DFA](crate::dfa) ahead of time. + +The [`meta`](crate::meta) regex engine makes all of these choices for you based +on various criteria. However, if you have a lower level use case, _you_ can +build any of the above regex engines and use them directly. But you must start +here by building an `NFA`. + +# Details + +It is perhaps worth expanding a bit more on what it means to go through the +`&str`->`Ast`->`Hir`->`NFA` process. + +* Parsing a string into an `Ast` gives it a structured representation. +Crucially, the size and amount of work done in this step is proportional to the +size of the original string. No optimization or Unicode handling is done at +this point. This means that parsing into an `Ast` has very predictable costs. +Moreover, an `Ast` can be roundtripped back to its original pattern string as +written. 
+* Translating an `Ast` into an `Hir` is a process by which the structured +representation is simplified down to its most fundamental components. +Translation deals with flags such as case insensitivity by converting things +like `(?i:a)` to `[Aa]`. Translation is also where Unicode tables are consulted +to resolve things like `\p{Emoji}` and `\p{Greek}`. It also flattens each +character class, regardless of how deeply nested it is, into a single sequence +of non-overlapping ranges. All the various literal forms are thrown out in +favor of one common representation. Overall, the `Hir` is small enough to fit +into your head and makes analysis and other tasks much simpler. +* Compiling an `Hir` into an `NFA` formulates the regex into a finite state +machine whose transitions are defined over bytes. For example, an `Hir` might +have a Unicode character class corresponding to a sequence of ranges defined +in terms of `char`. Compilation is then responsible for turning those ranges +into a UTF-8 automaton. That is, an automaton that matches the UTF-8 encoding +of just the codepoints specified by those ranges. Otherwise, the main job of +an `NFA` is to serve as a byte-code of sorts for a virtual machine. It can be +seen as a sequence of instructions for how to match a regex. 
+*/ + +#[cfg(feature = "nfa-backtrack")] +pub mod backtrack; +mod builder; +#[cfg(feature = "syntax")] +mod compiler; +mod error; +#[cfg(feature = "syntax")] +mod literal_trie; +#[cfg(feature = "syntax")] +mod map; +mod nfa; +#[cfg(feature = "nfa-pikevm")] +pub mod pikevm; +#[cfg(feature = "syntax")] +mod range_trie; + +pub use self::{ + builder::Builder, + error::BuildError, + nfa::{ + DenseTransitions, PatternIter, SparseTransitions, State, Transition, + NFA, + }, +}; +#[cfg(feature = "syntax")] +pub use compiler::{Compiler, Config, WhichCaptures}; diff --git a/vendor/regex-automata/src/nfa/thompson/nfa.rs b/vendor/regex-automata/src/nfa/thompson/nfa.rs new file mode 100644 index 0000000..1f57f8e --- /dev/null +++ b/vendor/regex-automata/src/nfa/thompson/nfa.rs @@ -0,0 +1,2099 @@ +use core::{fmt, mem}; + +use alloc::{boxed::Box, format, string::String, sync::Arc, vec, vec::Vec}; + +#[cfg(feature = "syntax")] +use crate::nfa::thompson::{ + compiler::{Compiler, Config}, + error::BuildError, +}; +use crate::{ + nfa::thompson::builder::Builder, + util::{ + alphabet::{self, ByteClassSet, ByteClasses}, + captures::{GroupInfo, GroupInfoError}, + look::{Look, LookMatcher, LookSet}, + primitives::{ + IteratorIndexExt, PatternID, PatternIDIter, SmallIndex, StateID, + }, + sparse_set::SparseSet, + }, +}; + +/// A byte oriented Thompson non-deterministic finite automaton (NFA). +/// +/// A Thompson NFA is a finite state machine that permits unconditional epsilon +/// transitions, but guarantees that there exists at most one non-epsilon +/// transition for each element in the alphabet for each state. +/// +/// An NFA may be used directly for searching, for analysis or to build +/// a deterministic finite automaton (DFA). +/// +/// # Cheap clones +/// +/// Since an NFA is a core data type in this crate that many other regex +/// engines are based on top of, it is convenient to give ownership of an NFA +/// to said regex engines. 
Because of this, an NFA uses reference counting +/// internally. Therefore, it is cheap to clone and it is encouraged to do so. +/// +/// # Capabilities +/// +/// Using an NFA for searching via the +/// [`PikeVM`](crate::nfa::thompson::pikevm::PikeVM) provides the most amount +/// of "power" of any regex engine in this crate. Namely, it supports the +/// following in all cases: +/// +/// 1. Detection of a match. +/// 2. Location of a match, including both the start and end offset, in a +/// single pass of the haystack. +/// 3. Location of matching capturing groups. +/// 4. Handles multiple patterns, including (1)-(3) when multiple patterns are +/// present. +/// +/// # Capturing Groups +/// +/// Groups refer to parenthesized expressions inside a regex pattern. They look +/// like this, where `exp` is an arbitrary regex: +/// +/// * `(exp)` - An unnamed capturing group. +/// * `(?Pexp)` or `(?exp)` - A named capturing group. +/// * `(?:exp)` - A non-capturing group. +/// * `(?i:exp)` - A non-capturing group that sets flags. +/// +/// Only the first two forms are said to be _capturing_. Capturing +/// means that the last position at which they match is reportable. The +/// [`Captures`](crate::util::captures::Captures) type provides convenient +/// access to the match positions of capturing groups, which includes looking +/// up capturing groups by their name. +/// +/// # Byte oriented +/// +/// This NFA is byte oriented, which means that all of its transitions are +/// defined on bytes. In other words, the alphabet of an NFA consists of the +/// 256 different byte values. +/// +/// While DFAs nearly demand that they be byte oriented for performance +/// reasons, an NFA could conceivably be *Unicode codepoint* oriented. Indeed, +/// a previous version of this NFA supported both byte and codepoint oriented +/// modes. 
A codepoint oriented mode can work because an NFA fundamentally uses +/// a sparse representation of transitions, which works well with the large +/// sparse space of Unicode codepoints. +/// +/// Nevertheless, this NFA is only byte oriented. This choice is primarily +/// driven by implementation simplicity, and also in part memory usage. In +/// practice, performance between the two is roughly comparable. However, +/// building a DFA (including a hybrid DFA) really wants a byte oriented NFA. +/// So if we do have a codepoint oriented NFA, then we also need to generate +/// byte oriented NFA in order to build an hybrid NFA/DFA. Thus, by only +/// generating byte oriented NFAs, we can produce one less NFA. In other words, +/// if we made our NFA codepoint oriented, we'd need to *also* make it support +/// a byte oriented mode, which is more complicated. But a byte oriented mode +/// can support everything. +/// +/// # Differences with DFAs +/// +/// At the theoretical level, the precise difference between an NFA and a DFA +/// is that, in a DFA, for every state, an input symbol unambiguously refers +/// to a single transition _and_ that an input symbol is required for each +/// transition. At a practical level, this permits DFA implementations to be +/// implemented at their core with a small constant number of CPU instructions +/// for each byte of input searched. In practice, this makes them quite a bit +/// faster than NFAs _in general_. Namely, in order to execute a search for any +/// Thompson NFA, one needs to keep track of a _set_ of states, and execute +/// the possible transitions on all of those states for each input symbol. +/// Overall, this results in much more overhead. To a first approximation, one +/// can expect DFA searches to be about an order of magnitude faster. +/// +/// So why use an NFA at all? 
The main advantage of an NFA is that it takes +/// linear time (in the size of the pattern string after repetitions have been +/// expanded) to build and linear memory usage. A DFA, on the other hand, may +/// take exponential time and/or space to build. Even in non-pathological +/// cases, DFAs often take quite a bit more memory than their NFA counterparts, +/// _especially_ if large Unicode character classes are involved. Of course, +/// an NFA also provides additional capabilities. For example, it can match +/// Unicode word boundaries on non-ASCII text and resolve the positions of +/// capturing groups. +/// +/// Note that a [`hybrid::regex::Regex`](crate::hybrid::regex::Regex) strikes a +/// good balance between an NFA and a DFA. It avoids the exponential build time +/// of a DFA while maintaining its fast search time. The downside of a hybrid +/// NFA/DFA is that in some cases it can be slower at search time than the NFA. +/// (It also has less functionality than a pure NFA. It cannot handle Unicode +/// word boundaries on non-ASCII text and cannot resolve capturing groups.) +/// +/// # Example +/// +/// This shows how to build an NFA with the default configuration and execute a +/// search using the Pike VM. +/// +/// ``` +/// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match}; +/// +/// let re = PikeVM::new(r"foo[0-9]+")?; +/// let mut cache = re.create_cache(); +/// let mut caps = re.create_captures(); +/// +/// let expected = Some(Match::must(0, 0..8)); +/// re.captures(&mut cache, b"foo12345", &mut caps); +/// assert_eq!(expected, caps.get_match()); +/// +/// # Ok::<(), Box>(()) +/// ``` +/// +/// # Example: resolving capturing groups +/// +/// This example shows how to parse some simple dates and extract the +/// components of each date via capturing groups. 
+/// +/// ``` +/// # if cfg!(miri) { return Ok(()); } // miri takes too long +/// use regex_automata::{ +/// nfa::thompson::pikevm::PikeVM, +/// util::captures::Captures, +/// }; +/// +/// let vm = PikeVM::new(r"(?P\d{4})-(?P\d{2})-(?P\d{2})")?; +/// let mut cache = vm.create_cache(); +/// +/// let haystack = "2012-03-14, 2013-01-01 and 2014-07-05"; +/// let all: Vec = vm.captures_iter( +/// &mut cache, haystack.as_bytes() +/// ).collect(); +/// // There should be a total of 3 matches. +/// assert_eq!(3, all.len()); +/// // The year from the second match is '2013'. +/// let span = all[1].get_group_by_name("y").unwrap(); +/// assert_eq!("2013", &haystack[span]); +/// +/// # Ok::<(), Box>(()) +/// ``` +/// +/// This example shows that only the last match of a capturing group is +/// reported, even if it had to match multiple times for an overall match +/// to occur. +/// +/// ``` +/// use regex_automata::{nfa::thompson::pikevm::PikeVM, Span}; +/// +/// let re = PikeVM::new(r"([a-z]){4}")?; +/// let mut cache = re.create_cache(); +/// let mut caps = re.create_captures(); +/// +/// let haystack = b"quux"; +/// re.captures(&mut cache, haystack, &mut caps); +/// assert!(caps.is_match()); +/// assert_eq!(Some(Span::from(3..4)), caps.get_group(1)); +/// +/// # Ok::<(), Box>(()) +/// ``` +#[derive(Clone)] +pub struct NFA( + // We make NFAs reference counted primarily for two reasons. First is that + // the NFA type itself is quite large (at least 0.5KB), and so it makes + // sense to put it on the heap by default anyway. Second is that, for Arc + // specifically, this enables cheap clones. This tends to be useful because + // several structures (the backtracker, the Pike VM, the hybrid NFA/DFA) + // all want to hang on to an NFA for use during search time. We could + // provide the NFA at search time via a function argument, but this makes + // for an unnecessarily annoying API. Instead, we just let each structure + // share ownership of the NFA. 
Using a deep clone would not be smart, since + // the NFA can use quite a bit of heap space. + Arc, +); + +impl NFA { + /// Parse the given regular expression using a default configuration and + /// build an NFA from it. + /// + /// If you want a non-default configuration, then use the NFA + /// [`Compiler`] with a [`Config`]. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match}; + /// + /// let re = PikeVM::new(r"foo[0-9]+")?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// + /// let expected = Some(Match::must(0, 0..8)); + /// re.captures(&mut cache, b"foo12345", &mut caps); + /// assert_eq!(expected, caps.get_match()); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[cfg(feature = "syntax")] + pub fn new(pattern: &str) -> Result { + NFA::compiler().build(pattern) + } + + /// Parse the given regular expressions using a default configuration and + /// build a multi-NFA from them. + /// + /// If you want a non-default configuration, then use the NFA + /// [`Compiler`] with a [`Config`]. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match}; + /// + /// let re = PikeVM::new_many(&["[0-9]+", "[a-z]+"])?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// + /// let expected = Some(Match::must(1, 0..3)); + /// re.captures(&mut cache, b"foo12345bar", &mut caps); + /// assert_eq!(expected, caps.get_match()); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[cfg(feature = "syntax")] + pub fn new_many>(patterns: &[P]) -> Result { + NFA::compiler().build_many(patterns) + } + + /// Returns an NFA with a single regex pattern that always matches at every + /// position. 
+ /// + /// # Example + /// + /// ``` + /// use regex_automata::{nfa::thompson::{NFA, pikevm::PikeVM}, Match}; + /// + /// let re = PikeVM::new_from_nfa(NFA::always_match())?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// + /// let expected = Some(Match::must(0, 0..0)); + /// re.captures(&mut cache, b"", &mut caps); + /// assert_eq!(expected, caps.get_match()); + /// re.captures(&mut cache, b"foo", &mut caps); + /// assert_eq!(expected, caps.get_match()); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn always_match() -> NFA { + // We could use NFA::new("") here and we'd get the same semantics, but + // hand-assembling the NFA (as below) does the same thing with a fewer + // number of states. It also avoids needing the 'syntax' feature + // enabled. + // + // Technically all we need is the "match" state, but we add the + // "capture" states so that the PikeVM can use this NFA. + // + // The unwraps below are OK because we add so few states that they will + // never exhaust any default limits in any environment. + let mut builder = Builder::new(); + let pid = builder.start_pattern().unwrap(); + assert_eq!(pid.as_usize(), 0); + let start_id = + builder.add_capture_start(StateID::ZERO, 0, None).unwrap(); + let end_id = builder.add_capture_end(StateID::ZERO, 0).unwrap(); + let match_id = builder.add_match().unwrap(); + builder.patch(start_id, end_id).unwrap(); + builder.patch(end_id, match_id).unwrap(); + let pid = builder.finish_pattern(start_id).unwrap(); + assert_eq!(pid.as_usize(), 0); + builder.build(start_id, start_id).unwrap() + } + + /// Returns an NFA that never matches at any position. + /// + /// This is a convenience routine for creating an NFA with zero patterns. 
+ /// + /// # Example + /// + /// ``` + /// use regex_automata::nfa::thompson::{NFA, pikevm::PikeVM}; + /// + /// let re = PikeVM::new_from_nfa(NFA::never_match())?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// + /// re.captures(&mut cache, b"", &mut caps); + /// assert!(!caps.is_match()); + /// re.captures(&mut cache, b"foo", &mut caps); + /// assert!(!caps.is_match()); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + pub fn never_match() -> NFA { + // This always succeeds because it only requires one NFA state, which + // will never exhaust any (default) limits. + let mut builder = Builder::new(); + let sid = builder.add_fail().unwrap(); + builder.build(sid, sid).unwrap() + } + + /// Return a default configuration for an `NFA`. + /// + /// This is a convenience routine to avoid needing to import the `Config` + /// type when customizing the construction of an NFA. + /// + /// # Example + /// + /// This example shows how to build an NFA with a small size limit that + /// results in a compilation error for any regex that tries to use more + /// heap memory than the configured limit. + /// + /// ``` + /// use regex_automata::nfa::thompson::{NFA, pikevm::PikeVM}; + /// + /// let result = PikeVM::builder() + /// .thompson(NFA::config().nfa_size_limit(Some(1_000))) + /// // Remember, \w is Unicode-aware by default and thus huge. + /// .build(r"\w+"); + /// assert!(result.is_err()); + /// ``` + #[cfg(feature = "syntax")] + pub fn config() -> Config { + Config::new() + } + + /// Return a compiler for configuring the construction of an `NFA`. + /// + /// This is a convenience routine to avoid needing to import the + /// [`Compiler`] type in common cases. + /// + /// # Example + /// + /// This example shows how to build an NFA that is permitted to match invalid + /// UTF-8. Without the additional syntax configuration here, compilation of + /// `(?-u:.)` would fail because it is permitted to match invalid UTF-8. 
+ /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::pikevm::PikeVM, + /// util::syntax, + /// Match, + /// }; + /// + /// let re = PikeVM::builder() + /// .syntax(syntax::Config::new().utf8(false)) + /// .build(r"[a-z]+(?-u:.)")?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// + /// let expected = Some(Match::must(0, 1..5)); + /// re.captures(&mut cache, b"\xFFabc\xFF", &mut caps); + /// assert_eq!(expected, caps.get_match()); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[cfg(feature = "syntax")] + pub fn compiler() -> Compiler { + Compiler::new() + } + + /// Returns an iterator over all pattern identifiers in this NFA. + /// + /// Pattern IDs are allocated in sequential order starting from zero, + /// where the order corresponds to the order of patterns provided to the + /// [`NFA::new_many`] constructor. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{nfa::thompson::NFA, PatternID}; + /// + /// let nfa = NFA::new_many(&["[0-9]+", "[a-z]+", "[A-Z]+"])?; + /// let pids: Vec = nfa.patterns().collect(); + /// assert_eq!(pids, vec![ + /// PatternID::must(0), + /// PatternID::must(1), + /// PatternID::must(2), + /// ]); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn patterns(&self) -> PatternIter<'_> { + PatternIter { + it: PatternID::iter(self.pattern_len()), + _marker: core::marker::PhantomData, + } + } + + /// Returns the total number of regex patterns in this NFA. + /// + /// This may return zero if the NFA was constructed with no patterns. In + /// this case, the NFA can never produce a match for any input. + /// + /// This is guaranteed to be no bigger than [`PatternID::LIMIT`] because + /// NFA construction will fail if too many patterns are added. + /// + /// It is always true that `nfa.patterns().count() == nfa.pattern_len()`. 
+ /// + /// # Example + /// + /// ``` + /// use regex_automata::nfa::thompson::NFA; + /// + /// let nfa = NFA::new_many(&["[0-9]+", "[a-z]+", "[A-Z]+"])?; + /// assert_eq!(3, nfa.pattern_len()); + /// + /// let nfa = NFA::never_match(); + /// assert_eq!(0, nfa.pattern_len()); + /// + /// let nfa = NFA::always_match(); + /// assert_eq!(1, nfa.pattern_len()); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn pattern_len(&self) -> usize { + self.0.start_pattern.len() + } + + /// Return the state identifier of the initial anchored state of this NFA. + /// + /// The returned identifier is guaranteed to be a valid index into the + /// slice returned by [`NFA::states`], and is also a valid argument to + /// [`NFA::state`]. + /// + /// # Example + /// + /// This example shows a somewhat contrived example where we can easily + /// predict the anchored starting state. + /// + /// ``` + /// use regex_automata::nfa::thompson::{NFA, State, WhichCaptures}; + /// + /// let nfa = NFA::compiler() + /// .configure(NFA::config().which_captures(WhichCaptures::None)) + /// .build("a")?; + /// let state = nfa.state(nfa.start_anchored()); + /// match *state { + /// State::ByteRange { trans } => { + /// assert_eq!(b'a', trans.start); + /// assert_eq!(b'a', trans.end); + /// } + /// _ => unreachable!("unexpected state"), + /// } + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn start_anchored(&self) -> StateID { + self.0.start_anchored + } + + /// Return the state identifier of the initial unanchored state of this + /// NFA. + /// + /// This is equivalent to the identifier returned by + /// [`NFA::start_anchored`] when the NFA has no unanchored starting state. + /// + /// The returned identifier is guaranteed to be a valid index into the + /// slice returned by [`NFA::states`], and is also a valid argument to + /// [`NFA::state`]. 
+ /// + /// # Example + /// + /// This example shows that the anchored and unanchored starting states + /// are equivalent when an anchored NFA is built. + /// + /// ``` + /// use regex_automata::nfa::thompson::NFA; + /// + /// let nfa = NFA::new("^a")?; + /// assert_eq!(nfa.start_anchored(), nfa.start_unanchored()); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + #[inline] + pub fn start_unanchored(&self) -> StateID { + self.0.start_unanchored + } + + /// Return the state identifier of the initial anchored state for the given + /// pattern, or `None` if there is no pattern corresponding to the given + /// identifier. + /// + /// If one uses the starting state for a particular pattern, then the only + /// match that can be returned is for the corresponding pattern. + /// + /// The returned identifier is guaranteed to be a valid index into the + /// slice returned by [`NFA::states`], and is also a valid argument to + /// [`NFA::state`]. + /// + /// # Errors + /// + /// If the pattern doesn't exist in this NFA, then this returns `None`. + /// This occurs when `pid.as_usize() >= nfa.pattern_len()`. + /// + /// # Example + /// + /// This example shows that, even when all patterns are anchored, each + /// pattern still has its own distinct anchored starting state. + /// + /// ``` + /// use regex_automata::{nfa::thompson::NFA, PatternID}; + /// + /// let nfa = NFA::new_many(&["^a", "^b"])?; + /// // The anchored and unanchored states for the entire NFA are the same, + /// // since all of the patterns are anchored. + /// assert_eq!(nfa.start_anchored(), nfa.start_unanchored()); + /// // But the anchored starting states for each pattern are distinct, + /// // because these starting states can only lead to matches for the + /// // corresponding pattern. 
+ /// let anchored = Some(nfa.start_anchored()); + /// assert_ne!(anchored, nfa.start_pattern(PatternID::must(0))); + /// assert_ne!(anchored, nfa.start_pattern(PatternID::must(1))); + /// // Requesting a pattern not in the NFA will result in None: + /// assert_eq!(None, nfa.start_pattern(PatternID::must(2))); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn start_pattern(&self, pid: PatternID) -> Option { + self.0.start_pattern.get(pid.as_usize()).copied() + } + + /// Get the byte class set for this NFA. + /// + /// A byte class set is a partitioning of this NFA's alphabet into + /// equivalence classes. Any two bytes in the same equivalence class are + /// guaranteed to never discriminate between a match or a non-match. (The + /// partitioning may not be minimal.) + /// + /// Byte classes are used internally by this crate when building DFAs. + /// Namely, among other optimizations, they enable a space optimization + /// where the DFA's internal alphabet is defined over the equivalence + /// classes of bytes instead of all possible byte values. The former is + /// often quite a bit smaller than the latter, which permits the DFA to use + /// less space for its transition table. + #[inline] + pub(crate) fn byte_class_set(&self) -> &ByteClassSet { + &self.0.byte_class_set + } + + /// Get the byte classes for this NFA. + /// + /// Byte classes represent a partitioning of this NFA's alphabet into + /// equivalence classes. Any two bytes in the same equivalence class are + /// guaranteed to never discriminate between a match or a non-match. (The + /// partitioning may not be minimal.) + /// + /// Byte classes are used internally by this crate when building DFAs. + /// Namely, among other optimizations, they enable a space optimization + /// where the DFA's internal alphabet is defined over the equivalence + /// classes of bytes instead of all possible byte values. 
The former is + /// often quite a bit smaller than the latter, which permits the DFA to use + /// less space for its transition table. + /// + /// # Example + /// + /// This example shows how to query the class of various bytes. + /// + /// ``` + /// use regex_automata::nfa::thompson::NFA; + /// + /// let nfa = NFA::new("[a-z]+")?; + /// let classes = nfa.byte_classes(); + /// // 'a' and 'z' are in the same class for this regex. + /// assert_eq!(classes.get(b'a'), classes.get(b'z')); + /// // But 'a' and 'A' are not. + /// assert_ne!(classes.get(b'a'), classes.get(b'A')); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn byte_classes(&self) -> &ByteClasses { + &self.0.byte_classes + } + + /// Return a reference to the NFA state corresponding to the given ID. + /// + /// This is a convenience routine for `nfa.states()[id]`. + /// + /// # Panics + /// + /// This panics when the given identifier does not reference a valid state. + /// That is, when `id.as_usize() >= nfa.states().len()`. + /// + /// # Example + /// + /// The anchored state for a pattern will typically correspond to a + /// capturing state for that pattern. (Although, this is not an API + /// guarantee!) + /// + /// ``` + /// use regex_automata::{nfa::thompson::{NFA, State}, PatternID}; + /// + /// let nfa = NFA::new("a")?; + /// let state = nfa.state(nfa.start_pattern(PatternID::ZERO).unwrap()); + /// match *state { + /// State::Capture { slot, .. } => { + /// assert_eq!(0, slot.as_usize()); + /// } + /// _ => unreachable!("unexpected state"), + /// } + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn state(&self, id: StateID) -> &State { + &self.states()[id] + } + + /// Returns a slice of all states in this NFA. + /// + /// The slice returned is indexed by `StateID`. This provides a convenient + /// way to access states while following transitions among those states. 
+ /// + /// # Example + /// + /// This demonstrates that disabling Unicode mode can shrink the size of the + /// NFA considerably in some cases, especially when using Unicode character + /// classes. + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::nfa::thompson::NFA; + /// + /// let nfa_unicode = NFA::new(r"\w")?; + /// let nfa_ascii = NFA::new(r"(?-u)\w")?; + /// // Yes, a factor of 45 difference. No lie. + /// assert!(40 * nfa_ascii.states().len() < nfa_unicode.states().len()); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + #[inline] + pub fn states(&self) -> &[State] { + &self.0.states + } + + /// Returns the capturing group info for this NFA. + /// + /// The [`GroupInfo`] provides a way to map to and from capture index + /// and capture name for each pattern. It also provides a mapping from + /// each of the capturing groups in every pattern to their corresponding + /// slot offsets encoded in [`State::Capture`] states. + /// + /// Note that `GroupInfo` uses reference counting internally, such that + /// cloning a `GroupInfo` is very cheap. + /// + /// # Example + /// + /// This example shows how to get a list of all capture group names for + /// a particular pattern. + /// + /// ``` + /// use regex_automata::{nfa::thompson::NFA, PatternID}; + /// + /// let nfa = NFA::new(r"(a)(?P<foo>b)(c)(d)(?P<bar>e)")?; + /// // The first is the implicit group that is always unnamed. The next + /// // 5 groups are the explicit groups found in the concrete syntax above. + /// let expected = vec![None, None, Some("foo"), None, None, Some("bar")]; + /// let got: Vec<Option<&str>> = + /// nfa.group_info().pattern_names(PatternID::ZERO).collect(); + /// assert_eq!(expected, got); + /// + /// // Using an invalid pattern ID will result in nothing yielded. 
+ /// let got = nfa.group_info().pattern_names(PatternID::must(999)).count(); + /// assert_eq!(0, got); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn group_info(&self) -> &GroupInfo { + &self.0.group_info() + } + + /// Returns true if and only if this NFA has at least one + /// [`Capture`](State::Capture) in its sequence of states. + /// + /// This is useful as a way to perform a quick test before attempting + /// something that does or does not require capture states. For example, + /// some regex engines (like the PikeVM) require capture states in order to + /// work at all. + /// + /// # Example + /// + /// This example shows a few different NFAs and whether they have captures + /// or not. + /// + /// ``` + /// use regex_automata::nfa::thompson::{NFA, WhichCaptures}; + /// + /// // Obviously has capture states. + /// let nfa = NFA::new("(a)")?; + /// assert!(nfa.has_capture()); + /// + /// // Less obviously has capture states, because every pattern has at + /// // least one anonymous capture group corresponding to the match for the + /// // entire pattern. + /// let nfa = NFA::new("a")?; + /// assert!(nfa.has_capture()); + /// + /// // Other than hand building your own NFA, this is the only way to build + /// // an NFA without capturing groups. In general, you should only do this + /// // if you don't intend to use any of the NFA-oriented regex engines. + /// // Overall, capturing groups don't have many downsides. Although they + /// // can add a bit of noise to simple NFAs, so it can be nice to disable + /// // them for debugging purposes. + /// // + /// // Notice that 'has_capture' is false here even when we have an + /// // explicit capture group in the pattern. 
+ /// let nfa = NFA::compiler() + /// .configure(NFA::config().which_captures(WhichCaptures::None)) + /// .build("(a)")?; + /// assert!(!nfa.has_capture()); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + #[inline] + pub fn has_capture(&self) -> bool { + self.0.has_capture + } + + /// Returns true if and only if this NFA can match the empty string. + /// When it returns false, all possible matches are guaranteed to have a + /// non-zero length. + /// + /// This is useful as a cheap way to know whether code needs to handle the + /// case of a zero length match. This is particularly important when UTF-8 + /// mode is enabled, since in that case empty matches that + /// split a codepoint must never be reported. This extra handling can + /// sometimes be costly, and since regexes matching an empty string are + /// somewhat rare, it can be beneficial to treat such regexes specially. + /// + /// # Example + /// + /// This example shows a few different NFAs and whether they match the + /// empty string or not. Notice the empty string isn't merely a matter + /// of a string of length literally `0`, but rather, whether a match can + /// occur between specific pairs of bytes. + /// + /// ``` + /// use regex_automata::{nfa::thompson::NFA, util::syntax}; + /// + /// // The empty regex matches the empty string. + /// let nfa = NFA::new("")?; + /// assert!(nfa.has_empty(), "empty matches empty"); + /// // The '+' repetition operator requires at least one match, and so + /// // does not match the empty string. + /// let nfa = NFA::new("a+")?; + /// assert!(!nfa.has_empty(), "+ does not match empty"); + /// // But the '*' repetition operator does. + /// let nfa = NFA::new("a*")?; + /// assert!(nfa.has_empty(), "* does match empty"); + /// // And wrapping '+' in an operator that can match an empty string also + /// // causes it to match the empty string too. 
+ /// let nfa = NFA::new("(a+)*")?; + /// assert!(nfa.has_empty(), "+ inside of * matches empty"); + /// + /// // If a regex is just made of a look-around assertion, even if the + /// // assertion requires some kind of non-empty string around it (such as + /// // \b), then it is still treated as if it matches the empty string. + /// // Namely, if a match occurs of just a look-around assertion, then the + /// // match returned is empty. + /// let nfa = NFA::compiler() + /// .syntax(syntax::Config::new().utf8(false)) + /// .build(r"^$\A\z\b\B(?-u:\b\B)")?; + /// assert!(nfa.has_empty(), "assertions match empty"); + /// // Even when an assertion is wrapped in a '+', it still matches the + /// // empty string. + /// let nfa = NFA::new(r"\b+")?; + /// assert!(nfa.has_empty(), "+ of an assertion matches empty"); + /// + /// // An alternation with even one branch that can match the empty string + /// // is also said to match the empty string overall. + /// let nfa = NFA::new("foo|(bar)?|quux")?; + /// assert!(nfa.has_empty(), "alternations can match empty"); + /// + /// // An NFA that matches nothing does not match the empty string. + /// let nfa = NFA::new("[a&&b]")?; + /// assert!(!nfa.has_empty(), "never matching means not matching empty"); + /// // But if it's wrapped in something that doesn't require a match at + /// // all, then it can match the empty string! + /// let nfa = NFA::new("[a&&b]*")?; + /// assert!(nfa.has_empty(), "* on never-match still matches empty"); + /// // Since a '+' requires a match, using it on something that can never + /// // match will itself produce a regex that can never match anything, + /// // and thus does not match the empty string. + /// let nfa = NFA::new("[a&&b]+")?; + /// assert!(!nfa.has_empty(), "+ on never-match still matches nothing"); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn has_empty(&self) -> bool { + self.0.has_empty + } + + /// Whether UTF-8 mode is enabled for this NFA or not. 
+ /// + /// When UTF-8 mode is enabled, all matches reported by a regex engine + /// derived from this NFA are guaranteed to correspond to spans of valid + /// UTF-8. This includes zero-width matches. For example, the regex engine + /// must guarantee that the empty regex will not match at the positions + /// between code units in the UTF-8 encoding of a single codepoint. + /// + /// See [`Config::utf8`] for more information. + /// + /// This is enabled by default. + /// + /// # Example + /// + /// This example shows how UTF-8 mode can impact the match spans that may + /// be reported in certain cases. + /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::{self, pikevm::PikeVM}, + /// Match, Input, + /// }; + /// + /// let re = PikeVM::new("")?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// + /// // UTF-8 mode is enabled by default. + /// let mut input = Input::new("☃"); + /// re.search(&mut cache, &input, &mut caps); + /// assert_eq!(Some(Match::must(0, 0..0)), caps.get_match()); + /// + /// // Even though an empty regex matches at 1..1, our next match is + /// // 3..3 because 1..1 and 2..2 split the snowman codepoint (which is + /// // three bytes long). 
+ /// input.set_start(1); + /// re.search(&mut cache, &input, &mut caps); + /// assert_eq!(Some(Match::must(0, 3..3)), caps.get_match()); + /// + /// // But if we disable UTF-8, then we'll get matches at 1..1 and 2..2: + /// let re = PikeVM::builder() + /// .thompson(thompson::Config::new().utf8(false)) + /// .build("")?; + /// re.search(&mut cache, &input, &mut caps); + /// assert_eq!(Some(Match::must(0, 1..1)), caps.get_match()); + /// + /// input.set_start(2); + /// re.search(&mut cache, &input, &mut caps); + /// assert_eq!(Some(Match::must(0, 2..2)), caps.get_match()); + /// + /// input.set_start(3); + /// re.search(&mut cache, &input, &mut caps); + /// assert_eq!(Some(Match::must(0, 3..3)), caps.get_match()); + /// + /// input.set_start(4); + /// re.search(&mut cache, &input, &mut caps); + /// assert_eq!(None, caps.get_match()); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn is_utf8(&self) -> bool { + self.0.utf8 + } + + /// Returns true when this NFA is meant to be matched in reverse. + /// + /// Generally speaking, when this is true, it means the NFA is supposed to + /// be used in conjunction with moving backwards through the haystack. That + /// is, from a higher memory address to a lower memory address. + /// + /// It is often the case that lower level routines dealing with an NFA + /// don't need to care about whether it is "meant" to be matched in reverse + /// or not. However, there are some specific cases where it matters. For + /// example, the implementation of CRLF-aware `^` and `$` line anchors + /// needs to know whether the search is in the forward or reverse + /// direction. In the forward direction, neither `^` nor `$` should match + /// when a `\r` has been seen previously and a `\n` is next. However, in + /// the reverse direction, neither `^` nor `$` should match when a `\n` + /// has been seen previously and a `\r` is next. 
This fundamentally changes + /// how the state machine is constructed, and thus needs to be altered + /// based on the direction of the search. + /// + /// This is automatically set when using a [`Compiler`] with a configuration + /// where [`Config::reverse`] is enabled. If you're building your own NFA + /// by hand via a [`Builder`], then set it with [`Builder::set_reverse`]. + #[inline] + pub fn is_reverse(&self) -> bool { + self.0.reverse + } + + /// Returns true if and only if all starting states for this NFA correspond + /// to the beginning of an anchored search. + /// + /// Typically, an NFA will have both an anchored and an unanchored starting + /// state. Namely, because it tends to be useful to have both and the cost + /// of having an unanchored starting state is almost zero (for an NFA). + /// However, if all patterns in the NFA are themselves anchored, then even + /// the unanchored starting state will correspond to an anchored search + /// since the pattern doesn't permit anything else. + /// + /// # Example + /// + /// This example shows a few different scenarios where this method's + /// return value varies. + /// + /// ``` + /// use regex_automata::nfa::thompson::NFA; + /// + /// // The unanchored starting state permits matching this pattern anywhere + /// // in a haystack, instead of just at the beginning. + /// let nfa = NFA::new("a")?; + /// assert!(!nfa.is_always_start_anchored()); + /// + /// // In this case, the pattern is itself anchored, so there is no way + /// // to run an unanchored search. + /// let nfa = NFA::new("^a")?; + /// assert!(nfa.is_always_start_anchored()); + /// + /// // When multiline mode is enabled, '^' can match at the start of a line + /// // in addition to the start of a haystack, so an unanchored search is + /// // actually possible. + /// let nfa = NFA::new("(?m)^a")?; + /// assert!(!nfa.is_always_start_anchored()); + /// + /// // Weird cases also work. A pattern is only considered anchored if all + /// // matches may only occur at the start of a haystack. 
+ /// let nfa = NFA::new("(^a)|a")?; + /// assert!(!nfa.is_always_start_anchored()); + /// + /// // When multiple patterns are present, if they are all anchored, then + /// // the NFA is always anchored too. + /// let nfa = NFA::new_many(&["^a", "^b", "^c"])?; + /// assert!(nfa.is_always_start_anchored()); + /// + /// // But if one pattern is unanchored, then the NFA must permit an + /// // unanchored search. + /// let nfa = NFA::new_many(&["^a", "b", "^c"])?; + /// assert!(!nfa.is_always_start_anchored()); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn is_always_start_anchored(&self) -> bool { + self.start_anchored() == self.start_unanchored() + } + + /// Returns the look-around matcher associated with this NFA. + /// + /// A look-around matcher determines how to match look-around assertions. + /// In particular, some assertions are configurable. For example, the + /// `(?m:^)` and `(?m:$)` assertions can have their line terminator changed + /// from the default of `\n` to any other byte. + /// + /// If the NFA was built using a [`Compiler`], then this matcher + /// can be set via the [`Config::look_matcher`] configuration + /// knob. Otherwise, if you've built an NFA by hand, it is set via + /// [`Builder::set_look_matcher`]. + /// + /// # Example + /// + /// This shows how to change the line terminator for multi-line assertions. + /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::{self, pikevm::PikeVM}, + /// util::look::LookMatcher, + /// Match, Input, + /// }; + /// + /// let mut lookm = LookMatcher::new(); + /// lookm.set_line_terminator(b'\x00'); + /// + /// let re = PikeVM::builder() + /// .thompson(thompson::Config::new().look_matcher(lookm)) + /// .build(r"(?m)^[a-z]+$")?; + /// let mut cache = re.create_cache(); + /// + /// // Multi-line assertions now use NUL as a terminator. + /// assert_eq!( + /// Some(Match::must(0, 1..4)), + /// re.find(&mut cache, b"\x00abc\x00"), + /// ); + /// // ... 
and \n is no longer recognized as a terminator. + /// assert_eq!( + /// None, + /// re.find(&mut cache, b"\nabc\n"), + /// ); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn look_matcher(&self) -> &LookMatcher { + &self.0.look_matcher + } + + /// Returns the union of all look-around assertions used throughout this + /// NFA. When the returned set is empty, it implies that the NFA has no + /// look-around assertions and thus zero conditional epsilon transitions. + /// + /// This is useful in some cases enabling optimizations. It is not + /// unusual, for example, for optimizations to be of the form, "for any + /// regex with zero conditional epsilon transitions, do ..." where "..." + /// is some kind of optimization. + /// + /// This isn't only helpful for optimizations either. Sometimes look-around + /// assertions are difficult to support. For example, many of the DFAs in + /// this crate don't support Unicode word boundaries or handle them using + /// heuristics. Handling that correctly typically requires some kind of + /// cheap check of whether the NFA has a Unicode word boundary in the first + /// place. + /// + /// # Example + /// + /// This example shows how this routine varies based on the regex pattern: + /// + /// ``` + /// use regex_automata::{nfa::thompson::NFA, util::look::Look}; + /// + /// // No look-around at all. + /// let nfa = NFA::new("a")?; + /// assert!(nfa.look_set_any().is_empty()); + /// + /// // When multiple patterns are present, since this returns the union, + /// // it will include look-around assertions that only appear in one + /// // pattern. + /// let nfa = NFA::new_many(&["a", "b", "a^b", "c"])?; + /// assert!(nfa.look_set_any().contains(Look::Start)); + /// + /// // Some groups of assertions have various shortcuts. 
For example: + /// let nfa = NFA::new(r"(?-u:\b)")?; + /// assert!(nfa.look_set_any().contains_word()); + /// assert!(!nfa.look_set_any().contains_word_unicode()); + /// assert!(nfa.look_set_any().contains_word_ascii()); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn look_set_any(&self) -> LookSet { + self.0.look_set_any + } + + /// Returns the union of all prefix look-around assertions for every + /// pattern in this NFA. When the returned set is empty, it implies none of + /// the patterns require moving through a conditional epsilon transition + /// before inspecting the first byte in the haystack. + /// + /// This can be useful for determining what kinds of assertions need to be + /// satisfied at the beginning of a search. For example, typically DFAs + /// in this crate will build a distinct starting state for each possible + /// starting configuration that might result in look-around assertions + /// being satisfied differently. However, if the set returned here is + /// empty, then you know that the start state is invariant because there + /// are no conditional epsilon transitions to consider. + /// + /// # Example + /// + /// This example shows how this routine varies based on the regex pattern: + /// + /// ``` + /// use regex_automata::{nfa::thompson::NFA, util::look::Look}; + /// + /// // No look-around at all. + /// let nfa = NFA::new("a")?; + /// assert!(nfa.look_set_prefix_any().is_empty()); + /// + /// // When multiple patterns are present, since this returns the union, + /// // it will include look-around assertions that only appear in one + /// // pattern. But it will only include assertions that are in the prefix + /// // of a pattern. For example, this includes '^' but not '$' even though + /// // '$' does appear. 
+ /// let nfa = NFA::new_many(&["a", "b", "^ab$", "c"])?; + /// assert!(nfa.look_set_prefix_any().contains(Look::Start)); + /// assert!(!nfa.look_set_prefix_any().contains(Look::End)); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn look_set_prefix_any(&self) -> LookSet { + self.0.look_set_prefix_any + } + + // FIXME: The `look_set_prefix_all` computation was not correct, and it + // seemed a little tricky to fix it. Since I wasn't actually using it for + // anything, I just decided to remove it in the run up to the regex 1.9 + // release. If you need this, please file an issue. + /* + /// Returns the intersection of all prefix look-around assertions for every + /// pattern in this NFA. When the returned set is empty, it implies at + /// least one of the patterns does not require moving through a conditional + /// epsilon transition before inspecting the first byte in the haystack. + /// Conversely, when the set contains an assertion, it implies that every + /// pattern in the NFA also contains that assertion in its prefix. + /// + /// This can be useful for determining what kinds of assertions need to be + /// satisfied at the beginning of a search. For example, if you know that + /// [`Look::Start`] is in the prefix intersection set returned here, then + /// you know that all searches, regardless of input configuration, will be + /// anchored. + /// + /// # Example + /// + /// This example shows how this routine varies based on the regex pattern: + /// + /// ``` + /// use regex_automata::{nfa::thompson::NFA, util::look::Look}; + /// + /// // No look-around at all. + /// let nfa = NFA::new("a")?; + /// assert!(nfa.look_set_prefix_all().is_empty()); + /// + /// // When multiple patterns are present, since this returns the + /// // intersection, it will only include assertions present in every + /// // prefix, and only the prefix. 
+ /// let nfa = NFA::new_many(&["^a$", "^b$", "$^ab$", "^c$"])?; + /// assert!(nfa.look_set_prefix_all().contains(Look::Start)); + /// assert!(!nfa.look_set_prefix_all().contains(Look::End)); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn look_set_prefix_all(&self) -> LookSet { + self.0.look_set_prefix_all + } + */ + + /// Returns the memory usage, in bytes, of this NFA. + /// + /// This does **not** include the stack size used up by this NFA. To + /// compute that, use `std::mem::size_of::()`. + /// + /// # Example + /// + /// This example shows that large Unicode character classes can use quite + /// a bit of memory. + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::nfa::thompson::NFA; + /// + /// let nfa_unicode = NFA::new(r"\w")?; + /// let nfa_ascii = NFA::new(r"(?-u:\w)")?; + /// + /// assert!(10 * nfa_ascii.memory_usage() < nfa_unicode.memory_usage()); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn memory_usage(&self) -> usize { + use core::mem::size_of; + + size_of::() // allocated on the heap via Arc + + self.0.states.len() * size_of::() + + self.0.start_pattern.len() * size_of::() + + self.0.group_info.memory_usage() + + self.0.memory_extra + } +} + +impl fmt::Debug for NFA { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.0.fmt(f) + } +} + +/// The "inner" part of the NFA. We split this part out so that we can easily +/// wrap it in an `Arc` above in the definition of `NFA`. +/// +/// See builder.rs for the code that actually builds this type. This module +/// does provide (internal) mutable methods for adding things to this +/// NFA before finalizing it, but the high level construction process is +/// controlled by the builder abstraction. (Which is complicated enough to +/// get its own module.) +#[derive(Default)] +pub(super) struct Inner { + /// The state sequence. 
This sequence is guaranteed to be indexable by all + /// starting state IDs, and it is also guaranteed to contain at most one + /// `Match` state for each pattern compiled into this NFA. (A pattern may + /// not have a corresponding `Match` state if a `Match` state is impossible + /// to reach.) + states: Vec, + /// The anchored starting state of this NFA. + start_anchored: StateID, + /// The unanchored starting state of this NFA. + start_unanchored: StateID, + /// The starting states for each individual pattern. Starting at any + /// of these states will result in only an anchored search for the + /// corresponding pattern. The vec is indexed by pattern ID. When the NFA + /// contains a single regex, then `start_pattern[0]` and `start_anchored` + /// are always equivalent. + start_pattern: Vec, + /// Info about the capturing groups in this NFA. This is responsible for + /// mapping groups to slots, mapping groups to names and names to groups. + group_info: GroupInfo, + /// A representation of equivalence classes over the transitions in this + /// NFA. Two bytes in the same equivalence class must not discriminate + /// between a match or a non-match. This map can be used to shrink the + /// total size of a DFA's transition table with a small match-time cost. + /// + /// Note that the NFA's transitions are *not* defined in terms of these + /// equivalence classes. The NFA's transitions are defined on the original + /// byte values. For the most part, this is because they wouldn't really + /// help the NFA much since the NFA already uses a sparse representation + /// to represent transitions. Byte classes are most effective in a dense + /// representation. + byte_class_set: ByteClassSet, + /// This is generated from `byte_class_set`, and essentially represents the + /// same thing but supports different access patterns. Namely, this permits + /// looking up the equivalence class of a byte very cheaply. 
+ /// + /// Ideally we would just store this, but because of annoying code + /// structure reasons, we keep both this and `byte_class_set` around for + /// now. I think I would prefer that `byte_class_set` were computed in the + /// `Builder`, but right now, we compute it as states are added to the + /// `NFA`. + byte_classes: ByteClasses, + /// Whether this NFA has a `Capture` state anywhere. + has_capture: bool, + /// When the empty string is in the language matched by this NFA. + has_empty: bool, + /// Whether UTF-8 mode is enabled for this NFA. Briefly, this means that + /// all non-empty matches produced by this NFA correspond to spans of valid + /// UTF-8, and any empty matches produced by this NFA that split a UTF-8 + /// encoded codepoint should be filtered out by the corresponding regex + /// engine. + utf8: bool, + /// Whether this NFA is meant to be matched in reverse or not. + reverse: bool, + /// The matcher to be used for look-around assertions. + look_matcher: LookMatcher, + /// The union of all look-around assertions that occur anywhere within + /// this NFA. If this set is empty, then it means there are precisely zero + /// conditional epsilon transitions in the NFA. + look_set_any: LookSet, + /// The union of all look-around assertions that occur as a zero-length + /// prefix for any of the patterns in this NFA. + look_set_prefix_any: LookSet, + /* + /// The intersection of all look-around assertions that occur as a + /// zero-length prefix for any of the patterns in this NFA. + look_set_prefix_all: LookSet, + */ + /// Heap memory used indirectly by NFA states and other things (like the + /// various capturing group representations above). Since each state + /// might use a different amount of heap, we need to keep track of this + /// incrementally. + memory_extra: usize, +} + +impl Inner { + /// Runs any last finalization bits and turns this into a full NFA. 
+ pub(super) fn into_nfa(mut self) -> NFA { + self.byte_classes = self.byte_class_set.byte_classes(); + // Do epsilon closure from the start state of every pattern in order + // to compute various properties such as look-around assertions and + // whether the empty string can be matched. + let mut stack = vec![]; + let mut seen = SparseSet::new(self.states.len()); + for &start_id in self.start_pattern.iter() { + stack.push(start_id); + seen.clear(); + // let mut prefix_all = LookSet::full(); + let mut prefix_any = LookSet::empty(); + while let Some(sid) = stack.pop() { + if !seen.insert(sid) { + continue; + } + match self.states[sid] { + State::ByteRange { .. } + | State::Dense { .. } + | State::Fail => continue, + State::Sparse(_) => { + // This snippet below will rewrite this sparse state + // as a dense state. By doing it here, we apply this + // optimization to all hot "sparse" states since these + // are the states that are reachable from the start + // state via an epsilon closure. + // + // Unfortunately, this optimization did not seem to + // help much in some very limited ad hoc benchmarking. + // + // I left the 'Dense' state type in place in case we + // want to revisit this, but I suspect the real way + // to make forward progress is a more fundamental + // rearchitecting of how data in the NFA is laid out. + // I think we should consider a single contiguous + // allocation instead of all this indirection and + // potential heap allocations for every state. But this + // is a large re-design and will require API breaking + // changes. + // self.memory_extra -= self.states[sid].memory_usage(); + // let trans = DenseTransitions::from_sparse(sparse); + // self.states[sid] = State::Dense(trans); + // self.memory_extra += self.states[sid].memory_usage(); + continue; + } + State::Match { .. 
} => self.has_empty = true, + State::Look { look, next } => { + prefix_any = prefix_any.insert(look); + stack.push(next); + } + State::Union { ref alternates } => { + // Order doesn't matter here, since we're just dealing + // with look-around sets. But if we do richer analysis + // here that needs to care about preference order, then + // this should be done in reverse. + stack.extend(alternates.iter()); + } + State::BinaryUnion { alt1, alt2 } => { + stack.push(alt2); + stack.push(alt1); + } + State::Capture { next, .. } => { + stack.push(next); + } + } + } + self.look_set_prefix_any = + self.look_set_prefix_any.union(prefix_any); + } + NFA(Arc::new(self)) + } + + /// Returns the capturing group info for this NFA. + pub(super) fn group_info(&self) -> &GroupInfo { + &self.group_info + } + + /// Add the given state to this NFA after allocating a fresh identifier for + /// it. + /// + /// This panics if too many states are added such that a fresh identifier + /// could not be created. (Currently, the only caller of this routine is + /// a `Builder`, and it upholds this invariant.) + pub(super) fn add(&mut self, state: State) -> StateID { + match state { + State::ByteRange { ref trans } => { + self.byte_class_set.set_range(trans.start, trans.end); + } + State::Sparse(ref sparse) => { + for trans in sparse.transitions.iter() { + self.byte_class_set.set_range(trans.start, trans.end); + } + } + State::Dense { .. } => unreachable!(), + State::Look { look, .. } => { + self.look_matcher + .add_to_byteset(look, &mut self.byte_class_set); + self.look_set_any = self.look_set_any.insert(look); + } + State::Capture { .. } => { + self.has_capture = true; + } + State::Union { .. } + | State::BinaryUnion { .. } + | State::Fail + | State::Match { .. } => {} + } + + let id = StateID::new(self.states.len()).unwrap(); + self.memory_extra += state.memory_usage(); + self.states.push(state); + id + } + + /// Set the starting state identifiers for this NFA. 
+ /// + /// `start_anchored` and `start_unanchored` may be equivalent. When they + /// are, then the NFA can only execute anchored searches. This might + /// occur, for example, for patterns that are unconditionally anchored. + /// e.g., `^foo`. + pub(super) fn set_starts( + &mut self, + start_anchored: StateID, + start_unanchored: StateID, + start_pattern: &[StateID], + ) { + self.start_anchored = start_anchored; + self.start_unanchored = start_unanchored; + self.start_pattern = start_pattern.to_vec(); + } + + /// Sets the UTF-8 mode of this NFA. + pub(super) fn set_utf8(&mut self, yes: bool) { + self.utf8 = yes; + } + + /// Sets the reverse mode of this NFA. + pub(super) fn set_reverse(&mut self, yes: bool) { + self.reverse = yes; + } + + /// Sets the look-around assertion matcher for this NFA. + pub(super) fn set_look_matcher(&mut self, m: LookMatcher) { + self.look_matcher = m; + } + + /// Set the capturing groups for this NFA. + /// + /// The given slice should contain the capturing groups for each pattern, + /// The capturing groups in turn should correspond to the total number of + /// capturing groups in the pattern, including the anonymous first capture + /// group for each pattern. If a capturing group does have a name, then it + /// should be provided as a Arc. + /// + /// This returns an error if a corresponding `GroupInfo` could not be + /// built. + pub(super) fn set_captures( + &mut self, + captures: &[Vec>>], + ) -> Result<(), GroupInfoError> { + self.group_info = GroupInfo::new( + captures.iter().map(|x| x.iter().map(|y| y.as_ref())), + )?; + Ok(()) + } + + /// Remap the transitions in every state of this NFA using the given map. + /// The given map should be indexed according to state ID namespace used by + /// the transitions of the states currently in this NFA. + /// + /// This is particularly useful to the NFA builder, since it is convenient + /// to add NFA states in order to produce their final IDs. 
Then, after all + /// of the intermediate "empty" states (unconditional epsilon transitions) + /// have been removed from the builder's representation, we can re-map all + /// of the transitions in the states already added to their final IDs. + pub(super) fn remap(&mut self, old_to_new: &[StateID]) { + for state in &mut self.states { + state.remap(old_to_new); + } + self.start_anchored = old_to_new[self.start_anchored]; + self.start_unanchored = old_to_new[self.start_unanchored]; + for id in self.start_pattern.iter_mut() { + *id = old_to_new[*id]; + } + } +} + +impl fmt::Debug for Inner { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + writeln!(f, "thompson::NFA(")?; + for (sid, state) in self.states.iter().with_state_ids() { + let status = if sid == self.start_anchored { + '^' + } else if sid == self.start_unanchored { + '>' + } else { + ' ' + }; + writeln!(f, "{}{:06?}: {:?}", status, sid.as_usize(), state)?; + } + let pattern_len = self.start_pattern.len(); + if pattern_len > 1 { + writeln!(f, "")?; + for pid in 0..pattern_len { + let sid = self.start_pattern[pid]; + writeln!(f, "START({:06?}): {:?}", pid, sid.as_usize())?; + } + } + writeln!(f, "")?; + writeln!( + f, + "transition equivalence classes: {:?}", + self.byte_classes, + )?; + writeln!(f, ")")?; + Ok(()) + } +} + +/// A state in an NFA. +/// +/// In theory, it can help to conceptualize an `NFA` as a graph consisting of +/// `State`s. Each `State` contains its complete set of outgoing transitions. +/// +/// In practice, it can help to conceptualize an `NFA` as a sequence of +/// instructions for a virtual machine. Each `State` says what to do and where +/// to go next. +/// +/// Strictly speaking, the practical interpretation is the most correct one, +/// because of the [`Capture`](State::Capture) state. Namely, a `Capture` +/// state always forwards execution to another state unconditionally. 
Its only +/// purpose is to cause a side effect: the recording of the current input +/// position at a particular location in memory. In this sense, an `NFA` +/// has more power than a theoretical non-deterministic finite automaton. +/// +/// For most uses of this crate, it is likely that one may never even need to +/// be aware of this type at all. The main use cases for looking at `State`s +/// directly are if you need to write your own search implementation or if you +/// need to do some kind of analysis on the NFA. +#[derive(Clone, Eq, PartialEq)] +pub enum State { + /// A state with a single transition that can only be taken if the current + /// input symbol is in a particular range of bytes. + ByteRange { + /// The transition from this state to the next. + trans: Transition, + }, + /// A state with possibly many transitions represented in a sparse fashion. + /// Transitions are non-overlapping and ordered lexicographically by input + /// range. + /// + /// In practice, this is used for encoding UTF-8 automata. Its presence is + /// primarily an optimization that avoids many additional unconditional + /// epsilon transitions (via [`Union`](State::Union) states), and thus + /// decreases the overhead of traversing the NFA. This can improve both + /// matching time and DFA construction time. + Sparse(SparseTransitions), + /// A dense representation of a state with multiple transitions. + Dense(DenseTransitions), + /// A conditional epsilon transition satisfied via some sort of + /// look-around. Look-around is limited to anchor and word boundary + /// assertions. + /// + /// Look-around states are meant to be evaluated while performing epsilon + /// closure (computing the set of states reachable from a particular state + /// via only epsilon transitions). If the current position in the haystack + /// satisfies the look-around assertion, then you're permitted to follow + /// that epsilon transition. 
+ Look { + /// The look-around assertion that must be satisfied before moving + /// to `next`. + look: Look, + /// The state to transition to if the look-around assertion is + /// satisfied. + next: StateID, + }, + /// An alternation such that there exists an epsilon transition to all + /// states in `alternates`, where matches found via earlier transitions + /// are preferred over later transitions. + Union { + /// An ordered sequence of unconditional epsilon transitions to other + /// states. Transitions earlier in the sequence are preferred over + /// transitions later in the sequence. + alternates: Box<[StateID]>, + }, + /// An alternation such that there exists precisely two unconditional + /// epsilon transitions, where matches found via `alt1` are preferred over + /// matches found via `alt2`. + /// + /// This state exists as a common special case of Union where there are + /// only two alternates. In this case, we don't need any allocations to + /// represent the state. This saves a bit of memory and also saves an + /// additional memory access when traversing the NFA. + BinaryUnion { + /// An unconditional epsilon transition to another NFA state. This + /// is preferred over `alt2`. + alt1: StateID, + /// An unconditional epsilon transition to another NFA state. Matches + /// reported via this transition should only be reported if no matches + /// were found by following `alt1`. + alt2: StateID, + }, + /// An empty state that records a capture location. + /// + /// From the perspective of finite automata, this is precisely equivalent + /// to an unconditional epsilon transition, but serves the purpose of + /// instructing NFA simulations to record additional state when the finite + /// state machine passes through this epsilon transition. + /// + /// `slot` in this context refers to the specific capture group slot + /// offset that is being recorded. 
Each capturing group has two slots + /// corresponding to the start and end of the matching portion of that + /// group. + /// + /// The pattern ID and capture group index are also included in this state + /// in case they are useful. But mostly, all you'll need is `next` and + /// `slot`. + Capture { + /// The state to transition to, unconditionally. + next: StateID, + /// The pattern ID that this capture belongs to. + pattern_id: PatternID, + /// The capture group index that this capture belongs to. Capture group + /// indices are local to each pattern. For example, when capturing + /// groups are enabled, every pattern has a capture group at index + /// `0`. + group_index: SmallIndex, + /// The slot index for this capture. Every capturing group has two + /// slots: one for the start haystack offset and one for the end + /// haystack offset. Unlike capture group indices, slot indices are + /// global across all patterns in this NFA. That is, each slot belongs + /// to a single pattern, but there is only one slot at index `i`. + slot: SmallIndex, + }, + /// A state that cannot be transitioned out of. This is useful for cases + /// where you want to prevent matching from occurring. For example, if your + /// regex parser permits empty character classes, then one could choose + /// a `Fail` state to represent them. (An empty character class can be + /// thought of as an empty set. Since nothing is in an empty set, they can + /// never match anything.) + Fail, + /// A match state. There is at least one such occurrence of this state for + /// each regex that can match that is in this NFA. + Match { + /// The matching pattern ID. + pattern_id: PatternID, + }, +} + +impl State { + /// Returns true if and only if this state contains one or more epsilon + /// transitions. + /// + /// In practice, a state has no outgoing transitions (like `Match`), has + /// only non-epsilon transitions (like `ByteRange`) or has only epsilon + /// transitions (like `Union`). 
+ /// + /// # Example + /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::{State, Transition}, + /// util::primitives::{PatternID, StateID, SmallIndex}, + /// }; + /// + /// // Capture states are epsilon transitions. + /// let state = State::Capture { + /// next: StateID::ZERO, + /// pattern_id: PatternID::ZERO, + /// group_index: SmallIndex::ZERO, + /// slot: SmallIndex::ZERO, + /// }; + /// assert!(state.is_epsilon()); + /// + /// // ByteRange states are not. + /// let state = State::ByteRange { + /// trans: Transition { start: b'a', end: b'z', next: StateID::ZERO }, + /// }; + /// assert!(!state.is_epsilon()); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn is_epsilon(&self) -> bool { + match *self { + State::ByteRange { .. } + | State::Sparse { .. } + | State::Dense { .. } + | State::Fail + | State::Match { .. } => false, + State::Look { .. } + | State::Union { .. } + | State::BinaryUnion { .. } + | State::Capture { .. } => true, + } + } + + /// Returns the heap memory usage of this NFA state in bytes. + fn memory_usage(&self) -> usize { + match *self { + State::ByteRange { .. } + | State::Look { .. } + | State::BinaryUnion { .. } + | State::Capture { .. } + | State::Match { .. } + | State::Fail => 0, + State::Sparse(SparseTransitions { ref transitions }) => { + transitions.len() * mem::size_of::() + } + State::Dense { .. } => 256 * mem::size_of::(), + State::Union { ref alternates } => { + alternates.len() * mem::size_of::() + } + } + } + + /// Remap the transitions in this state using the given map. Namely, the + /// given map should be indexed according to the transitions currently + /// in this state. + /// + /// This is used during the final phase of the NFA compiler, which turns + /// its intermediate NFA into the final NFA. 
+ fn remap(&mut self, remap: &[StateID]) { + match *self { + State::ByteRange { ref mut trans } => { + trans.next = remap[trans.next] + } + State::Sparse(SparseTransitions { ref mut transitions }) => { + for t in transitions.iter_mut() { + t.next = remap[t.next]; + } + } + State::Dense(DenseTransitions { ref mut transitions }) => { + for sid in transitions.iter_mut() { + *sid = remap[*sid]; + } + } + State::Look { ref mut next, .. } => *next = remap[*next], + State::Union { ref mut alternates } => { + for alt in alternates.iter_mut() { + *alt = remap[*alt]; + } + } + State::BinaryUnion { ref mut alt1, ref mut alt2 } => { + *alt1 = remap[*alt1]; + *alt2 = remap[*alt2]; + } + State::Capture { ref mut next, .. } => *next = remap[*next], + State::Fail => {} + State::Match { .. } => {} + } + } +} + +impl fmt::Debug for State { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match *self { + State::ByteRange { ref trans } => trans.fmt(f), + State::Sparse(SparseTransitions { ref transitions }) => { + let rs = transitions + .iter() + .map(|t| format!("{:?}", t)) + .collect::>() + .join(", "); + write!(f, "sparse({})", rs) + } + State::Dense(ref dense) => { + write!(f, "dense(")?; + for (i, t) in dense.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{:?}", t)?; + } + write!(f, ")") + } + State::Look { ref look, next } => { + write!(f, "{:?} => {:?}", look, next.as_usize()) + } + State::Union { ref alternates } => { + let alts = alternates + .iter() + .map(|id| format!("{:?}", id.as_usize())) + .collect::>() + .join(", "); + write!(f, "union({})", alts) + } + State::BinaryUnion { alt1, alt2 } => { + write!( + f, + "binary-union({}, {})", + alt1.as_usize(), + alt2.as_usize() + ) + } + State::Capture { next, pattern_id, group_index, slot } => { + write!( + f, + "capture(pid={:?}, group={:?}, slot={:?}) => {:?}", + pattern_id.as_usize(), + group_index.as_usize(), + slot.as_usize(), + next.as_usize(), + ) + } + State::Fail => write!(f, 
"FAIL"), + State::Match { pattern_id } => { + write!(f, "MATCH({:?})", pattern_id.as_usize()) + } + } + } +} + +/// A sequence of transitions used to represent a sparse state. +/// +/// This is the primary representation of a [`Sparse`](State::Sparse) state. +/// It corresponds to a sorted sequence of transitions with non-overlapping +/// byte ranges. If the byte at the current position in the haystack matches +/// one of the byte ranges, then the finite state machine should take the +/// corresponding transition. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct SparseTransitions { + /// The sorted sequence of non-overlapping transitions. + pub transitions: Box<[Transition]>, +} + +impl SparseTransitions { + /// This follows the matching transition for a particular byte. + /// + /// The matching transition is found by looking for a matching byte + /// range (there is at most one) corresponding to the position `at` in + /// `haystack`. + /// + /// If `at >= haystack.len()`, then this returns `None`. + #[inline] + pub fn matches(&self, haystack: &[u8], at: usize) -> Option { + haystack.get(at).and_then(|&b| self.matches_byte(b)) + } + + /// This follows the matching transition for any member of the alphabet. + /// + /// The matching transition is found by looking for a matching byte + /// range (there is at most one) corresponding to the position `at` in + /// `haystack`. If the given alphabet unit is [`EOI`](alphabet::Unit::eoi), + /// then this always returns `None`. + #[inline] + pub(crate) fn matches_unit( + &self, + unit: alphabet::Unit, + ) -> Option { + unit.as_u8().map_or(None, |byte| self.matches_byte(byte)) + } + + /// This follows the matching transition for a particular byte. + /// + /// The matching transition is found by looking for a matching byte range + /// (there is at most one) corresponding to the byte given. 
+ #[inline] + pub fn matches_byte(&self, byte: u8) -> Option { + for t in self.transitions.iter() { + if t.start > byte { + break; + } else if t.matches_byte(byte) { + return Some(t.next); + } + } + None + + /* + // This is an alternative implementation that uses binary search. In + // some ad hoc experiments, like + // + // regex-cli find match pikevm -b -p '\b\w+\b' non-ascii-file + // + // I could not observe any improvement, and in fact, things seemed to + // be a bit slower. I can see an improvement in at least one benchmark: + // + // regex-cli find match pikevm -b -p '\pL{100}' all-codepoints-utf8 + // + // Where total search time goes from 3.2s to 2.4s when using binary + // search. + self.transitions + .binary_search_by(|t| { + if t.end < byte { + core::cmp::Ordering::Less + } else if t.start > byte { + core::cmp::Ordering::Greater + } else { + core::cmp::Ordering::Equal + } + }) + .ok() + .map(|i| self.transitions[i].next) + */ + } +} + +/// A sequence of transitions used to represent a dense state. +/// +/// This is the primary representation of a [`Dense`](State::Dense) state. It +/// provides constant time matching. That is, given a byte in a haystack and +/// a `DenseTransitions`, one can determine if the state matches in constant +/// time. +/// +/// This is in contrast to `SparseTransitions`, whose time complexity is +/// necessarily bigger than constant time. Also in contrast, `DenseTransitions` +/// usually requires (much) more heap memory. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct DenseTransitions { + /// A dense representation of this state's transitions on the heap. This + /// always has length 256. + pub transitions: Box<[StateID]>, +} + +impl DenseTransitions { + /// This follows the matching transition for a particular byte. + /// + /// The matching transition is found by looking for a transition that + /// doesn't correspond to `StateID::ZERO` for the byte `at` the given + /// position in `haystack`. 
+ /// + /// If `at >= haystack.len()`, then this returns `None`. + #[inline] + pub fn matches(&self, haystack: &[u8], at: usize) -> Option { + haystack.get(at).and_then(|&b| self.matches_byte(b)) + } + + /// This follows the matching transition for any member of the alphabet. + /// + /// The matching transition is found by looking for a transition that + /// doesn't correspond to `StateID::ZERO` for the byte `at` the given + /// position in `haystack`. + /// + /// If `at >= haystack.len()` or if the given alphabet unit is + /// [`EOI`](alphabet::Unit::eoi), then this returns `None`. + #[inline] + pub(crate) fn matches_unit( + &self, + unit: alphabet::Unit, + ) -> Option { + unit.as_u8().map_or(None, |byte| self.matches_byte(byte)) + } + + /// This follows the matching transition for a particular byte. + /// + /// The matching transition is found by looking for a transition that + /// doesn't correspond to `StateID::ZERO` for the given `byte`. + /// + /// If `at >= haystack.len()`, then this returns `None`. + #[inline] + pub fn matches_byte(&self, byte: u8) -> Option { + let next = self.transitions[usize::from(byte)]; + if next == StateID::ZERO { + None + } else { + Some(next) + } + } + + /* + /// The dense state optimization isn't currently enabled, so permit a + /// little bit of dead code. + pub(crate) fn from_sparse(sparse: &SparseTransitions) -> DenseTransitions { + let mut dense = vec![StateID::ZERO; 256]; + for t in sparse.transitions.iter() { + for b in t.start..=t.end { + dense[usize::from(b)] = t.next; + } + } + DenseTransitions { transitions: dense.into_boxed_slice() } + } + */ + + /// Returns an iterator over all transitions that don't point to + /// `StateID::ZERO`. 
+ pub(crate) fn iter(&self) -> impl Iterator + '_ { + use crate::util::int::Usize; + self.transitions + .iter() + .enumerate() + .filter(|&(_, &sid)| sid != StateID::ZERO) + .map(|(byte, &next)| Transition { + start: byte.as_u8(), + end: byte.as_u8(), + next, + }) + } +} + +/// A single transition to another state. +/// +/// This transition may only be followed if the current byte in the haystack +/// falls in the inclusive range of bytes specified. +#[derive(Clone, Copy, Eq, Hash, PartialEq)] +pub struct Transition { + /// The inclusive start of the byte range. + pub start: u8, + /// The inclusive end of the byte range. + pub end: u8, + /// The identifier of the state to transition to. + pub next: StateID, +} + +impl Transition { + /// Returns true if the position `at` in `haystack` falls in this + /// transition's range of bytes. + /// + /// If `at >= haystack.len()`, then this returns `false`. + pub fn matches(&self, haystack: &[u8], at: usize) -> bool { + haystack.get(at).map_or(false, |&b| self.matches_byte(b)) + } + + /// Returns true if the given alphabet unit falls in this transition's + /// range of bytes. If the given unit is [`EOI`](alphabet::Unit::eoi), then + /// this returns `false`. + pub fn matches_unit(&self, unit: alphabet::Unit) -> bool { + unit.as_u8().map_or(false, |byte| self.matches_byte(byte)) + } + + /// Returns true if the given byte falls in this transition's range of + /// bytes. + pub fn matches_byte(&self, byte: u8) -> bool { + self.start <= byte && byte <= self.end + } +} + +impl fmt::Debug for Transition { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + use crate::util::escape::DebugByte; + + let Transition { start, end, next } = *self; + if self.start == self.end { + write!(f, "{:?} => {:?}", DebugByte(start), next.as_usize()) + } else { + write!( + f, + "{:?}-{:?} => {:?}", + DebugByte(start), + DebugByte(end), + next.as_usize(), + ) + } + } +} + +/// An iterator over all pattern IDs in an NFA. 
+/// +/// This iterator is created by [`NFA::patterns`]. +/// +/// The lifetime parameter `'a` refers to the lifetime of the NFA from which +/// this pattern iterator was created. +#[derive(Debug)] +pub struct PatternIter<'a> { + it: PatternIDIter, + /// We explicitly associate a lifetime with this iterator even though we + /// don't actually borrow anything from the NFA. We do this for backward + /// compatibility purposes. If we ever do need to borrow something from + /// the NFA, then we can and just get rid of this marker without breaking + /// the public API. + _marker: core::marker::PhantomData<&'a ()>, +} + +impl<'a> Iterator for PatternIter<'a> { + type Item = PatternID; + + fn next(&mut self) -> Option { + self.it.next() + } +} + +#[cfg(all(test, feature = "nfa-pikevm"))] +mod tests { + use super::*; + use crate::{nfa::thompson::pikevm::PikeVM, Input}; + + // This asserts that an NFA state doesn't have its size changed. It is + // *really* easy to accidentally increase the size, and thus potentially + // dramatically increase the memory usage of every NFA. + // + // This assert doesn't mean we absolutely cannot increase the size of an + // NFA state. We can. It's just here to make sure we do it knowingly and + // intentionally. 
+ #[test] + fn state_has_small_size() { + #[cfg(target_pointer_width = "64")] + assert_eq!(24, core::mem::size_of::()); + #[cfg(target_pointer_width = "32")] + assert_eq!(20, core::mem::size_of::()); + } + + #[test] + fn always_match() { + let re = PikeVM::new_from_nfa(NFA::always_match()).unwrap(); + let mut cache = re.create_cache(); + let mut caps = re.create_captures(); + let mut find = |haystack, start, end| { + let input = Input::new(haystack).range(start..end); + re.search(&mut cache, &input, &mut caps); + caps.get_match().map(|m| m.end()) + }; + + assert_eq!(Some(0), find("", 0, 0)); + assert_eq!(Some(0), find("a", 0, 1)); + assert_eq!(Some(1), find("a", 1, 1)); + assert_eq!(Some(0), find("ab", 0, 2)); + assert_eq!(Some(1), find("ab", 1, 2)); + assert_eq!(Some(2), find("ab", 2, 2)); + } + + #[test] + fn never_match() { + let re = PikeVM::new_from_nfa(NFA::never_match()).unwrap(); + let mut cache = re.create_cache(); + let mut caps = re.create_captures(); + let mut find = |haystack, start, end| { + let input = Input::new(haystack).range(start..end); + re.search(&mut cache, &input, &mut caps); + caps.get_match().map(|m| m.end()) + }; + + assert_eq!(None, find("", 0, 0)); + assert_eq!(None, find("a", 0, 1)); + assert_eq!(None, find("a", 1, 1)); + assert_eq!(None, find("ab", 0, 2)); + assert_eq!(None, find("ab", 1, 2)); + assert_eq!(None, find("ab", 2, 2)); + } +} diff --git a/vendor/regex-automata/src/nfa/thompson/pikevm.rs b/vendor/regex-automata/src/nfa/thompson/pikevm.rs new file mode 100644 index 0000000..0128c15 --- /dev/null +++ b/vendor/regex-automata/src/nfa/thompson/pikevm.rs @@ -0,0 +1,2359 @@ +/*! +An NFA backed Pike VM for executing regex searches with capturing groups. + +This module provides a [`PikeVM`] that works by simulating an NFA and +resolving all spans of capturing groups that participate in a match. 
+*/ + +#[cfg(feature = "internal-instrument-pikevm")] +use core::cell::RefCell; + +use alloc::{vec, vec::Vec}; + +use crate::{ + nfa::thompson::{self, BuildError, State, NFA}, + util::{ + captures::Captures, + empty, iter, + prefilter::Prefilter, + primitives::{NonMaxUsize, PatternID, SmallIndex, StateID}, + search::{ + Anchored, HalfMatch, Input, Match, MatchKind, PatternSet, Span, + }, + sparse_set::SparseSet, + }, +}; + +/// A simple macro for conditionally executing instrumentation logic when +/// the 'trace' log level is enabled. This is a compile-time no-op when the +/// 'internal-instrument-pikevm' feature isn't enabled. The intent here is that +/// this makes it easier to avoid doing extra work when instrumentation isn't +/// enabled. +/// +/// This macro accepts a closure of type `|&mut Counters|`. The closure can +/// then increment counters (or whatever) in accordance with what one wants +/// to track. +macro_rules! instrument { + ($fun:expr) => { + #[cfg(feature = "internal-instrument-pikevm")] + { + let fun: &mut dyn FnMut(&mut Counters) = &mut $fun; + COUNTERS.with(|c: &RefCell| fun(&mut *c.borrow_mut())); + } + }; +} + +#[cfg(feature = "internal-instrument-pikevm")] +std::thread_local! { + /// Effectively global state used to keep track of instrumentation + /// counters. The "proper" way to do this is to thread it through the + /// PikeVM, but it makes the code quite icky. Since this is just a + /// debugging feature, we're content to relegate it to thread local + /// state. When instrumentation is enabled, the counters are reset at the + /// beginning of every search and printed (with the 'trace' log level) at + /// the end of every search. + static COUNTERS: RefCell = RefCell::new(Counters::empty()); +} + +/// The configuration used for building a [`PikeVM`]. +/// +/// A PikeVM configuration is a simple data object that is typically used with +/// [`Builder::configure`]. It can be cheaply cloned. 
+/// +/// A default configuration can be created either with `Config::new`, or +/// perhaps more conveniently, with [`PikeVM::config`]. +#[derive(Clone, Debug, Default)] +pub struct Config { + match_kind: Option, + pre: Option>, +} + +impl Config { + /// Return a new default PikeVM configuration. + pub fn new() -> Config { + Config::default() + } + + /// Set the desired match semantics. + /// + /// The default is [`MatchKind::LeftmostFirst`], which corresponds to the + /// match semantics of Perl-like regex engines. That is, when multiple + /// patterns would match at the same leftmost position, the pattern that + /// appears first in the concrete syntax is chosen. + /// + /// Currently, the only other kind of match semantics supported is + /// [`MatchKind::All`]. This corresponds to "classical DFA" construction + /// where all possible matches are visited in the NFA by the `PikeVM`. + /// + /// Typically, `All` is used when one wants to execute an overlapping + /// search and `LeftmostFirst` otherwise. In particular, it rarely makes + /// sense to use `All` with the various "leftmost" find routines, since the + /// leftmost routines depend on the `LeftmostFirst` automata construction + /// strategy. Specifically, `LeftmostFirst` results in the `PikeVM` + /// simulating dead states as a way to terminate the search and report a + /// match. `LeftmostFirst` also supports non-greedy matches using this + /// strategy where as `All` does not. + pub fn match_kind(mut self, kind: MatchKind) -> Config { + self.match_kind = Some(kind); + self + } + + /// Set a prefilter to be used whenever a start state is entered. + /// + /// A [`Prefilter`] in this context is meant to accelerate searches by + /// looking for literal prefixes that every match for the corresponding + /// pattern (or patterns) must start with. Once a prefilter produces a + /// match, the underlying search routine continues on to try and confirm + /// the match. 
+ /// + /// Be warned that setting a prefilter does not guarantee that the search + /// will be faster. While it's usually a good bet, if the prefilter + /// produces a lot of false positive candidates (i.e., positions matched + /// by the prefilter but not by the regex), then the overall result can + /// be slower than if you had just executed the regex engine without any + /// prefilters. + /// + /// By default no prefilter is set. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::pikevm::PikeVM, + /// util::prefilter::Prefilter, + /// Input, Match, MatchKind, + /// }; + /// + /// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["foo", "bar"]); + /// let re = PikeVM::builder() + /// .configure(PikeVM::config().prefilter(pre)) + /// .build(r"(foo|bar)[a-z]+")?; + /// let mut cache = re.create_cache(); + /// let input = Input::new("foo1 barfox bar"); + /// assert_eq!(Some(Match::must(0, 5..11)), re.find(&mut cache, input)); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// Be warned though that an incorrect prefilter can lead to incorrect + /// results! + /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::pikevm::PikeVM, + /// util::prefilter::Prefilter, + /// Input, HalfMatch, MatchKind, + /// }; + /// + /// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["foo", "car"]); + /// let re = PikeVM::builder() + /// .configure(PikeVM::config().prefilter(pre)) + /// .build(r"(foo|bar)[a-z]+")?; + /// let mut cache = re.create_cache(); + /// let input = Input::new("foo1 barfox bar"); + /// // No match reported even though there clearly is one! + /// assert_eq!(None, re.find(&mut cache, input)); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn prefilter(mut self, pre: Option) -> Config { + self.pre = Some(pre); + self + } + + /// Returns the match semantics set in this configuration. 
+ pub fn get_match_kind(&self) -> MatchKind { + self.match_kind.unwrap_or(MatchKind::LeftmostFirst) + } + + /// Returns the prefilter set in this configuration, if one at all. + pub fn get_prefilter(&self) -> Option<&Prefilter> { + self.pre.as_ref().unwrap_or(&None).as_ref() + } + + /// Overwrite the default configuration such that the options in `o` are + /// always used. If an option in `o` is not set, then the corresponding + /// option in `self` is used. If it's not set in `self` either, then it + /// remains not set. + pub(crate) fn overwrite(&self, o: Config) -> Config { + Config { + match_kind: o.match_kind.or(self.match_kind), + pre: o.pre.or_else(|| self.pre.clone()), + } + } +} + +/// A builder for a `PikeVM`. +/// +/// This builder permits configuring options for the syntax of a pattern, +/// the NFA construction and the `PikeVM` construction. This builder is +/// different from a general purpose regex builder in that it permits fine +/// grain configuration of the construction process. The trade off for this is +/// complexity, and the possibility of setting a configuration that might not +/// make sense. For example, there are two different UTF-8 modes: +/// +/// * [`util::syntax::Config::utf8`](crate::util::syntax::Config::utf8) +/// controls whether the pattern itself can contain sub-expressions that match +/// invalid UTF-8. +/// * [`thompson::Config::utf8`] controls whether empty matches that split a +/// Unicode codepoint are reported or not. +/// +/// Generally speaking, callers will want to either enable all of these or +/// disable all of these. +/// +/// # Example +/// +/// This example shows how to disable UTF-8 mode in the syntax and the regex +/// itself. This is generally what you want for matching on arbitrary bytes. 
+/// +/// ``` +/// use regex_automata::{ +/// nfa::thompson::{self, pikevm::PikeVM}, +/// util::syntax, +/// Match, +/// }; +/// +/// let re = PikeVM::builder() +/// .syntax(syntax::Config::new().utf8(false)) +/// .thompson(thompson::Config::new().utf8(false)) +/// .build(r"foo(?-u:[^b])ar.*")?; +/// let mut cache = re.create_cache(); +/// +/// let haystack = b"\xFEfoo\xFFarzz\xE2\x98\xFF\n"; +/// let expected = Some(Match::must(0, 1..9)); +/// let got = re.find_iter(&mut cache, haystack).next(); +/// assert_eq!(expected, got); +/// // Notice that `(?-u:[^b])` matches invalid UTF-8, +/// // but the subsequent `.*` does not! Disabling UTF-8 +/// // on the syntax permits this. +/// // +/// // N.B. This example does not show the impact of +/// // disabling UTF-8 mode on a PikeVM Config, since that +/// // only impacts regexes that can produce matches of +/// // length 0. +/// assert_eq!(b"foo\xFFarzz", &haystack[got.unwrap().range()]); +/// +/// # Ok::<(), Box>(()) +/// ``` +#[derive(Clone, Debug)] +pub struct Builder { + config: Config, + #[cfg(feature = "syntax")] + thompson: thompson::Compiler, +} + +impl Builder { + /// Create a new PikeVM builder with its default configuration. + pub fn new() -> Builder { + Builder { + config: Config::default(), + #[cfg(feature = "syntax")] + thompson: thompson::Compiler::new(), + } + } + + /// Build a `PikeVM` from the given pattern. + /// + /// If there was a problem parsing or compiling the pattern, then an error + /// is returned. + #[cfg(feature = "syntax")] + pub fn build(&self, pattern: &str) -> Result { + self.build_many(&[pattern]) + } + + /// Build a `PikeVM` from the given patterns. + #[cfg(feature = "syntax")] + pub fn build_many>( + &self, + patterns: &[P], + ) -> Result { + let nfa = self.thompson.build_many(patterns)?; + self.build_from_nfa(nfa) + } + + /// Build a `PikeVM` directly from its NFA. 
+ /// + /// Note that when using this method, any configuration that applies to the + /// construction of the NFA itself will of course be ignored, since the NFA + /// given here is already built. + pub fn build_from_nfa(&self, nfa: NFA) -> Result { + nfa.look_set_any().available().map_err(BuildError::word)?; + Ok(PikeVM { config: self.config.clone(), nfa }) + } + + /// Apply the given `PikeVM` configuration options to this builder. + pub fn configure(&mut self, config: Config) -> &mut Builder { + self.config = self.config.overwrite(config); + self + } + + /// Set the syntax configuration for this builder using + /// [`syntax::Config`](crate::util::syntax::Config). + /// + /// This permits setting things like case insensitivity, Unicode and multi + /// line mode. + /// + /// These settings only apply when constructing a PikeVM directly from a + /// pattern. + #[cfg(feature = "syntax")] + pub fn syntax( + &mut self, + config: crate::util::syntax::Config, + ) -> &mut Builder { + self.thompson.syntax(config); + self + } + + /// Set the Thompson NFA configuration for this builder using + /// [`nfa::thompson::Config`](crate::nfa::thompson::Config). + /// + /// This permits setting things like if additional time should be spent + /// shrinking the size of the NFA. + /// + /// These settings only apply when constructing a PikeVM directly from a + /// pattern. + #[cfg(feature = "syntax")] + pub fn thompson(&mut self, config: thompson::Config) -> &mut Builder { + self.thompson.configure(config); + self + } +} + +/// A virtual machine for executing regex searches with capturing groups. +/// +/// # Infallible APIs +/// +/// Unlike most other regex engines in this crate, a `PikeVM` never returns an +/// error at search time. It supports all [`Anchored`] configurations, never +/// quits and works on haystacks of arbitrary length. 
+/// +/// There are two caveats to mention though: +/// +/// * If an invalid pattern ID is given to a search via [`Anchored::Pattern`], +/// then the PikeVM will report "no match." This is consistent with all other +/// regex engines in this crate. +/// * When using [`PikeVM::which_overlapping_matches`] with a [`PatternSet`] +/// that has insufficient capacity to store all valid pattern IDs, then if a +/// match occurs for a `PatternID` that cannot be inserted, it is silently +/// dropped as if it did not match. +/// +/// # Advice +/// +/// The `PikeVM` is generally the most "powerful" regex engine in this crate. +/// "Powerful" in this context means that it can handle any regular expression +/// that is parseable by `regex-syntax` and any size haystack. Regretably, +/// the `PikeVM` is also simultaneously often the _slowest_ regex engine in +/// practice. This results in an annoying situation where one generally tries +/// to pick any other regex engine (or perhaps none at all) before being +/// forced to fall back to a `PikeVM`. +/// +/// For example, a common strategy for dealing with capturing groups is to +/// actually look for the overall match of the regex using a faster regex +/// engine, like a [lazy DFA](crate::hybrid::regex::Regex). Once the overall +/// match is found, one can then run the `PikeVM` on just the match span to +/// find the spans of the capturing groups. In this way, the faster regex +/// engine does the majority of the work, while the `PikeVM` only lends its +/// power in a more limited role. +/// +/// Unfortunately, this isn't always possible because the faster regex engines +/// don't support all of the regex features in `regex-syntax`. This notably +/// includes (and is currently limited to) Unicode word boundaries. So if +/// your pattern has Unicode word boundaries, you typically can't use a +/// DFA-based regex engine at all (unless you [enable heuristic support for +/// it](crate::hybrid::dfa::Config::unicode_word_boundary)). 
(The [one-pass +/// DFA](crate::dfa::onepass::DFA) can handle Unicode word boundaries for +/// anchored searches only, but in a cruel sort of joke, many Unicode features +/// tend to result in making the regex _not_ one-pass.) +/// +/// # Example +/// +/// This example shows that the `PikeVM` implements Unicode word boundaries +/// correctly by default. +/// +/// ``` +/// # if cfg!(miri) { return Ok(()); } // miri takes too long +/// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match}; +/// +/// let re = PikeVM::new(r"\b\w+\b")?; +/// let mut cache = re.create_cache(); +/// +/// let mut it = re.find_iter(&mut cache, "Шерлок Холмс"); +/// assert_eq!(Some(Match::must(0, 0..12)), it.next()); +/// assert_eq!(Some(Match::must(0, 13..23)), it.next()); +/// assert_eq!(None, it.next()); +/// # Ok::<(), Box>(()) +/// ``` +#[derive(Clone, Debug)] +pub struct PikeVM { + config: Config, + nfa: NFA, +} + +impl PikeVM { + /// Parse the given regular expression using the default configuration and + /// return the corresponding `PikeVM`. + /// + /// If you want a non-default configuration, then use the [`Builder`] to + /// set your own configuration. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match}; + /// + /// let re = PikeVM::new("foo[0-9]+bar")?; + /// let mut cache = re.create_cache(); + /// assert_eq!( + /// Some(Match::must(0, 3..14)), + /// re.find_iter(&mut cache, "zzzfoo12345barzzz").next(), + /// ); + /// # Ok::<(), Box>(()) + /// ``` + #[cfg(feature = "syntax")] + pub fn new(pattern: &str) -> Result { + PikeVM::builder().build(pattern) + } + + /// Like `new`, but parses multiple patterns into a single "multi regex." + /// This similarly uses the default regex configuration. 
+ /// + /// # Example + /// + /// ``` + /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match}; + /// + /// let re = PikeVM::new_many(&["[a-z]+", "[0-9]+"])?; + /// let mut cache = re.create_cache(); + /// + /// let mut it = re.find_iter(&mut cache, "abc 1 foo 4567 0 quux"); + /// assert_eq!(Some(Match::must(0, 0..3)), it.next()); + /// assert_eq!(Some(Match::must(1, 4..5)), it.next()); + /// assert_eq!(Some(Match::must(0, 6..9)), it.next()); + /// assert_eq!(Some(Match::must(1, 10..14)), it.next()); + /// assert_eq!(Some(Match::must(1, 15..16)), it.next()); + /// assert_eq!(Some(Match::must(0, 17..21)), it.next()); + /// assert_eq!(None, it.next()); + /// # Ok::<(), Box>(()) + /// ``` + #[cfg(feature = "syntax")] + pub fn new_many>( + patterns: &[P], + ) -> Result { + PikeVM::builder().build_many(patterns) + } + + /// Like `new`, but builds a PikeVM directly from an NFA. This is useful + /// if you already have an NFA, or even if you hand-assembled the NFA. + /// + /// # Example + /// + /// This shows how to hand assemble a regular expression via its HIR, + /// compile an NFA from it and build a PikeVM from the NFA. 
+ /// + /// ``` + /// use regex_automata::{nfa::thompson::{NFA, pikevm::PikeVM}, Match}; + /// use regex_syntax::hir::{Hir, Class, ClassBytes, ClassBytesRange}; + /// + /// let hir = Hir::class(Class::Bytes(ClassBytes::new(vec![ + /// ClassBytesRange::new(b'0', b'9'), + /// ClassBytesRange::new(b'A', b'Z'), + /// ClassBytesRange::new(b'_', b'_'), + /// ClassBytesRange::new(b'a', b'z'), + /// ]))); + /// + /// let config = NFA::config().nfa_size_limit(Some(1_000)); + /// let nfa = NFA::compiler().configure(config).build_from_hir(&hir)?; + /// + /// let re = PikeVM::new_from_nfa(nfa)?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// let expected = Some(Match::must(0, 3..4)); + /// re.captures(&mut cache, "!@#A#@!", &mut caps); + /// assert_eq!(expected, caps.get_match()); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn new_from_nfa(nfa: NFA) -> Result { + PikeVM::builder().build_from_nfa(nfa) + } + + /// Create a new `PikeVM` that matches every input. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match}; + /// + /// let re = PikeVM::always_match()?; + /// let mut cache = re.create_cache(); + /// + /// let expected = Match::must(0, 0..0); + /// assert_eq!(Some(expected), re.find_iter(&mut cache, "").next()); + /// assert_eq!(Some(expected), re.find_iter(&mut cache, "foo").next()); + /// # Ok::<(), Box>(()) + /// ``` + pub fn always_match() -> Result { + let nfa = thompson::NFA::always_match(); + PikeVM::new_from_nfa(nfa) + } + + /// Create a new `PikeVM` that never matches any input. 
+ /// + /// # Example + /// + /// ``` + /// use regex_automata::nfa::thompson::pikevm::PikeVM; + /// + /// let re = PikeVM::never_match()?; + /// let mut cache = re.create_cache(); + /// + /// assert_eq!(None, re.find_iter(&mut cache, "").next()); + /// assert_eq!(None, re.find_iter(&mut cache, "foo").next()); + /// # Ok::<(), Box>(()) + /// ``` + pub fn never_match() -> Result { + let nfa = thompson::NFA::never_match(); + PikeVM::new_from_nfa(nfa) + } + + /// Return a default configuration for a `PikeVM`. + /// + /// This is a convenience routine to avoid needing to import the `Config` + /// type when customizing the construction of a `PikeVM`. + /// + /// # Example + /// + /// This example shows how to disable UTF-8 mode. When UTF-8 mode is + /// disabled, zero-width matches that split a codepoint are allowed. + /// Otherwise they are never reported. + /// + /// In the code below, notice that `""` is permitted to match positions + /// that split the encoding of a codepoint. + /// + /// ``` + /// use regex_automata::{nfa::thompson::{self, pikevm::PikeVM}, Match}; + /// + /// let re = PikeVM::builder() + /// .thompson(thompson::Config::new().utf8(false)) + /// .build(r"")?; + /// let mut cache = re.create_cache(); + /// + /// let haystack = "a☃z"; + /// let mut it = re.find_iter(&mut cache, haystack); + /// assert_eq!(Some(Match::must(0, 0..0)), it.next()); + /// assert_eq!(Some(Match::must(0, 1..1)), it.next()); + /// assert_eq!(Some(Match::must(0, 2..2)), it.next()); + /// assert_eq!(Some(Match::must(0, 3..3)), it.next()); + /// assert_eq!(Some(Match::must(0, 4..4)), it.next()); + /// assert_eq!(Some(Match::must(0, 5..5)), it.next()); + /// assert_eq!(None, it.next()); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn config() -> Config { + Config::new() + } + + /// Return a builder for configuring the construction of a `PikeVM`. + /// + /// This is a convenience routine to avoid needing to import the + /// [`Builder`] type in common cases. 
+ /// + /// # Example + /// + /// This example shows how to use the builder to disable UTF-8 mode + /// everywhere. + /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::{self, pikevm::PikeVM}, + /// util::syntax, + /// Match, + /// }; + /// + /// let re = PikeVM::builder() + /// .syntax(syntax::Config::new().utf8(false)) + /// .thompson(thompson::Config::new().utf8(false)) + /// .build(r"foo(?-u:[^b])ar.*")?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// + /// let haystack = b"\xFEfoo\xFFarzz\xE2\x98\xFF\n"; + /// let expected = Some(Match::must(0, 1..9)); + /// re.captures(&mut cache, haystack, &mut caps); + /// assert_eq!(expected, caps.get_match()); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn builder() -> Builder { + Builder::new() + } + + /// Create a new empty set of capturing groups that is guaranteed to be + /// valid for the search APIs on this `PikeVM`. + /// + /// A `Captures` value created for a specific `PikeVM` cannot be used with + /// any other `PikeVM`. + /// + /// This is a convenience function for [`Captures::all`]. See the + /// [`Captures`] documentation for an explanation of its alternative + /// constructors that permit the `PikeVM` to do less work during a search, + /// and thus might make it faster. + pub fn create_captures(&self) -> Captures { + Captures::all(self.get_nfa().group_info().clone()) + } + + /// Create a new cache for this `PikeVM`. + /// + /// The cache returned should only be used for searches for this + /// `PikeVM`. If you want to reuse the cache for another `PikeVM`, then + /// you must call [`Cache::reset`] with that `PikeVM` (or, equivalently, + /// [`PikeVM::reset_cache`]). + pub fn create_cache(&self) -> Cache { + Cache::new(self) + } + + /// Reset the given cache such that it can be used for searching with the + /// this `PikeVM` (and only this `PikeVM`). 
+ /// + /// A cache reset permits reusing memory already allocated in this cache + /// with a different `PikeVM`. + /// + /// # Example + /// + /// This shows how to re-purpose a cache for use with a different `PikeVM`. + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match}; + /// + /// let re1 = PikeVM::new(r"\w")?; + /// let re2 = PikeVM::new(r"\W")?; + /// + /// let mut cache = re1.create_cache(); + /// assert_eq!( + /// Some(Match::must(0, 0..2)), + /// re1.find_iter(&mut cache, "Δ").next(), + /// ); + /// + /// // Using 'cache' with re2 is not allowed. It may result in panics or + /// // incorrect results. In order to re-purpose the cache, we must reset + /// // it with the PikeVM we'd like to use it with. + /// // + /// // Similarly, after this reset, using the cache with 're1' is also not + /// // allowed. + /// re2.reset_cache(&mut cache); + /// assert_eq!( + /// Some(Match::must(0, 0..3)), + /// re2.find_iter(&mut cache, "☃").next(), + /// ); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn reset_cache(&self, cache: &mut Cache) { + cache.reset(self); + } + + /// Returns the total number of patterns compiled into this `PikeVM`. + /// + /// In the case of a `PikeVM` that contains no patterns, this returns `0`. 
+ /// + /// # Example + /// + /// This example shows the pattern length for a `PikeVM` that never + /// matches: + /// + /// ``` + /// use regex_automata::nfa::thompson::pikevm::PikeVM; + /// + /// let re = PikeVM::never_match()?; + /// assert_eq!(re.pattern_len(), 0); + /// # Ok::<(), Box>(()) + /// ``` + /// + /// And another example for a `PikeVM` that matches at every position: + /// + /// ``` + /// use regex_automata::nfa::thompson::pikevm::PikeVM; + /// + /// let re = PikeVM::always_match()?; + /// assert_eq!(re.pattern_len(), 1); + /// # Ok::<(), Box>(()) + /// ``` + /// + /// And finally, a `PikeVM` that was constructed from multiple patterns: + /// + /// ``` + /// use regex_automata::nfa::thompson::pikevm::PikeVM; + /// + /// let re = PikeVM::new_many(&["[0-9]+", "[a-z]+", "[A-Z]+"])?; + /// assert_eq!(re.pattern_len(), 3); + /// # Ok::<(), Box>(()) + /// ``` + pub fn pattern_len(&self) -> usize { + self.nfa.pattern_len() + } + + /// Return the config for this `PikeVM`. + #[inline] + pub fn get_config(&self) -> &Config { + &self.config + } + + /// Returns a reference to the underlying NFA. + #[inline] + pub fn get_nfa(&self) -> &NFA { + &self.nfa + } +} + +impl PikeVM { + /// Returns true if and only if this `PikeVM` matches the given haystack. + /// + /// This routine may short circuit if it knows that scanning future + /// input will never lead to a different result. In particular, if the + /// underlying NFA enters a match state, then this routine will return + /// `true` immediately without inspecting any future input. (Consider how + /// this might make a difference given the regex `a+` on the haystack + /// `aaaaaaaaaaaaaaa`. This routine can stop after it sees the first `a`, + /// but routines like `find` need to continue searching because `+` is + /// greedy by default.) 
+ /// + /// # Example + /// + /// This shows basic usage: + /// + /// ``` + /// use regex_automata::nfa::thompson::pikevm::PikeVM; + /// + /// let re = PikeVM::new("foo[0-9]+bar")?; + /// let mut cache = re.create_cache(); + /// + /// assert!(re.is_match(&mut cache, "foo12345bar")); + /// assert!(!re.is_match(&mut cache, "foobar")); + /// # Ok::<(), Box>(()) + /// ``` + /// + /// # Example: consistency with search APIs + /// + /// `is_match` is guaranteed to return `true` whenever `find` returns a + /// match. This includes searches that are executed entirely within a + /// codepoint: + /// + /// ``` + /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Input}; + /// + /// let re = PikeVM::new("a*")?; + /// let mut cache = re.create_cache(); + /// + /// assert!(!re.is_match(&mut cache, Input::new("☃").span(1..2))); + /// # Ok::<(), Box>(()) + /// ``` + /// + /// Notice that when UTF-8 mode is disabled, then the above reports a + /// match because the restriction against zero-width matches that split a + /// codepoint has been lifted: + /// + /// ``` + /// use regex_automata::{nfa::thompson::{pikevm::PikeVM, NFA}, Input}; + /// + /// let re = PikeVM::builder() + /// .thompson(NFA::config().utf8(false)) + /// .build("a*")?; + /// let mut cache = re.create_cache(); + /// + /// assert!(re.is_match(&mut cache, Input::new("☃").span(1..2))); + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn is_match<'h, I: Into>>( + &self, + cache: &mut Cache, + input: I, + ) -> bool { + let input = input.into().earliest(true); + self.search_slots(cache, &input, &mut []).is_some() + } + + /// Executes a leftmost forward search and returns a `Match` if one exists. + /// + /// This routine only includes the overall match span. To get access to the + /// individual spans of each capturing group, use [`PikeVM::captures`]. 
+ /// + /// # Example + /// + /// Leftmost first match semantics corresponds to the match with the + /// smallest starting offset, but where the end offset is determined by + /// preferring earlier branches in the original regular expression. For + /// example, `Sam|Samwise` will match `Sam` in `Samwise`, but `Samwise|Sam` + /// will match `Samwise` in `Samwise`. + /// + /// Generally speaking, the "leftmost first" match is how most backtracking + /// regular expressions tend to work. This is in contrast to POSIX-style + /// regular expressions that yield "leftmost longest" matches. Namely, + /// both `Sam|Samwise` and `Samwise|Sam` match `Samwise` when using + /// leftmost longest semantics. (This crate does not currently support + /// leftmost longest semantics.) + /// + /// ``` + /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match}; + /// + /// let re = PikeVM::new("foo[0-9]+")?; + /// let mut cache = re.create_cache(); + /// let expected = Match::must(0, 0..8); + /// assert_eq!(Some(expected), re.find(&mut cache, "foo12345")); + /// + /// // Even though a match is found after reading the first byte (`a`), + /// // the leftmost first match semantics demand that we find the earliest + /// // match that prefers earlier parts of the pattern over later parts. 
+ /// let re = PikeVM::new("abc|a")?; + /// let mut cache = re.create_cache(); + /// let expected = Match::must(0, 0..3); + /// assert_eq!(Some(expected), re.find(&mut cache, "abc")); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn find<'h, I: Into>>( + &self, + cache: &mut Cache, + input: I, + ) -> Option { + let input = input.into(); + if self.get_nfa().pattern_len() == 1 { + let mut slots = [None, None]; + let pid = self.search_slots(cache, &input, &mut slots)?; + let start = slots[0]?.get(); + let end = slots[1]?.get(); + return Some(Match::new(pid, Span { start, end })); + } + let ginfo = self.get_nfa().group_info(); + let slots_len = ginfo.implicit_slot_len(); + let mut slots = vec![None; slots_len]; + let pid = self.search_slots(cache, &input, &mut slots)?; + let start = slots[pid.as_usize() * 2]?.get(); + let end = slots[pid.as_usize() * 2 + 1]?.get(); + Some(Match::new(pid, Span { start, end })) + } + + /// Executes a leftmost forward search and writes the spans of capturing + /// groups that participated in a match into the provided [`Captures`] + /// value. If no match was found, then [`Captures::is_match`] is guaranteed + /// to return `false`. 
+ /// + /// # Example + /// + /// ``` + /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Span}; + /// + /// let re = PikeVM::new(r"^([0-9]{4})-([0-9]{2})-([0-9]{2})$")?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// + /// re.captures(&mut cache, "2010-03-14", &mut caps); + /// assert!(caps.is_match()); + /// assert_eq!(Some(Span::from(0..4)), caps.get_group(1)); + /// assert_eq!(Some(Span::from(5..7)), caps.get_group(2)); + /// assert_eq!(Some(Span::from(8..10)), caps.get_group(3)); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn captures<'h, I: Into>>( + &self, + cache: &mut Cache, + input: I, + caps: &mut Captures, + ) { + self.search(cache, &input.into(), caps) + } + + /// Returns an iterator over all non-overlapping leftmost matches in the + /// given bytes. If no match exists, then the iterator yields no elements. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match}; + /// + /// let re = PikeVM::new("foo[0-9]+")?; + /// let mut cache = re.create_cache(); + /// + /// let text = "foo1 foo12 foo123"; + /// let matches: Vec = re.find_iter(&mut cache, text).collect(); + /// assert_eq!(matches, vec![ + /// Match::must(0, 0..4), + /// Match::must(0, 5..10), + /// Match::must(0, 11..17), + /// ]); + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn find_iter<'r, 'c, 'h, I: Into>>( + &'r self, + cache: &'c mut Cache, + input: I, + ) -> FindMatches<'r, 'c, 'h> { + let caps = Captures::matches(self.get_nfa().group_info().clone()); + let it = iter::Searcher::new(input.into()); + FindMatches { re: self, cache, caps, it } + } + + /// Returns an iterator over all non-overlapping `Captures` values. If no + /// match exists, then the iterator yields no elements. + /// + /// This yields the same matches as [`PikeVM::find_iter`], but it includes + /// the spans of all capturing groups that participate in each match. 
+ /// + /// **Tip:** See [`util::iter::Searcher`](crate::util::iter::Searcher) for + /// how to correctly iterate over all matches in a haystack while avoiding + /// the creation of a new `Captures` value for every match. (Which you are + /// forced to do with an `Iterator`.) + /// + /// # Example + /// + /// ``` + /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Span}; + /// + /// let re = PikeVM::new("foo(?P[0-9]+)")?; + /// let mut cache = re.create_cache(); + /// + /// let text = "foo1 foo12 foo123"; + /// let matches: Vec = re + /// .captures_iter(&mut cache, text) + /// // The unwrap is OK since 'numbers' matches if the pattern matches. + /// .map(|caps| caps.get_group_by_name("numbers").unwrap()) + /// .collect(); + /// assert_eq!(matches, vec![ + /// Span::from(3..4), + /// Span::from(8..10), + /// Span::from(14..17), + /// ]); + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn captures_iter<'r, 'c, 'h, I: Into>>( + &'r self, + cache: &'c mut Cache, + input: I, + ) -> CapturesMatches<'r, 'c, 'h> { + let caps = self.create_captures(); + let it = iter::Searcher::new(input.into()); + CapturesMatches { re: self, cache, caps, it } + } +} + +impl PikeVM { + /// Executes a leftmost forward search and writes the spans of capturing + /// groups that participated in a match into the provided [`Captures`] + /// value. If no match was found, then [`Captures::is_match`] is guaranteed + /// to return `false`. + /// + /// This is like [`PikeVM::captures`], but it accepts a concrete `&Input` + /// instead of an `Into`. + /// + /// # Example: specific pattern search + /// + /// This example shows how to build a multi-PikeVM that permits searching + /// for specific patterns. 
+ /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::pikevm::PikeVM, + /// Anchored, Match, PatternID, Input, + /// }; + /// + /// let re = PikeVM::new_many(&["[a-z0-9]{6}", "[a-z][a-z0-9]{5}"])?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// let haystack = "foo123"; + /// + /// // Since we are using the default leftmost-first match and both + /// // patterns match at the same starting position, only the first pattern + /// // will be returned in this case when doing a search for any of the + /// // patterns. + /// let expected = Some(Match::must(0, 0..6)); + /// re.search(&mut cache, &Input::new(haystack), &mut caps); + /// assert_eq!(expected, caps.get_match()); + /// + /// // But if we want to check whether some other pattern matches, then we + /// // can provide its pattern ID. + /// let expected = Some(Match::must(1, 0..6)); + /// let input = Input::new(haystack) + /// .anchored(Anchored::Pattern(PatternID::must(1))); + /// re.search(&mut cache, &input, &mut caps); + /// assert_eq!(expected, caps.get_match()); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// # Example: specifying the bounds of a search + /// + /// This example shows how providing the bounds of a search can produce + /// different results than simply sub-slicing the haystack. + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match, Input}; + /// + /// let re = PikeVM::new(r"\b[0-9]{3}\b")?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// let haystack = "foo123bar"; + /// + /// // Since we sub-slice the haystack, the search doesn't know about + /// // the larger context and assumes that `123` is surrounded by word + /// // boundaries. And of course, the match position is reported relative + /// // to the sub-slice as well, which means we get `0..3` instead of + /// // `3..6`. 
+ /// let expected = Some(Match::must(0, 0..3)); + /// re.search(&mut cache, &Input::new(&haystack[3..6]), &mut caps); + /// assert_eq!(expected, caps.get_match()); + /// + /// // But if we provide the bounds of the search within the context of the + /// // entire haystack, then the search can take the surrounding context + /// // into account. (And if we did find a match, it would be reported + /// // as a valid offset into `haystack` instead of its sub-slice.) + /// let expected = None; + /// let input = Input::new(haystack).range(3..6); + /// re.search(&mut cache, &input, &mut caps); + /// assert_eq!(expected, caps.get_match()); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn search( + &self, + cache: &mut Cache, + input: &Input<'_>, + caps: &mut Captures, + ) { + caps.set_pattern(None); + let pid = self.search_slots(cache, input, caps.slots_mut()); + caps.set_pattern(pid); + } + + /// Executes a leftmost forward search and writes the spans of capturing + /// groups that participated in a match into the provided `slots`, and + /// returns the matching pattern ID. The contents of the slots for patterns + /// other than the matching pattern are unspecified. If no match was found, + /// then `None` is returned and the contents of `slots` is unspecified. + /// + /// This is like [`PikeVM::search`], but it accepts a raw slots slice + /// instead of a `Captures` value. This is useful in contexts where you + /// don't want or need to allocate a `Captures`. + /// + /// It is legal to pass _any_ number of slots to this routine. If the regex + /// engine would otherwise write a slot offset that doesn't fit in the + /// provided slice, then it is simply skipped. In general though, there are + /// usually three slice lengths you might want to use: + /// + /// * An empty slice, if you only care about which pattern matched. 
+ /// * A slice with + /// [`pattern_len() * 2`](crate::nfa::thompson::NFA::pattern_len) + /// slots, if you only care about the overall match spans for each matching + /// pattern. + /// * A slice with + /// [`slot_len()`](crate::util::captures::GroupInfo::slot_len) slots, which + /// permits recording match offsets for every capturing group in every + /// pattern. + /// + /// # Example + /// + /// This example shows how to find the overall match offsets in a + /// multi-pattern search without allocating a `Captures` value. Indeed, we + /// can put our slots right on the stack. + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{nfa::thompson::pikevm::PikeVM, PatternID, Input}; + /// + /// let re = PikeVM::new_many(&[ + /// r"\pL+", + /// r"\d+", + /// ])?; + /// let mut cache = re.create_cache(); + /// let input = Input::new("!@#123"); + /// + /// // We only care about the overall match offsets here, so we just + /// // allocate two slots for each pattern. Each slot records the start + /// // and end of the match. + /// let mut slots = [None; 4]; + /// let pid = re.search_slots(&mut cache, &input, &mut slots); + /// assert_eq!(Some(PatternID::must(1)), pid); + /// + /// // The overall match offsets are always at 'pid * 2' and 'pid * 2 + 1'. + /// // See 'GroupInfo' for more details on the mapping between groups and + /// // slot indices. 
+ /// let slot_start = pid.unwrap().as_usize() * 2; + /// let slot_end = slot_start + 1; + /// assert_eq!(Some(3), slots[slot_start].map(|s| s.get())); + /// assert_eq!(Some(6), slots[slot_end].map(|s| s.get())); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn search_slots( + &self, + cache: &mut Cache, + input: &Input<'_>, + slots: &mut [Option], + ) -> Option { + let utf8empty = self.get_nfa().has_empty() && self.get_nfa().is_utf8(); + if !utf8empty { + let hm = self.search_slots_imp(cache, input, slots)?; + return Some(hm.pattern()); + } + // There is an unfortunate special case where if the regex can + // match the empty string and UTF-8 mode is enabled, the search + // implementation requires that the slots have at least as much space + // to report the bounds of any match. This is so zero-width matches + // that split a codepoint can be filtered out. + // + // Note that if utf8empty is true, we specialize the case for when + // the number of patterns is 1. In that case, we can just use a stack + // allocation. Otherwise we resort to a heap allocation, which we + // convince ourselves we're fine with due to the pathological nature of + // this case. + let min = self.get_nfa().group_info().implicit_slot_len(); + if slots.len() >= min { + let hm = self.search_slots_imp(cache, input, slots)?; + return Some(hm.pattern()); + } + if self.get_nfa().pattern_len() == 1 { + let mut enough = [None, None]; + let got = self.search_slots_imp(cache, input, &mut enough); + // This is OK because we know `enough` is strictly bigger than + // `slots`, otherwise this special case isn't reached. + slots.copy_from_slice(&enough[..slots.len()]); + return got.map(|hm| hm.pattern()); + } + let mut enough = vec![None; min]; + let got = self.search_slots_imp(cache, input, &mut enough); + // This is OK because we know `enough` is strictly bigger than `slots`, + // otherwise this special case isn't reached. 
+ slots.copy_from_slice(&enough[..slots.len()]); + got.map(|hm| hm.pattern()) + } + + /// This is the actual implementation of `search_slots_imp` that + /// doesn't account for the special case when 1) the NFA has UTF-8 mode + /// enabled, 2) the NFA can match the empty string and 3) the caller has + /// provided an insufficient number of slots to record match offsets. + #[inline(never)] + fn search_slots_imp( + &self, + cache: &mut Cache, + input: &Input<'_>, + slots: &mut [Option], + ) -> Option { + let utf8empty = self.get_nfa().has_empty() && self.get_nfa().is_utf8(); + let hm = match self.search_imp(cache, input, slots) { + None => return None, + Some(hm) if !utf8empty => return Some(hm), + Some(hm) => hm, + }; + empty::skip_splits_fwd(input, hm, hm.offset(), |input| { + Ok(self + .search_imp(cache, input, slots) + .map(|hm| (hm, hm.offset()))) + }) + // OK because the PikeVM never errors. + .unwrap() + } + + /// Writes the set of patterns that match anywhere in the given search + /// configuration to `patset`. If multiple patterns match at the same + /// position and this `PikeVM` was configured with [`MatchKind::All`] + /// semantics, then all matching patterns are written to the given set. + /// + /// Unless all of the patterns in this `PikeVM` are anchored, then + /// generally speaking, this will visit every byte in the haystack. + /// + /// This search routine *does not* clear the pattern set. This gives some + /// flexibility to the caller (e.g., running multiple searches with the + /// same pattern set), but does make the API bug-prone if you're reusing + /// the same pattern set for multiple searches but intended them to be + /// independent. + /// + /// If a pattern ID matched but the given `PatternSet` does not have + /// sufficient capacity to store it, then it is not inserted and silently + /// dropped. 
+ /// + /// # Example + /// + /// This example shows how to find all matching patterns in a haystack, + /// even when some patterns match at the same position as other patterns. + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{ + /// nfa::thompson::pikevm::PikeVM, + /// Input, MatchKind, PatternSet, + /// }; + /// + /// let patterns = &[ + /// r"\w+", r"\d+", r"\pL+", r"foo", r"bar", r"barfoo", r"foobar", + /// ]; + /// let re = PikeVM::builder() + /// .configure(PikeVM::config().match_kind(MatchKind::All)) + /// .build_many(patterns)?; + /// let mut cache = re.create_cache(); + /// + /// let input = Input::new("foobar"); + /// let mut patset = PatternSet::new(re.pattern_len()); + /// re.which_overlapping_matches(&mut cache, &input, &mut patset); + /// let expected = vec![0, 2, 3, 4, 6]; + /// let got: Vec = patset.iter().map(|p| p.as_usize()).collect(); + /// assert_eq!(expected, got); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn which_overlapping_matches( + &self, + cache: &mut Cache, + input: &Input<'_>, + patset: &mut PatternSet, + ) { + self.which_overlapping_imp(cache, input, patset) + } +} + +impl PikeVM { + /// The implementation of standard leftmost search. + /// + /// Capturing group spans are written to `slots`, but only if requested. + /// `slots` can be any length. Any slot in the NFA that is activated but + /// which is out of bounds for the given `slots` is ignored. + fn search_imp( + &self, + cache: &mut Cache, + input: &Input<'_>, + slots: &mut [Option], + ) -> Option { + cache.setup_search(slots.len()); + if input.is_done() { + return None; + } + // Why do we even care about this? Well, in our 'Captures' + // representation, we use usize::MAX as a sentinel to indicate "no + // match." This isn't problematic so long as our haystack doesn't have + // a maximal length. 
Byte slices are guaranteed by Rust to have a + // length that fits into isize, and so this assert should always pass. + // But we put it here to make our assumption explicit. + assert!( + input.haystack().len() < core::usize::MAX, + "byte slice lengths must be less than usize MAX", + ); + instrument!(|c| c.reset(&self.nfa)); + + // Whether we want to visit all match states instead of emulating the + // 'leftmost' semantics of typical backtracking regex engines. + let allmatches = + self.config.get_match_kind().continue_past_first_match(); + let (anchored, start_id) = match self.start_config(input) { + None => return None, + Some(config) => config, + }; + + let pre = + if anchored { None } else { self.get_config().get_prefilter() }; + let Cache { ref mut stack, ref mut curr, ref mut next } = cache; + let mut hm = None; + // Yes, our search doesn't end at input.end(), but includes it. This + // is necessary because matches are delayed by one byte, just like + // how the DFA engines work. The delay is used to handle look-behind + // assertions. In the case of the PikeVM, the delay is implemented + // by not considering a match to exist until it is visited in + // 'steps'. Technically, we know a match exists in the previous + // iteration via 'epsilon_closure'. (It's the same thing in NFA-to-DFA + // determinization. We don't mark a DFA state as a match state if it + // contains an NFA match state, but rather, whether the DFA state was + // generated by a transition from a DFA state that contains an NFA + // match state.) + let mut at = input.start(); + while at <= input.end() { + // If we have no states left to visit, then there are some cases + // where we know we can quit early or even skip ahead. + if curr.set.is_empty() { + // We have a match and we haven't been instructed to continue + // on even after finding a match, so we can quit. 
+ if hm.is_some() && !allmatches { + break; + } + // If we're running an anchored search and we've advanced + // beyond the start position with no other states to try, then + // we will never observe a match and thus can stop. + if anchored && at > input.start() { + break; + } + // If there no states left to explore at this position and we + // know we can't terminate early, then we are effectively at + // the starting state of the NFA. If we fell through here, + // we'd end up adding our '(?s-u:.)*?' prefix and it would be + // the only thing in 'curr'. So we might as well just skip + // ahead until we find something that we know might advance us + // forward. + if let Some(ref pre) = pre { + let span = Span::from(at..input.end()); + match pre.find(input.haystack(), span) { + None => break, + Some(ref span) => at = span.start, + } + } + } + // Instead of using the NFA's unanchored start state, we actually + // always use its anchored starting state. As a result, when doing + // an unanchored search, we need to simulate our own '(?s-u:.)*?' + // prefix, to permit a match to appear anywhere. + // + // Now, we don't *have* to do things this way. We could use the + // NFA's unanchored starting state and do one 'epsilon_closure' + // call from that starting state before the main loop here. And + // that is just as correct. However, it turns out to be slower + // than our approach here because it slightly increases the cost + // of processing each byte by requiring us to visit more NFA + // states to deal with the additional NFA states in the unanchored + // prefix. By simulating it explicitly here, we lower those costs + // substantially. The cost is itself small, but it adds up for + // large haystacks. + // + // In order to simulate the '(?s-u:.)*?' prefix---which is not + // greedy---we are careful not to perform an epsilon closure on + // the start state if we already have a match. 
Namely, if we + // did otherwise, we would never reach a terminating condition + // because there would always be additional states to process. + // In effect, the exclusion of running 'epsilon_closure' when + // we have a match corresponds to the "dead" states we have in + // our DFA regex engines. Namely, in a DFA, match states merely + // instruct the search execution to record the current offset as + // the most recently seen match. It is the dead state that actually + // indicates when to stop the search (other than EOF or quit + // states). + // + // However, when 'allmatches' is true, the caller has asked us to + // leave in every possible match state. This tends not to make a + // whole lot of sense in unanchored searches, because it means the + // search really cannot terminate until EOF. And often, in that + // case, you wind up skipping over a bunch of matches and are left + // with the "last" match. Arguably, it just doesn't make a lot of + // sense to run a 'leftmost' search (which is what this routine is) + // with 'allmatches' set to true. But the DFAs support it and this + // matches their behavior. (Generally, 'allmatches' is useful for + // overlapping searches or leftmost anchored searches to find the + // longest possible match by ignoring match priority.) + // + // Additionally, when we're running an anchored search, this + // epsilon closure should only be computed at the beginning of the + // search. If we re-computed it at every position, we would be + // simulating an unanchored search when we were tasked to perform + // an anchored search. + if (!hm.is_some() || allmatches) + && (!anchored || at == input.start()) + { + // Since we are adding to the 'curr' active states and since + // this is for the start ID, we use a slots slice that is + // guaranteed to have the right length but where every element + // is absent. This is exactly what we want, because this + // epsilon closure is responsible for simulating an unanchored + // '(?s:.)*?' 
prefix. It is specifically outside of any + // capturing groups, and thus, using slots that are always + // absent is correct. + // + // Note though that we can't just use '&mut []' here, since + // this epsilon closure may traverse through 'Captures' epsilon + // transitions, and thus must be able to write offsets to the + // slots given which are later copied to slot values in 'curr'. + let slots = next.slot_table.all_absent(); + self.epsilon_closure(stack, slots, curr, input, at, start_id); + } + if let Some(pid) = self.nexts(stack, curr, next, input, at, slots) + { + hm = Some(HalfMatch::new(pid, at)); + } + // Unless the caller asked us to return early, we need to mush on + // to see if we can extend our match. (But note that 'nexts' will + // quit right after seeing a match when match_kind==LeftmostFirst, + // as is consistent with leftmost-first match priority.) + if input.get_earliest() && hm.is_some() { + break; + } + core::mem::swap(curr, next); + next.set.clear(); + at += 1; + } + instrument!(|c| c.eprint(&self.nfa)); + hm + } + + /// The implementation for the 'which_overlapping_matches' API. Basically, + /// we do a single scan through the entire haystack (unless our regex + /// or search is anchored) and record every pattern that matched. In + /// particular, when MatchKind::All is used, this supports overlapping + /// matches. So if we have the regexes 'sam' and 'samwise', they will + /// *both* be reported in the pattern set when searching the haystack + /// 'samwise'. + fn which_overlapping_imp( + &self, + cache: &mut Cache, + input: &Input<'_>, + patset: &mut PatternSet, + ) { + // NOTE: This is effectively a copy of 'search_imp' above, but with no + // captures support and instead writes patterns that matched directly + // to 'patset'. See that routine for better commentary about what's + // going on in this routine. We probably could unify the routines using + // generics or more helper routines, but I'm not sure it's worth it. 
+ // + // NOTE: We somewhat go out of our way here to support things like + // 'input.get_earliest()' and 'leftmost-first' match semantics. Neither + // of those seem particularly relevant to this routine, but they are + // both supported by the DFA analogs of this routine by construction + // and composition, so it seems like good sense to have the PikeVM + // match that behavior. + + cache.setup_search(0); + if input.is_done() { + return; + } + assert!( + input.haystack().len() < core::usize::MAX, + "byte slice lengths must be less than usize MAX", + ); + instrument!(|c| c.reset(&self.nfa)); + + let allmatches = + self.config.get_match_kind().continue_past_first_match(); + let (anchored, start_id) = match self.start_config(input) { + None => return, + Some(config) => config, + }; + + let Cache { ref mut stack, ref mut curr, ref mut next } = cache; + for at in input.start()..=input.end() { + let any_matches = !patset.is_empty(); + if curr.set.is_empty() { + if any_matches && !allmatches { + break; + } + if anchored && at > input.start() { + break; + } + } + if !any_matches || allmatches { + let slots = &mut []; + self.epsilon_closure(stack, slots, curr, input, at, start_id); + } + self.nexts_overlapping(stack, curr, next, input, at, patset); + // If we found a match and filled our set, then there is no more + // additional info that we can provide. Thus, we can quit. We also + // quit if the caller asked us to stop at the earliest point that + // we know a match exists. + if patset.is_full() || input.get_earliest() { + break; + } + core::mem::swap(curr, next); + next.set.clear(); + } + instrument!(|c| c.eprint(&self.nfa)); + } + + /// Process the active states in 'curr' to find the states (written to + /// 'next') we should process for the next byte in the haystack. + /// + /// 'stack' is used to perform a depth first traversal of the NFA when + /// computing an epsilon closure. 
+ /// + /// When a match is found, the slots for that match state (in 'curr') are + /// copied to 'caps'. Moreover, once a match is seen, processing for 'curr' + /// stops (unless the PikeVM was configured with MatchKind::All semantics). + #[cfg_attr(feature = "perf-inline", inline(always))] + fn nexts( + &self, + stack: &mut Vec, + curr: &mut ActiveStates, + next: &mut ActiveStates, + input: &Input<'_>, + at: usize, + slots: &mut [Option], + ) -> Option { + instrument!(|c| c.record_state_set(&curr.set)); + let mut pid = None; + let ActiveStates { ref set, ref mut slot_table } = *curr; + for sid in set.iter() { + pid = match self.next(stack, slot_table, next, input, at, sid) { + None => continue, + Some(pid) => Some(pid), + }; + slots.copy_from_slice(slot_table.for_state(sid)); + if !self.config.get_match_kind().continue_past_first_match() { + break; + } + } + pid + } + + /// Like 'nexts', but for the overlapping case. This doesn't write any + /// slots, and instead just writes which pattern matched in 'patset'. + #[cfg_attr(feature = "perf-inline", inline(always))] + fn nexts_overlapping( + &self, + stack: &mut Vec, + curr: &mut ActiveStates, + next: &mut ActiveStates, + input: &Input<'_>, + at: usize, + patset: &mut PatternSet, + ) { + instrument!(|c| c.record_state_set(&curr.set)); + let utf8empty = self.get_nfa().has_empty() && self.get_nfa().is_utf8(); + let ActiveStates { ref set, ref mut slot_table } = *curr; + for sid in set.iter() { + let pid = match self.next(stack, slot_table, next, input, at, sid) + { + None => continue, + Some(pid) => pid, + }; + // This handles the case of finding a zero-width match that splits + // a codepoint. Namely, if we're in UTF-8 mode AND we know we can + // match the empty string, then the only valid way of getting to + // this point with an offset that splits a codepoint is when we + // have an empty match. Such matches, in UTF-8 mode, must not be + // reported. 
So we just skip them here and pretend as if we did + // not see a match. + if utf8empty && !input.is_char_boundary(at) { + continue; + } + let _ = patset.try_insert(pid); + if !self.config.get_match_kind().continue_past_first_match() { + break; + } + } + } + + /// Starting from 'sid', if the position 'at' in the 'input' haystack has a + /// transition defined out of 'sid', then add the state transitioned to and + /// its epsilon closure to the 'next' set of states to explore. + /// + /// 'stack' is used by the epsilon closure computation to perform a depth + /// first traversal of the NFA. + /// + /// 'curr_slot_table' should be the table of slots for the current set of + /// states being explored. If there is a transition out of 'sid', then + /// sid's row in the slot table is used to perform the epsilon closure. + #[cfg_attr(feature = "perf-inline", inline(always))] + fn next( + &self, + stack: &mut Vec, + curr_slot_table: &mut SlotTable, + next: &mut ActiveStates, + input: &Input<'_>, + at: usize, + sid: StateID, + ) -> Option { + instrument!(|c| c.record_step(sid)); + match *self.nfa.state(sid) { + State::Fail + | State::Look { .. } + | State::Union { .. } + | State::BinaryUnion { .. } + | State::Capture { .. } => None, + State::ByteRange { ref trans } => { + if trans.matches(input.haystack(), at) { + let slots = curr_slot_table.for_state(sid); + // OK because 'at <= haystack.len() < usize::MAX', so + // adding 1 will never wrap. + let at = at.wrapping_add(1); + self.epsilon_closure( + stack, slots, next, input, at, trans.next, + ); + } + None + } + State::Sparse(ref sparse) => { + if let Some(next_sid) = sparse.matches(input.haystack(), at) { + let slots = curr_slot_table.for_state(sid); + // OK because 'at <= haystack.len() < usize::MAX', so + // adding 1 will never wrap. 
+ let at = at.wrapping_add(1); + self.epsilon_closure( + stack, slots, next, input, at, next_sid, + ); + } + None + } + State::Dense(ref dense) => { + if let Some(next_sid) = dense.matches(input.haystack(), at) { + let slots = curr_slot_table.for_state(sid); + // OK because 'at <= haystack.len() < usize::MAX', so + // adding 1 will never wrap. + let at = at.wrapping_add(1); + self.epsilon_closure( + stack, slots, next, input, at, next_sid, + ); + } + None + } + State::Match { pattern_id } => Some(pattern_id), + } + } + + /// Compute the epsilon closure of 'sid', writing the closure into 'next' + /// while copying slot values from 'curr_slots' into corresponding states + /// in 'next'. 'curr_slots' should be the slot values corresponding to + /// 'sid'. + /// + /// The given 'stack' is used to perform a depth first traversal of the + /// NFA by recursively following all epsilon transitions out of 'sid'. + /// Conditional epsilon transitions are followed if and only if they are + /// satisfied for the position 'at' in the 'input' haystack. + /// + /// While this routine may write to 'curr_slots', once it returns, any + /// writes are undone and the original values (even if absent) are + /// restored. + #[cfg_attr(feature = "perf-inline", inline(always))] + fn epsilon_closure( + &self, + stack: &mut Vec, + curr_slots: &mut [Option], + next: &mut ActiveStates, + input: &Input<'_>, + at: usize, + sid: StateID, + ) { + instrument!(|c| { + c.record_closure(sid); + c.record_stack_push(sid); + }); + stack.push(FollowEpsilon::Explore(sid)); + while let Some(frame) = stack.pop() { + match frame { + FollowEpsilon::RestoreCapture { slot, offset: pos } => { + curr_slots[slot] = pos; + } + FollowEpsilon::Explore(sid) => { + self.epsilon_closure_explore( + stack, curr_slots, next, input, at, sid, + ); + } + } + } + } + + /// Explore all of the epsilon transitions out of 'sid'. 
This is mostly + /// split out from 'epsilon_closure' in order to clearly delineate + /// the actual work of computing an epsilon closure from the stack + /// book-keeping. + /// + /// This will push any additional explorations needed on to 'stack'. + /// + /// 'curr_slots' should refer to the slots for the currently active NFA + /// state. That is, the current state we are stepping through. These + /// slots are mutated in place as new 'Captures' states are traversed + /// during epsilon closure, but the slots are restored to their original + /// values once the full epsilon closure is completed. The ultimate use of + /// 'curr_slots' is to copy them to the corresponding 'next_slots', so that + /// the capturing group spans are forwarded from the currently active state + /// to the next. + /// + /// 'next' refers to the next set of active states. Computing an epsilon + /// closure may increase the next set of active states. + /// + /// 'input' refers to the caller's input configuration and 'at' refers to + /// the current position in the haystack. These are used to check whether + /// conditional epsilon transitions (like look-around) are satisfied at + /// the current position. If they aren't, then the epsilon closure won't + /// include them. + #[cfg_attr(feature = "perf-inline", inline(always))] + fn epsilon_closure_explore( + &self, + stack: &mut Vec, + curr_slots: &mut [Option], + next: &mut ActiveStates, + input: &Input<'_>, + at: usize, + mut sid: StateID, + ) { + // We can avoid pushing some state IDs on to our stack in precisely + // the cases where a 'push(x)' would be immediately followed by a 'x + // = pop()'. This is achieved by this outer-loop. We simply set 'sid' + // to be the next state ID we want to explore once we're done with + // our initial exploration. In practice, this avoids a lot of stack + // thrashing. + loop { + instrument!(|c| c.record_set_insert(sid)); + // Record this state as part of our next set of active states. 
If + // we've already explored it, then no need to do it again. + if !next.set.insert(sid) { + return; + } + match *self.nfa.state(sid) { + State::Fail + | State::Match { .. } + | State::ByteRange { .. } + | State::Sparse { .. } + | State::Dense { .. } => { + next.slot_table.for_state(sid).copy_from_slice(curr_slots); + return; + } + State::Look { look, next } => { + // OK because we don't permit building a searcher with a + // Unicode word boundary if the requisite Unicode data is + // unavailable. + if !self.nfa.look_matcher().matches_inline( + look, + input.haystack(), + at, + ) { + return; + } + sid = next; + } + State::Union { ref alternates } => { + sid = match alternates.get(0) { + None => return, + Some(&sid) => sid, + }; + instrument!(|c| { + for &alt in &alternates[1..] { + c.record_stack_push(alt); + } + }); + stack.extend( + alternates[1..] + .iter() + .copied() + .rev() + .map(FollowEpsilon::Explore), + ); + } + State::BinaryUnion { alt1, alt2 } => { + sid = alt1; + instrument!(|c| c.record_stack_push(sid)); + stack.push(FollowEpsilon::Explore(alt2)); + } + State::Capture { next, slot, .. } => { + // There's no need to do anything with slots that + // ultimately won't be copied into the caller-provided + // 'Captures' value. So we just skip dealing with them at + // all. + if slot.as_usize() < curr_slots.len() { + instrument!(|c| c.record_stack_push(sid)); + stack.push(FollowEpsilon::RestoreCapture { + slot, + offset: curr_slots[slot], + }); + // OK because length of a slice must fit into an isize. + curr_slots[slot] = Some(NonMaxUsize::new(at).unwrap()); + } + sid = next; + } + } + } + } + + /// Return the starting configuration of a PikeVM search. + /// + /// The "start config" is basically whether the search should be anchored + /// or not and the NFA state ID at which to begin the search. The state ID + /// returned always corresponds to an anchored starting state even when the + /// search is unanchored. 
This is because the PikeVM search loop deals with + /// unanchored searches with an explicit epsilon closure out of the start + /// state. + /// + /// This routine accounts for both the caller's `Input` configuration + /// and the pattern itself. For example, even if the caller asks for an + /// unanchored search, if the pattern itself is anchored, then this will + /// always return 'true' because implementing an unanchored search in that + /// case would be incorrect. + /// + /// Similarly, if the caller requests an anchored search for a particular + /// pattern, then the starting state ID returned will reflect that. + /// + /// If a pattern ID is given in the input configuration that is not in + /// this regex, then `None` is returned. + fn start_config(&self, input: &Input<'_>) -> Option<(bool, StateID)> { + match input.get_anchored() { + // Only way we're unanchored is if both the caller asked for an + // unanchored search *and* the pattern is itself not anchored. + Anchored::No => Some(( + self.nfa.is_always_start_anchored(), + self.nfa.start_anchored(), + )), + Anchored::Yes => Some((true, self.nfa.start_anchored())), + Anchored::Pattern(pid) => { + Some((true, self.nfa.start_pattern(pid)?)) + } + } + } +} + +/// An iterator over all non-overlapping matches for a particular search. +/// +/// The iterator yields a [`Match`] value until no more matches could be found. +/// +/// The lifetime parameters are as follows: +/// +/// * `'r` represents the lifetime of the PikeVM. +/// * `'c` represents the lifetime of the PikeVM's cache. +/// * `'h` represents the lifetime of the haystack being searched. +/// +/// This iterator can be created with the [`PikeVM::find_iter`] method. 
+#[derive(Debug)] +pub struct FindMatches<'r, 'c, 'h> { + re: &'r PikeVM, + cache: &'c mut Cache, + caps: Captures, + it: iter::Searcher<'h>, +} + +impl<'r, 'c, 'h> Iterator for FindMatches<'r, 'c, 'h> { + type Item = Match; + + #[inline] + fn next(&mut self) -> Option { + // Splitting 'self' apart seems necessary to appease borrowck. + let FindMatches { re, ref mut cache, ref mut caps, ref mut it } = + *self; + // 'advance' converts errors into panics, which is OK here because + // the PikeVM can never return an error. + it.advance(|input| { + re.search(cache, input, caps); + Ok(caps.get_match()) + }) + } +} + +/// An iterator over all non-overlapping leftmost matches, with their capturing +/// groups, for a particular search. +/// +/// The iterator yields a [`Captures`] value until no more matches could be +/// found. +/// +/// The lifetime parameters are as follows: +/// +/// * `'r` represents the lifetime of the PikeVM. +/// * `'c` represents the lifetime of the PikeVM's cache. +/// * `'h` represents the lifetime of the haystack being searched. +/// +/// This iterator can be created with the [`PikeVM::captures_iter`] method. +#[derive(Debug)] +pub struct CapturesMatches<'r, 'c, 'h> { + re: &'r PikeVM, + cache: &'c mut Cache, + caps: Captures, + it: iter::Searcher<'h>, +} + +impl<'r, 'c, 'h> Iterator for CapturesMatches<'r, 'c, 'h> { + type Item = Captures; + + #[inline] + fn next(&mut self) -> Option { + // Splitting 'self' apart seems necessary to appease borrowck. + let CapturesMatches { re, ref mut cache, ref mut caps, ref mut it } = + *self; + // 'advance' converts errors into panics, which is OK here because + // the PikeVM can never return an error. + it.advance(|input| { + re.search(cache, input, caps); + Ok(caps.get_match()) + }); + if caps.is_match() { + Some(caps.clone()) + } else { + None + } + } +} + +/// A cache represents mutable state that a [`PikeVM`] requires during a +/// search. 
+/// +/// For a given [`PikeVM`], its corresponding cache may be created either via +/// [`PikeVM::create_cache`], or via [`Cache::new`]. They are equivalent in +/// every way, except the former does not require explicitly importing `Cache`. +/// +/// A particular `Cache` is coupled with the [`PikeVM`] from which it +/// was created. It may only be used with that `PikeVM`. A cache and its +/// allocations may be re-purposed via [`Cache::reset`], in which case, it can +/// only be used with the new `PikeVM` (and not the old one). +#[derive(Clone, Debug)] +pub struct Cache { + /// Stack used while computing epsilon closure. This effectively lets us + /// move what is more naturally expressed through recursion to a stack + /// on the heap. + stack: Vec, + /// The current active states being explored for the current byte in the + /// haystack. + curr: ActiveStates, + /// The next set of states we're building that will be explored for the + /// next byte in the haystack. + next: ActiveStates, +} + +impl Cache { + /// Create a new [`PikeVM`] cache. + /// + /// A potentially more convenient routine to create a cache is + /// [`PikeVM::create_cache`], as it does not require also importing the + /// `Cache` type. + /// + /// If you want to reuse the returned `Cache` with some other `PikeVM`, + /// then you must call [`Cache::reset`] with the desired `PikeVM`. + pub fn new(re: &PikeVM) -> Cache { + Cache { + stack: vec![], + curr: ActiveStates::new(re), + next: ActiveStates::new(re), + } + } + + /// Reset this cache such that it can be used for searching with a + /// different [`PikeVM`]. + /// + /// A cache reset permits reusing memory already allocated in this cache + /// with a different `PikeVM`. + /// + /// # Example + /// + /// This shows how to re-purpose a cache for use with a different `PikeVM`. 
+ /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match}; + /// + /// let re1 = PikeVM::new(r"\w")?; + /// let re2 = PikeVM::new(r"\W")?; + /// + /// let mut cache = re1.create_cache(); + /// assert_eq!( + /// Some(Match::must(0, 0..2)), + /// re1.find_iter(&mut cache, "Δ").next(), + /// ); + /// + /// // Using 'cache' with re2 is not allowed. It may result in panics or + /// // incorrect results. In order to re-purpose the cache, we must reset + /// // it with the PikeVM we'd like to use it with. + /// // + /// // Similarly, after this reset, using the cache with 're1' is also not + /// // allowed. + /// cache.reset(&re2); + /// assert_eq!( + /// Some(Match::must(0, 0..3)), + /// re2.find_iter(&mut cache, "☃").next(), + /// ); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn reset(&mut self, re: &PikeVM) { + self.curr.reset(re); + self.next.reset(re); + } + + /// Returns the heap memory usage, in bytes, of this cache. + /// + /// This does **not** include the stack size used up by this cache. To + /// compute that, use `std::mem::size_of::()`. + pub fn memory_usage(&self) -> usize { + use core::mem::size_of; + (self.stack.len() * size_of::()) + + self.curr.memory_usage() + + self.next.memory_usage() + } + + /// Clears this cache. This should be called at the start of every search + /// to ensure we start with a clean slate. + /// + /// This also sets the length of the capturing groups used in the current + /// search. This permits an optimization where by 'SlotTable::for_state' + /// only returns the number of slots equivalent to the number of slots + /// given in the 'Captures' value. This may be less than the total number + /// of possible slots, e.g., when one only wants to track overall match + /// offsets. This in turn permits less copying of capturing group spans + /// in the PikeVM. 
+ fn setup_search(&mut self, captures_slot_len: usize) { + self.stack.clear(); + self.curr.setup_search(captures_slot_len); + self.next.setup_search(captures_slot_len); + } +} + +/// A set of active states used to "simulate" the execution of an NFA via the +/// PikeVM. +/// +/// There are two sets of these used during NFA simulation. One set corresponds +/// to the "current" set of states being traversed for the current position +/// in a haystack. The other set corresponds to the "next" set of states being +/// built, which will become the new "current" set for the next position in the +/// haystack. These two sets correspond to CLIST and NLIST in Thompson's +/// original paper regexes: https://dl.acm.org/doi/pdf/10.1145/363347.363387 +/// +/// In addition to representing a set of NFA states, this also maintains slot +/// values for each state. These slot values are what turn the NFA simulation +/// into the "Pike VM." Namely, they track capturing group values for each +/// state. During the computation of epsilon closure, we copy slot values from +/// states in the "current" set to the "next" set. Eventually, once a match +/// is found, the slot values for that match state are what we write to the +/// caller provided 'Captures' value. +#[derive(Clone, Debug)] +struct ActiveStates { + /// The set of active NFA states. This set preserves insertion order, which + /// is critical for simulating the match semantics of backtracking regex + /// engines. + set: SparseSet, + /// The slots for every NFA state, where each slot stores a (possibly + /// absent) offset. Every capturing group has two slots. One for a start + /// offset and one for an end offset. + slot_table: SlotTable, +} + +impl ActiveStates { + /// Create a new set of active states for the given PikeVM. The active + /// states returned may only be used with the given PikeVM. (Use 'reset' + /// to re-purpose the allocation for a different PikeVM.) 
+ fn new(re: &PikeVM) -> ActiveStates { + let mut active = ActiveStates { + set: SparseSet::new(0), + slot_table: SlotTable::new(), + }; + active.reset(re); + active + } + + /// Reset this set of active states such that it can be used with the given + /// PikeVM (and only that PikeVM). + fn reset(&mut self, re: &PikeVM) { + self.set.resize(re.get_nfa().states().len()); + self.slot_table.reset(re); + } + + /// Return the heap memory usage, in bytes, used by this set of active + /// states. + /// + /// This does not include the stack size of this value. + fn memory_usage(&self) -> usize { + self.set.memory_usage() + self.slot_table.memory_usage() + } + + /// Setup this set of active states for a new search. The given slot + /// length should be the number of slots in a caller provided 'Captures' + /// (and may be zero). + fn setup_search(&mut self, captures_slot_len: usize) { + self.set.clear(); + self.slot_table.setup_search(captures_slot_len); + } +} + +/// A table of slots, where each row represent a state in an NFA. Thus, the +/// table has room for storing slots for every single state in an NFA. +/// +/// This table is represented with a single contiguous allocation. In general, +/// the notion of "capturing group" doesn't really exist at this level of +/// abstraction, hence the name "slot" instead. (Indeed, every capturing group +/// maps to a pair of slots, one for the start offset and one for the end +/// offset.) Slots are indexed by the 'Captures' NFA state. +/// +/// N.B. Not every state actually needs a row of slots. Namely, states that +/// only have epsilon transitions currently never have anything written to +/// their rows in this table. Thus, the table is somewhat wasteful in its heap +/// usage. However, it is important to maintain fast random access by state +/// ID, which means one giant table tends to work well. RE2 takes a different +/// approach here and allocates each row as its own reference counted thing. 
+/// I explored such a strategy at one point here, but couldn't get it to work +/// well using entirely safe code. (To the ambitious reader: I encourage you to +/// re-litigate that experiment.) I very much wanted to stick to safe code, but +/// could be convinced otherwise if there was a solid argument and the safety +/// was encapsulated well. +#[derive(Clone, Debug)] +struct SlotTable { + /// The actual table of offsets. + table: Vec>, + /// The number of slots per state, i.e., the table's stride or the length + /// of each row. + slots_per_state: usize, + /// The number of slots in the caller-provided 'Captures' value for the + /// current search. Setting this to 'slots_per_state' is always correct, + /// but may be wasteful. + slots_for_captures: usize, +} + +impl SlotTable { + /// Create a new slot table. + /// + /// One should call 'reset' with the corresponding PikeVM before use. + fn new() -> SlotTable { + SlotTable { table: vec![], slots_for_captures: 0, slots_per_state: 0 } + } + + /// Reset this slot table such that it can be used with the given PikeVM + /// (and only that PikeVM). + fn reset(&mut self, re: &PikeVM) { + let nfa = re.get_nfa(); + self.slots_per_state = nfa.group_info().slot_len(); + // This is always correct, but may be reduced for a particular search + // if a 'Captures' has fewer slots, e.g., none at all or only slots + // for tracking the overall match instead of all slots for every + // group. + self.slots_for_captures = core::cmp::max( + self.slots_per_state, + nfa.pattern_len().checked_mul(2).unwrap(), + ); + let len = nfa + .states() + .len() + .checked_mul(self.slots_per_state) + // Add space to account for scratch space used during a search. + .and_then(|x| x.checked_add(self.slots_for_captures)) + // It seems like this could actually panic on legitimate inputs on + // 32-bit targets, and very likely to panic on 16-bit. Should we + // somehow convert this to an error? What about something similar + // for the lazy DFA cache? 
If you're tripping this assert, please + // file a bug. + .expect("slot table length doesn't overflow"); + // This happens about as often as a regex is compiled, so it probably + // should be at debug level, but I found it quite distracting and not + // particularly useful. + trace!( + "resizing PikeVM active states table to {} entries \ + (slots_per_state={})", + len, + self.slots_per_state, + ); + self.table.resize(len, None); + } + + /// Return the heap memory usage, in bytes, used by this slot table. + /// + /// This does not include the stack size of this value. + fn memory_usage(&self) -> usize { + self.table.len() * core::mem::size_of::>() + } + + /// Perform any per-search setup for this slot table. + /// + /// In particular, this sets the length of the number of slots used in the + /// 'Captures' given by the caller (if any at all). This number may be + /// smaller than the total number of slots available, e.g., when the caller + /// is only interested in tracking the overall match and not the spans of + /// every matching capturing group. Only tracking the overall match can + /// save a substantial amount of time copying capturing spans during a + /// search. + fn setup_search(&mut self, captures_slot_len: usize) { + self.slots_for_captures = captures_slot_len; + } + + /// Return a mutable slice of the slots for the given state. + /// + /// Note that the length of the slice returned may be less than the total + /// number of slots available for this state. In particular, the length + /// always matches the number of slots indicated via 'setup_search'. + fn for_state(&mut self, sid: StateID) -> &mut [Option] { + let i = sid.as_usize() * self.slots_per_state; + &mut self.table[i..i + self.slots_for_captures] + } + + /// Return a slice of slots of appropriate length where every slot offset + /// is guaranteed to be absent. 
This is useful in cases where you need to + /// compute an epsilon closure outside of the user supplied regex, and thus + /// never want it to have any capturing slots set. + fn all_absent(&mut self) -> &mut [Option] { + let i = self.table.len() - self.slots_for_captures; + &mut self.table[i..i + self.slots_for_captures] + } +} + +/// Represents a stack frame for use while computing an epsilon closure. +/// +/// (An "epsilon closure" refers to the set of reachable NFA states from a +/// single state without consuming any input. That is, the set of all epsilon +/// transitions not only from that single state, but from every other state +/// reachable by an epsilon transition as well. This is why it's called a +/// "closure." Computing an epsilon closure is also done during DFA +/// determinization! Compare and contrast the epsilon closure here in this +/// PikeVM and the one used for determinization in crate::util::determinize.) +/// +/// Computing the epsilon closure in a Thompson NFA proceeds via a depth +/// first traversal over all epsilon transitions from a particular state. +/// (A depth first traversal is important because it emulates the same priority +/// of matches that is typically found in backtracking regex engines.) This +/// depth first traversal is naturally expressed using recursion, but to avoid +/// a call stack size proportional to the size of a regex, we put our stack on +/// the heap instead. +/// +/// This stack thus consists of call frames. The typical call frame is +/// `Explore`, which instructs epsilon closure to explore the epsilon +/// transitions from that state. (Subsequent epsilon transitions are then +/// pushed on to the stack as more `Explore` frames.) If the state ID being +/// explored has no epsilon transitions, then the capturing group slots are +/// copied from the original state that sparked the epsilon closure (from the +/// 'step' routine) to the state ID being explored. 
This way, capturing group +/// slots are forwarded from the previous state to the next. +/// +/// The other stack frame, `RestoreCaptures`, instructs the epsilon closure to +/// set the position for a particular slot back to some particular offset. This +/// frame is pushed when `Explore` sees a `Capture` transition. `Explore` will +/// set the offset of the slot indicated in `Capture` to the current offset, +/// and then push the old offset on to the stack as a `RestoreCapture` frame. +/// Thus, the new offset is only used until the epsilon closure reverts back to +/// the `RestoreCapture` frame. In effect, this gives the `Capture` epsilon +/// transition its "scope" to only states that come "after" it during depth +/// first traversal. +#[derive(Clone, Debug)] +enum FollowEpsilon { + /// Explore the epsilon transitions from a state ID. + Explore(StateID), + /// Reset the given `slot` to the given `offset` (which might be `None`). + RestoreCapture { slot: SmallIndex, offset: Option }, +} + +/// A set of counters that "instruments" a PikeVM search. To enable this, you +/// must enable the 'internal-instrument-pikevm' feature. Then run your Rust +/// program with RUST_LOG=regex_automata::nfa::thompson::pikevm=trace set in +/// the environment. The metrics collected will be dumped automatically for +/// every search executed by the PikeVM. +/// +/// NOTE: When 'internal-instrument-pikevm' is enabled, it will likely cause an +/// absolute decrease in wall-clock performance, even if the 'trace' log level +/// isn't enabled. (Although, we do try to avoid extra costs when 'trace' isn't +/// enabled.) The main point of instrumentation is to get counts of various +/// events that occur during the PikeVM's execution. +/// +/// This is a somewhat hacked together collection of metrics that are useful +/// to gather from a PikeVM search. In particular, it lets us scrutinize the +/// performance profile of a search beyond what general purpose profiling tools +/// give us. 
Namely, we orient the profiling data around the specific states of +/// the NFA. +/// +/// In other words, this lets us see which parts of the NFA graph are most +/// frequently activated. This then provides direction for optimization +/// opportunities. +/// +/// The really sad part about this is that it absolutely clutters up the PikeVM +/// implementation. :'( Another approach would be to just manually add this +/// code in whenever I want this kind of profiling data, but it's complicated +/// and tedious enough that I went with this approach... for now. +/// +/// When instrumentation is enabled (which also turns on 'logging'), then a +/// `Counters` is initialized for every search and `trace`'d just before the +/// search returns to the caller. +/// +/// Tip: When debugging performance problems with the PikeVM, it's best to try +/// to work with an NFA that is as small as possible. Otherwise the state graph +/// is likely to be too big to digest. +#[cfg(feature = "internal-instrument-pikevm")] +#[derive(Clone, Debug)] +struct Counters { + /// The number of times the NFA is in a particular permutation of states. + state_sets: alloc::collections::BTreeMap, u64>, + /// The number of times 'step' is called for a particular state ID (which + /// indexes this array). + steps: Vec, + /// The number of times an epsilon closure was computed for a state. + closures: Vec, + /// The number of times a particular state ID is pushed on to a stack while + /// computing an epsilon closure. + stack_pushes: Vec, + /// The number of times a particular state ID is inserted into a sparse set + /// while computing an epsilon closure. 
+ set_inserts: Vec, +} + +#[cfg(feature = "internal-instrument-pikevm")] +impl Counters { + fn empty() -> Counters { + Counters { + state_sets: alloc::collections::BTreeMap::new(), + steps: vec![], + closures: vec![], + stack_pushes: vec![], + set_inserts: vec![], + } + } + + fn reset(&mut self, nfa: &NFA) { + let len = nfa.states().len(); + + self.state_sets.clear(); + + self.steps.clear(); + self.steps.resize(len, 0); + + self.closures.clear(); + self.closures.resize(len, 0); + + self.stack_pushes.clear(); + self.stack_pushes.resize(len, 0); + + self.set_inserts.clear(); + self.set_inserts.resize(len, 0); + } + + fn eprint(&self, nfa: &NFA) { + trace!("===== START PikeVM Instrumentation Output ====="); + // We take the top-K most occurring state sets. Otherwise the output + // is likely to be overwhelming. And we probably only care about the + // most frequently occurring ones anyway. + const LIMIT: usize = 20; + let mut set_counts = + self.state_sets.iter().collect::, &u64)>>(); + set_counts.sort_by_key(|(_, &count)| core::cmp::Reverse(count)); + trace!("## PikeVM frequency of state sets (top {})", LIMIT); + for (set, count) in set_counts.iter().take(LIMIT) { + trace!("{:?}: {}", set, count); + } + if set_counts.len() > LIMIT { + trace!( + "... 
{} sets omitted (out of {} total)", + set_counts.len() - LIMIT, + set_counts.len(), + ); + } + + trace!(""); + trace!("## PikeVM total frequency of events"); + trace!( + "steps: {}, closures: {}, stack-pushes: {}, set-inserts: {}", + self.steps.iter().copied().sum::(), + self.closures.iter().copied().sum::(), + self.stack_pushes.iter().copied().sum::(), + self.set_inserts.iter().copied().sum::(), + ); + + trace!(""); + trace!("## PikeVM frequency of events broken down by state"); + for sid in 0..self.steps.len() { + trace!( + "{:06}: steps: {}, closures: {}, \ + stack-pushes: {}, set-inserts: {}", + sid, + self.steps[sid], + self.closures[sid], + self.stack_pushes[sid], + self.set_inserts[sid], + ); + } + + trace!(""); + trace!("## NFA debug display"); + trace!("{:?}", nfa); + trace!("===== END PikeVM Instrumentation Output ====="); + } + + fn record_state_set(&mut self, set: &SparseSet) { + let set = set.iter().collect::>(); + *self.state_sets.entry(set).or_insert(0) += 1; + } + + fn record_step(&mut self, sid: StateID) { + self.steps[sid] += 1; + } + + fn record_closure(&mut self, sid: StateID) { + self.closures[sid] += 1; + } + + fn record_stack_push(&mut self, sid: StateID) { + self.stack_pushes[sid] += 1; + } + + fn record_set_insert(&mut self, sid: StateID) { + self.set_inserts[sid] += 1; + } +} diff --git a/vendor/regex-automata/src/nfa/thompson/range_trie.rs b/vendor/regex-automata/src/nfa/thompson/range_trie.rs new file mode 100644 index 0000000..cd77cc1 --- /dev/null +++ b/vendor/regex-automata/src/nfa/thompson/range_trie.rs @@ -0,0 +1,1055 @@ +/* +I've called the primary data structure in this module a "range trie." As far +as I can tell, there is no prior art on a data structure like this, however, +it's likely someone somewhere has built something like it. Searching for +"range trie" turns up the paper "Range Tries for Scalable Address Lookup," +but it does not appear relevant. 
+ +The range trie is just like a trie in that it is a special case of a +deterministic finite state machine. It has states and each state has a set +of transitions to other states. It is acyclic, and, like a normal trie, +it makes no attempt to reuse common suffixes among its elements. The key +difference between a normal trie and a range trie below is that a range trie +operates on *contiguous sequences* of bytes instead of singleton bytes. +One could say say that our alphabet is ranges of bytes instead of bytes +themselves, except a key part of range trie construction is splitting ranges +apart to ensure there is at most one transition that can be taken for any +byte in a given state. + +I've tried to explain the details of how the range trie works below, so +for now, we are left with trying to understand what problem we're trying to +solve. Which is itself fairly involved! + +At the highest level, here's what we want to do. We want to convert a +sequence of Unicode codepoints into a finite state machine whose transitions +are over *bytes* and *not* Unicode codepoints. We want this because it makes +said finite state machines much smaller and much faster to execute. As a +simple example, consider a byte oriented automaton for all Unicode scalar +values (0x00 through 0x10FFFF, not including surrogate codepoints): + + [00-7F] + [C2-DF][80-BF] + [E0-E0][A0-BF][80-BF] + [E1-EC][80-BF][80-BF] + [ED-ED][80-9F][80-BF] + [EE-EF][80-BF][80-BF] + [F0-F0][90-BF][80-BF][80-BF] + [F1-F3][80-BF][80-BF][80-BF] + [F4-F4][80-8F][80-BF][80-BF] + +(These byte ranges are generated via the regex-syntax::utf8 module, which +was based on Russ Cox's code in RE2, which was in turn based on Ken +Thompson's implementation of the same idea in his Plan9 implementation of +grep.) + +It should be fairly straight-forward to see how one could compile this into +a DFA. The sequences are sorted and non-overlapping. Essentially, you could +build a trie from this fairly easy. 
The problem comes when your initial +range (in this case, 0x00-0x10FFFF) isn't so nice. For example, the class +represented by '\w' contains only a tenth of the codepoints that +0x00-0x10FFFF contains, but if we were to write out the byte based ranges +as we did above, the list would stretch to 892 entries! This turns into +quite a large NFA with a few thousand states. Turning this beast into a DFA +takes quite a bit of time. We are thus left with trying to trim down the +number of states we produce as early as possible. + +One approach (used by RE2 and still by the regex crate, at time of writing) +is to try to find common suffixes while building NFA states for the above +and reuse them. This is very cheap to do and one can control precisely how +much extra memory you want to use for the cache. + +Another approach, however, is to reuse an algorithm for constructing a +*minimal* DFA from a sorted sequence of inputs. I don't want to go into +the full details here, but I explain it in more depth in my blog post on +FSTs[1]. Note that the algorithm was not invented by me, but was published +in paper by Daciuk et al. in 2000 called "Incremental Construction of +MinimalAcyclic Finite-State Automata." Like the suffix cache approach above, +it is also possible to control the amount of extra memory one uses, although +this usually comes with the cost of sacrificing true minimality. (But it's +typically close enough with a reasonably sized cache of states.) + +The catch is that Daciuk's algorithm only works if you add your keys in +lexicographic ascending order. In our case, since we're dealing with ranges, +we also need the additional requirement that ranges are either equivalent +or do not overlap at all. For example, if one were given the following byte +ranges: + + [BC-BF][80-BF] + [BC-BF][90-BF] + +Then Daciuk's algorithm would not work, since there is nothing to handle the +fact that the ranges overlap. They would need to be split apart. 
Thankfully, +Thompson's algorithm for producing byte ranges for Unicode codepoint ranges +meets both of our requirements. (A proof for this eludes me, but it appears +true.) + +... however, we would also like to be able to compile UTF-8 automata in +reverse. We want this because in order to find the starting location of a +match using a DFA, we need to run a second DFA---a reversed version of the +forward DFA---backwards to discover the match location. Unfortunately, if +we reverse our byte sequences for 0x00-0x10FFFF, we get sequences that are +can overlap, even if they are sorted: + + [00-7F] + [80-BF][80-9F][ED-ED] + [80-BF][80-BF][80-8F][F4-F4] + [80-BF][80-BF][80-BF][F1-F3] + [80-BF][80-BF][90-BF][F0-F0] + [80-BF][80-BF][E1-EC] + [80-BF][80-BF][EE-EF] + [80-BF][A0-BF][E0-E0] + [80-BF][C2-DF] + +For example, '[80-BF][80-BF][EE-EF]' and '[80-BF][A0-BF][E0-E0]' have +overlapping ranges between '[80-BF]' and '[A0-BF]'. Thus, there is no +simple way to apply Daciuk's algorithm. + +And thus, the range trie was born. The range trie's only purpose is to take +sequences of byte ranges like the ones above, collect them into a trie and then +spit them out in a sorted fashion with no overlapping ranges. For example, +0x00-0x10FFFF gets translated to: + + [0-7F] + [80-BF][80-9F][80-8F][F1-F3] + [80-BF][80-9F][80-8F][F4] + [80-BF][80-9F][90-BF][F0] + [80-BF][80-9F][90-BF][F1-F3] + [80-BF][80-9F][E1-EC] + [80-BF][80-9F][ED] + [80-BF][80-9F][EE-EF] + [80-BF][A0-BF][80-8F][F1-F3] + [80-BF][A0-BF][80-8F][F4] + [80-BF][A0-BF][90-BF][F0] + [80-BF][A0-BF][90-BF][F1-F3] + [80-BF][A0-BF][E0] + [80-BF][A0-BF][E1-EC] + [80-BF][A0-BF][EE-EF] + [80-BF][C2-DF] + +We've thus satisfied our requirements for running Daciuk's algorithm. All +sequences of ranges are sorted, and any corresponding ranges are either +exactly equivalent or non-overlapping. + +In effect, a range trie is building a DFA from a sequence of arbitrary byte +ranges. 
But it uses an algorithm custom tailored to its input, so it is not as +costly as traditional DFA construction. While it is still quite a bit more +costly than the forward case (which only needs Daciuk's algorithm), it winds +up saving a substantial amount of time if one is doing a full DFA powerset +construction later by virtue of producing a much much smaller NFA. + +[1] - https://blog.burntsushi.net/transducers/ +[2] - https://www.mitpressjournals.org/doi/pdfplus/10.1162/089120100561601 +*/ + +use core::{cell::RefCell, convert::TryFrom, fmt, mem, ops::RangeInclusive}; + +use alloc::{format, string::String, vec, vec::Vec}; + +use regex_syntax::utf8::Utf8Range; + +use crate::util::primitives::StateID; + +/// There is only one final state in this trie. Every sequence of byte ranges +/// added shares the same final state. +const FINAL: StateID = StateID::ZERO; + +/// The root state of the trie. +const ROOT: StateID = StateID::new_unchecked(1); + +/// A range trie represents an ordered set of sequences of bytes. +/// +/// A range trie accepts as input a sequence of byte ranges and merges +/// them into the existing set such that the trie can produce a sorted +/// non-overlapping sequence of byte ranges. The sequence emitted corresponds +/// precisely to the sequence of bytes matched by the given keys, although the +/// byte ranges themselves may be split at different boundaries. +/// +/// The order complexity of this data structure seems difficult to analyze. +/// If the size of a byte is held as a constant, then insertion is clearly +/// O(n) where n is the number of byte ranges in the input key. However, if +/// k=256 is our alphabet size, then insertion could be O(k^2 * n). In +/// particular it seems possible for pathological inputs to cause insertion +/// to do a lot of work. However, for what we use this data structure for, +/// there should be no pathological inputs since the ultimate source is always +/// a sorted set of Unicode scalar value ranges. 
+/// +/// Internally, this trie is setup like a finite state machine. Note though +/// that it is acyclic. +#[derive(Clone)] +pub struct RangeTrie { + /// The states in this trie. The first is always the shared final state. + /// The second is always the root state. Otherwise, there is no + /// particular order. + states: Vec, + /// A free-list of states. When a range trie is cleared, all of its states + /// are added to this list. Creating a new state reuses states from this + /// list before allocating a new one. + free: Vec, + /// A stack for traversing this trie to yield sequences of byte ranges in + /// lexicographic order. + iter_stack: RefCell>, + /// A buffer that stores the current sequence during iteration. + iter_ranges: RefCell>, + /// A stack used for traversing the trie in order to (deeply) duplicate + /// a state. States are recursively duplicated when ranges are split. + dupe_stack: Vec, + /// A stack used for traversing the trie during insertion of a new + /// sequence of byte ranges. + insert_stack: Vec, +} + +/// A single state in this trie. +#[derive(Clone)] +struct State { + /// A sorted sequence of non-overlapping transitions to other states. Each + /// transition corresponds to a single range of bytes. + transitions: Vec, +} + +/// A transition is a single range of bytes. If a particular byte is in this +/// range, then the corresponding machine may transition to the state pointed +/// to by `next_id`. +#[derive(Clone)] +struct Transition { + /// The byte range. + range: Utf8Range, + /// The next state to transition to. + next_id: StateID, +} + +impl RangeTrie { + /// Create a new empty range trie. + pub fn new() -> RangeTrie { + let mut trie = RangeTrie { + states: vec![], + free: vec![], + iter_stack: RefCell::new(vec![]), + iter_ranges: RefCell::new(vec![]), + dupe_stack: vec![], + insert_stack: vec![], + }; + trie.clear(); + trie + } + + /// Clear this range trie such that it is empty. 
Clearing a range trie + /// and reusing it can beneficial because this may reuse allocations. + pub fn clear(&mut self) { + self.free.extend(self.states.drain(..)); + self.add_empty(); // final + self.add_empty(); // root + } + + /// Iterate over all of the sequences of byte ranges in this trie, and + /// call the provided function for each sequence. Iteration occurs in + /// lexicographic order. + pub fn iter Result<(), E>>( + &self, + mut f: F, + ) -> Result<(), E> { + let mut stack = self.iter_stack.borrow_mut(); + stack.clear(); + let mut ranges = self.iter_ranges.borrow_mut(); + ranges.clear(); + + // We do iteration in a way that permits us to use a single buffer + // for our keys. We iterate in a depth first fashion, while being + // careful to expand our frontier as we move deeper in the trie. + stack.push(NextIter { state_id: ROOT, tidx: 0 }); + while let Some(NextIter { mut state_id, mut tidx }) = stack.pop() { + // This could be implemented more simply without an inner loop + // here, but at the cost of more stack pushes. + loop { + let state = self.state(state_id); + // If we've visited all transitions in this state, then pop + // back to the parent state. + if tidx >= state.transitions.len() { + ranges.pop(); + break; + } + + let t = &state.transitions[tidx]; + ranges.push(t.range); + if t.next_id == FINAL { + f(&ranges)?; + ranges.pop(); + tidx += 1; + } else { + // Expand our frontier. Once we come back to this state + // via the stack, start in on the next transition. + stack.push(NextIter { state_id, tidx: tidx + 1 }); + // Otherwise, move to the first transition of the next + // state. + state_id = t.next_id; + tidx = 0; + } + } + } + Ok(()) + } + + /// Inserts a new sequence of ranges into this trie. + /// + /// The sequence given must be non-empty and must not have a length + /// exceeding 4. 
+ pub fn insert(&mut self, ranges: &[Utf8Range]) { + assert!(!ranges.is_empty()); + assert!(ranges.len() <= 4); + + let mut stack = mem::replace(&mut self.insert_stack, vec![]); + stack.clear(); + + stack.push(NextInsert::new(ROOT, ranges)); + while let Some(next) = stack.pop() { + let (state_id, ranges) = (next.state_id(), next.ranges()); + assert!(!ranges.is_empty()); + + let (mut new, rest) = (ranges[0], &ranges[1..]); + + // i corresponds to the position of the existing transition on + // which we are operating. Typically, the result is to remove the + // transition and replace it with two or more new transitions + // corresponding to the partitions generated by splitting the + // 'new' with the ith transition's range. + let mut i = self.state(state_id).find(new); + + // In this case, there is no overlap *and* the new range is greater + // than all existing ranges. So we can just add it to the end. + if i == self.state(state_id).transitions.len() { + let next_id = NextInsert::push(self, &mut stack, rest); + self.add_transition(state_id, new, next_id); + continue; + } + + // The need for this loop is a bit subtle, buf basically, after + // we've handled the partitions from our initial split, it's + // possible that there will be a partition leftover that overlaps + // with a subsequent transition. If so, then we have to repeat + // the split process again with the leftovers and that subsequent + // transition. + 'OUTER: loop { + let old = self.state(state_id).transitions[i].clone(); + let split = match Split::new(old.range, new) { + Some(split) => split, + None => { + let next_id = NextInsert::push(self, &mut stack, rest); + self.add_transition_at(i, state_id, new, next_id); + continue; + } + }; + let splits = split.as_slice(); + // If we only have one partition, then the ranges must be + // equivalent. There's nothing to do here for this state, so + // just move on to the next one. + if splits.len() == 1 { + // ... but only if we have anything left to do. 
+ if !rest.is_empty() { + stack.push(NextInsert::new(old.next_id, rest)); + } + break; + } + // At this point, we know that 'split' is non-empty and there + // must be some overlap AND that the two ranges are not + // equivalent. Therefore, the existing range MUST be removed + // and split up somehow. Instead of actually doing the removal + // and then a subsequent insertion---with all the memory + // shuffling that entails---we simply overwrite the transition + // at position `i` for the first new transition we want to + // insert. After that, we're forced to do expensive inserts. + let mut first = true; + let mut add_trans = + |trie: &mut RangeTrie, pos, from, range, to| { + if first { + trie.set_transition_at(pos, from, range, to); + first = false; + } else { + trie.add_transition_at(pos, from, range, to); + } + }; + for (j, &srange) in splits.iter().enumerate() { + match srange { + SplitRange::Old(r) => { + // Deep clone the state pointed to by the ith + // transition. This is always necessary since 'old' + // is always coupled with at least a 'both' + // partition. We don't want any new changes made + // via the 'both' partition to impact the part of + // the transition that doesn't overlap with the + // new range. + let dup_id = self.duplicate(old.next_id); + add_trans(self, i, state_id, r, dup_id); + } + SplitRange::New(r) => { + // This is a bit subtle, but if this happens to be + // the last partition in our split, it is possible + // that this overlaps with a subsequent transition. + // If it does, then we must repeat the whole + // splitting process over again with `r` and the + // subsequent transition. + { + let trans = &self.state(state_id).transitions; + if j + 1 == splits.len() + && i < trans.len() + && intersects(r, trans[i].range) + { + new = r; + continue 'OUTER; + } + } + + // ... otherwise, setup exploration for a new + // empty state and add a brand new transition for + // this new range. 
+ let next_id = + NextInsert::push(self, &mut stack, rest); + add_trans(self, i, state_id, r, next_id); + } + SplitRange::Both(r) => { + // Continue adding the remaining ranges on this + // path and update the transition with the new + // range. + if !rest.is_empty() { + stack.push(NextInsert::new(old.next_id, rest)); + } + add_trans(self, i, state_id, r, old.next_id); + } + } + i += 1; + } + // If we've reached this point, then we know that there are + // no subsequent transitions with any overlap. Therefore, we + // can stop processing this range and move on to the next one. + break; + } + } + self.insert_stack = stack; + } + + pub fn add_empty(&mut self) -> StateID { + let id = match StateID::try_from(self.states.len()) { + Ok(id) => id, + Err(_) => { + // This generally should not happen since a range trie is + // only ever used to compile a single sequence of Unicode + // scalar values. If we ever got to this point, we would, at + // *minimum*, be using 96GB in just the range trie alone. + panic!("too many sequences added to range trie"); + } + }; + // If we have some free states available, then use them to avoid + // more allocations. + if let Some(mut state) = self.free.pop() { + state.clear(); + self.states.push(state); + } else { + self.states.push(State { transitions: vec![] }); + } + id + } + + /// Performs a deep clone of the given state and returns the duplicate's + /// state ID. + /// + /// A "deep clone" in this context means that the state given along with + /// recursively all states that it points to are copied. Once complete, + /// the given state ID and the returned state ID share nothing. + /// + /// This is useful during range trie insertion when a new range overlaps + /// with an existing range that is bigger than the new one. The part + /// of the existing range that does *not* overlap with the new one is + /// duplicated so that adding the new range to the overlap doesn't disturb + /// the non-overlapping portion. 
+ /// + /// There's one exception: if old_id is the final state, then it is not + /// duplicated and the same final state is returned. This is because all + /// final states in this trie are equivalent. + fn duplicate(&mut self, old_id: StateID) -> StateID { + if old_id == FINAL { + return FINAL; + } + + let mut stack = mem::replace(&mut self.dupe_stack, vec![]); + stack.clear(); + + let new_id = self.add_empty(); + // old_id is the state we're cloning and new_id is the ID of the + // duplicated state for old_id. + stack.push(NextDupe { old_id, new_id }); + while let Some(NextDupe { old_id, new_id }) = stack.pop() { + for i in 0..self.state(old_id).transitions.len() { + let t = self.state(old_id).transitions[i].clone(); + if t.next_id == FINAL { + // All final states are the same, so there's no need to + // duplicate it. + self.add_transition(new_id, t.range, FINAL); + continue; + } + + let new_child_id = self.add_empty(); + self.add_transition(new_id, t.range, new_child_id); + stack.push(NextDupe { + old_id: t.next_id, + new_id: new_child_id, + }); + } + } + self.dupe_stack = stack; + new_id + } + + /// Adds the given transition to the given state. + /// + /// Callers must ensure that all previous transitions in this state + /// are lexicographically smaller than the given range. + fn add_transition( + &mut self, + from_id: StateID, + range: Utf8Range, + next_id: StateID, + ) { + self.state_mut(from_id) + .transitions + .push(Transition { range, next_id }); + } + + /// Like `add_transition`, except this inserts the transition just before + /// the ith transition. + fn add_transition_at( + &mut self, + i: usize, + from_id: StateID, + range: Utf8Range, + next_id: StateID, + ) { + self.state_mut(from_id) + .transitions + .insert(i, Transition { range, next_id }); + } + + /// Overwrites the transition at position i with the given transition. 
+ fn set_transition_at( + &mut self, + i: usize, + from_id: StateID, + range: Utf8Range, + next_id: StateID, + ) { + self.state_mut(from_id).transitions[i] = Transition { range, next_id }; + } + + /// Return an immutable borrow for the state with the given ID. + fn state(&self, id: StateID) -> &State { + &self.states[id] + } + + /// Return a mutable borrow for the state with the given ID. + fn state_mut(&mut self, id: StateID) -> &mut State { + &mut self.states[id] + } +} + +impl State { + /// Find the position at which the given range should be inserted in this + /// state. + /// + /// The position returned is always in the inclusive range + /// [0, transitions.len()]. If 'transitions.len()' is returned, then the + /// given range overlaps with no other range in this state *and* is greater + /// than all of them. + /// + /// For all other possible positions, the given range either overlaps + /// with the transition at that position or is otherwise less than it + /// with no overlap (and is greater than the previous transition). In the + /// former case, careful attention must be paid to inserting this range + /// as a new transition. In the latter case, the range can be inserted as + /// a new transition at the given position without disrupting any other + /// transitions. + fn find(&self, range: Utf8Range) -> usize { + /// Returns the position `i` at which `pred(xs[i])` first returns true + /// such that for all `j >= i`, `pred(xs[j]) == true`. If `pred` never + /// returns true, then `xs.len()` is returned. + /// + /// We roll our own binary search because it doesn't seem like the + /// standard library's binary search can be used here. Namely, if + /// there is an overlapping range, then we want to find the first such + /// occurrence, but there may be many. Or at least, it's not quite + /// clear to me how to do it. 
+ fn binary_search(xs: &[T], mut pred: F) -> usize + where + F: FnMut(&T) -> bool, + { + let (mut left, mut right) = (0, xs.len()); + while left < right { + // Overflow is impossible because xs.len() <= 256. + let mid = (left + right) / 2; + if pred(&xs[mid]) { + right = mid; + } else { + left = mid + 1; + } + } + left + } + + // Benchmarks suggest that binary search is just a bit faster than + // straight linear search. Specifically when using the debug tool: + // + // hyperfine "regex-cli debug thompson -qr --captures none '\w{90} ecurB'" + binary_search(&self.transitions, |t| range.start <= t.range.end) + } + + /// Clear this state such that it has zero transitions. + fn clear(&mut self) { + self.transitions.clear(); + } +} + +/// The next state to process during duplication. +#[derive(Clone, Debug)] +struct NextDupe { + /// The state we want to duplicate. + old_id: StateID, + /// The ID of the new state that is a duplicate of old_id. + new_id: StateID, +} + +/// The next state (and its corresponding transition) that we want to visit +/// during iteration in lexicographic order. +#[derive(Clone, Debug)] +struct NextIter { + state_id: StateID, + tidx: usize, +} + +/// The next state to process during insertion and any remaining ranges that we +/// want to add for a particular sequence of ranges. The first such instance +/// is always the root state along with all ranges given. +#[derive(Clone, Debug)] +struct NextInsert { + /// The next state to begin inserting ranges. This state should be the + /// state at which `ranges[0]` should be inserted. + state_id: StateID, + /// The ranges to insert. We used a fixed-size array here to avoid an + /// allocation. + ranges: [Utf8Range; 4], + /// The number of valid ranges in the above array. + len: u8, +} + +impl NextInsert { + /// Create the next item to visit. The given state ID should correspond + /// to the state at which the first range in the given slice should be + /// inserted. 
The slice given must not be empty and it must be no longer + /// than 4. + fn new(state_id: StateID, ranges: &[Utf8Range]) -> NextInsert { + let len = ranges.len(); + assert!(len > 0); + assert!(len <= 4); + + let mut tmp = [Utf8Range { start: 0, end: 0 }; 4]; + tmp[..len].copy_from_slice(ranges); + NextInsert { state_id, ranges: tmp, len: u8::try_from(len).unwrap() } + } + + /// Push a new empty state to visit along with any remaining ranges that + /// still need to be inserted. The ID of the new empty state is returned. + /// + /// If ranges is empty, then no new state is created and FINAL is returned. + fn push( + trie: &mut RangeTrie, + stack: &mut Vec, + ranges: &[Utf8Range], + ) -> StateID { + if ranges.is_empty() { + FINAL + } else { + let next_id = trie.add_empty(); + stack.push(NextInsert::new(next_id, ranges)); + next_id + } + } + + /// Return the ID of the state to visit. + fn state_id(&self) -> StateID { + self.state_id + } + + /// Return the remaining ranges to insert. + fn ranges(&self) -> &[Utf8Range] { + &self.ranges[..usize::try_from(self.len).unwrap()] + } +} + +/// Split represents a partitioning of two ranges into one or more ranges. This +/// is the secret sauce that makes a range trie work, as it's what tells us +/// how to deal with two overlapping but unequal ranges during insertion. +/// +/// Essentially, either two ranges overlap or they don't. If they don't, then +/// handling insertion is easy: just insert the new range into its +/// lexicographically correct position. Since it does not overlap with anything +/// else, no other transitions are impacted by the new range. +/// +/// If they do overlap though, there are generally three possible cases to +/// handle: +/// +/// 1. The part where the two ranges actually overlap. i.e., The intersection. +/// 2. The part of the existing range that is not in the the new range. +/// 3. The part of the new range that is not in the old range. 
+/// +/// (1) is guaranteed to always occur since all overlapping ranges have a +/// non-empty intersection. If the two ranges are not equivalent, then at +/// least one of (2) or (3) is guaranteed to occur as well. In some cases, +/// e.g., `[0-4]` and `[4-9]`, all three cases will occur. +/// +/// This `Split` type is responsible for providing (1), (2) and (3) for any +/// possible pair of byte ranges. +/// +/// As for insertion, for the overlap in (1), the remaining ranges to insert +/// should be added by following the corresponding transition. However, this +/// should only be done for the overlapping parts of the range. If there was +/// a part of the existing range that was not in the new range, then that +/// existing part must be split off from the transition and duplicated. The +/// remaining parts of the overlap can then be added to using the new ranges +/// without disturbing the existing range. +/// +/// Handling the case for the part of a new range that is not in an existing +/// range is seemingly easy. Just treat it as if it were a non-overlapping +/// range. The problem here is that if this new non-overlapping range occurs +/// after both (1) and (2), then it's possible that it can overlap with the +/// next transition in the current state. If it does, then the whole process +/// must be repeated! +/// +/// # Details of the 3 cases +/// +/// The following details the various cases that are implemented in code +/// below. It's plausible that the number of cases is not actually minimal, +/// but it's important for this code to remain at least somewhat readable. +/// +/// Given [a,b] and [x,y], where a <= b, x <= y, b < 256 and y < 256, we define +/// the following distinct relationships where at least one must apply. The order +/// of these matters, since multiple can match. The first to match applies. +/// +/// 1. b < x <=> [a,b] < [x,y] +/// 2. 
y < a <=> [x,y] < [a,b] +/// +/// In the case of (1) and (2), these are the only cases where there is no +/// overlap. Or otherwise, the intersection of [a,b] and [x,y] is empty. In +/// order to compute the intersection, one can do [max(a,x), min(b,y)]. The +/// intersection in all of the following cases is non-empty. +/// +/// 3. a = x && b = y <=> [a,b] == [x,y] +/// 4. a = x && b < y <=> [x,y] right-extends [a,b] +/// 5. b = y && a > x <=> [x,y] left-extends [a,b] +/// 6. x = a && y < b <=> [a,b] right-extends [x,y] +/// 7. y = b && x > a <=> [a,b] left-extends [x,y] +/// 8. a > x && b < y <=> [x,y] covers [a,b] +/// 9. x > a && y < b <=> [a,b] covers [x,y] +/// 10. b = x && a < y <=> [a,b] is left-adjacent to [x,y] +/// 11. y = a && x < b <=> [x,y] is left-adjacent to [a,b] +/// 12. b > x && b < y <=> [a,b] left-overlaps [x,y] +/// 13. y > a && y < b <=> [x,y] left-overlaps [a,b] +/// +/// In cases 3-13, we can form rules that partition the ranges into a +/// non-overlapping ordered sequence of ranges: +/// +/// 3. [a,b] +/// 4. [a,b], [b+1,y] +/// 5. [x,a-1], [a,b] +/// 6. [x,y], [y+1,b] +/// 7. [a,x-1], [x,y] +/// 8. [x,a-1], [a,b], [b+1,y] +/// 9. [a,x-1], [x,y], [y+1,b] +/// 10. [a,b-1], [b,b], [b+1,y] +/// 11. [x,y-1], [y,y], [y+1,b] +/// 12. [a,x-1], [x,b], [b+1,y] +/// 13. [x,a-1], [a,y], [y+1,b] +/// +/// In the code below, we go a step further and identify each of the above +/// outputs as belonging either to the overlap of the two ranges or to one +/// of [a,b] or [x,y] exclusively. +#[derive(Clone, Debug, Eq, PartialEq)] +struct Split { + partitions: [SplitRange; 3], + len: usize, +} + +/// A tagged range indicating how it was derived from a pair of ranges. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +enum SplitRange { + Old(Utf8Range), + New(Utf8Range), + Both(Utf8Range), +} + +impl Split { + /// Create a partitioning of the given ranges. + /// + /// If the given ranges have an empty intersection, then None is returned. 
+ fn new(o: Utf8Range, n: Utf8Range) -> Option { + let range = |r: RangeInclusive| Utf8Range { + start: *r.start(), + end: *r.end(), + }; + let old = |r| SplitRange::Old(range(r)); + let new = |r| SplitRange::New(range(r)); + let both = |r| SplitRange::Both(range(r)); + + // Use same names as the comment above to make it easier to compare. + let (a, b, x, y) = (o.start, o.end, n.start, n.end); + + if b < x || y < a { + // case 1, case 2 + None + } else if a == x && b == y { + // case 3 + Some(Split::parts1(both(a..=b))) + } else if a == x && b < y { + // case 4 + Some(Split::parts2(both(a..=b), new(b + 1..=y))) + } else if b == y && a > x { + // case 5 + Some(Split::parts2(new(x..=a - 1), both(a..=b))) + } else if x == a && y < b { + // case 6 + Some(Split::parts2(both(x..=y), old(y + 1..=b))) + } else if y == b && x > a { + // case 7 + Some(Split::parts2(old(a..=x - 1), both(x..=y))) + } else if a > x && b < y { + // case 8 + Some(Split::parts3(new(x..=a - 1), both(a..=b), new(b + 1..=y))) + } else if x > a && y < b { + // case 9 + Some(Split::parts3(old(a..=x - 1), both(x..=y), old(y + 1..=b))) + } else if b == x && a < y { + // case 10 + Some(Split::parts3(old(a..=b - 1), both(b..=b), new(b + 1..=y))) + } else if y == a && x < b { + // case 11 + Some(Split::parts3(new(x..=y - 1), both(y..=y), old(y + 1..=b))) + } else if b > x && b < y { + // case 12 + Some(Split::parts3(old(a..=x - 1), both(x..=b), new(b + 1..=y))) + } else if y > a && y < b { + // case 13 + Some(Split::parts3(new(x..=a - 1), both(a..=y), old(y + 1..=b))) + } else { + unreachable!() + } + } + + /// Create a new split with a single partition. This only occurs when two + /// ranges are equivalent. + fn parts1(r1: SplitRange) -> Split { + // This value doesn't matter since it is never accessed. + let nada = SplitRange::Old(Utf8Range { start: 0, end: 0 }); + Split { partitions: [r1, nada, nada], len: 1 } + } + + /// Create a new split with two partitions. 
+ fn parts2(r1: SplitRange, r2: SplitRange) -> Split { + // This value doesn't matter since it is never accessed. + let nada = SplitRange::Old(Utf8Range { start: 0, end: 0 }); + Split { partitions: [r1, r2, nada], len: 2 } + } + + /// Create a new split with three partitions. + fn parts3(r1: SplitRange, r2: SplitRange, r3: SplitRange) -> Split { + Split { partitions: [r1, r2, r3], len: 3 } + } + + /// Return the partitions in this split as a slice. + fn as_slice(&self) -> &[SplitRange] { + &self.partitions[..self.len] + } +} + +impl fmt::Debug for RangeTrie { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + writeln!(f, "")?; + for (i, state) in self.states.iter().enumerate() { + let status = if i == FINAL.as_usize() { '*' } else { ' ' }; + writeln!(f, "{}{:06}: {:?}", status, i, state)?; + } + Ok(()) + } +} + +impl fmt::Debug for State { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let rs = self + .transitions + .iter() + .map(|t| format!("{:?}", t)) + .collect::>() + .join(", "); + write!(f, "{}", rs) + } +} + +impl fmt::Debug for Transition { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if self.range.start == self.range.end { + write!( + f, + "{:02X} => {:02X}", + self.range.start, + self.next_id.as_usize(), + ) + } else { + write!( + f, + "{:02X}-{:02X} => {:02X}", + self.range.start, + self.range.end, + self.next_id.as_usize(), + ) + } + } +} + +/// Returns true if and only if the given ranges intersect. 
+fn intersects(r1: Utf8Range, r2: Utf8Range) -> bool { + !(r1.end < r2.start || r2.end < r1.start) +} + +#[cfg(test)] +mod tests { + use core::ops::RangeInclusive; + + use regex_syntax::utf8::Utf8Range; + + use super::*; + + fn r(range: RangeInclusive) -> Utf8Range { + Utf8Range { start: *range.start(), end: *range.end() } + } + + fn split_maybe( + old: RangeInclusive, + new: RangeInclusive, + ) -> Option { + Split::new(r(old), r(new)) + } + + fn split( + old: RangeInclusive, + new: RangeInclusive, + ) -> Vec { + split_maybe(old, new).unwrap().as_slice().to_vec() + } + + #[test] + fn no_splits() { + // case 1 + assert_eq!(None, split_maybe(0..=1, 2..=3)); + // case 2 + assert_eq!(None, split_maybe(2..=3, 0..=1)); + } + + #[test] + fn splits() { + let range = |r: RangeInclusive| Utf8Range { + start: *r.start(), + end: *r.end(), + }; + let old = |r| SplitRange::Old(range(r)); + let new = |r| SplitRange::New(range(r)); + let both = |r| SplitRange::Both(range(r)); + + // case 3 + assert_eq!(split(0..=0, 0..=0), vec![both(0..=0)]); + assert_eq!(split(9..=9, 9..=9), vec![both(9..=9)]); + + // case 4 + assert_eq!(split(0..=5, 0..=6), vec![both(0..=5), new(6..=6)]); + assert_eq!(split(0..=5, 0..=8), vec![both(0..=5), new(6..=8)]); + assert_eq!(split(5..=5, 5..=8), vec![both(5..=5), new(6..=8)]); + + // case 5 + assert_eq!(split(1..=5, 0..=5), vec![new(0..=0), both(1..=5)]); + assert_eq!(split(3..=5, 0..=5), vec![new(0..=2), both(3..=5)]); + assert_eq!(split(5..=5, 0..=5), vec![new(0..=4), both(5..=5)]); + + // case 6 + assert_eq!(split(0..=6, 0..=5), vec![both(0..=5), old(6..=6)]); + assert_eq!(split(0..=8, 0..=5), vec![both(0..=5), old(6..=8)]); + assert_eq!(split(5..=8, 5..=5), vec![both(5..=5), old(6..=8)]); + + // case 7 + assert_eq!(split(0..=5, 1..=5), vec![old(0..=0), both(1..=5)]); + assert_eq!(split(0..=5, 3..=5), vec![old(0..=2), both(3..=5)]); + assert_eq!(split(0..=5, 5..=5), vec![old(0..=4), both(5..=5)]); + + // case 8 + assert_eq!( + split(3..=6, 2..=7), + 
vec![new(2..=2), both(3..=6), new(7..=7)], + ); + assert_eq!( + split(3..=6, 1..=8), + vec![new(1..=2), both(3..=6), new(7..=8)], + ); + + // case 9 + assert_eq!( + split(2..=7, 3..=6), + vec![old(2..=2), both(3..=6), old(7..=7)], + ); + assert_eq!( + split(1..=8, 3..=6), + vec![old(1..=2), both(3..=6), old(7..=8)], + ); + + // case 10 + assert_eq!( + split(3..=6, 6..=7), + vec![old(3..=5), both(6..=6), new(7..=7)], + ); + assert_eq!( + split(3..=6, 6..=8), + vec![old(3..=5), both(6..=6), new(7..=8)], + ); + assert_eq!( + split(5..=6, 6..=7), + vec![old(5..=5), both(6..=6), new(7..=7)], + ); + + // case 11 + assert_eq!( + split(6..=7, 3..=6), + vec![new(3..=5), both(6..=6), old(7..=7)], + ); + assert_eq!( + split(6..=8, 3..=6), + vec![new(3..=5), both(6..=6), old(7..=8)], + ); + assert_eq!( + split(6..=7, 5..=6), + vec![new(5..=5), both(6..=6), old(7..=7)], + ); + + // case 12 + assert_eq!( + split(3..=7, 5..=9), + vec![old(3..=4), both(5..=7), new(8..=9)], + ); + assert_eq!( + split(3..=5, 4..=6), + vec![old(3..=3), both(4..=5), new(6..=6)], + ); + + // case 13 + assert_eq!( + split(5..=9, 3..=7), + vec![new(3..=4), both(5..=7), old(8..=9)], + ); + assert_eq!( + split(4..=6, 3..=5), + vec![new(3..=3), both(4..=5), old(6..=6)], + ); + } + + // Arguably there should be more tests here, but in practice, this data + // structure is well covered by the huge number of regex tests. +} diff --git a/vendor/regex-automata/src/util/alphabet.rs b/vendor/regex-automata/src/util/alphabet.rs new file mode 100644 index 0000000..22b5a76 --- /dev/null +++ b/vendor/regex-automata/src/util/alphabet.rs @@ -0,0 +1,1139 @@ +/*! +This module provides APIs for dealing with the alphabets of finite state +machines. + +There are two principal types in this module, [`ByteClasses`] and [`Unit`]. +The former defines the alphabet of a finite state machine while the latter +represents an element of that alphabet. 
+ +To a first approximation, the alphabet of all automata in this crate is just +a `u8`. Namely, every distinct byte value. All 256 of them. In practice, this +can be quite wasteful when building a transition table for a DFA, since it +requires storing a state identifier for each element in the alphabet. Instead, +we collapse the alphabet of an automaton down into equivalence classes, where +every byte in the same equivalence class never discriminates between a match or +a non-match from any other byte in the same class. For example, in the regex +`[a-z]+`, then you could consider it having an alphabet consisting of two +equivalence classes: `a-z` and everything else. In terms of the transitions on +an automaton, it doesn't actually require representing every distinct byte. +Just the equivalence classes. + +The downside of equivalence classes is that, of course, searching a haystack +deals with individual byte values. Those byte values need to be mapped to +their corresponding equivalence class. This is what `ByteClasses` does. In +practice, doing this for every state transition has negligible impact on modern +CPUs. Moreover, it helps make more efficient use of the CPU cache by (possibly +considerably) shrinking the size of the transition table. + +One last hiccup concerns `Unit`. Namely, because of look-around and how the +DFAs in this crate work, we need to add a sentinel value to our alphabet +of equivalence classes that represents the "end" of a search. We call that +sentinel [`Unit::eoi`] or "end of input." Thus, a `Unit` is either an +equivalence class corresponding to a set of bytes, or it is a special "end of +input" sentinel. + +In general, you should not expect to need either of these types unless you're +doing lower level shenanigans with DFAs, or even building your own DFAs. +(Although, you don't have to use these types to build your own DFAs of course.) 
+For example, if you're walking a DFA's state graph, it's probably useful to +make use of [`ByteClasses`] to visit each element in the DFA's alphabet instead +of just visiting every distinct `u8` value. The latter isn't necessarily wrong, +but it could be potentially very wasteful. +*/ +use crate::util::{ + escape::DebugByte, + wire::{self, DeserializeError, SerializeError}, +}; + +/// Unit represents a single unit of haystack for DFA based regex engines. +/// +/// It is not expected for consumers of this crate to need to use this type +/// unless they are implementing their own DFA. And even then, it's not +/// required: implementors may use other techniques to handle haystack units. +/// +/// Typically, a single unit of haystack for a DFA would be a single byte. +/// However, for the DFAs in this crate, matches are delayed by a single byte +/// in order to handle look-ahead assertions (`\b`, `$` and `\z`). Thus, once +/// we have consumed the haystack, we must run the DFA through one additional +/// transition using a unit that indicates the haystack has ended. +/// +/// There is no way to represent a sentinel with a `u8` since all possible +/// values *may* be valid haystack units to a DFA, therefore this type +/// explicitly adds room for a sentinel value. +/// +/// The sentinel EOI value is always its own equivalence class and is +/// ultimately represented by adding 1 to the maximum equivalence class value. +/// So for example, the regex `^[a-z]+$` might be split into the following +/// equivalence classes: +/// +/// ```text +/// 0 => [\x00-`] +/// 1 => [a-z] +/// 2 => [{-\xFF] +/// 3 => [EOI] +/// ``` +/// +/// Where EOI is the special sentinel value that is always in its own +/// singleton equivalence class. 
+#[derive(Clone, Copy, Eq, PartialEq, PartialOrd, Ord)] +pub struct Unit(UnitKind); + +#[derive(Clone, Copy, Eq, PartialEq, PartialOrd, Ord)] +enum UnitKind { + /// Represents a byte value, or more typically, an equivalence class + /// represented as a byte value. + U8(u8), + /// Represents the "end of input" sentinel. We regretably use a `u16` + /// here since the maximum sentinel value is `256`. Thankfully, we don't + /// actually store a `Unit` anywhere, so this extra space shouldn't be too + /// bad. + EOI(u16), +} + +impl Unit { + /// Create a new haystack unit from a byte value. + /// + /// All possible byte values are legal. However, when creating a haystack + /// unit for a specific DFA, one should be careful to only construct units + /// that are in that DFA's alphabet. Namely, one way to compact a DFA's + /// in-memory representation is to collapse its transitions to a set of + /// equivalence classes into a set of all possible byte values. If a DFA + /// uses equivalence classes instead of byte values, then the byte given + /// here should be the equivalence class. + pub fn u8(byte: u8) -> Unit { + Unit(UnitKind::U8(byte)) + } + + /// Create a new "end of input" haystack unit. + /// + /// The value given is the sentinel value used by this unit to represent + /// the "end of input." The value should be the total number of equivalence + /// classes in the corresponding alphabet. Its maximum value is `256`, + /// which occurs when every byte is its own equivalence class. + /// + /// # Panics + /// + /// This panics when `num_byte_equiv_classes` is greater than `256`. + pub fn eoi(num_byte_equiv_classes: usize) -> Unit { + assert!( + num_byte_equiv_classes <= 256, + "max number of byte-based equivalent classes is 256, but got {}", + num_byte_equiv_classes, + ); + Unit(UnitKind::EOI(u16::try_from(num_byte_equiv_classes).unwrap())) + } + + /// If this unit is not an "end of input" sentinel, then returns its + /// underlying byte value. Otherwise return `None`. 
+ pub fn as_u8(self) -> Option { + match self.0 { + UnitKind::U8(b) => Some(b), + UnitKind::EOI(_) => None, + } + } + + /// If this unit is an "end of input" sentinel, then return the underlying + /// sentinel value that was given to [`Unit::eoi`]. Otherwise return + /// `None`. + pub fn as_eoi(self) -> Option { + match self.0 { + UnitKind::U8(_) => None, + UnitKind::EOI(sentinel) => Some(sentinel), + } + } + + /// Return this unit as a `usize`, regardless of whether it is a byte value + /// or an "end of input" sentinel. In the latter case, the underlying + /// sentinel value given to [`Unit::eoi`] is returned. + pub fn as_usize(self) -> usize { + match self.0 { + UnitKind::U8(b) => usize::from(b), + UnitKind::EOI(eoi) => usize::from(eoi), + } + } + + /// Returns true if and only of this unit is a byte value equivalent to the + /// byte given. This always returns false when this is an "end of input" + /// sentinel. + pub fn is_byte(self, byte: u8) -> bool { + self.as_u8().map_or(false, |b| b == byte) + } + + /// Returns true when this unit represents an "end of input" sentinel. + pub fn is_eoi(self) -> bool { + self.as_eoi().is_some() + } + + /// Returns true when this unit corresponds to an ASCII word byte. + /// + /// This always returns false when this unit represents an "end of input" + /// sentinel. + pub fn is_word_byte(self) -> bool { + self.as_u8().map_or(false, crate::util::utf8::is_word_byte) + } +} + +impl core::fmt::Debug for Unit { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + match self.0 { + UnitKind::U8(b) => write!(f, "{:?}", DebugByte(b)), + UnitKind::EOI(_) => write!(f, "EOI"), + } + } +} + +/// A representation of byte oriented equivalence classes. +/// +/// This is used in a DFA to reduce the size of the transition table. This can +/// have a particularly large impact not only on the total size of a dense DFA, +/// but also on compile times. 
///
/// The essential idea here is that the alphabet of a DFA is shrunk from the
/// usual 256 distinct byte values down to a set of equivalence classes. The
/// guarantee you get is that any byte belonging to the same equivalence class
/// can be treated as if it were any other byte in the same class, and the
/// result of a search wouldn't change.
///
/// # Example
///
/// This example shows how to get byte classes from an
/// [`NFA`](crate::nfa::thompson::NFA) and ask for the class of various bytes.
///
/// ```
/// use regex_automata::nfa::thompson::NFA;
///
/// let nfa = NFA::new("[a-z]+")?;
/// let classes = nfa.byte_classes();
/// // 'a' and 'z' are in the same class for this regex.
/// assert_eq!(classes.get(b'a'), classes.get(b'z'));
/// // But 'a' and 'A' are not.
/// assert_ne!(classes.get(b'a'), classes.get(b'A'));
///
/// # Ok::<(), Box<dyn std::error::Error>>(())
/// ```
#[derive(Clone, Copy)]
pub struct ByteClasses([u8; 256]);

impl ByteClasses {
    /// Creates a new set of equivalence classes where all bytes are mapped to
    /// the same class.
    #[inline]
    pub fn empty() -> ByteClasses {
        ByteClasses([0; 256])
    }

    /// Creates a new set of equivalence classes where each byte belongs to
    /// its own equivalence class.
    #[inline]
    pub fn singletons() -> ByteClasses {
        let mut classes = ByteClasses::empty();
        for b in 0..=255 {
            classes.set(b, b);
        }
        classes
    }

    /// Deserializes a byte class map from the given slice. If the slice is of
    /// insufficient length or otherwise contains an impossible mapping, then
    /// an error is returned. Upon success, the number of bytes read along with
    /// the map are returned. The number of bytes read is always a multiple of
    /// 8.
    pub(crate) fn from_bytes(
        slice: &[u8],
    ) -> Result<(ByteClasses, usize), DeserializeError> {
        wire::check_slice_len(slice, 256, "byte class map")?;
        let mut classes = ByteClasses::empty();
        for (b, &class) in slice[..256].iter().enumerate() {
            classes.set(u8::try_from(b).unwrap(), class);
        }
        // We specifically don't use 'classes.iter()' here because that
        // iterator depends on 'classes.alphabet_len()' being correct. But that
        // is precisely the thing we're trying to verify below!
        //
        // NOTE(review): this accepts a class equal to `alphabet_len() - 1`,
        // which is the value `eoi()` uses for the sentinel. Confirm whether
        // such a mapping should be rejected as well.
        for &b in classes.0.iter() {
            if usize::from(b) >= classes.alphabet_len() {
                return Err(DeserializeError::generic(
                    "found equivalence class greater than alphabet len",
                ));
            }
        }
        Ok((classes, 256))
    }

    /// Writes this byte class map to the given byte buffer. If the given
    /// buffer is too small, then an error is returned. Upon success, the total
    /// number of bytes written is returned. The number of bytes written is
    /// guaranteed to be a multiple of 8.
    pub(crate) fn write_to(
        &self,
        dst: &mut [u8],
    ) -> Result<usize, SerializeError> {
        let nwrite = self.write_to_len();
        if dst.len() < nwrite {
            return Err(SerializeError::buffer_too_small("byte class map"));
        }
        // The map is stored as one class per byte value, in byte order, so
        // the whole table can be copied in one go.
        dst[..nwrite].copy_from_slice(&self.0);
        Ok(nwrite)
    }

    /// Returns the total number of bytes written by `write_to`.
    pub(crate) fn write_to_len(&self) -> usize {
        256
    }

    /// Set the equivalence class for the given byte.
    #[inline]
    pub fn set(&mut self, byte: u8, class: u8) {
        self.0[usize::from(byte)] = class;
    }

    /// Get the equivalence class for the given byte.
    #[inline]
    pub fn get(&self, byte: u8) -> u8 {
        self.0[usize::from(byte)]
    }

    /// Get the equivalence class for the given haystack unit and return the
    /// class as a `usize`.
    ///
    /// For an "end of input" unit, the sentinel value it carries is returned
    /// unchanged.
    #[inline]
    pub fn get_by_unit(&self, unit: Unit) -> usize {
        match unit.0 {
            UnitKind::U8(b) => usize::from(self.get(b)),
            UnitKind::EOI(b) => usize::from(b),
        }
    }

    /// Create a unit that represents the "end of input" sentinel based on the
    /// number of equivalence classes.
    #[inline]
    pub fn eoi(&self) -> Unit {
        // The alphabet length already includes the EOI sentinel, hence why
        // we subtract 1.
        Unit::eoi(self.alphabet_len().checked_sub(1).unwrap())
    }

    /// Return the total number of elements in the alphabet represented by
    /// these equivalence classes. Equivalently, this returns the total number
    /// of equivalence classes.
    #[inline]
    pub fn alphabet_len(&self) -> usize {
        // Add one since the number of equivalence classes is one bigger than
        // the last one. But add another to account for the final EOI class
        // that isn't explicitly represented.
        usize::from(self.0[255]) + 1 + 1
    }

    /// Returns the stride, as a base-2 exponent, required for these
    /// equivalence classes.
    ///
    /// The stride is always the smallest power of 2 that is greater than or
    /// equal to the alphabet length, and the `stride2` returned here is the
    /// exponent applied to `2` to get the smallest power. This is done so that
    /// converting between premultiplied state IDs and indices can be done with
    /// shifts alone, which is much faster than integer division.
    #[inline]
    pub fn stride2(&self) -> usize {
        let zeros = self.alphabet_len().next_power_of_two().trailing_zeros();
        usize::try_from(zeros).unwrap()
    }

    /// Returns true if and only if every byte in this class maps to its own
    /// equivalence class. Equivalently, there are 257 equivalence classes
    /// and each class contains either exactly one byte or corresponds to the
    /// singleton class containing the "end of input" sentinel.
+ #[inline] + pub fn is_singleton(&self) -> bool { + self.alphabet_len() == 257 + } + + /// Returns an iterator over all equivalence classes in this set. + #[inline] + pub fn iter(&self) -> ByteClassIter<'_> { + ByteClassIter { classes: self, i: 0 } + } + + /// Returns an iterator over a sequence of representative bytes from each + /// equivalence class within the range of bytes given. + /// + /// When the given range is unbounded on both sides, the iterator yields + /// exactly N items, where N is equivalent to the number of equivalence + /// classes. Each item is an arbitrary byte drawn from each equivalence + /// class. + /// + /// This is useful when one is determinizing an NFA and the NFA's alphabet + /// hasn't been converted to equivalence classes. Picking an arbitrary byte + /// from each equivalence class then permits a full exploration of the NFA + /// instead of using every possible byte value and thus potentially saves + /// quite a lot of redundant work. + /// + /// # Example + /// + /// This shows an example of what a complete sequence of representatives + /// might look like from a real example. + /// + /// ``` + /// use regex_automata::{nfa::thompson::NFA, util::alphabet::Unit}; + /// + /// let nfa = NFA::new("[a-z]+")?; + /// let classes = nfa.byte_classes(); + /// let reps: Vec = classes.representatives(..).collect(); + /// // Note that the specific byte values yielded are not guaranteed! 
+ /// let expected = vec![ + /// Unit::u8(b'\x00'), + /// Unit::u8(b'a'), + /// Unit::u8(b'{'), + /// Unit::eoi(3), + /// ]; + /// assert_eq!(expected, reps); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// Note though, that you can ask for an arbitrary range of bytes, and only + /// representatives for that range will be returned: + /// + /// ``` + /// use regex_automata::{nfa::thompson::NFA, util::alphabet::Unit}; + /// + /// let nfa = NFA::new("[a-z]+")?; + /// let classes = nfa.byte_classes(); + /// let reps: Vec = classes.representatives(b'A'..=b'z').collect(); + /// // Note that the specific byte values yielded are not guaranteed! + /// let expected = vec![ + /// Unit::u8(b'A'), + /// Unit::u8(b'a'), + /// ]; + /// assert_eq!(expected, reps); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn representatives>( + &self, + range: R, + ) -> ByteClassRepresentatives<'_> { + use core::ops::Bound; + + let cur_byte = match range.start_bound() { + Bound::Included(&i) => usize::from(i), + Bound::Excluded(&i) => usize::from(i).checked_add(1).unwrap(), + Bound::Unbounded => 0, + }; + let end_byte = match range.end_bound() { + Bound::Included(&i) => { + Some(usize::from(i).checked_add(1).unwrap()) + } + Bound::Excluded(&i) => Some(usize::from(i)), + Bound::Unbounded => None, + }; + assert_ne!( + cur_byte, + usize::MAX, + "start range must be less than usize::MAX", + ); + ByteClassRepresentatives { + classes: self, + cur_byte, + end_byte, + last_class: None, + } + } + + /// Returns an iterator of the bytes in the given equivalence class. + /// + /// This is useful when one needs to know the actual bytes that belong to + /// an equivalence class. For example, conceptually speaking, accelerating + /// a DFA state occurs when a state only has a few outgoing transitions. + /// But in reality, what is required is that there are only a small + /// number of distinct bytes that can lead to an outgoing transition. 
The + /// difference is that any one transition can correspond to an equivalence + /// class which may contains many bytes. Therefore, DFA state acceleration + /// considers the actual elements in each equivalence class of each + /// outgoing transition. + /// + /// # Example + /// + /// This shows an example of how to get all of the elements in an + /// equivalence class. + /// + /// ``` + /// use regex_automata::{nfa::thompson::NFA, util::alphabet::Unit}; + /// + /// let nfa = NFA::new("[a-z]+")?; + /// let classes = nfa.byte_classes(); + /// let elements: Vec = classes.elements(Unit::u8(1)).collect(); + /// let expected: Vec = (b'a'..=b'z').map(Unit::u8).collect(); + /// assert_eq!(expected, elements); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn elements(&self, class: Unit) -> ByteClassElements { + ByteClassElements { classes: self, class, byte: 0 } + } + + /// Returns an iterator of byte ranges in the given equivalence class. + /// + /// That is, a sequence of contiguous ranges are returned. Typically, every + /// class maps to a single contiguous range. + fn element_ranges(&self, class: Unit) -> ByteClassElementRanges { + ByteClassElementRanges { elements: self.elements(class), range: None } + } +} + +impl Default for ByteClasses { + fn default() -> ByteClasses { + ByteClasses::singletons() + } +} + +impl core::fmt::Debug for ByteClasses { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + if self.is_singleton() { + write!(f, "ByteClasses({{singletons}})") + } else { + write!(f, "ByteClasses(")?; + for (i, class) in self.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{:?} => [", class.as_usize())?; + for (start, end) in self.element_ranges(class) { + if start == end { + write!(f, "{:?}", start)?; + } else { + write!(f, "{:?}-{:?}", start, end)?; + } + } + write!(f, "]")?; + } + write!(f, ")") + } + } +} + +/// An iterator over each equivalence class. 
+/// +/// The last element in this iterator always corresponds to [`Unit::eoi`]. +/// +/// This is created by the [`ByteClasses::iter`] method. +/// +/// The lifetime `'a` refers to the lifetime of the byte classes that this +/// iterator was created from. +#[derive(Debug)] +pub struct ByteClassIter<'a> { + classes: &'a ByteClasses, + i: usize, +} + +impl<'a> Iterator for ByteClassIter<'a> { + type Item = Unit; + + fn next(&mut self) -> Option { + if self.i + 1 == self.classes.alphabet_len() { + self.i += 1; + Some(self.classes.eoi()) + } else if self.i < self.classes.alphabet_len() { + let class = u8::try_from(self.i).unwrap(); + self.i += 1; + Some(Unit::u8(class)) + } else { + None + } + } +} + +/// An iterator over representative bytes from each equivalence class. +/// +/// This is created by the [`ByteClasses::representatives`] method. +/// +/// The lifetime `'a` refers to the lifetime of the byte classes that this +/// iterator was created from. +#[derive(Debug)] +pub struct ByteClassRepresentatives<'a> { + classes: &'a ByteClasses, + cur_byte: usize, + end_byte: Option, + last_class: Option, +} + +impl<'a> Iterator for ByteClassRepresentatives<'a> { + type Item = Unit; + + fn next(&mut self) -> Option { + while self.cur_byte < self.end_byte.unwrap_or(256) { + let byte = u8::try_from(self.cur_byte).unwrap(); + let class = self.classes.get(byte); + self.cur_byte += 1; + + if self.last_class != Some(class) { + self.last_class = Some(class); + return Some(Unit::u8(byte)); + } + } + if self.cur_byte != usize::MAX && self.end_byte.is_none() { + // Using usize::MAX as a sentinel is OK because we ban usize::MAX + // from appearing as a start bound in iterator construction. But + // why do it this way? Well, we want to return the EOI class + // whenever the end of the given range is unbounded because EOI + // isn't really a "byte" per se, so the only way it should be + // excluded is if there is a bounded end to the range. 
Therefore, + // when the end is unbounded, we just need to know whether we've + // reported EOI or not. When we do, we set cur_byte to a value it + // can never otherwise be. + self.cur_byte = usize::MAX; + return Some(self.classes.eoi()); + } + None + } +} + +/// An iterator over all elements in an equivalence class. +/// +/// This is created by the [`ByteClasses::elements`] method. +/// +/// The lifetime `'a` refers to the lifetime of the byte classes that this +/// iterator was created from. +#[derive(Debug)] +pub struct ByteClassElements<'a> { + classes: &'a ByteClasses, + class: Unit, + byte: usize, +} + +impl<'a> Iterator for ByteClassElements<'a> { + type Item = Unit; + + fn next(&mut self) -> Option { + while self.byte < 256 { + let byte = u8::try_from(self.byte).unwrap(); + self.byte += 1; + if self.class.is_byte(self.classes.get(byte)) { + return Some(Unit::u8(byte)); + } + } + if self.byte < 257 { + self.byte += 1; + if self.class.is_eoi() { + return Some(Unit::eoi(256)); + } + } + None + } +} + +/// An iterator over all elements in an equivalence class expressed as a +/// sequence of contiguous ranges. +#[derive(Debug)] +struct ByteClassElementRanges<'a> { + elements: ByteClassElements<'a>, + range: Option<(Unit, Unit)>, +} + +impl<'a> Iterator for ByteClassElementRanges<'a> { + type Item = (Unit, Unit); + + fn next(&mut self) -> Option<(Unit, Unit)> { + loop { + let element = match self.elements.next() { + None => return self.range.take(), + Some(element) => element, + }; + match self.range.take() { + None => { + self.range = Some((element, element)); + } + Some((start, end)) => { + if end.as_usize() + 1 != element.as_usize() + || element.is_eoi() + { + self.range = Some((element, element)); + return Some((start, end)); + } + self.range = Some((start, element)); + } + } + } + } +} + +/// A partitioning of bytes into equivalence classes. 
+/// +/// A byte class set keeps track of an *approximation* of equivalence classes +/// of bytes during NFA construction. That is, every byte in an equivalence +/// class cannot discriminate between a match and a non-match. +/// +/// For example, in the regex `[ab]+`, the bytes `a` and `b` would be in the +/// same equivalence class because it never matters whether an `a` or a `b` is +/// seen, and no combination of `a`s and `b`s in the text can discriminate a +/// match. +/// +/// Note though that this does not compute the minimal set of equivalence +/// classes. For example, in the regex `[ac]+`, both `a` and `c` are in the +/// same equivalence class for the same reason that `a` and `b` are in the +/// same equivalence class in the aforementioned regex. However, in this +/// implementation, `a` and `c` are put into distinct equivalence classes. The +/// reason for this is implementation complexity. In the future, we should +/// endeavor to compute the minimal equivalence classes since they can have a +/// rather large impact on the size of the DFA. (Doing this will likely require +/// rethinking how equivalence classes are computed, including changing the +/// representation here, which is only able to group contiguous bytes into the +/// same equivalence class.) +#[cfg(feature = "alloc")] +#[derive(Clone, Debug)] +pub(crate) struct ByteClassSet(ByteSet); + +#[cfg(feature = "alloc")] +impl Default for ByteClassSet { + fn default() -> ByteClassSet { + ByteClassSet::empty() + } +} + +#[cfg(feature = "alloc")] +impl ByteClassSet { + /// Create a new set of byte classes where all bytes are part of the same + /// equivalence class. + pub(crate) fn empty() -> Self { + ByteClassSet(ByteSet::empty()) + } + + /// Indicate the the range of byte given (inclusive) can discriminate a + /// match between it and all other bytes outside of the range. 
+ pub(crate) fn set_range(&mut self, start: u8, end: u8) { + debug_assert!(start <= end); + if start > 0 { + self.0.add(start - 1); + } + self.0.add(end); + } + + /// Add the contiguous ranges in the set given to this byte class set. + pub(crate) fn add_set(&mut self, set: &ByteSet) { + for (start, end) in set.iter_ranges() { + self.set_range(start, end); + } + } + + /// Convert this boolean set to a map that maps all byte values to their + /// corresponding equivalence class. The last mapping indicates the largest + /// equivalence class identifier (which is never bigger than 255). + pub(crate) fn byte_classes(&self) -> ByteClasses { + let mut classes = ByteClasses::empty(); + let mut class = 0u8; + let mut b = 0u8; + loop { + classes.set(b, class); + if b == 255 { + break; + } + if self.0.contains(b) { + class = class.checked_add(1).unwrap(); + } + b = b.checked_add(1).unwrap(); + } + classes + } +} + +/// A simple set of bytes that is reasonably cheap to copy and allocation free. +#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] +pub(crate) struct ByteSet { + bits: BitSet, +} + +/// The representation of a byte set. Split out so that we can define a +/// convenient Debug impl for it while keeping "ByteSet" in the output. +#[derive(Clone, Copy, Default, Eq, PartialEq)] +struct BitSet([u128; 2]); + +impl ByteSet { + /// Create an empty set of bytes. + pub(crate) fn empty() -> ByteSet { + ByteSet { bits: BitSet([0; 2]) } + } + + /// Add a byte to this set. + /// + /// If the given byte already belongs to this set, then this is a no-op. + pub(crate) fn add(&mut self, byte: u8) { + let bucket = byte / 128; + let bit = byte % 128; + self.bits.0[usize::from(bucket)] |= 1 << bit; + } + + /// Remove a byte from this set. + /// + /// If the given byte is not in this set, then this is a no-op. 
+ pub(crate) fn remove(&mut self, byte: u8) { + let bucket = byte / 128; + let bit = byte % 128; + self.bits.0[usize::from(bucket)] &= !(1 << bit); + } + + /// Return true if and only if the given byte is in this set. + pub(crate) fn contains(&self, byte: u8) -> bool { + let bucket = byte / 128; + let bit = byte % 128; + self.bits.0[usize::from(bucket)] & (1 << bit) > 0 + } + + /// Return true if and only if the given inclusive range of bytes is in + /// this set. + pub(crate) fn contains_range(&self, start: u8, end: u8) -> bool { + (start..=end).all(|b| self.contains(b)) + } + + /// Returns an iterator over all bytes in this set. + pub(crate) fn iter(&self) -> ByteSetIter { + ByteSetIter { set: self, b: 0 } + } + + /// Returns an iterator over all contiguous ranges of bytes in this set. + pub(crate) fn iter_ranges(&self) -> ByteSetRangeIter { + ByteSetRangeIter { set: self, b: 0 } + } + + /// Return true if and only if this set is empty. + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn is_empty(&self) -> bool { + self.bits.0 == [0, 0] + } + + /// Deserializes a byte set from the given slice. If the slice is of + /// incorrect length or is otherwise malformed, then an error is returned. + /// Upon success, the number of bytes read along with the set are returned. + /// The number of bytes read is always a multiple of 8. + pub(crate) fn from_bytes( + slice: &[u8], + ) -> Result<(ByteSet, usize), DeserializeError> { + use core::mem::size_of; + + wire::check_slice_len(slice, 2 * size_of::(), "byte set")?; + let mut nread = 0; + let (low, nr) = wire::try_read_u128(slice, "byte set low bucket")?; + nread += nr; + let (high, nr) = wire::try_read_u128(slice, "byte set high bucket")?; + nread += nr; + Ok((ByteSet { bits: BitSet([low, high]) }, nread)) + } + + /// Writes this byte set to the given byte buffer. If the given buffer is + /// too small, then an error is returned. Upon success, the total number of + /// bytes written is returned. 
The number of bytes written is guaranteed to + /// be a multiple of 8. + pub(crate) fn write_to( + &self, + dst: &mut [u8], + ) -> Result { + use core::mem::size_of; + + let nwrite = self.write_to_len(); + if dst.len() < nwrite { + return Err(SerializeError::buffer_too_small("byte set")); + } + let mut nw = 0; + E::write_u128(self.bits.0[0], &mut dst[nw..]); + nw += size_of::(); + E::write_u128(self.bits.0[1], &mut dst[nw..]); + nw += size_of::(); + assert_eq!(nwrite, nw, "expected to write certain number of bytes",); + assert_eq!( + nw % 8, + 0, + "expected to write multiple of 8 bytes for byte set", + ); + Ok(nw) + } + + /// Returns the total number of bytes written by `write_to`. + pub(crate) fn write_to_len(&self) -> usize { + 2 * core::mem::size_of::() + } +} + +impl core::fmt::Debug for BitSet { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + let mut fmtd = f.debug_set(); + for b in 0u8..=255 { + if (ByteSet { bits: *self }).contains(b) { + fmtd.entry(&b); + } + } + fmtd.finish() + } +} + +#[derive(Debug)] +pub(crate) struct ByteSetIter<'a> { + set: &'a ByteSet, + b: usize, +} + +impl<'a> Iterator for ByteSetIter<'a> { + type Item = u8; + + fn next(&mut self) -> Option { + while self.b <= 255 { + let b = u8::try_from(self.b).unwrap(); + self.b += 1; + if self.set.contains(b) { + return Some(b); + } + } + None + } +} + +#[derive(Debug)] +pub(crate) struct ByteSetRangeIter<'a> { + set: &'a ByteSet, + b: usize, +} + +impl<'a> Iterator for ByteSetRangeIter<'a> { + type Item = (u8, u8); + + fn next(&mut self) -> Option<(u8, u8)> { + let asu8 = |n: usize| u8::try_from(n).unwrap(); + while self.b <= 255 { + let start = asu8(self.b); + self.b += 1; + if !self.set.contains(start) { + continue; + } + + let mut end = start; + while self.b <= 255 && self.set.contains(asu8(self.b)) { + end = asu8(self.b); + self.b += 1; + } + return Some((start, end)); + } + None + } +} + +#[cfg(all(test, feature = "alloc"))] +mod tests { + use alloc::{vec, 
vec::Vec}; + + use super::*; + + #[test] + fn byte_classes() { + let mut set = ByteClassSet::empty(); + set.set_range(b'a', b'z'); + + let classes = set.byte_classes(); + assert_eq!(classes.get(0), 0); + assert_eq!(classes.get(1), 0); + assert_eq!(classes.get(2), 0); + assert_eq!(classes.get(b'a' - 1), 0); + assert_eq!(classes.get(b'a'), 1); + assert_eq!(classes.get(b'm'), 1); + assert_eq!(classes.get(b'z'), 1); + assert_eq!(classes.get(b'z' + 1), 2); + assert_eq!(classes.get(254), 2); + assert_eq!(classes.get(255), 2); + + let mut set = ByteClassSet::empty(); + set.set_range(0, 2); + set.set_range(4, 6); + let classes = set.byte_classes(); + assert_eq!(classes.get(0), 0); + assert_eq!(classes.get(1), 0); + assert_eq!(classes.get(2), 0); + assert_eq!(classes.get(3), 1); + assert_eq!(classes.get(4), 2); + assert_eq!(classes.get(5), 2); + assert_eq!(classes.get(6), 2); + assert_eq!(classes.get(7), 3); + assert_eq!(classes.get(255), 3); + } + + #[test] + fn full_byte_classes() { + let mut set = ByteClassSet::empty(); + for b in 0u8..=255 { + set.set_range(b, b); + } + assert_eq!(set.byte_classes().alphabet_len(), 257); + } + + #[test] + fn elements_typical() { + let mut set = ByteClassSet::empty(); + set.set_range(b'b', b'd'); + set.set_range(b'g', b'm'); + set.set_range(b'z', b'z'); + let classes = set.byte_classes(); + // class 0: \x00-a + // class 1: b-d + // class 2: e-f + // class 3: g-m + // class 4: n-y + // class 5: z-z + // class 6: \x7B-\xFF + // class 7: EOI + assert_eq!(classes.alphabet_len(), 8); + + let elements = classes.elements(Unit::u8(0)).collect::>(); + assert_eq!(elements.len(), 98); + assert_eq!(elements[0], Unit::u8(b'\x00')); + assert_eq!(elements[97], Unit::u8(b'a')); + + let elements = classes.elements(Unit::u8(1)).collect::>(); + assert_eq!( + elements, + vec![Unit::u8(b'b'), Unit::u8(b'c'), Unit::u8(b'd')], + ); + + let elements = classes.elements(Unit::u8(2)).collect::>(); + assert_eq!(elements, vec![Unit::u8(b'e'), Unit::u8(b'f')],); + + 
let elements = classes.elements(Unit::u8(3)).collect::>(); + assert_eq!( + elements, + vec![ + Unit::u8(b'g'), + Unit::u8(b'h'), + Unit::u8(b'i'), + Unit::u8(b'j'), + Unit::u8(b'k'), + Unit::u8(b'l'), + Unit::u8(b'm'), + ], + ); + + let elements = classes.elements(Unit::u8(4)).collect::>(); + assert_eq!(elements.len(), 12); + assert_eq!(elements[0], Unit::u8(b'n')); + assert_eq!(elements[11], Unit::u8(b'y')); + + let elements = classes.elements(Unit::u8(5)).collect::>(); + assert_eq!(elements, vec![Unit::u8(b'z')]); + + let elements = classes.elements(Unit::u8(6)).collect::>(); + assert_eq!(elements.len(), 133); + assert_eq!(elements[0], Unit::u8(b'\x7B')); + assert_eq!(elements[132], Unit::u8(b'\xFF')); + + let elements = classes.elements(Unit::eoi(7)).collect::>(); + assert_eq!(elements, vec![Unit::eoi(256)]); + } + + #[test] + fn elements_singletons() { + let classes = ByteClasses::singletons(); + assert_eq!(classes.alphabet_len(), 257); + + let elements = classes.elements(Unit::u8(b'a')).collect::>(); + assert_eq!(elements, vec![Unit::u8(b'a')]); + + let elements = classes.elements(Unit::eoi(5)).collect::>(); + assert_eq!(elements, vec![Unit::eoi(256)]); + } + + #[test] + fn elements_empty() { + let classes = ByteClasses::empty(); + assert_eq!(classes.alphabet_len(), 2); + + let elements = classes.elements(Unit::u8(0)).collect::>(); + assert_eq!(elements.len(), 256); + assert_eq!(elements[0], Unit::u8(b'\x00')); + assert_eq!(elements[255], Unit::u8(b'\xFF')); + + let elements = classes.elements(Unit::eoi(1)).collect::>(); + assert_eq!(elements, vec![Unit::eoi(256)]); + } + + #[test] + fn representatives() { + let mut set = ByteClassSet::empty(); + set.set_range(b'b', b'd'); + set.set_range(b'g', b'm'); + set.set_range(b'z', b'z'); + let classes = set.byte_classes(); + + let got: Vec = classes.representatives(..).collect(); + let expected = vec![ + Unit::u8(b'\x00'), + Unit::u8(b'b'), + Unit::u8(b'e'), + Unit::u8(b'g'), + Unit::u8(b'n'), + Unit::u8(b'z'), + 
Unit::u8(b'\x7B'), + Unit::eoi(7), + ]; + assert_eq!(expected, got); + + let got: Vec = classes.representatives(..0).collect(); + assert!(got.is_empty()); + let got: Vec = classes.representatives(1..1).collect(); + assert!(got.is_empty()); + let got: Vec = classes.representatives(255..255).collect(); + assert!(got.is_empty()); + + // A weird case that is the only guaranteed to way to get an iterator + // of just the EOI class by excluding all possible byte values. + let got: Vec = classes + .representatives(( + core::ops::Bound::Excluded(255), + core::ops::Bound::Unbounded, + )) + .collect(); + let expected = vec![Unit::eoi(7)]; + assert_eq!(expected, got); + + let got: Vec = classes.representatives(..=255).collect(); + let expected = vec![ + Unit::u8(b'\x00'), + Unit::u8(b'b'), + Unit::u8(b'e'), + Unit::u8(b'g'), + Unit::u8(b'n'), + Unit::u8(b'z'), + Unit::u8(b'\x7B'), + ]; + assert_eq!(expected, got); + + let got: Vec = classes.representatives(b'b'..=b'd').collect(); + let expected = vec![Unit::u8(b'b')]; + assert_eq!(expected, got); + + let got: Vec = classes.representatives(b'a'..=b'd').collect(); + let expected = vec![Unit::u8(b'a'), Unit::u8(b'b')]; + assert_eq!(expected, got); + + let got: Vec = classes.representatives(b'b'..=b'e').collect(); + let expected = vec![Unit::u8(b'b'), Unit::u8(b'e')]; + assert_eq!(expected, got); + + let got: Vec = classes.representatives(b'A'..=b'Z').collect(); + let expected = vec![Unit::u8(b'A')]; + assert_eq!(expected, got); + + let got: Vec = classes.representatives(b'A'..=b'z').collect(); + let expected = vec![ + Unit::u8(b'A'), + Unit::u8(b'b'), + Unit::u8(b'e'), + Unit::u8(b'g'), + Unit::u8(b'n'), + Unit::u8(b'z'), + ]; + assert_eq!(expected, got); + + let got: Vec = classes.representatives(b'z'..).collect(); + let expected = vec![Unit::u8(b'z'), Unit::u8(b'\x7B'), Unit::eoi(7)]; + assert_eq!(expected, got); + + let got: Vec = classes.representatives(b'z'..=0xFF).collect(); + let expected = vec![Unit::u8(b'z'), 
Unit::u8(b'\x7B')]; + assert_eq!(expected, got); + } +} diff --git a/vendor/regex-automata/src/util/captures.rs b/vendor/regex-automata/src/util/captures.rs new file mode 100644 index 0000000..05db6a9 --- /dev/null +++ b/vendor/regex-automata/src/util/captures.rs @@ -0,0 +1,2548 @@ +/*! +Provides types for dealing with capturing groups. + +Capturing groups refer to sub-patterns of regexes that some regex engines can +report matching offsets for. For example, matching `[a-z]([0-9]+)` against +`a789` would give `a789` as the overall match (for the implicit capturing group +at index `0`) and `789` as the match for the capturing group `([0-9]+)` (an +explicit capturing group at index `1`). + +Not all regex engines can report match offsets for capturing groups. Indeed, +to a first approximation, regex engines that can report capturing group offsets +tend to be quite a bit slower than regex engines that can't. This is because +tracking capturing groups at search time usually requires more "power" that +in turn adds overhead. + +Other regex implementations might call capturing groups "submatches." + +# Overview + +The main types in this module are: + +* [`Captures`] records the capturing group offsets found during a search. It +provides convenience routines for looking up capturing group offsets by either +index or name. +* [`GroupInfo`] records the mapping between capturing groups and "slots," +where the latter are how capturing groups are recorded during a regex search. +This also keeps a mapping from capturing group name to index, and capture +group index to name. A `GroupInfo` is used by `Captures` internally to +provide a convenient API. It is unlikely that you'll use a `GroupInfo` +directly, but for example, if you've compiled an Thompson NFA, then you can use +[`thompson::NFA::group_info`](crate::nfa::thompson::NFA::group_info) to get its +underlying `GroupInfo`. 
+*/ + +use alloc::{string::String, sync::Arc, vec, vec::Vec}; + +use crate::util::{ + interpolate, + primitives::{ + NonMaxUsize, PatternID, PatternIDError, PatternIDIter, SmallIndex, + }, + search::{Match, Span}, +}; + +/// The span offsets of capturing groups after a match has been found. +/// +/// This type represents the output of regex engines that can report the +/// offsets at which capturing groups matches or "submatches" occur. For +/// example, the [`PikeVM`](crate::nfa::thompson::pikevm::PikeVM). When a match +/// occurs, it will at minimum contain the [`PatternID`] of the pattern that +/// matched. Depending upon how it was constructed, it may also contain the +/// start/end offsets of the entire match of the pattern and the start/end +/// offsets of each capturing group that participated in the match. +/// +/// Values of this type are always created for a specific [`GroupInfo`]. It is +/// unspecified behavior to use a `Captures` value in a search with any regex +/// engine that has a different `GroupInfo` than the one the `Captures` were +/// created with. +/// +/// # Constructors +/// +/// There are three constructors for this type that control what kind of +/// information is available upon a match: +/// +/// * [`Captures::all`]: Will store overall pattern match offsets in addition +/// to the offsets of capturing groups that participated in the match. +/// * [`Captures::matches`]: Will store only the overall pattern +/// match offsets. The offsets of capturing groups (even ones that participated +/// in the match) are not available. +/// * [`Captures::empty`]: Will only store the pattern ID that matched. No +/// match offsets are available at all. +/// +/// If you aren't sure which to choose, then pick the first one. The first one +/// is what convenience routines like, +/// [`PikeVM::create_captures`](crate::nfa::thompson::pikevm::PikeVM::create_captures), +/// will use automatically. 
+/// +/// The main difference between these choices is performance. Namely, if you +/// ask for _less_ information, then the execution of regex search may be able +/// to run more quickly. +/// +/// # Notes +/// +/// It is worth pointing out that this type is not coupled to any one specific +/// regex engine. Instead, its coupling is with [`GroupInfo`], which is the +/// thing that is responsible for mapping capturing groups to "slot" offsets. +/// Slot offsets are indices into a single sequence of memory at which matching +/// haystack offsets for the corresponding group are written by regex engines. +/// +/// # Example +/// +/// This example shows how to parse a simple date and extract the components of +/// the date via capturing groups: +/// +/// ``` +/// use regex_automata::{nfa::thompson::pikevm::PikeVM, Span}; +/// +/// let re = PikeVM::new(r"^([0-9]{4})-([0-9]{2})-([0-9]{2})$")?; +/// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); +/// +/// re.captures(&mut cache, "2010-03-14", &mut caps); +/// assert!(caps.is_match()); +/// assert_eq!(Some(Span::from(0..4)), caps.get_group(1)); +/// assert_eq!(Some(Span::from(5..7)), caps.get_group(2)); +/// assert_eq!(Some(Span::from(8..10)), caps.get_group(3)); +/// +/// # Ok::<(), Box>(()) +/// ``` +/// +/// # Example: named capturing groups +/// +/// This example is like the one above, but leverages the ability to name +/// capturing groups in order to make the code a bit clearer: +/// +/// ``` +/// use regex_automata::{nfa::thompson::pikevm::PikeVM, Span}; +/// +/// let re = PikeVM::new(r"^(?P[0-9]{4})-(?P[0-9]{2})-(?P[0-9]{2})$")?; +/// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); +/// +/// re.captures(&mut cache, "2010-03-14", &mut caps); +/// assert!(caps.is_match()); +/// assert_eq!(Some(Span::from(0..4)), caps.get_group_by_name("y")); +/// assert_eq!(Some(Span::from(5..7)), caps.get_group_by_name("m")); +/// assert_eq!(Some(Span::from(8..10)), 
caps.get_group_by_name("d")); +/// +/// # Ok::<(), Box>(()) +/// ``` +#[derive(Clone)] +pub struct Captures { + /// The group info that these capture groups are coupled to. This is what + /// gives the "convenience" of the `Captures` API. Namely, it provides the + /// slot mapping and the name|-->index mapping for capture lookups by name. + group_info: GroupInfo, + /// The ID of the pattern that matched. Regex engines must set this to + /// None when no match occurs. + pid: Option, + /// The slot values, i.e., submatch offsets. + /// + /// In theory, the smallest sequence of slots would be something like + /// `max(groups(pattern) for pattern in regex) * 2`, but instead, we use + /// `sum(groups(pattern) for pattern in regex) * 2`. Why? + /// + /// Well, the former could be used in theory, because we don't generally + /// have any overlapping APIs that involve capturing groups. Therefore, + /// there's technically never any need to have slots set for multiple + /// patterns. However, this might change some day, in which case, we would + /// need to have slots available. + /// + /// The other reason is that during the execution of some regex engines, + /// there exists a point in time where multiple slots for different + /// patterns may be written to before knowing which pattern has matched. + /// Therefore, the regex engines themselves, in order to support multiple + /// patterns correctly, must have all slots available. If `Captures` + /// doesn't have all slots available, then regex engines can't write + /// directly into the caller provided `Captures` and must instead write + /// into some other storage and then copy the slots involved in the match + /// at the end of the search. + /// + /// So overall, at least as of the time of writing, it seems like the path + /// of least resistance is to just require allocating all possible slots + /// instead of the conceptual minimum. 
Another way to justify this is that + /// the most common case is a single pattern, in which case, there is no + /// inefficiency here since the 'max' and 'sum' calculations above are + /// equivalent in that case. + /// + /// N.B. The mapping from group index to slot is maintained by `GroupInfo` + /// and is considered an API guarantee. See `GroupInfo` for more details on + /// that mapping. + /// + /// N.B. `Option` has the same size as a `usize`. + slots: Vec>, +} + +impl Captures { + /// Create new storage for the offsets of all matching capturing groups. + /// + /// This routine provides the most information for matches---namely, the + /// spans of matching capturing groups---but also requires the regex search + /// routines to do the most work. + /// + /// It is unspecified behavior to use the returned `Captures` value in a + /// search with a `GroupInfo` other than the one that is provided to this + /// constructor. + /// + /// # Example + /// + /// This example shows that all capturing groups---but only ones that + /// participated in a match---are available to query after a match has + /// been found: + /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::pikevm::PikeVM, + /// util::captures::Captures, + /// Span, Match, + /// }; + /// + /// let re = PikeVM::new( + /// r"^(?:(?P[a-z]+)|(?P[A-Z]+))(?P[0-9]+)$", + /// )?; + /// let mut cache = re.create_cache(); + /// let mut caps = Captures::all(re.get_nfa().group_info().clone()); + /// + /// re.captures(&mut cache, "ABC123", &mut caps); + /// assert!(caps.is_match()); + /// assert_eq!(Some(Match::must(0, 0..6)), caps.get_match()); + /// // The 'lower' group didn't match, so it won't have any offsets. 
+ /// assert_eq!(None, caps.get_group_by_name("lower")); + /// assert_eq!(Some(Span::from(0..3)), caps.get_group_by_name("upper")); + /// assert_eq!(Some(Span::from(3..6)), caps.get_group_by_name("digits")); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn all(group_info: GroupInfo) -> Captures { + let slots = group_info.slot_len(); + Captures { group_info, pid: None, slots: vec![None; slots] } + } + + /// Create new storage for only the full match spans of a pattern. This + /// does not include any capturing group offsets. + /// + /// It is unspecified behavior to use the returned `Captures` value in a + /// search with a `GroupInfo` other than the one that is provided to this + /// constructor. + /// + /// # Example + /// + /// This example shows that only overall match offsets are reported when + /// this constructor is used. Accessing any capturing groups other than + /// the 0th will always return `None`. + /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::pikevm::PikeVM, + /// util::captures::Captures, + /// Match, + /// }; + /// + /// let re = PikeVM::new( + /// r"^(?:(?P[a-z]+)|(?P[A-Z]+))(?P[0-9]+)$", + /// )?; + /// let mut cache = re.create_cache(); + /// let mut caps = Captures::matches(re.get_nfa().group_info().clone()); + /// + /// re.captures(&mut cache, "ABC123", &mut caps); + /// assert!(caps.is_match()); + /// assert_eq!(Some(Match::must(0, 0..6)), caps.get_match()); + /// // We didn't ask for capturing group offsets, so they aren't available. + /// assert_eq!(None, caps.get_group_by_name("lower")); + /// assert_eq!(None, caps.get_group_by_name("upper")); + /// assert_eq!(None, caps.get_group_by_name("digits")); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn matches(group_info: GroupInfo) -> Captures { + // This is OK because we know there are at least this many slots, + // and GroupInfo construction guarantees that the number of slots fits + // into a usize. 
+ let slots = group_info.pattern_len().checked_mul(2).unwrap(); + Captures { group_info, pid: None, slots: vec![None; slots] } + } + + /// Create new storage for only tracking which pattern matched. No offsets + /// are stored at all. + /// + /// It is unspecified behavior to use the returned `Captures` value in a + /// search with a `GroupInfo` other than the one that is provided to this + /// constructor. + /// + /// # Example + /// + /// This example shows that only the pattern that matched can be accessed + /// from a `Captures` value created via this constructor. + /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::pikevm::PikeVM, + /// util::captures::Captures, + /// PatternID, + /// }; + /// + /// let re = PikeVM::new_many(&[r"[a-z]+", r"[A-Z]+"])?; + /// let mut cache = re.create_cache(); + /// let mut caps = Captures::empty(re.get_nfa().group_info().clone()); + /// + /// re.captures(&mut cache, "aABCz", &mut caps); + /// assert!(caps.is_match()); + /// assert_eq!(Some(PatternID::must(0)), caps.pattern()); + /// // We didn't ask for any offsets, so they aren't available. + /// assert_eq!(None, caps.get_match()); + /// + /// re.captures(&mut cache, &"aABCz"[1..], &mut caps); + /// assert!(caps.is_match()); + /// assert_eq!(Some(PatternID::must(1)), caps.pattern()); + /// // We didn't ask for any offsets, so they aren't available. + /// assert_eq!(None, caps.get_match()); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn empty(group_info: GroupInfo) -> Captures { + Captures { group_info, pid: None, slots: vec![] } + } + + /// Returns true if and only if this capturing group represents a match. + /// + /// This is a convenience routine for `caps.pattern().is_some()`. 
+ /// + /// # Example + /// + /// When using the PikeVM (for example), the lightest weight way of + /// detecting whether a match exists is to create capturing groups that + /// only track the ID of the pattern that match (if any): + /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::pikevm::PikeVM, + /// util::captures::Captures, + /// }; + /// + /// let re = PikeVM::new(r"[a-z]+")?; + /// let mut cache = re.create_cache(); + /// let mut caps = Captures::empty(re.get_nfa().group_info().clone()); + /// + /// re.captures(&mut cache, "aABCz", &mut caps); + /// assert!(caps.is_match()); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn is_match(&self) -> bool { + self.pid.is_some() + } + + /// Returns the identifier of the pattern that matched when this + /// capturing group represents a match. If no match was found, then this + /// always returns `None`. + /// + /// This returns a pattern ID in precisely the cases in which `is_match` + /// returns `true`. Similarly, the pattern ID returned is always the + /// same pattern ID found in the `Match` returned by `get_match`. + /// + /// # Example + /// + /// When using the PikeVM (for example), the lightest weight way of + /// detecting which pattern matched is to create capturing groups that only + /// track the ID of the pattern that match (if any): + /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::pikevm::PikeVM, + /// util::captures::Captures, + /// PatternID, + /// }; + /// + /// let re = PikeVM::new_many(&[r"[a-z]+", r"[A-Z]+"])?; + /// let mut cache = re.create_cache(); + /// let mut caps = Captures::empty(re.get_nfa().group_info().clone()); + /// + /// re.captures(&mut cache, "ABC", &mut caps); + /// assert_eq!(Some(PatternID::must(1)), caps.pattern()); + /// // Recall that offsets are only available when using a non-empty + /// // Captures value. So even though a match occurred, this returns None! 
+ /// assert_eq!(None, caps.get_match()); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn pattern(&self) -> Option { + self.pid + } + + /// Returns the pattern ID and the span of the match, if one occurred. + /// + /// This always returns `None` when `Captures` was created with + /// [`Captures::empty`], even if a match was found. + /// + /// If this routine returns a non-`None` value, then `is_match` is + /// guaranteed to return `true` and `pattern` is also guaranteed to return + /// a non-`None` value. + /// + /// # Example + /// + /// This example shows how to get the full match from a search: + /// + /// ``` + /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match}; + /// + /// let re = PikeVM::new_many(&[r"[a-z]+", r"[A-Z]+"])?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// + /// re.captures(&mut cache, "ABC", &mut caps); + /// assert_eq!(Some(Match::must(1, 0..3)), caps.get_match()); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn get_match(&self) -> Option { + Some(Match::new(self.pattern()?, self.get_group(0)?)) + } + + /// Returns the span of a capturing group match corresponding to the group + /// index given, only if both the overall pattern matched and the capturing + /// group participated in that match. + /// + /// This returns `None` if `index` is invalid. `index` is valid if and only + /// if it's less than [`Captures::group_len`] for the matching pattern. + /// + /// This always returns `None` when `Captures` was created with + /// [`Captures::empty`], even if a match was found. This also always + /// returns `None` for any `index > 0` when `Captures` was created with + /// [`Captures::matches`]. + /// + /// If this routine returns a non-`None` value, then `is_match` is + /// guaranteed to return `true`, `pattern` is guaranteed to return a + /// non-`None` value and `get_match` is guaranteed to return a non-`None` + /// value. 
+ /// + /// By convention, the 0th capture group will always return the same + /// span as the span returned by `get_match`. This is because the 0th + /// capture group always corresponds to the entirety of the pattern's + /// match. (It is similarly always unnamed because it is implicit.) This + /// isn't necessarily true of all regex engines. For example, one can + /// hand-compile a [`thompson::NFA`](crate::nfa::thompson::NFA) via a + /// [`thompson::Builder`](crate::nfa::thompson::Builder), which isn't + /// technically forced to make the 0th capturing group always correspond to + /// the entire match. + /// + /// # Example + /// + /// This example shows how to get the capturing groups, by index, from a + /// match: + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Span, Match}; + /// + /// let re = PikeVM::new(r"^(?P\pL+)\s+(?P\pL+)$")?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// + /// re.captures(&mut cache, "Bruce Springsteen", &mut caps); + /// assert_eq!(Some(Match::must(0, 0..17)), caps.get_match()); + /// assert_eq!(Some(Span::from(0..5)), caps.get_group(1)); + /// assert_eq!(Some(Span::from(6..17)), caps.get_group(2)); + /// // Looking for a non-existent capturing group will return None: + /// assert_eq!(None, caps.get_group(3)); + /// # // literals are too big for 32-bit usize: #1039 + /// # #[cfg(target_pointer_width = "64")] + /// assert_eq!(None, caps.get_group(9944060567225171988)); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn get_group(&self, index: usize) -> Option { + let pid = self.pattern()?; + // There's a little bit of work needed to map captures to slots in the + // fully general case. But in the overwhelming common case of a single + // pattern, we can just do some simple arithmetic. 
+ let (slot_start, slot_end) = if self.group_info().pattern_len() == 1 { + (index.checked_mul(2)?, index.checked_mul(2)?.checked_add(1)?) + } else { + self.group_info().slots(pid, index)? + }; + let start = self.slots.get(slot_start).copied()??; + let end = self.slots.get(slot_end).copied()??; + Some(Span { start: start.get(), end: end.get() }) + } + + /// Returns the span of a capturing group match corresponding to the group + /// name given, only if both the overall pattern matched and the capturing + /// group participated in that match. + /// + /// This returns `None` if `name` does not correspond to a valid capturing + /// group for the pattern that matched. + /// + /// This always returns `None` when `Captures` was created with + /// [`Captures::empty`], even if a match was found. This also always + /// returns `None` for any `index > 0` when `Captures` was created with + /// [`Captures::matches`]. + /// + /// If this routine returns a non-`None` value, then `is_match` is + /// guaranteed to return `true`, `pattern` is guaranteed to return a + /// non-`None` value and `get_match` is guaranteed to return a non-`None` + /// value. 
+ /// + /// # Example + /// + /// This example shows how to get the capturing groups, by name, from a + /// match: + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Span, Match}; + /// + /// let re = PikeVM::new(r"^(?P\pL+)\s+(?P\pL+)$")?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// + /// re.captures(&mut cache, "Bruce Springsteen", &mut caps); + /// assert_eq!(Some(Match::must(0, 0..17)), caps.get_match()); + /// assert_eq!(Some(Span::from(0..5)), caps.get_group_by_name("first")); + /// assert_eq!(Some(Span::from(6..17)), caps.get_group_by_name("last")); + /// // Looking for a non-existent capturing group will return None: + /// assert_eq!(None, caps.get_group_by_name("middle")); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn get_group_by_name(&self, name: &str) -> Option { + let index = self.group_info().to_index(self.pattern()?, name)?; + self.get_group(index) + } + + /// Returns an iterator of possible spans for every capturing group in the + /// matching pattern. + /// + /// If this `Captures` value does not correspond to a match, then the + /// iterator returned yields no elements. + /// + /// Note that the iterator returned yields elements of type `Option`. + /// A span is present if and only if it corresponds to a capturing group + /// that participated in a match. + /// + /// # Example + /// + /// This example shows how to collect all capturing groups: + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Span}; + /// + /// let re = PikeVM::new( + /// // Matches first/last names, with an optional middle name. 
+ /// r"^(?P\pL+)\s+(?:(?P\pL+)\s+)?(?P\pL+)$", + /// )?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// + /// re.captures(&mut cache, "Harry James Potter", &mut caps); + /// assert!(caps.is_match()); + /// let groups: Vec> = caps.iter().collect(); + /// assert_eq!(groups, vec![ + /// Some(Span::from(0..18)), + /// Some(Span::from(0..5)), + /// Some(Span::from(6..11)), + /// Some(Span::from(12..18)), + /// ]); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// This example uses the same regex as the previous example, but with a + /// haystack that omits the middle name. This results in a capturing group + /// that is present in the elements yielded by the iterator but without a + /// match: + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Span}; + /// + /// let re = PikeVM::new( + /// // Matches first/last names, with an optional middle name. + /// r"^(?P\pL+)\s+(?:(?P\pL+)\s+)?(?P\pL+)$", + /// )?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// + /// re.captures(&mut cache, "Harry Potter", &mut caps); + /// assert!(caps.is_match()); + /// let groups: Vec> = caps.iter().collect(); + /// assert_eq!(groups, vec![ + /// Some(Span::from(0..12)), + /// Some(Span::from(0..5)), + /// None, + /// Some(Span::from(6..12)), + /// ]); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn iter(&self) -> CapturesPatternIter<'_> { + let names = self + .pattern() + .map_or(GroupInfoPatternNames::empty().enumerate(), |pid| { + self.group_info().pattern_names(pid).enumerate() + }); + CapturesPatternIter { caps: self, names } + } + + /// Return the total number of capturing groups for the matching pattern. + /// + /// If this `Captures` value does not correspond to a match, then this + /// always returns `0`. + /// + /// This always returns the same number of elements yielded by + /// [`Captures::iter`]. 
That is, the number includes capturing groups even + /// if they don't participate in the match. + /// + /// # Example + /// + /// This example shows how to count the total number of capturing groups + /// associated with a pattern. Notice that it includes groups that did not + /// participate in a match (just like `Captures::iter` does). + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::nfa::thompson::pikevm::PikeVM; + /// + /// let re = PikeVM::new( + /// // Matches first/last names, with an optional middle name. + /// r"^(?P\pL+)\s+(?:(?P\pL+)\s+)?(?P\pL+)$", + /// )?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// + /// re.captures(&mut cache, "Harry Potter", &mut caps); + /// assert_eq!(4, caps.group_len()); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn group_len(&self) -> usize { + let pid = match self.pattern() { + None => return 0, + Some(pid) => pid, + }; + self.group_info().group_len(pid) + } + + /// Returns a reference to the underlying group info on which these + /// captures are based. + /// + /// The difference between `GroupInfo` and `Captures` is that the former + /// defines the structure of capturing groups where as the latter is what + /// stores the actual match information. So where as `Captures` only gives + /// you access to the current match, `GroupInfo` lets you query any + /// information about all capturing groups, even ones for patterns that + /// weren't involved in a match. + /// + /// Note that a `GroupInfo` uses reference counting internally, so it may + /// be cloned cheaply. + /// + /// # Example + /// + /// This example shows how to get all capturing group names from the + /// underlying `GroupInfo`. Notice that we don't even need to run a + /// search. 
+ /// + /// ``` + /// use regex_automata::{nfa::thompson::pikevm::PikeVM, PatternID}; + /// + /// let re = PikeVM::new_many(&[ + /// r"(?Pa)", + /// r"(a)(b)", + /// r"ab", + /// r"(?Pa)(?Pa)", + /// r"(?Pz)", + /// ])?; + /// let caps = re.create_captures(); + /// + /// let expected = vec![ + /// (PatternID::must(0), 0, None), + /// (PatternID::must(0), 1, Some("foo")), + /// (PatternID::must(1), 0, None), + /// (PatternID::must(1), 1, None), + /// (PatternID::must(1), 2, None), + /// (PatternID::must(2), 0, None), + /// (PatternID::must(3), 0, None), + /// (PatternID::must(3), 1, Some("bar")), + /// (PatternID::must(3), 2, Some("quux")), + /// (PatternID::must(4), 0, None), + /// (PatternID::must(4), 1, Some("foo")), + /// ]; + /// // We could also just use 're.get_nfa().group_info()'. + /// let got: Vec<(PatternID, usize, Option<&str>)> = + /// caps.group_info().all_names().collect(); + /// assert_eq!(expected, got); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn group_info(&self) -> &GroupInfo { + &self.group_info + } + + /// Interpolates the capture references in `replacement` with the + /// corresponding substrings in `haystack` matched by each reference. The + /// interpolated string is returned. + /// + /// See the [`interpolate` module](interpolate) for documentation on the + /// format of the replacement string. + /// + /// # Example + /// + /// This example shows how to use interpolation, and also shows how it + /// can work with multi-pattern regexes. + /// + /// ``` + /// use regex_automata::{nfa::thompson::pikevm::PikeVM, PatternID}; + /// + /// let re = PikeVM::new_many(&[ + /// r"(?[0-9]{2})-(?[0-9]{2})-(?[0-9]{4})", + /// r"(?[0-9]{4})-(?[0-9]{2})-(?[0-9]{2})", + /// ])?; + /// let mut cache = re.create_cache(); + /// let mut caps = re.create_captures(); + /// + /// let replacement = "year=$year, month=$month, day=$day"; + /// + /// // This matches the first pattern. 
+ /// let hay = "On 14-03-2010, I became a Tenneessee lamb."; + /// re.captures(&mut cache, hay, &mut caps); + /// let result = caps.interpolate_string(hay, replacement); + /// assert_eq!("year=2010, month=03, day=14", result); + /// + /// // And this matches the second pattern. + /// let hay = "On 2010-03-14, I became a Tenneessee lamb."; + /// re.captures(&mut cache, hay, &mut caps); + /// let result = caps.interpolate_string(hay, replacement); + /// assert_eq!("year=2010, month=03, day=14", result); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn interpolate_string( + &self, + haystack: &str, + replacement: &str, + ) -> String { + let mut dst = String::new(); + self.interpolate_string_into(haystack, replacement, &mut dst); + dst + } + + /// Interpolates the capture references in `replacement` with the + /// corresponding substrings in `haystack` matched by each reference. The + /// interpolated string is written to `dst`. + /// + /// See the [`interpolate` module](interpolate) for documentation on the + /// format of the replacement string. + /// + /// # Example + /// + /// This example shows how to use interpolation, and also shows how it + /// can work with multi-pattern regexes. + /// + /// ``` + /// use regex_automata::{nfa::thompson::pikevm::PikeVM, PatternID}; + /// + /// let re = PikeVM::new_many(&[ + /// r"(?[0-9]{2})-(?[0-9]{2})-(?[0-9]{4})", + /// r"(?[0-9]{4})-(?[0-9]{2})-(?[0-9]{2})", + /// ])?; + /// let mut cache = re.create_cache(); + /// let mut caps = re.create_captures(); + /// + /// let replacement = "year=$year, month=$month, day=$day"; + /// + /// // This matches the first pattern. + /// let hay = "On 14-03-2010, I became a Tenneessee lamb."; + /// re.captures(&mut cache, hay, &mut caps); + /// let mut dst = String::new(); + /// caps.interpolate_string_into(hay, replacement, &mut dst); + /// assert_eq!("year=2010, month=03, day=14", dst); + /// + /// // And this matches the second pattern. 
+ /// let hay = "On 2010-03-14, I became a Tenneessee lamb."; + /// re.captures(&mut cache, hay, &mut caps); + /// let mut dst = String::new(); + /// caps.interpolate_string_into(hay, replacement, &mut dst); + /// assert_eq!("year=2010, month=03, day=14", dst); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn interpolate_string_into( + &self, + haystack: &str, + replacement: &str, + dst: &mut String, + ) { + interpolate::string( + replacement, + |index, dst| { + let span = match self.get_group(index) { + None => return, + Some(span) => span, + }; + dst.push_str(&haystack[span]); + }, + |name| self.group_info().to_index(self.pattern()?, name), + dst, + ); + } + + /// Interpolates the capture references in `replacement` with the + /// corresponding substrings in `haystack` matched by each reference. The + /// interpolated byte string is returned. + /// + /// See the [`interpolate` module](interpolate) for documentation on the + /// format of the replacement string. + /// + /// # Example + /// + /// This example shows how to use interpolation, and also shows how it + /// can work with multi-pattern regexes. + /// + /// ``` + /// use regex_automata::{nfa::thompson::pikevm::PikeVM, PatternID}; + /// + /// let re = PikeVM::new_many(&[ + /// r"(?[0-9]{2})-(?[0-9]{2})-(?[0-9]{4})", + /// r"(?[0-9]{4})-(?[0-9]{2})-(?[0-9]{2})", + /// ])?; + /// let mut cache = re.create_cache(); + /// let mut caps = re.create_captures(); + /// + /// let replacement = b"year=$year, month=$month, day=$day"; + /// + /// // This matches the first pattern. + /// let hay = b"On 14-03-2010, I became a Tenneessee lamb."; + /// re.captures(&mut cache, hay, &mut caps); + /// let result = caps.interpolate_bytes(hay, replacement); + /// assert_eq!(&b"year=2010, month=03, day=14"[..], result); + /// + /// // And this matches the second pattern. 
+ /// let hay = b"On 2010-03-14, I became a Tenneessee lamb."; + /// re.captures(&mut cache, hay, &mut caps); + /// let result = caps.interpolate_bytes(hay, replacement); + /// assert_eq!(&b"year=2010, month=03, day=14"[..], result); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn interpolate_bytes( + &self, + haystack: &[u8], + replacement: &[u8], + ) -> Vec { + let mut dst = vec![]; + self.interpolate_bytes_into(haystack, replacement, &mut dst); + dst + } + + /// Interpolates the capture references in `replacement` with the + /// corresponding substrings in `haystack` matched by each reference. The + /// interpolated byte string is written to `dst`. + /// + /// See the [`interpolate` module](interpolate) for documentation on the + /// format of the replacement string. + /// + /// # Example + /// + /// This example shows how to use interpolation, and also shows how it + /// can work with multi-pattern regexes. + /// + /// ``` + /// use regex_automata::{nfa::thompson::pikevm::PikeVM, PatternID}; + /// + /// let re = PikeVM::new_many(&[ + /// r"(?[0-9]{2})-(?[0-9]{2})-(?[0-9]{4})", + /// r"(?[0-9]{4})-(?[0-9]{2})-(?[0-9]{2})", + /// ])?; + /// let mut cache = re.create_cache(); + /// let mut caps = re.create_captures(); + /// + /// let replacement = b"year=$year, month=$month, day=$day"; + /// + /// // This matches the first pattern. + /// let hay = b"On 14-03-2010, I became a Tenneessee lamb."; + /// re.captures(&mut cache, hay, &mut caps); + /// let mut dst = vec![]; + /// caps.interpolate_bytes_into(hay, replacement, &mut dst); + /// assert_eq!(&b"year=2010, month=03, day=14"[..], dst); + /// + /// // And this matches the second pattern. 
+ /// let hay = b"On 2010-03-14, I became a Tenneessee lamb."; + /// re.captures(&mut cache, hay, &mut caps); + /// let mut dst = vec![]; + /// caps.interpolate_bytes_into(hay, replacement, &mut dst); + /// assert_eq!(&b"year=2010, month=03, day=14"[..], dst); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn interpolate_bytes_into( + &self, + haystack: &[u8], + replacement: &[u8], + dst: &mut Vec, + ) { + interpolate::bytes( + replacement, + |index, dst| { + let span = match self.get_group(index) { + None => return, + Some(span) => span, + }; + dst.extend_from_slice(&haystack[span]); + }, + |name| self.group_info().to_index(self.pattern()?, name), + dst, + ); + } + + /// This is a convenience routine for extracting the substrings + /// corresponding to matching capture groups in the given `haystack`. The + /// `haystack` should be the same substring used to find the match spans in + /// this `Captures` value. + /// + /// This is identical to [`Captures::extract_bytes`], except it works with + /// `&str` instead of `&[u8]`. + /// + /// # Panics + /// + /// This panics if the number of explicit matching groups in this + /// `Captures` value is less than `N`. This also panics if this `Captures` + /// value does not correspond to a match. + /// + /// Note that this does *not* panic if the number of explicit matching + /// groups is bigger than `N`. In that case, only the first `N` matching + /// groups are extracted. 
+ /// + /// # Example + /// + /// ``` + /// use regex_automata::nfa::thompson::pikevm::PikeVM; + /// + /// let re = PikeVM::new(r"([0-9]{4})-([0-9]{2})-([0-9]{2})")?; + /// let mut cache = re.create_cache(); + /// let mut caps = re.create_captures(); + /// + /// let hay = "On 2010-03-14, I became a Tenneessee lamb."; + /// re.captures(&mut cache, hay, &mut caps); + /// assert!(caps.is_match()); + /// let (full, [year, month, day]) = caps.extract(hay); + /// assert_eq!("2010-03-14", full); + /// assert_eq!("2010", year); + /// assert_eq!("03", month); + /// assert_eq!("14", day); + /// + /// // We can also ask for fewer than all capture groups. + /// let (full, [year]) = caps.extract(hay); + /// assert_eq!("2010-03-14", full); + /// assert_eq!("2010", year); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn extract<'h, const N: usize>( + &self, + haystack: &'h str, + ) -> (&'h str, [&'h str; N]) { + let mut matched = self.iter().flatten(); + let whole_match = &haystack[matched.next().expect("a match")]; + let group_matches = [0; N].map(|_| { + let sp = matched.next().expect("too few matching groups"); + &haystack[sp] + }); + (whole_match, group_matches) + } + + /// This is a convenience routine for extracting the substrings + /// corresponding to matching capture groups in the given `haystack`. The + /// `haystack` should be the same substring used to find the match spans in + /// this `Captures` value. + /// + /// This is identical to [`Captures::extract`], except it works with + /// `&[u8]` instead of `&str`. + /// + /// # Panics + /// + /// This panics if the number of explicit matching groups in this + /// `Captures` value is less than `N`. This also panics if this `Captures` + /// value does not correspond to a match. + /// + /// Note that this does *not* panic if the number of explicit matching + /// groups is bigger than `N`. In that case, only the first `N` matching + /// groups are extracted. 
+ /// + /// # Example + /// + /// ``` + /// use regex_automata::nfa::thompson::pikevm::PikeVM; + /// + /// let re = PikeVM::new(r"([0-9]{4})-([0-9]{2})-([0-9]{2})")?; + /// let mut cache = re.create_cache(); + /// let mut caps = re.create_captures(); + /// + /// let hay = b"On 2010-03-14, I became a Tenneessee lamb."; + /// re.captures(&mut cache, hay, &mut caps); + /// assert!(caps.is_match()); + /// let (full, [year, month, day]) = caps.extract_bytes(hay); + /// assert_eq!(b"2010-03-14", full); + /// assert_eq!(b"2010", year); + /// assert_eq!(b"03", month); + /// assert_eq!(b"14", day); + /// + /// // We can also ask for fewer than all capture groups. + /// let (full, [year]) = caps.extract_bytes(hay); + /// assert_eq!(b"2010-03-14", full); + /// assert_eq!(b"2010", year); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn extract_bytes<'h, const N: usize>( + &self, + haystack: &'h [u8], + ) -> (&'h [u8], [&'h [u8]; N]) { + let mut matched = self.iter().flatten(); + let whole_match = &haystack[matched.next().expect("a match")]; + let group_matches = [0; N].map(|_| { + let sp = matched.next().expect("too few matching groups"); + &haystack[sp] + }); + (whole_match, group_matches) + } +} + +/// Lower level "slot" oriented APIs. One does not typically need to use these +/// when executing a search. They are instead mostly intended for folks that +/// are writing their own regex engine while reusing this `Captures` type. +impl Captures { + /// Clear this `Captures` value. + /// + /// After clearing, all slots inside this `Captures` value will be set to + /// `None`. Similarly, any pattern ID that it was previously associated + /// with (for a match) is erased. + /// + /// It is not usually necessary to call this routine. Namely, a `Captures` + /// value only provides high level access to the capturing groups of the + /// pattern that matched, and only low level access to individual slots. 
+ /// Thus, even if slots corresponding to groups that aren't associated + /// with the matching pattern are set, then it won't impact the higher + /// level APIs. Namely, higher level APIs like [`Captures::get_group`] will + /// return `None` if no pattern ID is present, even if there are spans set + /// in the underlying slots. + /// + /// Thus, to "clear" a `Captures` value of a match, it is usually only + /// necessary to call [`Captures::set_pattern`] with `None`. + /// + /// # Example + /// + /// This example shows what happens when a `Captures` value is cleared. + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::nfa::thompson::pikevm::PikeVM; + /// + /// let re = PikeVM::new(r"^(?P\pL+)\s+(?P\pL+)$")?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// + /// re.captures(&mut cache, "Bruce Springsteen", &mut caps); + /// assert!(caps.is_match()); + /// let slots: Vec> = + /// caps.slots().iter().map(|s| s.map(|x| x.get())).collect(); + /// // Note that the following ordering is considered an API guarantee. + /// assert_eq!(slots, vec![ + /// Some(0), + /// Some(17), + /// Some(0), + /// Some(5), + /// Some(6), + /// Some(17), + /// ]); + /// + /// // Now clear the slots. Everything is gone and it is no longer a match. + /// caps.clear(); + /// assert!(!caps.is_match()); + /// let slots: Vec> = + /// caps.slots().iter().map(|s| s.map(|x| x.get())).collect(); + /// assert_eq!(slots, vec![ + /// None, + /// None, + /// None, + /// None, + /// None, + /// None, + /// ]); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn clear(&mut self) { + self.pid = None; + for slot in self.slots.iter_mut() { + *slot = None; + } + } + + /// Set the pattern on this `Captures` value. + /// + /// When the pattern ID is `None`, then this `Captures` value does not + /// correspond to a match (`is_match` will return `false`). Otherwise, it + /// corresponds to a match. 
+ /// + /// This is useful in search implementations where you might want to + /// initially call `set_pattern(None)` in order to avoid the cost of + /// calling `clear()` if it turns out to not be necessary. + /// + /// # Example + /// + /// This example shows that `set_pattern` merely overwrites the pattern ID. + /// It does not actually change the underlying slot values. + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::nfa::thompson::pikevm::PikeVM; + /// + /// let re = PikeVM::new(r"^(?P\pL+)\s+(?P\pL+)$")?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// + /// re.captures(&mut cache, "Bruce Springsteen", &mut caps); + /// assert!(caps.is_match()); + /// assert!(caps.pattern().is_some()); + /// let slots: Vec> = + /// caps.slots().iter().map(|s| s.map(|x| x.get())).collect(); + /// // Note that the following ordering is considered an API guarantee. + /// assert_eq!(slots, vec![ + /// Some(0), + /// Some(17), + /// Some(0), + /// Some(5), + /// Some(6), + /// Some(17), + /// ]); + /// + /// // Now set the pattern to None. Note that the slot values remain. + /// caps.set_pattern(None); + /// assert!(!caps.is_match()); + /// assert!(!caps.pattern().is_some()); + /// let slots: Vec> = + /// caps.slots().iter().map(|s| s.map(|x| x.get())).collect(); + /// // Note that the following ordering is considered an API guarantee. + /// assert_eq!(slots, vec![ + /// Some(0), + /// Some(17), + /// Some(0), + /// Some(5), + /// Some(6), + /// Some(17), + /// ]); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn set_pattern(&mut self, pid: Option) { + self.pid = pid; + } + + /// Returns the underlying slots, where each slot stores a single offset. + /// + /// Every matching capturing group generally corresponds to two slots: one + /// slot for the starting position and another for the ending position. + /// Typically, either both are present or neither are. 
(The weasel word + /// "typically" is used here because it really depends on the regex engine + /// implementation. Every sensible regex engine likely adheres to this + /// invariant, and every regex engine in this crate is sensible.) + /// + /// Generally speaking, callers should prefer to use higher level routines + /// like [`Captures::get_match`] or [`Captures::get_group`]. + /// + /// An important note here is that a regex engine may not reset all of the + /// slots to `None` values when no match occurs, or even when a match of + /// a different pattern occurs. But this depends on how the regex engine + /// implementation deals with slots. + /// + /// # Example + /// + /// This example shows how to get the underlying slots from a regex match. + /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::pikevm::PikeVM, + /// util::primitives::{PatternID, NonMaxUsize}, + /// }; + /// + /// let re = PikeVM::new_many(&[ + /// r"[a-z]+", + /// r"[0-9]+", + /// ])?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// + /// re.captures(&mut cache, "123", &mut caps); + /// assert_eq!(Some(PatternID::must(1)), caps.pattern()); + /// // Note that the only guarantee we have here is that slots 2 and 3 + /// // are set to correct values. The contents of the first two slots are + /// // unspecified since the 0th pattern did not match. + /// let expected = &[ + /// None, + /// None, + /// NonMaxUsize::new(0), + /// NonMaxUsize::new(3), + /// ]; + /// assert_eq!(expected, caps.slots()); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn slots(&self) -> &[Option] { + &self.slots + } + + /// Returns the underlying slots as a mutable slice, where each slot stores + /// a single offset. + /// + /// This tends to be most useful for regex engine implementations for + /// writing offsets for matching capturing groups to slots. + /// + /// See [`Captures::slots`] for more information about slots. 
+ #[inline] + pub fn slots_mut(&mut self) -> &mut [Option] { + &mut self.slots + } +} + +impl core::fmt::Debug for Captures { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + let mut dstruct = f.debug_struct("Captures"); + dstruct.field("pid", &self.pid); + if let Some(pid) = self.pid { + dstruct.field("spans", &CapturesDebugMap { pid, caps: self }); + } + dstruct.finish() + } +} + +/// A little helper type to provide a nice map-like debug representation for +/// our capturing group spans. +struct CapturesDebugMap<'a> { + pid: PatternID, + caps: &'a Captures, +} + +impl<'a> core::fmt::Debug for CapturesDebugMap<'a> { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + struct Key<'a>(usize, Option<&'a str>); + + impl<'a> core::fmt::Debug for Key<'a> { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + write!(f, "{}", self.0)?; + if let Some(name) = self.1 { + write!(f, "/{:?}", name)?; + } + Ok(()) + } + } + + let mut map = f.debug_map(); + let names = self.caps.group_info().pattern_names(self.pid); + for (group_index, maybe_name) in names.enumerate() { + let key = Key(group_index, maybe_name); + match self.caps.get_group(group_index) { + None => map.entry(&key, &None::<()>), + Some(span) => map.entry(&key, &span), + }; + } + map.finish() + } +} + +/// An iterator over all capturing groups in a `Captures` value. +/// +/// This iterator includes capturing groups that did not participate in a +/// match. See the [`Captures::iter`] method documentation for more details +/// and examples. +/// +/// The lifetime parameter `'a` refers to the lifetime of the underlying +/// `Captures` value. 
+#[derive(Clone, Debug)] +pub struct CapturesPatternIter<'a> { + caps: &'a Captures, + names: core::iter::Enumerate>, +} + +impl<'a> Iterator for CapturesPatternIter<'a> { + type Item = Option; + + fn next(&mut self) -> Option> { + let (group_index, _) = self.names.next()?; + Some(self.caps.get_group(group_index)) + } + + fn size_hint(&self) -> (usize, Option) { + self.names.size_hint() + } + + fn count(self) -> usize { + self.names.count() + } +} + +impl<'a> ExactSizeIterator for CapturesPatternIter<'a> {} +impl<'a> core::iter::FusedIterator for CapturesPatternIter<'a> {} + +/// Represents information about capturing groups in a compiled regex. +/// +/// The information encapsulated by this type consists of the following. For +/// each pattern: +/// +/// * A map from every capture group name to its corresponding capture group +/// index. +/// * A map from every capture group index to its corresponding capture group +/// name. +/// * A map from capture group index to its corresponding slot index. A slot +/// refers to one half of a capturing group. That is, a capture slot is either +/// the start or end of a capturing group. A slot is usually the mechanism +/// by which a regex engine records offsets for each capturing group during a +/// search. +/// +/// A `GroupInfo` uses reference counting internally and is thus cheap to +/// clone. +/// +/// # Mapping from capture groups to slots +/// +/// One of the main responsibilities of a `GroupInfo` is to build a mapping +/// from `(PatternID, u32)` (where the `u32` is a capture index) to something +/// called a "slot." As mentioned above, a slot refers to one half of a +/// capturing group. Both combined provide the start and end offsets of +/// a capturing group that participated in a match. +/// +/// **The mapping between group indices and slots is an API guarantee.** That +/// is, the mapping won't change within a semver compatible release. 
+/// +/// Slots exist primarily because this is a convenient mechanism by which +/// regex engines report group offsets at search time. For example, the +/// [`nfa::thompson::State::Capture`](crate::nfa::thompson::State::Capture) +/// NFA state includes the slot index. When a regex engine transitions through +/// this state, it will likely use the slot index to write the current haystack +/// offset to some region of memory. When a match is found, those slots are +/// then reported to the caller, typically via a convenient abstraction like a +/// [`Captures`] value. +/// +/// Because this crate provides first class support for multi-pattern regexes, +/// and because of some performance related reasons, the mapping between +/// capturing groups and slots is a little complex. However, in the case of a +/// single pattern, the mapping can be described very simply: for all capture +/// group indices `i`, its corresponding slots are at `i * 2` and `i * 2 + 1`. +/// Notice that the pattern ID isn't involved at all here, because it only +/// applies to a single-pattern regex, it is therefore always `0`. +/// +/// In the multi-pattern case, the mapping is a bit more complicated. To talk +/// about it, we must define what we mean by "implicit" vs "explicit" +/// capturing groups: +/// +/// * An **implicit** capturing group refers to the capturing group that is +/// present for every pattern automatically, and corresponds to the overall +/// match of a pattern. Every pattern has precisely one implicit capturing +/// group. It is always unnamed and it always corresponds to the capture group +/// index `0`. +/// * An **explicit** capturing group refers to any capturing group that +/// appears in the concrete syntax of the pattern. (Or, if an NFA was hand +/// built without any concrete syntax, it refers to any capturing group with an +/// index greater than `0`.) 
+/// +/// Some examples: +/// +/// * `\w+` has one implicit capturing group and zero explicit capturing +/// groups. +/// * `(\w+)` has one implicit group and one explicit group. +/// * `foo(\d+)(?:\pL+)(\d+)` has one implicit group and two explicit groups. +/// +/// Turning back to the slot mapping, we can now state it as follows: +/// +/// * Given a pattern ID `pid`, the slots for its implicit group are always +/// at `pid * 2` and `pid * 2 + 1`. +/// * Given a pattern ID `0`, the slots for its explicit groups start +/// at `group_info.pattern_len() * 2`. +/// * Given a pattern ID `pid > 0`, the slots for its explicit groups start +/// immediately following where the slots for the explicit groups of `pid - 1` +/// end. +/// +/// In particular, while there is a concrete formula one can use to determine +/// where the slots for the implicit group of any pattern are, there is no +/// general formula for determining where the slots for explicit capturing +/// groups are. This is because each pattern can contain a different number +/// of groups. +/// +/// The intended way of getting the slots for a particular capturing group +/// (whether implicit or explicit) is via the [`GroupInfo::slot`] or +/// [`GroupInfo::slots`] method. +/// +/// See below for a concrete example of how capturing groups get mapped to +/// slots. +/// +/// # Example +/// +/// This example shows how to build a new `GroupInfo` and query it for +/// information. +/// +/// ``` +/// use regex_automata::util::{captures::GroupInfo, primitives::PatternID}; +/// +/// let info = GroupInfo::new(vec![ +/// vec![None, Some("foo")], +/// vec![None], +/// vec![None, None, None, Some("bar"), None], +/// vec![None, None, Some("foo")], +/// ])?; +/// // The number of patterns being tracked. +/// assert_eq!(4, info.pattern_len()); +/// // We can query the number of groups for any pattern. 
+/// assert_eq!(2, info.group_len(PatternID::must(0))); +/// assert_eq!(1, info.group_len(PatternID::must(1))); +/// assert_eq!(5, info.group_len(PatternID::must(2))); +/// assert_eq!(3, info.group_len(PatternID::must(3))); +/// // An invalid pattern always has zero groups. +/// assert_eq!(0, info.group_len(PatternID::must(999))); +/// // 2 slots per group +/// assert_eq!(22, info.slot_len()); +/// +/// // We can map a group index for a particular pattern to its name, if +/// // one exists. +/// assert_eq!(Some("foo"), info.to_name(PatternID::must(3), 2)); +/// assert_eq!(None, info.to_name(PatternID::must(2), 4)); +/// // Or map a name to its group index. +/// assert_eq!(Some(1), info.to_index(PatternID::must(0), "foo")); +/// assert_eq!(Some(2), info.to_index(PatternID::must(3), "foo")); +/// +/// # Ok::<(), Box>(()) +/// ``` +/// +/// # Example: mapping from capture groups to slots +/// +/// This example shows the specific mapping from capture group indices for +/// each pattern to their corresponding slots. The slot values shown in this +/// example are considered an API guarantee. +/// +/// ``` +/// use regex_automata::util::{captures::GroupInfo, primitives::PatternID}; +/// +/// let info = GroupInfo::new(vec![ +/// vec![None, Some("foo")], +/// vec![None], +/// vec![None, None, None, Some("bar"), None], +/// vec![None, None, Some("foo")], +/// ])?; +/// +/// // We first show the slots for each pattern's implicit group. +/// assert_eq!(Some((0, 1)), info.slots(PatternID::must(0), 0)); +/// assert_eq!(Some((2, 3)), info.slots(PatternID::must(1), 0)); +/// assert_eq!(Some((4, 5)), info.slots(PatternID::must(2), 0)); +/// assert_eq!(Some((6, 7)), info.slots(PatternID::must(3), 0)); +/// +/// // And now we show the slots for each pattern's explicit group. 
+/// assert_eq!(Some((8, 9)), info.slots(PatternID::must(0), 1)); +/// assert_eq!(Some((10, 11)), info.slots(PatternID::must(2), 1)); +/// assert_eq!(Some((12, 13)), info.slots(PatternID::must(2), 2)); +/// assert_eq!(Some((14, 15)), info.slots(PatternID::must(2), 3)); +/// assert_eq!(Some((16, 17)), info.slots(PatternID::must(2), 4)); +/// assert_eq!(Some((18, 19)), info.slots(PatternID::must(3), 1)); +/// assert_eq!(Some((20, 21)), info.slots(PatternID::must(3), 2)); +/// +/// // Asking for the slots for an invalid pattern ID or even for an invalid +/// // group index for a specific pattern will return None. So for example, +/// // you're guaranteed to not get the slots for a different pattern than the +/// // one requested. +/// assert_eq!(None, info.slots(PatternID::must(5), 0)); +/// assert_eq!(None, info.slots(PatternID::must(1), 1)); +/// +/// # Ok::<(), Box>(()) +/// ``` +#[derive(Clone, Debug, Default)] +pub struct GroupInfo(Arc); + +impl GroupInfo { + /// Creates a new group info from a sequence of patterns, where each + /// sequence of patterns yields a sequence of possible group names. The + /// index of each pattern in the sequence corresponds to its `PatternID`, + /// and the index of each group in each pattern's sequence corresponds to + /// its corresponding group index. + /// + /// While this constructor is very generic and therefore perhaps hard to + /// chew on, an example of a valid concrete type that can be passed to + /// this constructor is `Vec>>`. The outer `Vec` + /// corresponds to the patterns, i.e., one `Vec>` per + /// pattern. The inner `Vec` corresponds to the capturing groups for + /// each pattern. The `Option` corresponds to the name of the + /// capturing group, if present. + /// + /// It is legal to pass an empty iterator to this constructor. It will + /// return an empty group info with zero slots. 
An empty group info is + /// useful for cases where you have no patterns or for cases where slots + /// aren't being used at all (e.g., for most DFAs in this crate). + /// + /// # Errors + /// + /// This constructor returns an error if the given capturing groups are + /// invalid in some way. Those reasons include, but are not necessarily + /// limited to: + /// + /// * Too many patterns (i.e., `PatternID` would overflow). + /// * Too many capturing groups (e.g., `u32` would overflow). + /// * A pattern is given that has no capturing groups. (All patterns must + /// have at least an implicit capturing group at index `0`.) + /// * The capturing group at index `0` has a name. It must be unnamed. + /// * There are duplicate capturing group names within the same pattern. + /// (Multiple capturing groups with the same name may exist, but they + /// must be in different patterns.) + /// + /// An example below shows how to trigger some of the above error + /// conditions. + /// + /// # Example + /// + /// This example shows how to build a new `GroupInfo` and query it for + /// information. + /// + /// ``` + /// use regex_automata::util::captures::GroupInfo; + /// + /// let info = GroupInfo::new(vec![ + /// vec![None, Some("foo")], + /// vec![None], + /// vec![None, None, None, Some("bar"), None], + /// vec![None, None, Some("foo")], + /// ])?; + /// // The number of patterns being tracked. + /// assert_eq!(4, info.pattern_len()); + /// // 2 slots per group + /// assert_eq!(22, info.slot_len()); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// # Example: empty `GroupInfo` + /// + /// This example shows how to build a new `GroupInfo` and query it for + /// information. + /// + /// ``` + /// use regex_automata::util::captures::GroupInfo; + /// + /// let info = GroupInfo::empty(); + /// // Everything is zero. 
+ /// assert_eq!(0, info.pattern_len()); + /// assert_eq!(0, info.slot_len()); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// # Example: error conditions + /// + /// This example shows how to provoke some of the ways in which building + /// a `GroupInfo` can fail. + /// + /// ``` + /// use regex_automata::util::captures::GroupInfo; + /// + /// // Either the group info is empty, or all patterns must have at least + /// // one capturing group. + /// assert!(GroupInfo::new(vec![ + /// vec![None, Some("a")], // ok + /// vec![None], // ok + /// vec![], // not ok + /// ]).is_err()); + /// // Note that building an empty group info is OK. + /// assert!(GroupInfo::new(Vec::>>::new()).is_ok()); + /// + /// // The first group in each pattern must correspond to an implicit + /// // anonymous group. i.e., One that is not named. By convention, this + /// // group corresponds to the overall match of a regex. Every other group + /// // in a pattern is explicit and optional. + /// assert!(GroupInfo::new(vec![vec![Some("foo")]]).is_err()); + /// + /// // There must not be duplicate group names within the same pattern. + /// assert!(GroupInfo::new(vec![ + /// vec![None, Some("foo"), Some("foo")], + /// ]).is_err()); + /// // But duplicate names across distinct patterns is OK. + /// assert!(GroupInfo::new(vec![ + /// vec![None, Some("foo")], + /// vec![None, Some("foo")], + /// ]).is_ok()); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// There are other ways for building a `GroupInfo` to fail but are + /// difficult to show. For example, if the number of patterns given would + /// overflow `PatternID`. 
+ pub fn new(pattern_groups: P) -> Result + where + P: IntoIterator, + G: IntoIterator>, + N: AsRef, + { + let mut group_info = GroupInfoInner { + slot_ranges: vec![], + name_to_index: vec![], + index_to_name: vec![], + memory_extra: 0, + }; + for (pattern_index, groups) in pattern_groups.into_iter().enumerate() { + // If we can't convert the pattern index to an ID, then the caller + // tried to build capture info for too many patterns. + let pid = PatternID::new(pattern_index) + .map_err(GroupInfoError::too_many_patterns)?; + + let mut groups_iter = groups.into_iter().enumerate(); + match groups_iter.next() { + None => return Err(GroupInfoError::missing_groups(pid)), + Some((_, Some(_))) => { + return Err(GroupInfoError::first_must_be_unnamed(pid)) + } + Some((_, None)) => {} + } + group_info.add_first_group(pid); + // Now iterate over the rest, which correspond to all of the + // (conventionally) explicit capture groups in a regex pattern. + for (group_index, maybe_name) in groups_iter { + // Just like for patterns, if the group index can't be + // converted to a "small" index, then the caller has given too + // many groups for a particular pattern. + let group = SmallIndex::new(group_index).map_err(|_| { + GroupInfoError::too_many_groups(pid, group_index) + })?; + group_info.add_explicit_group(pid, group, maybe_name)?; + } + } + group_info.fixup_slot_ranges()?; + Ok(GroupInfo(Arc::new(group_info))) + } + + /// This creates an empty `GroupInfo`. + /// + /// This is a convenience routine for calling `GroupInfo::new` with an + /// iterator that yields no elements. + /// + /// # Example + /// + /// This example shows how to build a new empty `GroupInfo` and query it + /// for information. + /// + /// ``` + /// use regex_automata::util::captures::GroupInfo; + /// + /// let info = GroupInfo::empty(); + /// // Everything is zero. 
+ /// assert_eq!(0, info.pattern_len()); + /// assert_eq!(0, info.all_group_len()); + /// assert_eq!(0, info.slot_len()); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn empty() -> GroupInfo { + GroupInfo::new(core::iter::empty::<[Option<&str>; 0]>()) + .expect("empty group info is always valid") + } + + /// Return the capture group index corresponding to the given name in the + /// given pattern. If no such capture group name exists in the given + /// pattern, then this returns `None`. + /// + /// If the given pattern ID is invalid, then this returns `None`. + /// + /// This also returns `None` for all inputs if these captures are empty + /// (e.g., built from an empty [`GroupInfo`]). To check whether captures + /// are are present for a specific pattern, use [`GroupInfo::group_len`]. + /// + /// # Example + /// + /// This example shows how to find the capture index for the given pattern + /// and group name. + /// + /// Remember that capture indices are relative to the pattern, such that + /// the same capture index value may refer to different capturing groups + /// for distinct patterns. + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{nfa::thompson::NFA, PatternID}; + /// + /// let (pid0, pid1) = (PatternID::must(0), PatternID::must(1)); + /// + /// let nfa = NFA::new_many(&[ + /// r"a(?P\w+)z(?P\s+)", + /// r"a(?P\d+)z", + /// ])?; + /// let groups = nfa.group_info(); + /// assert_eq!(Some(2), groups.to_index(pid0, "foo")); + /// // Recall that capture index 0 is always unnamed and refers to the + /// // entire pattern. So the first capturing group present in the pattern + /// // itself always starts at index 1. + /// assert_eq!(Some(1), groups.to_index(pid1, "foo")); + /// + /// // And if a name does not exist for a particular pattern, None is + /// // returned. 
+ /// assert!(groups.to_index(pid0, "quux").is_some()); + /// assert!(groups.to_index(pid1, "quux").is_none()); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn to_index(&self, pid: PatternID, name: &str) -> Option { + let indices = self.0.name_to_index.get(pid.as_usize())?; + indices.get(name).cloned().map(|i| i.as_usize()) + } + + /// Return the capture name for the given index and given pattern. If the + /// corresponding group does not have a name, then this returns `None`. + /// + /// If the pattern ID is invalid, then this returns `None`. + /// + /// If the group index is invalid for the given pattern, then this returns + /// `None`. A group `index` is valid for a pattern `pid` in an `nfa` if and + /// only if `index < nfa.pattern_capture_len(pid)`. + /// + /// This also returns `None` for all inputs if these captures are empty + /// (e.g., built from an empty [`GroupInfo`]). To check whether captures + /// are are present for a specific pattern, use [`GroupInfo::group_len`]. + /// + /// # Example + /// + /// This example shows how to find the capture group name for the given + /// pattern and group index. + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{nfa::thompson::NFA, PatternID}; + /// + /// let (pid0, pid1) = (PatternID::must(0), PatternID::must(1)); + /// + /// let nfa = NFA::new_many(&[ + /// r"a(?P\w+)z(\s+)x(\d+)", + /// r"a(\d+)z(?P\s+)", + /// ])?; + /// let groups = nfa.group_info(); + /// assert_eq!(None, groups.to_name(pid0, 0)); + /// assert_eq!(Some("foo"), groups.to_name(pid0, 1)); + /// assert_eq!(None, groups.to_name(pid0, 2)); + /// assert_eq!(None, groups.to_name(pid0, 3)); + /// + /// assert_eq!(None, groups.to_name(pid1, 0)); + /// assert_eq!(None, groups.to_name(pid1, 1)); + /// assert_eq!(Some("foo"), groups.to_name(pid1, 2)); + /// // '3' is not a valid capture index for the second pattern. 
+ /// assert_eq!(None, groups.to_name(pid1, 3)); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn to_name(&self, pid: PatternID, group_index: usize) -> Option<&str> { + let pattern_names = self.0.index_to_name.get(pid.as_usize())?; + pattern_names.get(group_index)?.as_deref() + } + + /// Return an iterator of all capture groups and their names (if present) + /// for a particular pattern. + /// + /// If the given pattern ID is invalid or if this `GroupInfo` is empty, + /// then the iterator yields no elements. + /// + /// The number of elements yielded by this iterator is always equal to + /// the result of calling [`GroupInfo::group_len`] with the same + /// `PatternID`. + /// + /// # Example + /// + /// This example shows how to get a list of all capture group names for + /// a particular pattern. + /// + /// ``` + /// use regex_automata::{nfa::thompson::NFA, PatternID}; + /// + /// let nfa = NFA::new(r"(a)(?Pb)(c)(d)(?Pe)")?; + /// // The first is the implicit group that is always unnammed. The next + /// // 5 groups are the explicit groups found in the concrete syntax above. + /// let expected = vec![None, None, Some("foo"), None, None, Some("bar")]; + /// let got: Vec> = + /// nfa.group_info().pattern_names(PatternID::ZERO).collect(); + /// assert_eq!(expected, got); + /// + /// // Using an invalid pattern ID will result in nothing yielded. + /// let got = nfa.group_info().pattern_names(PatternID::must(999)).count(); + /// assert_eq!(0, got); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn pattern_names(&self, pid: PatternID) -> GroupInfoPatternNames<'_> { + GroupInfoPatternNames { + it: self + .0 + .index_to_name + .get(pid.as_usize()) + .map(|indices| indices.iter()) + .unwrap_or([].iter()), + } + } + + /// Return an iterator of all capture groups for all patterns supported by + /// this `GroupInfo`. Each item yielded is a triple of the group's pattern + /// ID, index in the pattern and the group's name, if present. 
+ /// + /// # Example + /// + /// This example shows how to get a list of all capture groups found in + /// one NFA, potentially spanning multiple patterns. + /// + /// ``` + /// use regex_automata::{nfa::thompson::NFA, PatternID}; + /// + /// let nfa = NFA::new_many(&[ + /// r"(?Pa)", + /// r"a", + /// r"(a)", + /// ])?; + /// let expected = vec![ + /// (PatternID::must(0), 0, None), + /// (PatternID::must(0), 1, Some("foo")), + /// (PatternID::must(1), 0, None), + /// (PatternID::must(2), 0, None), + /// (PatternID::must(2), 1, None), + /// ]; + /// let got: Vec<(PatternID, usize, Option<&str>)> = + /// nfa.group_info().all_names().collect(); + /// assert_eq!(expected, got); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// Unlike other capturing group related routines, this routine doesn't + /// panic even if captures aren't enabled on this NFA: + /// + /// ``` + /// use regex_automata::nfa::thompson::{NFA, WhichCaptures}; + /// + /// let nfa = NFA::compiler() + /// .configure(NFA::config().which_captures(WhichCaptures::None)) + /// .build_many(&[ + /// r"(?Pa)", + /// r"a", + /// r"(a)", + /// ])?; + /// // When captures aren't enabled, there's nothing to return. + /// assert_eq!(0, nfa.group_info().all_names().count()); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn all_names(&self) -> GroupInfoAllNames<'_> { + GroupInfoAllNames { + group_info: self, + pids: PatternID::iter(self.pattern_len()), + current_pid: None, + names: None, + } + } + + /// Returns the starting and ending slot corresponding to the given + /// capturing group for the given pattern. The ending slot is always one + /// more than the starting slot returned. + /// + /// Note that this is like [`GroupInfo::slot`], except that it also returns + /// the ending slot value for convenience. + /// + /// If either the pattern ID or the capture index is invalid, then this + /// returns None. 
+ /// + /// # Example + /// + /// This example shows that the starting slots for the first capturing + /// group of each pattern are distinct. + /// + /// ``` + /// use regex_automata::{nfa::thompson::NFA, PatternID}; + /// + /// let nfa = NFA::new_many(&["a", "b"])?; + /// assert_ne!( + /// nfa.group_info().slots(PatternID::must(0), 0), + /// nfa.group_info().slots(PatternID::must(1), 0), + /// ); + /// + /// // Also, the start and end slot values are never equivalent. + /// let (start, end) = nfa.group_info().slots(PatternID::ZERO, 0).unwrap(); + /// assert_ne!(start, end); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn slots( + &self, + pid: PatternID, + group_index: usize, + ) -> Option<(usize, usize)> { + // Since 'slot' only even returns valid starting slots, we know that + // there must also be an end slot and that end slot is always one more + // than the start slot. + self.slot(pid, group_index).map(|start| (start, start + 1)) + } + + /// Returns the starting slot corresponding to the given capturing group + /// for the given pattern. The ending slot is always one more than the + /// value returned. + /// + /// If either the pattern ID or the capture index is invalid, then this + /// returns None. + /// + /// # Example + /// + /// This example shows that the starting slots for the first capturing + /// group of each pattern are distinct. + /// + /// ``` + /// use regex_automata::{nfa::thompson::NFA, PatternID}; + /// + /// let nfa = NFA::new_many(&["a", "b"])?; + /// assert_ne!( + /// nfa.group_info().slot(PatternID::must(0), 0), + /// nfa.group_info().slot(PatternID::must(1), 0), + /// ); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn slot(&self, pid: PatternID, group_index: usize) -> Option { + if group_index >= self.group_len(pid) { + return None; + } + // At this point, we know that 'pid' refers to a real pattern and that + // 'group_index' refers to a real group. 
We therefore also know that + // the pattern and group can be combined to return a correct slot. + // That's why we don't need to use checked arithmetic below. + if group_index == 0 { + Some(pid.as_usize() * 2) + } else { + // As above, we don't need to check that our slot is less than the + // end of our range since we already know the group index is a + // valid index for the given pattern. + let (start, _) = self.0.slot_ranges[pid]; + Some(start.as_usize() + ((group_index - 1) * 2)) + } + } + + /// Returns the total number of patterns in this `GroupInfo`. + /// + /// This may return zero if the `GroupInfo` was constructed with no + /// patterns. + /// + /// This is guaranteed to be no bigger than [`PatternID::LIMIT`] because + /// `GroupInfo` construction will fail if too many patterns are added. + /// + /// # Example + /// + /// ``` + /// use regex_automata::nfa::thompson::NFA; + /// + /// let nfa = NFA::new_many(&["[0-9]+", "[a-z]+", "[A-Z]+"])?; + /// assert_eq!(3, nfa.group_info().pattern_len()); + /// + /// let nfa = NFA::never_match(); + /// assert_eq!(0, nfa.group_info().pattern_len()); + /// + /// let nfa = NFA::always_match(); + /// assert_eq!(1, nfa.group_info().pattern_len()); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn pattern_len(&self) -> usize { + self.0.pattern_len() + } + + /// Return the number of capture groups in a pattern. + /// + /// If the pattern ID is invalid, then this returns `0`. + /// + /// # Example + /// + /// This example shows how the values returned by this routine may vary + /// for different patterns and NFA configurations. + /// + /// ``` + /// use regex_automata::{nfa::thompson::{NFA, WhichCaptures}, PatternID}; + /// + /// let nfa = NFA::new(r"(a)(b)(c)")?; + /// // There are 3 explicit groups in the pattern's concrete syntax and + /// // 1 unnamed and implicit group spanning the entire pattern. 
+ /// assert_eq!(4, nfa.group_info().group_len(PatternID::ZERO)); + /// + /// let nfa = NFA::new(r"abc")?; + /// // There is just the unnamed implicit group. + /// assert_eq!(1, nfa.group_info().group_len(PatternID::ZERO)); + /// + /// let nfa = NFA::compiler() + /// .configure(NFA::config().which_captures(WhichCaptures::None)) + /// .build(r"abc")?; + /// // We disabled capturing groups, so there are none. + /// assert_eq!(0, nfa.group_info().group_len(PatternID::ZERO)); + /// + /// let nfa = NFA::compiler() + /// .configure(NFA::config().which_captures(WhichCaptures::None)) + /// .build(r"(a)(b)(c)")?; + /// // We disabled capturing groups, so there are none, even if there are + /// // explicit groups in the concrete syntax. + /// assert_eq!(0, nfa.group_info().group_len(PatternID::ZERO)); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn group_len(&self, pid: PatternID) -> usize { + self.0.group_len(pid) + } + + /// Return the total number of capture groups across all patterns. + /// + /// This includes implicit groups that represent the entire match of a + /// pattern. + /// + /// # Example + /// + /// This example shows how the values returned by this routine may vary + /// for different patterns and NFA configurations. + /// + /// ``` + /// use regex_automata::{nfa::thompson::{NFA, WhichCaptures}, PatternID}; + /// + /// let nfa = NFA::new(r"(a)(b)(c)")?; + /// // There are 3 explicit groups in the pattern's concrete syntax and + /// // 1 unnamed and implicit group spanning the entire pattern. + /// assert_eq!(4, nfa.group_info().all_group_len()); + /// + /// let nfa = NFA::new(r"abc")?; + /// // There is just the unnamed implicit group. + /// assert_eq!(1, nfa.group_info().all_group_len()); + /// + /// let nfa = NFA::new_many(&["(a)", "b", "(c)"])?; + /// // Each pattern has one implicit groups, and two + /// // patterns have one explicit group each. 
+ /// assert_eq!(5, nfa.group_info().all_group_len()); + /// + /// let nfa = NFA::compiler() + /// .configure(NFA::config().which_captures(WhichCaptures::None)) + /// .build(r"abc")?; + /// // We disabled capturing groups, so there are none. + /// assert_eq!(0, nfa.group_info().all_group_len()); + /// + /// let nfa = NFA::compiler() + /// .configure(NFA::config().which_captures(WhichCaptures::None)) + /// .build(r"(a)(b)(c)")?; + /// // We disabled capturing groups, so there are none, even if there are + /// // explicit groups in the concrete syntax. + /// assert_eq!(0, nfa.group_info().group_len(PatternID::ZERO)); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn all_group_len(&self) -> usize { + self.slot_len() / 2 + } + + /// Returns the total number of slots in this `GroupInfo` across all + /// patterns. + /// + /// The total number of slots is always twice the total number of capturing + /// groups, including both implicit and explicit groups. + /// + /// # Example + /// + /// This example shows the relationship between the number of capturing + /// groups and slots. + /// + /// ``` + /// use regex_automata::util::captures::GroupInfo; + /// + /// // There are 11 total groups here. + /// let info = GroupInfo::new(vec![ + /// vec![None, Some("foo")], + /// vec![None], + /// vec![None, None, None, Some("bar"), None], + /// vec![None, None, Some("foo")], + /// ])?; + /// // 2 slots per group gives us 11*2=22 slots. + /// assert_eq!(22, info.slot_len()); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn slot_len(&self) -> usize { + self.0.small_slot_len().as_usize() + } + + /// Returns the total number of slots for implicit capturing groups. + /// + /// This is like [`GroupInfo::slot_len`], except it doesn't include the + /// explicit slots for each pattern. Since there are always exactly 2 + /// implicit slots for each pattern, the number of implicit slots is always + /// equal to twice the number of patterns. 
+ /// + /// # Example + /// + /// This example shows the relationship between the number of capturing + /// groups, implicit slots and explicit slots. + /// + /// ``` + /// use regex_automata::util::captures::GroupInfo; + /// + /// // There are 11 total groups here. + /// let info = GroupInfo::new(vec![vec![None, Some("foo"), Some("bar")]])?; + /// // 2 slots per group gives us 11*2=22 slots. + /// assert_eq!(6, info.slot_len()); + /// // 2 implicit slots per pattern gives us 2 implicit slots since there + /// // is 1 pattern. + /// assert_eq!(2, info.implicit_slot_len()); + /// // 2 explicit capturing groups gives us 2*2=4 explicit slots. + /// assert_eq!(4, info.explicit_slot_len()); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn implicit_slot_len(&self) -> usize { + self.pattern_len() * 2 + } + + /// Returns the total number of slots for explicit capturing groups. + /// + /// This is like [`GroupInfo::slot_len`], except it doesn't include the + /// implicit slots for each pattern. (There are always 2 implicit slots for + /// each pattern.) + /// + /// For a non-empty `GroupInfo`, it is always the case that `slot_len` is + /// strictly greater than `explicit_slot_len`. For an empty `GroupInfo`, + /// both the total number of slots and the number of explicit slots is + /// `0`. + /// + /// # Example + /// + /// This example shows the relationship between the number of capturing + /// groups, implicit slots and explicit slots. + /// + /// ``` + /// use regex_automata::util::captures::GroupInfo; + /// + /// // There are 11 total groups here. + /// let info = GroupInfo::new(vec![vec![None, Some("foo"), Some("bar")]])?; + /// // 2 slots per group gives us 11*2=22 slots. + /// assert_eq!(6, info.slot_len()); + /// // 2 implicit slots per pattern gives us 2 implicit slots since there + /// // is 1 pattern. + /// assert_eq!(2, info.implicit_slot_len()); + /// // 2 explicit capturing groups gives us 2*2=4 explicit slots. 
+ /// assert_eq!(4, info.explicit_slot_len()); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn explicit_slot_len(&self) -> usize { + self.slot_len().saturating_sub(self.implicit_slot_len()) + } + + /// Returns the memory usage, in bytes, of this `GroupInfo`. + /// + /// This does **not** include the stack size used up by this `GroupInfo`. + /// To compute that, use `std::mem::size_of::()`. + #[inline] + pub fn memory_usage(&self) -> usize { + use core::mem::size_of as s; + + s::() + + self.0.slot_ranges.len() * s::<(SmallIndex, SmallIndex)>() + + self.0.name_to_index.len() * s::() + + self.0.index_to_name.len() * s::>>>() + + self.0.memory_extra + } +} + +/// A map from capture group name to its corresponding capture group index. +/// +/// This type is actually wrapped inside a Vec indexed by pattern ID on a +/// `GroupInfo`, since multiple patterns may have the same capture group name. +/// That is, each pattern gets its own namespace of capture group names. +/// +/// Perhaps a more memory efficient representation would be +/// HashMap<(PatternID, Arc), usize>, but this makes it difficult to look +/// up a capture index by name without producing a `Arc`, which requires +/// an allocation. To fix this, I think we'd need to define our own unsized +/// type or something? Anyway, I didn't give this much thought since it +/// probably doesn't matter much in the grand scheme of things. But it did +/// stand out to me as mildly wasteful. +#[cfg(feature = "std")] +type CaptureNameMap = std::collections::HashMap, SmallIndex>; +#[cfg(not(feature = "std"))] +type CaptureNameMap = alloc::collections::BTreeMap, SmallIndex>; + +/// The inner guts of `GroupInfo`. This type only exists so that it can +/// be wrapped in an `Arc` to make `GroupInfo` reference counted. 
+#[derive(Debug, Default)] +struct GroupInfoInner { + slot_ranges: Vec<(SmallIndex, SmallIndex)>, + name_to_index: Vec, + index_to_name: Vec>>>, + memory_extra: usize, +} + +impl GroupInfoInner { + /// This adds the first unnamed group for the given pattern ID. The given + /// pattern ID must be zero if this is the first time this method is + /// called, or must be exactly one more than the pattern ID supplied to the + /// previous call to this method. (This method panics if this rule is + /// violated.) + /// + /// This can be thought of as initializing the GroupInfo state for the + /// given pattern and closing off the state for any previous pattern. + fn add_first_group(&mut self, pid: PatternID) { + assert_eq!(pid.as_usize(), self.slot_ranges.len()); + assert_eq!(pid.as_usize(), self.name_to_index.len()); + assert_eq!(pid.as_usize(), self.index_to_name.len()); + // This is the start of our slots for the explicit capturing groups. + // Note that since the slots for the 0th group for every pattern appear + // before any slots for the nth group (where n > 0) in any pattern, we + // will have to fix up the slot ranges once we know how many patterns + // we've added capture groups for. + let slot_start = self.small_slot_len(); + self.slot_ranges.push((slot_start, slot_start)); + self.name_to_index.push(CaptureNameMap::new()); + self.index_to_name.push(vec![None]); + self.memory_extra += core::mem::size_of::>>(); + } + + /// Add an explicit capturing group for the given pattern with the given + /// index. If the group has a name, then that must be given as well. + /// + /// Note that every capturing group except for the first or zeroth group is + /// explicit. + /// + /// This returns an error if adding this group would result in overflowing + /// slot indices or if a capturing group with the same name for this + /// pattern has already been added. 
+ fn add_explicit_group>( + &mut self, + pid: PatternID, + group: SmallIndex, + maybe_name: Option, + ) -> Result<(), GroupInfoError> { + // We also need to check that the slot index generated for + // this group is also valid. Although, this is a little weird + // because we offset these indices below, at which point, we'll + // have to recheck them. Gosh this is annoying. Note that + // the '+2' below is OK because 'end' is guaranteed to be less + // than isize::MAX. + let end = &mut self.slot_ranges[pid].1; + *end = SmallIndex::new(end.as_usize() + 2).map_err(|_| { + GroupInfoError::too_many_groups(pid, group.as_usize()) + })?; + if let Some(name) = maybe_name { + let name = Arc::::from(name.as_ref()); + if self.name_to_index[pid].contains_key(&*name) { + return Err(GroupInfoError::duplicate(pid, &name)); + } + let len = name.len(); + self.name_to_index[pid].insert(Arc::clone(&name), group); + self.index_to_name[pid].push(Some(name)); + // Adds the memory used by the Arc in both maps. + self.memory_extra += + 2 * (len + core::mem::size_of::>>()); + // And also the value entry for the 'name_to_index' map. + // This is probably an underestimate for 'name_to_index' since + // hashmaps/btrees likely have some non-zero overhead, but we + // assume here that they have zero overhead. + self.memory_extra += core::mem::size_of::(); + } else { + self.index_to_name[pid].push(None); + self.memory_extra += core::mem::size_of::>>(); + } + // This is a sanity assert that checks that our group index + // is in line with the number of groups added so far for this + // pattern. + assert_eq!(group.one_more(), self.group_len(pid)); + // And is also in line with the 'index_to_name' map. + assert_eq!(group.one_more(), self.index_to_name[pid].len()); + Ok(()) + } + + /// This corrects the slot ranges to account for the slots corresponding + /// to the zeroth group of each pattern. 
That is, every slot range is + /// offset by 'pattern_len() * 2', since each pattern uses two slots to + /// represent the zeroth group. + fn fixup_slot_ranges(&mut self) -> Result<(), GroupInfoError> { + use crate::util::primitives::IteratorIndexExt; + // Since we know number of patterns fits in PatternID and + // PatternID::MAX < isize::MAX, it follows that multiplying by 2 will + // never overflow usize. + let offset = self.pattern_len().checked_mul(2).unwrap(); + for (pid, &mut (ref mut start, ref mut end)) in + self.slot_ranges.iter_mut().with_pattern_ids() + { + let group_len = 1 + ((end.as_usize() - start.as_usize()) / 2); + let new_end = match end.as_usize().checked_add(offset) { + Some(new_end) => new_end, + None => { + return Err(GroupInfoError::too_many_groups( + pid, group_len, + )) + } + }; + *end = SmallIndex::new(new_end).map_err(|_| { + GroupInfoError::too_many_groups(pid, group_len) + })?; + // Since start <= end, if end is valid then start must be too. + *start = SmallIndex::new(start.as_usize() + offset).unwrap(); + } + Ok(()) + } + + /// Return the total number of patterns represented by this capture slot + /// info. + fn pattern_len(&self) -> usize { + self.slot_ranges.len() + } + + /// Return the total number of capturing groups for the given pattern. If + /// the given pattern isn't valid for this capture slot info, then 0 is + /// returned. + fn group_len(&self, pid: PatternID) -> usize { + let (start, end) = match self.slot_ranges.get(pid.as_usize()) { + None => return 0, + Some(range) => range, + }; + // The difference between any two SmallIndex values always fits in a + // usize since we know that SmallIndex::MAX <= isize::MAX-1. We also + // know that start<=end by construction and that the number of groups + // never exceeds SmallIndex and thus never overflows usize. + 1 + ((end.as_usize() - start.as_usize()) / 2) + } + + /// Return the total number of slots in this capture slot info as a + /// "small index." 
+ fn small_slot_len(&self) -> SmallIndex { + // Since slots are allocated in order of pattern (starting at 0) and + // then in order of capture group, it follows that the number of slots + // is the end of the range of slots for the last pattern. This is + // true even when the last pattern has no capturing groups, since + // 'slot_ranges' will still represent it explicitly with an empty + // range. + self.slot_ranges.last().map_or(SmallIndex::ZERO, |&(_, end)| end) + } +} + +/// An error that may occur when building a `GroupInfo`. +/// +/// Building a `GroupInfo` does a variety of checks to make sure the +/// capturing groups satisfy a number of invariants. This includes, but is not +/// limited to, ensuring that the first capturing group is unnamed and that +/// there are no duplicate capture groups for a specific pattern. +#[derive(Clone, Debug)] +pub struct GroupInfoError { + kind: GroupInfoErrorKind, +} + +/// The kind of error that occurs when building a `GroupInfo` fails. +/// +/// We keep this un-exported because it's not clear how useful it is to +/// export it. +#[derive(Clone, Debug)] +enum GroupInfoErrorKind { + /// This occurs when too many patterns have been added. i.e., It would + /// otherwise overflow a `PatternID`. + TooManyPatterns { err: PatternIDError }, + /// This occurs when too many capturing groups have been added for a + /// particular pattern. + TooManyGroups { + /// The ID of the pattern that had too many groups. + pattern: PatternID, + /// The minimum number of groups that the caller has tried to add for + /// a pattern. + minimum: usize, + }, + /// An error that occurs when a pattern has no capture groups. Either the + /// group info must be empty, or all patterns must have at least one group + /// (corresponding to the unnamed group for the entire pattern). + MissingGroups { + /// The ID of the pattern that had no capturing groups. 
+ pattern: PatternID, + }, + /// An error that occurs when one tries to provide a name for the capture + /// group at index 0. This capturing group must currently always be + /// unnamed. + FirstMustBeUnnamed { + /// The ID of the pattern that was found to have a named first + /// capturing group. + pattern: PatternID, + }, + /// An error that occurs when duplicate capture group names for the same + /// pattern are added. + /// + /// NOTE: At time of writing, this error can never occur if you're using + /// regex-syntax, since the parser itself will reject patterns with + /// duplicate capture group names. This error can only occur when the + /// builder is used to hand construct NFAs. + Duplicate { + /// The pattern in which the duplicate capture group name was found. + pattern: PatternID, + /// The duplicate name. + name: String, + }, +} + +impl GroupInfoError { + fn too_many_patterns(err: PatternIDError) -> GroupInfoError { + GroupInfoError { kind: GroupInfoErrorKind::TooManyPatterns { err } } + } + + fn too_many_groups(pattern: PatternID, minimum: usize) -> GroupInfoError { + GroupInfoError { + kind: GroupInfoErrorKind::TooManyGroups { pattern, minimum }, + } + } + + fn missing_groups(pattern: PatternID) -> GroupInfoError { + GroupInfoError { kind: GroupInfoErrorKind::MissingGroups { pattern } } + } + + fn first_must_be_unnamed(pattern: PatternID) -> GroupInfoError { + GroupInfoError { + kind: GroupInfoErrorKind::FirstMustBeUnnamed { pattern }, + } + } + + fn duplicate(pattern: PatternID, name: &str) -> GroupInfoError { + GroupInfoError { + kind: GroupInfoErrorKind::Duplicate { + pattern, + name: String::from(name), + }, + } + } +} + +#[cfg(feature = "std")] +impl std::error::Error for GroupInfoError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self.kind { + GroupInfoErrorKind::TooManyPatterns { .. } + | GroupInfoErrorKind::TooManyGroups { .. } + | GroupInfoErrorKind::MissingGroups { .. 
} + | GroupInfoErrorKind::FirstMustBeUnnamed { .. } + | GroupInfoErrorKind::Duplicate { .. } => None, + } + } +} + +impl core::fmt::Display for GroupInfoError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + use self::GroupInfoErrorKind::*; + + match self.kind { + TooManyPatterns { ref err } => { + write!(f, "too many patterns to build capture info: {}", err) + } + TooManyGroups { pattern, minimum } => { + write!( + f, + "too many capture groups (at least {}) were \ + found for pattern {}", + minimum, + pattern.as_usize() + ) + } + MissingGroups { pattern } => write!( + f, + "no capturing groups found for pattern {} \ + (either all patterns have zero groups or all patterns have \ + at least one group)", + pattern.as_usize(), + ), + FirstMustBeUnnamed { pattern } => write!( + f, + "first capture group (at index 0) for pattern {} has a name \ + (it must be unnamed)", + pattern.as_usize(), + ), + Duplicate { pattern, ref name } => write!( + f, + "duplicate capture group name '{}' found for pattern {}", + name, + pattern.as_usize(), + ), + } + } +} + +/// An iterator over capturing groups and their names for a specific pattern. +/// +/// This iterator is created by [`GroupInfo::pattern_names`]. +/// +/// The lifetime parameter `'a` refers to the lifetime of the `GroupInfo` +/// from which this iterator was created. 
+#[derive(Clone, Debug)] +pub struct GroupInfoPatternNames<'a> { + it: core::slice::Iter<'a, Option>>, +} + +impl GroupInfoPatternNames<'static> { + fn empty() -> GroupInfoPatternNames<'static> { + GroupInfoPatternNames { it: [].iter() } + } +} + +impl<'a> Iterator for GroupInfoPatternNames<'a> { + type Item = Option<&'a str>; + + fn next(&mut self) -> Option> { + self.it.next().map(|x| x.as_deref()) + } + + fn size_hint(&self) -> (usize, Option) { + self.it.size_hint() + } + + fn count(self) -> usize { + self.it.count() + } +} + +impl<'a> ExactSizeIterator for GroupInfoPatternNames<'a> {} +impl<'a> core::iter::FusedIterator for GroupInfoPatternNames<'a> {} + +/// An iterator over capturing groups and their names for a `GroupInfo`. +/// +/// This iterator is created by [`GroupInfo::all_names`]. +/// +/// The lifetime parameter `'a` refers to the lifetime of the `GroupInfo` +/// from which this iterator was created. +#[derive(Debug)] +pub struct GroupInfoAllNames<'a> { + group_info: &'a GroupInfo, + pids: PatternIDIter, + current_pid: Option, + names: Option>>, +} + +impl<'a> Iterator for GroupInfoAllNames<'a> { + type Item = (PatternID, usize, Option<&'a str>); + + fn next(&mut self) -> Option<(PatternID, usize, Option<&'a str>)> { + // If the group info has no captures, then we never have anything + // to yield. We need to consider this case explicitly (at time of + // writing) because 'pattern_capture_names' will panic if captures + // aren't enabled. 
+ if self.group_info.0.index_to_name.is_empty() { + return None; + } + if self.current_pid.is_none() { + self.current_pid = Some(self.pids.next()?); + } + let pid = self.current_pid.unwrap(); + if self.names.is_none() { + self.names = Some(self.group_info.pattern_names(pid).enumerate()); + } + let (group_index, name) = match self.names.as_mut().unwrap().next() { + Some((group_index, name)) => (group_index, name), + None => { + self.current_pid = None; + self.names = None; + return self.next(); + } + }; + Some((pid, group_index, name)) + } +} diff --git a/vendor/regex-automata/src/util/determinize/mod.rs b/vendor/regex-automata/src/util/determinize/mod.rs new file mode 100644 index 0000000..ba32991 --- /dev/null +++ b/vendor/regex-automata/src/util/determinize/mod.rs @@ -0,0 +1,682 @@ +/*! +This module contains types and routines for implementing determinization. + +In this crate, there are at least two places where we implement +determinization: fully ahead-of-time compiled DFAs in the `dfa` module and +lazily compiled DFAs in the `hybrid` module. The stuff in this module +corresponds to the things that are in common between these implementations. + +There are three broad things that our implementations of determinization have +in common, as defined by this module: + +* The classification of start states. That is, whether we're dealing with +word boundaries, line boundaries, etc., is all the same. This also includes +the look-behind assertions that are satisfied by each starting state +classification. +* The representation of DFA states as sets of NFA states, including +convenience types for building these DFA states that are amenable to reusing +allocations. +* Routines for the "classical" parts of determinization: computing the +epsilon closure, tracking match states (with corresponding pattern IDs, since +we support multi-pattern finite automata) and, of course, computing the +transition function between states for units of input. 
+ +I did consider a couple of alternatives to this particular form of code reuse: + +1. Don't do any code reuse. The problem here is that we *really* want both +forms of determinization to do exactly identical things when it comes to +their handling of NFA states. While our tests generally ensure this, the code +is tricky and large enough where not reusing code is a pretty big bummer. + +2. Implement all of determinization once and make it generic over fully +compiled DFAs and lazily compiled DFAs. While I didn't actually try this +approach, my instinct is that it would be more complex than is needed here. +And the interface required would be pretty hairy. Instead, I think splitting +it into logical sub-components works better. +*/ + +use alloc::vec::Vec; + +pub(crate) use self::state::{ + State, StateBuilderEmpty, StateBuilderMatches, StateBuilderNFA, +}; + +use crate::{ + nfa::thompson, + util::{ + alphabet, + look::{Look, LookSet}, + primitives::StateID, + search::MatchKind, + sparse_set::{SparseSet, SparseSets}, + start::Start, + utf8, + }, +}; + +mod state; + +/// Compute the set of all reachable NFA states, including the full epsilon +/// closure, from a DFA state for a single unit of input. The set of reachable +/// states is returned as a `StateBuilderNFA`. The `StateBuilderNFA` returned +/// also includes any look-behind assertions satisfied by `unit`, in addition +/// to whether it is a match state. For multi-pattern DFAs, the builder will +/// also include the pattern IDs that match (in the order seen). +/// +/// `nfa` must be able to resolve any NFA state in `state` and any NFA state +/// reachable via the epsilon closure of any NFA state in `state`. `sparses` +/// must have capacity equivalent to `nfa.len()`. +/// +/// `match_kind` should correspond to the match semantics implemented by the +/// DFA being built. 
Generally speaking, for leftmost-first match semantics, +/// states that appear after the first NFA match state will not be included in +/// the `StateBuilderNFA` returned since they are impossible to visit. +/// +/// `sparses` is used as scratch space for NFA traversal. Other than their +/// capacity requirements (detailed above), there are no requirements on what's +/// contained within them (if anything). Similarly, what's inside of them once +/// this routine returns is unspecified. +/// +/// `stack` must have length 0. It is used as scratch space for depth first +/// traversal. After returning, it is guaranteed that `stack` will have length +/// 0. +/// +/// `state` corresponds to the current DFA state on which one wants to compute +/// the transition for the input `unit`. +/// +/// `empty_builder` corresponds to the builder allocation to use to produce a +/// complete `StateBuilderNFA` state. If the state is not needed (or is already +/// cached), then it can be cleared and reused without needing to create a new +/// `State`. The `StateBuilderNFA` state returned is final and ready to be +/// turned into a `State` if necessary. +pub(crate) fn next( + nfa: &thompson::NFA, + match_kind: MatchKind, + sparses: &mut SparseSets, + stack: &mut Vec, + state: &State, + unit: alphabet::Unit, + empty_builder: StateBuilderEmpty, +) -> StateBuilderNFA { + sparses.clear(); + + // Whether the NFA is matched in reverse or not. We use this in some + // conditional logic for dealing with the exceptionally annoying CRLF-aware + // line anchors. + let rev = nfa.is_reverse(); + // The look-around matcher that our NFA is configured with. We don't + // actually use it to match look-around assertions, but we do need its + // configuration for constructing states consistent with how it matches. + let lookm = nfa.look_matcher(); + + // Put the NFA state IDs into a sparse set in case we need to + // re-compute their epsilon closure. 
+ // + // Doing this state shuffling is technically not necessary unless some + // kind of look-around is used in the DFA. Some ad hoc experiments + // suggested that avoiding this didn't lead to much of an improvement, + // but perhaps more rigorous experimentation should be done. And in + // particular, avoiding this check requires some light refactoring of + // the code below. + state.iter_nfa_state_ids(|nfa_id| { + sparses.set1.insert(nfa_id); + }); + + // Compute look-ahead assertions originating from the current state. Based + // on the input unit we're transitioning over, some additional set of + // assertions may be true. Thus, we re-compute this state's epsilon closure + // (but only if necessary). Notably, when we build a DFA state initially, + // we don't enable any look-ahead assertions because we don't know whether + // they're true or not at that point. + if !state.look_need().is_empty() { + // Add look-ahead assertions that are now true based on the current + // input unit. 
+ let mut look_have = state.look_have().clone(); + match unit.as_u8() { + Some(b'\r') => { + if !rev || !state.is_half_crlf() { + look_have = look_have.insert(Look::EndCRLF); + } + } + Some(b'\n') => { + if rev || !state.is_half_crlf() { + look_have = look_have.insert(Look::EndCRLF); + } + } + Some(_) => {} + None => { + look_have = look_have + .insert(Look::End) + .insert(Look::EndLF) + .insert(Look::EndCRLF); + } + } + if unit.is_byte(lookm.get_line_terminator()) { + look_have = look_have.insert(Look::EndLF); + } + if state.is_half_crlf() + && ((rev && !unit.is_byte(b'\r')) + || (!rev && !unit.is_byte(b'\n'))) + { + look_have = look_have.insert(Look::StartCRLF); + } + if state.is_from_word() == unit.is_word_byte() { + look_have = look_have + .insert(Look::WordAsciiNegate) + .insert(Look::WordUnicodeNegate); + } else { + look_have = + look_have.insert(Look::WordAscii).insert(Look::WordUnicode); + } + if !unit.is_word_byte() { + look_have = look_have + .insert(Look::WordEndHalfAscii) + .insert(Look::WordEndHalfUnicode); + } + if state.is_from_word() && !unit.is_word_byte() { + look_have = look_have + .insert(Look::WordEndAscii) + .insert(Look::WordEndUnicode); + } else if !state.is_from_word() && unit.is_word_byte() { + look_have = look_have + .insert(Look::WordStartAscii) + .insert(Look::WordStartUnicode); + } + // If we have new assertions satisfied that are among the set of + // assertions that exist in this state (that is, just because we added + // an EndLF assertion above doesn't mean there is an EndLF conditional + // epsilon transition in this state), then we re-compute this state's + // epsilon closure using the updated set of assertions. + // + // Note that since our DFA states omit unconditional epsilon + // transitions, this check is necessary for correctness. If we re-did + // the epsilon closure below needlessly, it could change based on the + // fact that we omitted epsilon states originally. 
+ if !look_have + .subtract(state.look_have()) + .intersect(state.look_need()) + .is_empty() + { + for nfa_id in sparses.set1.iter() { + epsilon_closure( + nfa, + nfa_id, + look_have, + stack, + &mut sparses.set2, + ); + } + sparses.swap(); + sparses.set2.clear(); + } + } + + // Convert our empty builder into one that can record assertions and match + // pattern IDs. + let mut builder = empty_builder.into_matches(); + // Set whether the StartLF look-behind assertion is true for this + // transition or not. The look-behind assertion for ASCII word boundaries + // is handled below. + if nfa.look_set_any().contains_anchor_line() + && unit.is_byte(lookm.get_line_terminator()) + { + // Why only handle StartLF here and not Start? That's because Start + // can only impact the starting state, which is special cased in + // start state handling. + builder.set_look_have(|have| have.insert(Look::StartLF)); + } + // We also need to add StartCRLF to our assertions too, if we can. This + // is unfortunately a bit more complicated, because it depends on the + // direction of the search. In the forward direction, ^ matches after a + // \n, but in the reverse direction, ^ only matches after a \r. (This is + // further complicated by the fact that reverse a regex means changing a ^ + // to a $ and vice versa.) + if nfa.look_set_any().contains_anchor_crlf() + && ((rev && unit.is_byte(b'\r')) || (!rev && unit.is_byte(b'\n'))) + { + builder.set_look_have(|have| have.insert(Look::StartCRLF)); + } + // And also for the start-half word boundary assertions. As long as the + // look-behind byte is not a word char, then the assertions are satisfied. + if nfa.look_set_any().contains_word() && !unit.is_word_byte() { + builder.set_look_have(|have| { + have.insert(Look::WordStartHalfAscii) + .insert(Look::WordStartHalfUnicode) + }); + } + for nfa_id in sparses.set1.iter() { + match *nfa.state(nfa_id) { + thompson::State::Union { .. } + | thompson::State::BinaryUnion { .. 
} + | thompson::State::Fail + | thompson::State::Look { .. } + | thompson::State::Capture { .. } => {} + thompson::State::Match { pattern_id } => { + // Notice here that we are calling the NEW state a match + // state if the OLD state we are transitioning from + // contains an NFA match state. This is precisely how we + // delay all matches by one byte and also what therefore + // guarantees that starting states cannot be match states. + // + // If we didn't delay matches by one byte, then whether + // a DFA is a matching state or not would be determined + // by whether one of its own constituent NFA states + // was a match state. (And that would be done in + // 'add_nfa_states'.) + // + // Also, 'add_match_pattern_id' requires that callers never + // pass duplicative pattern IDs. We do in fact uphold that + // guarantee here, but it's subtle. In particular, a Thompson + // NFA guarantees that each pattern has exactly one match + // state. Moreover, since we're iterating over the NFA state + // IDs in a set, we are guarateed not to have any duplicative + // match states. Thus, it is impossible to add the same pattern + // ID more than once. + // + // N.B. We delay matches by 1 byte as a way to hack 1-byte + // look-around into DFA searches. This lets us support ^, $ + // and ASCII-only \b. The delay is also why we need a special + // "end-of-input" (EOI) sentinel and why we need to follow the + // EOI sentinel at the end of every search. This final EOI + // transition is necessary to report matches found at the end + // of a haystack. 
+ builder.add_match_pattern_id(pattern_id); + if !match_kind.continue_past_first_match() { + break; + } + } + thompson::State::ByteRange { ref trans } => { + if trans.matches_unit(unit) { + epsilon_closure( + nfa, + trans.next, + builder.look_have(), + stack, + &mut sparses.set2, + ); + } + } + thompson::State::Sparse(ref sparse) => { + if let Some(next) = sparse.matches_unit(unit) { + epsilon_closure( + nfa, + next, + builder.look_have(), + stack, + &mut sparses.set2, + ); + } + } + thompson::State::Dense(ref dense) => { + if let Some(next) = dense.matches_unit(unit) { + epsilon_closure( + nfa, + next, + builder.look_have(), + stack, + &mut sparses.set2, + ); + } + } + } + } + // We only set the word byte if there's a word boundary look-around + // anywhere in this regex. Otherwise, there's no point in bloating the + // number of states if we don't have one. + // + // We also only set it when the state has a non-zero number of NFA states. + // Otherwise, we could wind up with states that *should* be DEAD states + // but are otherwise distinct from DEAD states because of this look-behind + // assertion being set. While this can't technically impact correctness *in + // theory*, it can create pathological DFAs that consume input until EOI or + // a quit byte is seen. Consuming until EOI isn't a correctness problem, + // but a (serious) perf problem. Hitting a quit byte, however, could be a + // correctness problem since it could cause search routines to report an + // error instead of a detected match once the quit state is entered. (The + // search routine could be made to be a bit smarter by reporting a match + // if one was detected once it enters a quit state (and indeed, the search + // routines in this crate do just that), but it seems better to prevent + // these things by construction if possible.) 
+ if !sparses.set2.is_empty() { + if nfa.look_set_any().contains_word() && unit.is_word_byte() { + builder.set_is_from_word(); + } + if nfa.look_set_any().contains_anchor_crlf() + && ((rev && unit.is_byte(b'\n')) || (!rev && unit.is_byte(b'\r'))) + { + builder.set_is_half_crlf(); + } + } + let mut builder_nfa = builder.into_nfa(); + add_nfa_states(nfa, &sparses.set2, &mut builder_nfa); + builder_nfa +} + +/// Compute the epsilon closure for the given NFA state. The epsilon closure +/// consists of all NFA state IDs, including `start_nfa_id`, that can be +/// reached from `start_nfa_id` without consuming any input. These state IDs +/// are written to `set` in the order they are visited, but only if they are +/// not already in `set`. `start_nfa_id` must be a valid state ID for the NFA +/// given. +/// +/// `look_have` consists of the satisfied assertions at the current +/// position. For conditional look-around epsilon transitions, these are +/// only followed if they are satisfied by `look_have`. +/// +/// `stack` must have length 0. It is used as scratch space for depth first +/// traversal. After returning, it is guaranteed that `stack` will have length +/// 0. +pub(crate) fn epsilon_closure( + nfa: &thompson::NFA, + start_nfa_id: StateID, + look_have: LookSet, + stack: &mut Vec, + set: &mut SparseSet, +) { + assert!(stack.is_empty()); + // If this isn't an epsilon state, then the epsilon closure is always just + // itself, so there's no need to spin up the machinery below to handle it. + if !nfa.state(start_nfa_id).is_epsilon() { + set.insert(start_nfa_id); + return; + } + + stack.push(start_nfa_id); + while let Some(mut id) = stack.pop() { + // In many cases, we can avoid stack operations when an NFA state only + // adds one new state to visit. In that case, we just set our ID to + // that state and mush on. We only use the stack when an NFA state + // introduces multiple new states to visit. 
+ loop { + // Insert this NFA state, and if it's already in the set and thus + // already visited, then we can move on to the next one. + if !set.insert(id) { + break; + } + match *nfa.state(id) { + thompson::State::ByteRange { .. } + | thompson::State::Sparse { .. } + | thompson::State::Dense { .. } + | thompson::State::Fail + | thompson::State::Match { .. } => break, + thompson::State::Look { look, next } => { + if !look_have.contains(look) { + break; + } + id = next; + } + thompson::State::Union { ref alternates } => { + id = match alternates.get(0) { + None => break, + Some(&id) => id, + }; + // We need to process our alternates in order to preserve + // match preferences, so put the earliest alternates closer + // to the top of the stack. + stack.extend(alternates[1..].iter().rev()); + } + thompson::State::BinaryUnion { alt1, alt2 } => { + id = alt1; + stack.push(alt2); + } + thompson::State::Capture { next, .. } => { + id = next; + } + } + } + } +} + +/// Add the NFA state IDs in the given `set` to the given DFA builder state. +/// The order in which states are added corresponds to the order in which they +/// were added to `set`. +/// +/// The DFA builder state given should already have its complete set of match +/// pattern IDs added (if any) and any look-behind assertions (StartLF, Start +/// and whether this state is being generated for a transition over a word byte +/// when applicable) that are true immediately prior to transitioning into this +/// state (via `builder.look_have()`). The match pattern IDs should correspond +/// to matches that occurred on the previous transition, since all matches are +/// delayed by one byte. The things that should _not_ be set are look-ahead +/// assertions (EndLF, End and whether the next byte is a word byte or not). +/// The builder state should also not have anything in `look_need` set, as this +/// routine will compute that for you. 
+/// +/// The given NFA should be able to resolve all identifiers in `set` to a +/// particular NFA state. Additionally, `set` must have capacity equivalent +/// to `nfa.len()`. +pub(crate) fn add_nfa_states( + nfa: &thompson::NFA, + set: &SparseSet, + builder: &mut StateBuilderNFA, +) { + for nfa_id in set.iter() { + match *nfa.state(nfa_id) { + thompson::State::ByteRange { .. } => { + builder.add_nfa_state_id(nfa_id); + } + thompson::State::Sparse { .. } => { + builder.add_nfa_state_id(nfa_id); + } + thompson::State::Dense { .. } => { + builder.add_nfa_state_id(nfa_id); + } + thompson::State::Look { look, .. } => { + builder.add_nfa_state_id(nfa_id); + builder.set_look_need(|need| need.insert(look)); + } + thompson::State::Union { .. } + | thompson::State::BinaryUnion { .. } => { + // Pure epsilon transitions don't need to be tracked as part + // of the DFA state. Tracking them is actually superfluous; + // they won't cause any harm other than making determinization + // slower. + // + // Why aren't these needed? Well, in an NFA, epsilon + // transitions are really just jumping points to other states. + // So once you hit an epsilon transition, the same set of + // resulting states always appears. Therefore, putting them in + // a DFA's set of ordered NFA states is strictly redundant. + // + // Look-around states are also epsilon transitions, but + // they are *conditional*. So their presence could be + // discriminatory, and thus, they are tracked above. + // + // But wait... why are epsilon states in our `set` in the first + // place? Why not just leave them out? They're in our `set` + // because it was generated by computing an epsilon closure, + // and we want to keep track of all states we visited to avoid + // re-visiting them. In exchange, we have to do this second + // iteration over our collected states to finalize our DFA + // state. 
In theory, we could avoid this second iteration if + // we maintained two sets during epsilon closure: the set of + // visited states (to avoid cycles) and the set of states that + // will actually be used to construct the next DFA state. + // + // Note that this optimization requires that we re-compute the + // epsilon closure to account for look-ahead in 'next' *only + // when necessary*. Namely, only when the set of look-around + // assertions changes and only when those changes are within + // the set of assertions that are needed in order to step + // through the closure correctly. Otherwise, if we re-do the + // epsilon closure needlessly, it could change based on the + // fact that we are omitting epsilon states here. + // + // ----- + // + // Welp, scratch the above. It turns out that recording these + // is in fact necessary to seemingly handle one particularly + // annoying case: when a conditional epsilon transition is + // put inside of a repetition operator. One specific case I + // ran into was the regex `(?:\b|%)+` on the haystack `z%`. + // The correct leftmost first matches are: [0, 0] and [1, 1]. + // But the DFA was reporting [0, 0] and [1, 2]. To understand + // why this happens, consider the NFA for the aforementioned + // regex: + // + // >000000: binary-union(4, 1) + // 000001: \x00-\xFF => 0 + // 000002: WordAscii => 5 + // 000003: % => 5 + // ^000004: binary-union(2, 3) + // 000005: binary-union(4, 6) + // 000006: MATCH(0) + // + // The problem here is that one of the DFA start states is + // going to consist of the NFA states [2, 3] by computing the + // epsilon closure of state 4. State 4 isn't included because + // we previously were not keeping track of union states. But + // only a subset of transitions out of this state will be able + // to follow WordAscii, and in those cases, the epsilon closure + // is redone. 
The only problem is that computing the epsilon + // closure from [2, 3] is different than computing the epsilon + // closure from [4]. In the former case, assuming the WordAscii + // assertion is satisfied, you get: [2, 3, 6]. In the latter + // case, you get: [2, 6, 3]. Notice that '6' is the match state + // and appears AFTER '3' in the former case. This leads to a + // preferential but incorrect match of '%' before returning + // a match. In the latter case, the match is preferred over + // continuing to accept the '%'. + // + // It almost feels like we might be able to fix the NFA states + // to avoid this, or to at least only keep track of union + // states where this actually matters, since in the vast + // majority of cases, this doesn't matter. + // + // Another alternative would be to define a new HIR property + // called "assertion is repeated anywhere" and compute it + // inductively over the entire pattern. If it happens anywhere, + // which is probably pretty rare, then we record union states. + // Otherwise we don't. + builder.add_nfa_state_id(nfa_id); + } + // Capture states we definitely do not need to record, since they + // are unconditional epsilon transitions with no branching. + thompson::State::Capture { .. } => {} + // It's not totally clear whether we need to record fail states or + // not, but we do so out of an abundance of caution. Since they are + // quite rare in practice, there isn't much cost to recording them. + thompson::State::Fail => { + builder.add_nfa_state_id(nfa_id); + } + thompson::State::Match { .. } => { + // Normally, the NFA match state doesn't actually need to + // be inside the DFA state. But since we delay matches by + // one byte, the matching DFA state corresponds to states + // that transition from the one we're building here. And + // the way we detect those cases is by looking for an NFA + // match state. See 'next' for how this is handled. 
+ builder.add_nfa_state_id(nfa_id); + } + } + } + // If we know this state contains no look-around assertions, then + // there's no reason to track which look-around assertions were + // satisfied when this state was created. + if builder.look_need().is_empty() { + builder.set_look_have(|_| LookSet::empty()); + } +} + +/// Sets the appropriate look-behind assertions on the given state based on +/// this starting configuration. +pub(crate) fn set_lookbehind_from_start( + nfa: &thompson::NFA, + start: &Start, + builder: &mut StateBuilderMatches, +) { + let rev = nfa.is_reverse(); + let lineterm = nfa.look_matcher().get_line_terminator(); + let lookset = nfa.look_set_any(); + match *start { + Start::NonWordByte => { + if lookset.contains_word() { + builder.set_look_have(|have| { + have.insert(Look::WordStartHalfAscii) + .insert(Look::WordStartHalfUnicode) + }); + } + } + Start::WordByte => { + if lookset.contains_word() { + builder.set_is_from_word(); + } + } + Start::Text => { + if lookset.contains_anchor_haystack() { + builder.set_look_have(|have| have.insert(Look::Start)); + } + if lookset.contains_anchor_line() { + builder.set_look_have(|have| { + have.insert(Look::StartLF).insert(Look::StartCRLF) + }); + } + if lookset.contains_word() { + builder.set_look_have(|have| { + have.insert(Look::WordStartHalfAscii) + .insert(Look::WordStartHalfUnicode) + }); + } + } + Start::LineLF => { + if rev { + if lookset.contains_anchor_crlf() { + builder.set_is_half_crlf(); + } + if lookset.contains_anchor_line() { + builder.set_look_have(|have| have.insert(Look::StartLF)); + } + } else { + if lookset.contains_anchor_line() { + builder.set_look_have(|have| have.insert(Look::StartCRLF)); + } + } + if lookset.contains_anchor_line() && lineterm == b'\n' { + builder.set_look_have(|have| have.insert(Look::StartLF)); + } + if lookset.contains_word() { + builder.set_look_have(|have| { + have.insert(Look::WordStartHalfAscii) + .insert(Look::WordStartHalfUnicode) + }); + } + } + 
Start::LineCR => { + if lookset.contains_anchor_crlf() { + if rev { + builder.set_look_have(|have| have.insert(Look::StartCRLF)); + } else { + builder.set_is_half_crlf(); + } + } + if lookset.contains_anchor_line() && lineterm == b'\r' { + builder.set_look_have(|have| have.insert(Look::StartLF)); + } + if lookset.contains_word() { + builder.set_look_have(|have| { + have.insert(Look::WordStartHalfAscii) + .insert(Look::WordStartHalfUnicode) + }); + } + } + Start::CustomLineTerminator => { + if lookset.contains_anchor_line() { + builder.set_look_have(|have| have.insert(Look::StartLF)); + } + // This is a bit of a tricky case, but if the line terminator was + // set to a word byte, then we also need to behave as if the start + // configuration is Start::WordByte. That is, we need to mark our + // state as having come from a word byte. + if lookset.contains_word() { + if utf8::is_word_byte(lineterm) { + builder.set_is_from_word(); + } else { + builder.set_look_have(|have| { + have.insert(Look::WordStartHalfAscii) + .insert(Look::WordStartHalfUnicode) + }); + } + } + } + } +} diff --git a/vendor/regex-automata/src/util/determinize/state.rs b/vendor/regex-automata/src/util/determinize/state.rs new file mode 100644 index 0000000..effa6f4 --- /dev/null +++ b/vendor/regex-automata/src/util/determinize/state.rs @@ -0,0 +1,907 @@ +/*! +This module defines a DFA state representation and builders for constructing +DFA states. + +This representation is specifically for use in implementations of NFA-to-DFA +conversion via powerset construction. (Also called "determinization" in this +crate.) + +The term "DFA state" is somewhat overloaded in this crate. In some cases, it +refers to the set of transitions over an alphabet for a particular state. In +other cases, it refers to a set of NFA states. 
The former is really about the +final representation of a state in a DFA's transition table, where as the +latter---what this module is focused on---is closer to an intermediate form +that is used to help eventually build the transition table. + +This module exports four types. All four types represent the same idea: an +ordered set of NFA states. This ordered set represents the epsilon closure of a +particular NFA state, where the "epsilon closure" is the set of NFA states that +can be transitioned to without consuming any input. i.e., Follow all of the NFA +state's epsilon transitions. In addition, this implementation of DFA states +cares about two other things: the ordered set of pattern IDs corresponding +to the patterns that match if the state is a match state, and the set of +look-behind assertions that were true when the state was created. + +The first, `State`, is a frozen representation of a state that cannot be +modified. It may be cheaply cloned without copying the state itself and can be +accessed safely from multiple threads simultaneously. This type is useful for +when one knows that the DFA state being constructed is distinct from any other +previously constructed states. Namely, powerset construction, in practice, +requires one to keep a cache of previously created DFA states. Otherwise, +the number of DFA states created in memory balloons to an impractically +large number. For this reason, equivalent states should endeavor to have an +equivalent byte-level representation. (In general, "equivalency" here means, +"equivalent assertions, pattern IDs and NFA state IDs." We do not require that +full DFA minimization be implemented here. This form of equivalency is only +surface deep and is more-or-less a practical necessity.) + +The other three types represent different phases in the construction of a +DFA state. Internally, these three types (and `State`) all use the same +byte-oriented representation. 
That means one can use any of the builder types +to check whether the state it represents already exists or not. If it does, +then there is no need to freeze it into a `State` (which requires an alloc and +a copy). Here are the three types described succinctly: + +* `StateBuilderEmpty` represents a state with no pattern IDs, no assertions +and no NFA states. Creating a `StateBuilderEmpty` performs no allocs. A +`StateBuilderEmpty` can only be used to query its underlying memory capacity, +or to convert into a builder for recording pattern IDs and/or assertions. + +* `StateBuilderMatches` represents a state with zero or more pattern IDs, zero +or more satisfied assertions and zero NFA state IDs. A `StateBuilderMatches` +can only be used for adding pattern IDs and recording assertions. + +* `StateBuilderNFA` represents a state with zero or more pattern IDs, zero or +more satisfied assertions and zero or more NFA state IDs. A `StateBuilderNFA` +can only be used for adding NFA state IDs and recording some assertions. + +The expected flow here is to use the above builders to construct a candidate +DFA state to check if it already exists. If it does, then there's no need to +freeze it into a `State`. It it doesn't exist, then `StateBuilderNFA::to_state` +can be called to freeze the builder into an immutable `State`. In either +case, `clear` should be called on the builder to turn it back into a +`StateBuilderEmpty` that reuses the underlying memory. + +The main purpose for splitting the builder into these distinct types is to +make it impossible to do things like adding a pattern ID after adding an NFA +state ID. Namely, this makes it simpler to use a space-and-time efficient +binary representation for the state. (The format is documented on the `Repr` +type below.) If we just used one type for everything, it would be possible for +callers to use an incorrect interleaving of calls and thus result in a corrupt +representation. 
I chose to use more type machinery to make this impossible to +do because 1) determinization is itself pretty complex and it wouldn't be too +hard to foul this up and 2) there isn't too much machinery involved and it's +well contained. + +As an optimization, sometimes states won't have certain things set. For +example, if the underlying NFA has no word boundary assertions, then there is +no reason to set a state's look-behind assertion as to whether it was generated +from a word byte or not. Similarly, if a state has no NFA states corresponding +to look-around assertions, then there is no reason to set `look_have` to a +non-empty set. Finally, callers usually omit unconditional epsilon transitions +when adding NFA state IDs since they aren't discriminatory. + +Finally, the binary representation used by these states is, thankfully, not +serialized anywhere. So any kind of change can be made with reckless abandon, +as long as everything in this module agrees. +*/ + +use core::{convert::TryFrom, mem}; + +use alloc::{sync::Arc, vec::Vec}; + +use crate::util::{ + int::{I32, U32}, + look::LookSet, + primitives::{PatternID, StateID}, + wire::{self, Endian}, +}; + +/// A DFA state that, at its core, is represented by an ordered set of NFA +/// states. +/// +/// This type is intended to be used only in NFA-to-DFA conversion via powerset +/// construction. +/// +/// It may be cheaply cloned and accessed safely from multiple threads +/// simultaneously. +#[derive(Clone, Eq, Hash, PartialEq, PartialOrd, Ord)] +pub(crate) struct State(Arc<[u8]>); + +/// This Borrow impl permits us to lookup any state in a map by its byte +/// representation. This is particularly convenient when one has a StateBuilder +/// and we want to see if a correspondingly equivalent state already exists. If +/// one does exist, then we can reuse the allocation required by StateBuilder +/// without having to convert it into a State first. 
+impl core::borrow::Borrow<[u8]> for State { + fn borrow(&self) -> &[u8] { + &*self.0 + } +} + +impl core::fmt::Debug for State { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + f.debug_tuple("State").field(&self.repr()).finish() + } +} + +/// For docs on these routines, see the internal Repr and ReprVec types below. +impl State { + pub(crate) fn dead() -> State { + StateBuilderEmpty::new().into_matches().into_nfa().to_state() + } + + pub(crate) fn is_match(&self) -> bool { + self.repr().is_match() + } + + pub(crate) fn is_from_word(&self) -> bool { + self.repr().is_from_word() + } + + pub(crate) fn is_half_crlf(&self) -> bool { + self.repr().is_half_crlf() + } + + pub(crate) fn look_have(&self) -> LookSet { + self.repr().look_have() + } + + pub(crate) fn look_need(&self) -> LookSet { + self.repr().look_need() + } + + pub(crate) fn match_len(&self) -> usize { + self.repr().match_len() + } + + pub(crate) fn match_pattern(&self, index: usize) -> PatternID { + self.repr().match_pattern(index) + } + + pub(crate) fn match_pattern_ids(&self) -> Option> { + self.repr().match_pattern_ids() + } + + #[cfg(all(test, not(miri)))] + pub(crate) fn iter_match_pattern_ids(&self, f: F) { + self.repr().iter_match_pattern_ids(f) + } + + pub(crate) fn iter_nfa_state_ids(&self, f: F) { + self.repr().iter_nfa_state_ids(f) + } + + pub(crate) fn memory_usage(&self) -> usize { + self.0.len() + } + + fn repr(&self) -> Repr<'_> { + Repr(&*self.0) + } +} + +/// A state builder that represents an empty state. +/// +/// This is a useful "initial condition" for state construction. It has no +/// NFA state IDs, no assertions set and no pattern IDs. No allocations are +/// made when new() is called. Its main use is for being converted into a +/// builder that can capture assertions and pattern IDs. +#[derive(Clone, Debug)] +pub(crate) struct StateBuilderEmpty(Vec); + +/// For docs on these routines, see the internal Repr and ReprVec types below. 
+impl StateBuilderEmpty { + pub(crate) fn new() -> StateBuilderEmpty { + StateBuilderEmpty(alloc::vec![]) + } + + pub(crate) fn into_matches(mut self) -> StateBuilderMatches { + self.0.extend_from_slice(&[0, 0, 0, 0, 0, 0, 0, 0, 0]); + StateBuilderMatches(self.0) + } + + fn clear(&mut self) { + self.0.clear(); + } + + pub(crate) fn capacity(&self) -> usize { + self.0.capacity() + } +} + +/// A state builder that collects assertions and pattern IDs. +/// +/// When collecting pattern IDs is finished, this can be converted into a +/// builder that collects NFA state IDs. +#[derive(Clone)] +pub(crate) struct StateBuilderMatches(Vec); + +impl core::fmt::Debug for StateBuilderMatches { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + f.debug_tuple("StateBuilderMatches").field(&self.repr()).finish() + } +} + +/// For docs on these routines, see the internal Repr and ReprVec types below. +impl StateBuilderMatches { + pub(crate) fn into_nfa(mut self) -> StateBuilderNFA { + self.repr_vec().close_match_pattern_ids(); + StateBuilderNFA { repr: self.0, prev_nfa_state_id: StateID::ZERO } + } + + pub(crate) fn set_is_from_word(&mut self) { + self.repr_vec().set_is_from_word() + } + + pub(crate) fn set_is_half_crlf(&mut self) { + self.repr_vec().set_is_half_crlf() + } + + pub(crate) fn look_have(&self) -> LookSet { + LookSet::read_repr(&self.0[1..]) + } + + pub(crate) fn set_look_have( + &mut self, + set: impl FnMut(LookSet) -> LookSet, + ) { + self.repr_vec().set_look_have(set) + } + + pub(crate) fn add_match_pattern_id(&mut self, pid: PatternID) { + self.repr_vec().add_match_pattern_id(pid) + } + + fn repr(&self) -> Repr<'_> { + Repr(&self.0) + } + + fn repr_vec(&mut self) -> ReprVec<'_> { + ReprVec(&mut self.0) + } +} + +/// A state builder that collects some assertions and NFA state IDs. +/// +/// When collecting NFA state IDs is finished, this can be used to build a +/// `State` if necessary. 
+/// +/// When dont with building a state (regardless of whether it got kept or not), +/// it's usually a good idea to call `clear` to get an empty builder back so +/// that it can be reused to build the next state. +#[derive(Clone)] +pub(crate) struct StateBuilderNFA { + repr: Vec, + prev_nfa_state_id: StateID, +} + +impl core::fmt::Debug for StateBuilderNFA { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + f.debug_tuple("StateBuilderNFA").field(&self.repr()).finish() + } +} + +/// For docs on these routines, see the internal Repr and ReprVec types below. +impl StateBuilderNFA { + pub(crate) fn to_state(&self) -> State { + State(Arc::from(&*self.repr)) + } + + pub(crate) fn clear(self) -> StateBuilderEmpty { + let mut builder = StateBuilderEmpty(self.repr); + builder.clear(); + builder + } + + pub(crate) fn look_need(&self) -> LookSet { + self.repr().look_need() + } + + pub(crate) fn set_look_have( + &mut self, + set: impl FnMut(LookSet) -> LookSet, + ) { + self.repr_vec().set_look_have(set) + } + + pub(crate) fn set_look_need( + &mut self, + set: impl FnMut(LookSet) -> LookSet, + ) { + self.repr_vec().set_look_need(set) + } + + pub(crate) fn add_nfa_state_id(&mut self, sid: StateID) { + ReprVec(&mut self.repr) + .add_nfa_state_id(&mut self.prev_nfa_state_id, sid) + } + + pub(crate) fn as_bytes(&self) -> &[u8] { + &self.repr + } + + fn repr(&self) -> Repr<'_> { + Repr(&self.repr) + } + + fn repr_vec(&mut self) -> ReprVec<'_> { + ReprVec(&mut self.repr) + } +} + +/// Repr is a read-only view into the representation of a DFA state. +/// +/// Primarily, a Repr is how we achieve DRY: we implement decoding the format +/// in one place, and then use a Repr to implement the various methods on the +/// public state types. +/// +/// The format is as follows: +/// +/// The first three bytes correspond to bitsets. +/// +/// Byte 0 is a bitset corresponding to miscellaneous flags associated with the +/// state. 
Bit 0 is set to 1 if the state is a match state. Bit 1 is set to 1 +/// if the state has pattern IDs explicitly written to it. (This is a flag that +/// is not meant to be set by determinization, but rather, is used as part of +/// an internal space-saving optimization.) Bit 2 is set to 1 if the state was +/// generated by a transition over a "word" byte. (Callers may not always set +/// this. For example, if the NFA has no word boundary assertion, then needing +/// to track whether a state came from a word byte or not is superfluous and +/// wasteful.) Bit 3 is set to 1 if the state was generated by a transition +/// from a `\r` (forward search) or a `\n` (reverse search) when CRLF mode is +/// enabled. +/// +/// Bytes 1..5 correspond to the look-behind assertions that were satisfied +/// by the transition that created this state. (Look-ahead assertions are not +/// tracked as part of states. Instead, these are applied by re-computing the +/// epsilon closure of a state when computing the transition function. See +/// `next` in the parent module.) +/// +/// Bytes 5..9 correspond to the set of look-around assertions (including both +/// look-behind and look-ahead) that appear somewhere in this state's set of +/// NFA state IDs. This is used to determine whether this state's epsilon +/// closure should be re-computed when computing the transition function. +/// Namely, look-around assertions are "just" conditional epsilon transitions, +/// so if there are new assertions available when computing the transition +/// function, we should only re-compute the epsilon closure if those new +/// assertions are relevant to this particular state. +/// +/// Bytes 9..13 correspond to a 32-bit native-endian encoded integer +/// corresponding to the number of patterns encoded in this state. If the state +/// is not a match state (byte 0 bit 0 is 0) or if it's only pattern ID is +/// PatternID::ZERO, then no integer is encoded at this position. 
Instead, byte +/// offset 3 is the position at which the first NFA state ID is encoded. +/// +/// For a match state with at least one non-ZERO pattern ID, the next bytes +/// correspond to a sequence of 32-bit native endian encoded integers that +/// represent each pattern ID, in order, that this match state represents. +/// +/// After the pattern IDs (if any), NFA state IDs are delta encoded as +/// varints.[1] The first NFA state ID is encoded as itself, and each +/// subsequent NFA state ID is encoded as the difference between itself and the +/// previous NFA state ID. +/// +/// [1] - https://developers.google.com/protocol-buffers/docs/encoding#varints +struct Repr<'a>(&'a [u8]); + +impl<'a> Repr<'a> { + /// Returns true if and only if this is a match state. + /// + /// If callers have added pattern IDs to this state, then callers MUST set + /// this state as a match state explicitly. However, as a special case, + /// states that are marked as match states but with no pattern IDs, then + /// the state is treated as if it had a single pattern ID equivalent to + /// PatternID::ZERO. + fn is_match(&self) -> bool { + self.0[0] & (1 << 0) > 0 + } + + /// Returns true if and only if this state has had at least one pattern + /// ID added to it. + /// + /// This is an internal-only flag that permits the representation to save + /// space in the common case of an NFA with one pattern in it. In that + /// case, a match state can only ever have exactly one pattern ID: + /// PatternID::ZERO. So there's no need to represent it. + fn has_pattern_ids(&self) -> bool { + self.0[0] & (1 << 1) > 0 + } + + /// Returns true if and only if this state is marked as having been created + /// from a transition over a word byte. This is useful for checking whether + /// a word boundary assertion is true or not, which requires look-behind + /// (whether the current state came from a word byte or not) and look-ahead + /// (whether the transition byte is a word byte or not). 
+ /// + /// Since states with this set are distinct from states that don't have + /// this set (even if they are otherwise equivalent), callers should not + /// set this assertion unless the underlying NFA has at least one word + /// boundary assertion somewhere. Otherwise, a superfluous number of states + /// may be created. + fn is_from_word(&self) -> bool { + self.0[0] & (1 << 2) > 0 + } + + /// Returns true if and only if this state is marked as being inside of a + /// CRLF terminator. In the forward direction, this means the state was + /// created after seeing a `\r`. In the reverse direction, this means the + /// state was created after seeing a `\n`. + fn is_half_crlf(&self) -> bool { + self.0[0] & (1 << 3) > 0 + } + + /// The set of look-behind assertions that were true in the transition that + /// created this state. + /// + /// Generally, this should be empty if 'look_need' is empty, since there is + /// no reason to track which look-behind assertions are true if the state + /// has no conditional epsilon transitions. + /// + /// Satisfied look-ahead assertions are not tracked in states. Instead, + /// these are re-computed on demand via epsilon closure when computing the + /// transition function. + fn look_have(&self) -> LookSet { + LookSet::read_repr(&self.0[1..]) + } + + /// The set of look-around (both behind and ahead) assertions that appear + /// at least once in this state's set of NFA states. + /// + /// This is used to determine whether the epsilon closure needs to be + /// re-computed when computing the transition function. Namely, if the + /// state has no conditional epsilon transitions, then there is no need + /// to re-compute the epsilon closure. + fn look_need(&self) -> LookSet { + LookSet::read_repr(&self.0[5..]) + } + + /// Returns the total number of match pattern IDs in this state. + /// + /// If this state is not a match state, then this always returns 0. 
+ fn match_len(&self) -> usize { + if !self.is_match() { + return 0; + } else if !self.has_pattern_ids() { + 1 + } else { + self.encoded_pattern_len() + } + } + + /// Returns the pattern ID for this match state at the given index. + /// + /// If the given index is greater than or equal to `match_len()` for this + /// state, then this could panic or return incorrect results. + fn match_pattern(&self, index: usize) -> PatternID { + if !self.has_pattern_ids() { + PatternID::ZERO + } else { + let offset = 13 + index * PatternID::SIZE; + // This is OK since we only ever serialize valid PatternIDs to + // states. + wire::read_pattern_id_unchecked(&self.0[offset..]).0 + } + } + + /// Returns a copy of all match pattern IDs in this state. If this state + /// is not a match state, then this returns None. + fn match_pattern_ids(&self) -> Option> { + if !self.is_match() { + return None; + } + let mut pids = alloc::vec![]; + self.iter_match_pattern_ids(|pid| pids.push(pid)); + Some(pids) + } + + /// Calls the given function on every pattern ID in this state. + fn iter_match_pattern_ids(&self, mut f: F) { + if !self.is_match() { + return; + } + // As an optimization for a very common case, when this is a match + // state for an NFA with only one pattern, we don't actually write the + // pattern ID to the state representation. Instead, we know it must + // be there since it is the only possible choice. + if !self.has_pattern_ids() { + f(PatternID::ZERO); + return; + } + let mut pids = &self.0[13..self.pattern_offset_end()]; + while !pids.is_empty() { + let pid = wire::read_u32(pids); + pids = &pids[PatternID::SIZE..]; + // This is OK since we only ever serialize valid PatternIDs to + // states. And since pattern IDs can never exceed a usize, the + // unwrap is OK. + f(PatternID::new_unchecked(usize::try_from(pid).unwrap())); + } + } + + /// Calls the given function on every NFA state ID in this state. 
+ fn iter_nfa_state_ids(&self, mut f: F) { + let mut sids = &self.0[self.pattern_offset_end()..]; + let mut prev = 0i32; + while !sids.is_empty() { + let (delta, nr) = read_vari32(sids); + sids = &sids[nr..]; + let sid = prev + delta; + prev = sid; + // This is OK since we only ever serialize valid StateIDs to + // states. And since state IDs can never exceed an isize, they must + // always be able to fit into a usize, and thus cast is OK. + f(StateID::new_unchecked(sid.as_usize())) + } + } + + /// Returns the offset into this state's representation where the pattern + /// IDs end and the NFA state IDs begin. + fn pattern_offset_end(&self) -> usize { + let encoded = self.encoded_pattern_len(); + if encoded == 0 { + return 9; + } + // This arithmetic is OK since we were able to address this many bytes + // when writing to the state, thus, it must fit into a usize. + encoded.checked_mul(4).unwrap().checked_add(13).unwrap() + } + + /// Returns the total number of *encoded* pattern IDs in this state. + /// + /// This may return 0 even when this is a match state, since the pattern + /// ID `PatternID::ZERO` is not encoded when it's the only pattern ID in + /// the match state (the overwhelming common case). + fn encoded_pattern_len(&self) -> usize { + if !self.has_pattern_ids() { + return 0; + } + // This unwrap is OK since the total number of patterns is always + // guaranteed to fit into a usize. 
+ usize::try_from(wire::read_u32(&self.0[9..13])).unwrap() + } +} + +impl<'a> core::fmt::Debug for Repr<'a> { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + let mut nfa_ids = alloc::vec![]; + self.iter_nfa_state_ids(|sid| nfa_ids.push(sid)); + f.debug_struct("Repr") + .field("is_match", &self.is_match()) + .field("is_from_word", &self.is_from_word()) + .field("is_half_crlf", &self.is_half_crlf()) + .field("look_have", &self.look_have()) + .field("look_need", &self.look_need()) + .field("match_pattern_ids", &self.match_pattern_ids()) + .field("nfa_state_ids", &nfa_ids) + .finish() + } +} + +/// ReprVec is a write-only view into the representation of a DFA state. +/// +/// See Repr for more details on the purpose of this type and also the format. +/// +/// Note that not all possible combinations of methods may be called. This is +/// precisely what the various StateBuilder types encapsulate: they only +/// permit valid combinations via Rust's linear typing. +struct ReprVec<'a>(&'a mut Vec); + +impl<'a> ReprVec<'a> { + /// Set this state as a match state. + /// + /// This should not be exposed explicitly outside of this module. It is + /// set automatically when a pattern ID is added. + fn set_is_match(&mut self) { + self.0[0] |= 1 << 0; + } + + /// Set that this state has pattern IDs explicitly written to it. + /// + /// This should not be exposed explicitly outside of this module. This is + /// used internally as a space saving optimization. Namely, if the state + /// is a match state but does not have any pattern IDs written to it, + /// then it is automatically inferred to have a pattern ID of ZERO. + fn set_has_pattern_ids(&mut self) { + self.0[0] |= 1 << 1; + } + + /// Set this state as being built from a transition over a word byte. + /// + /// Setting this is only necessary when one needs to deal with word + /// boundary assertions. Therefore, if the underlying NFA has no word + /// boundary assertions, callers should not set this. 
+ fn set_is_from_word(&mut self) { + self.0[0] |= 1 << 2; + } + + /// Set this state as having seen half of a CRLF terminator. + /// + /// In the forward direction, this should be set when a `\r` has been seen. + /// In the reverse direction, this should be set when a `\n` has been seen. + fn set_is_half_crlf(&mut self) { + self.0[0] |= 1 << 3; + } + + /// The set of look-behind assertions that were true in the transition that + /// created this state. + fn look_have(&self) -> LookSet { + self.repr().look_have() + } + + /// The set of look-around (both behind and ahead) assertions that appear + /// at least once in this state's set of NFA states. + fn look_need(&self) -> LookSet { + self.repr().look_need() + } + + /// Mutate the set of look-behind assertions that were true in the + /// transition that created this state. + fn set_look_have(&mut self, mut set: impl FnMut(LookSet) -> LookSet) { + set(self.look_have()).write_repr(&mut self.0[1..]); + } + + /// Mutate the set of look-around (both behind and ahead) assertions that + /// appear at least once in this state's set of NFA states. + fn set_look_need(&mut self, mut set: impl FnMut(LookSet) -> LookSet) { + set(self.look_need()).write_repr(&mut self.0[5..]); + } + + /// Add a pattern ID to this state. All match states must have at least + /// one pattern ID associated with it. + /// + /// Callers must never add duplicative pattern IDs. + /// + /// The order in which patterns are added must correspond to the order + /// in which patterns are reported as matches. + fn add_match_pattern_id(&mut self, pid: PatternID) { + // As a (somewhat small) space saving optimization, in the case where + // a matching state has exactly one pattern ID, PatternID::ZERO, we do + // not write either the pattern ID or the number of patterns encoded. + // Instead, all we do is set the 'is_match' bit on this state. Overall, + // this saves 8 bytes per match state for the overwhelming majority of + // match states. 
+ // + // In order to know whether pattern IDs need to be explicitly read or + // not, we use another internal-only bit, 'has_pattern_ids', to + // indicate whether they have been explicitly written or not. + if !self.repr().has_pattern_ids() { + if pid == PatternID::ZERO { + self.set_is_match(); + return; + } + // Make room for 'close_match_pattern_ids' to write the total + // number of pattern IDs written. + self.0.extend(core::iter::repeat(0).take(PatternID::SIZE)); + self.set_has_pattern_ids(); + // If this was already a match state, then the only way that's + // possible when the state doesn't have pattern IDs is if + // PatternID::ZERO was added by the caller previously. In this + // case, we are now adding a non-ZERO pattern ID after it, in + // which case, we want to make sure to represent ZERO explicitly + // now. + if self.repr().is_match() { + write_u32(self.0, 0) + } else { + // Otherwise, just make sure the 'is_match' bit is set. + self.set_is_match(); + } + } + write_u32(self.0, pid.as_u32()); + } + + /// Indicate that no more pattern IDs will be added to this state. + /// + /// Once this is called, callers must not call it or 'add_match_pattern_id' + /// again. + /// + /// This should not be exposed explicitly outside of this module. It + /// should be called only when converting a StateBuilderMatches into a + /// StateBuilderNFA. + fn close_match_pattern_ids(&mut self) { + // If we never wrote any pattern IDs, then there's nothing to do here. + if !self.repr().has_pattern_ids() { + return; + } + let patsize = PatternID::SIZE; + let pattern_bytes = self.0.len() - 13; + // Every pattern ID uses 4 bytes, so number of bytes should be + // divisible by 4. + assert_eq!(pattern_bytes % patsize, 0); + // This unwrap is OK since we are guaranteed that the maximum number + // of possible patterns fits into a u32. 
+ let count32 = u32::try_from(pattern_bytes / patsize).unwrap(); + wire::NE::write_u32(count32, &mut self.0[9..13]); + } + + /// Add an NFA state ID to this state. The order in which NFA states are + /// added matters. It is the caller's responsibility to ensure that + /// duplicate NFA state IDs are not added. + fn add_nfa_state_id(&mut self, prev: &mut StateID, sid: StateID) { + let delta = sid.as_i32() - prev.as_i32(); + write_vari32(self.0, delta); + *prev = sid; + } + + /// Return a read-only view of this state's representation. + fn repr(&self) -> Repr<'_> { + Repr(self.0.as_slice()) + } +} + +/// Write a signed 32-bit integer using zig-zag encoding. +/// +/// https://developers.google.com/protocol-buffers/docs/encoding#varints +fn write_vari32(data: &mut Vec, n: i32) { + let mut un = n.to_bits() << 1; + if n < 0 { + un = !un; + } + write_varu32(data, un) +} + +/// Read a signed 32-bit integer using zig-zag encoding. Also, return the +/// number of bytes read. +/// +/// https://developers.google.com/protocol-buffers/docs/encoding#varints +fn read_vari32(data: &[u8]) -> (i32, usize) { + let (un, i) = read_varu32(data); + let mut n = i32::from_bits(un >> 1); + if un & 1 != 0 { + n = !n; + } + (n, i) +} + +/// Write an unsigned 32-bit integer as a varint. In essence, `n` is written +/// as a sequence of bytes where all bytes except for the last one have the +/// most significant bit set. The least significant 7 bits correspond to the +/// actual bits of `n`. So in the worst case, a varint uses 5 bytes, but in +/// very common cases, it uses fewer than 4. +/// +/// https://developers.google.com/protocol-buffers/docs/encoding#varints +fn write_varu32(data: &mut Vec, mut n: u32) { + while n >= 0b1000_0000 { + data.push(n.low_u8() | 0b1000_0000); + n >>= 7; + } + data.push(n.low_u8()); +} + +/// Read an unsigned 32-bit varint. Also, return the number of bytes read. 
+/// +/// https://developers.google.com/protocol-buffers/docs/encoding#varints +fn read_varu32(data: &[u8]) -> (u32, usize) { + // N.B. We can assume correctness here since we know that all varuints are + // written with write_varu32. Hence, the 'as' uses and unchecked arithmetic + // is all okay. + let mut n: u32 = 0; + let mut shift: u32 = 0; + for (i, &b) in data.iter().enumerate() { + if b < 0b1000_0000 { + return (n | (u32::from(b) << shift), i + 1); + } + n |= (u32::from(b) & 0b0111_1111) << shift; + shift += 7; + } + (0, 0) +} + +/// Push a native-endian encoded `n` on to `dst`. +fn write_u32(dst: &mut Vec, n: u32) { + use crate::util::wire::NE; + + let start = dst.len(); + dst.extend(core::iter::repeat(0).take(mem::size_of::())); + NE::write_u32(n, &mut dst[start..]); +} + +#[cfg(test)] +mod tests { + use alloc::vec; + + use quickcheck::quickcheck; + + use super::*; + + #[cfg(not(miri))] + quickcheck! { + fn prop_state_read_write_nfa_state_ids(sids: Vec) -> bool { + // Builders states do not permit duplicate IDs. + let sids = dedup_state_ids(sids); + + let mut b = StateBuilderEmpty::new().into_matches().into_nfa(); + for &sid in &sids { + b.add_nfa_state_id(sid); + } + let s = b.to_state(); + let mut got = vec![]; + s.iter_nfa_state_ids(|sid| got.push(sid)); + got == sids + } + + fn prop_state_read_write_pattern_ids(pids: Vec) -> bool { + // Builders states do not permit duplicate IDs. + let pids = dedup_pattern_ids(pids); + + let mut b = StateBuilderEmpty::new().into_matches(); + for &pid in &pids { + b.add_match_pattern_id(pid); + } + let s = b.into_nfa().to_state(); + let mut got = vec![]; + s.iter_match_pattern_ids(|pid| got.push(pid)); + got == pids + } + + fn prop_state_read_write_nfa_state_and_pattern_ids( + sids: Vec, + pids: Vec + ) -> bool { + // Builders states do not permit duplicate IDs. 
+ let sids = dedup_state_ids(sids); + let pids = dedup_pattern_ids(pids); + + let mut b = StateBuilderEmpty::new().into_matches(); + for &pid in &pids { + b.add_match_pattern_id(pid); + } + + let mut b = b.into_nfa(); + for &sid in &sids { + b.add_nfa_state_id(sid); + } + + let s = b.to_state(); + let mut got_pids = vec![]; + s.iter_match_pattern_ids(|pid| got_pids.push(pid)); + let mut got_sids = vec![]; + s.iter_nfa_state_ids(|sid| got_sids.push(sid)); + got_pids == pids && got_sids == sids + } + } + + quickcheck! { + fn prop_read_write_varu32(n: u32) -> bool { + let mut buf = vec![]; + write_varu32(&mut buf, n); + let (got, nread) = read_varu32(&buf); + nread == buf.len() && got == n + } + + fn prop_read_write_vari32(n: i32) -> bool { + let mut buf = vec![]; + write_vari32(&mut buf, n); + let (got, nread) = read_vari32(&buf); + nread == buf.len() && got == n + } + } + + #[cfg(not(miri))] + fn dedup_state_ids(sids: Vec) -> Vec { + let mut set = alloc::collections::BTreeSet::new(); + let mut deduped = vec![]; + for sid in sids { + if set.contains(&sid) { + continue; + } + set.insert(sid); + deduped.push(sid); + } + deduped + } + + #[cfg(not(miri))] + fn dedup_pattern_ids(pids: Vec) -> Vec { + let mut set = alloc::collections::BTreeSet::new(); + let mut deduped = vec![]; + for pid in pids { + if set.contains(&pid) { + continue; + } + set.insert(pid); + deduped.push(pid); + } + deduped + } +} diff --git a/vendor/regex-automata/src/util/empty.rs b/vendor/regex-automata/src/util/empty.rs new file mode 100644 index 0000000..e16af3b --- /dev/null +++ b/vendor/regex-automata/src/util/empty.rs @@ -0,0 +1,265 @@ +/*! +This module provides helper routines for dealing with zero-width matches. + +The main problem being solved here is this: + +1. The caller wants to search something that they know is valid UTF-8, such +as a Rust `&str`. +2. The regex used by the caller can match the empty string. For example, `a*`. +3. 
The caller should never get match offsets returned that occur within the +encoding of a UTF-8 codepoint. It is logically incorrect, and also means that, +e.g., slicing the `&str` at those offsets will lead to a panic. + +So the question here is, how do we prevent the caller from getting match +offsets that split a codepoint? For example, strictly speaking, the regex `a*` +matches `☃` at the positions `[0, 0]`, `[1, 1]`, `[2, 2]` and `[3, 3]` since +the UTF-8 encoding of `☃` is `\xE2\x98\x83`. In particular, the `NFA` that +underlies all of the matching engines in this crate doesn't have anything in +its state graph that prevents matching between UTF-8 code units. Indeed, any +engine derived from the `NFA` will match at those positions by virtue of the +fact that the `NFA` is byte oriented. That is, its transitions are defined over +bytes and the matching engines work by proceeding one byte at a time. + +(An alternative architecture would be to define the transitions in an `NFA` +over codepoints, or `char`. And then make the matching engines proceed by +decoding one codepoint at a time. This is a viable strategy, but it doesn't +work for DFA matching engines because designing a fast and memory efficient +transition table for an alphabet as large as Unicode is quite difficult. More +to the point, the top-level `regex` crate supports matching on arbitrary bytes +when Unicode mode is disabled and one is searching a `&[u8]`. So in that case, +you can't just limit yourself to decoding codepoints and matching those. You +really do need to be able to follow byte oriented transitions on the `NFA`.) + +In an older version of the regex crate, we handled this case not in the regex +engine, but in the iterators over matches. Namely, since this case only arises +when the match is empty, we "just" incremented the next starting position +of the search by `N`, where `N` is the length of the codepoint encoded at +the current position. 
The alternative or more "natural" solution of just +incrementing by `1` would result in executing a search of `a*` on `☃` like +this: + +* Start search at `0`. +* Found match at `[0, 0]`. +* Next start position is `0`. +* To avoid an infinite loop, since it's an empty match, increment by `1`. +* Start search at `1`. +* Found match at `[1, 1]`. Oops. + +But if we instead incremented by `3` (the length in bytes of `☃`), then we get +the following: + +* Start search at `0`. +* Found match at `[0, 0]`. +* Next start position is `0`. +* To avoid an infinite loop, since it's an empty match, increment by `3`. +* Start search at `3`. +* Found match at `[3, 3]`. + +And we get the correct result. But does this technique work in all cases? +Crucially, it requires that a zero-width match that splits a codepoint never +occurs beyond the starting position of the search. Because if it did, merely +incrementing the start position by the number of bytes in the codepoint at +the current position wouldn't be enough. A zero-width match could just occur +anywhere. It turns out that it is _almost_ true. We can convince ourselves by +looking at all possible patterns that can match the empty string: + +* Patterns like `a*`, `a{0}`, `(?:)`, `a|` and `|a` all unconditionally match +the empty string. That is, assuming there isn't an `a` at the current position, +they will all match the empty string at the start of a search. There is no way +to move past it because any other match would not be "leftmost." +* `^` only matches at the beginning of the haystack, where the start position +is `0`. 
Since we know we're searching valid UTF-8 (if it isn't valid UTF-8, +then this entire problem goes away because it implies your string type supports +invalid UTF-8 and thus must deal with offsets that not only split a codepoint +but occur in entirely invalid UTF-8 somehow), it follows that `^` never matches +between the code units of a codepoint because the start of a valid UTF-8 string +is never within the encoding of a codepoint. +* `$` basically the same logic as `^`, but for the end of a string. A valid +UTF-8 string can't have an incomplete codepoint at the end of it. +* `(?m:^)` follows similarly to `^`, but it can match immediately following +a `\n`. However, since a `\n` is always a codepoint itself and can never +appear within a codepoint, it follows that the position immediately following +a `\n` in a string that is valid UTF-8 is guaranteed to not be between the +code units of another codepoint. (One caveat here is that the line terminator +for multi-line anchors can now be changed to any arbitrary byte, including +things like `\x98` which might occur within a codepoint. However, this wasn't +supported by the old regex crate. If it was, it pose the same problems as +`(?-u:\B)`, as we'll discuss below.) +* `(?m:$)` a similar argument as for `(?m:^)`. The only difference is that a +`(?m:$)` matches just before a `\n`. But the same argument applies. +* `(?Rm:^)` and `(?Rm:$)` weren't supported by the old regex crate, but the +CRLF aware line anchors follow a similar argument as for `(?m:^)` and `(?m:$)`. +Namely, since they only ever match at a boundary where one side is either a +`\r` or a `\n`, neither of which can occur within a codepoint. +* `\b` only matches at positions where both sides are valid codepoints, so +this cannot split a codepoint. +* `\B`, like `\b`, also only matches at positions where both sides are valid +codepoints. So this cannot split a codepoint either. 
+* `(?-u:\b)` matches only at positions where at least one side of it is an ASCII +word byte. Since ASCII bytes cannot appear as code units in non-ASCII codepoints +(one of the many amazing qualities of UTF-8), it follows that this too cannot +split a codepoint. +* `(?-u:\B)` finally represents a problem. It can matches between *any* two +bytes that are either both word bytes or non-word bytes. Since code units like +`\xE2` and `\x98` (from the UTF-8 encoding of `☃`) are both non-word bytes, +`(?-u:\B)` will match at the position between them. + +Thus, our approach of incrementing one codepoint at a time after seeing an +empty match is flawed because `(?-u:\B)` can result in an empty match that +splits a codepoint at a position past the starting point of a search. For +example, searching `(?-u:\B)` on `a☃` would produce the following matches: `[2, +2]`, `[3, 3]` and `[4, 4]`. The positions at `0` and `1` don't match because +they correspond to word boundaries since `a` is an ASCII word byte. + +So what did the old regex crate do to avoid this? It banned `(?-u:\B)` from +regexes that could match `&str`. That might sound extreme, but a lot of other +things were banned too. For example, all of `(?-u:.)`, `(?-u:[^a])` and +`(?-u:\W)` can match invalid UTF-8 too, including individual code units with a +codepoint. The key difference is that those expressions could never produce an +empty match. That ban happens when translating an `Ast` to an `Hir`, because +that process that reason about whether an `Hir` can produce *non-empty* matches +at invalid UTF-8 boundaries. Bottom line though is that we side-stepped the +`(?-u:\B)` issue by banning it. + +If banning `(?-u:\B)` were the only issue with the old regex crate's approach, +then I probably would have kept it. `\B` is rarely used, so it's not such a big +deal to have to work-around it. However, the problem with the above approach +is that it doesn't compose. 
The logic for avoiding splitting a codepoint only +lived in the iterator, which means if anyone wants to implement their own +iterator over regex matches, they have to deal with this extremely subtle edge +case to get full correctness. + +Instead, in this crate, we take the approach of pushing this complexity down +to the lowest layers of each regex engine. The approach is pretty simple: + +* If this corner case doesn't apply, don't do anything. (For example, if UTF-8 +mode isn't enabled or if the regex cannot match the empty string.) +* If an empty match is reported, explicitly check if it splits a codepoint. +* If it doesn't, we're done, return the match. +* If it does, then ignore the match and re-run the search. +* Repeat the above process until the end of the haystack is reached or a match +is found that doesn't split a codepoint or isn't zero width. + +And that's pretty much what this module provides. Every regex engine uses these +methods in their lowest level public APIs, but just above the layer where +their internal engine is used. That way, all regex engines can be arbitrarily +composed without worrying about handling this case, and iterators don't need to +handle it explicitly. + +(It turns out that a new feature I added, support for changing the line +terminator in a regex to any arbitrary byte, also provokes the above problem. +Namely, the byte could be invalid UTF-8 or a UTF-8 continuation byte. So that +support would need to be limited or banned when UTF-8 mode is enabled, just +like we did for `(?-u:\B)`. But thankfully our more robust approach in this +crate handles that case just fine too.) 
+*/ + +use crate::util::search::{Input, MatchError}; + +#[cold] +#[inline(never)] +pub(crate) fn skip_splits_fwd( + input: &Input<'_>, + init_value: T, + match_offset: usize, + find: F, +) -> Result, MatchError> +where + F: FnMut(&Input<'_>) -> Result, MatchError>, +{ + skip_splits(true, input, init_value, match_offset, find) +} + +#[cold] +#[inline(never)] +pub(crate) fn skip_splits_rev( + input: &Input<'_>, + init_value: T, + match_offset: usize, + find: F, +) -> Result, MatchError> +where + F: FnMut(&Input<'_>) -> Result, MatchError>, +{ + skip_splits(false, input, init_value, match_offset, find) +} + +fn skip_splits( + forward: bool, + input: &Input<'_>, + init_value: T, + mut match_offset: usize, + mut find: F, +) -> Result, MatchError> +where + F: FnMut(&Input<'_>) -> Result, MatchError>, +{ + // If our config says to do an anchored search, then we're definitely + // done. We just need to determine whether we have a valid match or + // not. If we don't, then we're not allowed to continue, so we report + // no match. + // + // This is actually quite a subtle correctness thing. The key here is + // that if we got an empty match that splits a codepoint after doing an + // anchored search in UTF-8 mode, then that implies that we must have + // *started* the search at a location that splits a codepoint. This + // follows from the fact that if a match is reported from an anchored + // search, then the start offset of the match *must* match the start + // offset of the search. + // + // It also follows that no other non-empty match is possible. For + // example, you might write a regex like '(?:)|SOMETHING' and start its + // search in the middle of a codepoint. The first branch is an empty + // regex that will bubble up a match at the first position, and then + // get rejected here and report no match. But what if 'SOMETHING' could + // have matched? 
We reason that such a thing is impossible, because + // if it does, it must report a match that starts in the middle of a + // codepoint. This in turn implies that a match is reported whose span + // does not correspond to valid UTF-8, and this breaks the promise + // made when UTF-8 mode is enabled. (That promise *can* be broken, for + // example, by enabling UTF-8 mode but building an by hand NFA that + // produces non-empty matches that span invalid UTF-8. This is an unchecked + // but documented precondition violation of UTF-8 mode, and is documented + // to have unspecified behavior.) + // + // I believe this actually means that if an anchored search is run, and + // UTF-8 mode is enabled and the start position splits a codepoint, + // then it is correct to immediately report no match without even + // executing the regex engine. But it doesn't really seem worth writing + // out that case in every regex engine to save a tiny bit of work in an + // extremely pathological case, so we just handle it here. + if input.get_anchored().is_anchored() { + return Ok(if input.is_char_boundary(match_offset) { + Some(init_value) + } else { + None + }); + } + // Otherwise, we have an unanchored search, so just keep looking for + // matches until we have one that does not split a codepoint or we hit + // EOI. + let mut value = init_value; + let mut input = input.clone(); + while !input.is_char_boundary(match_offset) { + if forward { + // The unwrap is OK here because overflowing usize while + // iterating over a slice is impossible, at it would require + // a slice of length greater than isize::MAX, which is itself + // impossible. + input.set_start(input.start().checked_add(1).unwrap()); + } else { + input.set_end(match input.end().checked_sub(1) { + None => return Ok(None), + Some(end) => end, + }); + } + match find(&input)? 
{ + None => return Ok(None), + Some((new_value, new_match_end)) => { + value = new_value; + match_offset = new_match_end; + } + } + } + Ok(Some(value)) +} diff --git a/vendor/regex-automata/src/util/escape.rs b/vendor/regex-automata/src/util/escape.rs new file mode 100644 index 0000000..7f6aa15 --- /dev/null +++ b/vendor/regex-automata/src/util/escape.rs @@ -0,0 +1,84 @@ +/*! +Provides convenience routines for escaping raw bytes. + +Since this crate tends to deal with `&[u8]` everywhere and the default +`Debug` implementation just shows decimal integers, it makes debugging those +representations quite difficult. This module provides types that show `&[u8]` +as if it were a string, with invalid UTF-8 escaped into its byte-by-byte hex +representation. +*/ + +use crate::util::utf8; + +/// Provides a convenient `Debug` implementation for a `u8`. +/// +/// The `Debug` impl treats the byte as an ASCII, and emits a human readable +/// representation of it. If the byte isn't ASCII, then it's emitted as a hex +/// escape sequence. +#[derive(Clone, Copy)] +pub struct DebugByte(pub u8); + +impl core::fmt::Debug for DebugByte { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + // Special case ASCII space. It's too hard to read otherwise, so + // put quotes around it. I sometimes wonder whether just '\x20' would + // be better... + if self.0 == b' ' { + return write!(f, "' '"); + } + // 10 bytes is enough to cover any output from ascii::escape_default. + let mut bytes = [0u8; 10]; + let mut len = 0; + for (i, mut b) in core::ascii::escape_default(self.0).enumerate() { + // capitalize \xab to \xAB + if i >= 2 && b'a' <= b && b <= b'f' { + b -= 32; + } + bytes[len] = b; + len += 1; + } + write!(f, "{}", core::str::from_utf8(&bytes[..len]).unwrap()) + } +} + +/// Provides a convenient `Debug` implementation for `&[u8]`. +/// +/// This generally works best when the bytes are presumed to be mostly UTF-8, +/// but will work for anything. 
For any bytes that aren't UTF-8, they are +/// emitted as hex escape sequences. +pub struct DebugHaystack<'a>(pub &'a [u8]); + +impl<'a> core::fmt::Debug for DebugHaystack<'a> { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + write!(f, "\"")?; + // This is a sad re-implementation of a similar impl found in bstr. + let mut bytes = self.0; + while let Some(result) = utf8::decode(bytes) { + let ch = match result { + Ok(ch) => ch, + Err(byte) => { + write!(f, r"\x{:02x}", byte)?; + bytes = &bytes[1..]; + continue; + } + }; + bytes = &bytes[ch.len_utf8()..]; + match ch { + '\0' => write!(f, "\\0")?, + // ASCII control characters except \0, \n, \r, \t + '\x01'..='\x08' + | '\x0b' + | '\x0c' + | '\x0e'..='\x19' + | '\x7f' => { + write!(f, "\\x{:02x}", u32::from(ch))?; + } + '\n' | '\r' | '\t' | _ => { + write!(f, "{}", ch.escape_debug())?; + } + } + } + write!(f, "\"")?; + Ok(()) + } +} diff --git a/vendor/regex-automata/src/util/int.rs b/vendor/regex-automata/src/util/int.rs new file mode 100644 index 0000000..e6b13bf --- /dev/null +++ b/vendor/regex-automata/src/util/int.rs @@ -0,0 +1,252 @@ +/*! +This module provides several integer oriented traits for converting between +both fixed size integers and integers whose size varies based on the target +(like `usize`). + +The driving design principle of this module is to attempt to centralize as many +`as` casts as possible here. And in particular, we separate casts into two +buckets: + +* Casts that we use for their truncating behavior. In this case, we use more +descriptive names, like `low_u32` and `high_u32`. +* Casts that we use for converting back-and-forth between `usize`. These +conversions are generally necessary because we often store indices in different +formats to save on memory, which requires converting to and from `usize`. In +this case, we very specifically do not want to overflow, and so the methods +defined here will panic if the `as` cast would be lossy in debug mode. 
(A +normal `as` cast will never panic!) + +For `as` casts between raw pointers, we use `cast`, so `as` isn't needed there. + +For regex engines, floating point is just never used, so we don't have to worry +about `as` casts for those. + +Otherwise, this module pretty much covers all of our `as` needs except for one +thing: const contexts. There are a select few places in this crate where we +still need to use `as` because const functions on traits aren't stable yet. +If we wind up significantly expanding our const footprint in this crate, it +might be worth defining free functions to handle those cases. But at the time +of writing, that just seemed like too much ceremony. Instead, I comment each +such use of `as` in a const context with a "fixme" notice. + +NOTE: for simplicity, we don't take target pointer width into account here for +`usize` conversions. Since we currently only panic in debug mode, skipping the +check when it can be proven it isn't needed at compile time doesn't really +matter. Now, if we wind up wanting to do as many checks as possible in release +mode, then we would want to skip those when we know the conversions are always +non-lossy. + +NOTE: this module isn't an exhaustive API. For example, we still use things +like `u64::from` where possible, or even `usize::try_from()` for when we do +explicitly want to panic or when we want to return an error for overflow. 
+*/ + +pub(crate) trait U8 { + fn as_usize(self) -> usize; +} + +impl U8 for u8 { + fn as_usize(self) -> usize { + usize::from(self) + } +} + +pub(crate) trait U16 { + fn as_usize(self) -> usize; + fn low_u8(self) -> u8; + fn high_u8(self) -> u8; +} + +impl U16 for u16 { + fn as_usize(self) -> usize { + usize::from(self) + } + + fn low_u8(self) -> u8 { + self as u8 + } + + fn high_u8(self) -> u8 { + (self >> 8) as u8 + } +} + +pub(crate) trait U32 { + fn as_usize(self) -> usize; + fn low_u8(self) -> u8; + fn low_u16(self) -> u16; + fn high_u16(self) -> u16; +} + +impl U32 for u32 { + fn as_usize(self) -> usize { + #[cfg(debug_assertions)] + { + usize::try_from(self).expect("u32 overflowed usize") + } + #[cfg(not(debug_assertions))] + { + self as usize + } + } + + fn low_u8(self) -> u8 { + self as u8 + } + + fn low_u16(self) -> u16 { + self as u16 + } + + fn high_u16(self) -> u16 { + (self >> 16) as u16 + } +} + +pub(crate) trait U64 { + fn as_usize(self) -> usize; + fn low_u8(self) -> u8; + fn low_u16(self) -> u16; + fn low_u32(self) -> u32; + fn high_u32(self) -> u32; +} + +impl U64 for u64 { + fn as_usize(self) -> usize { + #[cfg(debug_assertions)] + { + usize::try_from(self).expect("u64 overflowed usize") + } + #[cfg(not(debug_assertions))] + { + self as usize + } + } + + fn low_u8(self) -> u8 { + self as u8 + } + + fn low_u16(self) -> u16 { + self as u16 + } + + fn low_u32(self) -> u32 { + self as u32 + } + + fn high_u32(self) -> u32 { + (self >> 32) as u32 + } +} + +pub(crate) trait I32 { + fn as_usize(self) -> usize; + fn to_bits(self) -> u32; + fn from_bits(n: u32) -> i32; +} + +impl I32 for i32 { + fn as_usize(self) -> usize { + #[cfg(debug_assertions)] + { + usize::try_from(self).expect("i32 overflowed usize") + } + #[cfg(not(debug_assertions))] + { + self as usize + } + } + + fn to_bits(self) -> u32 { + self as u32 + } + + fn from_bits(n: u32) -> i32 { + n as i32 + } +} + +pub(crate) trait Usize { + fn as_u8(self) -> u8; + fn as_u16(self) -> u16; + fn 
as_u32(self) -> u32; + fn as_u64(self) -> u64; +} + +impl Usize for usize { + fn as_u8(self) -> u8 { + #[cfg(debug_assertions)] + { + u8::try_from(self).expect("usize overflowed u8") + } + #[cfg(not(debug_assertions))] + { + self as u8 + } + } + + fn as_u16(self) -> u16 { + #[cfg(debug_assertions)] + { + u16::try_from(self).expect("usize overflowed u16") + } + #[cfg(not(debug_assertions))] + { + self as u16 + } + } + + fn as_u32(self) -> u32 { + #[cfg(debug_assertions)] + { + u32::try_from(self).expect("usize overflowed u32") + } + #[cfg(not(debug_assertions))] + { + self as u32 + } + } + + fn as_u64(self) -> u64 { + #[cfg(debug_assertions)] + { + u64::try_from(self).expect("usize overflowed u64") + } + #[cfg(not(debug_assertions))] + { + self as u64 + } + } +} + +// Pointers aren't integers, but we convert pointers to integers to perform +// offset arithmetic in some places. (And no, we don't convert the integers +// back to pointers.) So add 'as_usize' conversions here too for completeness. +// +// These 'as' casts are actually okay because they're always non-lossy. But the +// idea here is to just try and remove as much 'as' as possible, particularly +// in this crate where we are being really paranoid about offsets and making +// sure we don't panic on inputs that might be untrusted. This way, the 'as' +// casts become easier to audit if they're all in one place, even when some of +// them are actually okay 100% of the time. + +pub(crate) trait Pointer { + fn as_usize(self) -> usize; +} + +impl Pointer for *const T { + fn as_usize(self) -> usize { + self as usize + } +} + +pub(crate) trait PointerMut { + fn as_usize(self) -> usize; +} + +impl PointerMut for *mut T { + fn as_usize(self) -> usize { + self as usize + } +} diff --git a/vendor/regex-automata/src/util/interpolate.rs b/vendor/regex-automata/src/util/interpolate.rs new file mode 100644 index 0000000..f274629 --- /dev/null +++ b/vendor/regex-automata/src/util/interpolate.rs @@ -0,0 +1,579 @@ +/*! 
+Provides routines for interpolating capture group references. + +That is, if a replacement string contains references like `$foo` or `${foo1}`, +then they are replaced with the corresponding capture values for the groups +named `foo` and `foo1`, respectively. Similarly, syntax like `$1` and `${1}` +is supported as well, with `1` corresponding to a capture group index and not +a name. + +This module provides the free functions [`string`] and [`bytes`], which +interpolate Rust Unicode strings and byte strings, respectively. + +# Format + +These routines support two different kinds of capture references: unbraced and +braced. + +For the unbraced format, the format supported is `$ref` where `ref` can be +one or more characters in the class `[0-9A-Za-z_]`. `ref` is always the longest +possible parse. So for example, `$1a` corresponds to the capture group named +`1a` and not the capture group at index `1`. If `ref` matches `^[0-9]+$`, then +it is treated as a capture group index itself and not a name. + +For the braced format, the format supported is `${ref}` where `ref` can be any +sequence of bytes except for `}`. If no closing brace occurs, then it is not +considered a capture reference. As with the unbraced format, if `ref` matches +`^[0-9]+$`, then it is treated as a capture group index and not a name. + +The braced format is useful for exerting precise control over the name of the +capture reference. For example, `${1}a` corresponds to the capture group +reference `1` followed by the letter `a`, whereas `$1a` (as mentioned above) +corresponds to the capture group reference `1a`. The braced format is also +useful for expressing capture group names that use characters not supported by +the unbraced format. For example, `${foo[bar].baz}` refers to the capture group +named `foo[bar].baz`. + +If a capture group reference is found and it does not refer to a valid capture +group, then it will be replaced with the empty string. + +To write a literal `$`, use `$$`. 
+ +To be clear, and as exhibited via the type signatures in the routines in this +module, it is impossible for a replacement string to be invalid. A replacement +string may not have the intended semantics, but the interpolation procedure +itself can never fail. +*/ + +use alloc::{string::String, vec::Vec}; + +use crate::util::memchr::memchr; + +/// Accepts a replacement string and interpolates capture references with their +/// corresponding values. +/// +/// `append` should be a function that appends the string value of a capture +/// group at a particular index to the string given. If the capture group +/// index is invalid, then nothing should be appended. +/// +/// `name_to_index` should be a function that maps a capture group name to a +/// capture group index. If the given name doesn't exist, then `None` should +/// be returned. +/// +/// Finally, `dst` is where the final interpolated contents should be written. +/// If `replacement` contains no capture group references, then `dst` will be +/// equivalent to `replacement`. +/// +/// See the [module documentation](self) for details about the format +/// supported. +/// +/// # Example +/// +/// ``` +/// use regex_automata::util::interpolate; +/// +/// let mut dst = String::new(); +/// interpolate::string( +/// "foo $bar baz", +/// |index, dst| { +/// if index == 0 { +/// dst.push_str("BAR"); +/// } +/// }, +/// |name| { +/// if name == "bar" { +/// Some(0) +/// } else { +/// None +/// } +/// }, +/// &mut dst, +/// ); +/// assert_eq!("foo BAR baz", dst); +/// ``` +pub fn string( + mut replacement: &str, + mut append: impl FnMut(usize, &mut String), + mut name_to_index: impl FnMut(&str) -> Option, + dst: &mut String, +) { + while !replacement.is_empty() { + match memchr(b'$', replacement.as_bytes()) { + None => break, + Some(i) => { + dst.push_str(&replacement[..i]); + replacement = &replacement[i..]; + } + } + // Handle escaping of '$'. 
+ if replacement.as_bytes().get(1).map_or(false, |&b| b == b'$') { + dst.push_str("$"); + replacement = &replacement[2..]; + continue; + } + debug_assert!(!replacement.is_empty()); + let cap_ref = match find_cap_ref(replacement.as_bytes()) { + Some(cap_ref) => cap_ref, + None => { + dst.push_str("$"); + replacement = &replacement[1..]; + continue; + } + }; + replacement = &replacement[cap_ref.end..]; + match cap_ref.cap { + Ref::Number(i) => append(i, dst), + Ref::Named(name) => { + if let Some(i) = name_to_index(name) { + append(i, dst); + } + } + } + } + dst.push_str(replacement); +} + +/// Accepts a replacement byte string and interpolates capture references with +/// their corresponding values. +/// +/// `append` should be a function that appends the byte string value of a +/// capture group at a particular index to the byte string given. If the +/// capture group index is invalid, then nothing should be appended. +/// +/// `name_to_index` should be a function that maps a capture group name to a +/// capture group index. If the given name doesn't exist, then `None` should +/// be returned. +/// +/// Finally, `dst` is where the final interpolated contents should be written. +/// If `replacement` contains no capture group references, then `dst` will be +/// equivalent to `replacement`. +/// +/// See the [module documentation](self) for details about the format +/// supported. 
+/// +/// # Example +/// +/// ``` +/// use regex_automata::util::interpolate; +/// +/// let mut dst = vec![]; +/// interpolate::bytes( +/// b"foo $bar baz", +/// |index, dst| { +/// if index == 0 { +/// dst.extend_from_slice(b"BAR"); +/// } +/// }, +/// |name| { +/// if name == "bar" { +/// Some(0) +/// } else { +/// None +/// } +/// }, +/// &mut dst, +/// ); +/// assert_eq!(&b"foo BAR baz"[..], dst); +/// ``` +pub fn bytes( + mut replacement: &[u8], + mut append: impl FnMut(usize, &mut Vec), + mut name_to_index: impl FnMut(&str) -> Option, + dst: &mut Vec, +) { + while !replacement.is_empty() { + match memchr(b'$', replacement) { + None => break, + Some(i) => { + dst.extend_from_slice(&replacement[..i]); + replacement = &replacement[i..]; + } + } + // Handle escaping of '$'. + if replacement.get(1).map_or(false, |&b| b == b'$') { + dst.push(b'$'); + replacement = &replacement[2..]; + continue; + } + debug_assert!(!replacement.is_empty()); + let cap_ref = match find_cap_ref(replacement) { + Some(cap_ref) => cap_ref, + None => { + dst.push(b'$'); + replacement = &replacement[1..]; + continue; + } + }; + replacement = &replacement[cap_ref.end..]; + match cap_ref.cap { + Ref::Number(i) => append(i, dst), + Ref::Named(name) => { + if let Some(i) = name_to_index(name) { + append(i, dst); + } + } + } + } + dst.extend_from_slice(replacement); +} + +/// `CaptureRef` represents a reference to a capture group inside some text. +/// The reference is either a capture group name or a number. +/// +/// It is also tagged with the position in the text following the +/// capture reference. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +struct CaptureRef<'a> { + cap: Ref<'a>, + end: usize, +} + +/// A reference to a capture group in some text. +/// +/// e.g., `$2`, `$foo`, `${foo}`. 
+#[derive(Clone, Copy, Debug, Eq, PartialEq)] +enum Ref<'a> { + Named(&'a str), + Number(usize), +} + +impl<'a> From<&'a str> for Ref<'a> { + fn from(x: &'a str) -> Ref<'a> { + Ref::Named(x) + } +} + +impl From for Ref<'static> { + fn from(x: usize) -> Ref<'static> { + Ref::Number(x) + } +} + +/// Parses a possible reference to a capture group name in the given text, +/// starting at the beginning of `replacement`. +/// +/// If no such valid reference could be found, None is returned. +/// +/// Note that this returns a "possible" reference because this routine doesn't +/// know whether the reference is to a valid group or not. If it winds up not +/// being a valid reference, then it should be replaced with the empty string. +fn find_cap_ref(replacement: &[u8]) -> Option> { + let mut i = 0; + let rep: &[u8] = replacement; + if rep.len() <= 1 || rep[0] != b'$' { + return None; + } + i += 1; + if rep[i] == b'{' { + return find_cap_ref_braced(rep, i + 1); + } + let mut cap_end = i; + while rep.get(cap_end).copied().map_or(false, is_valid_cap_letter) { + cap_end += 1; + } + if cap_end == i { + return None; + } + // We just verified that the range 0..cap_end is valid ASCII, so it must + // therefore be valid UTF-8. If we really cared, we could avoid this UTF-8 + // check via an unchecked conversion or by parsing the number straight from + // &[u8]. + let cap = core::str::from_utf8(&rep[i..cap_end]) + .expect("valid UTF-8 capture name"); + Some(CaptureRef { + cap: match cap.parse::() { + Ok(i) => Ref::Number(i), + Err(_) => Ref::Named(cap), + }, + end: cap_end, + }) +} + +/// Looks for a braced reference, e.g., `${foo1}`. This assumes that an opening +/// brace has been found at `i-1` in `rep`. This then looks for a closing +/// brace and returns the capture reference within the brace. 
+fn find_cap_ref_braced(rep: &[u8], mut i: usize) -> Option> { + assert_eq!(b'{', rep[i.checked_sub(1).unwrap()]); + let start = i; + while rep.get(i).map_or(false, |&b| b != b'}') { + i += 1; + } + if !rep.get(i).map_or(false, |&b| b == b'}') { + return None; + } + // When looking at braced names, we don't put any restrictions on the name, + // so it's possible it could be invalid UTF-8. But a capture group name + // can never be invalid UTF-8, so if we have invalid UTF-8, then we can + // safely return None. + let cap = match core::str::from_utf8(&rep[start..i]) { + Err(_) => return None, + Ok(cap) => cap, + }; + Some(CaptureRef { + cap: match cap.parse::() { + Ok(i) => Ref::Number(i), + Err(_) => Ref::Named(cap), + }, + end: i + 1, + }) +} + +/// Returns true if and only if the given byte is allowed in a capture name +/// written in non-brace form. +fn is_valid_cap_letter(b: u8) -> bool { + match b { + b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_' => true, + _ => false, + } +} + +#[cfg(test)] +mod tests { + use alloc::{string::String, vec, vec::Vec}; + + use super::{find_cap_ref, CaptureRef}; + + macro_rules! find { + ($name:ident, $text:expr) => { + #[test] + fn $name() { + assert_eq!(None, find_cap_ref($text.as_bytes())); + } + }; + ($name:ident, $text:expr, $capref:expr) => { + #[test] + fn $name() { + assert_eq!(Some($capref), find_cap_ref($text.as_bytes())); + } + }; + } + + macro_rules! 
c { + ($name_or_number:expr, $pos:expr) => { + CaptureRef { cap: $name_or_number.into(), end: $pos } + }; + } + + find!(find_cap_ref1, "$foo", c!("foo", 4)); + find!(find_cap_ref2, "${foo}", c!("foo", 6)); + find!(find_cap_ref3, "$0", c!(0, 2)); + find!(find_cap_ref4, "$5", c!(5, 2)); + find!(find_cap_ref5, "$10", c!(10, 3)); + // See https://github.com/rust-lang/regex/pull/585 + // for more on characters following numbers + find!(find_cap_ref6, "$42a", c!("42a", 4)); + find!(find_cap_ref7, "${42}a", c!(42, 5)); + find!(find_cap_ref8, "${42"); + find!(find_cap_ref9, "${42 "); + find!(find_cap_ref10, " $0 "); + find!(find_cap_ref11, "$"); + find!(find_cap_ref12, " "); + find!(find_cap_ref13, ""); + find!(find_cap_ref14, "$1-$2", c!(1, 2)); + find!(find_cap_ref15, "$1_$2", c!("1_", 3)); + find!(find_cap_ref16, "$x-$y", c!("x", 2)); + find!(find_cap_ref17, "$x_$y", c!("x_", 3)); + find!(find_cap_ref18, "${#}", c!("#", 4)); + find!(find_cap_ref19, "${Z[}", c!("Z[", 5)); + find!(find_cap_ref20, "${¾}", c!("¾", 5)); + find!(find_cap_ref21, "${¾a}", c!("¾a", 6)); + find!(find_cap_ref22, "${a¾}", c!("a¾", 6)); + find!(find_cap_ref23, "${☃}", c!("☃", 6)); + find!(find_cap_ref24, "${a☃}", c!("a☃", 7)); + find!(find_cap_ref25, "${☃a}", c!("☃a", 7)); + find!(find_cap_ref26, "${名字}", c!("名字", 9)); + + fn interpolate_string( + mut name_to_index: Vec<(&'static str, usize)>, + caps: Vec<&'static str>, + replacement: &str, + ) -> String { + name_to_index.sort_by_key(|x| x.0); + + let mut dst = String::new(); + super::string( + replacement, + |i, dst| { + if let Some(&s) = caps.get(i) { + dst.push_str(s); + } + }, + |name| -> Option { + name_to_index + .binary_search_by_key(&name, |x| x.0) + .ok() + .map(|i| name_to_index[i].1) + }, + &mut dst, + ); + dst + } + + fn interpolate_bytes( + mut name_to_index: Vec<(&'static str, usize)>, + caps: Vec<&'static str>, + replacement: &str, + ) -> String { + name_to_index.sort_by_key(|x| x.0); + + let mut dst = vec![]; + super::bytes( + 
replacement.as_bytes(), + |i, dst| { + if let Some(&s) = caps.get(i) { + dst.extend_from_slice(s.as_bytes()); + } + }, + |name| -> Option { + name_to_index + .binary_search_by_key(&name, |x| x.0) + .ok() + .map(|i| name_to_index[i].1) + }, + &mut dst, + ); + String::from_utf8(dst).unwrap() + } + + macro_rules! interp { + ($name:ident, $map:expr, $caps:expr, $hay:expr, $expected:expr $(,)*) => { + #[test] + fn $name() { + assert_eq!( + $expected, + interpolate_string($map, $caps, $hay), + "interpolate::string failed", + ); + assert_eq!( + $expected, + interpolate_bytes($map, $caps, $hay), + "interpolate::bytes failed", + ); + } + }; + } + + interp!( + interp1, + vec![("foo", 2)], + vec!["", "", "xxx"], + "test $foo test", + "test xxx test", + ); + + interp!( + interp2, + vec![("foo", 2)], + vec!["", "", "xxx"], + "test$footest", + "test", + ); + + interp!( + interp3, + vec![("foo", 2)], + vec!["", "", "xxx"], + "test${foo}test", + "testxxxtest", + ); + + interp!( + interp4, + vec![("foo", 2)], + vec!["", "", "xxx"], + "test$2test", + "test", + ); + + interp!( + interp5, + vec![("foo", 2)], + vec!["", "", "xxx"], + "test${2}test", + "testxxxtest", + ); + + interp!( + interp6, + vec![("foo", 2)], + vec!["", "", "xxx"], + "test $$foo test", + "test $foo test", + ); + + interp!( + interp7, + vec![("foo", 2)], + vec!["", "", "xxx"], + "test $foo", + "test xxx", + ); + + interp!( + interp8, + vec![("foo", 2)], + vec!["", "", "xxx"], + "$foo test", + "xxx test", + ); + + interp!( + interp9, + vec![("bar", 1), ("foo", 2)], + vec!["", "yyy", "xxx"], + "test $bar$foo", + "test yyyxxx", + ); + + interp!( + interp10, + vec![("bar", 1), ("foo", 2)], + vec!["", "yyy", "xxx"], + "test $ test", + "test $ test", + ); + + interp!( + interp11, + vec![("bar", 1), ("foo", 2)], + vec!["", "yyy", "xxx"], + "test ${} test", + "test test", + ); + + interp!( + interp12, + vec![("bar", 1), ("foo", 2)], + vec!["", "yyy", "xxx"], + "test ${ } test", + "test test", + ); + + interp!( + interp13, 
+ vec![("bar", 1), ("foo", 2)], + vec!["", "yyy", "xxx"], + "test ${a b} test", + "test test", + ); + + interp!( + interp14, + vec![("bar", 1), ("foo", 2)], + vec!["", "yyy", "xxx"], + "test ${a} test", + "test test", + ); + + // This is a funny case where a braced reference is never closed, but + // within the unclosed braced reference, there is an unbraced reference. + // In this case, the braced reference is just treated literally and the + // unbraced reference is found. + interp!( + interp15, + vec![("bar", 1), ("foo", 2)], + vec!["", "yyy", "xxx"], + "test ${wat $bar ok", + "test ${wat yyy ok", + ); +} diff --git a/vendor/regex-automata/src/util/iter.rs b/vendor/regex-automata/src/util/iter.rs new file mode 100644 index 0000000..a789fa0 --- /dev/null +++ b/vendor/regex-automata/src/util/iter.rs @@ -0,0 +1,1027 @@ +/*! +Generic helpers for iteration of matches from a regex engine in a haystack. + +The principle type in this module is a [`Searcher`]. A `Searcher` provides +its own lower level iterator-like API in addition to methods for constructing +types that implement `Iterator`. The documentation for `Searcher` explains a +bit more about why these different APIs exist. + +Currently, this module supports iteration over any regex engine that works +with the [`HalfMatch`], [`Match`] or [`Captures`] types. +*/ + +#[cfg(feature = "alloc")] +use crate::util::captures::Captures; +use crate::util::search::{HalfMatch, Input, Match, MatchError}; + +/// A searcher for creating iterators and performing lower level iteration. +/// +/// This searcher encapsulates the logic required for finding all successive +/// non-overlapping matches in a haystack. In theory, iteration would look +/// something like this: +/// +/// 1. Setting the start position to `0`. +/// 2. Execute a regex search. If no match, end iteration. +/// 3. Report the match and set the start position to the end of the match. +/// 4. Go back to (2). 
+/// +/// And if this were indeed the case, it's likely that `Searcher` wouldn't +/// exist. Unfortunately, because a regex may match the empty string, the above +/// logic won't work for all possible regexes. Namely, if an empty match is +/// found, then step (3) would set the start position of the search to the +/// position it was at. Thus, iteration would never end. +/// +/// Instead, a `Searcher` knows how to detect these cases and forcefully +/// advance iteration in the case of an empty match that overlaps with a +/// previous match. +/// +/// If you know that your regex cannot match any empty string, then the simple +/// algorithm described above will work correctly. +/// +/// When possible, prefer the iterators defined on the regex engine you're +/// using. This tries to abstract over the regex engine and is thus a bit more +/// unwieldy to use. +/// +/// In particular, a `Searcher` is not itself an iterator. Instead, it provides +/// `advance` routines that permit moving the search along explicitly. It also +/// provides various routines, like [`Searcher::into_matches_iter`], that +/// accept a closure (representing how a regex engine executes a search) and +/// returns a conventional iterator. +/// +/// The lifetime parameters come from the [`Input`] type passed to +/// [`Searcher::new`]: +/// +/// * `'h` is the lifetime of the underlying haystack. +/// +/// # Searcher vs Iterator +/// +/// Why does a search type with "advance" APIs exist at all when we also have +/// iterators? Unfortunately, the reasoning behind this split is a complex +/// combination of the following things: +/// +/// 1. While many of the regex engines expose their own iterators, it is also +/// nice to expose this lower level iteration helper because it permits callers +/// to provide their own `Input` configuration. Moreover, a `Searcher` can work +/// with _any_ regex engine instead of only the ones defined in this crate. 
+/// This way, everyone benefits from a shared iteration implementation. +/// 2. There are many different regex engines that, while they have the same +/// match semantics, they have slightly different APIs. Iteration is just +/// complex enough to want to share code, and so we need a way of abstracting +/// over those different regex engines. While we could define a new trait that +/// describes any regex engine search API, it would wind up looking very close +/// to a closure. While there may still be reasons for the more generic trait +/// to exist, for now and for the purposes of iteration, we use a closure. +/// Closures also provide a lot of easy flexibility at the call site, in that +/// they permit the caller to borrow any kind of state they want for use during +/// each search call. +/// 3. As a result of using closures, and because closures are anonymous types +/// that cannot be named, it is difficult to encapsulate them without both +/// costs to speed and added complexity to the public API. For example, in +/// defining an iterator type like +/// [`dfa::regex::FindMatches`](crate::dfa::regex::FindMatches), +/// if we use a closure internally, it's not possible to name this type in the +/// return type of the iterator constructor. Thus, the only way around it is +/// to erase the type by boxing it and turning it into a `Box<dyn FnMut(...)>`. +/// This boxed closure is unlikely to be inlined _and_ it infects the public +/// API in subtle ways. Namely, unless you declare the closure as implementing +/// `Send` and `Sync`, then the resulting iterator type won't implement it +/// either. But there are practical issues with requiring the closure to +/// implement `Send` and `Sync` that result in other API complexities that +/// are beyond the scope of this already long exposition. +/// 4. Some regex engines expose more complex match information than just +/// "which pattern matched" and "at what offsets." 
For example, the PikeVM +/// exposes match spans for each capturing group that participated in the +/// match. In such cases, it can be quite beneficial to reuse the capturing +/// group allocation on subsequent searches. A proper iterator doesn't permit +/// this API due to its interface, so it's useful to have something a bit lower +/// level that permits callers to amortize allocations while also reusing a +/// shared implementation of iteration. (See the documentation for +/// [`Searcher::advance`] for an example of using the "advance" API with the +/// PikeVM.) +/// +/// What this boils down to is that there are "advance" APIs which require +/// handing a closure to it for every call, and there are also APIs to create +/// iterators from a closure. The former are useful for _implementing_ +/// iterators or when you need more flexibility, while the latter are useful +/// for conveniently writing custom iterators on-the-fly. +/// +/// # Example: iterating with captures +/// +/// Several regex engines in this crate offer convenient iterator APIs over +/// [`Captures`] values. To do so, this requires allocating a new `Captures` +/// value for each iteration step. This can perhaps be more costly than you +/// might want. 
Instead of implementing your own iterator to avoid that +/// cost (which can be a little subtle if you want to handle empty matches +/// correctly), you can use this `Searcher` to do it for you: +/// +/// ``` +/// use regex_automata::{ +/// nfa::thompson::pikevm::PikeVM, +/// util::iter::Searcher, +/// Input, Span, +/// }; +/// +/// let re = PikeVM::new("foo(?P[0-9]+)")?; +/// let haystack = "foo1 foo12 foo123"; +/// +/// let mut caps = re.create_captures(); +/// let mut cache = re.create_cache(); +/// let mut matches = vec![]; +/// let mut searcher = Searcher::new(Input::new(haystack)); +/// while let Some(_) = searcher.advance(|input| { +/// re.search(&mut cache, input, &mut caps); +/// Ok(caps.get_match()) +/// }) { +/// // The unwrap is OK since 'numbers' matches if the pattern matches. +/// matches.push(caps.get_group_by_name("numbers").unwrap()); +/// } +/// assert_eq!(matches, vec![ +/// Span::from(3..4), +/// Span::from(8..10), +/// Span::from(14..17), +/// ]); +/// +/// # Ok::<(), Box>(()) +/// ``` +#[derive(Clone, Debug)] +pub struct Searcher<'h> { + /// The input parameters to give to each regex engine call. + /// + /// The start position of the search is mutated during iteration. + input: Input<'h>, + /// Records the end offset of the most recent match. This is necessary to + /// handle a corner case for preventing empty matches from overlapping with + /// the ending bounds of a prior match. + last_match_end: Option, +} + +impl<'h> Searcher<'h> { + /// Create a new fallible non-overlapping matches iterator. + /// + /// The given `input` provides the parameters (including the haystack), + /// while the `finder` represents a closure that calls the underlying regex + /// engine. The closure may borrow any additional state that is needed, + /// such as a prefilter scanner. + pub fn new(input: Input<'h>) -> Searcher<'h> { + Searcher { input, last_match_end: None } + } + + /// Returns the current `Input` used by this searcher. 
+ /// + /// The `Input` returned is generally equivalent to the one given to + /// [`Searcher::new`], but its start position may be different to reflect + /// the start of the next search to be executed. + pub fn input<'s>(&'s self) -> &'s Input<'h> { + &self.input + } + + /// Return the next half match for an infallible search if one exists, and + /// advance to the next position. + /// + /// This is like `try_advance_half`, except errors are converted into + /// panics. + /// + /// # Panics + /// + /// If the given closure returns an error, then this panics. This is useful + /// when you know your underlying regex engine has been configured to not + /// return an error. + /// + /// # Example + /// + /// This example shows how to use a `Searcher` to iterate over all matches + /// when using a DFA, which only provides "half" matches. + /// + /// ``` + /// use regex_automata::{ + /// hybrid::dfa::DFA, + /// util::iter::Searcher, + /// HalfMatch, Input, + /// }; + /// + /// let re = DFA::new(r"[0-9]{4}-[0-9]{2}-[0-9]{2}")?; + /// let mut cache = re.create_cache(); + /// + /// let input = Input::new("2010-03-14 2016-10-08 2020-10-22"); + /// let mut it = Searcher::new(input); + /// + /// let expected = Some(HalfMatch::must(0, 10)); + /// let got = it.advance_half(|input| re.try_search_fwd(&mut cache, input)); + /// assert_eq!(expected, got); + /// + /// let expected = Some(HalfMatch::must(0, 21)); + /// let got = it.advance_half(|input| re.try_search_fwd(&mut cache, input)); + /// assert_eq!(expected, got); + /// + /// let expected = Some(HalfMatch::must(0, 32)); + /// let got = it.advance_half(|input| re.try_search_fwd(&mut cache, input)); + /// assert_eq!(expected, got); + /// + /// let expected = None; + /// let got = it.advance_half(|input| re.try_search_fwd(&mut cache, input)); + /// assert_eq!(expected, got); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// This correctly moves iteration forward even when an empty match occurs: + /// + /// ``` + /// use 
regex_automata::{ + /// hybrid::dfa::DFA, + /// util::iter::Searcher, + /// HalfMatch, Input, + /// }; + /// + /// let re = DFA::new(r"a|")?; + /// let mut cache = re.create_cache(); + /// + /// let input = Input::new("abba"); + /// let mut it = Searcher::new(input); + /// + /// let expected = Some(HalfMatch::must(0, 1)); + /// let got = it.advance_half(|input| re.try_search_fwd(&mut cache, input)); + /// assert_eq!(expected, got); + /// + /// let expected = Some(HalfMatch::must(0, 2)); + /// let got = it.advance_half(|input| re.try_search_fwd(&mut cache, input)); + /// assert_eq!(expected, got); + /// + /// let expected = Some(HalfMatch::must(0, 4)); + /// let got = it.advance_half(|input| re.try_search_fwd(&mut cache, input)); + /// assert_eq!(expected, got); + /// + /// let expected = None; + /// let got = it.advance_half(|input| re.try_search_fwd(&mut cache, input)); + /// assert_eq!(expected, got); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn advance_half(&mut self, finder: F) -> Option + where + F: FnMut(&Input<'_>) -> Result, MatchError>, + { + match self.try_advance_half(finder) { + Ok(m) => m, + Err(err) => panic!( + "unexpected regex half find error: {}\n\ + to handle find errors, use 'try' or 'search' methods", + err, + ), + } + } + + /// Return the next match for an infallible search if one exists, and + /// advance to the next position. + /// + /// The search is advanced even in the presence of empty matches by + /// forbidding empty matches from overlapping with any other match. + /// + /// This is like `try_advance`, except errors are converted into panics. + /// + /// # Panics + /// + /// If the given closure returns an error, then this panics. This is useful + /// when you know your underlying regex engine has been configured to not + /// return an error. 
+ /// + /// # Example + /// + /// This example shows how to use a `Searcher` to iterate over all matches + /// when using a regex based on lazy DFAs: + /// + /// ``` + /// use regex_automata::{ + /// hybrid::regex::Regex, + /// util::iter::Searcher, + /// Match, Input, + /// }; + /// + /// let re = Regex::new(r"[0-9]{4}-[0-9]{2}-[0-9]{2}")?; + /// let mut cache = re.create_cache(); + /// + /// let input = Input::new("2010-03-14 2016-10-08 2020-10-22"); + /// let mut it = Searcher::new(input); + /// + /// let expected = Some(Match::must(0, 0..10)); + /// let got = it.advance(|input| re.try_search(&mut cache, input)); + /// assert_eq!(expected, got); + /// + /// let expected = Some(Match::must(0, 11..21)); + /// let got = it.advance(|input| re.try_search(&mut cache, input)); + /// assert_eq!(expected, got); + /// + /// let expected = Some(Match::must(0, 22..32)); + /// let got = it.advance(|input| re.try_search(&mut cache, input)); + /// assert_eq!(expected, got); + /// + /// let expected = None; + /// let got = it.advance(|input| re.try_search(&mut cache, input)); + /// assert_eq!(expected, got); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// This example shows the same as above, but with the PikeVM. This example + /// is useful because it shows how to use this API even when the regex + /// engine doesn't directly return a `Match`. 
+ /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::pikevm::PikeVM, + /// util::iter::Searcher, + /// Match, Input, + /// }; + /// + /// let re = PikeVM::new(r"[0-9]{4}-[0-9]{2}-[0-9]{2}")?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// + /// let input = Input::new("2010-03-14 2016-10-08 2020-10-22"); + /// let mut it = Searcher::new(input); + /// + /// let expected = Some(Match::must(0, 0..10)); + /// let got = it.advance(|input| { + /// re.search(&mut cache, input, &mut caps); + /// Ok(caps.get_match()) + /// }); + /// // Note that if we wanted to extract capturing group spans, we could + /// // do that here with 'caps'. + /// assert_eq!(expected, got); + /// + /// let expected = Some(Match::must(0, 11..21)); + /// let got = it.advance(|input| { + /// re.search(&mut cache, input, &mut caps); + /// Ok(caps.get_match()) + /// }); + /// assert_eq!(expected, got); + /// + /// let expected = Some(Match::must(0, 22..32)); + /// let got = it.advance(|input| { + /// re.search(&mut cache, input, &mut caps); + /// Ok(caps.get_match()) + /// }); + /// assert_eq!(expected, got); + /// + /// let expected = None; + /// let got = it.advance(|input| { + /// re.search(&mut cache, input, &mut caps); + /// Ok(caps.get_match()) + /// }); + /// assert_eq!(expected, got); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn advance(&mut self, finder: F) -> Option + where + F: FnMut(&Input<'_>) -> Result, MatchError>, + { + match self.try_advance(finder) { + Ok(m) => m, + Err(err) => panic!( + "unexpected regex find error: {}\n\ + to handle find errors, use 'try' or 'search' methods", + err, + ), + } + } + + /// Return the next half match for a fallible search if one exists, and + /// advance to the next position. + /// + /// This is like `advance_half`, except it permits callers to handle errors + /// during iteration. 
+ #[inline] + pub fn try_advance_half( + &mut self, + mut finder: F, + ) -> Result, MatchError> + where + F: FnMut(&Input<'_>) -> Result, MatchError>, + { + let mut m = match finder(&self.input)? { + None => return Ok(None), + Some(m) => m, + }; + if Some(m.offset()) == self.last_match_end { + m = match self.handle_overlapping_empty_half_match(m, finder)? { + None => return Ok(None), + Some(m) => m, + }; + } + self.input.set_start(m.offset()); + self.last_match_end = Some(m.offset()); + Ok(Some(m)) + } + + /// Return the next match for a fallible search if one exists, and advance + /// to the next position. + /// + /// This is like `advance`, except it permits callers to handle errors + /// during iteration. + #[inline] + pub fn try_advance( + &mut self, + mut finder: F, + ) -> Result, MatchError> + where + F: FnMut(&Input<'_>) -> Result, MatchError>, + { + let mut m = match finder(&self.input)? { + None => return Ok(None), + Some(m) => m, + }; + if m.is_empty() && Some(m.end()) == self.last_match_end { + m = match self.handle_overlapping_empty_match(m, finder)? { + None => return Ok(None), + Some(m) => m, + }; + } + self.input.set_start(m.end()); + self.last_match_end = Some(m.end()); + Ok(Some(m)) + } + + /// Given a closure that executes a single search, return an iterator over + /// all successive non-overlapping half matches. + /// + /// The iterator returned yields result values. If the underlying regex + /// engine is configured to never return an error, consider calling + /// [`TryHalfMatchesIter::infallible`] to convert errors into panics. + /// + /// # Example + /// + /// This example shows how to use a `Searcher` to create a proper + /// iterator over half matches. 
+ /// + /// ``` + /// use regex_automata::{ + /// hybrid::dfa::DFA, + /// util::iter::Searcher, + /// HalfMatch, Input, + /// }; + /// + /// let re = DFA::new(r"[0-9]{4}-[0-9]{2}-[0-9]{2}")?; + /// let mut cache = re.create_cache(); + /// + /// let input = Input::new("2010-03-14 2016-10-08 2020-10-22"); + /// let mut it = Searcher::new(input).into_half_matches_iter(|input| { + /// re.try_search_fwd(&mut cache, input) + /// }); + /// + /// let expected = Some(Ok(HalfMatch::must(0, 10))); + /// assert_eq!(expected, it.next()); + /// + /// let expected = Some(Ok(HalfMatch::must(0, 21))); + /// assert_eq!(expected, it.next()); + /// + /// let expected = Some(Ok(HalfMatch::must(0, 32))); + /// assert_eq!(expected, it.next()); + /// + /// let expected = None; + /// assert_eq!(expected, it.next()); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn into_half_matches_iter( + self, + finder: F, + ) -> TryHalfMatchesIter<'h, F> + where + F: FnMut(&Input<'_>) -> Result, MatchError>, + { + TryHalfMatchesIter { it: self, finder } + } + + /// Given a closure that executes a single search, return an iterator over + /// all successive non-overlapping matches. + /// + /// The iterator returned yields result values. If the underlying regex + /// engine is configured to never return an error, consider calling + /// [`TryMatchesIter::infallible`] to convert errors into panics. + /// + /// # Example + /// + /// This example shows how to use a `Searcher` to create a proper + /// iterator over matches. 
+ /// + /// ``` + /// use regex_automata::{ + /// hybrid::regex::Regex, + /// util::iter::Searcher, + /// Match, Input, + /// }; + /// + /// let re = Regex::new(r"[0-9]{4}-[0-9]{2}-[0-9]{2}")?; + /// let mut cache = re.create_cache(); + /// + /// let input = Input::new("2010-03-14 2016-10-08 2020-10-22"); + /// let mut it = Searcher::new(input).into_matches_iter(|input| { + /// re.try_search(&mut cache, input) + /// }); + /// + /// let expected = Some(Ok(Match::must(0, 0..10))); + /// assert_eq!(expected, it.next()); + /// + /// let expected = Some(Ok(Match::must(0, 11..21))); + /// assert_eq!(expected, it.next()); + /// + /// let expected = Some(Ok(Match::must(0, 22..32))); + /// assert_eq!(expected, it.next()); + /// + /// let expected = None; + /// assert_eq!(expected, it.next()); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn into_matches_iter(self, finder: F) -> TryMatchesIter<'h, F> + where + F: FnMut(&Input<'_>) -> Result, MatchError>, + { + TryMatchesIter { it: self, finder } + } + + /// Given a closure that executes a single search, return an iterator over + /// all successive non-overlapping `Captures` values. + /// + /// The iterator returned yields result values. If the underlying regex + /// engine is configured to never return an error, consider calling + /// [`TryCapturesIter::infallible`] to convert errors into panics. + /// + /// Unlike the other iterator constructors, this accepts an initial + /// `Captures` value. This `Captures` value is reused for each search, and + /// the iterator implementation clones it before returning it. The caller + /// must provide this value because the iterator is purposely ignorant + /// of the underlying regex engine and thus doesn't know how to create + /// one itself. More to the point, a `Captures` value itself has a few + /// different constructors, which change which kind of information is + /// available to query in exchange for search performance. 
+ /// + /// # Example + /// + /// This example shows how to use a `Searcher` to create a proper iterator + /// over `Captures` values, which provides access to all capturing group + /// spans for each match. + /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::pikevm::PikeVM, + /// util::iter::Searcher, + /// Input, + /// }; + /// + /// let re = PikeVM::new( + /// r"(?P[0-9]{4})-(?P[0-9]{2})-(?P[0-9]{2})", + /// )?; + /// let (mut cache, caps) = (re.create_cache(), re.create_captures()); + /// + /// let haystack = "2010-03-14 2016-10-08 2020-10-22"; + /// let input = Input::new(haystack); + /// let mut it = Searcher::new(input) + /// .into_captures_iter(caps, |input, caps| { + /// re.search(&mut cache, input, caps); + /// Ok(()) + /// }); + /// + /// let got = it.next().expect("first date")?; + /// let year = got.get_group_by_name("y").expect("must match"); + /// assert_eq!("2010", &haystack[year]); + /// + /// let got = it.next().expect("second date")?; + /// let month = got.get_group_by_name("m").expect("must match"); + /// assert_eq!("10", &haystack[month]); + /// + /// let got = it.next().expect("third date")?; + /// let day = got.get_group_by_name("d").expect("must match"); + /// assert_eq!("22", &haystack[day]); + /// + /// assert!(it.next().is_none()); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[cfg(feature = "alloc")] + #[inline] + pub fn into_captures_iter( + self, + caps: Captures, + finder: F, + ) -> TryCapturesIter<'h, F> + where + F: FnMut(&Input<'_>, &mut Captures) -> Result<(), MatchError>, + { + TryCapturesIter { it: self, caps, finder } + } + + /// Handles the special case of a match that begins where the previous + /// match ended. Without this special handling, it'd be possible to get + /// stuck where an empty match never results in forward progress. This + /// also makes it more consistent with how presiding general purpose regex + /// engines work. 
+ #[cold] + #[inline(never)] + fn handle_overlapping_empty_half_match( + &mut self, + _: HalfMatch, + mut finder: F, + ) -> Result, MatchError> + where + F: FnMut(&Input<'_>) -> Result, MatchError>, + { + // Since we are only here when 'm.offset()' matches the offset of the + // last match, it follows that this must have been an empty match. + // Since we both need to make progress *and* prevent overlapping + // matches, we discard this match and advance the search by 1. + // + // Note that this may start a search in the middle of a codepoint. The + // regex engines themselves are expected to deal with that and not + // report any matches within a codepoint if they are configured in + // UTF-8 mode. + self.input.set_start(self.input.start().checked_add(1).unwrap()); + finder(&self.input) + } + + /// Handles the special case of an empty match by ensuring that 1) the + /// iterator always advances and 2) empty matches never overlap with other + /// matches. + /// + /// (1) is necessary because we principally make progress by setting the + /// starting location of the next search to the ending location of the last + /// match. But if a match is empty, then this results in a search that does + /// not advance and thus does not terminate. + /// + /// (2) is not strictly necessary, but makes intuitive sense and matches + /// the presiding behavior of most general purpose regex engines. The + /// "intuitive sense" here is that we want to report NON-overlapping + /// matches. So for example, given the regex 'a|(?:)' against the haystack + /// 'a', without the special handling, you'd get the matches [0, 1) and [1, + /// 1), where the latter overlaps with the end bounds of the former. + /// + /// Note that we mark this cold and forcefully prevent inlining because + /// handling empty matches like this is extremely rare and does require + /// quite a bit of code, comparatively. 
Keeping this code out of the main + /// iterator function keeps it smaller and more amenable to inlining + /// itself. + #[cold] + #[inline(never)] + fn handle_overlapping_empty_match( + &mut self, + m: Match, + mut finder: F, + ) -> Result, MatchError> + where + F: FnMut(&Input<'_>) -> Result, MatchError>, + { + assert!(m.is_empty()); + self.input.set_start(self.input.start().checked_add(1).unwrap()); + finder(&self.input) + } +} + +/// An iterator over all non-overlapping half matches for a fallible search. +/// +/// The iterator yields a `Result` value until no more +/// matches could be found. +/// +/// The type parameters are as follows: +/// +/// * `F` represents the type of a closure that executes the search. +/// +/// The lifetime parameters come from the [`Input`] type: +/// +/// * `'h` is the lifetime of the underlying haystack. +/// +/// When possible, prefer the iterators defined on the regex engine you're +/// using. This tries to abstract over the regex engine and is thus a bit more +/// unwieldy to use. +/// +/// This iterator is created by [`Searcher::into_half_matches_iter`]. +pub struct TryHalfMatchesIter<'h, F> { + it: Searcher<'h>, + finder: F, +} + +impl<'h, F> TryHalfMatchesIter<'h, F> { + /// Return an infallible version of this iterator. + /// + /// Any item yielded that corresponds to an error results in a panic. This + /// is useful if your underlying regex engine is configured in a way that + /// it is guaranteed to never return an error. + pub fn infallible(self) -> HalfMatchesIter<'h, F> { + HalfMatchesIter(self) + } + + /// Returns the current `Input` used by this iterator. + /// + /// The `Input` returned is generally equivalent to the one used to + /// construct this iterator, but its start position may be different to + /// reflect the start of the next search to be executed. 
+ pub fn input<'i>(&'i self) -> &'i Input<'h> { + self.it.input() + } +} + +impl<'h, F> Iterator for TryHalfMatchesIter<'h, F> +where + F: FnMut(&Input<'_>) -> Result, MatchError>, +{ + type Item = Result; + + #[inline] + fn next(&mut self) -> Option> { + self.it.try_advance_half(&mut self.finder).transpose() + } +} + +impl<'h, F> core::fmt::Debug for TryHalfMatchesIter<'h, F> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_struct("TryHalfMatchesIter") + .field("it", &self.it) + .field("finder", &"") + .finish() + } +} + +/// An iterator over all non-overlapping half matches for an infallible search. +/// +/// The iterator yields a [`HalfMatch`] value until no more matches could be +/// found. +/// +/// The type parameters are as follows: +/// +/// * `F` represents the type of a closure that executes the search. +/// +/// The lifetime parameters come from the [`Input`] type: +/// +/// * `'h` is the lifetime of the underlying haystack. +/// +/// When possible, prefer the iterators defined on the regex engine you're +/// using. This tries to abstract over the regex engine and is thus a bit more +/// unwieldy to use. +/// +/// This iterator is created by [`Searcher::into_half_matches_iter`] and +/// then calling [`TryHalfMatchesIter::infallible`]. +#[derive(Debug)] +pub struct HalfMatchesIter<'h, F>(TryHalfMatchesIter<'h, F>); + +impl<'h, F> HalfMatchesIter<'h, F> { + /// Returns the current `Input` used by this iterator. + /// + /// The `Input` returned is generally equivalent to the one used to + /// construct this iterator, but its start position may be different to + /// reflect the start of the next search to be executed. + pub fn input<'i>(&'i self) -> &'i Input<'h> { + self.0.it.input() + } +} + +impl<'h, F> Iterator for HalfMatchesIter<'h, F> +where + F: FnMut(&Input<'_>) -> Result, MatchError>, +{ + type Item = HalfMatch; + + #[inline] + fn next(&mut self) -> Option { + match self.0.next()? 
{ + Ok(m) => Some(m), + Err(err) => panic!( + "unexpected regex half find error: {}\n\ + to handle find errors, use 'try' or 'search' methods", + err, + ), + } + } +} + +/// An iterator over all non-overlapping matches for a fallible search. +/// +/// The iterator yields a `Result` value until no more +/// matches could be found. +/// +/// The type parameters are as follows: +/// +/// * `F` represents the type of a closure that executes the search. +/// +/// The lifetime parameters come from the [`Input`] type: +/// +/// * `'h` is the lifetime of the underlying haystack. +/// +/// When possible, prefer the iterators defined on the regex engine you're +/// using. This tries to abstract over the regex engine and is thus a bit more +/// unwieldy to use. +/// +/// This iterator is created by [`Searcher::into_matches_iter`]. +pub struct TryMatchesIter<'h, F> { + it: Searcher<'h>, + finder: F, +} + +impl<'h, F> TryMatchesIter<'h, F> { + /// Return an infallible version of this iterator. + /// + /// Any item yielded that corresponds to an error results in a panic. This + /// is useful if your underlying regex engine is configured in a way that + /// it is guaranteed to never return an error. + pub fn infallible(self) -> MatchesIter<'h, F> { + MatchesIter(self) + } + + /// Returns the current `Input` used by this iterator. + /// + /// The `Input` returned is generally equivalent to the one used to + /// construct this iterator, but its start position may be different to + /// reflect the start of the next search to be executed. 
+ pub fn input<'i>(&'i self) -> &'i Input<'h> { + self.it.input() + } +} + +impl<'h, F> Iterator for TryMatchesIter<'h, F> +where + F: FnMut(&Input<'_>) -> Result, MatchError>, +{ + type Item = Result; + + #[inline] + fn next(&mut self) -> Option> { + self.it.try_advance(&mut self.finder).transpose() + } +} + +impl<'h, F> core::fmt::Debug for TryMatchesIter<'h, F> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_struct("TryMatchesIter") + .field("it", &self.it) + .field("finder", &"") + .finish() + } +} + +/// An iterator over all non-overlapping matches for an infallible search. +/// +/// The iterator yields a [`Match`] value until no more matches could be found. +/// +/// The type parameters are as follows: +/// +/// * `F` represents the type of a closure that executes the search. +/// +/// The lifetime parameters come from the [`Input`] type: +/// +/// * `'h` is the lifetime of the underlying haystack. +/// +/// When possible, prefer the iterators defined on the regex engine you're +/// using. This tries to abstract over the regex engine and is thus a bit more +/// unwieldy to use. +/// +/// This iterator is created by [`Searcher::into_matches_iter`] and +/// then calling [`TryMatchesIter::infallible`]. +#[derive(Debug)] +pub struct MatchesIter<'h, F>(TryMatchesIter<'h, F>); + +impl<'h, F> MatchesIter<'h, F> { + /// Returns the current `Input` used by this iterator. + /// + /// The `Input` returned is generally equivalent to the one used to + /// construct this iterator, but its start position may be different to + /// reflect the start of the next search to be executed. + pub fn input<'i>(&'i self) -> &'i Input<'h> { + self.0.it.input() + } +} + +impl<'h, F> Iterator for MatchesIter<'h, F> +where + F: FnMut(&Input<'_>) -> Result, MatchError>, +{ + type Item = Match; + + #[inline] + fn next(&mut self) -> Option { + match self.0.next()? 
{ + Ok(m) => Some(m), + Err(err) => panic!( + "unexpected regex find error: {}\n\ + to handle find errors, use 'try' or 'search' methods", + err, + ), + } + } +} + +/// An iterator over all non-overlapping captures for a fallible search. +/// +/// The iterator yields a `Result` value until no more +/// matches could be found. +/// +/// The type parameters are as follows: +/// +/// * `F` represents the type of a closure that executes the search. +/// +/// The lifetime parameters come from the [`Input`] type: +/// +/// * `'h` is the lifetime of the underlying haystack. +/// +/// When possible, prefer the iterators defined on the regex engine you're +/// using. This tries to abstract over the regex engine and is thus a bit more +/// unwieldy to use. +/// +/// This iterator is created by [`Searcher::into_captures_iter`]. +#[cfg(feature = "alloc")] +pub struct TryCapturesIter<'h, F> { + it: Searcher<'h>, + caps: Captures, + finder: F, +} + +#[cfg(feature = "alloc")] +impl<'h, F> TryCapturesIter<'h, F> { + /// Return an infallible version of this iterator. + /// + /// Any item yielded that corresponds to an error results in a panic. This + /// is useful if your underlying regex engine is configured in a way that + /// it is guaranteed to never return an error. 
+ pub fn infallible(self) -> CapturesIter<'h, F> { + CapturesIter(self) + } +} + +#[cfg(feature = "alloc")] +impl<'h, F> Iterator for TryCapturesIter<'h, F> +where + F: FnMut(&Input<'_>, &mut Captures) -> Result<(), MatchError>, +{ + type Item = Result; + + #[inline] + fn next(&mut self) -> Option> { + let TryCapturesIter { ref mut it, ref mut caps, ref mut finder } = + *self; + let result = it + .try_advance(|input| { + (finder)(input, caps)?; + Ok(caps.get_match()) + }) + .transpose()?; + match result { + Ok(_) => Some(Ok(caps.clone())), + Err(err) => Some(Err(err)), + } + } +} + +#[cfg(feature = "alloc")] +impl<'h, F> core::fmt::Debug for TryCapturesIter<'h, F> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_struct("TryCapturesIter") + .field("it", &self.it) + .field("caps", &self.caps) + .field("finder", &"") + .finish() + } +} + +/// An iterator over all non-overlapping captures for an infallible search. +/// +/// The iterator yields a [`Captures`] value until no more matches could be +/// found. +/// +/// The type parameters are as follows: +/// +/// * `F` represents the type of a closure that executes the search. +/// +/// The lifetime parameters come from the [`Input`] type: +/// +/// * `'h` is the lifetime of the underlying haystack. +/// +/// When possible, prefer the iterators defined on the regex engine you're +/// using. This tries to abstract over the regex engine and is thus a bit more +/// unwieldy to use. +/// +/// This iterator is created by [`Searcher::into_captures_iter`] and then +/// calling [`TryCapturesIter::infallible`]. +#[cfg(feature = "alloc")] +#[derive(Debug)] +pub struct CapturesIter<'h, F>(TryCapturesIter<'h, F>); + +#[cfg(feature = "alloc")] +impl<'h, F> Iterator for CapturesIter<'h, F> +where + F: FnMut(&Input<'_>, &mut Captures) -> Result<(), MatchError>, +{ + type Item = Captures; + + #[inline] + fn next(&mut self) -> Option { + match self.0.next()? 
{ + Ok(m) => Some(m), + Err(err) => panic!( + "unexpected regex captures error: {}\n\ + to handle find errors, use 'try' or 'search' methods", + err, + ), + } + } +} diff --git a/vendor/regex-automata/src/util/lazy.rs b/vendor/regex-automata/src/util/lazy.rs new file mode 100644 index 0000000..0d0b4fb --- /dev/null +++ b/vendor/regex-automata/src/util/lazy.rs @@ -0,0 +1,461 @@ +/*! +A lazily initialized value for safe sharing between threads. + +The principal type in this module is `Lazy`, which makes it easy to construct +values that are shared safely across multiple threads simultaneously. +*/ + +use core::fmt; + +/// A lazily initialized value that implements `Deref` for `T`. +/// +/// A `Lazy` takes an initialization function and permits callers from any +/// thread to access the result of that initialization function in a safe +/// manner. In effect, this permits one-time initialization of global resources +/// in a (possibly) multi-threaded program. +/// +/// This type and its functionality are available even when neither the `alloc` +/// nor the `std` features are enabled. In exchange, a `Lazy` does **not** +/// guarantee that the given `create` function is called at most once. It +/// might be called multiple times. Moreover, a call to `Lazy::get` (either +/// explicitly or implicitly via `Lazy`'s `Deref` impl) may block until a `T` +/// is available. +/// +/// This is very similar to `lazy_static` or `once_cell`, except it doesn't +/// guarantee that the initialization function will be run once and it works +/// in no-alloc no-std environments. With that said, if you need stronger +/// guarantees or a more flexible API, then it is recommended to use either +/// `lazy_static` or `once_cell`. +/// +/// # Warning: may use a spin lock +/// +/// When this crate is compiled _without_ the `alloc` feature, then this type +/// may used a spin lock internally. This can have subtle effects that may +/// be undesirable. 
See [Spinlocks Considered Harmful][spinharm] for a more +/// thorough treatment of this topic. +/// +/// [spinharm]: https://matklad.github.io/2020/01/02/spinlocks-considered-harmful.html +/// +/// # Example +/// +/// This type is useful for creating regexes once, and then using them from +/// multiple threads simultaneously without worrying about synchronization. +/// +/// ``` +/// use regex_automata::{dfa::regex::Regex, util::lazy::Lazy, Match}; +/// +/// static RE: Lazy = Lazy::new(|| Regex::new("foo[0-9]+bar").unwrap()); +/// +/// let expected = Some(Match::must(0, 3..14)); +/// assert_eq!(expected, RE.find(b"zzzfoo12345barzzz")); +/// ``` +pub struct Lazy T>(lazy::Lazy); + +impl Lazy { + /// Create a new `Lazy` value that is initialized via the given function. + /// + /// The `T` type is automatically inferred from the return type of the + /// `create` function given. + pub const fn new(create: F) -> Lazy { + Lazy(lazy::Lazy::new(create)) + } +} + +impl T> Lazy { + /// Return a reference to the lazily initialized value. + /// + /// This routine may block if another thread is initializing a `T`. + /// + /// Note that given a `x` which has type `Lazy`, this must be called via + /// `Lazy::get(x)` and not `x.get()`. This routine is defined this way + /// because `Lazy` impls `Deref` with a target of `T`. + /// + /// # Panics + /// + /// This panics if the `create` function inside this lazy value panics. + /// If the panic occurred in another thread, then this routine _may_ also + /// panic (but is not guaranteed to do so). 
+ pub fn get(this: &Lazy) -> &T { + this.0.get() + } +} + +impl T> core::ops::Deref for Lazy { + type Target = T; + + fn deref(&self) -> &T { + Lazy::get(self) + } +} + +impl T> fmt::Debug for Lazy { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + self.0.fmt(f) + } +} + +#[cfg(feature = "alloc")] +mod lazy { + use core::{ + fmt, + marker::PhantomData, + sync::atomic::{AtomicPtr, Ordering}, + }; + + use alloc::boxed::Box; + + /// A non-std lazy initialized value. + /// + /// This might run the initialization function more than once, but will + /// never block. + /// + /// I wish I could get these semantics into the non-alloc non-std Lazy + /// type below, but I'm not sure how to do it. If you can do an alloc, + /// then the implementation becomes very simple if you don't care about + /// redundant work precisely because a pointer can be atomically swapped. + /// + /// Perhaps making this approach work in the non-alloc non-std case + /// requires asking the caller for a pointer? It would make the API less + /// convenient I think. + pub(super) struct Lazy { + data: AtomicPtr, + create: F, + // This indicates to the compiler that this type can drop T. It's not + // totally clear how the absence of this marker could lead to trouble, + // but putting here doesn't have any downsides so we hedge until somone + // can from the Unsafe Working Group can tell us definitively that we + // don't need it. + // + // See: https://github.com/BurntSushi/regex-automata/issues/30 + owned: PhantomData>, + } + + // SAFETY: So long as T and &T (and F and &F) can themselves be safely + // shared among threads, so to can a Lazy. Namely, the Lazy API only + // permits accessing a &T and initialization is free of data races. So if T + // is thread safe, then so to is Lazy. + // + // We specifically require that T: Send in order for Lazy to be Sync. + // Without that requirement, it's possible to send a T from one thread to + // another via Lazy's destructor. 
+ // + // It's not clear whether we need F: Send+Sync for Lazy to be Sync. But + // we're conservative for now and keep both. + unsafe impl Sync for Lazy {} + + impl Lazy { + /// Create a new alloc but non-std lazy value that is racily + /// initialized. That is, the 'create' function may be called more than + /// once. + pub(super) const fn new(create: F) -> Lazy { + Lazy { + data: AtomicPtr::new(core::ptr::null_mut()), + create, + owned: PhantomData, + } + } + } + + impl T> Lazy { + /// Get the underlying lazy value. If it hasn't been initialized + /// yet, then always attempt to initialize it (even if some other + /// thread is initializing it) and atomically attach it to this lazy + /// value before returning it. + pub(super) fn get(&self) -> &T { + if let Some(data) = self.poll() { + return data; + } + let data = (self.create)(); + let mut ptr = Box::into_raw(Box::new(data)); + // We attempt to stuff our initialized value into our atomic + // pointer. Upon success, we don't need to do anything. But if + // someone else beat us to the punch, then we need to make sure + // our newly created value is dropped. + let result = self.data.compare_exchange( + core::ptr::null_mut(), + ptr, + Ordering::AcqRel, + Ordering::Acquire, + ); + if let Err(old) = result { + // SAFETY: We created 'ptr' via Box::into_raw above, so turning + // it back into a Box via from_raw is safe. + drop(unsafe { Box::from_raw(ptr) }); + ptr = old; + } + // SAFETY: We just set the pointer above to a non-null value, even + // in the error case, and set it to a fully initialized value + // returned by 'create'. + unsafe { &*ptr } + } + + /// If this lazy value has been initialized successfully, then return + /// that value. Otherwise return None immediately. This never attempts + /// to run initialization itself. + fn poll(&self) -> Option<&T> { + let ptr = self.data.load(Ordering::Acquire); + if ptr.is_null() { + return None; + } + // SAFETY: We just checked that the pointer is not null. 
#[cfg(not(feature = "alloc"))]
mod lazy {
    use core::{
        cell::Cell,
        fmt,
        mem::MaybeUninit,
        panic::{RefUnwindSafe, UnwindSafe},
        sync::atomic::{AtomicU8, Ordering},
    };

    /// Our 'Lazy' value can be in one of three states:
    ///
    /// * INIT is where it starts, and also where it ends up again if the
    ///   'create' routine panics.
    /// * BUSY is where it sits while initialization is running in exactly
    ///   one thread.
    /// * DONE is where it sits after 'create' has completed and 'data' has
    ///   been fully initialized.
    const LAZY_STATE_INIT: u8 = 0;
    const LAZY_STATE_BUSY: u8 = 1;
    const LAZY_STATE_DONE: u8 = 2;

    /// A non-alloc non-std lazy initialized value.
    ///
    /// This guarantees initialization only happens once, but uses a spinlock
    /// to block in the case of simultaneous access. Blocking occurs so that
    /// one thread waits while another thread initializes the value.
    ///
    /// The 'alloc' flavor of this type may run the initialization function
    /// more than once but never blocks. Those semantics would be preferable,
    /// but it is not known how to provide them in a non-alloc non-std
    /// context.
    pub(super) struct Lazy<T, F> {
        /// One of the `LAZY_STATE_*` constants.
        state: AtomicU8,
        /// The initializer. It is taken (leaving `None`) by whichever thread
        /// wins the race to initialize.
        create: Cell<Option<F>>,
        /// The lazily computed value. Only initialized once `state` is DONE.
        data: Cell<MaybeUninit<T>>,
    }

    // SAFETY: So long as T and &T (and F and &F) can themselves be safely
    // shared among threads, so too can a Lazy<T, _>. Namely, the Lazy API
    // only permits accessing a &T and initialization is free of data races.
    // So if T is thread safe, then so too is Lazy<T, _>.
    unsafe impl<T: Send + Sync, F: Send + Sync> Sync for Lazy<T, F> {}
    // A reference to a Lazy is unwind safe because we specifically take
    // precautions to poison all accesses to a Lazy if the caller-provided
    // 'create' function panics.
    impl<T: UnwindSafe, F: UnwindSafe + RefUnwindSafe> RefUnwindSafe
        for Lazy<T, F>
    {
    }

    impl<T, F> Lazy<T, F> {
        /// Create a new non-alloc non-std lazy value that is initialized
        /// exactly once on first use using the given function.
        pub(super) const fn new(create: F) -> Lazy<T, F> {
            Lazy {
                state: AtomicU8::new(LAZY_STATE_INIT),
                create: Cell::new(Some(create)),
                data: Cell::new(MaybeUninit::uninit()),
            }
        }
    }

    impl<T, F: FnOnce() -> T> Lazy<T, F> {
        /// Get the underlying lazy value. If it hasn't been initialized
        /// yet, then either initialize it or block until some other thread
        /// initializes it.
        ///
        /// # Panics
        ///
        /// If the 'create' function given to `Lazy::new` panics (even in
        /// another thread), then this call and every later call panics too.
        pub(super) fn get(&self) -> &T {
            // This is effectively a spinlock. We loop until we enter a DONE
            // state, and if possible, initialize it ourselves. The only way
            // we exit the loop is if 'create' panics, we initialize 'data' or
            // some other thread initializes 'data'.
            //
            // Spinlocks are known to be problematic[1], which is why this
            // spinlock is only active when 'alloc' isn't enabled. Without
            // 'alloc' there does not appear to be another choice, other than
            // not providing this type at all.
            //
            // [1]: https://matklad.github.io/2020/01/02/spinlocks-considered-harmful.html
            while self.state.load(Ordering::Acquire) != LAZY_STATE_DONE {
                // Check if we're the first ones to get here. If so, we'll be
                // the ones who initialize. Observing INIT here also tells
                // everyone else that we are BUSY, so they spin until we
                // finish.
                let won_race = self
                    .state
                    .compare_exchange(
                        LAZY_STATE_INIT,
                        LAZY_STATE_BUSY,
                        Ordering::AcqRel,
                        Ordering::Acquire,
                    )
                    .is_ok();
                if won_race {
                    // Arm the guard *before* anything below can panic. If
                    // either 'expect' or 'create' panics, the guard puts the
                    // state back to INIT. Otherwise a poisoned Lazy would be
                    // left in BUSY by the 'expect' below and every later
                    // caller would spin forever instead of panicking.
                    let guard = Guard { state: &self.state };
                    // 'create' is there unless someone got here before us and
                    // 'create' panicked. In that case 'self.create' is now
                    // 'None' and we forward the panic to the caller. (i.e.,
                    // we implement poisoning.)
                    //
                    // SAFETY: Our use of 'self.state' guarantees that we are
                    // the only thread executing this line, and thus there are
                    // no races.
                    let create = unsafe { (*self.create.as_ptr()).take() }
                        .expect(
                            "Lazy's create function panicked, \
                             preventing initialization, \
                             poisoning current thread",
                        );
                    // SAFETY: Our use of 'self.state' guarantees that we are
                    // the only thread executing this line, and thus there are
                    // no races.
                    unsafe {
                        (*self.data.as_ptr()).as_mut_ptr().write(create());
                    }
                    // All is well. 'create' ran successfully, so we forget
                    // the guard and declare success.
                    core::mem::forget(guard);
                    self.state.store(LAZY_STATE_DONE, Ordering::Release);
                    break;
                }
                core::hint::spin_loop();
            }
            // We only get here if data is fully initialized, and thus poll
            // will always return something.
            self.poll().unwrap()
        }

        /// If this lazy value has been initialized successfully, then return
        /// that value. Otherwise return None immediately. This never blocks.
        fn poll(&self) -> Option<&T> {
            if self.state.load(Ordering::Acquire) == LAZY_STATE_DONE {
                // SAFETY: The DONE state only occurs when data has been fully
                // initialized.
                Some(unsafe { &*(*self.data.as_ptr()).as_ptr() })
            } else {
                None
            }
        }
    }

    impl<T: fmt::Debug, F: FnOnce() -> T> fmt::Debug for Lazy<T, F> {
        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
            f.debug_struct("Lazy")
                .field("state", &self.state.load(Ordering::Acquire))
                .field("create", &"<closure>")
                .field("data", &self.poll())
                .finish()
        }
    }

    impl<T, F> Drop for Lazy<T, F> {
        fn drop(&mut self) {
            if *self.state.get_mut() == LAZY_STATE_DONE {
                // SAFETY: state is DONE if and only if data has been fully
                // initialized. At which point, it is safe to drop.
                unsafe {
                    self.data.get_mut().assume_init_drop();
                }
            }
        }
    }

    /// A guard that will reset a Lazy's state back to INIT when dropped. The
    /// idea here is to 'forget' this guard on success. On failure (when a
    /// panic occurs), the Drop impl runs and causes all in-progress and future
    /// 'get' calls to panic. Without this guard, all in-progress and future
    /// 'get' calls would spin forever. Crashing is much better than getting
    /// stuck in an infinite loop.
    struct Guard<'a> {
        state: &'a AtomicU8,
    }

    impl<'a> Drop for Guard<'a> {
        fn drop(&mut self) {
            // We force ourselves back into an INIT state. This will in turn
            // cause any future 'get' calls to attempt taking 'self.create'
            // again, which will in turn panic because 'self.create' will now
            // be 'None'.
            self.state.store(LAZY_STATE_INIT, Ordering::Release);
        }
    }
}
In the + // inferred impl, it only requires that T: Sync for Lazy: Sync. But + // if we have that, we can actually make use of the fact that Lazy drops + // T to create a value on one thread and drop it on another. This *should* + // require T: Send, but our missing bounds before let it sneak by. + // + // Basically, this test should not compile, so we... comment it out. We + // don't have a great way of testing compile-fail tests right now. + // + // See: https://github.com/BurntSushi/regex-automata/issues/30 + /* + #[test] + fn sync_not_send() { + #[allow(dead_code)] + fn inner() { + let lazy = Lazy::new(move || T::default()); + std::thread::scope(|scope| { + scope.spawn(|| { + Lazy::get(&lazy); // We create T in this thread + }); + }); + // And drop in this thread. + drop(lazy); + // So we have sent a !Send type over threads. (with some more + // legwork, it's possible to even sneak the value out of drop + // through thread local) + } + } + */ +} diff --git a/vendor/regex-automata/src/util/look.rs b/vendor/regex-automata/src/util/look.rs new file mode 100644 index 0000000..73e51c0 --- /dev/null +++ b/vendor/regex-automata/src/util/look.rs @@ -0,0 +1,2547 @@ +/*! +Types and routines for working with look-around assertions. + +This module principally defines three types: + +* [`Look`] enumerates all of the assertions supported by this crate. +* [`LookSet`] provides a way to efficiently store a set of [`Look`] values. +* [`LookMatcher`] provides routines for checking whether a `Look` or a +`LookSet` matches at a particular position in a haystack. +*/ + +// LAMENTATION: Sadly, a lot of the API of `Look` and `LookSet` was basically +// copied verbatim from the regex-syntax crate. I would have no problems using +// the regex-syntax types and defining the matching routines (only found +// in this crate) as free functions, except the `Look` and `LookSet` types +// are used in lots of places.
/// A look-around assertion.
///
/// An assertion matches at a position between characters in a haystack.
/// Namely, it does not actually "consume" any input as most parts of a regular
/// expression do. Assertions are a way of stating that some property must be
/// true at a particular point during matching.
///
/// For example, `(?m)^[a-z]+$` is a pattern that:
///
/// * Scans the haystack for a position at which `(?m:^)` is satisfied. That
/// occurs at either the beginning of the haystack, or immediately following
/// a `\n` character.
/// * Looks for one or more occurrences of `[a-z]`.
/// * Once `[a-z]+` has matched as much as it can, an overall match is only
/// reported when `[a-z]+` stops just before a `\n`.
///
/// So in this case, `abc` and `\nabc\n` match, but `\nabc1\n` does not.
///
/// Assertions are also called "look-around," "look-behind" and "look-ahead."
/// Specifically, some assertions are look-behind (like `^`), other assertions
/// are look-ahead (like `$`) and yet other assertions are both look-ahead and
/// look-behind (like `\b`).
///
/// # Assertions in an NFA
///
/// An assertion in a [`thompson::NFA`](crate::nfa::thompson::NFA) can be
/// thought of as a conditional epsilon transition. That is, a matching engine
/// like the [`PikeVM`](crate::nfa::thompson::pikevm::PikeVM) only permits
/// moving through conditional epsilon transitions when their condition
/// is satisfied at whatever position the `PikeVM` is currently at in the
/// haystack.
///
/// How assertions are handled in a `DFA` is trickier, since a DFA does not
/// have epsilon transitions at all. In this case, they are compiled into the
/// automaton itself, at the expense of more states than what would be required
/// without an assertion.
///
/// # Representation
///
/// Every variant's discriminant is a distinct single bit (`1 << n`), which is
/// the value returned by [`Look::as_repr`] and accepted by
/// [`Look::from_repr`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Look {
    /// Match the beginning of text. Specifically, this matches at the starting
    /// position of the input.
    Start = 1 << 0,
    /// Match the end of text. Specifically, this matches at the ending
    /// position of the input.
    End = 1 << 1,
    /// Match the beginning of a line or the beginning of text. Specifically,
    /// this matches at the starting position of the input, or at the position
    /// immediately following a `\n` character.
    StartLF = 1 << 2,
    /// Match the end of a line or the end of text. Specifically, this matches
    /// at the end position of the input, or at the position immediately
    /// preceding a `\n` character.
    EndLF = 1 << 3,
    /// Match the beginning of a line or the beginning of text. Specifically,
    /// this matches at the starting position of the input, or at the position
    /// immediately following either a `\r` or `\n` character, but never after
    /// a `\r` when a `\n` follows.
    StartCRLF = 1 << 4,
    /// Match the end of a line or the end of text. Specifically, this matches
    /// at the end position of the input, or at the position immediately
    /// preceding a `\r` or `\n` character, but never before a `\n` when a `\r`
    /// precedes it.
    EndCRLF = 1 << 5,
    /// Match an ASCII-only word boundary. That is, this matches a position
    /// where the left adjacent character and right adjacent character
    /// correspond to a word and non-word or a non-word and word character.
    WordAscii = 1 << 6,
    /// Match an ASCII-only negation of a word boundary.
    WordAsciiNegate = 1 << 7,
    /// Match a Unicode-aware word boundary. That is, this matches a position
    /// where the left adjacent character and right adjacent character
    /// correspond to a word and non-word or a non-word and word character.
    WordUnicode = 1 << 8,
    /// Match a Unicode-aware negation of a word boundary.
    WordUnicodeNegate = 1 << 9,
    /// Match the start of an ASCII-only word boundary. That is, this matches a
    /// position at either the beginning of the haystack or where the previous
    /// character is not a word character and the following character is a word
    /// character.
    WordStartAscii = 1 << 10,
    /// Match the end of an ASCII-only word boundary. That is, this matches
    /// a position at either the end of the haystack or where the previous
    /// character is a word character and the following character is not a word
    /// character.
    WordEndAscii = 1 << 11,
    /// Match the start of a Unicode word boundary. That is, this matches a
    /// position at either the beginning of the haystack or where the previous
    /// character is not a word character and the following character is a word
    /// character.
    WordStartUnicode = 1 << 12,
    /// Match the end of a Unicode word boundary. That is, this matches a
    /// position at either the end of the haystack or where the previous
    /// character is a word character and the following character is not a word
    /// character.
    WordEndUnicode = 1 << 13,
    /// Match the start half of an ASCII-only word boundary. That is, this
    /// matches a position at either the beginning of the haystack or where the
    /// previous character is not a word character.
    WordStartHalfAscii = 1 << 14,
    /// Match the end half of an ASCII-only word boundary. That is, this
    /// matches a position at either the end of the haystack or where the
    /// following character is not a word character.
    WordEndHalfAscii = 1 << 15,
    /// Match the start half of a Unicode word boundary. That is, this matches
    /// a position at either the beginning of the haystack or where the
    /// previous character is not a word character.
    WordStartHalfUnicode = 1 << 16,
    /// Match the end half of a Unicode word boundary. That is, this matches
    /// a position at either the end of the haystack or where the following
    /// character is not a word character.
    WordEndHalfUnicode = 1 << 17,
}

impl Look {
    /// Flip the look-around assertion to its equivalent for reverse searches.
    /// For example, `StartLF` gets translated to `EndLF`.
    ///
    /// Some assertions, such as `WordUnicode`, remain the same since they
    /// match the same positions regardless of the direction of the search.
    #[inline]
    pub const fn reversed(self) -> Look {
        match self {
            // Haystack and line anchors swap their start/end counterparts.
            Look::Start => Look::End,
            Look::End => Look::Start,
            Look::StartLF => Look::EndLF,
            Look::EndLF => Look::StartLF,
            Look::StartCRLF => Look::EndCRLF,
            Look::EndCRLF => Look::StartCRLF,
            // Full word boundaries (and their negations) are symmetric.
            Look::WordAscii => Look::WordAscii,
            Look::WordAsciiNegate => Look::WordAsciiNegate,
            Look::WordUnicode => Look::WordUnicode,
            Look::WordUnicodeNegate => Look::WordUnicodeNegate,
            // Directional word boundaries swap their start/end counterparts.
            Look::WordStartAscii => Look::WordEndAscii,
            Look::WordEndAscii => Look::WordStartAscii,
            Look::WordStartUnicode => Look::WordEndUnicode,
            Look::WordEndUnicode => Look::WordStartUnicode,
            Look::WordStartHalfAscii => Look::WordEndHalfAscii,
            Look::WordEndHalfAscii => Look::WordStartHalfAscii,
            Look::WordStartHalfUnicode => Look::WordEndHalfUnicode,
            Look::WordEndHalfUnicode => Look::WordStartHalfUnicode,
        }
    }

    /// Return the underlying representation of this look-around enumeration
    /// as an integer. Giving the return value to the [`Look::from_repr`]
    /// constructor is guaranteed to return the same look-around variant that
    /// one started with within a semver compatible release of this crate.
    #[inline]
    pub const fn as_repr(self) -> u32 {
        // 'as' converts a fieldless enum to its discriminant without any
        // runtime cost.
        self as u32
    }

    /// Given the underlying representation of a `Look` value, return the
    /// corresponding `Look` value if the representation is valid. Otherwise
    /// `None` is returned.
    ///
    /// A representation is valid only when it is exactly one of the variant
    /// discriminants, i.e. a single set bit among the 18 lowest bits.
    #[inline]
    pub const fn from_repr(repr: u32) -> Option<Look> {
        match repr {
            0b00_0000_0000_0000_0001 => Some(Look::Start),
            0b00_0000_0000_0000_0010 => Some(Look::End),
            0b00_0000_0000_0000_0100 => Some(Look::StartLF),
            0b00_0000_0000_0000_1000 => Some(Look::EndLF),
            0b00_0000_0000_0001_0000 => Some(Look::StartCRLF),
            0b00_0000_0000_0010_0000 => Some(Look::EndCRLF),
            0b00_0000_0000_0100_0000 => Some(Look::WordAscii),
            0b00_0000_0000_1000_0000 => Some(Look::WordAsciiNegate),
            0b00_0000_0001_0000_0000 => Some(Look::WordUnicode),
            0b00_0000_0010_0000_0000 => Some(Look::WordUnicodeNegate),
            0b00_0000_0100_0000_0000 => Some(Look::WordStartAscii),
            0b00_0000_1000_0000_0000 => Some(Look::WordEndAscii),
            0b00_0001_0000_0000_0000 => Some(Look::WordStartUnicode),
            0b00_0010_0000_0000_0000 => Some(Look::WordEndUnicode),
            0b00_0100_0000_0000_0000 => Some(Look::WordStartHalfAscii),
            0b00_1000_0000_0000_0000 => Some(Look::WordEndHalfAscii),
            0b01_0000_0000_0000_0000 => Some(Look::WordStartHalfUnicode),
            0b10_0000_0000_0000_0000 => Some(Look::WordEndHalfUnicode),
            _ => None,
        }
    }

    /// Returns a convenient single codepoint representation of this
    /// look-around assertion. Each assertion is guaranteed to be represented
    /// by a distinct character.
    ///
    /// This is useful for succinctly representing a look-around assertion in
    /// human friendly but succinct output intended for a programmer working on
    /// regex internals.
    #[inline]
    pub const fn as_char(self) -> char {
        match self {
            Look::Start => 'A',
            Look::End => 'z',
            Look::StartLF => '^',
            Look::EndLF => '$',
            Look::StartCRLF => 'r',
            Look::EndCRLF => 'R',
            Look::WordAscii => 'b',
            Look::WordAsciiNegate => 'B',
            Look::WordUnicode => '𝛃',
            Look::WordUnicodeNegate => '𝚩',
            Look::WordStartAscii => '<',
            Look::WordEndAscii => '>',
            Look::WordStartUnicode => '〈',
            Look::WordEndUnicode => '〉',
            Look::WordStartHalfAscii => '◁',
            Look::WordEndHalfAscii => '▷',
            Look::WordStartHalfUnicode => '◀',
            Look::WordEndHalfUnicode => '▶',
        }
    }
}
+ #[inline] + pub const fn as_char(self) -> char { + match self { + Look::Start => 'A', + Look::End => 'z', + Look::StartLF => '^', + Look::EndLF => '$', + Look::StartCRLF => 'r', + Look::EndCRLF => 'R', + Look::WordAscii => 'b', + Look::WordAsciiNegate => 'B', + Look::WordUnicode => '𝛃', + Look::WordUnicodeNegate => '𝚩', + Look::WordStartAscii => '<', + Look::WordEndAscii => '>', + Look::WordStartUnicode => '〈', + Look::WordEndUnicode => '〉', + Look::WordStartHalfAscii => '◁', + Look::WordEndHalfAscii => '▷', + Look::WordStartHalfUnicode => '◀', + Look::WordEndHalfUnicode => '▶', + } + } +} + +/// LookSet is a memory-efficient set of look-around assertions. +/// +/// This is useful for efficiently tracking look-around assertions. For +/// example, a [`thompson::NFA`](crate::nfa::thompson::NFA) provides properties +/// that return `LookSet`s. +#[derive(Clone, Copy, Default, Eq, PartialEq)] +pub struct LookSet { + /// The underlying representation this set is exposed to make it possible + /// to store it somewhere efficiently. The representation is that + /// of a bitset, where each assertion occupies bit `i` where + /// `i = Look::as_repr()`. + /// + /// Note that users of this internal representation must permit the full + /// range of `u16` values to be represented. For example, even if the + /// current implementation only makes use of the 10 least significant bits, + /// it may use more bits in a future semver compatible release. + pub bits: u32, +} + +impl LookSet { + /// Create an empty set of look-around assertions. + #[inline] + pub fn empty() -> LookSet { + LookSet { bits: 0 } + } + + /// Create a full set of look-around assertions. + /// + /// This set contains all possible look-around assertions. + #[inline] + pub fn full() -> LookSet { + LookSet { bits: !0 } + } + + /// Create a look-around set containing the look-around assertion given. + /// + /// This is a convenience routine for creating an empty set and inserting + /// one look-around assertions. 
+ #[inline] + pub fn singleton(look: Look) -> LookSet { + LookSet::empty().insert(look) + } + + /// Returns the total number of look-around assertions in this set. + #[inline] + pub fn len(self) -> usize { + // OK because max value always fits in a u8, which in turn always + // fits in a usize, regardless of target. + usize::try_from(self.bits.count_ones()).unwrap() + } + + /// Returns true if and only if this set is empty. + #[inline] + pub fn is_empty(self) -> bool { + self.len() == 0 + } + + /// Returns true if and only if the given look-around assertion is in this + /// set. + #[inline] + pub fn contains(self, look: Look) -> bool { + self.bits & look.as_repr() != 0 + } + + /// Returns true if and only if this set contains any anchor assertions. + /// This includes both "start/end of haystack" and "start/end of line." + #[inline] + pub fn contains_anchor(&self) -> bool { + self.contains_anchor_haystack() || self.contains_anchor_line() + } + + /// Returns true if and only if this set contains any "start/end of + /// haystack" anchors. This doesn't include "start/end of line" anchors. + #[inline] + pub fn contains_anchor_haystack(&self) -> bool { + self.contains(Look::Start) || self.contains(Look::End) + } + + /// Returns true if and only if this set contains any "start/end of line" + /// anchors. This doesn't include "start/end of haystack" anchors. This + /// includes both `\n` line anchors and CRLF (`\r\n`) aware line anchors. + #[inline] + pub fn contains_anchor_line(&self) -> bool { + self.contains(Look::StartLF) + || self.contains(Look::EndLF) + || self.contains(Look::StartCRLF) + || self.contains(Look::EndCRLF) + } + + /// Returns true if and only if this set contains any "start/end of line" + /// anchors that only treat `\n` as line terminators. This does not include + /// haystack anchors or CRLF aware line anchors. 
+ #[inline] + pub fn contains_anchor_lf(&self) -> bool { + self.contains(Look::StartLF) || self.contains(Look::EndLF) + } + + /// Returns true if and only if this set contains any "start/end of line" + /// anchors that are CRLF-aware. This doesn't include "start/end of + /// haystack" or "start/end of line-feed" anchors. + #[inline] + pub fn contains_anchor_crlf(&self) -> bool { + self.contains(Look::StartCRLF) || self.contains(Look::EndCRLF) + } + + /// Returns true if and only if this set contains any word boundary or + /// negated word boundary assertions. This include both Unicode and ASCII + /// word boundaries. + #[inline] + pub fn contains_word(self) -> bool { + self.contains_word_unicode() || self.contains_word_ascii() + } + + /// Returns true if and only if this set contains any Unicode word boundary + /// or negated Unicode word boundary assertions. + #[inline] + pub fn contains_word_unicode(self) -> bool { + self.contains(Look::WordUnicode) + || self.contains(Look::WordUnicodeNegate) + || self.contains(Look::WordStartUnicode) + || self.contains(Look::WordEndUnicode) + || self.contains(Look::WordStartHalfUnicode) + || self.contains(Look::WordEndHalfUnicode) + } + + /// Returns true if and only if this set contains any ASCII word boundary + /// or negated ASCII word boundary assertions. + #[inline] + pub fn contains_word_ascii(self) -> bool { + self.contains(Look::WordAscii) + || self.contains(Look::WordAsciiNegate) + || self.contains(Look::WordStartAscii) + || self.contains(Look::WordEndAscii) + || self.contains(Look::WordStartHalfAscii) + || self.contains(Look::WordEndHalfAscii) + } + + /// Returns an iterator over all of the look-around assertions in this set. + #[inline] + pub fn iter(self) -> LookSetIter { + LookSetIter { set: self } + } + + /// Return a new set that is equivalent to the original, but with the given + /// assertion added to it. If the assertion is already in the set, then the + /// returned set is equivalent to the original. 
+ #[inline] + pub fn insert(self, look: Look) -> LookSet { + LookSet { bits: self.bits | look.as_repr() } + } + + /// Updates this set in place with the result of inserting the given + /// assertion into this set. + #[inline] + pub fn set_insert(&mut self, look: Look) { + *self = self.insert(look); + } + + /// Return a new set that is equivalent to the original, but with the given + /// assertion removed from it. If the assertion is not in the set, then the + /// returned set is equivalent to the original. + #[inline] + pub fn remove(self, look: Look) -> LookSet { + LookSet { bits: self.bits & !look.as_repr() } + } + + /// Updates this set in place with the result of removing the given + /// assertion from this set. + #[inline] + pub fn set_remove(&mut self, look: Look) { + *self = self.remove(look); + } + + /// Returns a new set that is the result of subtracting the given set from + /// this set. + #[inline] + pub fn subtract(self, other: LookSet) -> LookSet { + LookSet { bits: self.bits & !other.bits } + } + + /// Updates this set in place with the result of subtracting the given set + /// from this set. + #[inline] + pub fn set_subtract(&mut self, other: LookSet) { + *self = self.subtract(other); + } + + /// Returns a new set that is the union of this and the one given. + #[inline] + pub fn union(self, other: LookSet) -> LookSet { + LookSet { bits: self.bits | other.bits } + } + + /// Updates this set in place with the result of unioning it with the one + /// given. + #[inline] + pub fn set_union(&mut self, other: LookSet) { + *self = self.union(other); + } + + /// Returns a new set that is the intersection of this and the one given. + #[inline] + pub fn intersect(self, other: LookSet) -> LookSet { + LookSet { bits: self.bits & other.bits } + } + + /// Updates this set in place with the result of intersecting it with the + /// one given. 
+ #[inline] + pub fn set_intersect(&mut self, other: LookSet) { + *self = self.intersect(other); + } + + /// Return a `LookSet` from the slice given as a native endian 32-bit + /// integer. + /// + /// # Panics + /// + /// This panics if `slice.len() < 4`. + #[inline] + pub fn read_repr(slice: &[u8]) -> LookSet { + let bits = u32::from_ne_bytes(slice[..4].try_into().unwrap()); + LookSet { bits } + } + + /// Write a `LookSet` as a native endian 32-bit integer to the beginning + /// of the slice given. + /// + /// # Panics + /// + /// This panics if `slice.len() < 4`. + #[inline] + pub fn write_repr(self, slice: &mut [u8]) { + let raw = self.bits.to_ne_bytes(); + slice[0] = raw[0]; + slice[1] = raw[1]; + slice[2] = raw[2]; + slice[3] = raw[3]; + } + + /// Checks that all assertions in this set can be matched. + /// + /// Some assertions, such as Unicode word boundaries, require optional (but + /// enabled by default) tables that may not be available. If there are + /// assertions in this set that require tables that are not available, then + /// this will return an error. + /// + /// Specifically, this returns an error when the the + /// `unicode-word-boundary` feature is _not_ enabled _and_ this set + /// contains a Unicode word boundary assertion. + /// + /// It can be useful to use this on the result of + /// [`NFA::look_set_any`](crate::nfa::thompson::NFA::look_set_any) + /// when building a matcher engine to ensure methods like + /// [`LookMatcher::matches_set`] do not panic at search time. + pub fn available(self) -> Result<(), UnicodeWordBoundaryError> { + if self.contains_word_unicode() { + UnicodeWordBoundaryError::check()?; + } + Ok(()) + } +} + +impl core::fmt::Debug for LookSet { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + if self.is_empty() { + return write!(f, "∅"); + } + for look in self.iter() { + write!(f, "{}", look.as_char())?; + } + Ok(()) + } +} + +/// An iterator over all look-around assertions in a [`LookSet`]. 
+/// +/// This iterator is created by [`LookSet::iter`]. +#[derive(Clone, Debug)] +pub struct LookSetIter { + set: LookSet, +} + +impl Iterator for LookSetIter { + type Item = Look; + + #[inline] + fn next(&mut self) -> Option { + if self.set.is_empty() { + return None; + } + // We'll never have more than u8::MAX distinct look-around assertions, + // so 'bit' will always fit into a u16. + let bit = u16::try_from(self.set.bits.trailing_zeros()).unwrap(); + let look = Look::from_repr(1 << bit)?; + self.set = self.set.remove(look); + Some(look) + } +} + +/// A matcher for look-around assertions. +/// +/// This matcher permits configuring aspects of how look-around assertions are +/// matched. +/// +/// # Example +/// +/// A `LookMatcher` can change the line terminator used for matching multi-line +/// anchors such as `(?m:^)` and `(?m:$)`. +/// +/// ``` +/// use regex_automata::{ +/// nfa::thompson::{self, pikevm::PikeVM}, +/// util::look::LookMatcher, +/// Match, Input, +/// }; +/// +/// let mut lookm = LookMatcher::new(); +/// lookm.set_line_terminator(b'\x00'); +/// +/// let re = PikeVM::builder() +/// .thompson(thompson::Config::new().look_matcher(lookm)) +/// .build(r"(?m)^[a-z]+$")?; +/// let mut cache = re.create_cache(); +/// +/// // Multi-line assertions now use NUL as a terminator. +/// assert_eq!( +/// Some(Match::must(0, 1..4)), +/// re.find(&mut cache, b"\x00abc\x00"), +/// ); +/// // ... and \n is no longer recognized as a terminator. +/// assert_eq!( +/// None, +/// re.find(&mut cache, b"\nabc\n"), +/// ); +/// +/// # Ok::<(), Box>(()) +/// ``` +#[derive(Clone, Debug)] +pub struct LookMatcher { + lineterm: DebugByte, +} + +impl LookMatcher { + /// Creates a new default matcher for look-around assertions. + pub fn new() -> LookMatcher { + LookMatcher { lineterm: DebugByte(b'\n') } + } + + /// Sets the line terminator for use with `(?m:^)` and `(?m:$)`. 
+ /// + /// Namely, instead of `^` matching after `\n` and `$` matching immediately + /// before a `\n`, this will cause it to match after and before the byte + /// given. + /// + /// It can occasionally be useful to use this to configure the line + /// terminator to the NUL byte when searching binary data. + /// + /// Note that this does not apply to CRLF-aware line anchors such as + /// `(?Rm:^)` and `(?Rm:$)`. CRLF-aware line anchors are hard-coded to + /// use `\r` and `\n`. + pub fn set_line_terminator(&mut self, byte: u8) -> &mut LookMatcher { + self.lineterm.0 = byte; + self + } + + /// Returns the line terminator that was configured for this matcher. + /// + /// If no line terminator was configured, then this returns `\n`. + /// + /// Note that the line terminator should only be used for matching `(?m:^)` + /// and `(?m:$)` assertions. It specifically should _not_ be used for + /// matching the CRLF aware assertions `(?Rm:^)` and `(?Rm:$)`. + pub fn get_line_terminator(&self) -> u8 { + self.lineterm.0 + } + + /// Returns true when the position `at` in `haystack` satisfies the given + /// look-around assertion. + /// + /// # Panics + /// + /// This panics when testing any Unicode word boundary assertion in this + /// set and when the Unicode word data is not available. Specifically, this + /// only occurs when the `unicode-word-boundary` feature is not enabled. + /// + /// Since it's generally expected that this routine is called inside of + /// a matching engine, callers should check the error condition when + /// building the matching engine. If there is a Unicode word boundary + /// in the matcher and the data isn't available, then the matcher should + /// fail to build. + /// + /// Callers can check the error condition with [`LookSet::available`]. + /// + /// This also may panic when `at > haystack.len()`. Note that `at == + /// haystack.len()` is legal and guaranteed not to panic. 
+ #[inline] + pub fn matches(&self, look: Look, haystack: &[u8], at: usize) -> bool { + self.matches_inline(look, haystack, at) + } + + /// Like `matches`, but forcefully inlined. + /// + /// # Panics + /// + /// This panics when testing any Unicode word boundary assertion in this + /// set and when the Unicode word data is not available. Specifically, this + /// only occurs when the `unicode-word-boundary` feature is not enabled. + /// + /// Since it's generally expected that this routine is called inside of + /// a matching engine, callers should check the error condition when + /// building the matching engine. If there is a Unicode word boundary + /// in the matcher and the data isn't available, then the matcher should + /// fail to build. + /// + /// Callers can check the error condition with [`LookSet::available`]. + /// + /// This also may panic when `at > haystack.len()`. Note that `at == + /// haystack.len()` is legal and guaranteed not to panic. + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn matches_inline( + &self, + look: Look, + haystack: &[u8], + at: usize, + ) -> bool { + match look { + Look::Start => self.is_start(haystack, at), + Look::End => self.is_end(haystack, at), + Look::StartLF => self.is_start_lf(haystack, at), + Look::EndLF => self.is_end_lf(haystack, at), + Look::StartCRLF => self.is_start_crlf(haystack, at), + Look::EndCRLF => self.is_end_crlf(haystack, at), + Look::WordAscii => self.is_word_ascii(haystack, at), + Look::WordAsciiNegate => self.is_word_ascii_negate(haystack, at), + Look::WordUnicode => self.is_word_unicode(haystack, at).unwrap(), + Look::WordUnicodeNegate => { + self.is_word_unicode_negate(haystack, at).unwrap() + } + Look::WordStartAscii => self.is_word_start_ascii(haystack, at), + Look::WordEndAscii => self.is_word_end_ascii(haystack, at), + Look::WordStartUnicode => { + self.is_word_start_unicode(haystack, at).unwrap() + } + Look::WordEndUnicode => { + self.is_word_end_unicode(haystack, 
at).unwrap() + } + Look::WordStartHalfAscii => { + self.is_word_start_half_ascii(haystack, at) + } + Look::WordEndHalfAscii => { + self.is_word_end_half_ascii(haystack, at) + } + Look::WordStartHalfUnicode => { + self.is_word_start_half_unicode(haystack, at).unwrap() + } + Look::WordEndHalfUnicode => { + self.is_word_end_half_unicode(haystack, at).unwrap() + } + } + } + + /// Returns true when _all_ of the assertions in the given set match at the + /// given position in the haystack. + /// + /// # Panics + /// + /// This panics when testing any Unicode word boundary assertion in this + /// set and when the Unicode word data is not available. Specifically, this + /// only occurs when the `unicode-word-boundary` feature is not enabled. + /// + /// Since it's generally expected that this routine is called inside of + /// a matching engine, callers should check the error condition when + /// building the matching engine. If there is a Unicode word boundary + /// in the matcher and the data isn't available, then the matcher should + /// fail to build. + /// + /// Callers can check the error condition with [`LookSet::available`]. + /// + /// This also may panic when `at > haystack.len()`. Note that `at == + /// haystack.len()` is legal and guaranteed not to panic. + #[inline] + pub fn matches_set( + &self, + set: LookSet, + haystack: &[u8], + at: usize, + ) -> bool { + self.matches_set_inline(set, haystack, at) + } + + /// Like `LookSet::matches`, but forcefully inlined for perf. + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn matches_set_inline( + &self, + set: LookSet, + haystack: &[u8], + at: usize, + ) -> bool { + // This used to luse LookSet::iter with Look::matches on each element, + // but that proved to be quite diastrous for perf. The manual "if + // the set has this assertion, check it" turns out to be quite a bit + // faster. 
+ if set.contains(Look::Start) { + if !self.is_start(haystack, at) { + return false; + } + } + if set.contains(Look::End) { + if !self.is_end(haystack, at) { + return false; + } + } + if set.contains(Look::StartLF) { + if !self.is_start_lf(haystack, at) { + return false; + } + } + if set.contains(Look::EndLF) { + if !self.is_end_lf(haystack, at) { + return false; + } + } + if set.contains(Look::StartCRLF) { + if !self.is_start_crlf(haystack, at) { + return false; + } + } + if set.contains(Look::EndCRLF) { + if !self.is_end_crlf(haystack, at) { + return false; + } + } + if set.contains(Look::WordAscii) { + if !self.is_word_ascii(haystack, at) { + return false; + } + } + if set.contains(Look::WordAsciiNegate) { + if !self.is_word_ascii_negate(haystack, at) { + return false; + } + } + if set.contains(Look::WordUnicode) { + if !self.is_word_unicode(haystack, at).unwrap() { + return false; + } + } + if set.contains(Look::WordUnicodeNegate) { + if !self.is_word_unicode_negate(haystack, at).unwrap() { + return false; + } + } + if set.contains(Look::WordStartAscii) { + if !self.is_word_start_ascii(haystack, at) { + return false; + } + } + if set.contains(Look::WordEndAscii) { + if !self.is_word_end_ascii(haystack, at) { + return false; + } + } + if set.contains(Look::WordStartUnicode) { + if !self.is_word_start_unicode(haystack, at).unwrap() { + return false; + } + } + if set.contains(Look::WordEndUnicode) { + if !self.is_word_end_unicode(haystack, at).unwrap() { + return false; + } + } + if set.contains(Look::WordStartHalfAscii) { + if !self.is_word_start_half_ascii(haystack, at) { + return false; + } + } + if set.contains(Look::WordEndHalfAscii) { + if !self.is_word_end_half_ascii(haystack, at) { + return false; + } + } + if set.contains(Look::WordStartHalfUnicode) { + if !self.is_word_start_half_unicode(haystack, at).unwrap() { + return false; + } + } + if set.contains(Look::WordEndHalfUnicode) { + if !self.is_word_end_half_unicode(haystack, at).unwrap() { + return 
false; + } + } + true + } + + /// Split up the given byte classes into equivalence classes in a way that + /// is consistent with this look-around assertion. + #[cfg(feature = "alloc")] + pub(crate) fn add_to_byteset( + &self, + look: Look, + set: &mut crate::util::alphabet::ByteClassSet, + ) { + match look { + Look::Start | Look::End => {} + Look::StartLF | Look::EndLF => { + set.set_range(self.lineterm.0, self.lineterm.0); + } + Look::StartCRLF | Look::EndCRLF => { + set.set_range(b'\r', b'\r'); + set.set_range(b'\n', b'\n'); + } + Look::WordAscii + | Look::WordAsciiNegate + | Look::WordUnicode + | Look::WordUnicodeNegate + | Look::WordStartAscii + | Look::WordEndAscii + | Look::WordStartUnicode + | Look::WordEndUnicode + | Look::WordStartHalfAscii + | Look::WordEndHalfAscii + | Look::WordStartHalfUnicode + | Look::WordEndHalfUnicode => { + // We need to mark all ranges of bytes whose pairs result in + // evaluating \b differently. This isn't technically correct + // for Unicode word boundaries, but DFAs can't handle those + // anyway, and thus, the byte classes don't need to either + // since they are themselves only used in DFAs. + // + // FIXME: It seems like the calls to 'set_range' here are + // completely invariant, which means we could just hard-code + // them here without needing to write a loop. And we only need + // to do this dance at most once per regex. + // + // FIXME: Is this correct for \B? + let iswb = utf8::is_word_byte; + // This unwrap is OK because we guard every use of 'asu8' with + // a check that the input is <= 255. + let asu8 = |b: u16| u8::try_from(b).unwrap(); + let mut b1: u16 = 0; + let mut b2: u16; + while b1 <= 255 { + b2 = b1 + 1; + while b2 <= 255 && iswb(asu8(b1)) == iswb(asu8(b2)) { + b2 += 1; + } + // The guards above guarantee that b2 can never get any + // bigger. 
+ assert!(b2 <= 256); + // Subtracting 1 from b2 is always OK because it is always + // at least 1 greater than b1, and the assert above + // guarantees that the asu8 conversion will succeed. + set.set_range(asu8(b1), asu8(b2.checked_sub(1).unwrap())); + b1 = b2; + } + } + } + } + + /// Returns true when [`Look::Start`] is satisfied `at` the given position + /// in `haystack`. + /// + /// # Panics + /// + /// This may panic when `at > haystack.len()`. Note that `at == + /// haystack.len()` is legal and guaranteed not to panic. + #[inline] + pub fn is_start(&self, _haystack: &[u8], at: usize) -> bool { + at == 0 + } + + /// Returns true when [`Look::End`] is satisfied `at` the given position in + /// `haystack`. + /// + /// # Panics + /// + /// This may panic when `at > haystack.len()`. Note that `at == + /// haystack.len()` is legal and guaranteed not to panic. + #[inline] + pub fn is_end(&self, haystack: &[u8], at: usize) -> bool { + at == haystack.len() + } + + /// Returns true when [`Look::StartLF`] is satisfied `at` the given + /// position in `haystack`. + /// + /// # Panics + /// + /// This may panic when `at > haystack.len()`. Note that `at == + /// haystack.len()` is legal and guaranteed not to panic. + #[inline] + pub fn is_start_lf(&self, haystack: &[u8], at: usize) -> bool { + self.is_start(haystack, at) || haystack[at - 1] == self.lineterm.0 + } + + /// Returns true when [`Look::EndLF`] is satisfied `at` the given position + /// in `haystack`. + /// + /// # Panics + /// + /// This may panic when `at > haystack.len()`. Note that `at == + /// haystack.len()` is legal and guaranteed not to panic. + #[inline] + pub fn is_end_lf(&self, haystack: &[u8], at: usize) -> bool { + self.is_end(haystack, at) || haystack[at] == self.lineterm.0 + } + + /// Returns true when [`Look::StartCRLF`] is satisfied `at` the given + /// position in `haystack`. + /// + /// # Panics + /// + /// This may panic when `at > haystack.len()`. 
Note that `at == + /// haystack.len()` is legal and guaranteed not to panic. + #[inline] + pub fn is_start_crlf(&self, haystack: &[u8], at: usize) -> bool { + self.is_start(haystack, at) + || haystack[at - 1] == b'\n' + || (haystack[at - 1] == b'\r' + && (at >= haystack.len() || haystack[at] != b'\n')) + } + + /// Returns true when [`Look::EndCRLF`] is satisfied `at` the given + /// position in `haystack`. + /// + /// # Panics + /// + /// This may panic when `at > haystack.len()`. Note that `at == + /// haystack.len()` is legal and guaranteed not to panic. + #[inline] + pub fn is_end_crlf(&self, haystack: &[u8], at: usize) -> bool { + self.is_end(haystack, at) + || haystack[at] == b'\r' + || (haystack[at] == b'\n' + && (at == 0 || haystack[at - 1] != b'\r')) + } + + /// Returns true when [`Look::WordAscii`] is satisfied `at` the given + /// position in `haystack`. + /// + /// # Panics + /// + /// This may panic when `at > haystack.len()`. Note that `at == + /// haystack.len()` is legal and guaranteed not to panic. + #[inline] + pub fn is_word_ascii(&self, haystack: &[u8], at: usize) -> bool { + let word_before = at > 0 && utf8::is_word_byte(haystack[at - 1]); + let word_after = + at < haystack.len() && utf8::is_word_byte(haystack[at]); + word_before != word_after + } + + /// Returns true when [`Look::WordAsciiNegate`] is satisfied `at` the given + /// position in `haystack`. + /// + /// # Panics + /// + /// This may panic when `at > haystack.len()`. Note that `at == + /// haystack.len()` is legal and guaranteed not to panic. + #[inline] + pub fn is_word_ascii_negate(&self, haystack: &[u8], at: usize) -> bool { + !self.is_word_ascii(haystack, at) + } + + /// Returns true when [`Look::WordUnicode`] is satisfied `at` the given + /// position in `haystack`. + /// + /// # Panics + /// + /// This may panic when `at > haystack.len()`. Note that `at == + /// haystack.len()` is legal and guaranteed not to panic. 
+ /// + /// # Errors + /// + /// This returns an error when Unicode word boundary tables + /// are not available. Specifically, this only occurs when the + /// `unicode-word-boundary` feature is not enabled. + #[inline] + pub fn is_word_unicode( + &self, + haystack: &[u8], + at: usize, + ) -> Result { + let word_before = is_word_char::rev(haystack, at)?; + let word_after = is_word_char::fwd(haystack, at)?; + Ok(word_before != word_after) + } + + /// Returns true when [`Look::WordUnicodeNegate`] is satisfied `at` the + /// given position in `haystack`. + /// + /// # Panics + /// + /// This may panic when `at > haystack.len()`. Note that `at == + /// haystack.len()` is legal and guaranteed not to panic. + /// + /// # Errors + /// + /// This returns an error when Unicode word boundary tables + /// are not available. Specifically, this only occurs when the + /// `unicode-word-boundary` feature is not enabled. + #[inline] + pub fn is_word_unicode_negate( + &self, + haystack: &[u8], + at: usize, + ) -> Result { + // This is pretty subtle. Why do we need to do UTF-8 decoding here? + // Well... at time of writing, the is_word_char_{fwd,rev} routines will + // only return true if there is a valid UTF-8 encoding of a "word" + // codepoint, and false in every other case (including invalid UTF-8). + // This means that in regions of invalid UTF-8 (which might be a + // subset of valid UTF-8!), it would result in \B matching. While this + // would be questionable in the context of truly invalid UTF-8, it is + // *certainly* wrong to report match boundaries that split the encoding + // of a codepoint. So to work around this, we ensure that we can decode + // a codepoint on either side of `at`. If either direction fails, then + // we don't permit \B to match at all. + // + // Now, this isn't exactly optimal from a perf perspective. We could + // try and detect this in is_word_char::{fwd,rev}, but it's not clear + // if it's worth it. \B is, after all, rarely used. 
Even worse, + // is_word_char::{fwd,rev} could do its own UTF-8 decoding, and so this + // will wind up doing UTF-8 decoding twice. Owch. We could fix this + // with more code complexity, but it just doesn't feel worth it for \B. + // + // And in particular, we do *not* have to do this with \b, because \b + // *requires* that at least one side of `at` be a "word" codepoint, + // which in turn implies one side of `at` must be valid UTF-8. This in + // turn implies that \b can never split a valid UTF-8 encoding of a + // codepoint. In the case where one side of `at` is truly invalid UTF-8 + // and the other side IS a word codepoint, then we want \b to match + // since it represents a valid UTF-8 boundary. It also makes sense. For + // example, you'd want \b\w+\b to match 'abc' in '\xFFabc\xFF'. + // + // Note also that this is not just '!is_word_unicode(..)' like it is + // for the ASCII case. For example, neither \b nor \B is satisfied + // within invalid UTF-8 sequences. + let word_before = at > 0 + && match utf8::decode_last(&haystack[..at]) { + None | Some(Err(_)) => return Ok(false), + Some(Ok(_)) => is_word_char::rev(haystack, at)?, + }; + let word_after = at < haystack.len() + && match utf8::decode(&haystack[at..]) { + None | Some(Err(_)) => return Ok(false), + Some(Ok(_)) => is_word_char::fwd(haystack, at)?, + }; + Ok(word_before == word_after) + } + + /// Returns true when [`Look::WordStartAscii`] is satisfied `at` the given + /// position in `haystack`. + /// + /// # Panics + /// + /// This may panic when `at > haystack.len()`. Note that `at == + /// haystack.len()` is legal and guaranteed not to panic. 
+ #[inline] + pub fn is_word_start_ascii(&self, haystack: &[u8], at: usize) -> bool { + let word_before = at > 0 && utf8::is_word_byte(haystack[at - 1]); + let word_after = + at < haystack.len() && utf8::is_word_byte(haystack[at]); + !word_before && word_after + } + + /// Returns true when [`Look::WordEndAscii`] is satisfied `at` the given + /// position in `haystack`. + /// + /// # Panics + /// + /// This may panic when `at > haystack.len()`. Note that `at == + /// haystack.len()` is legal and guaranteed not to panic. + #[inline] + pub fn is_word_end_ascii(&self, haystack: &[u8], at: usize) -> bool { + let word_before = at > 0 && utf8::is_word_byte(haystack[at - 1]); + let word_after = + at < haystack.len() && utf8::is_word_byte(haystack[at]); + word_before && !word_after + } + + /// Returns true when [`Look::WordStartUnicode`] is satisfied `at` the + /// given position in `haystack`. + /// + /// # Panics + /// + /// This may panic when `at > haystack.len()`. Note that `at == + /// haystack.len()` is legal and guaranteed not to panic. + /// + /// # Errors + /// + /// This returns an error when Unicode word boundary tables + /// are not available. Specifically, this only occurs when the + /// `unicode-word-boundary` feature is not enabled. + #[inline] + pub fn is_word_start_unicode( + &self, + haystack: &[u8], + at: usize, + ) -> Result { + let word_before = is_word_char::rev(haystack, at)?; + let word_after = is_word_char::fwd(haystack, at)?; + Ok(!word_before && word_after) + } + + /// Returns true when [`Look::WordEndUnicode`] is satisfied `at` the + /// given position in `haystack`. + /// + /// # Panics + /// + /// This may panic when `at > haystack.len()`. Note that `at == + /// haystack.len()` is legal and guaranteed not to panic. + /// + /// # Errors + /// + /// This returns an error when Unicode word boundary tables + /// are not available. Specifically, this only occurs when the + /// `unicode-word-boundary` feature is not enabled. 
+ #[inline] + pub fn is_word_end_unicode( + &self, + haystack: &[u8], + at: usize, + ) -> Result { + let word_before = is_word_char::rev(haystack, at)?; + let word_after = is_word_char::fwd(haystack, at)?; + Ok(word_before && !word_after) + } + + /// Returns true when [`Look::WordStartHalfAscii`] is satisfied `at` the + /// given position in `haystack`. + /// + /// # Panics + /// + /// This may panic when `at > haystack.len()`. Note that `at == + /// haystack.len()` is legal and guaranteed not to panic. + #[inline] + pub fn is_word_start_half_ascii( + &self, + haystack: &[u8], + at: usize, + ) -> bool { + let word_before = at > 0 && utf8::is_word_byte(haystack[at - 1]); + !word_before + } + + /// Returns true when [`Look::WordEndHalfAscii`] is satisfied `at` the + /// given position in `haystack`. + /// + /// # Panics + /// + /// This may panic when `at > haystack.len()`. Note that `at == + /// haystack.len()` is legal and guaranteed not to panic. + #[inline] + pub fn is_word_end_half_ascii(&self, haystack: &[u8], at: usize) -> bool { + let word_after = + at < haystack.len() && utf8::is_word_byte(haystack[at]); + !word_after + } + + /// Returns true when [`Look::WordStartHalfUnicode`] is satisfied `at` the + /// given position in `haystack`. + /// + /// # Panics + /// + /// This may panic when `at > haystack.len()`. Note that `at == + /// haystack.len()` is legal and guaranteed not to panic. + /// + /// # Errors + /// + /// This returns an error when Unicode word boundary tables + /// are not available. Specifically, this only occurs when the + /// `unicode-word-boundary` feature is not enabled. + #[inline] + pub fn is_word_start_half_unicode( + &self, + haystack: &[u8], + at: usize, + ) -> Result { + // See `is_word_unicode_negate` for why we need to do this. We don't + // need to do it for `is_word_start_unicode` because that guarantees + // that the position matched falls on a valid UTF-8 boundary given + // that the right side must be in \w. 
+ let word_before = at > 0 + && match utf8::decode_last(&haystack[..at]) { + None | Some(Err(_)) => return Ok(false), + Some(Ok(_)) => is_word_char::rev(haystack, at)?, + }; + Ok(!word_before) + } + + /// Returns true when [`Look::WordEndHalfUnicode`] is satisfied `at` the + /// given position in `haystack`. + /// + /// # Panics + /// + /// This may panic when `at > haystack.len()`. Note that `at == + /// haystack.len()` is legal and guaranteed not to panic. + /// + /// # Errors + /// + /// This returns an error when Unicode word boundary tables + /// are not available. Specifically, this only occurs when the + /// `unicode-word-boundary` feature is not enabled. + #[inline] + pub fn is_word_end_half_unicode( + &self, + haystack: &[u8], + at: usize, + ) -> Result { + // See `is_word_unicode_negate` for why we need to do this. We don't + // need to do it for `is_word_end_unicode` because that guarantees + // that the position matched falls on a valid UTF-8 boundary given + // that the left side must be in \w. + let word_after = at < haystack.len() + && match utf8::decode(&haystack[at..]) { + None | Some(Err(_)) => return Ok(false), + Some(Ok(_)) => is_word_char::fwd(haystack, at)?, + }; + Ok(!word_after) + } +} + +impl Default for LookMatcher { + fn default() -> LookMatcher { + LookMatcher::new() + } +} + +/// An error that occurs when the Unicode-aware `\w` class is unavailable. +/// +/// This error can occur when the data tables necessary for the Unicode aware +/// Perl character class `\w` are unavailable. The `\w` class is used to +/// determine whether a codepoint is considered a word character or not when +/// determining whether a Unicode aware `\b` (or `\B`) matches at a particular +/// position. +/// +/// This error can only occur when the `unicode-word-boundary` feature is +/// disabled. 
+#[derive(Clone, Debug)] +pub struct UnicodeWordBoundaryError(()); + +impl UnicodeWordBoundaryError { + #[cfg(not(feature = "unicode-word-boundary"))] + pub(crate) fn new() -> UnicodeWordBoundaryError { + UnicodeWordBoundaryError(()) + } + + /// Returns an error if and only if Unicode word boundary data is + /// unavailable. + pub fn check() -> Result<(), UnicodeWordBoundaryError> { + is_word_char::check() + } +} + +#[cfg(feature = "std")] +impl std::error::Error for UnicodeWordBoundaryError {} + +impl core::fmt::Display for UnicodeWordBoundaryError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!( + f, + "Unicode-aware \\b and \\B are unavailable because the \ + requisite data tables are missing, please enable the \ + unicode-word-boundary feature" + ) + } +} + +// Below are FOUR different ways for checking whether whether a "word" +// codepoint exists at a particular position in the haystack. The four +// different approaches are, in order of preference: +// +// 1. Parse '\w', convert to an NFA, convert to a fully compiled DFA on the +// first call, and then use that DFA for all subsequent calls. +// 2. Do UTF-8 decoding and use regex_syntax::is_word_character if available. +// 3. Do UTF-8 decoding and use our own 'perl_word' table. +// 4. Return an error. +// +// The reason for all of these approaches is a combination of perf and +// permitting one to build regex-automata without the Unicode data necessary +// for handling Unicode-aware word boundaries. (In which case, '(?-u:\b)' would +// still work.) +// +// The DFA approach is the fastest, but it requires the regex parser, the +// NFA compiler, the DFA builder and the DFA search runtime. That's a lot to +// bring in, but if it's available, it's (probably) the best we can do. +// +// Approaches (2) and (3) are effectively equivalent, but (2) reuses the +// data in regex-syntax and avoids duplicating it in regex-automata. 
+// +// Finally, (4) unconditionally returns an error since the requisite data isn't +// available anywhere. +// +// There are actually more approaches possible that we didn't implement. For +// example, if the DFA builder is available but the syntax parser is not, we +// could technically hand construct our own NFA from the 'perl_word' data +// table. But to avoid some pretty hairy code duplication, we would in turn +// need to pull the UTF-8 compiler out of the NFA compiler. Yikes. +// +// A possibly more sensible alternative is to use a lazy DFA when the full +// DFA builder isn't available... +// +// Yet another choice would be to build the full DFA and then embed it into the +// source. Then we'd only need to bring in the DFA search runtime, which is +// considerably smaller than the DFA builder code. The problem here is that the +// Debian people have spooked me[1] into avoiding cyclic dependencies. Namely, +// we'd need to build regex-cli, which depends on regex-automata in order to +// build some part of regex-automata. But to be honest, something like this has +// to be allowed somehow? I just don't know what the right process is. +// +// There are perhaps other choices as well. Why did I stop at these 4? Because +// I wanted to preserve my sanity. I suspect I'll wind up adding the lazy DFA +// approach eventually, as the benefits of the DFA approach are somewhat +// compelling. The 'boundary-words-holmes' benchmark tests this. (Note that +// the commands below no longer work. If necessary, we should re-capitulate +// the benchmark from whole cloth in rebar.) 
+// +// $ regex-cli bench measure -f boundary-words-holmes -e pikevm > dfa.csv +// +// Then I changed the code below so that the util/unicode_data/perl_word table +// was used and re-ran the benchmark: +// +// $ regex-cli bench measure -f boundary-words-holmes -e pikevm > table.csv +// +// And compared them: +// +// $ regex-cli bench diff dfa.csv table.csv +// benchmark engine dfa table +// --------- ------ --- ----- +// internal/count/boundary-words-holmes regex/automata/pikevm 18.6 MB/s 12.9 MB/s +// +// Which is a nice improvement. +// +// UPDATE: It turns out that it takes approximately 22ms to build the reverse +// DFA for \w. (And about 3ms for the forward DFA.) It's probably not much in +// the grand scheme things, but that is a significant latency cost. So I'm not +// sure that's a good idea. I then tried using a lazy DFA instead, and that +// eliminated the overhead, but since the lazy DFA requires mutable working +// memory, that requires introducing a 'Cache' for every simultaneous call. +// +// I ended up deciding for now to just keep the "UTF-8 decode and check the +// table." The DFA and lazy DFA approaches are still below, but commented out. +// +// [1]: https://github.com/BurntSushi/ucd-generate/issues/11 + +/* +/// A module that looks for word codepoints using lazy DFAs. 
+#[cfg(all( + feature = "unicode-word-boundary", + feature = "syntax", + feature = "unicode-perl", + feature = "hybrid" +))] +mod is_word_char { + use alloc::vec::Vec; + + use crate::{ + hybrid::dfa::{Cache, DFA}, + nfa::thompson::NFA, + util::{lazy::Lazy, pool::Pool, primitives::StateID}, + Anchored, Input, + }; + + pub(super) fn check() -> Result<(), super::UnicodeWordBoundaryError> { + Ok(()) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(super) fn fwd( + haystack: &[u8], + mut at: usize, + ) -> Result { + static WORD: Lazy = Lazy::new(|| DFA::new(r"\w").unwrap()); + static CACHE: Lazy> = + Lazy::new(|| Pool::new(|| WORD.create_cache())); + let dfa = Lazy::get(&WORD); + let mut cache = Lazy::get(&CACHE).get(); + let mut sid = dfa + .start_state_forward( + &mut cache, + &Input::new("").anchored(Anchored::Yes), + ) + .unwrap(); + while at < haystack.len() { + let byte = haystack[at]; + sid = dfa.next_state(&mut cache, sid, byte).unwrap(); + at += 1; + if sid.is_tagged() { + if sid.is_match() { + return Ok(true); + } else if sid.is_dead() { + return Ok(false); + } + } + } + Ok(dfa.next_eoi_state(&mut cache, sid).unwrap().is_match()) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(super) fn rev( + haystack: &[u8], + mut at: usize, + ) -> Result { + static WORD: Lazy = Lazy::new(|| { + DFA::builder() + .thompson(NFA::config().reverse(true)) + .build(r"\w") + .unwrap() + }); + static CACHE: Lazy> = + Lazy::new(|| Pool::new(|| WORD.create_cache())); + let dfa = Lazy::get(&WORD); + let mut cache = Lazy::get(&CACHE).get(); + let mut sid = dfa + .start_state_reverse( + &mut cache, + &Input::new("").anchored(Anchored::Yes), + ) + .unwrap(); + while at > 0 { + at -= 1; + let byte = haystack[at]; + sid = dfa.next_state(&mut cache, sid, byte).unwrap(); + if sid.is_tagged() { + if sid.is_match() { + return Ok(true); + } else if sid.is_dead() { + return Ok(false); + } + } + } + Ok(dfa.next_eoi_state(&mut cache, sid).unwrap().is_match()) + 
} +} +*/ + +/* +/// A module that looks for word codepoints using fully compiled DFAs. +#[cfg(all( + feature = "unicode-word-boundary", + feature = "syntax", + feature = "unicode-perl", + feature = "dfa-build" +))] +mod is_word_char { + use alloc::vec::Vec; + + use crate::{ + dfa::{dense::DFA, Automaton, StartKind}, + nfa::thompson::NFA, + util::{lazy::Lazy, primitives::StateID}, + Anchored, Input, + }; + + pub(super) fn check() -> Result<(), super::UnicodeWordBoundaryError> { + Ok(()) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(super) fn fwd( + haystack: &[u8], + mut at: usize, + ) -> Result { + static WORD: Lazy<(DFA>, StateID)> = Lazy::new(|| { + let dfa = DFA::builder() + .configure(DFA::config().start_kind(StartKind::Anchored)) + .build(r"\w") + .unwrap(); + // OK because our regex has no look-around. + let start_id = dfa.universal_start_state(Anchored::Yes).unwrap(); + (dfa, start_id) + }); + let &(ref dfa, mut sid) = Lazy::get(&WORD); + while at < haystack.len() { + let byte = haystack[at]; + sid = dfa.next_state(sid, byte); + at += 1; + if dfa.is_special_state(sid) { + if dfa.is_match_state(sid) { + return Ok(true); + } else if dfa.is_dead_state(sid) { + return Ok(false); + } + } + } + Ok(dfa.is_match_state(dfa.next_eoi_state(sid))) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(super) fn rev( + haystack: &[u8], + mut at: usize, + ) -> Result { + static WORD: Lazy<(DFA>, StateID)> = Lazy::new(|| { + let dfa = DFA::builder() + .configure(DFA::config().start_kind(StartKind::Anchored)) + // From ad hoc measurements, it looks like setting + // shrink==false is slightly faster than shrink==true. I kind + // of feel like this indicates that shrinking is probably a + // failure, although it can help in some cases. Sigh. + .thompson(NFA::config().reverse(true).shrink(false)) + .build(r"\w") + .unwrap(); + // OK because our regex has no look-around. 
+ let start_id = dfa.universal_start_state(Anchored::Yes).unwrap(); + (dfa, start_id) + }); + let &(ref dfa, mut sid) = Lazy::get(&WORD); + while at > 0 { + at -= 1; + let byte = haystack[at]; + sid = dfa.next_state(sid, byte); + if dfa.is_special_state(sid) { + if dfa.is_match_state(sid) { + return Ok(true); + } else if dfa.is_dead_state(sid) { + return Ok(false); + } + } + } + Ok(dfa.is_match_state(dfa.next_eoi_state(sid))) + } +} +*/ + +/// A module that looks for word codepoints using regex-syntax's data tables. +#[cfg(all( + feature = "unicode-word-boundary", + feature = "syntax", + feature = "unicode-perl", +))] +mod is_word_char { + use regex_syntax::try_is_word_character; + + use crate::util::utf8; + + pub(super) fn check() -> Result<(), super::UnicodeWordBoundaryError> { + Ok(()) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(super) fn fwd( + haystack: &[u8], + at: usize, + ) -> Result { + Ok(match utf8::decode(&haystack[at..]) { + None | Some(Err(_)) => false, + Some(Ok(ch)) => try_is_word_character(ch).expect( + "since unicode-word-boundary, syntax and unicode-perl \ + are all enabled, it is expected that \ + try_is_word_character succeeds", + ), + }) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(super) fn rev( + haystack: &[u8], + at: usize, + ) -> Result { + Ok(match utf8::decode_last(&haystack[..at]) { + None | Some(Err(_)) => false, + Some(Ok(ch)) => try_is_word_character(ch).expect( + "since unicode-word-boundary, syntax and unicode-perl \ + are all enabled, it is expected that \ + try_is_word_character succeeds", + ), + }) + } +} + +/// A module that looks for word codepoints using regex-automata's data tables +/// (which are only compiled when regex-syntax's tables aren't available). +/// +/// Note that the cfg should match the one in src/util/unicode_data/mod.rs for +/// perl_word. 
+#[cfg(all( + feature = "unicode-word-boundary", + not(all(feature = "syntax", feature = "unicode-perl")), +))] +mod is_word_char { + use crate::util::utf8; + + pub(super) fn check() -> Result<(), super::UnicodeWordBoundaryError> { + Ok(()) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(super) fn fwd( + haystack: &[u8], + at: usize, + ) -> Result { + Ok(match utf8::decode(&haystack[at..]) { + None | Some(Err(_)) => false, + Some(Ok(ch)) => is_word_character(ch), + }) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(super) fn rev( + haystack: &[u8], + at: usize, + ) -> Result { + Ok(match utf8::decode_last(&haystack[..at]) { + None | Some(Err(_)) => false, + Some(Ok(ch)) => is_word_character(ch), + }) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn is_word_character(c: char) -> bool { + use crate::util::{unicode_data::perl_word::PERL_WORD, utf8}; + + if u8::try_from(c).map_or(false, utf8::is_word_byte) { + return true; + } + PERL_WORD + .binary_search_by(|&(start, end)| { + use core::cmp::Ordering; + + if start <= c && c <= end { + Ordering::Equal + } else if start > c { + Ordering::Greater + } else { + Ordering::Less + } + }) + .is_ok() + } +} + +/// A module that always returns an error if Unicode word boundaries are +/// disabled. When this feature is disabled, then regex-automata will not +/// include its own data tables even if regex-syntax is disabled. 
+#[cfg(not(feature = "unicode-word-boundary"))] +mod is_word_char { + pub(super) fn check() -> Result<(), super::UnicodeWordBoundaryError> { + Err(super::UnicodeWordBoundaryError::new()) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(super) fn fwd( + _bytes: &[u8], + _at: usize, + ) -> Result { + Err(super::UnicodeWordBoundaryError::new()) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(super) fn rev( + _bytes: &[u8], + _at: usize, + ) -> Result { + Err(super::UnicodeWordBoundaryError::new()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + macro_rules! testlook { + ($look:expr, $haystack:expr, $at:expr) => { + LookMatcher::default().matches($look, $haystack.as_bytes(), $at) + }; + } + + #[test] + fn look_matches_start_line() { + let look = Look::StartLF; + + assert!(testlook!(look, "", 0)); + assert!(testlook!(look, "\n", 0)); + assert!(testlook!(look, "\n", 1)); + assert!(testlook!(look, "a", 0)); + assert!(testlook!(look, "\na", 1)); + + assert!(!testlook!(look, "a", 1)); + assert!(!testlook!(look, "a\na", 1)); + } + + #[test] + fn look_matches_end_line() { + let look = Look::EndLF; + + assert!(testlook!(look, "", 0)); + assert!(testlook!(look, "\n", 1)); + assert!(testlook!(look, "\na", 0)); + assert!(testlook!(look, "\na", 2)); + assert!(testlook!(look, "a\na", 1)); + + assert!(!testlook!(look, "a", 0)); + assert!(!testlook!(look, "\na", 1)); + assert!(!testlook!(look, "a\na", 0)); + assert!(!testlook!(look, "a\na", 2)); + } + + #[test] + fn look_matches_start_text() { + let look = Look::Start; + + assert!(testlook!(look, "", 0)); + assert!(testlook!(look, "\n", 0)); + assert!(testlook!(look, "a", 0)); + + assert!(!testlook!(look, "\n", 1)); + assert!(!testlook!(look, "\na", 1)); + assert!(!testlook!(look, "a", 1)); + assert!(!testlook!(look, "a\na", 1)); + } + + #[test] + fn look_matches_end_text() { + let look = Look::End; + + assert!(testlook!(look, "", 0)); + assert!(testlook!(look, "\n", 1)); + 
assert!(testlook!(look, "\na", 2)); + + assert!(!testlook!(look, "\na", 0)); + assert!(!testlook!(look, "a\na", 1)); + assert!(!testlook!(look, "a", 0)); + assert!(!testlook!(look, "\na", 1)); + assert!(!testlook!(look, "a\na", 0)); + assert!(!testlook!(look, "a\na", 2)); + } + + #[test] + #[cfg(all(not(miri), feature = "unicode-word-boundary"))] + fn look_matches_word_unicode() { + let look = Look::WordUnicode; + + // \xF0\x9D\x9B\x83 = 𝛃 (in \w) + // \xF0\x90\x86\x80 = 𐆀 (not in \w) + + // Simple ASCII word boundaries. + assert!(testlook!(look, "a", 0)); + assert!(testlook!(look, "a", 1)); + assert!(testlook!(look, "a ", 1)); + assert!(testlook!(look, " a ", 1)); + assert!(testlook!(look, " a ", 2)); + + // Unicode word boundaries with a non-ASCII codepoint. + assert!(testlook!(look, "𝛃", 0)); + assert!(testlook!(look, "𝛃", 4)); + assert!(testlook!(look, "𝛃 ", 4)); + assert!(testlook!(look, " 𝛃 ", 1)); + assert!(testlook!(look, " 𝛃 ", 5)); + + // Unicode word boundaries between non-ASCII codepoints. + assert!(testlook!(look, "𝛃𐆀", 0)); + assert!(testlook!(look, "𝛃𐆀", 4)); + + // Non word boundaries for ASCII. + assert!(!testlook!(look, "", 0)); + assert!(!testlook!(look, "ab", 1)); + assert!(!testlook!(look, "a ", 2)); + assert!(!testlook!(look, " a ", 0)); + assert!(!testlook!(look, " a ", 3)); + + // Non word boundaries with a non-ASCII codepoint. + assert!(!testlook!(look, "𝛃b", 4)); + assert!(!testlook!(look, "𝛃 ", 5)); + assert!(!testlook!(look, " 𝛃 ", 0)); + assert!(!testlook!(look, " 𝛃 ", 6)); + assert!(!testlook!(look, "𝛃", 1)); + assert!(!testlook!(look, "𝛃", 2)); + assert!(!testlook!(look, "𝛃", 3)); + + // Non word boundaries with non-ASCII codepoints. 
+ assert!(!testlook!(look, "𝛃𐆀", 1)); + assert!(!testlook!(look, "𝛃𐆀", 2)); + assert!(!testlook!(look, "𝛃𐆀", 3)); + assert!(!testlook!(look, "𝛃𐆀", 5)); + assert!(!testlook!(look, "𝛃𐆀", 6)); + assert!(!testlook!(look, "𝛃𐆀", 7)); + assert!(!testlook!(look, "𝛃𐆀", 8)); + } + + #[test] + fn look_matches_word_ascii() { + let look = Look::WordAscii; + + // \xF0\x9D\x9B\x83 = 𝛃 (in \w) + // \xF0\x90\x86\x80 = 𐆀 (not in \w) + + // Simple ASCII word boundaries. + assert!(testlook!(look, "a", 0)); + assert!(testlook!(look, "a", 1)); + assert!(testlook!(look, "a ", 1)); + assert!(testlook!(look, " a ", 1)); + assert!(testlook!(look, " a ", 2)); + + // Unicode word boundaries with a non-ASCII codepoint. Since this is + // an ASCII word boundary, none of these match. + assert!(!testlook!(look, "𝛃", 0)); + assert!(!testlook!(look, "𝛃", 4)); + assert!(!testlook!(look, "𝛃 ", 4)); + assert!(!testlook!(look, " 𝛃 ", 1)); + assert!(!testlook!(look, " 𝛃 ", 5)); + + // Unicode word boundaries between non-ASCII codepoints. Again, since + // this is an ASCII word boundary, none of these match. + assert!(!testlook!(look, "𝛃𐆀", 0)); + assert!(!testlook!(look, "𝛃𐆀", 4)); + + // Non word boundaries for ASCII. + assert!(!testlook!(look, "", 0)); + assert!(!testlook!(look, "ab", 1)); + assert!(!testlook!(look, "a ", 2)); + assert!(!testlook!(look, " a ", 0)); + assert!(!testlook!(look, " a ", 3)); + + // Non word boundaries with a non-ASCII codepoint. + assert!(testlook!(look, "𝛃b", 4)); + assert!(!testlook!(look, "𝛃 ", 5)); + assert!(!testlook!(look, " 𝛃 ", 0)); + assert!(!testlook!(look, " 𝛃 ", 6)); + assert!(!testlook!(look, "𝛃", 1)); + assert!(!testlook!(look, "𝛃", 2)); + assert!(!testlook!(look, "𝛃", 3)); + + // Non word boundaries with non-ASCII codepoints. 
+ assert!(!testlook!(look, "𝛃𐆀", 1)); + assert!(!testlook!(look, "𝛃𐆀", 2)); + assert!(!testlook!(look, "𝛃𐆀", 3)); + assert!(!testlook!(look, "𝛃𐆀", 5)); + assert!(!testlook!(look, "𝛃𐆀", 6)); + assert!(!testlook!(look, "𝛃𐆀", 7)); + assert!(!testlook!(look, "𝛃𐆀", 8)); + } + + #[test] + #[cfg(all(not(miri), feature = "unicode-word-boundary"))] + fn look_matches_word_unicode_negate() { + let look = Look::WordUnicodeNegate; + + // \xF0\x9D\x9B\x83 = 𝛃 (in \w) + // \xF0\x90\x86\x80 = 𐆀 (not in \w) + + // Simple ASCII word boundaries. + assert!(!testlook!(look, "a", 0)); + assert!(!testlook!(look, "a", 1)); + assert!(!testlook!(look, "a ", 1)); + assert!(!testlook!(look, " a ", 1)); + assert!(!testlook!(look, " a ", 2)); + + // Unicode word boundaries with a non-ASCII codepoint. + assert!(!testlook!(look, "𝛃", 0)); + assert!(!testlook!(look, "𝛃", 4)); + assert!(!testlook!(look, "𝛃 ", 4)); + assert!(!testlook!(look, " 𝛃 ", 1)); + assert!(!testlook!(look, " 𝛃 ", 5)); + + // Unicode word boundaries between non-ASCII codepoints. + assert!(!testlook!(look, "𝛃𐆀", 0)); + assert!(!testlook!(look, "𝛃𐆀", 4)); + + // Non word boundaries for ASCII. + assert!(testlook!(look, "", 0)); + assert!(testlook!(look, "ab", 1)); + assert!(testlook!(look, "a ", 2)); + assert!(testlook!(look, " a ", 0)); + assert!(testlook!(look, " a ", 3)); + + // Non word boundaries with a non-ASCII codepoint. + assert!(testlook!(look, "𝛃b", 4)); + assert!(testlook!(look, "𝛃 ", 5)); + assert!(testlook!(look, " 𝛃 ", 0)); + assert!(testlook!(look, " 𝛃 ", 6)); + // These don't match because they could otherwise return an offset that + // splits the UTF-8 encoding of a codepoint. + assert!(!testlook!(look, "𝛃", 1)); + assert!(!testlook!(look, "𝛃", 2)); + assert!(!testlook!(look, "𝛃", 3)); + + // Non word boundaries with non-ASCII codepoints. These also don't + // match because they could otherwise return an offset that splits the + // UTF-8 encoding of a codepoint. 
+ assert!(!testlook!(look, "𝛃𐆀", 1)); + assert!(!testlook!(look, "𝛃𐆀", 2)); + assert!(!testlook!(look, "𝛃𐆀", 3)); + assert!(!testlook!(look, "𝛃𐆀", 5)); + assert!(!testlook!(look, "𝛃𐆀", 6)); + assert!(!testlook!(look, "𝛃𐆀", 7)); + // But this one does, since 𐆀 isn't a word codepoint, and 8 is the end + // of the haystack. So the "end" of the haystack isn't a word and 𐆀 + // isn't a word, thus, \B matches. + assert!(testlook!(look, "𝛃𐆀", 8)); + } + + #[test] + fn look_matches_word_ascii_negate() { + let look = Look::WordAsciiNegate; + + // \xF0\x9D\x9B\x83 = 𝛃 (in \w) + // \xF0\x90\x86\x80 = 𐆀 (not in \w) + + // Simple ASCII word boundaries. + assert!(!testlook!(look, "a", 0)); + assert!(!testlook!(look, "a", 1)); + assert!(!testlook!(look, "a ", 1)); + assert!(!testlook!(look, " a ", 1)); + assert!(!testlook!(look, " a ", 2)); + + // Unicode word boundaries with a non-ASCII codepoint. Since this is + // an ASCII word boundary, none of these match. + assert!(testlook!(look, "𝛃", 0)); + assert!(testlook!(look, "𝛃", 4)); + assert!(testlook!(look, "𝛃 ", 4)); + assert!(testlook!(look, " 𝛃 ", 1)); + assert!(testlook!(look, " 𝛃 ", 5)); + + // Unicode word boundaries between non-ASCII codepoints. Again, since + // this is an ASCII word boundary, none of these match. + assert!(testlook!(look, "𝛃𐆀", 0)); + assert!(testlook!(look, "𝛃𐆀", 4)); + + // Non word boundaries for ASCII. + assert!(testlook!(look, "", 0)); + assert!(testlook!(look, "ab", 1)); + assert!(testlook!(look, "a ", 2)); + assert!(testlook!(look, " a ", 0)); + assert!(testlook!(look, " a ", 3)); + + // Non word boundaries with a non-ASCII codepoint. + assert!(!testlook!(look, "𝛃b", 4)); + assert!(testlook!(look, "𝛃 ", 5)); + assert!(testlook!(look, " 𝛃 ", 0)); + assert!(testlook!(look, " 𝛃 ", 6)); + assert!(testlook!(look, "𝛃", 1)); + assert!(testlook!(look, "𝛃", 2)); + assert!(testlook!(look, "𝛃", 3)); + + // Non word boundaries with non-ASCII codepoints. 
+ assert!(testlook!(look, "𝛃𐆀", 1)); + assert!(testlook!(look, "𝛃𐆀", 2)); + assert!(testlook!(look, "𝛃𐆀", 3)); + assert!(testlook!(look, "𝛃𐆀", 5)); + assert!(testlook!(look, "𝛃𐆀", 6)); + assert!(testlook!(look, "𝛃𐆀", 7)); + assert!(testlook!(look, "𝛃𐆀", 8)); + } + + #[test] + fn look_matches_word_start_ascii() { + let look = Look::WordStartAscii; + + // \xF0\x9D\x9B\x83 = 𝛃 (in \w) + // \xF0\x90\x86\x80 = 𐆀 (not in \w) + + // Simple ASCII word boundaries. + assert!(testlook!(look, "a", 0)); + assert!(!testlook!(look, "a", 1)); + assert!(!testlook!(look, "a ", 1)); + assert!(testlook!(look, " a ", 1)); + assert!(!testlook!(look, " a ", 2)); + + // Unicode word boundaries with a non-ASCII codepoint. Since this is + // an ASCII word boundary, none of these match. + assert!(!testlook!(look, "𝛃", 0)); + assert!(!testlook!(look, "𝛃", 4)); + assert!(!testlook!(look, "𝛃 ", 4)); + assert!(!testlook!(look, " 𝛃 ", 1)); + assert!(!testlook!(look, " 𝛃 ", 5)); + + // Unicode word boundaries between non-ASCII codepoints. Again, since + // this is an ASCII word boundary, none of these match. + assert!(!testlook!(look, "𝛃𐆀", 0)); + assert!(!testlook!(look, "𝛃𐆀", 4)); + + // Non word boundaries for ASCII. + assert!(!testlook!(look, "", 0)); + assert!(!testlook!(look, "ab", 1)); + assert!(!testlook!(look, "a ", 2)); + assert!(!testlook!(look, " a ", 0)); + assert!(!testlook!(look, " a ", 3)); + + // Non word boundaries with a non-ASCII codepoint. + assert!(testlook!(look, "𝛃b", 4)); + assert!(!testlook!(look, "b𝛃", 1)); + assert!(!testlook!(look, "𝛃 ", 5)); + assert!(!testlook!(look, " 𝛃 ", 0)); + assert!(!testlook!(look, " 𝛃 ", 6)); + assert!(!testlook!(look, "𝛃", 1)); + assert!(!testlook!(look, "𝛃", 2)); + assert!(!testlook!(look, "𝛃", 3)); + + // Non word boundaries with non-ASCII codepoints. 
+ assert!(!testlook!(look, "𝛃𐆀", 1)); + assert!(!testlook!(look, "𝛃𐆀", 2)); + assert!(!testlook!(look, "𝛃𐆀", 3)); + assert!(!testlook!(look, "𝛃𐆀", 5)); + assert!(!testlook!(look, "𝛃𐆀", 6)); + assert!(!testlook!(look, "𝛃𐆀", 7)); + assert!(!testlook!(look, "𝛃𐆀", 8)); + } + + #[test] + fn look_matches_word_end_ascii() { + let look = Look::WordEndAscii; + + // \xF0\x9D\x9B\x83 = 𝛃 (in \w) + // \xF0\x90\x86\x80 = 𐆀 (not in \w) + + // Simple ASCII word boundaries. + assert!(!testlook!(look, "a", 0)); + assert!(testlook!(look, "a", 1)); + assert!(testlook!(look, "a ", 1)); + assert!(!testlook!(look, " a ", 1)); + assert!(testlook!(look, " a ", 2)); + + // Unicode word boundaries with a non-ASCII codepoint. Since this is + // an ASCII word boundary, none of these match. + assert!(!testlook!(look, "𝛃", 0)); + assert!(!testlook!(look, "𝛃", 4)); + assert!(!testlook!(look, "𝛃 ", 4)); + assert!(!testlook!(look, " 𝛃 ", 1)); + assert!(!testlook!(look, " 𝛃 ", 5)); + + // Unicode word boundaries between non-ASCII codepoints. Again, since + // this is an ASCII word boundary, none of these match. + assert!(!testlook!(look, "𝛃𐆀", 0)); + assert!(!testlook!(look, "𝛃𐆀", 4)); + + // Non word boundaries for ASCII. + assert!(!testlook!(look, "", 0)); + assert!(!testlook!(look, "ab", 1)); + assert!(!testlook!(look, "a ", 2)); + assert!(!testlook!(look, " a ", 0)); + assert!(!testlook!(look, " a ", 3)); + + // Non word boundaries with a non-ASCII codepoint. + assert!(!testlook!(look, "𝛃b", 4)); + assert!(testlook!(look, "b𝛃", 1)); + assert!(!testlook!(look, "𝛃 ", 5)); + assert!(!testlook!(look, " 𝛃 ", 0)); + assert!(!testlook!(look, " 𝛃 ", 6)); + assert!(!testlook!(look, "𝛃", 1)); + assert!(!testlook!(look, "𝛃", 2)); + assert!(!testlook!(look, "𝛃", 3)); + + // Non word boundaries with non-ASCII codepoints. 
+ assert!(!testlook!(look, "𝛃𐆀", 1)); + assert!(!testlook!(look, "𝛃𐆀", 2)); + assert!(!testlook!(look, "𝛃𐆀", 3)); + assert!(!testlook!(look, "𝛃𐆀", 5)); + assert!(!testlook!(look, "𝛃𐆀", 6)); + assert!(!testlook!(look, "𝛃𐆀", 7)); + assert!(!testlook!(look, "𝛃𐆀", 8)); + } + + #[test] + #[cfg(all(not(miri), feature = "unicode-word-boundary"))] + fn look_matches_word_start_unicode() { + let look = Look::WordStartUnicode; + + // \xF0\x9D\x9B\x83 = 𝛃 (in \w) + // \xF0\x90\x86\x80 = 𐆀 (not in \w) + + // Simple ASCII word boundaries. + assert!(testlook!(look, "a", 0)); + assert!(!testlook!(look, "a", 1)); + assert!(!testlook!(look, "a ", 1)); + assert!(testlook!(look, " a ", 1)); + assert!(!testlook!(look, " a ", 2)); + + // Unicode word boundaries with a non-ASCII codepoint. + assert!(testlook!(look, "𝛃", 0)); + assert!(!testlook!(look, "𝛃", 4)); + assert!(!testlook!(look, "𝛃 ", 4)); + assert!(testlook!(look, " 𝛃 ", 1)); + assert!(!testlook!(look, " 𝛃 ", 5)); + + // Unicode word boundaries between non-ASCII codepoints. + assert!(testlook!(look, "𝛃𐆀", 0)); + assert!(!testlook!(look, "𝛃𐆀", 4)); + + // Non word boundaries for ASCII. + assert!(!testlook!(look, "", 0)); + assert!(!testlook!(look, "ab", 1)); + assert!(!testlook!(look, "a ", 2)); + assert!(!testlook!(look, " a ", 0)); + assert!(!testlook!(look, " a ", 3)); + + // Non word boundaries with a non-ASCII codepoint. + assert!(!testlook!(look, "𝛃b", 4)); + assert!(!testlook!(look, "b𝛃", 1)); + assert!(!testlook!(look, "𝛃 ", 5)); + assert!(!testlook!(look, " 𝛃 ", 0)); + assert!(!testlook!(look, " 𝛃 ", 6)); + assert!(!testlook!(look, "𝛃", 1)); + assert!(!testlook!(look, "𝛃", 2)); + assert!(!testlook!(look, "𝛃", 3)); + + // Non word boundaries with non-ASCII codepoints. 
+ assert!(!testlook!(look, "𝛃𐆀", 1)); + assert!(!testlook!(look, "𝛃𐆀", 2)); + assert!(!testlook!(look, "𝛃𐆀", 3)); + assert!(!testlook!(look, "𝛃𐆀", 5)); + assert!(!testlook!(look, "𝛃𐆀", 6)); + assert!(!testlook!(look, "𝛃𐆀", 7)); + assert!(!testlook!(look, "𝛃𐆀", 8)); + } + + #[test] + #[cfg(all(not(miri), feature = "unicode-word-boundary"))] + fn look_matches_word_end_unicode() { + let look = Look::WordEndUnicode; + + // \xF0\x9D\x9B\x83 = 𝛃 (in \w) + // \xF0\x90\x86\x80 = 𐆀 (not in \w) + + // Simple ASCII word boundaries. + assert!(!testlook!(look, "a", 0)); + assert!(testlook!(look, "a", 1)); + assert!(testlook!(look, "a ", 1)); + assert!(!testlook!(look, " a ", 1)); + assert!(testlook!(look, " a ", 2)); + + // Unicode word boundaries with a non-ASCII codepoint. + assert!(!testlook!(look, "𝛃", 0)); + assert!(testlook!(look, "𝛃", 4)); + assert!(testlook!(look, "𝛃 ", 4)); + assert!(!testlook!(look, " 𝛃 ", 1)); + assert!(testlook!(look, " 𝛃 ", 5)); + + // Unicode word boundaries between non-ASCII codepoints. + assert!(!testlook!(look, "𝛃𐆀", 0)); + assert!(testlook!(look, "𝛃𐆀", 4)); + + // Non word boundaries for ASCII. + assert!(!testlook!(look, "", 0)); + assert!(!testlook!(look, "ab", 1)); + assert!(!testlook!(look, "a ", 2)); + assert!(!testlook!(look, " a ", 0)); + assert!(!testlook!(look, " a ", 3)); + + // Non word boundaries with a non-ASCII codepoint. + assert!(!testlook!(look, "𝛃b", 4)); + assert!(!testlook!(look, "b𝛃", 1)); + assert!(!testlook!(look, "𝛃 ", 5)); + assert!(!testlook!(look, " 𝛃 ", 0)); + assert!(!testlook!(look, " 𝛃 ", 6)); + assert!(!testlook!(look, "𝛃", 1)); + assert!(!testlook!(look, "𝛃", 2)); + assert!(!testlook!(look, "𝛃", 3)); + + // Non word boundaries with non-ASCII codepoints. 
+ assert!(!testlook!(look, "𝛃𐆀", 1)); + assert!(!testlook!(look, "𝛃𐆀", 2)); + assert!(!testlook!(look, "𝛃𐆀", 3)); + assert!(!testlook!(look, "𝛃𐆀", 5)); + assert!(!testlook!(look, "𝛃𐆀", 6)); + assert!(!testlook!(look, "𝛃𐆀", 7)); + assert!(!testlook!(look, "𝛃𐆀", 8)); + } + + #[test] + fn look_matches_word_start_half_ascii() { + let look = Look::WordStartHalfAscii; + + // \xF0\x9D\x9B\x83 = 𝛃 (in \w) + // \xF0\x90\x86\x80 = 𐆀 (not in \w) + + // Simple ASCII word boundaries. + assert!(testlook!(look, "a", 0)); + assert!(!testlook!(look, "a", 1)); + assert!(!testlook!(look, "a ", 1)); + assert!(testlook!(look, " a ", 1)); + assert!(!testlook!(look, " a ", 2)); + + // Unicode word boundaries with a non-ASCII codepoint. Since this is + // an ASCII word boundary, none of these match. + assert!(testlook!(look, "𝛃", 0)); + assert!(testlook!(look, "𝛃", 4)); + assert!(testlook!(look, "𝛃 ", 4)); + assert!(testlook!(look, " 𝛃 ", 1)); + assert!(testlook!(look, " 𝛃 ", 5)); + + // Unicode word boundaries between non-ASCII codepoints. Again, since + // this is an ASCII word boundary, none of these match. + assert!(testlook!(look, "𝛃𐆀", 0)); + assert!(testlook!(look, "𝛃𐆀", 4)); + + // Non word boundaries for ASCII. + assert!(testlook!(look, "", 0)); + assert!(!testlook!(look, "ab", 1)); + assert!(testlook!(look, "a ", 2)); + assert!(testlook!(look, " a ", 0)); + assert!(testlook!(look, " a ", 3)); + + // Non word boundaries with a non-ASCII codepoint. + assert!(testlook!(look, "𝛃b", 4)); + assert!(!testlook!(look, "b𝛃", 1)); + assert!(testlook!(look, "𝛃 ", 5)); + assert!(testlook!(look, " 𝛃 ", 0)); + assert!(testlook!(look, " 𝛃 ", 6)); + assert!(testlook!(look, "𝛃", 1)); + assert!(testlook!(look, "𝛃", 2)); + assert!(testlook!(look, "𝛃", 3)); + + // Non word boundaries with non-ASCII codepoints. 
+ assert!(testlook!(look, "𝛃𐆀", 1)); + assert!(testlook!(look, "𝛃𐆀", 2)); + assert!(testlook!(look, "𝛃𐆀", 3)); + assert!(testlook!(look, "𝛃𐆀", 5)); + assert!(testlook!(look, "𝛃𐆀", 6)); + assert!(testlook!(look, "𝛃𐆀", 7)); + assert!(testlook!(look, "𝛃𐆀", 8)); + } + + #[test] + fn look_matches_word_end_half_ascii() { + let look = Look::WordEndHalfAscii; + + // \xF0\x9D\x9B\x83 = 𝛃 (in \w) + // \xF0\x90\x86\x80 = 𐆀 (not in \w) + + // Simple ASCII word boundaries. + assert!(!testlook!(look, "a", 0)); + assert!(testlook!(look, "a", 1)); + assert!(testlook!(look, "a ", 1)); + assert!(!testlook!(look, " a ", 1)); + assert!(testlook!(look, " a ", 2)); + + // Unicode word boundaries with a non-ASCII codepoint. Since this is + // an ASCII word boundary, none of these match. + assert!(testlook!(look, "𝛃", 0)); + assert!(testlook!(look, "𝛃", 4)); + assert!(testlook!(look, "𝛃 ", 4)); + assert!(testlook!(look, " 𝛃 ", 1)); + assert!(testlook!(look, " 𝛃 ", 5)); + + // Unicode word boundaries between non-ASCII codepoints. Again, since + // this is an ASCII word boundary, none of these match. + assert!(testlook!(look, "𝛃𐆀", 0)); + assert!(testlook!(look, "𝛃𐆀", 4)); + + // Non word boundaries for ASCII. + assert!(testlook!(look, "", 0)); + assert!(!testlook!(look, "ab", 1)); + assert!(testlook!(look, "a ", 2)); + assert!(testlook!(look, " a ", 0)); + assert!(testlook!(look, " a ", 3)); + + // Non word boundaries with a non-ASCII codepoint. + assert!(!testlook!(look, "𝛃b", 4)); + assert!(testlook!(look, "b𝛃", 1)); + assert!(testlook!(look, "𝛃 ", 5)); + assert!(testlook!(look, " 𝛃 ", 0)); + assert!(testlook!(look, " 𝛃 ", 6)); + assert!(testlook!(look, "𝛃", 1)); + assert!(testlook!(look, "𝛃", 2)); + assert!(testlook!(look, "𝛃", 3)); + + // Non word boundaries with non-ASCII codepoints. 
+ assert!(testlook!(look, "𝛃𐆀", 1)); + assert!(testlook!(look, "𝛃𐆀", 2)); + assert!(testlook!(look, "𝛃𐆀", 3)); + assert!(testlook!(look, "𝛃𐆀", 5)); + assert!(testlook!(look, "𝛃𐆀", 6)); + assert!(testlook!(look, "𝛃𐆀", 7)); + assert!(testlook!(look, "𝛃𐆀", 8)); + } + + #[test] + #[cfg(all(not(miri), feature = "unicode-word-boundary"))] + fn look_matches_word_start_half_unicode() { + let look = Look::WordStartHalfUnicode; + + // \xF0\x9D\x9B\x83 = 𝛃 (in \w) + // \xF0\x90\x86\x80 = 𐆀 (not in \w) + + // Simple ASCII word boundaries. + assert!(testlook!(look, "a", 0)); + assert!(!testlook!(look, "a", 1)); + assert!(!testlook!(look, "a ", 1)); + assert!(testlook!(look, " a ", 1)); + assert!(!testlook!(look, " a ", 2)); + + // Unicode word boundaries with a non-ASCII codepoint. + assert!(testlook!(look, "𝛃", 0)); + assert!(!testlook!(look, "𝛃", 4)); + assert!(!testlook!(look, "𝛃 ", 4)); + assert!(testlook!(look, " 𝛃 ", 1)); + assert!(!testlook!(look, " 𝛃 ", 5)); + + // Unicode word boundaries between non-ASCII codepoints. + assert!(testlook!(look, "𝛃𐆀", 0)); + assert!(!testlook!(look, "𝛃𐆀", 4)); + + // Non word boundaries for ASCII. + assert!(testlook!(look, "", 0)); + assert!(!testlook!(look, "ab", 1)); + assert!(testlook!(look, "a ", 2)); + assert!(testlook!(look, " a ", 0)); + assert!(testlook!(look, " a ", 3)); + + // Non word boundaries with a non-ASCII codepoint. + assert!(!testlook!(look, "𝛃b", 4)); + assert!(!testlook!(look, "b𝛃", 1)); + assert!(testlook!(look, "𝛃 ", 5)); + assert!(testlook!(look, " 𝛃 ", 0)); + assert!(testlook!(look, " 𝛃 ", 6)); + assert!(!testlook!(look, "𝛃", 1)); + assert!(!testlook!(look, "𝛃", 2)); + assert!(!testlook!(look, "𝛃", 3)); + + // Non word boundaries with non-ASCII codepoints. 
+ assert!(!testlook!(look, "𝛃𐆀", 1)); + assert!(!testlook!(look, "𝛃𐆀", 2)); + assert!(!testlook!(look, "𝛃𐆀", 3)); + assert!(!testlook!(look, "𝛃𐆀", 5)); + assert!(!testlook!(look, "𝛃𐆀", 6)); + assert!(!testlook!(look, "𝛃𐆀", 7)); + assert!(testlook!(look, "𝛃𐆀", 8)); + } + + #[test] + #[cfg(all(not(miri), feature = "unicode-word-boundary"))] + fn look_matches_word_end_half_unicode() { + let look = Look::WordEndHalfUnicode; + + // \xF0\x9D\x9B\x83 = 𝛃 (in \w) + // \xF0\x90\x86\x80 = 𐆀 (not in \w) + + // Simple ASCII word boundaries. + assert!(!testlook!(look, "a", 0)); + assert!(testlook!(look, "a", 1)); + assert!(testlook!(look, "a ", 1)); + assert!(!testlook!(look, " a ", 1)); + assert!(testlook!(look, " a ", 2)); + + // Unicode word boundaries with a non-ASCII codepoint. + assert!(!testlook!(look, "𝛃", 0)); + assert!(testlook!(look, "𝛃", 4)); + assert!(testlook!(look, "𝛃 ", 4)); + assert!(!testlook!(look, " 𝛃 ", 1)); + assert!(testlook!(look, " 𝛃 ", 5)); + + // Unicode word boundaries between non-ASCII codepoints. + assert!(!testlook!(look, "𝛃𐆀", 0)); + assert!(testlook!(look, "𝛃𐆀", 4)); + + // Non word boundaries for ASCII. + assert!(testlook!(look, "", 0)); + assert!(!testlook!(look, "ab", 1)); + assert!(testlook!(look, "a ", 2)); + assert!(testlook!(look, " a ", 0)); + assert!(testlook!(look, " a ", 3)); + + // Non word boundaries with a non-ASCII codepoint. + assert!(!testlook!(look, "𝛃b", 4)); + assert!(!testlook!(look, "b𝛃", 1)); + assert!(testlook!(look, "𝛃 ", 5)); + assert!(testlook!(look, " 𝛃 ", 0)); + assert!(testlook!(look, " 𝛃 ", 6)); + assert!(!testlook!(look, "𝛃", 1)); + assert!(!testlook!(look, "𝛃", 2)); + assert!(!testlook!(look, "𝛃", 3)); + + // Non word boundaries with non-ASCII codepoints. 
+ assert!(!testlook!(look, "𝛃𐆀", 1)); + assert!(!testlook!(look, "𝛃𐆀", 2)); + assert!(!testlook!(look, "𝛃𐆀", 3)); + assert!(!testlook!(look, "𝛃𐆀", 5)); + assert!(!testlook!(look, "𝛃𐆀", 6)); + assert!(!testlook!(look, "𝛃𐆀", 7)); + assert!(testlook!(look, "𝛃𐆀", 8)); + } + + #[test] + fn look_set() { + let mut f = LookSet::default(); + assert!(!f.contains(Look::Start)); + assert!(!f.contains(Look::End)); + assert!(!f.contains(Look::StartLF)); + assert!(!f.contains(Look::EndLF)); + assert!(!f.contains(Look::WordUnicode)); + assert!(!f.contains(Look::WordUnicodeNegate)); + assert!(!f.contains(Look::WordAscii)); + assert!(!f.contains(Look::WordAsciiNegate)); + + f = f.insert(Look::Start); + assert!(f.contains(Look::Start)); + f = f.remove(Look::Start); + assert!(!f.contains(Look::Start)); + + f = f.insert(Look::End); + assert!(f.contains(Look::End)); + f = f.remove(Look::End); + assert!(!f.contains(Look::End)); + + f = f.insert(Look::StartLF); + assert!(f.contains(Look::StartLF)); + f = f.remove(Look::StartLF); + assert!(!f.contains(Look::StartLF)); + + f = f.insert(Look::EndLF); + assert!(f.contains(Look::EndLF)); + f = f.remove(Look::EndLF); + assert!(!f.contains(Look::EndLF)); + + f = f.insert(Look::StartCRLF); + assert!(f.contains(Look::StartCRLF)); + f = f.remove(Look::StartCRLF); + assert!(!f.contains(Look::StartCRLF)); + + f = f.insert(Look::EndCRLF); + assert!(f.contains(Look::EndCRLF)); + f = f.remove(Look::EndCRLF); + assert!(!f.contains(Look::EndCRLF)); + + f = f.insert(Look::WordUnicode); + assert!(f.contains(Look::WordUnicode)); + f = f.remove(Look::WordUnicode); + assert!(!f.contains(Look::WordUnicode)); + + f = f.insert(Look::WordUnicodeNegate); + assert!(f.contains(Look::WordUnicodeNegate)); + f = f.remove(Look::WordUnicodeNegate); + assert!(!f.contains(Look::WordUnicodeNegate)); + + f = f.insert(Look::WordAscii); + assert!(f.contains(Look::WordAscii)); + f = f.remove(Look::WordAscii); + assert!(!f.contains(Look::WordAscii)); + + f = 
f.insert(Look::WordAsciiNegate); + assert!(f.contains(Look::WordAsciiNegate)); + f = f.remove(Look::WordAsciiNegate); + assert!(!f.contains(Look::WordAsciiNegate)); + + f = f.insert(Look::WordStartAscii); + assert!(f.contains(Look::WordStartAscii)); + f = f.remove(Look::WordStartAscii); + assert!(!f.contains(Look::WordStartAscii)); + + f = f.insert(Look::WordEndAscii); + assert!(f.contains(Look::WordEndAscii)); + f = f.remove(Look::WordEndAscii); + assert!(!f.contains(Look::WordEndAscii)); + + f = f.insert(Look::WordStartUnicode); + assert!(f.contains(Look::WordStartUnicode)); + f = f.remove(Look::WordStartUnicode); + assert!(!f.contains(Look::WordStartUnicode)); + + f = f.insert(Look::WordEndUnicode); + assert!(f.contains(Look::WordEndUnicode)); + f = f.remove(Look::WordEndUnicode); + assert!(!f.contains(Look::WordEndUnicode)); + + f = f.insert(Look::WordStartHalfAscii); + assert!(f.contains(Look::WordStartHalfAscii)); + f = f.remove(Look::WordStartHalfAscii); + assert!(!f.contains(Look::WordStartHalfAscii)); + + f = f.insert(Look::WordEndHalfAscii); + assert!(f.contains(Look::WordEndHalfAscii)); + f = f.remove(Look::WordEndHalfAscii); + assert!(!f.contains(Look::WordEndHalfAscii)); + + f = f.insert(Look::WordStartHalfUnicode); + assert!(f.contains(Look::WordStartHalfUnicode)); + f = f.remove(Look::WordStartHalfUnicode); + assert!(!f.contains(Look::WordStartHalfUnicode)); + + f = f.insert(Look::WordEndHalfUnicode); + assert!(f.contains(Look::WordEndHalfUnicode)); + f = f.remove(Look::WordEndHalfUnicode); + assert!(!f.contains(Look::WordEndHalfUnicode)); + } + + #[test] + fn look_set_iter() { + let set = LookSet::empty(); + assert_eq!(0, set.iter().count()); + + let set = LookSet::full(); + assert_eq!(18, set.iter().count()); + + let set = + LookSet::empty().insert(Look::StartLF).insert(Look::WordUnicode); + assert_eq!(2, set.iter().count()); + + let set = LookSet::empty().insert(Look::StartLF); + assert_eq!(1, set.iter().count()); + + let set = 
LookSet::empty().insert(Look::WordAsciiNegate); + assert_eq!(1, set.iter().count()); + + let set = LookSet::empty().insert(Look::WordEndHalfUnicode); + assert_eq!(1, set.iter().count()); + } + + #[test] + #[cfg(feature = "alloc")] + fn look_set_debug() { + let res = alloc::format!("{:?}", LookSet::empty()); + assert_eq!("∅", res); + let res = alloc::format!("{:?}", LookSet::full()); + assert_eq!("Az^$rRbB𝛃𝚩<>〈〉◁▷◀▶", res); + } +} diff --git a/vendor/regex-automata/src/util/memchr.rs b/vendor/regex-automata/src/util/memchr.rs new file mode 100644 index 0000000..a2cbb07 --- /dev/null +++ b/vendor/regex-automata/src/util/memchr.rs @@ -0,0 +1,93 @@ +/*! +This module defines simple wrapper routines for the memchr functions from the +`memchr` crate. Basically, when the `memchr` crate is available, we use it, +otherwise we use a naive implementation which is still pretty fast. +*/ + +pub(crate) use self::inner::*; + +#[cfg(feature = "perf-literal-substring")] +pub(super) mod inner { + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn memchr(n1: u8, haystack: &[u8]) -> Option { + memchr::memchr(n1, haystack) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option { + memchr::memchr2(n1, n2, haystack) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn memchr3( + n1: u8, + n2: u8, + n3: u8, + haystack: &[u8], + ) -> Option { + memchr::memchr3(n1, n2, n3, haystack) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn memrchr(n1: u8, haystack: &[u8]) -> Option { + memchr::memrchr(n1, haystack) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option { + memchr::memrchr2(n1, n2, haystack) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn memrchr3( + n1: u8, + n2: u8, + n3: u8, + haystack: &[u8], + ) -> Option { + memchr::memrchr3(n1, n2, n3, 
haystack) + } +} + +#[cfg(not(feature = "perf-literal-substring"))] +pub(super) mod inner { + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn memchr(n1: u8, haystack: &[u8]) -> Option { + haystack.iter().position(|&b| b == n1) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option { + haystack.iter().position(|&b| b == n1 || b == n2) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn memchr3( + n1: u8, + n2: u8, + n3: u8, + haystack: &[u8], + ) -> Option { + haystack.iter().position(|&b| b == n1 || b == n2 || b == n3) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn memrchr(n1: u8, haystack: &[u8]) -> Option { + haystack.iter().rposition(|&b| b == n1) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option { + haystack.iter().rposition(|&b| b == n1 || b == n2) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn memrchr3( + n1: u8, + n2: u8, + n3: u8, + haystack: &[u8], + ) -> Option { + haystack.iter().rposition(|&b| b == n1 || b == n2 || b == n3) + } +} diff --git a/vendor/regex-automata/src/util/mod.rs b/vendor/regex-automata/src/util/mod.rs new file mode 100644 index 0000000..b3eef64 --- /dev/null +++ b/vendor/regex-automata/src/util/mod.rs @@ -0,0 +1,57 @@ +/*! +A collection of modules that provide APIs that are useful across many regex +engines. + +While one should explore the sub-modules directly to get a sense of what's +there, here are some highlights that tie the sub-modules to higher level +use cases: + +* `alphabet` contains APIs that are useful if you're doing low level things +with the DFAs in this crate. For example, implementing determinization or +walking its state graph directly. +* `captures` contains APIs for dealing with capture group matches and their +mapping to "slots" used inside an NFA graph. 
This is also where you can find +iterators over capture group names. +* `escape` contains types for pretty-printing raw byte slices as strings. +* `iter` contains API helpers for writing regex iterators. +* `lazy` contains a no-std and no-alloc variant of `lazy_static!` and +`once_cell`. +* `look` contains APIs for matching and configuring look-around assertions. +* `pool` provides a way to reuse mutable memory allocated in a thread safe +manner. +* `prefilter` provides APIs for building prefilters and using them in searches. +* `primitives` are what you might use if you're doing lower level work on +automata, such as walking an NFA state graph. +* `syntax` provides some higher level convenience functions for interacting +with the `regex-syntax` crate. +* `wire` is useful if you're working with DFA serialization. +*/ + +pub mod alphabet; +#[cfg(feature = "alloc")] +pub mod captures; +pub mod escape; +#[cfg(feature = "alloc")] +pub mod interpolate; +pub mod iter; +pub mod lazy; +pub mod look; +#[cfg(feature = "alloc")] +pub mod pool; +pub mod prefilter; +pub mod primitives; +pub mod start; +#[cfg(feature = "syntax")] +pub mod syntax; +pub mod wire; + +#[cfg(any(feature = "dfa-build", feature = "hybrid"))] +pub(crate) mod determinize; +pub(crate) mod empty; +pub(crate) mod int; +pub(crate) mod memchr; +pub(crate) mod search; +#[cfg(feature = "alloc")] +pub(crate) mod sparse_set; +pub(crate) mod unicode_data; +pub(crate) mod utf8; diff --git a/vendor/regex-automata/src/util/pool.rs b/vendor/regex-automata/src/util/pool.rs new file mode 100644 index 0000000..d90d4ec --- /dev/null +++ b/vendor/regex-automata/src/util/pool.rs @@ -0,0 +1,1199 @@ +// This module provides a relatively simple thread-safe pool of reusable +// objects. For the most part, it's implemented by a stack represented by a +// Mutex<Vec<T>>. 
It has one small trick: because unlocking a mutex is somewhat +// costly, in the case where a pool is accessed by the first thread that tried +// to get a value, we bypass the mutex. Here are some benchmarks showing the +// difference. +// +// 2022-10-15: These benchmarks are from the old regex crate and they aren't +// easy to reproduce because some rely on older implementations of Pool that +// are no longer around. I've left the results here for posterity, but any +// enterprising individual should feel encouraged to re-litigate the way Pool +// works. I am not at all certain it is the best approach. +// +// 1) misc::anchored_literal_long_non_match 21 (18571 MB/s) +// 2) misc::anchored_literal_long_non_match 107 (3644 MB/s) +// 3) misc::anchored_literal_long_non_match 45 (8666 MB/s) +// 4) misc::anchored_literal_long_non_match 19 (20526 MB/s) +// +// (1) represents our baseline: the master branch at the time of writing when +// using the 'thread_local' crate to implement the pool below. +// +// (2) represents a naive pool implemented completely via Mutex<Vec<T>>. There +// is no special trick for bypassing the mutex. +// +// (3) is the same as (2), except it uses Mutex<Vec<Box<T>>>. It is twice as +// fast because a Box is much smaller than the T we use with a Pool in this +// crate. So pushing and popping a Box from a Vec is quite a bit faster +// than for T. +// +// (4) is the same as (3), but with the trick for bypassing the mutex in the +// case of the first-to-get thread. +// +// Why move off of thread_local? Even though (4) is a hair faster than (1) +// above, this was not the main goal. The main goal was to move off of +// thread_local and find a way to *simply* re-capture some of its speed for +// regex's specific case. So again, why move off of it? The *primary* reason is +// because of memory leaks. See https://github.com/rust-lang/regex/issues/362 +// for example. (Why do I want it to be simple? 
Well, I suppose what I mean is, +// "use as much safe code as possible to minimize risk and be as sure as I can +// be that it is correct.") +// +// My guess is that the thread_local design is probably not appropriate for +// regex since its memory usage scales to the number of active threads that +// have used a regex, whereas the pool below scales to the number of threads +// that simultaneously use a regex. While neither case permits contraction, +// since we own the pool data structure below, we can add contraction if a +// clear use case pops up in the wild. More pressingly though, it seems that +// there are at least some use case patterns where one might have many threads +// sitting around that might have used a regex at one point. While thread_local +// does try to reuse space previously used by a thread that has since stopped, +// its maximal memory usage still scales with the total number of active +// threads. In contrast, the pool below scales with the total number of threads +// *simultaneously* using the pool. The hope is that this uses less memory +// overall. And if it doesn't, we can hopefully tune it somehow. +// +// It seems that these sorts of conditions happen frequently +// in FFI inside of other more "managed" languages. This was +// mentioned in the issue linked above, and also mentioned here: +// https://github.com/BurntSushi/rure-go/issues/3. And in particular, users +// confirm that disabling the use of thread_local resolves the leak. +// +// There were other weaker reasons for moving off of thread_local as well. +// Namely, at the time, I was looking to reduce dependencies. And for something +// like regex, maintenance can be simpler when we own the full dependency tree. +// +// Note that I am not entirely happy with this pool. It has some subtle +// implementation details and is overall still observable (even with the +// thread owner optimization) in benchmarks. 
If someone wants to take a crack +// at building something better, please file an issue. Even if it means a +// different API. The API exposed by this pool is not the minimal thing that +// something like a 'Regex' actually needs. It could adapt to, for example, +// an API more like what is found in the 'thread_local' crate. However, we do +// really need to support the no-std alloc-only context, or else the regex +// crate wouldn't be able to support no-std alloc-only. However, I'm generally +// okay with making the alloc-only context slower (as it is here), although I +// do find it unfortunate. + +/*! +A thread safe memory pool. + +The principal type in this module is a [`Pool`]. Its main use case is for +holding a thread safe collection of mutable scratch spaces (usually called +`Cache` in this crate) that regex engines need to execute a search. This then +permits sharing the same read-only regex object across multiple threads while +having a quick way of reusing scratch space in a thread safe way. This avoids +needing to re-create the scratch space for every search, which could wind up +being quite expensive. +*/ + +/// A thread safe pool that works in an `alloc`-only context. +/// +/// Getting a value out comes with a guard. When that guard is dropped, the +/// value is automatically put back in the pool. The guard provides both a +/// `Deref` and a `DerefMut` implementation for easy access to an underlying +/// `T`. +/// +/// A `Pool` impls `Sync` when `T` is `Send` (even if `T` is not `Sync`). This +/// is possible because a pool is guaranteed to provide a value to exactly one +/// thread at any time. +/// +/// Currently, a pool never contracts in size. Its size is proportional to the +/// maximum number of simultaneous uses. This may change in the future. +/// +/// A `Pool` is a particularly useful data structure for this crate because +/// many of the regex engines require a mutable "cache" in order to execute +/// a search.
Since regexes themselves tend to be global, the problem is then: +/// how do you get a mutable cache to execute a search? You could: +/// +/// 1. Use a `thread_local!`, which requires the standard library and requires +/// that the regex pattern be statically known. +/// 2. Use a `Pool`. +/// 3. Make the cache an explicit dependency in your code and pass it around. +/// 4. Put the cache state in a `Mutex`, but this means only one search can +/// execute at a time. +/// 5. Create a new cache for every search. +/// +/// A `thread_local!` is perhaps the best choice if it works for your use case. +/// Putting the cache in a mutex or creating a new cache for every search are +/// perhaps the worst choices. Of the remaining two choices, whether you use +/// this `Pool` or thread through a cache explicitly in your code is a matter +/// of taste and depends on your code architecture. +/// +/// # Warning: may use a spin lock +/// +/// When this crate is compiled _without_ the `std` feature, then this type +/// may use a spin lock internally. This can have subtle effects that may +/// be undesirable. See [Spinlocks Considered Harmful][spinharm] for a more +/// thorough treatment of this topic. +/// +/// [spinharm]: https://matklad.github.io/2020/01/02/spinlocks-considered-harmful.html +/// +/// # Example +/// +/// This example shows how to share a single hybrid regex among multiple +/// threads, while also safely getting exclusive access to a hybrid's +/// [`Cache`](crate::hybrid::regex::Cache) without preventing other searches +/// from running while your thread uses the `Cache`.
+/// +/// ``` +/// use regex_automata::{ +/// hybrid::regex::{Cache, Regex}, +/// util::{lazy::Lazy, pool::Pool}, +/// Match, +/// }; +/// +/// static RE: Lazy<Regex> = +/// Lazy::new(|| Regex::new("foo[0-9]+bar").unwrap()); +/// static CACHE: Lazy<Pool<Cache>> = +/// Lazy::new(|| Pool::new(|| RE.create_cache())); +/// +/// let expected = Some(Match::must(0, 3..14)); +/// assert_eq!(expected, RE.find(&mut CACHE.get(), b"zzzfoo12345barzzz")); +/// ``` +pub struct Pool T>(alloc::boxed::Box>); + +impl Pool { + /// Create a new pool. The given closure is used to create values in + /// the pool when necessary. + pub fn new(create: F) -> Pool { + Pool(alloc::boxed::Box::new(inner::Pool::new(create))) + } +} + +impl T> Pool { + /// Get a value from the pool. The caller is guaranteed to have + /// exclusive access to the given value. Namely, it is guaranteed that + /// this will never return a value that was returned by another call to + /// `get` but was not put back into the pool. + /// + /// When the guard goes out of scope and its destructor is called, then + /// it will automatically be put back into the pool. Alternatively, + /// [`PoolGuard::put`] may be used to explicitly put it back in the pool + /// without relying on its destructor. + /// + /// Note that there is no guarantee provided about which value in the + /// pool is returned. That is, calling get, dropping the guard (causing + /// the value to go back into the pool) and then calling get again is + /// *not* guaranteed to return the same value received in the first `get` + /// call. + #[inline] + pub fn get(&self) -> PoolGuard<'_, T, F> { + PoolGuard(self.0.get()) + } +} + +impl core::fmt::Debug for Pool { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + f.debug_tuple("Pool").field(&self.0).finish() + } +} + +/// A guard that is returned when a caller requests a value from the pool. +/// +/// The purpose of the guard is to use RAII to automatically put the value +/// back in the pool once it's dropped.
+pub struct PoolGuard<'a, T: Send, F: Fn() -> T>(inner::PoolGuard<'a, T, F>); + +impl<'a, T: Send, F: Fn() -> T> PoolGuard<'a, T, F> { + /// Consumes this guard and puts it back into the pool. + /// + /// This circumvents the guard's `Drop` implementation. This can be useful + /// in circumstances where the automatic `Drop` results in poorer codegen, + /// such as calling non-inlined functions. + #[inline] + pub fn put(this: PoolGuard<'_, T, F>) { + inner::PoolGuard::put(this.0); + } +} + +impl<'a, T: Send, F: Fn() -> T> core::ops::Deref for PoolGuard<'a, T, F> { + type Target = T; + + #[inline] + fn deref(&self) -> &T { + self.0.value() + } +} + +impl<'a, T: Send, F: Fn() -> T> core::ops::DerefMut for PoolGuard<'a, T, F> { + #[inline] + fn deref_mut(&mut self) -> &mut T { + self.0.value_mut() + } +} + +impl<'a, T: Send + core::fmt::Debug, F: Fn() -> T> core::fmt::Debug + for PoolGuard<'a, T, F> +{ + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + f.debug_tuple("PoolGuard").field(&self.0).finish() + } +} + +#[cfg(feature = "std")] +mod inner { + use core::{ + cell::UnsafeCell, + panic::{RefUnwindSafe, UnwindSafe}, + sync::atomic::{AtomicUsize, Ordering}, + }; + + use alloc::{boxed::Box, vec, vec::Vec}; + + use std::{sync::Mutex, thread_local}; + + /// An atomic counter used to allocate thread IDs. + /// + /// We specifically start our counter at 3 so that we can use the values + /// less than it as sentinels. + static COUNTER: AtomicUsize = AtomicUsize::new(3); + + /// A thread ID indicating that there is no owner. This is the initial + /// state of a pool. Once a pool has an owner, there is no way to change + /// it. + static THREAD_ID_UNOWNED: usize = 0; + + /// A thread ID indicating that the special owner value is in use and not + /// available. This state is useful for avoiding a case where the owner + /// of a pool calls `get` before putting the result of a previous `get` + /// call back into the pool. 
+ static THREAD_ID_INUSE: usize = 1; + + /// This sentinel is used to indicate that a guard has already been dropped + /// and should not be re-dropped. We use this because our drop code can be + /// called outside of Drop and thus there could be a bug in the internal + /// implementation that results in trying to put the same guard back into + /// the same pool multiple times, and *that* could result in UB if we + /// didn't mark the guard as already having been put back in the pool. + /// + /// So this isn't strictly necessary, but this lets us define some + /// routines as safe (like PoolGuard::put_imp) that we couldn't otherwise + /// do. + static THREAD_ID_DROPPED: usize = 2; + + /// The number of stacks we use inside of the pool. These are only used for + /// non-owners. That is, these represent the "slow" path. + /// + /// In the original implementation of this pool, we only used a single + /// stack. While this might be okay for a couple threads, the prevalence of + /// 32, 64 and even 128 core CPUs has made it untenable. The contention + /// such an environment introduces when threads are doing a lot of searches + /// on short haystacks (a not uncommon use case) is palpable and leads to + /// huge slowdowns. + /// + /// This constant reflects a change from using one stack to the number of + /// stacks that this constant is set to. The stack for a particular thread + /// is simply chosen by `thread_id % MAX_POOL_STACKS`. The idea behind + /// this setup is that there should be a good chance that accesses to the + /// pool will be distributed over several stacks instead of all of them + /// converging to one. + /// + /// This is not a particularly smart or dynamic strategy. Fixing this to a + /// specific number has at least two downsides. First is that it will help, + /// say, an 8 core CPU more than it will a 128 core CPU. (But, crucially, + /// it will still help the 128 core case.)
Second is that this may wind + /// up being a little wasteful with respect to memory usage. Namely, if a + /// regex is used on one thread and then moved to another thread, then it + /// could result in creating a new copy of the data in the pool even though + /// only one is actually needed. + /// + /// And that memory usage bit is why this is set to 8 and not, say, 64. + /// Keeping it at 8 limits, to an extent, how much unnecessary memory can + /// be allocated. + /// + /// In an ideal world, we'd be able to have something like this: + /// + /// * Grow the number of stacks as the number of concurrent callers + /// increases. I spent a little time trying this, but even just adding an + /// atomic addition/subtraction for each pop/push for tracking concurrent + /// callers led to a big perf hit. Since even more work would seemingly be + /// required than just an addition/subtraction, I abandoned this approach. + /// * The maximum amount of memory used should scale with respect to the + /// number of concurrent callers and *not* the total number of existing + /// threads. This is primarily why the `thread_local` crate isn't used, as + /// some environments spin up a lot of threads. This led to multiple + /// reports of extremely high memory usage (often described as memory + /// leaks). + /// * Even more ideally, the pool should contract in size. That is, it + /// should grow with bursts and then shrink. But this is a pretty thorny + /// issue to tackle and it might be better to just not. + /// * It would be nice to explore the use of, say, a lock-free stack + /// instead of using a mutex to guard a `Vec` that is ultimately just + /// treated as a stack. The main thing preventing me from exploring this + /// is the ABA problem. The `crossbeam` crate has tools for dealing with + /// this sort of problem (via its epoch based memory reclamation strategy), + /// but I can't justify bringing in all of `crossbeam` as a dependency of + /// `regex` for this.
+ /// + /// See this issue for more context and discussion: + /// https://github.com/rust-lang/regex/issues/934 + const MAX_POOL_STACKS: usize = 8; + + thread_local!( + /// A thread local used to assign an ID to a thread. + static THREAD_ID: usize = { + let next = COUNTER.fetch_add(1, Ordering::Relaxed); + // SAFETY: We cannot permit the reuse of thread IDs since reusing a + // thread ID might result in more than one thread "owning" a pool, + // and thus, permit accessing a mutable value from multiple threads + // simultaneously without synchronization. The intent of this panic + // is to be a sanity check. It is not expected that the thread ID + // space will actually be exhausted in practice. Even on a 32-bit + // system, it would require spawning 2^32 threads (although they + // wouldn't all need to run simultaneously, so it is in theory + // possible). + // + // This checks that the counter never wraps around, since atomic + // addition wraps around on overflow. + if next == 0 { + panic!("regex: thread ID allocation space exhausted"); + } + next + }; + ); + + /// This puts each stack in the pool below into its own cache line. This is + /// an absolutely critical optimization that tends to have the most impact + /// in high contention workloads. Without forcing each mutex-protected stack + /// into its own cache line, high contention exacerbates the performance + /// problem by causing "false sharing." By putting each mutex in its own + /// cache-line, we avoid the false sharing problem and the effects of + /// contention are greatly reduced. + #[derive(Debug)] + #[repr(C, align(64))] + struct CacheLine(T); + + /// A thread safe pool utilizing std-only features. + /// + /// The main difference between this and the simplistic alloc-only pool is + /// the use of std::sync::Mutex and an "owner thread" optimization that + /// makes accesses by the owner of a pool faster than all other threads.
+ /// This makes the common case of running a regex within a single thread + /// faster by avoiding mutex unlocking. + pub(super) struct Pool { + /// A function to create more T values when stack is empty and a caller + /// has requested a T. + create: F, + /// Multiple stacks of T values to hand out. These are used when a Pool + /// is accessed by a thread that didn't create it. + /// + /// Conceptually this is `Mutex<Vec<Box<T>>>`, but sharded out to make + /// it scale better under high contention work-loads. We index into + /// this sequence via `thread_id % stacks.len()`. + stacks: Vec>>>>, + /// The ID of the thread that owns this pool. The owner is the thread + /// that makes the first call to 'get'. When the owner calls 'get', it + /// gets 'owner_val' directly instead of returning a T from 'stack'. + /// See comments elsewhere for details, but this is intended to be an + /// optimization for the common case that makes getting a T faster. + /// + /// It is initialized to a value of zero (an impossible thread ID) as a + /// sentinel to indicate that it is unowned. + owner: AtomicUsize, + /// A value to return when the caller is in the same thread that + /// first called `Pool::get`. + /// + /// This is set to None when a Pool is first created, and set to Some + /// once the first thread calls Pool::get. + owner_val: UnsafeCell>, + } + + // SAFETY: Since we want to use a Pool from multiple threads simultaneously + // behind an Arc, we need for it to be Sync. In cases where T is sync, + // Pool would be Sync. However, since we use a Pool to store mutable + // scratch space, we wind up using a T that has interior mutability and is + // thus itself not Sync. So what we *really* want is for our Pool to be + // Sync even when T is not Sync (but is at least Send).
+ // + // The only non-sync aspect of a Pool is its 'owner_val' field, which is + // used to implement faster access to a pool value in the common case of + // a pool being accessed in the same thread in which it was created. The + // 'stack' field is also shared, but a Mutex where T: Send is already + // Sync. So we only need to worry about 'owner_val'. + // + // The key is to guarantee that 'owner_val' can only ever be accessed from + // one thread. In our implementation below, we guarantee this by only + // returning the 'owner_val' when the ID of the current thread matches the + // ID of the thread that first called 'Pool::get'. Since this can only ever + // be one thread, it follows that only one thread can access 'owner_val' at + // any point in time. Thus, it is safe to declare that Pool is Sync when + // T is Send. + // + // If there is a way to achieve our performance goals using safe code, then + // I would very much welcome a patch. As it stands, the implementation + // below tries to balance safety with performance. The case where a Regex + // is used from multiple threads simultaneously will suffer a bit since + // getting a value out of the pool will require unlocking a mutex. + // + // We require `F: Send + Sync` because we call `F` at any point on demand, + // potentially from multiple threads simultaneously. + unsafe impl Sync for Pool {} + + // If T is UnwindSafe, then since we provide exclusive access to any + // particular value in the pool, the pool should therefore also be + // considered UnwindSafe. + // + // We require `F: UnwindSafe + RefUnwindSafe` because we call `F` at any + // point on demand, so it needs to be unwind safe on both dimensions for + // the entire Pool to be unwind safe. + impl UnwindSafe for Pool {} + + // If T is UnwindSafe, then since we provide exclusive access to any + // particular value in the pool, the pool should therefore also be + // considered RefUnwindSafe. 
+ // + // We require `F: UnwindSafe + RefUnwindSafe` because we call `F` at any + // point on demand, so it needs to be unwind safe on both dimensions for + // the entire Pool to be unwind safe. + impl RefUnwindSafe + for Pool + { + } + + impl Pool { + /// Create a new pool. The given closure is used to create values in + /// the pool when necessary. + pub(super) fn new(create: F) -> Pool { + // FIXME: Now that we require 1.65+, Mutex::new is available as + // const... So we can almost mark this function as const. But of + // course, we're creating a Vec of stacks below (we didn't when I + // originally wrote this code). It seems like the best way to work + // around this would be to use a `[Stack; MAX_POOL_STACKS]` instead + // of a `Vec`. I refrained from making this change at time + // of writing (2023/10/08) because I was making a lot of other + // changes at the same time and wanted to do this more carefully. + // Namely, because of the cache line optimization, that `[Stack; + // MAX_POOL_STACKS]` would be quite big. It's unclear how bad (if + // at all) that would be. + // + // Another choice would be to lazily allocate the stacks, but... + // I'm not so sure about that. Seems like a fair bit of complexity? + // + // Maybe there's a simple solution I'm missing. + // + // ... OK, I tried to fix this. First, I did it by putting `stacks` + // in an `UnsafeCell` and using a `Once` to lazily initialize it. + // I benchmarked it and everything looked okay. I then made this + // function `const` and thought I was just about done. But the + // public pool type wraps its inner pool in a `Box` to keep its + // size down. Blech. + // + // So then I thought that I could push the box down into this + // type (and leave the non-std version unboxed) and use the same + // `UnsafeCell` technique to lazily initialize it. This has the + // downside of the `Once` now needing to get hit in the owner fast + // path, but maybe that's OK? 
However, I then realized that we can + // only lazily initialize `stacks`, `owner` and `owner_val`. The + // `create` function needs to be put somewhere outside of the box. + // So now the pool is a `Box`, `Once` and a function. Now we're + // starting to defeat the point of boxing in the first place. So I + // backed out that change too. + // + // Back to square one. Maybe we just don't make a pool's + // constructor const and live with it. It's probably not a huge + // deal. + let mut stacks = Vec::with_capacity(MAX_POOL_STACKS); + for _ in 0..stacks.capacity() { + stacks.push(CacheLine(Mutex::new(vec![]))); + } + let owner = AtomicUsize::new(THREAD_ID_UNOWNED); + let owner_val = UnsafeCell::new(None); // init'd on first access + Pool { create, stacks, owner, owner_val } + } + } + + impl T> Pool { + /// Get a value from the pool. This may block if another thread is also + /// attempting to retrieve a value from the pool. + #[inline] + pub(super) fn get(&self) -> PoolGuard<'_, T, F> { + // Our fast path checks if the caller is the thread that "owns" + // this pool. Or stated differently, whether it is the first thread + // that tried to extract a value from the pool. If it is, then we + // can return a T to the caller without going through a mutex. + // + // SAFETY: We must guarantee that only one thread gets access + // to this value. Since a thread is uniquely identified by the + // THREAD_ID thread local, it follows that if the caller's thread + // ID is equal to the owner, then only one thread may receive this + // value. This is also why we can get away with what looks like a + // racy load and a store. We know that if 'owner == caller', then + // only one thread can be here, so we don't need to worry about any + // other thread setting the owner to something else. + let caller = THREAD_ID.with(|id| *id); + let owner = self.owner.load(Ordering::Acquire); + if caller == owner { + // N.B.
We could also do a CAS here instead of a load/store, + // but ad hoc benchmarking suggests it is slower. And a lot + // slower in the case where `get_slow` is common. + self.owner.store(THREAD_ID_INUSE, Ordering::Release); + return self.guard_owned(caller); + } + self.get_slow(caller, owner) + } + + /// This is the "slow" version that goes through a mutex to pop an + /// allocated value off a stack to return to the caller. (Or, if the + /// stack is empty, a new value is created.) + /// + /// If the pool has no owner, then this will set the owner. + #[cold] + fn get_slow( + &self, + caller: usize, + owner: usize, + ) -> PoolGuard<'_, T, F> { + if owner == THREAD_ID_UNOWNED { + // This sentinel means this pool is not yet owned. We try to + // atomically set the owner. If we do, then this thread becomes + // the owner and we can return a guard that represents the + // special T for the owner. + // + // Note that we set the owner to a different sentinel that + // indicates that the owned value is in use. The owner ID will + // get updated to the actual ID of this thread once the guard + // returned by this function is put back into the pool. + let res = self.owner.compare_exchange( + THREAD_ID_UNOWNED, + THREAD_ID_INUSE, + Ordering::AcqRel, + Ordering::Acquire, + ); + if res.is_ok() { + // SAFETY: A successful CAS above implies this thread is + // the owner and that this is the only such thread that + // can reach here. Thus, there is no data race. + unsafe { + *self.owner_val.get() = Some((self.create)()); + } + return self.guard_owned(caller); + } + } + let stack_id = caller % self.stacks.len(); + // We try to acquire exclusive access to this thread's stack, and + // if so, grab a value from it if we can. We put this in a loop so + // that it's easy to tweak and experiment with a different number + // of tries. In the end, I couldn't see anything obviously better + // than one attempt in ad hoc testing. 
+ for _ in 0..1 { + let mut stack = match self.stacks[stack_id].0.try_lock() { + Err(_) => continue, + Ok(stack) => stack, + }; + if let Some(value) = stack.pop() { + return self.guard_stack(value); + } + // Unlock the mutex guarding the stack before creating a fresh + // value since we no longer need the stack. + drop(stack); + let value = Box::new((self.create)()); + return self.guard_stack(value); + } + // We're only here if we could not get access to our stack, so just + // create a new value. This seems like it could be wasteful, but + // waiting for exclusive access to a stack when there's high + // contention is brutal for perf. + self.guard_stack_transient(Box::new((self.create)())) + } + + /// Puts a value back into the pool. Callers don't need to call this. + /// Once the guard that's returned by 'get' is dropped, it is put back + /// into the pool automatically. + #[inline] + fn put_value(&self, value: Box) { + let caller = THREAD_ID.with(|id| *id); + let stack_id = caller % self.stacks.len(); + // As with trying to pop a value from this thread's stack, we + // merely attempt to get access to push this value back on the + // stack. If there's too much contention, we just give up and throw + // the value away. + // + // Interestingly, in ad hoc benchmarking, it is beneficial to + // attempt to push the value back more than once, unlike when + // popping the value. I don't have a good theory for why this is. + // I guess if we drop too many values then that winds up forcing + // the pop operation to create new fresh values and thus leads to + // less reuse. There's definitely a balancing act here. + for _ in 0..10 { + let mut stack = match self.stacks[stack_id].0.try_lock() { + Err(_) => continue, + Ok(stack) => stack, + }; + stack.push(value); + return; + } + } + + /// Create a guard that represents the special owned T.
+ #[inline] + fn guard_owned(&self, caller: usize) -> PoolGuard<'_, T, F> { + PoolGuard { pool: self, value: Err(caller), discard: false } + } + + /// Create a guard that contains a value from the pool's stack. + #[inline] + fn guard_stack(&self, value: Box) -> PoolGuard<'_, T, F> { + PoolGuard { pool: self, value: Ok(value), discard: false } + } + + /// Create a guard that contains a value from the pool's stack with an + /// instruction to throw away the value instead of putting it back + /// into the pool. + #[inline] + fn guard_stack_transient(&self, value: Box) -> PoolGuard<'_, T, F> { + PoolGuard { pool: self, value: Ok(value), discard: true } + } + } + + impl core::fmt::Debug for Pool { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_struct("Pool") + .field("stacks", &self.stacks) + .field("owner", &self.owner) + .field("owner_val", &self.owner_val) + .finish() + } + } + + /// A guard that is returned when a caller requests a value from the pool. + pub(super) struct PoolGuard<'a, T: Send, F: Fn() -> T> { + /// The pool that this guard is attached to. + pool: &'a Pool, + /// This is Err when the guard represents the special "owned" value. + /// In which case, the value is retrieved from 'pool.owner_val'. And + /// in the special case of `Err(THREAD_ID_DROPPED)`, it means the + /// guard has been put back into the pool and should no longer be used. + value: Result, usize>, + /// When true, the value should be discarded instead of being pushed + /// back into the pool. We tend to use this under high contention, and + /// this allows us to avoid inflating the size of the pool. (Because + /// under contention, we tend to create more values instead of waiting + /// for access to a stack of existing values.) + discard: bool, + } + + impl<'a, T: Send, F: Fn() -> T> PoolGuard<'a, T, F> { + /// Return the underlying value. 
+ #[inline] + pub(super) fn value(&self) -> &T { + match self.value { + Ok(ref v) => &**v, + // SAFETY: This is safe because the only way a PoolGuard gets + // created for self.value=Err is when the current thread + // corresponds to the owning thread, of which there can only + // be one. Thus, we are guaranteed to be providing exclusive + // access here which makes this safe. + // + // Also, since 'owner_val' is guaranteed to be initialized + // before an owned PoolGuard is created, the unchecked unwrap + // is safe. + Err(id) => unsafe { + // This assert is *not* necessary for safety, since we + // should never be here if the guard had been put back into + // the pool. This is a sanity check to make sure we didn't + // break an internal invariant. + debug_assert_ne!(THREAD_ID_DROPPED, id); + (*self.pool.owner_val.get()).as_ref().unwrap_unchecked() + }, + } + } + + /// Return the underlying value as a mutable borrow. + #[inline] + pub(super) fn value_mut(&mut self) -> &mut T { + match self.value { + Ok(ref mut v) => &mut **v, + // SAFETY: This is safe because the only way a PoolGuard gets + // created for self.value=Err is when the current thread + // corresponds to the owning thread, of which there can only + // be one. Thus, we are guaranteed to be providing exclusive + // access here which makes this safe. + // + // Also, since 'owner_val' is guaranteed to be initialized + // before an owned PoolGuard is created, the unwrap_unchecked + // is safe. + Err(id) => unsafe { + // This assert is *not* necessary for safety, since we + // should never be here if the guard had been put back into + // the pool. This is a sanity check to make sure we didn't + // break an internal invariant. + debug_assert_ne!(THREAD_ID_DROPPED, id); + (*self.pool.owner_val.get()).as_mut().unwrap_unchecked() + }, + } + } + + /// Consumes this guard and puts it back into the pool.
+ #[inline] + pub(super) fn put(this: PoolGuard<'_, T, F>) { + // Since this is effectively consuming the guard and putting the + // value back into the pool, there's no reason to run its Drop + // impl after doing this. I don't believe there is a correctness + // problem with doing so, but there's definitely a perf problem + // by redoing this work. So we avoid it. + let mut this = core::mem::ManuallyDrop::new(this); + this.put_imp(); + } + + /// Puts this guard back into the pool by only borrowing the guard as + /// mutable. This should be called at most once. + #[inline(always)] + fn put_imp(&mut self) { + match core::mem::replace(&mut self.value, Err(THREAD_ID_DROPPED)) { + Ok(value) => { + // If we were told to discard this value then don't bother + // trying to put it back into the pool. This occurs when + // the pop operation failed to acquire a lock and we + // decided to create a new value in lieu of contending for + // the lock. + if self.discard { + return; + } + self.pool.put_value(value); + } + // If this guard has a value "owned" by the thread, then + // the Pool guarantees that this is the ONLY such guard. + // Therefore, in order to place it back into the pool and make + // it available, we need to change the owner back to the owning + // thread's ID. But note that we use the ID that was stored in + // the guard, since a guard can be moved to another thread and + // dropped. (A previous iteration of this code read from the + // THREAD_ID thread local, which uses the ID of the current + // thread which may not be the ID of the owning thread! This + // also avoids the TLS access, which is likely a hair faster.) + Err(owner) => { + // If we hit this point, it implies 'put_imp' has been + // called multiple times for the same guard which in turn + // corresponds to a bug in this implementation. 
+ assert_ne!(THREAD_ID_DROPPED, owner); + self.pool.owner.store(owner, Ordering::Release); + } + } + } + } + + impl<'a, T: Send, F: Fn() -> T> Drop for PoolGuard<'a, T, F> { + #[inline] + fn drop(&mut self) { + self.put_imp(); + } + } + + impl<'a, T: Send + core::fmt::Debug, F: Fn() -> T> core::fmt::Debug + for PoolGuard<'a, T, F> + { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + f.debug_struct("PoolGuard") + .field("pool", &self.pool) + .field("value", &self.value) + .finish() + } + } +} + +// FUTURE: We should consider using Mara Bos's nearly-lock-free version of this +// here: https://gist.github.com/m-ou-se/5fdcbdf7dcf4585199ce2de697f367a4. +// +// One reason why I did things with a "mutex" below is that it isolates the +// safety concerns to just the Mutex, whereas the safety of Mara's pool is a +// bit more sprawling. I also expect this code to not be used that much, and +// so is unlikely to get as much real world usage with which to test it. That +// means the "obviously correct" lever is an important one. +// +// The specific reason to use Mara's pool is that it is likely faster and also +// less likely to hit problems with spin-locks, although it is not completely +// impervious to them. +// +// The best solution to this problem, probably, is a truly lock free pool. That +// could be done with a lock free linked list. The issue is the ABA problem. It +// is difficult to avoid, and doing so is complex. BUT, the upshot of that is +// that if we had a truly lock free pool, then we could also use it above in +// the 'std' pool instead of a Mutex because it should be completely free of the +// problems that come from spin-locks. +#[cfg(not(feature = "std"))] +mod inner { + use core::{ + cell::UnsafeCell, + panic::{RefUnwindSafe, UnwindSafe}, + sync::atomic::{AtomicBool, Ordering}, + }; + + use alloc::{boxed::Box, vec, vec::Vec}; + + /// A thread safe pool utilizing alloc-only features.
+ /// + /// Unlike the std version, it doesn't seem possible(?) to implement the + /// "thread owner" optimization because alloc-only doesn't have any concept + /// of threads. So the best we can do is just a normal stack. This will + /// increase latency in alloc-only environments. + pub(super) struct Pool { + /// A stack of T values to hand out. These are used when a Pool is + /// accessed by a thread that didn't create it. + stack: Mutex>>, + /// A function to create more T values when stack is empty and a caller + /// has requested a T. + create: F, + } + + // If T is UnwindSafe, then since we provide exclusive access to any + // particular value in the pool, it should therefore also be considered + // RefUnwindSafe. + impl RefUnwindSafe for Pool {} + + impl Pool { + /// Create a new pool. The given closure is used to create values in + /// the pool when necessary. + pub(super) const fn new(create: F) -> Pool { + Pool { stack: Mutex::new(vec![]), create } + } + } + + impl T> Pool { + /// Get a value from the pool. This may block if another thread is also + /// attempting to retrieve a value from the pool. + #[inline] + pub(super) fn get(&self) -> PoolGuard<'_, T, F> { + let mut stack = self.stack.lock(); + let value = match stack.pop() { + None => Box::new((self.create)()), + Some(value) => value, + }; + PoolGuard { pool: self, value: Some(value) } + } + + #[inline] + fn put(&self, guard: PoolGuard<'_, T, F>) { + let mut guard = core::mem::ManuallyDrop::new(guard); + if let Some(value) = guard.value.take() { + self.put_value(value); + } + } + + /// Puts a value back into the pool. Callers don't need to call this. + /// Once the guard that's returned by 'get' is dropped, it is put back + /// into the pool automatically. 
+ #[inline] + fn put_value(&self, value: Box) { + let mut stack = self.stack.lock(); + stack.push(value); + } + } + + impl core::fmt::Debug for Pool { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_struct("Pool").field("stack", &self.stack).finish() + } + } + + /// A guard that is returned when a caller requests a value from the pool. + pub(super) struct PoolGuard<'a, T: Send, F: Fn() -> T> { + /// The pool that this guard is attached to. + pool: &'a Pool, + /// This is None after the guard has been put back into the pool. + value: Option>, + } + + impl<'a, T: Send, F: Fn() -> T> PoolGuard<'a, T, F> { + /// Return the underlying value. + #[inline] + pub(super) fn value(&self) -> &T { + self.value.as_deref().unwrap() + } + + /// Return the underlying value as a mutable borrow. + #[inline] + pub(super) fn value_mut(&mut self) -> &mut T { + self.value.as_deref_mut().unwrap() + } + + /// Consumes this guard and puts it back into the pool. + #[inline] + pub(super) fn put(this: PoolGuard<'_, T, F>) { + // Since this is effectively consuming the guard and putting the + // value back into the pool, there's no reason to run its Drop + // impl after doing this. I don't believe there is a correctness + // problem with doing so, but there's definitely a perf problem + // by redoing this work. So we avoid it. + let mut this = core::mem::ManuallyDrop::new(this); + this.put_imp(); + } + + /// Puts this guard back into the pool by only borrowing the guard as + /// mutable. This should be called at most once. 
+ #[inline(always)] + fn put_imp(&mut self) { + if let Some(value) = self.value.take() { + self.pool.put_value(value); + } + } + } + + impl<'a, T: Send, F: Fn() -> T> Drop for PoolGuard<'a, T, F> { + #[inline] + fn drop(&mut self) { + self.put_imp(); + } + } + + impl<'a, T: Send + core::fmt::Debug, F: Fn() -> T> core::fmt::Debug + for PoolGuard<'a, T, F> + { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + f.debug_struct("PoolGuard") + .field("pool", &self.pool) + .field("value", &self.value) + .finish() + } + } + + /// A spin-lock based mutex. Yes, I have read spinlocks cosnidered + /// harmful[1], and if there's a reasonable alternative choice, I'll + /// happily take it. + /// + /// I suspect the most likely alternative here is a Treiber stack, but + /// implementing one correctly in a way that avoids the ABA problem looks + /// subtle enough that I'm not sure I want to attempt that. But otherwise, + /// we only need a mutex in order to implement our pool, so if there's + /// something simpler we can use that works for our `Pool` use case, then + /// that would be great. + /// + /// Note that this mutex does not do poisoning. + /// + /// [1]: https://matklad.github.io/2020/01/02/spinlocks-considered-harmful.html + #[derive(Debug)] + struct Mutex { + locked: AtomicBool, + data: UnsafeCell, + } + + // SAFETY: Since a Mutex guarantees exclusive access, as long as we can + // send it across threads, it must also be Sync. + unsafe impl Sync for Mutex {} + + impl Mutex { + /// Create a new mutex for protecting access to the given value across + /// multiple threads simultaneously. + const fn new(value: T) -> Mutex { + Mutex { + locked: AtomicBool::new(false), + data: UnsafeCell::new(value), + } + } + + /// Lock this mutex and return a guard providing exclusive access to + /// `T`. This blocks if some other thread has already locked this + /// mutex. 
+ #[inline] + fn lock(&self) -> MutexGuard<'_, T> { + while self + .locked + .compare_exchange( + false, + true, + Ordering::AcqRel, + Ordering::Acquire, + ) + .is_err() + { + core::hint::spin_loop(); + } + // SAFETY: The only way we're here is if we successfully set + // 'locked' to true, which implies we must be the only thread here + // and thus have exclusive access to 'data'. + let data = unsafe { &mut *self.data.get() }; + MutexGuard { locked: &self.locked, data } + } + } + + /// A guard that derefs to &T and &mut T. When it's dropped, the lock is + /// released. + #[derive(Debug)] + struct MutexGuard<'a, T> { + locked: &'a AtomicBool, + data: &'a mut T, + } + + impl<'a, T> core::ops::Deref for MutexGuard<'a, T> { + type Target = T; + + #[inline] + fn deref(&self) -> &T { + self.data + } + } + + impl<'a, T> core::ops::DerefMut for MutexGuard<'a, T> { + #[inline] + fn deref_mut(&mut self) -> &mut T { + self.data + } + } + + impl<'a, T> Drop for MutexGuard<'a, T> { + #[inline] + fn drop(&mut self) { + // Drop means 'data' is no longer accessible, so we can unlock + // the mutex. + self.locked.store(false, Ordering::Release); + } + } +} + +#[cfg(test)] +mod tests { + use core::panic::{RefUnwindSafe, UnwindSafe}; + + use alloc::{boxed::Box, vec, vec::Vec}; + + use super::*; + + #[test] + fn oibits() { + fn assert_oitbits() {} + assert_oitbits::>>(); + assert_oitbits::>>>(); + assert_oitbits::< + Pool< + Vec, + Box< + dyn Fn() -> Vec + + Send + + Sync + + UnwindSafe + + RefUnwindSafe, + >, + >, + >(); + } + + // Tests that Pool implements the "single owner" optimization. That is, the + // thread that first accesses the pool gets its own copy, while all other + // threads get distinct copies. 
+ #[cfg(feature = "std")] + #[test] + fn thread_owner_optimization() { + use std::{cell::RefCell, sync::Arc, vec}; + + let pool: Arc>>> = + Arc::new(Pool::new(|| RefCell::new(vec!['a']))); + pool.get().borrow_mut().push('x'); + + let pool1 = pool.clone(); + let t1 = std::thread::spawn(move || { + let guard = pool1.get(); + guard.borrow_mut().push('y'); + }); + + let pool2 = pool.clone(); + let t2 = std::thread::spawn(move || { + let guard = pool2.get(); + guard.borrow_mut().push('z'); + }); + + t1.join().unwrap(); + t2.join().unwrap(); + + // If we didn't implement the single owner optimization, then one of + // the threads above is likely to have mutated the [a, x] vec that + // we stuffed in the pool before spawning the threads. But since + // neither thread was first to access the pool, and because of the + // optimization, we should be guaranteed that neither thread mutates + // the special owned pool value. + // + // (Technically this is an implementation detail and not a contract of + // Pool's API.) + assert_eq!(vec!['a', 'x'], *pool.get().borrow()); + } + + // This tests that if the "owner" of a pool asks for two values, then it + // gets two distinct values and not the same one. This test failed in the + // course of developing the pool, which in turn resulted in UB because it + // permitted getting aliasing &mut borrows to the same place in memory. + #[test] + fn thread_owner_distinct() { + let pool = Pool::new(|| vec!['a']); + + { + let mut g1 = pool.get(); + let v1 = &mut *g1; + let mut g2 = pool.get(); + let v2 = &mut *g2; + v1.push('b'); + v2.push('c'); + assert_eq!(&mut vec!['a', 'b'], v1); + assert_eq!(&mut vec!['a', 'c'], v2); + } + // This isn't technically guaranteed, but we + // expect to now get the "owned" value (the first + // call to 'get()' above) now that it's back in + // the pool. 
+ assert_eq!(&mut vec!['a', 'b'], &mut *pool.get()); + } + + // This tests that we can share a guard with another thread, mutate the + // underlying value and everything works. This failed in the course of + // developing a pool since the pool permitted 'get()' to return the same + // value to the owner thread, even before the previous value was put back + // into the pool. This in turn resulted in this test producing a data race. + #[cfg(feature = "std")] + #[test] + fn thread_owner_sync() { + let pool = Pool::new(|| vec!['a']); + { + let mut g1 = pool.get(); + let mut g2 = pool.get(); + std::thread::scope(|s| { + s.spawn(|| { + g1.push('b'); + }); + s.spawn(|| { + g2.push('c'); + }); + }); + + let v1 = &mut *g1; + let v2 = &mut *g2; + assert_eq!(&mut vec!['a', 'b'], v1); + assert_eq!(&mut vec!['a', 'c'], v2); + } + + // This isn't technically guaranteed, but we + // expect to now get the "owned" value (the first + // call to 'get()' above) now that it's back in + // the pool. + assert_eq!(&mut vec!['a', 'b'], &mut *pool.get()); + } + + // This tests that if we move a PoolGuard that is owned by the current + // thread to another thread and drop it, then the thread owner doesn't + // change. During development of the pool, this test failed because the + // PoolGuard assumed it was dropped in the same thread from which it was + // created, and thus used the current thread's ID as the owner, which could + // be different than the actual owner of the pool. + #[cfg(feature = "std")] + #[test] + fn thread_owner_send_drop() { + let pool = Pool::new(|| vec!['a']); + // Establishes this thread as the owner. + { + pool.get().push('b'); + } + std::thread::scope(|s| { + // Sanity check that we get the same value back. + // (Not technically guaranteed.) + let mut g = pool.get(); + assert_eq!(&vec!['a', 'b'], &*g); + // Now push it to another thread and drop it. + s.spawn(move || { + g.push('c'); + }) + .join() + .unwrap(); + }); + // Now check that we're still the owner. 
This is not technically + // guaranteed by the API, but is true in practice given the thread + // owner optimization. + assert_eq!(&vec!['a', 'b', 'c'], &*pool.get()); + } +} diff --git a/vendor/regex-automata/src/util/prefilter/aho_corasick.rs b/vendor/regex-automata/src/util/prefilter/aho_corasick.rs new file mode 100644 index 0000000..50cce82 --- /dev/null +++ b/vendor/regex-automata/src/util/prefilter/aho_corasick.rs @@ -0,0 +1,149 @@ +use crate::util::{ + prefilter::PrefilterI, + search::{MatchKind, Span}, +}; + +#[derive(Clone, Debug)] +pub(crate) struct AhoCorasick { + #[cfg(not(feature = "perf-literal-multisubstring"))] + _unused: (), + #[cfg(feature = "perf-literal-multisubstring")] + ac: aho_corasick::AhoCorasick, +} + +impl AhoCorasick { + pub(crate) fn new>( + kind: MatchKind, + needles: &[B], + ) -> Option { + #[cfg(not(feature = "perf-literal-multisubstring"))] + { + None + } + #[cfg(feature = "perf-literal-multisubstring")] + { + // We used to use `aho_corasick::MatchKind::Standard` here when + // `kind` was `MatchKind::All`, but this is not correct. The + // "standard" Aho-Corasick match semantics are to report a match + // immediately as soon as it is seen, but `All` isn't like that. + // In particular, with "standard" semantics, given the needles + // "abc" and "b" and the haystack "abc," it would report a match + // at offset 1 before a match at offset 0. This is never what we + // want in the context of the regex engine, regardless of whether + // we have leftmost-first or 'all' semantics. Namely, we always + // want the leftmost match. + let ac_match_kind = match kind { + MatchKind::LeftmostFirst | MatchKind::All => { + aho_corasick::MatchKind::LeftmostFirst + } + }; + // This is kind of just an arbitrary number, but basically, if we + // have a small enough set of literals, then we try to use the VERY + // memory hungry DFA. Otherwise, we whimp out and use an NFA. The + // upshot is that the NFA is quite lean and decently fast. 
Faster + // than a naive Aho-Corasick NFA anyway. + let ac_kind = if needles.len() <= 500 { + aho_corasick::AhoCorasickKind::DFA + } else { + aho_corasick::AhoCorasickKind::ContiguousNFA + }; + let result = aho_corasick::AhoCorasick::builder() + .kind(Some(ac_kind)) + .match_kind(ac_match_kind) + .start_kind(aho_corasick::StartKind::Both) + // We try to handle all of the prefilter cases in the super + // module, and only use Aho-Corasick for the actual automaton. + // The aho-corasick crate does have some extra prefilters, + // namely, looking for rare bytes to feed to memchr{,2,3} + // instead of just the first byte. If we end up wanting + // those---and they are somewhat tricky to implement---then + // we could port them to this crate. + // + // The main reason for doing things this way is so we have a + // complete and easy to understand picture of which prefilters + // are available and how they work. Otherwise it seems too + // easy to get into a situation where we have a prefilter + // layered on top of prefilter, and that might have unintended + // consequences. 
+ .prefilter(false) + .build(needles); + let ac = match result { + Ok(ac) => ac, + Err(_err) => { + debug!("aho-corasick prefilter failed to build: {}", _err); + return None; + } + }; + Some(AhoCorasick { ac }) + } + } +} + +impl PrefilterI for AhoCorasick { + fn find(&self, haystack: &[u8], span: Span) -> Option { + #[cfg(not(feature = "perf-literal-multisubstring"))] + { + unreachable!() + } + #[cfg(feature = "perf-literal-multisubstring")] + { + let input = + aho_corasick::Input::new(haystack).span(span.start..span.end); + self.ac + .find(input) + .map(|m| Span { start: m.start(), end: m.end() }) + } + } + + fn prefix(&self, haystack: &[u8], span: Span) -> Option { + #[cfg(not(feature = "perf-literal-multisubstring"))] + { + unreachable!() + } + #[cfg(feature = "perf-literal-multisubstring")] + { + let input = aho_corasick::Input::new(haystack) + .anchored(aho_corasick::Anchored::Yes) + .span(span.start..span.end); + self.ac + .find(input) + .map(|m| Span { start: m.start(), end: m.end() }) + } + } + + fn memory_usage(&self) -> usize { + #[cfg(not(feature = "perf-literal-multisubstring"))] + { + unreachable!() + } + #[cfg(feature = "perf-literal-multisubstring")] + { + self.ac.memory_usage() + } + } + + fn is_fast(&self) -> bool { + #[cfg(not(feature = "perf-literal-multisubstring"))] + { + unreachable!() + } + #[cfg(feature = "perf-literal-multisubstring")] + { + // Aho-Corasick is never considered "fast" because it's never + // going to be even close to an order of magnitude faster than the + // regex engine itself (assuming a DFA is used). In fact, it is + // usually slower. The magic of Aho-Corasick is that it can search + // a *large* number of literals with a relatively small amount of + // memory. The regex engines are far more wasteful. + // + // Aho-Corasick may be "fast" when the regex engine corresponds + // to, say, the PikeVM. That happens when the lazy DFA couldn't be + // built or used for some reason. 
But in these cases, the regex + // itself is likely quite big and we're probably hosed no matter + // what we do. (In this case, the best bet is for the caller to + // increase some of the memory limits on the hybrid cache capacity + // and hope that's enough.) + false + } + } +} diff --git a/vendor/regex-automata/src/util/prefilter/byteset.rs b/vendor/regex-automata/src/util/prefilter/byteset.rs new file mode 100644 index 0000000..a669d6c --- /dev/null +++ b/vendor/regex-automata/src/util/prefilter/byteset.rs @@ -0,0 +1,58 @@ +use crate::util::{ + prefilter::PrefilterI, + search::{MatchKind, Span}, +}; + +#[derive(Clone, Debug)] +pub(crate) struct ByteSet([bool; 256]); + +impl ByteSet { + pub(crate) fn new>( + _kind: MatchKind, + needles: &[B], + ) -> Option { + #[cfg(not(feature = "perf-literal-multisubstring"))] + { + None + } + #[cfg(feature = "perf-literal-multisubstring")] + { + let mut set = [false; 256]; + for needle in needles.iter() { + let needle = needle.as_ref(); + if needle.len() != 1 { + return None; + } + set[usize::from(needle[0])] = true; + } + Some(ByteSet(set)) + } + } +} + +impl PrefilterI for ByteSet { + fn find(&self, haystack: &[u8], span: Span) -> Option { + haystack[span].iter().position(|&b| self.0[usize::from(b)]).map(|i| { + let start = span.start + i; + let end = start + 1; + Span { start, end } + }) + } + + fn prefix(&self, haystack: &[u8], span: Span) -> Option { + let b = *haystack.get(span.start)?; + if self.0[usize::from(b)] { + Some(Span { start: span.start, end: span.start + 1 }) + } else { + None + } + } + + fn memory_usage(&self) -> usize { + 0 + } + + fn is_fast(&self) -> bool { + false + } +} diff --git a/vendor/regex-automata/src/util/prefilter/memchr.rs b/vendor/regex-automata/src/util/prefilter/memchr.rs new file mode 100644 index 0000000..3d44b83 --- /dev/null +++ b/vendor/regex-automata/src/util/prefilter/memchr.rs @@ -0,0 +1,186 @@ +use crate::util::{ + prefilter::PrefilterI, + search::{MatchKind, Span}, +}; + 
+#[derive(Clone, Debug)] +pub(crate) struct Memchr(u8); + +impl Memchr { + pub(crate) fn new>( + _kind: MatchKind, + needles: &[B], + ) -> Option { + #[cfg(not(feature = "perf-literal-substring"))] + { + None + } + #[cfg(feature = "perf-literal-substring")] + { + if needles.len() != 1 { + return None; + } + if needles[0].as_ref().len() != 1 { + return None; + } + Some(Memchr(needles[0].as_ref()[0])) + } + } +} + +impl PrefilterI for Memchr { + fn find(&self, haystack: &[u8], span: Span) -> Option { + #[cfg(not(feature = "perf-literal-substring"))] + { + unreachable!() + } + #[cfg(feature = "perf-literal-substring")] + { + memchr::memchr(self.0, &haystack[span]).map(|i| { + let start = span.start + i; + let end = start + 1; + Span { start, end } + }) + } + } + + fn prefix(&self, haystack: &[u8], span: Span) -> Option { + let b = *haystack.get(span.start)?; + if self.0 == b { + Some(Span { start: span.start, end: span.start + 1 }) + } else { + None + } + } + + fn memory_usage(&self) -> usize { + 0 + } + + fn is_fast(&self) -> bool { + true + } +} + +#[derive(Clone, Debug)] +pub(crate) struct Memchr2(u8, u8); + +impl Memchr2 { + pub(crate) fn new>( + _kind: MatchKind, + needles: &[B], + ) -> Option { + #[cfg(not(feature = "perf-literal-substring"))] + { + None + } + #[cfg(feature = "perf-literal-substring")] + { + if needles.len() != 2 { + return None; + } + if !needles.iter().all(|n| n.as_ref().len() == 1) { + return None; + } + let b1 = needles[0].as_ref()[0]; + let b2 = needles[1].as_ref()[0]; + Some(Memchr2(b1, b2)) + } + } +} + +impl PrefilterI for Memchr2 { + fn find(&self, haystack: &[u8], span: Span) -> Option { + #[cfg(not(feature = "perf-literal-substring"))] + { + unreachable!() + } + #[cfg(feature = "perf-literal-substring")] + { + memchr::memchr2(self.0, self.1, &haystack[span]).map(|i| { + let start = span.start + i; + let end = start + 1; + Span { start, end } + }) + } + } + + fn prefix(&self, haystack: &[u8], span: Span) -> Option { + let b = 
*haystack.get(span.start)?; + if self.0 == b || self.1 == b { + Some(Span { start: span.start, end: span.start + 1 }) + } else { + None + } + } + + fn memory_usage(&self) -> usize { + 0 + } + + fn is_fast(&self) -> bool { + true + } +} + +#[derive(Clone, Debug)] +pub(crate) struct Memchr3(u8, u8, u8); + +impl Memchr3 { + pub(crate) fn new>( + _kind: MatchKind, + needles: &[B], + ) -> Option { + #[cfg(not(feature = "perf-literal-substring"))] + { + None + } + #[cfg(feature = "perf-literal-substring")] + { + if needles.len() != 3 { + return None; + } + if !needles.iter().all(|n| n.as_ref().len() == 1) { + return None; + } + let b1 = needles[0].as_ref()[0]; + let b2 = needles[1].as_ref()[0]; + let b3 = needles[2].as_ref()[0]; + Some(Memchr3(b1, b2, b3)) + } + } +} + +impl PrefilterI for Memchr3 { + fn find(&self, haystack: &[u8], span: Span) -> Option { + #[cfg(not(feature = "perf-literal-substring"))] + { + unreachable!() + } + #[cfg(feature = "perf-literal-substring")] + { + memchr::memchr3(self.0, self.1, self.2, &haystack[span]).map(|i| { + let start = span.start + i; + let end = start + 1; + Span { start, end } + }) + } + } + + fn prefix(&self, haystack: &[u8], span: Span) -> Option { + let b = *haystack.get(span.start)?; + if self.0 == b || self.1 == b || self.2 == b { + Some(Span { start: span.start, end: span.start + 1 }) + } else { + None + } + } + + fn memory_usage(&self) -> usize { + 0 + } + + fn is_fast(&self) -> bool { + true + } +} diff --git a/vendor/regex-automata/src/util/prefilter/memmem.rs b/vendor/regex-automata/src/util/prefilter/memmem.rs new file mode 100644 index 0000000..deea17b --- /dev/null +++ b/vendor/regex-automata/src/util/prefilter/memmem.rs @@ -0,0 +1,88 @@ +use crate::util::{ + prefilter::PrefilterI, + search::{MatchKind, Span}, +}; + +#[derive(Clone, Debug)] +pub(crate) struct Memmem { + #[cfg(not(all(feature = "std", feature = "perf-literal-substring")))] + _unused: (), + #[cfg(all(feature = "std", feature = 
"perf-literal-substring"))] + finder: memchr::memmem::Finder<'static>, +} + +impl Memmem { + pub(crate) fn new>( + _kind: MatchKind, + needles: &[B], + ) -> Option { + #[cfg(not(all(feature = "std", feature = "perf-literal-substring")))] + { + None + } + #[cfg(all(feature = "std", feature = "perf-literal-substring"))] + { + if needles.len() != 1 { + return None; + } + let needle = needles[0].as_ref(); + let finder = memchr::memmem::Finder::new(needle).into_owned(); + Some(Memmem { finder }) + } + } +} + +impl PrefilterI for Memmem { + fn find(&self, haystack: &[u8], span: Span) -> Option { + #[cfg(not(all(feature = "std", feature = "perf-literal-substring")))] + { + unreachable!() + } + #[cfg(all(feature = "std", feature = "perf-literal-substring"))] + { + self.finder.find(&haystack[span]).map(|i| { + let start = span.start + i; + let end = start + self.finder.needle().len(); + Span { start, end } + }) + } + } + + fn prefix(&self, haystack: &[u8], span: Span) -> Option { + #[cfg(not(all(feature = "std", feature = "perf-literal-substring")))] + { + unreachable!() + } + #[cfg(all(feature = "std", feature = "perf-literal-substring"))] + { + let needle = self.finder.needle(); + if haystack[span].starts_with(needle) { + Some(Span { end: span.start + needle.len(), ..span }) + } else { + None + } + } + } + + fn memory_usage(&self) -> usize { + #[cfg(not(all(feature = "std", feature = "perf-literal-substring")))] + { + unreachable!() + } + #[cfg(all(feature = "std", feature = "perf-literal-substring"))] + { + self.finder.needle().len() + } + } + + fn is_fast(&self) -> bool { + #[cfg(not(all(feature = "std", feature = "perf-literal-substring")))] + { + unreachable!() + } + #[cfg(all(feature = "std", feature = "perf-literal-substring"))] + { + true + } + } +} diff --git a/vendor/regex-automata/src/util/prefilter/mod.rs b/vendor/regex-automata/src/util/prefilter/mod.rs new file mode 100644 index 0000000..d20442a --- /dev/null +++ 
b/vendor/regex-automata/src/util/prefilter/mod.rs @@ -0,0 +1,719 @@ +/*! +Defines a prefilter for accelerating regex searches. + +A prefilter can be created by building a [`Prefilter`] value. + +A prefilter represents one of the most important optimizations available for +accelerating regex searches. The idea of a prefilter is to very quickly find +candidate locations in a haystack where a regex _could_ match. Once a candidate +is found, it is then intended for the regex engine to run at that position to +determine whether the candidate is a match or a false positive. + +In the aforementioned description of the prefilter optimization also lay its +demise. Namely, if a prefilter has a high false positive rate and it produces +lots of candidates, then a prefilter can overall make a regex search slower. +It can run more slowly because more time is spent ping-ponging between the +prefilter search and the regex engine attempting to confirm each candidate as +a match. This ping-ponging has overhead that adds up, and is exacerbated by +a high false positive rate. + +Nevertheless, the optimization is still generally worth performing in most +cases. Particularly given just how much throughput can be improved. (It is not +uncommon for prefilter optimizations to improve throughput by one or two orders +of magnitude.) + +Typically a prefilter is used to find occurrences of literal prefixes from a +regex pattern, but this isn't required. A prefilter can be used to look for +suffixes or even inner literals. + +Note that as of now, prefilters throw away information about which pattern +each literal comes from. In other words, when a prefilter finds a match, +there's no way to know which pattern (or patterns) it came from. Therefore, +in order to confirm a match, you'll have to check all of the patterns by +running the full regex engine. 
+*/ + +mod aho_corasick; +mod byteset; +mod memchr; +mod memmem; +mod teddy; + +use core::{ + borrow::Borrow, + fmt::Debug, + panic::{RefUnwindSafe, UnwindSafe}, +}; + +#[cfg(feature = "alloc")] +use alloc::sync::Arc; + +#[cfg(feature = "syntax")] +use regex_syntax::hir::{literal, Hir}; + +use crate::util::search::{MatchKind, Span}; + +pub(crate) use crate::util::prefilter::{ + aho_corasick::AhoCorasick, + byteset::ByteSet, + memchr::{Memchr, Memchr2, Memchr3}, + memmem::Memmem, + teddy::Teddy, +}; + +/// A prefilter for accelerating regex searches. +/// +/// If you already have your literals that you want to search with, +/// then the vanilla [`Prefilter::new`] constructor is for you. But +/// if you have an [`Hir`] value from the `regex-syntax` crate, then +/// [`Prefilter::from_hir_prefix`] might be more convenient. Namely, it uses +/// the [`regex-syntax::hir::literal`](regex_syntax::hir::literal) module to +/// extract literal prefixes for you, optimize them and then select and build a +/// prefilter matcher. +/// +/// A prefilter must have **zero false negatives**. However, by its very +/// nature, it may produce false positives. That is, a prefilter will never +/// skip over a position in the haystack that corresponds to a match of the +/// original regex pattern, but it *may* produce a match for a position +/// in the haystack that does *not* correspond to a match of the original +/// regex pattern. If you use either the [`Prefilter::from_hir_prefix`] or +/// [`Prefilter::from_hirs_prefix`] constructors, then this guarantee is +/// upheld for you automatically. This guarantee is not preserved if you use +/// [`Prefilter::new`] though, since it is up to the caller to provide correct +/// literal strings with respect to the original regex pattern. +/// +/// # Cloning +/// +/// It is an API guarantee that cloning a prefilter is cheap. That is, cloning +/// it will not duplicate whatever heap memory is used to represent the +/// underlying matcher. 
+/// +/// # Example +/// +/// This example shows how to attach a `Prefilter` to the +/// [`PikeVM`](crate::nfa::thompson::pikevm::PikeVM) in order to accelerate +/// searches. +/// +/// ``` +/// use regex_automata::{ +/// nfa::thompson::pikevm::PikeVM, +/// util::prefilter::Prefilter, +/// Match, MatchKind, +/// }; +/// +/// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["Bruce "]) +/// .expect("a prefilter"); +/// let re = PikeVM::builder() +/// .configure(PikeVM::config().prefilter(Some(pre))) +/// .build(r"Bruce \w+")?; +/// let mut cache = re.create_cache(); +/// assert_eq!( +/// Some(Match::must(0, 6..23)), +/// re.find(&mut cache, "Hello Bruce Springsteen!"), +/// ); +/// # Ok::<(), Box>(()) +/// ``` +/// +/// But note that if you get your prefilter incorrect, it could lead to an +/// incorrect result! +/// +/// ``` +/// use regex_automata::{ +/// nfa::thompson::pikevm::PikeVM, +/// util::prefilter::Prefilter, +/// Match, MatchKind, +/// }; +/// +/// // This prefilter is wrong! +/// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["Patti "]) +/// .expect("a prefilter"); +/// let re = PikeVM::builder() +/// .configure(PikeVM::config().prefilter(Some(pre))) +/// .build(r"Bruce \w+")?; +/// let mut cache = re.create_cache(); +/// // We find no match even though the regex does match. +/// assert_eq!( +/// None, +/// re.find(&mut cache, "Hello Bruce Springsteen!"), +/// ); +/// # Ok::<(), Box>(()) +/// ``` +#[derive(Clone, Debug)] +pub struct Prefilter { + #[cfg(not(feature = "alloc"))] + _unused: (), + #[cfg(feature = "alloc")] + pre: Arc, + #[cfg(feature = "alloc")] + is_fast: bool, + #[cfg(feature = "alloc")] + max_needle_len: usize, +} + +impl Prefilter { + /// Create a new prefilter from a sequence of needles and a corresponding + /// match semantics. + /// + /// This may return `None` for a variety of reasons, for example, if + /// a suitable prefilter could not be constructed. 
That might occur + /// if they are unavailable (e.g., the `perf-literal-substring` and + /// `perf-literal-multisubstring` features aren't enabled), or it might + /// occur because of heuristics or other artifacts of how the prefilter + /// works. + /// + /// Note that if you have an [`Hir`] expression, it may be more convenient + /// to use [`Prefilter::from_hir_prefix`]. It will automatically handle the + /// task of extracting prefix literals for you. + /// + /// # Example + /// + /// This example shows how match semantics can impact the matching + /// algorithm used by the prefilter. For this reason, it is important to + /// ensure that the match semantics given here are consistent with the + /// match semantics intended for the regular expression that the literals + /// were extracted from. + /// + /// ``` + /// use regex_automata::{ + /// util::{prefilter::Prefilter, syntax}, + /// MatchKind, Span, + /// }; + /// + /// let hay = "Hello samwise"; + /// + /// // With leftmost-first, we find 'samwise' here because it comes + /// // before 'sam' in the sequence we give it.. + /// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["samwise", "sam"]) + /// .expect("a prefilter"); + /// assert_eq!( + /// Some(Span::from(6..13)), + /// pre.find(hay.as_bytes(), Span::from(0..hay.len())), + /// ); + /// // Still with leftmost-first but with the literals reverse, now 'sam' + /// // will match instead! 
+ /// let pre = Prefilter::new(MatchKind::LeftmostFirst, &["sam", "samwise"]) + /// .expect("a prefilter"); + /// assert_eq!( + /// Some(Span::from(6..9)), + /// pre.find(hay.as_bytes(), Span::from(0..hay.len())), + /// ); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn new>( + kind: MatchKind, + needles: &[B], + ) -> Option { + Choice::new(kind, needles).and_then(|choice| { + let max_needle_len = + needles.iter().map(|b| b.as_ref().len()).max().unwrap_or(0); + Prefilter::from_choice(choice, max_needle_len) + }) + } + + /// This turns a prefilter selection into a `Prefilter`. That is, in turns + /// the enum given into a trait object. + fn from_choice( + choice: Choice, + max_needle_len: usize, + ) -> Option { + #[cfg(not(feature = "alloc"))] + { + None + } + #[cfg(feature = "alloc")] + { + let pre: Arc = match choice { + Choice::Memchr(p) => Arc::new(p), + Choice::Memchr2(p) => Arc::new(p), + Choice::Memchr3(p) => Arc::new(p), + Choice::Memmem(p) => Arc::new(p), + Choice::Teddy(p) => Arc::new(p), + Choice::ByteSet(p) => Arc::new(p), + Choice::AhoCorasick(p) => Arc::new(p), + }; + let is_fast = pre.is_fast(); + Some(Prefilter { pre, is_fast, max_needle_len }) + } + } + + /// This attempts to extract prefixes from the given `Hir` expression for + /// the given match semantics, and if possible, builds a prefilter for + /// them. + /// + /// # Example + /// + /// This example shows how to build a prefilter directly from an [`Hir`] + /// expression, and use to find an occurrence of a prefix from the regex + /// pattern. 
+ /// + /// ``` + /// use regex_automata::{ + /// util::{prefilter::Prefilter, syntax}, + /// MatchKind, Span, + /// }; + /// + /// let hir = syntax::parse(r"(Bruce|Patti) \w+")?; + /// let pre = Prefilter::from_hir_prefix(MatchKind::LeftmostFirst, &hir) + /// .expect("a prefilter"); + /// let hay = "Hello Patti Scialfa!"; + /// assert_eq!( + /// Some(Span::from(6..12)), + /// pre.find(hay.as_bytes(), Span::from(0..hay.len())), + /// ); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[cfg(feature = "syntax")] + pub fn from_hir_prefix(kind: MatchKind, hir: &Hir) -> Option { + Prefilter::from_hirs_prefix(kind, &[hir]) + } + + /// This attempts to extract prefixes from the given `Hir` expressions for + /// the given match semantics, and if possible, builds a prefilter for + /// them. + /// + /// Note that as of now, prefilters throw away information about which + /// pattern each literal comes from. In other words, when a prefilter finds + /// a match, there's no way to know which pattern (or patterns) it came + /// from. Therefore, in order to confirm a match, you'll have to check all + /// of the patterns by running the full regex engine. + /// + /// # Example + /// + /// This example shows how to build a prefilter directly from multiple + /// `Hir` expressions expression, and use it to find an occurrence of a + /// prefix from the regex patterns. + /// + /// ``` + /// use regex_automata::{ + /// util::{prefilter::Prefilter, syntax}, + /// MatchKind, Span, + /// }; + /// + /// let hirs = syntax::parse_many(&[ + /// r"(Bruce|Patti) \w+", + /// r"Mrs?\. Doubtfire", + /// ])?; + /// let pre = Prefilter::from_hirs_prefix(MatchKind::LeftmostFirst, &hirs) + /// .expect("a prefilter"); + /// let hay = "Hello Mrs. 
Doubtfire"; + /// assert_eq!( + /// Some(Span::from(6..20)), + /// pre.find(hay.as_bytes(), Span::from(0..hay.len())), + /// ); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[cfg(feature = "syntax")] + pub fn from_hirs_prefix>( + kind: MatchKind, + hirs: &[H], + ) -> Option { + prefixes(kind, hirs) + .literals() + .and_then(|lits| Prefilter::new(kind, lits)) + } + + /// Run this prefilter on `haystack[span.start..end]` and return a matching + /// span if one exists. + /// + /// The span returned is guaranteed to have a start position greater than + /// or equal to the one given, and an end position less than or equal to + /// the one given. + /// + /// # Example + /// + /// This example shows how to build a prefilter directly from an [`Hir`] + /// expression, and use it to find an occurrence of a prefix from the regex + /// pattern. + /// + /// ``` + /// use regex_automata::{ + /// util::{prefilter::Prefilter, syntax}, + /// MatchKind, Span, + /// }; + /// + /// let hir = syntax::parse(r"Bruce \w+")?; + /// let pre = Prefilter::from_hir_prefix(MatchKind::LeftmostFirst, &hir) + /// .expect("a prefilter"); + /// let hay = "Hello Bruce Springsteen!"; + /// assert_eq!( + /// Some(Span::from(6..12)), + /// pre.find(hay.as_bytes(), Span::from(0..hay.len())), + /// ); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn find(&self, haystack: &[u8], span: Span) -> Option { + #[cfg(not(feature = "alloc"))] + { + unreachable!() + } + #[cfg(feature = "alloc")] + { + self.pre.find(haystack, span) + } + } + + /// Returns the span of a prefix of `haystack[span.start..span.end]` if + /// the prefilter matches. + /// + /// The span returned is guaranteed to have a start position equivalent to + /// the one given, and an end position less than or equal to the one given. 
+ /// + /// # Example + /// + /// This example shows how to build a prefilter directly from an [`Hir`] + /// expression, and use it to find an occurrence of a prefix from the regex + /// pattern that begins at the start of a haystack only. + /// + /// ``` + /// use regex_automata::{ + /// util::{prefilter::Prefilter, syntax}, + /// MatchKind, Span, + /// }; + /// + /// let hir = syntax::parse(r"Bruce \w+")?; + /// let pre = Prefilter::from_hir_prefix(MatchKind::LeftmostFirst, &hir) + /// .expect("a prefilter"); + /// let hay = "Hello Bruce Springsteen!"; + /// // Nothing is found here because 'Bruce' does + /// // not occur at the beginning of our search. + /// assert_eq!( + /// None, + /// pre.prefix(hay.as_bytes(), Span::from(0..hay.len())), + /// ); + /// // But if we change where we start the search + /// // to begin where 'Bruce ' begins, then a + /// // match will be found. + /// assert_eq!( + /// Some(Span::from(6..12)), + /// pre.prefix(hay.as_bytes(), Span::from(6..hay.len())), + /// ); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn prefix(&self, haystack: &[u8], span: Span) -> Option { + #[cfg(not(feature = "alloc"))] + { + unreachable!() + } + #[cfg(feature = "alloc")] + { + self.pre.prefix(haystack, span) + } + } + + /// Returns the heap memory, in bytes, used by the underlying prefilter. + #[inline] + pub fn memory_usage(&self) -> usize { + #[cfg(not(feature = "alloc"))] + { + unreachable!() + } + #[cfg(feature = "alloc")] + { + self.pre.memory_usage() + } + } + + /// Return the length of the longest needle + /// in this Prefilter + #[inline] + pub fn max_needle_len(&self) -> usize { + #[cfg(not(feature = "alloc"))] + { + unreachable!() + } + #[cfg(feature = "alloc")] + { + self.max_needle_len + } + } + + /// Implementations might return true here if they believe themselves to + /// be "fast." 
The concept of "fast" is deliberately left vague, but in + /// practice this usually corresponds to whether it's believed that SIMD + /// will be used. + /// + /// Why do we care about this? Well, some prefilter tricks tend to come + /// with their own bits of overhead, and so might only make sense if we + /// know that a scan will be *much* faster than the regex engine itself. + /// Otherwise, the trick may not be worth doing. Whether something is + /// "much" faster than the regex engine generally boils down to whether + /// SIMD is used. (But not always. Even a SIMD matcher with a high false + /// positive rate can become quite slow.) + /// + /// Even if this returns true, it is still possible for the prefilter to + /// be "slow." Remember, prefilters are just heuristics. We can't really + /// *know* a prefilter will be fast without actually trying the prefilter. + /// (Which of course we cannot afford to do.) + #[inline] + pub fn is_fast(&self) -> bool { + #[cfg(not(feature = "alloc"))] + { + unreachable!() + } + #[cfg(feature = "alloc")] + { + self.is_fast + } + } +} + +/// A trait for abstracting over prefilters. Basically, a prefilter is +/// something that do an unanchored *and* an anchored search in a haystack +/// within a given span. +/// +/// This exists pretty much only so that we can use prefilters as a trait +/// object (which is what `Prefilter` is). If we ever move off of trait objects +/// and to an enum, then it's likely this trait could be removed. +pub(crate) trait PrefilterI: + Debug + Send + Sync + RefUnwindSafe + UnwindSafe + 'static +{ + /// Run this prefilter on `haystack[span.start..end]` and return a matching + /// span if one exists. + /// + /// The span returned is guaranteed to have a start position greater than + /// or equal to the one given, and an end position less than or equal to + /// the one given. 
+ fn find(&self, haystack: &[u8], span: Span) -> Option; + + /// Returns the span of a prefix of `haystack[span.start..span.end]` if + /// the prefilter matches. + /// + /// The span returned is guaranteed to have a start position equivalent to + /// the one given, and an end position less than or equal to the one given. + fn prefix(&self, haystack: &[u8], span: Span) -> Option; + + /// Returns the heap memory, in bytes, used by the underlying prefilter. + fn memory_usage(&self) -> usize; + + /// Implementations might return true here if they believe themselves to + /// be "fast." See [`Prefilter::is_fast`] for more details. + fn is_fast(&self) -> bool; +} + +#[cfg(feature = "alloc")] +impl PrefilterI for Arc

 .             any character except new line (includes new line with s flag)
+[0-9]         any ASCII digit
 \d            digit (\p{Nd})
 \D            not digit
 \pX           Unicode character class identified by a one-letter name
@@ -307,20 +663,23 @@ a separate crate, [`regex-syntax`](https://docs.rs/regex-syntax).
 [0-9--4]      Direct subtraction (matching 0-9 except 4)
 [a-g~~b-h]    Symmetric difference (matching `a` and `h` only)
 [\[\]]        Escaping in character classes (matching [ or ])
+[a&&b]        An empty character class matching nothing
 
Any named character class may appear inside a bracketed `[...]` character -class. For example, `[\p{Greek}[:digit:]]` matches any Greek or ASCII -digit. `[\p{Greek}&&\pL]` matches Greek letters. +class. For example, `[\p{Greek}[:digit:]]` matches any ASCII digit or any +codepoint in the `Greek` script. `[\p{Greek}&&\pL]` matches Greek letters. Precedence in character classes, from most binding to least: -1. Ranges: `a-cd` == `[a-c]d` -2. Union: `ab&&bc` == `[ab]&&[bc]` -3. Intersection: `^a-z&&b` == `^[a-z&&b]` -4. Negation +1. Ranges: `[a-cd]` == `[[a-c]d]` +2. Union: `[ab&&bc]` == `[[ab]&&[bc]]` +3. Intersection, difference, symmetric difference. All three have equivalent +precedence, and are evaluated in left-to-right order. For example, +`[\pL--\p{Greek}&&\p{Uppercase}]` == `[[\pL--\p{Greek}]&&\p{Uppercase}]`. +4. Negation: `[^a-z&&b]` == `[^[a-z&&b]]`. -## Composites +### Composites
 xy    concatenation (x followed by y)
@@ -346,7 +705,7 @@ let re = Regex::new(r"sam|samwise").unwrap();
 assert_eq!("sam", re.find(haystack).unwrap().as_str());
 ```
 
-## Repetitions
+### Repetitions
 
 
 x*        zero or more of x (greedy)
@@ -363,21 +722,44 @@ x{n,}?    at least n x (ungreedy/lazy)
 x{n}?     exactly n x
 
-## Empty matches +### Empty matches
-^     the beginning of text (or start-of-line with multi-line mode)
-$     the end of text (or end-of-line with multi-line mode)
-\A    only the beginning of text (even with multi-line mode enabled)
-\z    only the end of text (even with multi-line mode enabled)
-\b    a Unicode word boundary (\w on one side and \W, \A, or \z on other)
-\B    not a Unicode word boundary
+^               the beginning of a haystack (or start-of-line with multi-line mode)
+$               the end of a haystack (or end-of-line with multi-line mode)
+\A              only the beginning of a haystack (even with multi-line mode enabled)
+\z              only the end of a haystack (even with multi-line mode enabled)
+\b              a Unicode word boundary (\w on one side and \W, \A, or \z on other)
+\B              not a Unicode word boundary
+\b{start}, \<   a Unicode start-of-word boundary (\W|\A on the left, \w on the right)
+\b{end}, \>     a Unicode end-of-word boundary (\w on the left, \W|\z on the right)
+\b{start-half}  half of a Unicode start-of-word boundary (\W|\A on the left)
+\b{end-half}    half of a Unicode end-of-word boundary (\W|\z on the right)
 
-The empty regex is valid and matches the empty string. For example, the empty -regex matches `abc` at positions `0`, `1`, `2` and `3`. +The empty regex is valid and matches the empty string. For example, the +empty regex matches `abc` at positions `0`, `1`, `2` and `3`. When using the +top-level [`Regex`] on `&str` haystacks, an empty match that splits a codepoint +is guaranteed to never be returned. However, such matches are permitted when +using a [`bytes::Regex`]. For example: -## Grouping and flags +```rust +let re = regex::Regex::new(r"").unwrap(); +let ranges: Vec<_> = re.find_iter("💩").map(|m| m.range()).collect(); +assert_eq!(ranges, vec![0..0, 4..4]); + +let re = regex::bytes::Regex::new(r"").unwrap(); +let ranges: Vec<_> = re.find_iter("💩".as_bytes()).map(|m| m.range()).collect(); +assert_eq!(ranges, vec![0..0, 1..1, 2..2, 3..3, 4..4]); +``` + +Note that an empty regex is distinct from a regex that can never match. +For example, the regex `[a&&b]` is a character class that represents the +intersection of `a` and `b`. That intersection is empty, which means the +character class is empty. Since nothing is in the empty set, `[a&&b]` matches +nothing, not even the empty string. + +### Grouping and flags
 (exp)          numbered capture group (indexed by opening parenthesis)
@@ -405,6 +787,7 @@ All flags are by default disabled unless stated otherwise. They are:
 i     case-insensitive: letters match both upper and lower case
 m     multi-line mode: ^ and $ match begin/end of line
 s     allow . to match \n
+R     enables CRLF mode: when multi-line mode is enabled, \r\n is used
 U     swap the meaning of x* and x*?
 u     Unicode support (enabled by default)
 x     verbose mode, ignores whitespace and allow line comments (starting with `#`)
@@ -418,22 +801,22 @@ Flags can be toggled within a pattern. Here's an example that matches
 case-insensitively for the first part but case-sensitively for the second part:
 
 ```rust
-# use regex::Regex;
-# fn main() {
+use regex::Regex;
+
 let re = Regex::new(r"(?i)a+(?-i)b+").unwrap();
-let cap = re.captures("AaAaAbbBBBb").unwrap();
-assert_eq!(&cap[0], "AaAaAbb");
-# }
+let m = re.find("AaAaAbbBBBb").unwrap();
+assert_eq!(m.as_str(), "AaAaAbb");
 ```
 
 Notice that the `a+` matches either `a` or `A`, but the `b+` only matches
 `b`.
 
 Multi-line mode means `^` and `$` no longer match just at the beginning/end of
-the input, but at the beginning/end of lines:
+the input, but also at the beginning/end of lines:
 
 ```
-# use regex::Regex;
+use regex::Regex;
+
 let re = Regex::new(r"(?m)^line \d+").unwrap();
 let m = re.find("line one\nline 2\n").unwrap();
 assert_eq!(m.as_str(), "line 2");
@@ -442,44 +825,72 @@ assert_eq!(m.as_str(), "line 2");
 Note that `^` matches after new lines, even at the end of input:
 
 ```
-# use regex::Regex;
+use regex::Regex;
+
 let re = Regex::new(r"(?m)^").unwrap();
 let m = re.find_iter("test\n").last().unwrap();
 assert_eq!((m.start(), m.end()), (5, 5));
 ```
 
-Here is an example that uses an ASCII word boundary instead of a Unicode
-word boundary:
+When both CRLF mode and multi-line mode are enabled, then `^` and `$` will
+match either `\r` or `\n`, but never in the middle of a `\r\n`:
+
+```
+use regex::Regex;
+
+let re = Regex::new(r"(?mR)^foo$").unwrap();
+let m = re.find("\r\nfoo\r\n").unwrap();
+assert_eq!(m.as_str(), "foo");
+```
+
+Unicode mode can also be selectively disabled, although only when the result
+*would not* match invalid UTF-8. One good example of this is using an ASCII
+word boundary instead of a Unicode word boundary, which might make some regex
+searches run faster:
 
 ```rust
-# use regex::Regex;
-# fn main() {
+use regex::Regex;
+
 let re = Regex::new(r"(?-u:\b).+(?-u:\b)").unwrap();
-let cap = re.captures("$$abc$$").unwrap();
-assert_eq!(&cap[0], "abc");
-# }
+let m = re.find("$$abc$$").unwrap();
+assert_eq!(m.as_str(), "abc");
 ```
 
-## Escape sequences
+### Escape sequences
+
+Note that this includes all possible escape sequences, even ones that are
+documented elsewhere.
 
 
-\*          literal *, works for any punctuation character: \.+*?()|[]{}^$
-\a          bell (\x07)
-\f          form feed (\x0C)
-\t          horizontal tab
-\n          new line
-\r          carriage return
-\v          vertical tab (\x0B)
-\123        octal character code (up to three digits) (when enabled)
-\x7F        hex character code (exactly two digits)
-\x{10FFFF}  any hex character code corresponding to a Unicode code point
-\u007F      hex character code (exactly four digits)
-\u{7F}      any hex character code corresponding to a Unicode code point
-\U0000007F  hex character code (exactly eight digits)
-\U{7F}      any hex character code corresponding to a Unicode code point
+\*              literal *, applies to all ASCII except [0-9A-Za-z<>]
+\a              bell (\x07)
+\f              form feed (\x0C)
+\t              horizontal tab
+\n              new line
+\r              carriage return
+\v              vertical tab (\x0B)
+\A              matches at the beginning of a haystack
+\z              matches at the end of a haystack
+\b              word boundary assertion
+\B              negated word boundary assertion
+\b{start}, \<   start-of-word boundary assertion
+\b{end}, \>     end-of-word boundary assertion
+\b{start-half}  half of a start-of-word boundary assertion
+\b{end-half}    half of an end-of-word boundary assertion
+\123            octal character code, up to three digits (when enabled)
+\x7F            hex character code (exactly two digits)
+\x{10FFFF}      any hex character code corresponding to a Unicode code point
+\u007F          hex character code (exactly four digits)
+\u{7F}          any hex character code corresponding to a Unicode code point
+\U0000007F      hex character code (exactly eight digits)
+\U{7F}          any hex character code corresponding to a Unicode code point
+\p{Letter}      Unicode character class
+\P{Letter}      negated Unicode character class
+\d, \s, \w      Perl character class
+\D, \S, \W      negated Perl character class
 
-## Perl character classes (Unicode friendly) +### Perl character classes (Unicode friendly) These classes are based on the definitions provided in [UTS#18](https://www.unicode.org/reports/tr18/#Compatibility_Properties): @@ -493,7 +904,10 @@ These classes are based on the definitions provided in \W not word character
-## ASCII character classes +### ASCII character classes + +These classes are based on the definitions provided in +[UTS#18](https://www.unicode.org/reports/tr18/#Compatibility_Properties):
 [[:alnum:]]    alphanumeric ([0-9A-Za-z])
@@ -512,16 +926,228 @@ These classes are based on the definitions provided in
 [[:xdigit:]]   hex digit ([0-9A-Fa-f])
 
+# Untrusted input + +This crate is meant to be able to run regex searches on untrusted haystacks +without fear of [ReDoS]. This crate also, to a certain extent, supports +untrusted patterns. + +[ReDoS]: https://en.wikipedia.org/wiki/ReDoS + +This crate differs from most (but not all) other regex engines in that it +doesn't use unbounded backtracking to run a regex search. In those cases, +one generally cannot use untrusted patterns *or* untrusted haystacks because +it can be very difficult to know whether a particular pattern will result in +catastrophic backtracking or not. + +We'll first discuss how this crate deals with untrusted inputs and then wrap +it up with a realistic discussion about what practice really looks like. + +### Panics + +Outside of clearly documented cases, most APIs in this crate are intended to +never panic regardless of the inputs given to them. For example, `Regex::new`, +`Regex::is_match`, `Regex::find` and `Regex::captures` should never panic. That +is, it is an API promise that those APIs will never panic no matter what inputs +are given to them. With that said, regex engines are complicated beasts, and +providing a rock solid guarantee that these APIs literally never panic is +essentially equivalent to saying, "there are no bugs in this library." That is +a bold claim, and not really one that can be feasibly made with a straight +face. + +Don't get the wrong impression here. This crate is extensively tested, not just +with unit and integration tests, but also via fuzz testing. For example, this +crate is part of the [OSS-fuzz project]. Panics should be incredibly rare, but +it is possible for bugs to exist, and thus possible for a panic to occur. If +you need a rock solid guarantee against panics, then you should wrap calls into +this library with [`std::panic::catch_unwind`]. + +It's also worth pointing out that this library will *generally* panic when +other regex engines would commit undefined behavior. 
When undefined behavior +occurs, your program might continue as if nothing bad has happened, but it also +might mean your program is open to the worst kinds of exploits. In contrast, +the worst thing a panic can do is a denial of service. + +[OSS-fuzz project]: https://android.googlesource.com/platform/external/oss-fuzz/+/refs/tags/android-t-preview-1/projects/rust-regex/ +[`std::panic::catch_unwind`]: https://doc.rust-lang.org/std/panic/fn.catch_unwind.html + +### Untrusted patterns + +The principal way this crate deals with them is by limiting their size by +default. The size limit can be configured via [`RegexBuilder::size_limit`]. The +idea of a size limit is that compiling a pattern into a `Regex` will fail if it +becomes "too big." Namely, while *most* resources consumed by compiling a regex +are approximately proportional (albeit with some high constant factors in some +cases, such as with Unicode character classes) to the length of the pattern +itself, there is one particular exception to this: counted repetitions. Namely, +this pattern: + +```text +a{5}{5}{5}{5}{5}{5} +``` + +Is equivalent to this pattern: + +```text +a{15625} +``` + +In both of these cases, the actual pattern string is quite small, but the +resulting `Regex` value is quite large. Indeed, as the first pattern shows, +it isn't enough to locally limit the size of each repetition because they can +be stacked in a way that results in exponential growth. + +To provide a bit more context, a simplified view of regex compilation looks +like this: + +* The pattern string is parsed into a structured representation called an AST. +Counted repetitions are not expanded and Unicode character classes are not +looked up in this stage. That is, the size of the AST is proportional to the +size of the pattern with "reasonable" constant factors. In other words, one +can reasonably limit the memory used by an AST by limiting the length of the +pattern string. +* The AST is translated into an HIR. 
Counted repetitions are still *not* +expanded at this stage, but Unicode character classes are embedded into the +HIR. The memory usage of a HIR is still proportional to the length of the +original pattern string, but the constant factors---mostly as a result of +Unicode character classes---can be quite high. Still though, the memory used by +an HIR can be reasonably limited by limiting the length of the pattern string. +* The HIR is compiled into a [Thompson NFA]. This is the stage at which +something like `\w{5}` is rewritten to `\w\w\w\w\w`. Thus, this is the stage +at which [`RegexBuilder::size_limit`] is enforced. If the NFA exceeds the +configured size, then this stage will fail. + +[Thompson NFA]: https://en.wikipedia.org/wiki/Thompson%27s_construction + +The size limit helps avoid two different kinds of exorbitant resource usage: + +* It avoids permitting exponential memory usage based on the size of the +pattern string. +* It avoids long search times. This will be discussed in more detail in the +next section, but worst case search time *is* dependent on the size of the +regex. So keeping regexes limited to a reasonable size is also a way of keeping +search times reasonable. + +Finally, it's worth pointing out that regex compilation is guaranteed to take +worst case `O(m)` time, where `m` is proportional to the size of regex. The +size of the regex here is *after* the counted repetitions have been expanded. + +**Advice for those using untrusted regexes**: limit the pattern length to +something small and expand it as needed. Configure [`RegexBuilder::size_limit`] +to something small and then expand it as needed. + +### Untrusted haystacks + +The main way this crate guards against searches from taking a long time is by +using algorithms that guarantee a `O(m * n)` worst case time and space bound. +Namely: + +* `m` is proportional to the size of the regex, where the size of the regex +includes the expansion of all counted repetitions. 
(See the previous section on +untrusted patterns.) +* `n` is proportional to the length, in bytes, of the haystack. + +In other words, if you consider `m` to be a constant (for example, the regex +pattern is a literal in the source code), then the search can be said to run +in "linear time." Or equivalently, "linear time with respect to the size of the +haystack." + +But the `m` factor here is important not to ignore. If a regex is +particularly big, the search times can get quite slow. This is why, in part, +[`RegexBuilder::size_limit`] exists. + +**Advice for those searching untrusted haystacks**: As long as your regexes +are not enormous, you should expect to be able to search untrusted haystacks +without fear. If you aren't sure, you should benchmark it. Unlike backtracking +engines, if your regex is so big that it's likely to result in slow searches, +this is probably something you'll be able to observe regardless of what the +haystack is made up of. + +### Iterating over matches + +One thing that is perhaps easy to miss is that the worst case time +complexity bound of `O(m * n)` applies to methods like [`Regex::is_match`], +[`Regex::find`] and [`Regex::captures`]. It does **not** apply to +[`Regex::find_iter`] or [`Regex::captures_iter`]. Namely, since iterating over +all matches can execute many searches, and each search can scan the entire +haystack, the worst case time complexity for iterators is `O(m * n^2)`. + +One example of where this occurs is when a pattern consists of an alternation, +where an earlier branch of the alternation requires scanning the entire +haystack only to discover that there is no match. It also requires a later +branch of the alternation to have matched at the beginning of the search. For +example, consider the pattern `.*[^A-Z]|[A-Z]` and the haystack `AAAAA`. 
The +first search will scan to the end looking for matches of `.*[^A-Z]` even though +a finite automata engine (as in this crate) knows that `[A-Z]` has already +matched the first character of the haystack. This is due to the greedy nature +of regex searching. That first search will report a match at the first `A` only +after scanning to the end to discover that no other match exists. The next +search then begins at the second `A` and the behavior repeats. + +There is no way to avoid this. This means that if both patterns and haystacks +are untrusted and you're iterating over all matches, you're susceptible to +worst case quadratic time complexity. One possible way to mitigate this +is to drop down to the lower level `regex-automata` crate and use its +`meta::Regex` iterator APIs. There, you can configure the search to operate +in "earliest" mode by passing a `Input::new(haystack).earliest(true)` to +`meta::Regex::find_iter` (for example). By enabling this mode, you give up +the normal greedy match semantics of regex searches and instead ask the regex +engine to immediately stop as soon as a match has been found. Enabling this +mode will thus restore the worst case `O(m * n)` time complexity bound, but at +the cost of different semantics. + +### Untrusted inputs in practice + +While providing a `O(m * n)` worst case time bound on all searches goes a long +way toward preventing [ReDoS], that doesn't mean every search you can possibly +run will complete without burning CPU time. In general, there are a few ways +for the `m * n` time bound to still bite you: + +* You are searching an exceptionally long haystack. No matter how you slice +it, a longer haystack will take more time to search. This crate may often make +very quick work of even long haystacks because of its literal optimizations, +but those aren't available for all regexes. +* Unicode character classes can cause searches to be quite slow in some cases. 
+This is especially true when they are combined with counted repetitions. While +the regex size limit above will protect you from the most egregious cases, +the default size limit still permits pretty big regexes that can execute more +slowly than one might expect. +* While routines like [`Regex::find`] and [`Regex::captures`] guarantee +worst case `O(m * n)` search time, routines like [`Regex::find_iter`] and +[`Regex::captures_iter`] actually have worst case `O(m * n^2)` search time. +This is because `find_iter` runs many searches, and each search takes worst +case `O(m * n)` time. Thus, iteration of all matches in a haystack has +worst case `O(m * n^2)`. A good example of a pattern that exhibits this is +`(?:A+){1000}|` or even `.*[^A-Z]|[A-Z]`. + +In general, untrusted haystacks are easier to stomach than untrusted patterns. +Untrusted patterns give a lot more control to the caller to impact the +performance of a search. In many cases, a regex search will actually execute in +average case `O(n)` time (i.e., not dependent on the size of the regex), but +this can't be guaranteed in general. Therefore, permitting untrusted patterns +means that your only line of defense is to put a limit on how big `m` (and +perhaps also `n`) can be in `O(m * n)`. `n` is limited by simply inspecting +the length of the haystack while `m` is limited by *both* applying a limit to +the length of the pattern *and* a limit on the compiled size of the regex via +[`RegexBuilder::size_limit`]. + +It bears repeating: if you're accepting untrusted patterns, it would be a good +idea to start with conservative limits on `m` and `n`, and then carefully +increase them as needed. + # Crate features By default, this crate tries pretty hard to make regex matching both as fast -as possible and as correct as it can be, within reason. This means that there -is a lot of code dedicated to performance, the handling of Unicode data and the -Unicode data itself.
Overall, this leads to more dependencies, larger binaries -and longer compile times. This trade off may not be appropriate in all cases, -and indeed, even when all Unicode and performance features are disabled, one -is still left with a perfectly serviceable regex engine that will work well -in many cases. +as possible and as correct as it can be. This means that there is a lot of +code dedicated to performance, the handling of Unicode data and the Unicode +data itself. Overall, this leads to more dependencies, larger binaries and +longer compile times. This trade off may not be appropriate in all cases, and +indeed, even when all Unicode and performance features are disabled, one is +still left with a perfectly serviceable regex engine that will work well in +many cases. (Note that code is not arbitrarily reducible, and for this reason, +the [`regex-lite`](https://docs.rs/regex-lite) crate exists to provide an even +more minimal experience by cutting out Unicode and performance, but still +maintaining the linear search time bound.) This crate exposes a number of features for controlling that trade off. Some of these features are strictly performance oriented, such that disabling them @@ -530,32 +1156,61 @@ Other features, such as the ones controlling the presence or absence of Unicode data, can result in a loss of functionality. For example, if one disables the `unicode-case` feature (described below), then compiling the regex `(?i)a` will fail since Unicode case insensitivity is enabled by default. Instead, -callers must use `(?i-u)a` instead to disable Unicode case folding. Stated -differently, enabling or disabling any of the features below can only add or -subtract from the total set of valid regular expressions. Enabling or disabling -a feature will never modify the match semantics of a regular expression. +callers must use `(?i-u)a` to disable Unicode case folding. 
Stated differently, +enabling or disabling any of the features below can only add or subtract from +the total set of valid regular expressions. Enabling or disabling a feature +will never modify the match semantics of a regular expression. -All features below are enabled by default. +Most features below are enabled by default. Features that aren't enabled by +default are noted. ### Ecosystem features * **std** - - When enabled, this will cause `regex` to use the standard library. Currently, - disabling this feature will always result in a compilation error. It is - intended to add `alloc`-only support to regex in the future. + When enabled, this will cause `regex` to use the standard library. In terms + of APIs, `std` causes error types to implement the `std::error::Error` + trait. Enabling `std` will also result in performance optimizations, + including SIMD and faster synchronization primitives. Notably, **disabling + the `std` feature will result in the use of spin locks**. To use a regex + engine without `std` and without spin locks, you'll need to drop down to + the [`regex-automata`](https://docs.rs/regex-automata) crate. +* **logging** - + When enabled, the `log` crate is used to emit messages about regex + compilation and search strategies. This is **disabled by default**. This is + typically only useful to someone working on this crate's internals, but might + be useful if you're doing some rabbit hole performance hacking. Or if you're + just interested in the kinds of decisions being made by the regex engine. ### Performance features * **perf** - - Enables all performance related features. This feature is enabled by default - and will always cover all features that improve performance, even if more - are added in the future. + Enables all performance related features except for `perf-dfa-full`. This + feature is enabled by default and is intended to cover all reasonable features + that improve performance, even if more are added in the future.
* **perf-dfa** - Enables the use of a lazy DFA for matching. The lazy DFA is used to compile portions of a regex to a very fast DFA on an as-needed basis. This can result in substantial speedups, usually by an order of magnitude on large haystacks. The lazy DFA does not bring in any new dependencies, but it can make compile times longer. +* **perf-dfa-full** - + Enables the use of a full DFA for matching. Full DFAs are problematic because + they have worst case `O(2^n)` construction time. For this reason, when this + feature is enabled, full DFAs are only used for very small regexes and a + very small space bound is used during determinization to avoid the DFA + from blowing up. This feature is not enabled by default, even as part of + `perf`, because it results in fairly sizeable increases in binary size and + compilation time. It can result in faster search times, but they tend to be + more modest and limited to non-Unicode regexes. +* **perf-onepass** - + Enables the use of a one-pass DFA for extracting the positions of capture + groups. This optimization applies to a subset of certain types of NFAs and + represents the fastest engine in this crate for dealing with capture groups. +* **perf-backtrack** - + Enables the use of a bounded backtracking algorithm for extracting the + positions of capture groups. This usually sits between the slowest engine + (the PikeVM) and the fastest engine (one-pass DFA) for extracting capture + groups. It's used whenever the regex is not one-pass and is small enough. * **perf-inline** - Enables the use of aggressive inlining inside match routines. This reduces the overhead of each match. The aggressive inlining, however, increases @@ -609,193 +1264,83 @@ All features below are enabled by default. This enables using classes like `\p{gcb=Extend}`, `\p{wb=Katakana}` and `\p{sb=ATerm}`. - -# Untrusted input - -This crate can handle both untrusted regular expressions and untrusted -search text. 
- -Untrusted regular expressions are handled by capping the size of a compiled -regular expression. -(See [`RegexBuilder::size_limit`](struct.RegexBuilder.html#method.size_limit).) -Without this, it would be trivial for an attacker to exhaust your system's -memory with expressions like `a{100}{100}{100}`. - -Untrusted search text is allowed because the matching engine(s) in this -crate have time complexity `O(mn)` (with `m ~ regex` and `n ~ search -text`), which means there's no way to cause exponential blow-up like with -some other regular expression engines. (We pay for this by disallowing -features like arbitrary look-ahead and backreferences.) - -When a DFA is used, pathological cases with exponential state blow-up are -avoided by constructing the DFA lazily or in an "online" manner. Therefore, -at most one new state can be created for each byte of input. This satisfies -our time complexity guarantees, but can lead to memory growth -proportional to the size of the input. As a stopgap, the DFA is only -allowed to store a fixed number of states. When the limit is reached, its -states are wiped and continues on, possibly duplicating previous work. If -the limit is reached too frequently, it gives up and hands control off to -another matching engine with fixed memory requirements. -(The DFA size limit can also be tweaked. See -[`RegexBuilder::dfa_size_limit`](struct.RegexBuilder.html#method.dfa_size_limit).) +# Other crates + +This crate has two required dependencies and several optional dependencies. +This section briefly describes them with the goal of raising awareness of how +different components of this crate may be used independently. + +It is somewhat unusual for a regex engine to have dependencies, as most regex +libraries are self contained units with no dependencies other than a particular +environment's standard library. 
Indeed, for other similarly optimized regex +engines, most or all of the code in the dependencies of this crate would +normally just be unseparable or coupled parts of the crate itself. But since +Rust and its tooling ecosystem make the use of dependencies so easy, it made +sense to spend some effort de-coupling parts of this crate and making them +independently useful. + +We only briefly describe each crate here. + +* [`regex-lite`](https://docs.rs/regex-lite) is not a dependency of `regex`, +but rather, a standalone zero-dependency simpler version of `regex` that +prioritizes compile times and binary size. In exchange, it eschews Unicode +support and performance. Its match semantics are as identical as possible to +the `regex` crate, and for the things it supports, its APIs are identical to +the APIs in this crate. In other words, for a lot of use cases, it is a drop-in +replacement. +* [`regex-syntax`](https://docs.rs/regex-syntax) provides a regular expression +parser via `Ast` and `Hir` types. It also provides routines for extracting +literals from a pattern. Folks can use this crate to do analysis, or even to +build their own regex engine without having to worry about writing a parser. +* [`regex-automata`](https://docs.rs/regex-automata) provides the regex engines +themselves. One of the downsides of finite automata based regex engines is that +they often need multiple internal engines in order to have similar or better +performance than an unbounded backtracking engine in practice. `regex-automata` +in particular provides public APIs for a PikeVM, a bounded backtracker, a +one-pass DFA, a lazy DFA, a fully compiled DFA and a meta regex engine that +combines all them together. It also has native multi-pattern support and +provides a way to compile and serialize full DFAs such that they can be loaded +and searched in a no-std no-alloc environment. `regex-automata` itself doesn't +even have a required dependency on `regex-syntax`! 
+* [`memchr`](https://docs.rs/memchr) provides low level SIMD vectorized +routines for quickly finding the location of single bytes or even substrings +in a haystack. In other words, it provides fast `memchr` and `memmem` routines. +These are used by this crate in literal optimizations. +* [`aho-corasick`](https://docs.rs/aho-corasick) provides multi-substring +search. It also provides SIMD vectorized routines in the case where the number +of substrings to search for is relatively small. The `regex` crate also uses +this for literal optimizations. */ +#![no_std] #![deny(missing_docs)] #![cfg_attr(feature = "pattern", feature(pattern))] #![warn(missing_debug_implementations)] -#[cfg(not(feature = "std"))] -compile_error!("`std` feature is currently required to build this crate"); +#[cfg(doctest)] +doc_comment::doctest!("../README.md"); -// To check README's example -// TODO: Re-enable this once the MSRV is 1.43 or greater. -// See: https://github.com/rust-lang/regex/issues/684 -// See: https://github.com/rust-lang/regex/issues/685 -// #[cfg(doctest)] -// doc_comment::doctest!("../README.md"); +extern crate alloc; +#[cfg(any(test, feature = "std"))] +extern crate std; -#[cfg(feature = "std")] pub use crate::error::Error; -#[cfg(feature = "std")] -pub use crate::re_builder::set_unicode::*; -#[cfg(feature = "std")] -pub use crate::re_builder::unicode::*; -#[cfg(feature = "std")] -pub use crate::re_set::unicode::*; -#[cfg(feature = "std")] -pub use crate::re_unicode::{ - escape, CaptureLocations, CaptureMatches, CaptureNames, Captures, - Locations, Match, Matches, NoExpand, Regex, Replacer, ReplacerRef, Split, - SplitN, SubCaptureMatches, -}; - -/** -Match regular expressions on arbitrary bytes. - -This module provides a nearly identical API to the one found in the -top-level of this crate. There are two important differences: - -1. Matching is done on `&[u8]` instead of `&str`. Additionally, `Vec` -is used where `String` would have been used. -2. 
Unicode support can be disabled even when disabling it would result in -matching invalid UTF-8 bytes. - -# Example: match null terminated string - -This shows how to find all null-terminated strings in a slice of bytes: -```rust -# use regex::bytes::Regex; -let re = Regex::new(r"(?-u)(?P[^\x00]+)\x00").unwrap(); -let text = b"foo\x00bar\x00baz\x00"; - -// Extract all of the strings without the null terminator from each match. -// The unwrap is OK here since a match requires the `cstr` capture to match. -let cstrs: Vec<&[u8]> = - re.captures_iter(text) - .map(|c| c.name("cstr").unwrap().as_bytes()) - .collect(); -assert_eq!(vec![&b"foo"[..], &b"bar"[..], &b"baz"[..]], cstrs); -``` - -# Example: selectively enable Unicode support - -This shows how to match an arbitrary byte pattern followed by a UTF-8 encoded -string (e.g., to extract a title from a Matroska file): - -```rust -# use std::str; -# use regex::bytes::Regex; -let re = Regex::new( - r"(?-u)\x7b\xa9(?:[\x80-\xfe]|[\x40-\xff].)(?u:(.*))" -).unwrap(); -let text = b"\x12\xd0\x3b\x5f\x7b\xa9\x85\xe2\x98\x83\x80\x98\x54\x76\x68\x65"; -let caps = re.captures(text).unwrap(); - -// Notice that despite the `.*` at the end, it will only match valid UTF-8 -// because Unicode mode was enabled with the `u` flag. Without the `u` flag, -// the `.*` would match the rest of the bytes. -let mat = caps.get(1).unwrap(); -assert_eq!((7, 10), (mat.start(), mat.end())); - -// If there was a match, Unicode mode guarantees that `title` is valid UTF-8. -let title = str::from_utf8(&caps[1]).unwrap(); -assert_eq!("☃", title); -``` - -In general, if the Unicode flag is enabled in a capture group and that capture -is part of the overall match, then the capture is *guaranteed* to be valid -UTF-8. - -# Syntax - -The supported syntax is pretty much the same as the syntax for Unicode -regular expressions with a few changes that make sense for matching arbitrary -bytes: - -1. 
The `u` flag can be disabled even when disabling it might cause the regex to -match invalid UTF-8. When the `u` flag is disabled, the regex is said to be in -"ASCII compatible" mode. -2. In ASCII compatible mode, neither Unicode scalar values nor Unicode -character classes are allowed. -3. In ASCII compatible mode, Perl character classes (`\w`, `\d` and `\s`) -revert to their typical ASCII definition. `\w` maps to `[[:word:]]`, `\d` maps -to `[[:digit:]]` and `\s` maps to `[[:space:]]`. -4. In ASCII compatible mode, word boundaries use the ASCII compatible `\w` to -determine whether a byte is a word byte or not. -5. Hexadecimal notation can be used to specify arbitrary bytes instead of -Unicode codepoints. For example, in ASCII compatible mode, `\xFF` matches the -literal byte `\xFF`, while in Unicode mode, `\xFF` is a Unicode codepoint that -matches its UTF-8 encoding of `\xC3\xBF`. Similarly for octal notation when -enabled. -6. In ASCII compatible mode, `.` matches any *byte* except for `\n`. When the -`s` flag is additionally enabled, `.` matches any byte. - -# Performance - -In general, one should expect performance on `&[u8]` to be roughly similar to -performance on `&str`. -*/ -#[cfg(feature = "std")] -pub mod bytes { - pub use crate::re_builder::bytes::*; - pub use crate::re_builder::set_bytes::*; - pub use crate::re_bytes::*; - pub use crate::re_set::bytes::*; -} +pub use crate::{builders::string::*, regex::string::*, regexset::string::*}; -mod backtrack; -mod compile; -#[cfg(feature = "perf-dfa")] -mod dfa; +mod builders; +pub mod bytes; mod error; -mod exec; -mod expand; mod find_byte; -mod input; -mod literal; #[cfg(feature = "pattern")] mod pattern; -mod pikevm; -mod pool; -mod prog; -mod re_builder; -mod re_bytes; -mod re_set; -mod re_trait; -mod re_unicode; -mod sparse; -mod utf8; - -/// The `internal` module exists to support suspicious activity, such as -/// testing different matching engines and supporting the `regex-debug` CLI -/// utility. 
-#[doc(hidden)] -#[cfg(feature = "std")] -pub mod internal { - pub use crate::compile::Compiler; - pub use crate::exec::{Exec, ExecBuilder}; - pub use crate::input::{Char, CharInput, Input, InputAt}; - pub use crate::literal::LiteralSearcher; - pub use crate::prog::{EmptyLook, Inst, InstRanges, Program}; +mod regex; +mod regexset; + +/// Escapes all regular expression meta characters in `pattern`. +/// +/// The string returned may be safely used as a literal in a regular +/// expression. +pub fn escape(pattern: &str) -> alloc::string::String { + regex_syntax::escape(pattern) } diff --git a/vendor/regex/src/literal/imp.rs b/vendor/regex/src/literal/imp.rs deleted file mode 100644 index 75fa6e3..0000000 --- a/vendor/regex/src/literal/imp.rs +++ /dev/null @@ -1,413 +0,0 @@ -use std::mem; - -use aho_corasick::{self, packed, AhoCorasick}; -use memchr::{memchr, memchr2, memchr3, memmem}; -use regex_syntax::hir::literal::{Literal, Seq}; - -/// A prefix extracted from a compiled regular expression. -/// -/// A regex prefix is a set of literal strings that *must* be matched at the -/// beginning of a regex in order for the entire regex to match. Similarly -/// for a regex suffix. -#[derive(Clone, Debug)] -pub struct LiteralSearcher { - complete: bool, - lcp: Memmem, - lcs: Memmem, - matcher: Matcher, -} - -#[derive(Clone, Debug)] -enum Matcher { - /// No literals. (Never advances through the input.) - Empty, - /// A set of four or more single byte literals. - Bytes(SingleByteSet), - /// A single substring, using vector accelerated routines when available. - Memmem(Memmem), - /// An Aho-Corasick automaton. - AC { ac: AhoCorasick, lits: Vec }, - /// A packed multiple substring searcher, using SIMD. - /// - /// Note that Aho-Corasick will actually use this packed searcher - /// internally automatically, however, there is some overhead associated - /// with going through the Aho-Corasick machinery. So using the packed - /// searcher directly results in some gains. 
- Packed { s: packed::Searcher, lits: Vec }, -} - -impl LiteralSearcher { - /// Returns a matcher that never matches and never advances the input. - pub fn empty() -> Self { - Self::new(Seq::infinite(), Matcher::Empty) - } - - /// Returns a matcher for literal prefixes from the given set. - pub fn prefixes(lits: Seq) -> Self { - let matcher = Matcher::prefixes(&lits); - Self::new(lits, matcher) - } - - /// Returns a matcher for literal suffixes from the given set. - pub fn suffixes(lits: Seq) -> Self { - let matcher = Matcher::suffixes(&lits); - Self::new(lits, matcher) - } - - fn new(lits: Seq, matcher: Matcher) -> Self { - LiteralSearcher { - complete: lits.is_exact(), - lcp: Memmem::new(lits.longest_common_prefix().unwrap_or(b"")), - lcs: Memmem::new(lits.longest_common_suffix().unwrap_or(b"")), - matcher, - } - } - - /// Returns true if all matches comprise the entire regular expression. - /// - /// This does not necessarily mean that a literal match implies a match - /// of the regular expression. For example, the regular expression `^a` - /// is comprised of a single complete literal `a`, but the regular - /// expression demands that it only match at the beginning of a string. - pub fn complete(&self) -> bool { - self.complete && !self.is_empty() - } - - /// Find the position of a literal in `haystack` if it exists. - #[cfg_attr(feature = "perf-inline", inline(always))] - pub fn find(&self, haystack: &[u8]) -> Option<(usize, usize)> { - use self::Matcher::*; - match self.matcher { - Empty => Some((0, 0)), - Bytes(ref sset) => sset.find(haystack).map(|i| (i, i + 1)), - Memmem(ref s) => s.find(haystack).map(|i| (i, i + s.len())), - AC { ref ac, .. } => { - ac.find(haystack).map(|m| (m.start(), m.end())) - } - Packed { ref s, .. } => { - s.find(haystack).map(|m| (m.start(), m.end())) - } - } - } - - /// Like find, except matches must start at index `0`. 
- pub fn find_start(&self, haystack: &[u8]) -> Option<(usize, usize)> { - for lit in self.iter() { - if lit.len() > haystack.len() { - continue; - } - if lit == &haystack[0..lit.len()] { - return Some((0, lit.len())); - } - } - None - } - - /// Like find, except matches must end at index `haystack.len()`. - pub fn find_end(&self, haystack: &[u8]) -> Option<(usize, usize)> { - for lit in self.iter() { - if lit.len() > haystack.len() { - continue; - } - if lit == &haystack[haystack.len() - lit.len()..] { - return Some((haystack.len() - lit.len(), haystack.len())); - } - } - None - } - - /// Returns an iterator over all literals to be matched. - pub fn iter(&self) -> LiteralIter<'_> { - match self.matcher { - Matcher::Empty => LiteralIter::Empty, - Matcher::Bytes(ref sset) => LiteralIter::Bytes(&sset.dense), - Matcher::Memmem(ref s) => LiteralIter::Single(&s.finder.needle()), - Matcher::AC { ref lits, .. } => LiteralIter::AC(lits), - Matcher::Packed { ref lits, .. } => LiteralIter::Packed(lits), - } - } - - /// Returns a matcher for the longest common prefix of this matcher. - pub fn lcp(&self) -> &Memmem { - &self.lcp - } - - /// Returns a matcher for the longest common suffix of this matcher. - pub fn lcs(&self) -> &Memmem { - &self.lcs - } - - /// Returns true iff this prefix is empty. - pub fn is_empty(&self) -> bool { - self.len() == 0 - } - - /// Returns the number of prefixes in this machine. - pub fn len(&self) -> usize { - use self::Matcher::*; - match self.matcher { - Empty => 0, - Bytes(ref sset) => sset.dense.len(), - Memmem(_) => 1, - AC { ref ac, .. } => ac.patterns_len(), - Packed { ref lits, .. } => lits.len(), - } - } - - /// Return the approximate heap usage of literals in bytes. - pub fn approximate_size(&self) -> usize { - use self::Matcher::*; - match self.matcher { - Empty => 0, - Bytes(ref sset) => sset.approximate_size(), - Memmem(ref single) => single.approximate_size(), - AC { ref ac, .. } => ac.memory_usage(), - Packed { ref s, .. 
} => s.memory_usage(), - } - } -} - -impl Matcher { - fn prefixes(lits: &Seq) -> Self { - let sset = SingleByteSet::prefixes(lits); - Matcher::new(lits, sset) - } - - fn suffixes(lits: &Seq) -> Self { - let sset = SingleByteSet::suffixes(lits); - Matcher::new(lits, sset) - } - - fn new(lits: &Seq, sset: SingleByteSet) -> Self { - if lits.is_empty() || lits.min_literal_len() == Some(0) { - return Matcher::Empty; - } - let lits = match lits.literals() { - None => return Matcher::Empty, - Some(members) => members, - }; - if sset.dense.len() >= 26 { - // Avoid trying to match a large number of single bytes. - // This is *very* sensitive to a frequency analysis comparison - // between the bytes in sset and the composition of the haystack. - // No matter the size of sset, if its members all are rare in the - // haystack, then it'd be worth using it. How to tune this... IDK. - // ---AG - return Matcher::Empty; - } - if sset.complete { - return Matcher::Bytes(sset); - } - if lits.len() == 1 { - return Matcher::Memmem(Memmem::new(lits[0].as_bytes())); - } - - let pats: Vec<&[u8]> = lits.iter().map(|lit| lit.as_bytes()).collect(); - let is_aho_corasick_fast = sset.dense.len() <= 1 && sset.all_ascii; - if lits.len() <= 100 && !is_aho_corasick_fast { - let mut builder = packed::Config::new() - .match_kind(packed::MatchKind::LeftmostFirst) - .builder(); - if let Some(s) = builder.extend(&pats).build() { - return Matcher::Packed { s, lits: lits.to_owned() }; - } - } - let ac = AhoCorasick::builder() - .match_kind(aho_corasick::MatchKind::LeftmostFirst) - .kind(Some(aho_corasick::AhoCorasickKind::DFA)) - .build(&pats) - .unwrap(); - Matcher::AC { ac, lits: lits.to_owned() } - } -} - -#[derive(Debug)] -pub enum LiteralIter<'a> { - Empty, - Bytes(&'a [u8]), - Single(&'a [u8]), - AC(&'a [Literal]), - Packed(&'a [Literal]), -} - -impl<'a> Iterator for LiteralIter<'a> { - type Item = &'a [u8]; - - fn next(&mut self) -> Option { - match *self { - LiteralIter::Empty => None, - 
LiteralIter::Bytes(ref mut many) => { - if many.is_empty() { - None - } else { - let next = &many[0..1]; - *many = &many[1..]; - Some(next) - } - } - LiteralIter::Single(ref mut one) => { - if one.is_empty() { - None - } else { - let next = &one[..]; - *one = &[]; - Some(next) - } - } - LiteralIter::AC(ref mut lits) => { - if lits.is_empty() { - None - } else { - let next = &lits[0]; - *lits = &lits[1..]; - Some(next.as_bytes()) - } - } - LiteralIter::Packed(ref mut lits) => { - if lits.is_empty() { - None - } else { - let next = &lits[0]; - *lits = &lits[1..]; - Some(next.as_bytes()) - } - } - } - } -} - -#[derive(Clone, Debug)] -struct SingleByteSet { - sparse: Vec, - dense: Vec, - complete: bool, - all_ascii: bool, -} - -impl SingleByteSet { - fn new() -> SingleByteSet { - SingleByteSet { - sparse: vec![false; 256], - dense: vec![], - complete: true, - all_ascii: true, - } - } - - fn prefixes(lits: &Seq) -> SingleByteSet { - let mut sset = SingleByteSet::new(); - let lits = match lits.literals() { - None => return sset, - Some(lits) => lits, - }; - for lit in lits.iter() { - sset.complete = sset.complete && lit.len() == 1; - if let Some(&b) = lit.as_bytes().get(0) { - if !sset.sparse[b as usize] { - if b > 0x7F { - sset.all_ascii = false; - } - sset.dense.push(b); - sset.sparse[b as usize] = true; - } - } - } - sset - } - - fn suffixes(lits: &Seq) -> SingleByteSet { - let mut sset = SingleByteSet::new(); - let lits = match lits.literals() { - None => return sset, - Some(lits) => lits, - }; - for lit in lits.iter() { - sset.complete = sset.complete && lit.len() == 1; - if let Some(&b) = lit.as_bytes().last() { - if !sset.sparse[b as usize] { - if b > 0x7F { - sset.all_ascii = false; - } - sset.dense.push(b); - sset.sparse[b as usize] = true; - } - } - } - sset - } - - /// Faster find that special cases certain sizes to use memchr. 
- #[cfg_attr(feature = "perf-inline", inline(always))] - fn find(&self, text: &[u8]) -> Option { - match self.dense.len() { - 0 => None, - 1 => memchr(self.dense[0], text), - 2 => memchr2(self.dense[0], self.dense[1], text), - 3 => memchr3(self.dense[0], self.dense[1], self.dense[2], text), - _ => self._find(text), - } - } - - /// Generic find that works on any sized set. - fn _find(&self, haystack: &[u8]) -> Option { - for (i, &b) in haystack.iter().enumerate() { - if self.sparse[b as usize] { - return Some(i); - } - } - None - } - - fn approximate_size(&self) -> usize { - (self.dense.len() * mem::size_of::()) - + (self.sparse.len() * mem::size_of::()) - } -} - -/// A simple wrapper around the memchr crate's memmem implementation. -/// -/// The API this exposes mirrors the API of previous substring searchers that -/// this supplanted. -#[derive(Clone, Debug)] -pub struct Memmem { - finder: memmem::Finder<'static>, - char_len: usize, -} - -impl Memmem { - fn new(pat: &[u8]) -> Memmem { - Memmem { - finder: memmem::Finder::new(pat).into_owned(), - char_len: char_len_lossy(pat), - } - } - - #[cfg_attr(feature = "perf-inline", inline(always))] - pub fn find(&self, haystack: &[u8]) -> Option { - self.finder.find(haystack) - } - - #[cfg_attr(feature = "perf-inline", inline(always))] - pub fn is_suffix(&self, text: &[u8]) -> bool { - if text.len() < self.len() { - return false; - } - &text[text.len() - self.len()..] 
== self.finder.needle() - } - - pub fn len(&self) -> usize { - self.finder.needle().len() - } - - pub fn char_len(&self) -> usize { - self.char_len - } - - fn approximate_size(&self) -> usize { - self.finder.needle().len() * mem::size_of::() - } -} - -fn char_len_lossy(bytes: &[u8]) -> usize { - String::from_utf8_lossy(bytes).chars().count() -} diff --git a/vendor/regex/src/literal/mod.rs b/vendor/regex/src/literal/mod.rs deleted file mode 100644 index b9fb77a..0000000 --- a/vendor/regex/src/literal/mod.rs +++ /dev/null @@ -1,55 +0,0 @@ -pub use self::imp::*; - -#[cfg(feature = "perf-literal")] -mod imp; - -#[allow(missing_docs)] -#[cfg(not(feature = "perf-literal"))] -mod imp { - use regex_syntax::hir::literal::Seq; - - #[derive(Clone, Debug)] - pub struct LiteralSearcher(()); - - impl LiteralSearcher { - pub fn empty() -> Self { - LiteralSearcher(()) - } - - pub fn prefixes(_: Seq) -> Self { - LiteralSearcher(()) - } - - pub fn suffixes(_: Seq) -> Self { - LiteralSearcher(()) - } - - pub fn complete(&self) -> bool { - false - } - - pub fn find(&self, _: &[u8]) -> Option<(usize, usize)> { - unreachable!() - } - - pub fn find_start(&self, _: &[u8]) -> Option<(usize, usize)> { - unreachable!() - } - - pub fn find_end(&self, _: &[u8]) -> Option<(usize, usize)> { - unreachable!() - } - - pub fn is_empty(&self) -> bool { - true - } - - pub fn len(&self) -> usize { - 0 - } - - pub fn approximate_size(&self) -> usize { - 0 - } - } -} diff --git a/vendor/regex/src/pattern.rs b/vendor/regex/src/pattern.rs index 00549e5..2db04d8 100644 --- a/vendor/regex/src/pattern.rs +++ b/vendor/regex/src/pattern.rs @@ -1,6 +1,6 @@ -use std::str::pattern::{Pattern, SearchStep, Searcher}; +use core::str::pattern::{Pattern, SearchStep, Searcher}; -use crate::re_unicode::{Matches, Regex}; +use crate::{Matches, Regex}; #[derive(Debug)] pub struct RegexSearcher<'r, 't> { diff --git a/vendor/regex/src/pikevm.rs b/vendor/regex/src/pikevm.rs deleted file mode 100644 index 8c9eac2..0000000 --- 
a/vendor/regex/src/pikevm.rs +++ /dev/null @@ -1,360 +0,0 @@ -// This module implements the Pike VM. That is, it guarantees linear time -// search of a regex on any text with memory use proportional to the size of -// the regex. -// -// It is equal in power to the backtracking engine in this crate, except the -// backtracking engine is typically faster on small regexes/texts at the -// expense of a bigger memory footprint. -// -// It can do more than the DFA can (specifically, record capture locations -// and execute Unicode word boundary assertions), but at a slower speed. -// Specifically, the Pike VM executes a DFA implicitly by repeatedly expanding -// epsilon transitions. That is, the Pike VM engine can be in multiple states -// at once where as the DFA is only ever in one state at a time. -// -// Therefore, the Pike VM is generally treated as the fallback when the other -// matching engines either aren't feasible to run or are insufficient. - -use std::mem; - -use crate::exec::ProgramCache; -use crate::input::{Input, InputAt}; -use crate::prog::{InstPtr, Program}; -use crate::re_trait::Slot; -use crate::sparse::SparseSet; - -/// An NFA simulation matching engine. -#[derive(Debug)] -pub struct Fsm<'r, I> { - /// The sequence of opcodes (among other things) that is actually executed. - /// - /// The program may be byte oriented or Unicode codepoint oriented. - prog: &'r Program, - /// An explicit stack used for following epsilon transitions. (This is - /// borrowed from the cache.) - stack: &'r mut Vec, - /// The input to search. - input: I, -} - -/// A cached allocation that can be reused on each execution. -#[derive(Clone, Debug)] -pub struct Cache { - /// A pair of ordered sets for tracking NFA states. - clist: Threads, - nlist: Threads, - /// An explicit stack used for following epsilon transitions. - stack: Vec, -} - -/// An ordered set of NFA states and their captures. 
-#[derive(Clone, Debug)] -struct Threads { - /// An ordered set of opcodes (each opcode is an NFA state). - set: SparseSet, - /// Captures for every NFA state. - /// - /// It is stored in row-major order, where the columns are the capture - /// slots and the rows are the states. - caps: Vec, - /// The number of capture slots stored per thread. (Every capture has - /// two slots.) - slots_per_thread: usize, -} - -/// A representation of an explicit stack frame when following epsilon -/// transitions. This is used to avoid recursion. -#[derive(Clone, Debug)] -enum FollowEpsilon { - /// Follow transitions at the given instruction pointer. - IP(InstPtr), - /// Restore the capture slot with the given position in the input. - Capture { slot: usize, pos: Slot }, -} - -impl Cache { - /// Create a new allocation used by the NFA machine to record execution - /// and captures. - pub fn new(_prog: &Program) -> Self { - Cache { clist: Threads::new(), nlist: Threads::new(), stack: vec![] } - } -} - -impl<'r, I: Input> Fsm<'r, I> { - /// Execute the NFA matching engine. - /// - /// If there's a match, `exec` returns `true` and populates the given - /// captures accordingly. 
- pub fn exec( - prog: &'r Program, - cache: &ProgramCache, - matches: &mut [bool], - slots: &mut [Slot], - quit_after_match: bool, - input: I, - start: usize, - end: usize, - ) -> bool { - let mut cache = cache.borrow_mut(); - let cache = &mut cache.pikevm; - cache.clist.resize(prog.len(), prog.captures.len()); - cache.nlist.resize(prog.len(), prog.captures.len()); - let at = input.at(start); - Fsm { prog, stack: &mut cache.stack, input }.exec_( - &mut cache.clist, - &mut cache.nlist, - matches, - slots, - quit_after_match, - at, - end, - ) - } - - fn exec_( - &mut self, - mut clist: &mut Threads, - mut nlist: &mut Threads, - matches: &mut [bool], - slots: &mut [Slot], - quit_after_match: bool, - mut at: InputAt, - end: usize, - ) -> bool { - let mut matched = false; - let mut all_matched = false; - clist.set.clear(); - nlist.set.clear(); - 'LOOP: loop { - if clist.set.is_empty() { - // Three ways to bail out when our current set of threads is - // empty. - // - // 1. We have a match---so we're done exploring any possible - // alternatives. Time to quit. (We can't do this if we're - // looking for matches for multiple regexes, unless we know - // they all matched.) - // - // 2. If the expression starts with a '^' we can terminate as - // soon as the last thread dies. - if (matched && matches.len() <= 1) - || all_matched - || (!at.is_start() && self.prog.is_anchored_start) - { - break; - } - - // 3. If there's a literal prefix for the program, try to - // jump ahead quickly. If it can't be found, then we can - // bail out early. - if !self.prog.prefixes.is_empty() { - at = match self.input.prefix_at(&self.prog.prefixes, at) { - None => break, - Some(at) => at, - }; - } - } - - // This simulates a preceding '.*?' for every regex by adding - // a state starting at the current position in the input for the - // beginning of the program only if we don't already have a match. 
- if clist.set.is_empty() - || (!self.prog.is_anchored_start && !all_matched) - { - self.add(&mut clist, slots, 0, at); - } - // The previous call to "add" actually inspects the position just - // before the current character. For stepping through the machine, - // we can to look at the current character, so we advance the - // input. - let at_next = self.input.at(at.next_pos()); - for i in 0..clist.set.len() { - let ip = clist.set[i]; - if self.step( - &mut nlist, - matches, - slots, - clist.caps(ip), - ip, - at, - at_next, - ) { - matched = true; - all_matched = all_matched || matches.iter().all(|&b| b); - if quit_after_match { - // If we only care if a match occurs (not its - // position), then we can quit right now. - break 'LOOP; - } - if self.prog.matches.len() == 1 { - // We don't need to check the rest of the threads - // in this set because we've matched something - // ("leftmost-first"). However, we still need to check - // threads in the next set to support things like - // greedy matching. - // - // This is only true on normal regexes. For regex sets, - // we need to mush on to observe other matches. - break; - } - } - } - if at.pos() >= end { - break; - } - at = at_next; - mem::swap(clist, nlist); - nlist.set.clear(); - } - matched - } - - /// Step through the input, one token (byte or codepoint) at a time. - /// - /// nlist is the set of states that will be processed on the next token - /// in the input. - /// - /// caps is the set of captures passed by the caller of the NFA. They are - /// written to only when a match state is visited. - /// - /// thread_caps is the set of captures set for the current NFA state, ip. - /// - /// at and at_next are the current and next positions in the input. at or - /// at_next may be EOF. 
- fn step( - &mut self, - nlist: &mut Threads, - matches: &mut [bool], - slots: &mut [Slot], - thread_caps: &mut [Option], - ip: usize, - at: InputAt, - at_next: InputAt, - ) -> bool { - use crate::prog::Inst::*; - match self.prog[ip] { - Match(match_slot) => { - if match_slot < matches.len() { - matches[match_slot] = true; - } - for (slot, val) in slots.iter_mut().zip(thread_caps.iter()) { - *slot = *val; - } - true - } - Char(ref inst) => { - if inst.c == at.char() { - self.add(nlist, thread_caps, inst.goto, at_next); - } - false - } - Ranges(ref inst) => { - if inst.matches(at.char()) { - self.add(nlist, thread_caps, inst.goto, at_next); - } - false - } - Bytes(ref inst) => { - if let Some(b) = at.byte() { - if inst.matches(b) { - self.add(nlist, thread_caps, inst.goto, at_next); - } - } - false - } - EmptyLook(_) | Save(_) | Split(_) => false, - } - } - - /// Follows epsilon transitions and adds them for processing to nlist, - /// starting at and including ip. - fn add( - &mut self, - nlist: &mut Threads, - thread_caps: &mut [Option], - ip: usize, - at: InputAt, - ) { - self.stack.push(FollowEpsilon::IP(ip)); - while let Some(frame) = self.stack.pop() { - match frame { - FollowEpsilon::IP(ip) => { - self.add_step(nlist, thread_caps, ip, at); - } - FollowEpsilon::Capture { slot, pos } => { - thread_caps[slot] = pos; - } - } - } - } - - /// A helper function for add that avoids excessive pushing to the stack. - fn add_step( - &mut self, - nlist: &mut Threads, - thread_caps: &mut [Option], - mut ip: usize, - at: InputAt, - ) { - // Instead of pushing and popping to the stack, we mutate ip as we - // traverse the set of states. We only push to the stack when we - // absolutely need recursion (restoring captures or following a - // branch). - use crate::prog::Inst::*; - loop { - // Don't visit states we've already added. 
- if nlist.set.contains(ip) { - return; - } - nlist.set.insert(ip); - match self.prog[ip] { - EmptyLook(ref inst) => { - if self.input.is_empty_match(at, inst) { - ip = inst.goto; - } - } - Save(ref inst) => { - if inst.slot < thread_caps.len() { - self.stack.push(FollowEpsilon::Capture { - slot: inst.slot, - pos: thread_caps[inst.slot], - }); - thread_caps[inst.slot] = Some(at.pos()); - } - ip = inst.goto; - } - Split(ref inst) => { - self.stack.push(FollowEpsilon::IP(inst.goto2)); - ip = inst.goto1; - } - Match(_) | Char(_) | Ranges(_) | Bytes(_) => { - let t = &mut nlist.caps(ip); - for (slot, val) in t.iter_mut().zip(thread_caps.iter()) { - *slot = *val; - } - return; - } - } - } - } -} - -impl Threads { - fn new() -> Self { - Threads { set: SparseSet::new(0), caps: vec![], slots_per_thread: 0 } - } - - fn resize(&mut self, num_insts: usize, ncaps: usize) { - if num_insts == self.set.capacity() { - return; - } - self.slots_per_thread = ncaps * 2; - self.set = SparseSet::new(num_insts); - self.caps = vec![None; self.slots_per_thread * num_insts]; - } - - fn caps(&mut self, pc: usize) -> &mut [Option] { - let i = pc * self.slots_per_thread; - &mut self.caps[i..i + self.slots_per_thread] - } -} diff --git a/vendor/regex/src/pool.rs b/vendor/regex/src/pool.rs deleted file mode 100644 index 6a6f15b..0000000 --- a/vendor/regex/src/pool.rs +++ /dev/null @@ -1,333 +0,0 @@ -// This module provides a relatively simple thread-safe pool of reusable -// objects. For the most part, it's implemented by a stack represented by a -// Mutex>. It has one small trick: because unlocking a mutex is somewhat -// costly, in the case where a pool is accessed by the first thread that tried -// to get a value, we bypass the mutex. Here are some benchmarks showing the -// difference. 
-// -// 1) misc::anchored_literal_long_non_match 21 (18571 MB/s) -// 2) misc::anchored_literal_long_non_match 107 (3644 MB/s) -// 3) misc::anchored_literal_long_non_match 45 (8666 MB/s) -// 4) misc::anchored_literal_long_non_match 19 (20526 MB/s) -// -// (1) represents our baseline: the master branch at the time of writing when -// using the 'thread_local' crate to implement the pool below. -// -// (2) represents a naive pool implemented completely via Mutex>. There -// is no special trick for bypassing the mutex. -// -// (3) is the same as (2), except it uses Mutex>>. It is twice as -// fast because a Box is much smaller than the T we use with a Pool in this -// crate. So pushing and popping a Box from a Vec is quite a bit faster -// than for T. -// -// (4) is the same as (3), but with the trick for bypassing the mutex in the -// case of the first-to-get thread. -// -// Why move off of thread_local? Even though (4) is a hair faster than (1) -// above, this was not the main goal. The main goal was to move off of -// thread_local and find a way to *simply* re-capture some of its speed for -// regex's specific case. So again, why move off of it? The *primary* reason is -// because of memory leaks. See https://github.com/rust-lang/regex/issues/362 -// for example. (Why do I want it to be simple? Well, I suppose what I mean is, -// "use as much safe code as possible to minimize risk and be as sure as I can -// be that it is correct.") -// -// My guess is that the thread_local design is probably not appropriate for -// regex since its memory usage scales to the number of active threads that -// have used a regex, where as the pool below scales to the number of threads -// that simultaneously use a regex. While neither case permits contraction, -// since we own the pool data structure below, we can add contraction if a -// clear use case pops up in the wild. 
More pressingly though, it seems that -// there are at least some use case patterns where one might have many threads -// sitting around that might have used a regex at one point. While thread_local -// does try to reuse space previously used by a thread that has since stopped, -// its maximal memory usage still scales with the total number of active -// threads. In contrast, the pool below scales with the total number of threads -// *simultaneously* using the pool. The hope is that this uses less memory -// overall. And if it doesn't, we can hopefully tune it somehow. -// -// It seems that these sort of conditions happen frequently -// in FFI inside of other more "managed" languages. This was -// mentioned in the issue linked above, and also mentioned here: -// https://github.com/BurntSushi/rure-go/issues/3. And in particular, users -// confirm that disabling the use of thread_local resolves the leak. -// -// There were other weaker reasons for moving off of thread_local as well. -// Namely, at the time, I was looking to reduce dependencies. And for something -// like regex, maintenance can be simpler when we own the full dependency tree. - -use std::panic::{RefUnwindSafe, UnwindSafe}; -use std::sync::atomic::{AtomicUsize, Ordering}; -use std::sync::Mutex; - -/// An atomic counter used to allocate thread IDs. -static COUNTER: AtomicUsize = AtomicUsize::new(1); - -thread_local!( - /// A thread local used to assign an ID to a thread. - static THREAD_ID: usize = { - let next = COUNTER.fetch_add(1, Ordering::Relaxed); - // SAFETY: We cannot permit the reuse of thread IDs since reusing a - // thread ID might result in more than one thread "owning" a pool, - // and thus, permit accessing a mutable value from multiple threads - // simultaneously without synchronization. The intent of this panic is - // to be a sanity check. It is not expected that the thread ID space - // will actually be exhausted in practice. 
- // - // This checks that the counter never wraps around, since atomic - // addition wraps around on overflow. - if next == 0 { - panic!("regex: thread ID allocation space exhausted"); - } - next - }; -); - -/// The type of the function used to create values in a pool when the pool is -/// empty and the caller requests one. -type CreateFn = - Box T + Send + Sync + UnwindSafe + RefUnwindSafe + 'static>; - -/// A simple thread safe pool for reusing values. -/// -/// Getting a value out comes with a guard. When that guard is dropped, the -/// value is automatically put back in the pool. -/// -/// A Pool impls Sync when T is Send (even if it's not Sync). This means -/// that T can use interior mutability. This is possible because a pool is -/// guaranteed to provide a value to exactly one thread at any time. -/// -/// Currently, a pool never contracts in size. Its size is proportional to the -/// number of simultaneous uses. -pub struct Pool { - /// A stack of T values to hand out. These are used when a Pool is - /// accessed by a thread that didn't create it. - stack: Mutex>>, - /// A function to create more T values when stack is empty and a caller - /// has requested a T. - create: CreateFn, - /// The ID of the thread that owns this pool. The owner is the thread - /// that makes the first call to 'get'. When the owner calls 'get', it - /// gets 'owner_val' directly instead of returning a T from 'stack'. - /// See comments elsewhere for details, but this is intended to be an - /// optimization for the common case that makes getting a T faster. - /// - /// It is initialized to a value of zero (an impossible thread ID) as a - /// sentinel to indicate that it is unowned. - owner: AtomicUsize, - /// A value to return when the caller is in the same thread that created - /// the Pool. - owner_val: T, -} - -// SAFETY: Since we want to use a Pool from multiple threads simultaneously -// behind an Arc, we need for it to be Sync. 
In cases where T is sync, Pool -// would be Sync. However, since we use a Pool to store mutable scratch space, -// we wind up using a T that has interior mutability and is thus itself not -// Sync. So what we *really* want is for our Pool to by Sync even when T is -// not Sync (but is at least Send). -// -// The only non-sync aspect of a Pool is its 'owner_val' field, which is used -// to implement faster access to a pool value in the common case of a pool -// being accessed in the same thread in which it was created. The 'stack' field -// is also shared, but a Mutex where T: Send is already Sync. So we only -// need to worry about 'owner_val'. -// -// The key is to guarantee that 'owner_val' can only ever be accessed from one -// thread. In our implementation below, we guarantee this by only returning the -// 'owner_val' when the ID of the current thread matches the ID of the thread -// that created the Pool. Since this can only ever be one thread, it follows -// that only one thread can access 'owner_val' at any point in time. Thus, it -// is safe to declare that Pool is Sync when T is Send. -// -// NOTE: It would also be possible to make the owning thread be the *first* -// thread that tries to get a value out of a Pool. However, the current -// implementation is a little simpler and it's not clear if making the first -// thread (rather than the creating thread) is meaningfully better. -// -// If there is a way to achieve our performance goals using safe code, then -// I would very much welcome a patch. As it stands, the implementation below -// tries to balance safety with performance. The case where a Regex is used -// from multiple threads simultaneously will suffer a bit since getting a cache -// will require unlocking a mutex. 
-unsafe impl Sync for Pool {} - -impl ::std::fmt::Debug for Pool { - fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result { - f.debug_struct("Pool") - .field("stack", &self.stack) - .field("owner", &self.owner) - .field("owner_val", &self.owner_val) - .finish() - } -} - -/// A guard that is returned when a caller requests a value from the pool. -/// -/// The purpose of the guard is to use RAII to automatically put the value back -/// in the pool once it's dropped. -#[derive(Debug)] -pub struct PoolGuard<'a, T: Send> { - /// The pool that this guard is attached to. - pool: &'a Pool, - /// This is None when the guard represents the special "owned" value. In - /// which case, the value is retrieved from 'pool.owner_val'. - value: Option>, -} - -impl Pool { - /// Create a new pool. The given closure is used to create values in the - /// pool when necessary. - pub fn new(create: CreateFn) -> Pool { - let owner = AtomicUsize::new(0); - let owner_val = create(); - Pool { stack: Mutex::new(vec![]), create, owner, owner_val } - } - - /// Get a value from the pool. The caller is guaranteed to have exclusive - /// access to the given value. - /// - /// Note that there is no guarantee provided about which value in the - /// pool is returned. That is, calling get, dropping the guard (causing - /// the value to go back into the pool) and then calling get again is NOT - /// guaranteed to return the same value received in the first get call. - #[cfg_attr(feature = "perf-inline", inline(always))] - pub fn get(&self) -> PoolGuard<'_, T> { - // Our fast path checks if the caller is the thread that "owns" this - // pool. Or stated differently, whether it is the first thread that - // tried to extract a value from the pool. If it is, then we can return - // a T to the caller without going through a mutex. - // - // SAFETY: We must guarantee that only one thread gets access to this - // value. 
Since a thread is uniquely identified by the THREAD_ID thread - // local, it follows that is the caller's thread ID is equal to the - // owner, then only one thread may receive this value. - let caller = THREAD_ID.with(|id| *id); - let owner = self.owner.load(Ordering::Relaxed); - if caller == owner { - return self.guard_owned(); - } - self.get_slow(caller, owner) - } - - /// This is the "slow" version that goes through a mutex to pop an - /// allocated value off a stack to return to the caller. (Or, if the stack - /// is empty, a new value is created.) - /// - /// If the pool has no owner, then this will set the owner. - #[cold] - fn get_slow(&self, caller: usize, owner: usize) -> PoolGuard<'_, T> { - use std::sync::atomic::Ordering::Relaxed; - - if owner == 0 { - // The sentinel 0 value means this pool is not yet owned. We - // try to atomically set the owner. If we do, then this thread - // becomes the owner and we can return a guard that represents - // the special T for the owner. - let res = self.owner.compare_exchange(0, caller, Relaxed, Relaxed); - if res.is_ok() { - return self.guard_owned(); - } - } - let mut stack = self.stack.lock().unwrap(); - let value = match stack.pop() { - None => Box::new((self.create)()), - Some(value) => value, - }; - self.guard_stack(value) - } - - /// Puts a value back into the pool. Callers don't need to call this. Once - /// the guard that's returned by 'get' is dropped, it is put back into the - /// pool automatically. - fn put(&self, value: Box) { - let mut stack = self.stack.lock().unwrap(); - stack.push(value); - } - - /// Create a guard that represents the special owned T. - fn guard_owned(&self) -> PoolGuard<'_, T> { - PoolGuard { pool: self, value: None } - } - - /// Create a guard that contains a value from the pool's stack. - fn guard_stack(&self, value: Box) -> PoolGuard<'_, T> { - PoolGuard { pool: self, value: Some(value) } - } -} - -impl<'a, T: Send> PoolGuard<'a, T> { - /// Return the underlying value. 
- pub fn value(&self) -> &T { - match self.value { - None => &self.pool.owner_val, - Some(ref v) => &**v, - } - } -} - -impl<'a, T: Send> Drop for PoolGuard<'a, T> { - #[cfg_attr(feature = "perf-inline", inline(always))] - fn drop(&mut self) { - if let Some(value) = self.value.take() { - self.pool.put(value); - } - } -} - -#[cfg(test)] -mod tests { - use std::panic::{RefUnwindSafe, UnwindSafe}; - - use super::*; - - #[test] - fn oibits() { - use crate::exec::ProgramCache; - - fn has_oibits() {} - has_oibits::>(); - } - - // Tests that Pool implements the "single owner" optimization. That is, the - // thread that first accesses the pool gets its own copy, while all other - // threads get distinct copies. - #[test] - fn thread_owner_optimization() { - use std::cell::RefCell; - use std::sync::Arc; - - let pool: Arc>>> = - Arc::new(Pool::new(Box::new(|| RefCell::new(vec!['a'])))); - pool.get().value().borrow_mut().push('x'); - - let pool1 = pool.clone(); - let t1 = std::thread::spawn(move || { - let guard = pool1.get(); - let v = guard.value(); - v.borrow_mut().push('y'); - }); - - let pool2 = pool.clone(); - let t2 = std::thread::spawn(move || { - let guard = pool2.get(); - let v = guard.value(); - v.borrow_mut().push('z'); - }); - - t1.join().unwrap(); - t2.join().unwrap(); - - // If we didn't implement the single owner optimization, then one of - // the threads above is likely to have mutated the [a, x] vec that - // we stuffed in the pool before spawning the threads. But since - // neither thread was first to access the pool, and because of the - // optimization, we should be guaranteed that neither thread mutates - // the special owned pool value. - // - // (Technically this is an implementation detail and not a contract of - // Pool's API.) 
- assert_eq!(vec!['a', 'x'], *pool.get().value().borrow()); - } -} diff --git a/vendor/regex/src/prog.rs b/vendor/regex/src/prog.rs deleted file mode 100644 index 100862c..0000000 --- a/vendor/regex/src/prog.rs +++ /dev/null @@ -1,451 +0,0 @@ -use std::cmp::Ordering; -use std::collections::HashMap; -use std::fmt; -use std::mem; -use std::ops::Deref; -use std::slice; -use std::sync::Arc; - -use crate::input::Char; -use crate::literal::LiteralSearcher; - -/// `InstPtr` represents the index of an instruction in a regex program. -pub type InstPtr = usize; - -/// Program is a sequence of instructions and various facts about thos -/// instructions. -#[derive(Clone)] -pub struct Program { - /// A sequence of instructions that represents an NFA. - pub insts: Vec, - /// Pointers to each Match instruction in the sequence. - /// - /// This is always length 1 unless this program represents a regex set. - pub matches: Vec, - /// The ordered sequence of all capture groups extracted from the AST. - /// Unnamed groups are `None`. - pub captures: Vec>, - /// Pointers to all named capture groups into `captures`. - pub capture_name_idx: Arc>, - /// If the number of capture groups is the same for all possible matches, - /// then this is that number. - pub static_captures_len: Option, - /// A pointer to the start instruction. This can vary depending on how - /// the program was compiled. For example, programs for use with the DFA - /// engine have a `.*?` inserted at the beginning of unanchored regular - /// expressions. The actual starting point of the program is after the - /// `.*?`. - pub start: InstPtr, - /// A set of equivalence classes for discriminating bytes in the compiled - /// program. - pub byte_classes: Vec, - /// When true, this program can only match valid UTF-8. - pub only_utf8: bool, - /// When true, this program uses byte range instructions instead of Unicode - /// range instructions. - pub is_bytes: bool, - /// When true, the program is compiled for DFA matching. 
For example, this - /// implies `is_bytes` and also inserts a preceding `.*?` for unanchored - /// regexes. - pub is_dfa: bool, - /// When true, the program matches text in reverse (for use only in the - /// DFA). - pub is_reverse: bool, - /// Whether the regex must match from the start of the input. - pub is_anchored_start: bool, - /// Whether the regex must match at the end of the input. - pub is_anchored_end: bool, - /// Whether this program contains a Unicode word boundary instruction. - pub has_unicode_word_boundary: bool, - /// A possibly empty machine for very quickly matching prefix literals. - pub prefixes: LiteralSearcher, - /// A limit on the size of the cache that the DFA is allowed to use while - /// matching. - /// - /// The cache limit specifies approximately how much space we're willing to - /// give to the state cache. Once the state cache exceeds the size, it is - /// wiped and all states must be re-computed. - /// - /// Note that this value does not impact correctness. It can be set to 0 - /// and the DFA will run just fine. (It will only ever store exactly one - /// state in the cache, and will likely run very slowly, but it will work.) - /// - /// Also note that this limit is *per thread of execution*. That is, - /// if the same regex is used to search text across multiple threads - /// simultaneously, then the DFA cache is not shared. Instead, copies are - /// made. - pub dfa_size_limit: usize, -} - -impl Program { - /// Creates an empty instruction sequence. Fields are given default - /// values. 
- pub fn new() -> Self { - Program { - insts: vec![], - matches: vec![], - captures: vec![], - capture_name_idx: Arc::new(HashMap::new()), - static_captures_len: None, - start: 0, - byte_classes: vec![0; 256], - only_utf8: true, - is_bytes: false, - is_dfa: false, - is_reverse: false, - is_anchored_start: false, - is_anchored_end: false, - has_unicode_word_boundary: false, - prefixes: LiteralSearcher::empty(), - dfa_size_limit: 2 * (1 << 20), - } - } - - /// If pc is an index to a no-op instruction (like Save), then return the - /// next pc that is not a no-op instruction. - pub fn skip(&self, mut pc: usize) -> usize { - loop { - match self[pc] { - Inst::Save(ref i) => pc = i.goto, - _ => return pc, - } - } - } - - /// Return true if and only if an execution engine at instruction `pc` will - /// always lead to a match. - pub fn leads_to_match(&self, pc: usize) -> bool { - if self.matches.len() > 1 { - // If we have a regex set, then we have more than one ending - // state, so leading to one of those states is generally - // meaningless. - return false; - } - match self[self.skip(pc)] { - Inst::Match(_) => true, - _ => false, - } - } - - /// Returns true if the current configuration demands that an implicit - /// `.*?` be prepended to the instruction sequence. - pub fn needs_dotstar(&self) -> bool { - self.is_dfa && !self.is_reverse && !self.is_anchored_start - } - - /// Returns true if this program uses Byte instructions instead of - /// Char/Range instructions. - pub fn uses_bytes(&self) -> bool { - self.is_bytes || self.is_dfa - } - - /// Returns true if this program exclusively matches valid UTF-8 bytes. - /// - /// That is, if an invalid UTF-8 byte is seen, then no match is possible. - pub fn only_utf8(&self) -> bool { - self.only_utf8 - } - - /// Return the approximate heap usage of this instruction sequence in - /// bytes. 
- pub fn approximate_size(&self) -> usize { - // The only instruction that uses heap space is Ranges (for - // Unicode codepoint programs) to store non-overlapping codepoint - // ranges. To keep this operation constant time, we ignore them. - (self.len() * mem::size_of::()) - + (self.matches.len() * mem::size_of::()) - + (self.captures.len() * mem::size_of::>()) - + (self.capture_name_idx.len() - * (mem::size_of::() + mem::size_of::())) - + (self.byte_classes.len() * mem::size_of::()) - + self.prefixes.approximate_size() - } -} - -impl Deref for Program { - type Target = [Inst]; - - #[cfg_attr(feature = "perf-inline", inline(always))] - fn deref(&self) -> &Self::Target { - &*self.insts - } -} - -impl fmt::Debug for Program { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - use self::Inst::*; - - fn with_goto(cur: usize, goto: usize, fmtd: String) -> String { - if goto == cur + 1 { - fmtd - } else { - format!("{} (goto: {})", fmtd, goto) - } - } - - fn visible_byte(b: u8) -> String { - use std::ascii::escape_default; - let escaped = escape_default(b).collect::>(); - String::from_utf8_lossy(&escaped).into_owned() - } - - for (pc, inst) in self.iter().enumerate() { - match *inst { - Match(slot) => write!(f, "{:04} Match({:?})", pc, slot)?, - Save(ref inst) => { - let s = format!("{:04} Save({})", pc, inst.slot); - write!(f, "{}", with_goto(pc, inst.goto, s))?; - } - Split(ref inst) => { - write!( - f, - "{:04} Split({}, {})", - pc, inst.goto1, inst.goto2 - )?; - } - EmptyLook(ref inst) => { - let s = format!("{:?}", inst.look); - write!(f, "{:04} {}", pc, with_goto(pc, inst.goto, s))?; - } - Char(ref inst) => { - let s = format!("{:?}", inst.c); - write!(f, "{:04} {}", pc, with_goto(pc, inst.goto, s))?; - } - Ranges(ref inst) => { - let ranges = inst - .ranges - .iter() - .map(|r| format!("{:?}-{:?}", r.0, r.1)) - .collect::>() - .join(", "); - write!( - f, - "{:04} {}", - pc, - with_goto(pc, inst.goto, ranges) - )?; - } - Bytes(ref inst) => { - let s = 
format!( - "Bytes({}, {})", - visible_byte(inst.start), - visible_byte(inst.end) - ); - write!(f, "{:04} {}", pc, with_goto(pc, inst.goto, s))?; - } - } - if pc == self.start { - write!(f, " (start)")?; - } - writeln!(f)?; - } - Ok(()) - } -} - -impl<'a> IntoIterator for &'a Program { - type Item = &'a Inst; - type IntoIter = slice::Iter<'a, Inst>; - fn into_iter(self) -> Self::IntoIter { - self.iter() - } -} - -/// Inst is an instruction code in a Regex program. -/// -/// Regrettably, a regex program either contains Unicode codepoint -/// instructions (Char and Ranges) or it contains byte instructions (Bytes). -/// A regex program can never contain both. -/// -/// It would be worth investigating splitting this into two distinct types and -/// then figuring out how to make the matching engines polymorphic over those -/// types without sacrificing performance. -/// -/// Other than the benefit of moving invariants into the type system, another -/// benefit is the decreased size. If we remove the `Char` and `Ranges` -/// instructions from the `Inst` enum, then its size shrinks from 32 bytes to -/// 24 bytes. (This is because of the removal of a `Box<[]>` in the `Ranges` -/// variant.) Given that byte based machines are typically much bigger than -/// their Unicode analogues (because they can decode UTF-8 directly), this ends -/// up being a pretty significant savings. -#[derive(Clone, Debug)] -pub enum Inst { - /// Match indicates that the program has reached a match state. - /// - /// The number in the match corresponds to the Nth logical regular - /// expression in this program. This index is always 0 for normal regex - /// programs. Values greater than 0 appear when compiling regex sets, and - /// each match instruction gets its own unique value. The value corresponds - /// to the Nth regex in the set. - Match(usize), - /// Save causes the program to save the current location of the input in - /// the slot indicated by InstSave. 
- Save(InstSave), - /// Split causes the program to diverge to one of two paths in the - /// program, preferring goto1 in InstSplit. - Split(InstSplit), - /// EmptyLook represents a zero-width assertion in a regex program. A - /// zero-width assertion does not consume any of the input text. - EmptyLook(InstEmptyLook), - /// Char requires the regex program to match the character in InstChar at - /// the current position in the input. - Char(InstChar), - /// Ranges requires the regex program to match the character at the current - /// position in the input with one of the ranges specified in InstRanges. - Ranges(InstRanges), - /// Bytes is like Ranges, except it expresses a single byte range. It is - /// used in conjunction with Split instructions to implement multi-byte - /// character classes. - Bytes(InstBytes), -} - -impl Inst { - /// Returns true if and only if this is a match instruction. - pub fn is_match(&self) -> bool { - match *self { - Inst::Match(_) => true, - _ => false, - } - } -} - -/// Representation of the Save instruction. -#[derive(Clone, Debug)] -pub struct InstSave { - /// The next location to execute in the program. - pub goto: InstPtr, - /// The capture slot (there are two slots for every capture in a regex, - /// including the zeroth capture for the entire match). - pub slot: usize, -} - -/// Representation of the Split instruction. -#[derive(Clone, Debug)] -pub struct InstSplit { - /// The first instruction to try. A match resulting from following goto1 - /// has precedence over a match resulting from following goto2. - pub goto1: InstPtr, - /// The second instruction to try. A match resulting from following goto1 - /// has precedence over a match resulting from following goto2. - pub goto2: InstPtr, -} - -/// Representation of the `EmptyLook` instruction. -#[derive(Clone, Debug)] -pub struct InstEmptyLook { - /// The next location to execute in the program if this instruction - /// succeeds. 
- pub goto: InstPtr, - /// The type of zero-width assertion to check. - pub look: EmptyLook, -} - -/// The set of zero-width match instructions. -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum EmptyLook { - /// Start of line or input. - StartLine, - /// End of line or input. - EndLine, - /// Start of input. - StartText, - /// End of input. - EndText, - /// Word character on one side and non-word character on other. - WordBoundary, - /// Word character on both sides or non-word character on both sides. - NotWordBoundary, - /// ASCII word boundary. - WordBoundaryAscii, - /// Not ASCII word boundary. - NotWordBoundaryAscii, -} - -/// Representation of the Char instruction. -#[derive(Clone, Debug)] -pub struct InstChar { - /// The next location to execute in the program if this instruction - /// succeeds. - pub goto: InstPtr, - /// The character to test. - pub c: char, -} - -/// Representation of the Ranges instruction. -#[derive(Clone, Debug)] -pub struct InstRanges { - /// The next location to execute in the program if this instruction - /// succeeds. - pub goto: InstPtr, - /// The set of Unicode scalar value ranges to test. - pub ranges: Box<[(char, char)]>, -} - -impl InstRanges { - /// Tests whether the given input character matches this instruction. - pub fn matches(&self, c: Char) -> bool { - // This speeds up the `match_class_unicode` benchmark by checking - // some common cases quickly without binary search. e.g., Matching - // a Unicode class on predominantly ASCII text. - for r in self.ranges.iter().take(4) { - if c < r.0 { - return false; - } - if c <= r.1 { - return true; - } - } - self.ranges - .binary_search_by(|r| { - if r.1 < c { - Ordering::Less - } else if r.0 > c { - Ordering::Greater - } else { - Ordering::Equal - } - }) - .is_ok() - } - - /// Return the number of distinct characters represented by all of the - /// ranges. 
- pub fn num_chars(&self) -> usize { - self.ranges - .iter() - .map(|&(s, e)| 1 + (e as u32) - (s as u32)) - .sum::() as usize - } -} - -/// Representation of the Bytes instruction. -#[derive(Clone, Debug)] -pub struct InstBytes { - /// The next location to execute in the program if this instruction - /// succeeds. - pub goto: InstPtr, - /// The start (inclusive) of this byte range. - pub start: u8, - /// The end (inclusive) of this byte range. - pub end: u8, -} - -impl InstBytes { - /// Returns true if and only if the given byte is in this range. - pub fn matches(&self, byte: u8) -> bool { - self.start <= byte && byte <= self.end - } -} - -#[cfg(test)] -mod test { - #[test] - #[cfg(target_pointer_width = "64")] - fn test_size_of_inst() { - use std::mem::size_of; - - use super::Inst; - - assert_eq!(32, size_of::()); - } -} diff --git a/vendor/regex/src/re_builder.rs b/vendor/regex/src/re_builder.rs deleted file mode 100644 index ee63836..0000000 --- a/vendor/regex/src/re_builder.rs +++ /dev/null @@ -1,421 +0,0 @@ -/// The set of user configurable options for compiling zero or more regexes. -#[derive(Clone, Debug)] -#[allow(missing_docs)] -pub struct RegexOptions { - pub pats: Vec, - pub size_limit: usize, - pub dfa_size_limit: usize, - pub nest_limit: u32, - pub case_insensitive: bool, - pub multi_line: bool, - pub dot_matches_new_line: bool, - pub swap_greed: bool, - pub ignore_whitespace: bool, - pub unicode: bool, - pub octal: bool, -} - -impl Default for RegexOptions { - fn default() -> Self { - RegexOptions { - pats: vec![], - size_limit: 10 * (1 << 20), - dfa_size_limit: 2 * (1 << 20), - nest_limit: 250, - case_insensitive: false, - multi_line: false, - dot_matches_new_line: false, - swap_greed: false, - ignore_whitespace: false, - unicode: true, - octal: false, - } - } -} - -macro_rules! 
define_builder { - ($name:ident, $regex_mod:ident, $only_utf8:expr) => { - pub mod $name { - use super::RegexOptions; - use crate::error::Error; - use crate::exec::ExecBuilder; - - use crate::$regex_mod::Regex; - - /// A configurable builder for a regular expression. - /// - /// A builder can be used to configure how the regex is built, for example, by - /// setting the default flags (which can be overridden in the expression - /// itself) or setting various limits. - #[derive(Debug)] - pub struct RegexBuilder(RegexOptions); - - impl RegexBuilder { - /// Create a new regular expression builder with the given pattern. - /// - /// If the pattern is invalid, then an error will be returned when - /// `build` is called. - pub fn new(pattern: &str) -> RegexBuilder { - let mut builder = RegexBuilder(RegexOptions::default()); - builder.0.pats.push(pattern.to_owned()); - builder - } - - /// Consume the builder and compile the regular expression. - /// - /// Note that calling `as_str` on the resulting `Regex` will produce the - /// pattern given to `new` verbatim. Notably, it will not incorporate any - /// of the flags set on this builder. - pub fn build(&self) -> Result { - ExecBuilder::new_options(self.0.clone()) - .only_utf8($only_utf8) - .build() - .map(Regex::from) - } - - /// Set the value for the case insensitive (`i`) flag. - /// - /// When enabled, letters in the pattern will match both upper case and - /// lower case variants. - pub fn case_insensitive( - &mut self, - yes: bool, - ) -> &mut RegexBuilder { - self.0.case_insensitive = yes; - self - } - - /// Set the value for the multi-line matching (`m`) flag. - /// - /// When enabled, `^` matches the beginning of lines and `$` matches the - /// end of lines. - /// - /// By default, they match beginning/end of the input. 
- pub fn multi_line(&mut self, yes: bool) -> &mut RegexBuilder { - self.0.multi_line = yes; - self - } - - /// Set the value for the any character (`s`) flag, where in `.` matches - /// anything when `s` is set and matches anything except for new line when - /// it is not set (the default). - /// - /// N.B. "matches anything" means "any byte" when Unicode is disabled and - /// means "any valid UTF-8 encoding of any Unicode scalar value" when - /// Unicode is enabled. - pub fn dot_matches_new_line( - &mut self, - yes: bool, - ) -> &mut RegexBuilder { - self.0.dot_matches_new_line = yes; - self - } - - /// Set the value for the greedy swap (`U`) flag. - /// - /// When enabled, a pattern like `a*` is lazy (tries to find shortest - /// match) and `a*?` is greedy (tries to find longest match). - /// - /// By default, `a*` is greedy and `a*?` is lazy. - pub fn swap_greed(&mut self, yes: bool) -> &mut RegexBuilder { - self.0.swap_greed = yes; - self - } - - /// Set the value for the ignore whitespace (`x`) flag. - /// - /// When enabled, whitespace such as new lines and spaces will be ignored - /// between expressions of the pattern, and `#` can be used to start a - /// comment until the next new line. - pub fn ignore_whitespace( - &mut self, - yes: bool, - ) -> &mut RegexBuilder { - self.0.ignore_whitespace = yes; - self - } - - /// Set the value for the Unicode (`u`) flag. - /// - /// Enabled by default. When disabled, character classes such as `\w` only - /// match ASCII word characters instead of all Unicode word characters. - pub fn unicode(&mut self, yes: bool) -> &mut RegexBuilder { - self.0.unicode = yes; - self - } - - /// Whether to support octal syntax or not. - /// - /// Octal syntax is a little-known way of uttering Unicode codepoints in - /// a regular expression. For example, `a`, `\x61`, `\u0061` and - /// `\141` are all equivalent regular expressions, where the last example - /// shows octal syntax. 
- /// - /// While supporting octal syntax isn't in and of itself a problem, it does - /// make good error messages harder. That is, in PCRE based regex engines, - /// syntax like `\0` invokes a backreference, which is explicitly - /// unsupported in Rust's regex engine. However, many users expect it to - /// be supported. Therefore, when octal support is disabled, the error - /// message will explicitly mention that backreferences aren't supported. - /// - /// Octal syntax is disabled by default. - pub fn octal(&mut self, yes: bool) -> &mut RegexBuilder { - self.0.octal = yes; - self - } - - /// Set the approximate size limit of the compiled regular expression. - /// - /// This roughly corresponds to the number of bytes occupied by a single - /// compiled program. If the program exceeds this number, then a - /// compilation error is returned. - pub fn size_limit( - &mut self, - limit: usize, - ) -> &mut RegexBuilder { - self.0.size_limit = limit; - self - } - - /// Set the approximate size of the cache used by the DFA. - /// - /// This roughly corresponds to the number of bytes that the DFA will - /// use while searching. - /// - /// Note that this is a *per thread* limit. There is no way to set a global - /// limit. In particular, if a regex is used from multiple threads - /// simultaneously, then each thread may use up to the number of bytes - /// specified here. - pub fn dfa_size_limit( - &mut self, - limit: usize, - ) -> &mut RegexBuilder { - self.0.dfa_size_limit = limit; - self - } - - /// Set the nesting limit for this parser. - /// - /// The nesting limit controls how deep the abstract syntax tree is allowed - /// to be. If the AST exceeds the given limit (e.g., with too many nested - /// groups), then an error is returned by the parser. - /// - /// The purpose of this limit is to act as a heuristic to prevent stack - /// overflow for consumers that do structural induction on an `Ast` using - /// explicit recursion. 
While this crate never does this (instead using - /// constant stack space and moving the call stack to the heap), other - /// crates may. - /// - /// This limit is not checked until the entire Ast is parsed. Therefore, - /// if callers want to put a limit on the amount of heap space used, then - /// they should impose a limit on the length, in bytes, of the concrete - /// pattern string. In particular, this is viable since this parser - /// implementation will limit itself to heap space proportional to the - /// length of the pattern string. - /// - /// Note that a nest limit of `0` will return a nest limit error for most - /// patterns but not all. For example, a nest limit of `0` permits `a` but - /// not `ab`, since `ab` requires a concatenation, which results in a nest - /// depth of `1`. In general, a nest limit is not something that manifests - /// in an obvious way in the concrete syntax, therefore, it should not be - /// used in a granular way. - pub fn nest_limit(&mut self, limit: u32) -> &mut RegexBuilder { - self.0.nest_limit = limit; - self - } - } - } - }; -} - -define_builder!(bytes, re_bytes, false); -define_builder!(unicode, re_unicode, true); - -macro_rules! define_set_builder { - ($name:ident, $regex_mod:ident, $only_utf8:expr) => { - pub mod $name { - use super::RegexOptions; - use crate::error::Error; - use crate::exec::ExecBuilder; - - use crate::re_set::$regex_mod::RegexSet; - - /// A configurable builder for a set of regular expressions. - /// - /// A builder can be used to configure how the regexes are built, for example, - /// by setting the default flags (which can be overridden in the expression - /// itself) or setting various limits. - #[derive(Debug)] - pub struct RegexSetBuilder(RegexOptions); - - impl RegexSetBuilder { - /// Create a new regular expression builder with the given pattern. - /// - /// If the pattern is invalid, then an error will be returned when - /// `build` is called. 
- pub fn new(patterns: I) -> RegexSetBuilder - where - S: AsRef, - I: IntoIterator, - { - let mut builder = RegexSetBuilder(RegexOptions::default()); - for pat in patterns { - builder.0.pats.push(pat.as_ref().to_owned()); - } - builder - } - - /// Consume the builder and compile the regular expressions into a set. - pub fn build(&self) -> Result { - ExecBuilder::new_options(self.0.clone()) - .only_utf8($only_utf8) - .build() - .map(RegexSet::from) - } - - /// Set the value for the case insensitive (`i`) flag. - pub fn case_insensitive( - &mut self, - yes: bool, - ) -> &mut RegexSetBuilder { - self.0.case_insensitive = yes; - self - } - - /// Set the value for the multi-line matching (`m`) flag. - pub fn multi_line( - &mut self, - yes: bool, - ) -> &mut RegexSetBuilder { - self.0.multi_line = yes; - self - } - - /// Set the value for the any character (`s`) flag, where in `.` matches - /// anything when `s` is set and matches anything except for new line when - /// it is not set (the default). - /// - /// N.B. "matches anything" means "any byte" for `regex::bytes::RegexSet` - /// expressions and means "any Unicode scalar value" for `regex::RegexSet` - /// expressions. - pub fn dot_matches_new_line( - &mut self, - yes: bool, - ) -> &mut RegexSetBuilder { - self.0.dot_matches_new_line = yes; - self - } - - /// Set the value for the greedy swap (`U`) flag. - pub fn swap_greed( - &mut self, - yes: bool, - ) -> &mut RegexSetBuilder { - self.0.swap_greed = yes; - self - } - - /// Set the value for the ignore whitespace (`x`) flag. - pub fn ignore_whitespace( - &mut self, - yes: bool, - ) -> &mut RegexSetBuilder { - self.0.ignore_whitespace = yes; - self - } - - /// Set the value for the Unicode (`u`) flag. - pub fn unicode(&mut self, yes: bool) -> &mut RegexSetBuilder { - self.0.unicode = yes; - self - } - - /// Whether to support octal syntax or not. - /// - /// Octal syntax is a little-known way of uttering Unicode codepoints in - /// a regular expression. 
For example, `a`, `\x61`, `\u0061` and - /// `\141` are all equivalent regular expressions, where the last example - /// shows octal syntax. - /// - /// While supporting octal syntax isn't in and of itself a problem, it does - /// make good error messages harder. That is, in PCRE based regex engines, - /// syntax like `\0` invokes a backreference, which is explicitly - /// unsupported in Rust's regex engine. However, many users expect it to - /// be supported. Therefore, when octal support is disabled, the error - /// message will explicitly mention that backreferences aren't supported. - /// - /// Octal syntax is disabled by default. - pub fn octal(&mut self, yes: bool) -> &mut RegexSetBuilder { - self.0.octal = yes; - self - } - - /// Set the approximate size limit of the compiled regular expression. - /// - /// This roughly corresponds to the number of bytes occupied by a single - /// compiled program. If the program exceeds this number, then a - /// compilation error is returned. - pub fn size_limit( - &mut self, - limit: usize, - ) -> &mut RegexSetBuilder { - self.0.size_limit = limit; - self - } - - /// Set the approximate size of the cache used by the DFA. - /// - /// This roughly corresponds to the number of bytes that the DFA will - /// use while searching. - /// - /// Note that this is a *per thread* limit. There is no way to set a global - /// limit. In particular, if a regex is used from multiple threads - /// simultaneously, then each thread may use up to the number of bytes - /// specified here. - pub fn dfa_size_limit( - &mut self, - limit: usize, - ) -> &mut RegexSetBuilder { - self.0.dfa_size_limit = limit; - self - } - - /// Set the nesting limit for this parser. - /// - /// The nesting limit controls how deep the abstract syntax tree is allowed - /// to be. If the AST exceeds the given limit (e.g., with too many nested - /// groups), then an error is returned by the parser. 
- /// - /// The purpose of this limit is to act as a heuristic to prevent stack - /// overflow for consumers that do structural induction on an `Ast` using - /// explicit recursion. While this crate never does this (instead using - /// constant stack space and moving the call stack to the heap), other - /// crates may. - /// - /// This limit is not checked until the entire Ast is parsed. Therefore, - /// if callers want to put a limit on the amount of heap space used, then - /// they should impose a limit on the length, in bytes, of the concrete - /// pattern string. In particular, this is viable since this parser - /// implementation will limit itself to heap space proportional to the - /// length of the pattern string. - /// - /// Note that a nest limit of `0` will return a nest limit error for most - /// patterns but not all. For example, a nest limit of `0` permits `a` but - /// not `ab`, since `ab` requires a concatenation, which results in a nest - /// depth of `1`. In general, a nest limit is not something that manifests - /// in an obvious way in the concrete syntax, therefore, it should not be - /// used in a granular way. 
- pub fn nest_limit( - &mut self, - limit: u32, - ) -> &mut RegexSetBuilder { - self.0.nest_limit = limit; - self - } - } - } - }; -} - -define_set_builder!(set_bytes, bytes, false); -define_set_builder!(set_unicode, unicode, true); diff --git a/vendor/regex/src/re_bytes.rs b/vendor/regex/src/re_bytes.rs deleted file mode 100644 index e3a3b01..0000000 --- a/vendor/regex/src/re_bytes.rs +++ /dev/null @@ -1,1372 +0,0 @@ -use std::borrow::Cow; -use std::collections::HashMap; -use std::fmt; -use std::iter::FusedIterator; -use std::ops::{Index, Range}; -use std::str::FromStr; -use std::sync::Arc; - -use crate::find_byte::find_byte; - -use crate::error::Error; -use crate::exec::{Exec, ExecNoSync}; -use crate::expand::expand_bytes; -use crate::re_builder::bytes::RegexBuilder; -use crate::re_trait::{self, RegularExpression, SubCapturesPosIter}; - -/// Match represents a single match of a regex in a haystack. -/// -/// The lifetime parameter `'t` refers to the lifetime of the matched text. -#[derive(Copy, Clone, Eq, PartialEq)] -pub struct Match<'t> { - text: &'t [u8], - start: usize, - end: usize, -} - -impl<'t> Match<'t> { - /// Returns the starting byte offset of the match in the haystack. - #[inline] - pub fn start(&self) -> usize { - self.start - } - - /// Returns the ending byte offset of the match in the haystack. - #[inline] - pub fn end(&self) -> usize { - self.end - } - - /// Returns true if and only if this match has a length of zero. - #[inline] - pub fn is_empty(&self) -> bool { - self.start == self.end - } - - /// Returns the length, in bytes, of this match. - #[inline] - pub fn len(&self) -> usize { - self.end - self.start - } - - /// Returns the range over the starting and ending byte offsets of the - /// match in the haystack. - #[inline] - pub fn range(&self) -> Range { - self.start..self.end - } - - /// Returns the matched text. 
- #[inline] - pub fn as_bytes(&self) -> &'t [u8] { - &self.text[self.range()] - } - - /// Creates a new match from the given haystack and byte offsets. - #[inline] - fn new(haystack: &'t [u8], start: usize, end: usize) -> Match<'t> { - Match { text: haystack, start, end } - } -} - -impl<'t> std::fmt::Debug for Match<'t> { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - let mut fmt = f.debug_struct("Match"); - fmt.field("start", &self.start).field("end", &self.end); - if let Ok(s) = std::str::from_utf8(self.as_bytes()) { - fmt.field("bytes", &s); - } else { - // FIXME: It would be nice if this could be printed as a string - // with invalid UTF-8 replaced with hex escapes. A alloc would - // probably okay if that makes it easier, but regex-automata does - // (at time of writing) have internal routines that do this. So - // maybe we should expose them. - fmt.field("bytes", &self.as_bytes()); - } - fmt.finish() - } -} - -impl<'t> From> for Range { - fn from(m: Match<'t>) -> Range { - m.range() - } -} - -/// A compiled regular expression for matching arbitrary bytes. -/// -/// It can be used to search, split or replace text. All searching is done with -/// an implicit `.*?` at the beginning and end of an expression. To force an -/// expression to match the whole string (or a prefix or a suffix), you must -/// use an anchor like `^` or `$` (or `\A` and `\z`). -/// -/// Like the `Regex` type in the parent module, matches with this regex return -/// byte offsets into the search text. **Unlike** the parent `Regex` type, -/// these byte offsets may not correspond to UTF-8 sequence boundaries since -/// the regexes in this module can match arbitrary bytes. -#[derive(Clone)] -pub struct Regex(Exec); - -impl fmt::Display for Regex { - /// Shows the original regular expression. - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}", self.as_str()) - } -} - -impl fmt::Debug for Regex { - /// Shows the original regular expression. 
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Display::fmt(self, f) - } -} - -/// A constructor for Regex from an Exec. -/// -/// This is hidden because Exec isn't actually part of the public API. -#[doc(hidden)] -impl From for Regex { - fn from(exec: Exec) -> Regex { - Regex(exec) - } -} - -impl FromStr for Regex { - type Err = Error; - - /// Attempts to parse a string into a regular expression - fn from_str(s: &str) -> Result { - Regex::new(s) - } -} - -/// Core regular expression methods. -impl Regex { - /// Compiles a regular expression. Once compiled, it can be used repeatedly - /// to search, split or replace text in a string. - /// - /// If an invalid expression is given, then an error is returned. - pub fn new(re: &str) -> Result { - RegexBuilder::new(re).build() - } - - /// Returns true if and only if there is a match for the regex in the - /// string given. - /// - /// It is recommended to use this method if all you need to do is test - /// a match, since the underlying matching engine may be able to do less - /// work. - /// - /// # Example - /// - /// Test if some text contains at least one word with exactly 13 ASCII word - /// bytes: - /// - /// ```rust - /// # use regex::bytes::Regex; - /// # fn main() { - /// let text = b"I categorically deny having triskaidekaphobia."; - /// assert!(Regex::new(r"\b\w{13}\b").unwrap().is_match(text)); - /// # } - /// ``` - pub fn is_match(&self, text: &[u8]) -> bool { - self.is_match_at(text, 0) - } - - /// Returns the start and end byte range of the leftmost-first match in - /// `text`. If no match exists, then `None` is returned. - /// - /// Note that this should only be used if you want to discover the position - /// of the match. Testing the existence of a match is faster if you use - /// `is_match`. 
- /// - /// # Example - /// - /// Find the start and end location of the first word with exactly 13 - /// ASCII word bytes: - /// - /// ```rust - /// # use regex::bytes::Regex; - /// # fn main() { - /// let text = b"I categorically deny having triskaidekaphobia."; - /// let mat = Regex::new(r"\b\w{13}\b").unwrap().find(text).unwrap(); - /// assert_eq!((mat.start(), mat.end()), (2, 15)); - /// # } - /// ``` - pub fn find<'t>(&self, text: &'t [u8]) -> Option> { - self.find_at(text, 0) - } - - /// Returns an iterator for each successive non-overlapping match in - /// `text`, returning the start and end byte indices with respect to - /// `text`. - /// - /// # Example - /// - /// Find the start and end location of every word with exactly 13 ASCII - /// word bytes: - /// - /// ```rust - /// # use regex::bytes::Regex; - /// # fn main() { - /// let text = b"Retroactively relinquishing remunerations is reprehensible."; - /// for mat in Regex::new(r"\b\w{13}\b").unwrap().find_iter(text) { - /// println!("{:?}", mat); - /// } - /// # } - /// ``` - pub fn find_iter<'r, 't>(&'r self, text: &'t [u8]) -> Matches<'r, 't> { - Matches(self.0.searcher().find_iter(text)) - } - - /// Returns the capture groups corresponding to the leftmost-first - /// match in `text`. Capture group `0` always corresponds to the entire - /// match. If no match is found, then `None` is returned. - /// - /// You should only use `captures` if you need access to the location of - /// capturing group matches. Otherwise, `find` is faster for discovering - /// the location of the overall match. - /// - /// # Examples - /// - /// Say you have some text with movie names and their release years, - /// like "'Citizen Kane' (1941)". It'd be nice if we could search for text - /// looking like that, while also extracting the movie name and its release - /// year separately. 
- /// - /// ```rust - /// # use regex::bytes::Regex; - /// # fn main() { - /// let re = Regex::new(r"'([^']+)'\s+\((\d{4})\)").unwrap(); - /// let text = b"Not my favorite movie: 'Citizen Kane' (1941)."; - /// let caps = re.captures(text).unwrap(); - /// assert_eq!(caps.get(1).unwrap().as_bytes(), &b"Citizen Kane"[..]); - /// assert_eq!(caps.get(2).unwrap().as_bytes(), &b"1941"[..]); - /// assert_eq!(caps.get(0).unwrap().as_bytes(), &b"'Citizen Kane' (1941)"[..]); - /// // You can also access the groups by index using the Index notation. - /// // Note that this will panic on an invalid index. - /// assert_eq!(&caps[1], b"Citizen Kane"); - /// assert_eq!(&caps[2], b"1941"); - /// assert_eq!(&caps[0], b"'Citizen Kane' (1941)"); - /// # } - /// ``` - /// - /// Note that the full match is at capture group `0`. Each subsequent - /// capture group is indexed by the order of its opening `(`. - /// - /// We can make this example a bit clearer by using *named* capture groups: - /// - /// ```rust - /// # use regex::bytes::Regex; - /// # fn main() { - /// let re = Regex::new(r"'(?P[^']+)'\s+\((?P<year>\d{4})\)") - /// .unwrap(); - /// let text = b"Not my favorite movie: 'Citizen Kane' (1941)."; - /// let caps = re.captures(text).unwrap(); - /// assert_eq!(caps.name("title").unwrap().as_bytes(), b"Citizen Kane"); - /// assert_eq!(caps.name("year").unwrap().as_bytes(), b"1941"); - /// assert_eq!(caps.get(0).unwrap().as_bytes(), &b"'Citizen Kane' (1941)"[..]); - /// // You can also access the groups by name using the Index notation. - /// // Note that this will panic on an invalid group name. - /// assert_eq!(&caps["title"], b"Citizen Kane"); - /// assert_eq!(&caps["year"], b"1941"); - /// assert_eq!(&caps[0], b"'Citizen Kane' (1941)"); - /// - /// # } - /// ``` - /// - /// Here we name the capture groups, which we can access with the `name` - /// method or the `Index` notation with a `&str`. 
Note that the named - /// capture groups are still accessible with `get` or the `Index` notation - /// with a `usize`. - /// - /// The `0`th capture group is always unnamed, so it must always be - /// accessed with `get(0)` or `[0]`. - pub fn captures<'t>(&self, text: &'t [u8]) -> Option<Captures<'t>> { - self.captures_at(text, 0) - } - - /// Returns an iterator over all the non-overlapping capture groups matched - /// in `text`. This is operationally the same as `find_iter`, except it - /// yields information about capturing group matches. - /// - /// # Example - /// - /// We can use this to find all movie titles and their release years in - /// some text, where the movie is formatted like "'Title' (xxxx)": - /// - /// ```rust - /// # use std::str; use regex::bytes::Regex; - /// # fn main() { - /// let re = Regex::new(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)") - /// .unwrap(); - /// let text = b"'Citizen Kane' (1941), 'The Wizard of Oz' (1939), 'M' (1931)."; - /// for caps in re.captures_iter(text) { - /// let title = str::from_utf8(&caps["title"]).unwrap(); - /// let year = str::from_utf8(&caps["year"]).unwrap(); - /// println!("Movie: {:?}, Released: {:?}", title, year); - /// } - /// // Output: - /// // Movie: Citizen Kane, Released: 1941 - /// // Movie: The Wizard of Oz, Released: 1939 - /// // Movie: M, Released: 1931 - /// # } - /// ``` - pub fn captures_iter<'r, 't>( - &'r self, - text: &'t [u8], - ) -> CaptureMatches<'r, 't> { - CaptureMatches(self.0.searcher().captures_iter(text)) - } - - /// Returns an iterator of substrings of `text` delimited by a match of the - /// regular expression. Namely, each element of the iterator corresponds to - /// text that *isn't* matched by the regular expression. - /// - /// This method will *not* copy the text given. 
- /// - /// # Example - /// - /// To split a string delimited by arbitrary amounts of spaces or tabs: - /// - /// ```rust - /// # use regex::bytes::Regex; - /// # fn main() { - /// let re = Regex::new(r"[ \t]+").unwrap(); - /// let fields: Vec<&[u8]> = re.split(b"a b \t c\td e").collect(); - /// assert_eq!(fields, vec![ - /// &b"a"[..], &b"b"[..], &b"c"[..], &b"d"[..], &b"e"[..], - /// ]); - /// # } - /// ``` - pub fn split<'r, 't>(&'r self, text: &'t [u8]) -> Split<'r, 't> { - Split { finder: self.find_iter(text), last: 0 } - } - - /// Returns an iterator of at most `limit` substrings of `text` delimited - /// by a match of the regular expression. (A `limit` of `0` will return no - /// substrings.) Namely, each element of the iterator corresponds to text - /// that *isn't* matched by the regular expression. The remainder of the - /// string that is not split will be the last element in the iterator. - /// - /// This method will *not* copy the text given. - /// - /// # Example - /// - /// Get the first two words in some text: - /// - /// ```rust - /// # use regex::bytes::Regex; - /// # fn main() { - /// let re = Regex::new(r"\W+").unwrap(); - /// let fields: Vec<&[u8]> = re.splitn(b"Hey! How are you?", 3).collect(); - /// assert_eq!(fields, vec![&b"Hey"[..], &b"How"[..], &b"are you?"[..]]); - /// # } - /// ``` - pub fn splitn<'r, 't>( - &'r self, - text: &'t [u8], - limit: usize, - ) -> SplitN<'r, 't> { - SplitN { splits: self.split(text), n: limit } - } - - /// Replaces the leftmost-first match with the replacement provided. The - /// replacement can be a regular byte string (where `$N` and `$name` are - /// expanded to match capture groups) or a function that takes the matches' - /// `Captures` and returns the replaced byte string. - /// - /// If no match is found, then a copy of the byte string is returned - /// unchanged. 
- /// - /// # Replacement string syntax - /// - /// All instances of `$name` in the replacement text is replaced with the - /// corresponding capture group `name`. - /// - /// `name` may be an integer corresponding to the index of the - /// capture group (counted by order of opening parenthesis where `0` is the - /// entire match) or it can be a name (consisting of letters, digits or - /// underscores) corresponding to a named capture group. - /// - /// If `name` isn't a valid capture group (whether the name doesn't exist - /// or isn't a valid index), then it is replaced with the empty string. - /// - /// The longest possible name is used. e.g., `$1a` looks up the capture - /// group named `1a` and not the capture group at index `1`. To exert more - /// precise control over the name, use braces, e.g., `${1}a`. - /// - /// To write a literal `$` use `$$`. - /// - /// # Examples - /// - /// Note that this function is polymorphic with respect to the replacement. - /// In typical usage, this can just be a normal byte string: - /// - /// ```rust - /// # use regex::bytes::Regex; - /// # fn main() { - /// let re = Regex::new("[^01]+").unwrap(); - /// assert_eq!(re.replace(b"1078910", &b""[..]), &b"1010"[..]); - /// # } - /// ``` - /// - /// But anything satisfying the `Replacer` trait will work. For example, a - /// closure of type `|&Captures| -> Vec<u8>` provides direct access to the - /// captures corresponding to a match. 
This allows one to access capturing - /// group matches easily: - /// - /// ```rust - /// # use regex::bytes::Regex; - /// # use regex::bytes::Captures; fn main() { - /// let re = Regex::new(r"([^,\s]+),\s+(\S+)").unwrap(); - /// let result = re.replace(b"Springsteen, Bruce", |caps: &Captures| { - /// let mut replacement = caps[2].to_owned(); - /// replacement.push(b' '); - /// replacement.extend(&caps[1]); - /// replacement - /// }); - /// assert_eq!(result, &b"Bruce Springsteen"[..]); - /// # } - /// ``` - /// - /// But this is a bit cumbersome to use all the time. Instead, a simple - /// syntax is supported that expands `$name` into the corresponding capture - /// group. Here's the last example, but using this expansion technique - /// with named capture groups: - /// - /// ```rust - /// # use regex::bytes::Regex; - /// # fn main() { - /// let re = Regex::new(r"(?P<last>[^,\s]+),\s+(?P<first>\S+)").unwrap(); - /// let result = re.replace(b"Springsteen, Bruce", &b"$first $last"[..]); - /// assert_eq!(result, &b"Bruce Springsteen"[..]); - /// # } - /// ``` - /// - /// Note that using `$2` instead of `$first` or `$1` instead of `$last` - /// would produce the same result. To write a literal `$` use `$$`. - /// - /// Sometimes the replacement string requires use of curly braces to - /// delineate a capture group replacement and surrounding literal text. - /// For example, if we wanted to join two words together with an - /// underscore: - /// - /// ```rust - /// # use regex::bytes::Regex; - /// # fn main() { - /// let re = Regex::new(r"(?P<first>\w+)\s+(?P<second>\w+)").unwrap(); - /// let result = re.replace(b"deep fried", &b"${first}_$second"[..]); - /// assert_eq!(result, &b"deep_fried"[..]); - /// # } - /// ``` - /// - /// Without the curly braces, the capture group name `first_` would be - /// used, and since it doesn't exist, it would be replaced with the empty - /// string. 
- /// - /// Finally, sometimes you just want to replace a literal string with no - /// regard for capturing group expansion. This can be done by wrapping a - /// byte string with `NoExpand`: - /// - /// ```rust - /// # use regex::bytes::Regex; - /// # fn main() { - /// use regex::bytes::NoExpand; - /// - /// let re = Regex::new(r"(?P<last>[^,\s]+),\s+(\S+)").unwrap(); - /// let result = re.replace(b"Springsteen, Bruce", NoExpand(b"$2 $last")); - /// assert_eq!(result, &b"$2 $last"[..]); - /// # } - /// ``` - pub fn replace<'t, R: Replacer>( - &self, - text: &'t [u8], - rep: R, - ) -> Cow<'t, [u8]> { - self.replacen(text, 1, rep) - } - - /// Replaces all non-overlapping matches in `text` with the replacement - /// provided. This is the same as calling `replacen` with `limit` set to - /// `0`. - /// - /// See the documentation for `replace` for details on how to access - /// capturing group matches in the replacement text. - pub fn replace_all<'t, R: Replacer>( - &self, - text: &'t [u8], - rep: R, - ) -> Cow<'t, [u8]> { - self.replacen(text, 0, rep) - } - - /// Replaces at most `limit` non-overlapping matches in `text` with the - /// replacement provided. If `limit` is 0, then all non-overlapping matches - /// are replaced. - /// - /// See the documentation for `replace` for details on how to access - /// capturing group matches in the replacement text. 
- pub fn replacen<'t, R: Replacer>( - &self, - text: &'t [u8], - limit: usize, - mut rep: R, - ) -> Cow<'t, [u8]> { - if let Some(rep) = rep.no_expansion() { - let mut it = self.find_iter(text).enumerate().peekable(); - if it.peek().is_none() { - return Cow::Borrowed(text); - } - let mut new = Vec::with_capacity(text.len()); - let mut last_match = 0; - for (i, m) in it { - new.extend_from_slice(&text[last_match..m.start()]); - new.extend_from_slice(&rep); - last_match = m.end(); - if limit > 0 && i >= limit - 1 { - break; - } - } - new.extend_from_slice(&text[last_match..]); - return Cow::Owned(new); - } - - // The slower path, which we use if the replacement needs access to - // capture groups. - let mut it = self.captures_iter(text).enumerate().peekable(); - if it.peek().is_none() { - return Cow::Borrowed(text); - } - let mut new = Vec::with_capacity(text.len()); - let mut last_match = 0; - for (i, cap) in it { - // unwrap on 0 is OK because captures only reports matches - let m = cap.get(0).unwrap(); - new.extend_from_slice(&text[last_match..m.start()]); - rep.replace_append(&cap, &mut new); - last_match = m.end(); - if limit > 0 && i >= limit - 1 { - break; - } - } - new.extend_from_slice(&text[last_match..]); - Cow::Owned(new) - } -} - -/// Advanced or "lower level" search methods. -impl Regex { - /// Returns the end location of a match in the text given. - /// - /// This method may have the same performance characteristics as - /// `is_match`, except it provides an end location for a match. In - /// particular, the location returned *may be shorter* than the proper end - /// of the leftmost-first match that you would find via `Regex::find`. - /// - /// Note that it is not guaranteed that this routine finds the shortest or - /// "earliest" possible match. Instead, the main idea of this API is that - /// it returns the offset at the point at which the internal regex engine - /// has determined that a match has occurred. 
This may vary depending on - /// which internal regex engine is used, and thus, the offset itself may - /// change. - /// - /// # Example - /// - /// Typically, `a+` would match the entire first sequence of `a` in some - /// text, but `shortest_match` can give up as soon as it sees the first - /// `a`. - /// - /// ```rust - /// # use regex::bytes::Regex; - /// # fn main() { - /// let text = b"aaaaa"; - /// let pos = Regex::new(r"a+").unwrap().shortest_match(text); - /// assert_eq!(pos, Some(1)); - /// # } - /// ``` - pub fn shortest_match(&self, text: &[u8]) -> Option<usize> { - self.shortest_match_at(text, 0) - } - - /// Returns the same as shortest_match, but starts the search at the given - /// offset. - /// - /// The significance of the starting point is that it takes the surrounding - /// context into consideration. For example, the `\A` anchor can only - /// match when `start == 0`. - pub fn shortest_match_at( - &self, - text: &[u8], - start: usize, - ) -> Option<usize> { - self.0.searcher().shortest_match_at(text, start) - } - - /// Returns the same as is_match, but starts the search at the given - /// offset. - /// - /// The significance of the starting point is that it takes the surrounding - /// context into consideration. For example, the `\A` anchor can only - /// match when `start == 0`. - pub fn is_match_at(&self, text: &[u8], start: usize) -> bool { - self.0.searcher().is_match_at(text, start) - } - - /// Returns the same as find, but starts the search at the given - /// offset. - /// - /// The significance of the starting point is that it takes the surrounding - /// context into consideration. For example, the `\A` anchor can only - /// match when `start == 0`. - pub fn find_at<'t>( - &self, - text: &'t [u8], - start: usize, - ) -> Option<Match<'t>> { - self.0 - .searcher() - .find_at(text, start) - .map(|(s, e)| Match::new(text, s, e)) - } - - /// Returns the same as [`Regex::captures`], but starts the search at the - /// given offset. 
- /// - /// The significance of the starting point is that it takes the surrounding - /// context into consideration. For example, the `\A` anchor can only - /// match when `start == 0`. - pub fn captures_at<'t>( - &self, - text: &'t [u8], - start: usize, - ) -> Option<Captures<'t>> { - let mut locs = self.capture_locations(); - self.captures_read_at(&mut locs, text, start).map(move |_| Captures { - text, - locs: locs.0, - named_groups: self.0.capture_name_idx().clone(), - }) - } - - /// This is like `captures`, but uses - /// [`CaptureLocations`](struct.CaptureLocations.html) - /// instead of - /// [`Captures`](struct.Captures.html) in order to amortize allocations. - /// - /// To create a `CaptureLocations` value, use the - /// `Regex::capture_locations` method. - /// - /// This returns the overall match if this was successful, which is always - /// equivalence to the `0`th capture group. - pub fn captures_read<'t>( - &self, - locs: &mut CaptureLocations, - text: &'t [u8], - ) -> Option<Match<'t>> { - self.captures_read_at(locs, text, 0) - } - - /// Returns the same as `captures_read`, but starts the search at the given - /// offset and populates the capture locations given. - /// - /// The significance of the starting point is that it takes the surrounding - /// context into consideration. For example, the `\A` anchor can only - /// match when `start == 0`. - pub fn captures_read_at<'t>( - &self, - locs: &mut CaptureLocations, - text: &'t [u8], - start: usize, - ) -> Option<Match<'t>> { - self.0 - .searcher() - .captures_read_at(&mut locs.0, text, start) - .map(|(s, e)| Match::new(text, s, e)) - } - - /// An undocumented alias for `captures_read_at`. - /// - /// The `regex-capi` crate previously used this routine, so to avoid - /// breaking that crate, we continue to provide the name as an undocumented - /// alias. 
- #[doc(hidden)] - pub fn read_captures_at<'t>( - &self, - locs: &mut CaptureLocations, - text: &'t [u8], - start: usize, - ) -> Option<Match<'t>> { - self.captures_read_at(locs, text, start) - } -} - -/// Auxiliary methods. -impl Regex { - /// Returns the original string of this regex. - pub fn as_str(&self) -> &str { - &self.0.regex_strings()[0] - } - - /// Returns an iterator over the capture names. - pub fn capture_names(&self) -> CaptureNames<'_> { - CaptureNames(self.0.capture_names().iter()) - } - - /// Returns the number of captures. - pub fn captures_len(&self) -> usize { - self.0.capture_names().len() - } - - /// Returns the total number of capturing groups that appear in every - /// possible match. - /// - /// If the number of capture groups can vary depending on the match, then - /// this returns `None`. That is, a value is only returned when the number - /// of matching groups is invariant or "static." - /// - /// Note that like [`Regex::captures_len`], this **does** include the - /// implicit capturing group corresponding to the entire match. Therefore, - /// when a non-None value is returned, it is guaranteed to be at least `1`. - /// Stated differently, a return value of `Some(0)` is impossible. - /// - /// # Example - /// - /// This shows a few cases where a static number of capture groups is - /// available and a few cases where it is not. 
- /// - /// ``` - /// use regex::bytes::Regex; - /// - /// let len = |pattern| { - /// Regex::new(pattern).map(|re| re.static_captures_len()) - /// }; - /// - /// assert_eq!(Some(1), len("a")?); - /// assert_eq!(Some(2), len("(a)")?); - /// assert_eq!(Some(2), len("(a)|(b)")?); - /// assert_eq!(Some(3), len("(a)(b)|(c)(d)")?); - /// assert_eq!(None, len("(a)|b")?); - /// assert_eq!(None, len("a|(b)")?); - /// assert_eq!(None, len("(b)*")?); - /// assert_eq!(Some(2), len("(b)+")?); - /// - /// # Ok::<(), Box<dyn std::error::Error>>(()) - /// ``` - #[inline] - pub fn static_captures_len(&self) -> Option<usize> { - self.0.static_captures_len().map(|len| len.saturating_add(1)) - } - - /// Returns an empty set of capture locations that can be reused in - /// multiple calls to `captures_read` or `captures_read_at`. - pub fn capture_locations(&self) -> CaptureLocations { - CaptureLocations(self.0.searcher().locations()) - } - - /// An alias for `capture_locations` to preserve backward compatibility. - /// - /// The `regex-capi` crate uses this method, so to avoid breaking that - /// crate, we continue to export it as an undocumented API. - #[doc(hidden)] - pub fn locations(&self) -> CaptureLocations { - CaptureLocations(self.0.searcher().locations()) - } -} - -/// An iterator over all non-overlapping matches for a particular string. -/// -/// The iterator yields a tuple of integers corresponding to the start and end -/// of the match. The indices are byte offsets. The iterator stops when no more -/// matches can be found. -/// -/// `'r` is the lifetime of the compiled regular expression and `'t` is the -/// lifetime of the matched byte string. 
-#[derive(Debug)] -pub struct Matches<'r, 't>(re_trait::Matches<'t, ExecNoSync<'r>>); - -impl<'r, 't> Iterator for Matches<'r, 't> { - type Item = Match<'t>; - - fn next(&mut self) -> Option<Match<'t>> { - let text = self.0.text(); - self.0.next().map(|(s, e)| Match::new(text, s, e)) - } -} - -impl<'r, 't> FusedIterator for Matches<'r, 't> {} - -/// An iterator that yields all non-overlapping capture groups matching a -/// particular regular expression. -/// -/// The iterator stops when no more matches can be found. -/// -/// `'r` is the lifetime of the compiled regular expression and `'t` is the -/// lifetime of the matched byte string. -#[derive(Debug)] -pub struct CaptureMatches<'r, 't>( - re_trait::CaptureMatches<'t, ExecNoSync<'r>>, -); - -impl<'r, 't> Iterator for CaptureMatches<'r, 't> { - type Item = Captures<'t>; - - fn next(&mut self) -> Option<Captures<'t>> { - self.0.next().map(|locs| Captures { - text: self.0.text(), - locs, - named_groups: self.0.regex().capture_name_idx().clone(), - }) - } -} - -impl<'r, 't> FusedIterator for CaptureMatches<'r, 't> {} - -/// Yields all substrings delimited by a regular expression match. -/// -/// `'r` is the lifetime of the compiled regular expression and `'t` is the -/// lifetime of the byte string being split. -#[derive(Debug)] -pub struct Split<'r, 't> { - finder: Matches<'r, 't>, - last: usize, -} - -impl<'r, 't> Iterator for Split<'r, 't> { - type Item = &'t [u8]; - - fn next(&mut self) -> Option<&'t [u8]> { - let text = self.finder.0.text(); - match self.finder.next() { - None => { - if self.last > text.len() { - None - } else { - let s = &text[self.last..]; - self.last = text.len() + 1; // Next call will return None - Some(s) - } - } - Some(m) => { - let matched = &text[self.last..m.start()]; - self.last = m.end(); - Some(matched) - } - } - } -} - -impl<'r, 't> FusedIterator for Split<'r, 't> {} - -/// Yields at most `N` substrings delimited by a regular expression match. 
-/// -/// The last substring will be whatever remains after splitting. -/// -/// `'r` is the lifetime of the compiled regular expression and `'t` is the -/// lifetime of the byte string being split. -#[derive(Debug)] -pub struct SplitN<'r, 't> { - splits: Split<'r, 't>, - n: usize, -} - -impl<'r, 't> Iterator for SplitN<'r, 't> { - type Item = &'t [u8]; - - fn next(&mut self) -> Option<&'t [u8]> { - if self.n == 0 { - return None; - } - - self.n -= 1; - if self.n > 0 { - return self.splits.next(); - } - - let text = self.splits.finder.0.text(); - if self.splits.last > text.len() { - // We've already returned all substrings. - None - } else { - // self.n == 0, so future calls will return None immediately - Some(&text[self.splits.last..]) - } - } - - fn size_hint(&self) -> (usize, Option<usize>) { - (0, Some(self.n)) - } -} - -impl<'r, 't> FusedIterator for SplitN<'r, 't> {} - -/// An iterator over the names of all possible captures. -/// -/// `None` indicates an unnamed capture; the first element (capture 0, the -/// whole matched region) is always unnamed. -/// -/// `'r` is the lifetime of the compiled regular expression. -#[derive(Clone, Debug)] -pub struct CaptureNames<'r>(::std::slice::Iter<'r, Option<String>>); - -impl<'r> Iterator for CaptureNames<'r> { - type Item = Option<&'r str>; - - fn next(&mut self) -> Option<Option<&'r str>> { - self.0 - .next() - .as_ref() - .map(|slot| slot.as_ref().map(|name| name.as_ref())) - } - - fn size_hint(&self) -> (usize, Option<usize>) { - self.0.size_hint() - } - - fn count(self) -> usize { - self.0.count() - } -} - -impl<'r> ExactSizeIterator for CaptureNames<'r> {} - -impl<'r> FusedIterator for CaptureNames<'r> {} - -/// CaptureLocations is a low level representation of the raw offsets of each -/// submatch. 
-/// -/// You can think of this as a lower level -/// [`Captures`](struct.Captures.html), where this type does not support -/// named capturing groups directly and it does not borrow the text that these -/// offsets were matched on. -/// -/// Primarily, this type is useful when using the lower level `Regex` APIs -/// such as `read_captures`, which permits amortizing the allocation in which -/// capture match locations are stored. -/// -/// In order to build a value of this type, you'll need to call the -/// `capture_locations` method on the `Regex` being used to execute the search. -/// The value returned can then be reused in subsequent searches. -/// -/// # Example -/// -/// This example shows how to create and use `CaptureLocations` in a search. -/// -/// ``` -/// use regex::bytes::Regex; -/// -/// let re = Regex::new(r"(?<first>\w+)\s+(?<last>\w+)").unwrap(); -/// let mut locs = re.capture_locations(); -/// let m = re.captures_read(&mut locs, b"Bruce Springsteen").unwrap(); -/// assert_eq!(0..17, m.range()); -/// assert_eq!(Some((0, 17)), locs.get(0)); -/// assert_eq!(Some((0, 5)), locs.get(1)); -/// assert_eq!(Some((6, 17)), locs.get(2)); -/// -/// // Asking for an invalid capture group always returns None. -/// assert_eq!(None, locs.get(3)); -/// assert_eq!(None, locs.get(34973498648)); -/// assert_eq!(None, locs.get(9944060567225171988)); -/// ``` -#[derive(Clone, Debug)] -pub struct CaptureLocations(re_trait::Locations); - -/// A type alias for `CaptureLocations` for backwards compatibility. -/// -/// Previously, we exported `CaptureLocations` as `Locations` in an -/// undocumented API. To prevent breaking that code (e.g., in `regex-capi`), -/// we continue re-exporting the same undocumented API. -#[doc(hidden)] -pub type Locations = CaptureLocations; - -impl CaptureLocations { - /// Returns the start and end positions of the Nth capture group. Returns - /// `None` if `i` is not a valid capture group or if the capture group did - /// not match anything. 
The positions returned are *always* byte indices - /// with respect to the original string matched. - #[inline] - pub fn get(&self, i: usize) -> Option<(usize, usize)> { - self.0.pos(i) - } - - /// Returns the total number of capture groups (even if they didn't match). - /// - /// This is always at least `1` since every regex has at least `1` - /// capturing group that corresponds to the entire match. - #[inline] - pub fn len(&self) -> usize { - self.0.len() - } - - /// An alias for the `get` method for backwards compatibility. - /// - /// Previously, we exported `get` as `pos` in an undocumented API. To - /// prevent breaking that code (e.g., in `regex-capi`), we continue - /// re-exporting the same undocumented API. - #[doc(hidden)] - #[inline] - pub fn pos(&self, i: usize) -> Option<(usize, usize)> { - self.get(i) - } -} - -/// Captures represents a group of captured byte strings for a single match. -/// -/// The 0th capture always corresponds to the entire match. Each subsequent -/// index corresponds to the next capture group in the regex. If a capture -/// group is named, then the matched byte string is *also* available via the -/// `name` method. (Note that the 0th capture is always unnamed and so must be -/// accessed with the `get` method.) -/// -/// Positions returned from a capture group are always byte indices. -/// -/// `'t` is the lifetime of the matched text. -pub struct Captures<'t> { - text: &'t [u8], - locs: re_trait::Locations, - named_groups: Arc<HashMap<String, usize>>, -} - -impl<'t> Captures<'t> { - /// Returns the match associated with the capture group at index `i`. If - /// `i` does not correspond to a capture group, or if the capture group - /// did not participate in the match, then `None` is returned. 
- /// - /// # Examples - /// - /// Get the text of the match with a default of an empty string if this - /// group didn't participate in the match: - /// - /// ```rust - /// # use regex::bytes::Regex; - /// let re = Regex::new(r"[a-z]+(?:([0-9]+)|([A-Z]+))").unwrap(); - /// let caps = re.captures(b"abc123").unwrap(); - /// - /// let text1 = caps.get(1).map_or(&b""[..], |m| m.as_bytes()); - /// let text2 = caps.get(2).map_or(&b""[..], |m| m.as_bytes()); - /// assert_eq!(text1, &b"123"[..]); - /// assert_eq!(text2, &b""[..]); - /// ``` - pub fn get(&self, i: usize) -> Option<Match<'t>> { - self.locs.pos(i).map(|(s, e)| Match::new(self.text, s, e)) - } - - /// Returns the match for the capture group named `name`. If `name` isn't a - /// valid capture group or didn't match anything, then `None` is returned. - pub fn name(&self, name: &str) -> Option<Match<'t>> { - self.named_groups.get(name).and_then(|&i| self.get(i)) - } - - /// An iterator that yields all capturing matches in the order in which - /// they appear in the regex. If a particular capture group didn't - /// participate in the match, then `None` is yielded for that capture. - /// - /// The first match always corresponds to the overall match of the regex. - pub fn iter<'c>(&'c self) -> SubCaptureMatches<'c, 't> { - SubCaptureMatches { caps: self, it: self.locs.iter() } - } - - /// Expands all instances of `$name` in `replacement` to the corresponding - /// capture group `name`, and writes them to the `dst` buffer given. - /// - /// `name` may be an integer corresponding to the index of the capture - /// group (counted by order of opening parenthesis where `0` is the - /// entire match) or it can be a name (consisting of letters, digits or - /// underscores) corresponding to a named capture group. - /// - /// If `name` isn't a valid capture group (whether the name doesn't exist - /// or isn't a valid index), then it is replaced with the empty string. 
- /// - /// The longest possible name consisting of the characters `[_0-9A-Za-z]` - /// is used. e.g., `$1a` looks up the capture group named `1a` and not the - /// capture group at index `1`. To exert more precise control over the - /// name, or to refer to a capture group name that uses characters outside - /// of `[_0-9A-Za-z]`, use braces, e.g., `${1}a` or `${foo[bar].baz}`. When - /// using braces, any sequence of valid UTF-8 bytes is permitted. If the - /// sequence does not refer to a capture group name in the corresponding - /// regex, then it is replaced with an empty string. - /// - /// To write a literal `$` use `$$`. - pub fn expand(&self, replacement: &[u8], dst: &mut Vec<u8>) { - expand_bytes(self, replacement, dst) - } - - /// Returns the total number of capture groups (even if they didn't match). - /// - /// This is always at least `1`, since every regex has at least one capture - /// group that corresponds to the full match. - #[inline] - pub fn len(&self) -> usize { - self.locs.len() - } -} - -impl<'t> fmt::Debug for Captures<'t> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_tuple("Captures").field(&CapturesDebug(self)).finish() - } -} - -struct CapturesDebug<'c, 't>(&'c Captures<'t>); - -impl<'c, 't> fmt::Debug for CapturesDebug<'c, 't> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fn escape_bytes(bytes: &[u8]) -> String { - let mut s = String::new(); - for &b in bytes { - s.push_str(&escape_byte(b)); - } - s - } - - fn escape_byte(byte: u8) -> String { - use std::ascii::escape_default; - - let escaped: Vec<u8> = escape_default(byte).collect(); - String::from_utf8_lossy(&escaped).into_owned() - } - - // We'd like to show something nice here, even if it means an - // allocation to build a reverse index. 
- let slot_to_name: HashMap<&usize, &String> = - self.0.named_groups.iter().map(|(a, b)| (b, a)).collect(); - let mut map = f.debug_map(); - for (slot, m) in self.0.locs.iter().enumerate() { - let m = m.map(|(s, e)| escape_bytes(&self.0.text[s..e])); - if let Some(name) = slot_to_name.get(&slot) { - map.entry(&name, &m); - } else { - map.entry(&slot, &m); - } - } - map.finish() - } -} - -/// Get a group by index. -/// -/// `'t` is the lifetime of the matched text. -/// -/// The text can't outlive the `Captures` object if this method is -/// used, because of how `Index` is defined (normally `a[i]` is part -/// of `a` and can't outlive it); to do that, use `get()` instead. -/// -/// # Panics -/// -/// If there is no group at the given index. -impl<'t> Index<usize> for Captures<'t> { - type Output = [u8]; - - fn index(&self, i: usize) -> &[u8] { - self.get(i) - .map(|m| m.as_bytes()) - .unwrap_or_else(|| panic!("no group at index '{}'", i)) - } -} - -/// Get a group by name. -/// -/// `'t` is the lifetime of the matched text and `'i` is the lifetime -/// of the group name (the index). -/// -/// The text can't outlive the `Captures` object if this method is -/// used, because of how `Index` is defined (normally `a[i]` is part -/// of `a` and can't outlive it); to do that, use `name` instead. -/// -/// # Panics -/// -/// If there is no group named by the given value. -impl<'t, 'i> Index<&'i str> for Captures<'t> { - type Output = [u8]; - - fn index<'a>(&'a self, name: &'i str) -> &'a [u8] { - self.name(name) - .map(|m| m.as_bytes()) - .unwrap_or_else(|| panic!("no group named '{}'", name)) - } -} - -/// An iterator that yields all capturing matches in the order in which they -/// appear in the regex. -/// -/// If a particular capture group didn't participate in the match, then `None` -/// is yielded for that capture. The first match always corresponds to the -/// overall match of the regex. 
-/// -/// The lifetime `'c` corresponds to the lifetime of the `Captures` value, and -/// the lifetime `'t` corresponds to the originally matched text. -#[derive(Clone, Debug)] -pub struct SubCaptureMatches<'c, 't> { - caps: &'c Captures<'t>, - it: SubCapturesPosIter<'c>, -} - -impl<'c, 't> Iterator for SubCaptureMatches<'c, 't> { - type Item = Option<Match<'t>>; - - fn next(&mut self) -> Option<Option<Match<'t>>> { - self.it - .next() - .map(|cap| cap.map(|(s, e)| Match::new(self.caps.text, s, e))) - } -} - -impl<'c, 't> FusedIterator for SubCaptureMatches<'c, 't> {} - -/// Replacer describes types that can be used to replace matches in a byte -/// string. -/// -/// In general, users of this crate shouldn't need to implement this trait, -/// since implementations are already provided for `&[u8]` along with other -/// variants of bytes types and `FnMut(&Captures) -> Vec<u8>` (or any -/// `FnMut(&Captures) -> T` where `T: AsRef<[u8]>`), which covers most use cases. -pub trait Replacer { - /// Appends text to `dst` to replace the current match. - /// - /// The current match is represented by `caps`, which is guaranteed to - /// have a match at capture group `0`. - /// - /// For example, a no-op replacement would be - /// `dst.extend(&caps[0])`. - fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>); - - /// Return a fixed unchanging replacement byte string. - /// - /// When doing replacements, if access to `Captures` is not needed (e.g., - /// the replacement byte string does not need `$` expansion), then it can - /// be beneficial to avoid finding sub-captures. - /// - /// In general, this is called once for every call to `replacen`. - fn no_expansion<'r>(&'r mut self) -> Option<Cow<'r, [u8]>> { - None - } - - /// Return a `Replacer` that borrows and wraps this `Replacer`. - /// - /// This is useful when you want to take a generic `Replacer` (which might - /// not be cloneable) and use it without consuming it, so it can be used - /// more than once. 
- /// - /// # Example - /// - /// ``` - /// use regex::bytes::{Regex, Replacer}; - /// - /// fn replace_all_twice<R: Replacer>( - /// re: Regex, - /// src: &[u8], - /// mut rep: R, - /// ) -> Vec<u8> { - /// let dst = re.replace_all(src, rep.by_ref()); - /// let dst = re.replace_all(&dst, rep.by_ref()); - /// dst.into_owned() - /// } - /// ``` - fn by_ref<'r>(&'r mut self) -> ReplacerRef<'r, Self> { - ReplacerRef(self) - } -} - -/// By-reference adaptor for a `Replacer` -/// -/// Returned by [`Replacer::by_ref`](trait.Replacer.html#method.by_ref). -#[derive(Debug)] -pub struct ReplacerRef<'a, R: ?Sized>(&'a mut R); - -impl<'a, R: Replacer + ?Sized + 'a> Replacer for ReplacerRef<'a, R> { - fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { - self.0.replace_append(caps, dst) - } - fn no_expansion<'r>(&'r mut self) -> Option<Cow<'r, [u8]>> { - self.0.no_expansion() - } -} - -impl<'a> Replacer for &'a [u8] { - fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { - caps.expand(*self, dst); - } - - fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> { - no_expansion(self) - } -} - -impl<'a> Replacer for &'a Vec<u8> { - fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { - caps.expand(*self, dst); - } - - fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> { - no_expansion(self) - } -} - -impl Replacer for Vec<u8> { - fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { - caps.expand(self, dst); - } - - fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> { - no_expansion(self) - } -} - -impl<'a> Replacer for Cow<'a, [u8]> { - fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { - caps.expand(self.as_ref(), dst); - } - - fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> { - no_expansion(self) - } -} - -impl<'a> Replacer for &'a Cow<'a, [u8]> { - fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { - caps.expand(self.as_ref(), dst); - } - - fn no_expansion(&mut 
self) -> Option<Cow<'_, [u8]>> { - no_expansion(self) - } -} - -fn no_expansion<T: AsRef<[u8]>>(t: &T) -> Option<Cow<'_, [u8]>> { - let s = t.as_ref(); - match find_byte(b'$', s) { - Some(_) => None, - None => Some(Cow::Borrowed(s)), - } -} - -impl<F, T> Replacer for F -where - F: FnMut(&Captures<'_>) -> T, - T: AsRef<[u8]>, -{ - fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { - dst.extend_from_slice((*self)(caps).as_ref()); - } -} - -/// `NoExpand` indicates literal byte string replacement. -/// -/// It can be used with `replace` and `replace_all` to do a literal byte string -/// replacement without expanding `$name` to their corresponding capture -/// groups. This can be both convenient (to avoid escaping `$`, for example) -/// and performant (since capture groups don't need to be found). -/// -/// `'t` is the lifetime of the literal text. -#[derive(Clone, Debug)] -pub struct NoExpand<'t>(pub &'t [u8]); - -impl<'t> Replacer for NoExpand<'t> { - fn replace_append(&mut self, _: &Captures<'_>, dst: &mut Vec<u8>) { - dst.extend_from_slice(self.0); - } - - fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> { - Some(Cow::Borrowed(self.0)) - } -} diff --git a/vendor/regex/src/re_set.rs b/vendor/regex/src/re_set.rs deleted file mode 100644 index 7c8253f..0000000 --- a/vendor/regex/src/re_set.rs +++ /dev/null @@ -1,518 +0,0 @@ -macro_rules! define_set { - ($name:ident, $builder_mod:ident, $text_ty:ty, $as_bytes:expr, - $(#[$doc_regexset_example:meta])* ) => { - pub mod $name { - use std::fmt; - use std::iter; - use std::slice; - use std::vec; - - use crate::error::Error; - use crate::exec::Exec; - use crate::re_builder::$builder_mod::RegexSetBuilder; - use crate::re_trait::RegularExpression; - -/// Match multiple (possibly overlapping) regular expressions in a single scan. -/// -/// A regex set corresponds to the union of two or more regular expressions. 
-/// That is, a regex set will match text where at least one of its -/// constituent regular expressions matches. A regex set as its formulated here -/// provides a touch more power: it will also report *which* regular -/// expressions in the set match. Indeed, this is the key difference between -/// regex sets and a single `Regex` with many alternates, since only one -/// alternate can match at a time. -/// -/// For example, consider regular expressions to match email addresses and -/// domains: `[a-z]+@[a-z]+\.(com|org|net)` and `[a-z]+\.(com|org|net)`. If a -/// regex set is constructed from those regexes, then searching the text -/// `foo@example.com` will report both regexes as matching. Of course, one -/// could accomplish this by compiling each regex on its own and doing two -/// searches over the text. The key advantage of using a regex set is that it -/// will report the matching regexes using a *single pass through the text*. -/// If one has hundreds or thousands of regexes to match repeatedly (like a URL -/// router for a complex web application or a user agent matcher), then a regex -/// set can realize huge performance gains. -/// -/// # Example -/// -/// This shows how the above two regexes (for matching email addresses and -/// domains) might work: -/// -$(#[$doc_regexset_example])* -/// -/// Note that it would be possible to adapt the above example to using `Regex` -/// with an expression like: -/// -/// ```text -/// (?P<email>[a-z]+@(?P<email_domain>[a-z]+[.](com|org|net)))|(?P<domain>[a-z]+[.](com|org|net)) -/// ``` -/// -/// After a match, one could then inspect the capture groups to figure out -/// which alternates matched. The problem is that it is hard to make this -/// approach scale when there are many regexes since the overlap between each -/// alternate isn't always obvious to reason about. -/// -/// # Limitations -/// -/// Regex sets are limited to answering the following two questions: -/// -/// 1. Does any regex in the set match? 
-/// 2. If so, which regexes in the set match? -/// -/// As with the main [`Regex`][crate::Regex] type, it is cheaper to ask (1) -/// instead of (2) since the matching engines can stop after the first match -/// is found. -/// -/// You cannot directly extract [`Match`][crate::Match] or -/// [`Captures`][crate::Captures] objects from a regex set. If you need these -/// operations, the recommended approach is to compile each pattern in the set -/// independently and scan the exact same input a second time with those -/// independently compiled patterns: -/// -/// ```rust -/// use regex::{Regex, RegexSet}; -/// -/// let patterns = ["foo", "bar"]; -/// // Both patterns will match different ranges of this string. -/// let text = "barfoo"; -/// -/// // Compile a set matching any of our patterns. -/// let set = RegexSet::new(&patterns).unwrap(); -/// // Compile each pattern independently. -/// let regexes: Vec<_> = set.patterns().iter() -/// .map(|pat| Regex::new(pat).unwrap()) -/// .collect(); -/// -/// // Match against the whole set first and identify the individual -/// // matching patterns. -/// let matches: Vec<&str> = set.matches(text).into_iter() -/// // Dereference the match index to get the corresponding -/// // compiled pattern. -/// .map(|match_idx| ®exes[match_idx]) -/// // To get match locations or any other info, we then have to search -/// // the exact same text again, using our separately-compiled pattern. -/// .map(|pat| pat.find(text).unwrap().as_str()) -/// .collect(); -/// -/// // Matches arrive in the order the constituent patterns were declared, -/// // not the order they appear in the input. -/// assert_eq!(vec!["foo", "bar"], matches); -/// ``` -/// -/// # Performance -/// -/// A `RegexSet` has the same performance characteristics as `Regex`. Namely, -/// search takes `O(mn)` time, where `m` is proportional to the size of the -/// regex set and `n` is proportional to the length of the search text. 
-#[derive(Clone)] -pub struct RegexSet(Exec); - -impl RegexSet { - /// Create a new regex set with the given regular expressions. - /// - /// This takes an iterator of `S`, where `S` is something that can produce - /// a `&str`. If any of the strings in the iterator are not valid regular - /// expressions, then an error is returned. - /// - /// # Example - /// - /// Create a new regex set from an iterator of strings: - /// - /// ```rust - /// # use regex::RegexSet; - /// let set = RegexSet::new(&[r"\w+", r"\d+"]).unwrap(); - /// assert!(set.is_match("foo")); - /// ``` - pub fn new<I, S>(exprs: I) -> Result<RegexSet, Error> - where S: AsRef<str>, I: IntoIterator<Item=S> { - RegexSetBuilder::new(exprs).build() - } - - /// Create a new empty regex set. - /// - /// # Example - /// - /// ```rust - /// # use regex::RegexSet; - /// let set = RegexSet::empty(); - /// assert!(set.is_empty()); - /// ``` - pub fn empty() -> RegexSet { - RegexSetBuilder::new(&[""; 0]).build().unwrap() - } - - /// Returns true if and only if one of the regexes in this set matches - /// the text given. - /// - /// This method should be preferred if you only need to test whether any - /// of the regexes in the set should match, but don't care about *which* - /// regexes matched. This is because the underlying matching engine will - /// quit immediately after seeing the first match instead of continuing to - /// find all matches. - /// - /// Note that as with searches using `Regex`, the expression is unanchored - /// by default. That is, if the regex does not start with `^` or `\A`, or - /// end with `$` or `\z`, then it is permitted to match anywhere in the - /// text. 
- /// - /// # Example - /// - /// Tests whether a set matches some text: - /// - /// ```rust - /// # use regex::RegexSet; - /// let set = RegexSet::new(&[r"\w+", r"\d+"]).unwrap(); - /// assert!(set.is_match("foo")); - /// assert!(!set.is_match("☃")); - /// ``` - pub fn is_match(&self, text: $text_ty) -> bool { - self.is_match_at(text, 0) - } - - /// Returns the same as is_match, but starts the search at the given - /// offset. - /// - /// The significance of the starting point is that it takes the surrounding - /// context into consideration. For example, the `\A` anchor can only - /// match when `start == 0`. - #[doc(hidden)] - pub fn is_match_at(&self, text: $text_ty, start: usize) -> bool { - self.0.searcher().is_match_at($as_bytes(text), start) - } - - /// Returns the set of regular expressions that match in the given text. - /// - /// The set returned contains the index of each regular expression that - /// matches in the given text. The index is in correspondence with the - /// order of regular expressions given to `RegexSet`'s constructor. - /// - /// The set can also be used to iterate over the matched indices. - /// - /// Note that as with searches using `Regex`, the expression is unanchored - /// by default. That is, if the regex does not start with `^` or `\A`, or - /// end with `$` or `\z`, then it is permitted to match anywhere in the - /// text. 
- /// - /// # Example - /// - /// Tests which regular expressions match the given text: - /// - /// ```rust - /// # use regex::RegexSet; - /// let set = RegexSet::new(&[ - /// r"\w+", - /// r"\d+", - /// r"\pL+", - /// r"foo", - /// r"bar", - /// r"barfoo", - /// r"foobar", - /// ]).unwrap(); - /// let matches: Vec<_> = set.matches("foobar").into_iter().collect(); - /// assert_eq!(matches, vec![0, 2, 3, 4, 6]); - /// - /// // You can also test whether a particular regex matched: - /// let matches = set.matches("foobar"); - /// assert!(!matches.matched(5)); - /// assert!(matches.matched(6)); - /// ``` - pub fn matches(&self, text: $text_ty) -> SetMatches { - let mut matches = vec![false; self.0.regex_strings().len()]; - let any = self.read_matches_at(&mut matches, text, 0); - SetMatches { - matched_any: any, - matches: matches, - } - } - - /// Returns the same as matches, but starts the search at the given - /// offset and stores the matches into the slice given. - /// - /// The significance of the starting point is that it takes the surrounding - /// context into consideration. For example, the `\A` anchor can only - /// match when `start == 0`. - /// - /// `matches` must have a length that is at least the number of regexes - /// in this set. - /// - /// This method returns true if and only if at least one member of - /// `matches` is true after executing the set against `text`. - #[doc(hidden)] - pub fn read_matches_at( - &self, - matches: &mut [bool], - text: $text_ty, - start: usize, - ) -> bool { - self.0.searcher().many_matches_at(matches, $as_bytes(text), start) - } - - /// Returns the total number of regular expressions in this set. - pub fn len(&self) -> usize { - self.0.regex_strings().len() - } - - /// Returns `true` if this set contains no regular expressions. - pub fn is_empty(&self) -> bool { - self.0.regex_strings().is_empty() - } - - /// Returns the patterns that this set will match on. 
- /// - /// This function can be used to determine the pattern for a match. The - /// slice returned has exactly as many patterns givens to this regex set, - /// and the order of the slice is the same as the order of the patterns - /// provided to the set. - /// - /// # Example - /// - /// ```rust - /// # use regex::RegexSet; - /// let set = RegexSet::new(&[ - /// r"\w+", - /// r"\d+", - /// r"\pL+", - /// r"foo", - /// r"bar", - /// r"barfoo", - /// r"foobar", - /// ]).unwrap(); - /// let matches: Vec<_> = set - /// .matches("foobar") - /// .into_iter() - /// .map(|match_idx| &set.patterns()[match_idx]) - /// .collect(); - /// assert_eq!(matches, vec![r"\w+", r"\pL+", r"foo", r"bar", r"foobar"]); - /// ``` - pub fn patterns(&self) -> &[String] { - self.0.regex_strings() - } -} - -impl Default for RegexSet { - fn default() -> Self { - RegexSet::empty() - } -} - -/// A set of matches returned by a regex set. -#[derive(Clone, Debug)] -pub struct SetMatches { - matched_any: bool, - matches: Vec<bool>, -} - -impl SetMatches { - /// Whether this set contains any matches. - pub fn matched_any(&self) -> bool { - self.matched_any - } - - /// Whether the regex at the given index matched. - /// - /// The index for a regex is determined by its insertion order upon the - /// initial construction of a `RegexSet`, starting at `0`. - /// - /// # Panics - /// - /// If `regex_index` is greater than or equal to `self.len()`. - pub fn matched(&self, regex_index: usize) -> bool { - self.matches[regex_index] - } - - /// The total number of regexes in the set that created these matches. - /// - /// **WARNING:** This always returns the same value as [`RegexSet::len`]. - /// In particular, it does *not* return the number of elements yielded by - /// [`SetMatches::iter`]. The only way to determine the total number of - /// matched regexes is to iterate over them. - pub fn len(&self) -> usize { - self.matches.len() - } - - /// Returns an iterator over indexes in the regex that matched. 
- /// - /// This will always produces matches in ascending order of index, where - /// the index corresponds to the index of the regex that matched with - /// respect to its position when initially building the set. - pub fn iter(&self) -> SetMatchesIter<'_> { - SetMatchesIter((&*self.matches).into_iter().enumerate()) - } -} - -impl IntoIterator for SetMatches { - type IntoIter = SetMatchesIntoIter; - type Item = usize; - - fn into_iter(self) -> Self::IntoIter { - SetMatchesIntoIter(self.matches.into_iter().enumerate()) - } -} - -impl<'a> IntoIterator for &'a SetMatches { - type IntoIter = SetMatchesIter<'a>; - type Item = usize; - - fn into_iter(self) -> Self::IntoIter { - self.iter() - } -} - -/// An owned iterator over the set of matches from a regex set. -/// -/// This will always produces matches in ascending order of index, where the -/// index corresponds to the index of the regex that matched with respect to -/// its position when initially building the set. -#[derive(Debug)] -pub struct SetMatchesIntoIter(iter::Enumerate<vec::IntoIter<bool>>); - -impl Iterator for SetMatchesIntoIter { - type Item = usize; - - fn next(&mut self) -> Option<usize> { - loop { - match self.0.next() { - None => return None, - Some((_, false)) => {} - Some((i, true)) => return Some(i), - } - } - } - - fn size_hint(&self) -> (usize, Option<usize>) { - self.0.size_hint() - } -} - -impl DoubleEndedIterator for SetMatchesIntoIter { - fn next_back(&mut self) -> Option<usize> { - loop { - match self.0.next_back() { - None => return None, - Some((_, false)) => {} - Some((i, true)) => return Some(i), - } - } - } -} - -impl iter::FusedIterator for SetMatchesIntoIter {} - -/// A borrowed iterator over the set of matches from a regex set. -/// -/// The lifetime `'a` refers to the lifetime of a `SetMatches` value. 
-/// -/// This will always produces matches in ascending order of index, where the -/// index corresponds to the index of the regex that matched with respect to -/// its position when initially building the set. -#[derive(Clone, Debug)] -pub struct SetMatchesIter<'a>(iter::Enumerate<slice::Iter<'a, bool>>); - -impl<'a> Iterator for SetMatchesIter<'a> { - type Item = usize; - - fn next(&mut self) -> Option<usize> { - loop { - match self.0.next() { - None => return None, - Some((_, &false)) => {} - Some((i, &true)) => return Some(i), - } - } - } - - fn size_hint(&self) -> (usize, Option<usize>) { - self.0.size_hint() - } -} - -impl<'a> DoubleEndedIterator for SetMatchesIter<'a> { - fn next_back(&mut self) -> Option<usize> { - loop { - match self.0.next_back() { - None => return None, - Some((_, &false)) => {} - Some((i, &true)) => return Some(i), - } - } - } -} - -impl<'a> iter::FusedIterator for SetMatchesIter<'a> {} - -#[doc(hidden)] -impl From<Exec> for RegexSet { - fn from(exec: Exec) -> Self { - RegexSet(exec) - } -} - -impl fmt::Debug for RegexSet { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "RegexSet({:?})", self.0.regex_strings()) - } -} - -#[allow(dead_code)] fn as_bytes_str(text: &str) -> &[u8] { text.as_bytes() } -#[allow(dead_code)] fn as_bytes_bytes(text: &[u8]) -> &[u8] { text } - } - } -} - -define_set! { - unicode, - set_unicode, - &str, - as_bytes_str, -/// ```rust -/// # use regex::RegexSet; -/// let set = RegexSet::new(&[ -/// r"[a-z]+@[a-z]+\.(com|org|net)", -/// r"[a-z]+\.(com|org|net)", -/// ]).unwrap(); -/// -/// // Ask whether any regexes in the set match. -/// assert!(set.is_match("foo@example.com")); -/// -/// // Identify which regexes in the set match. -/// let matches: Vec<_> = set.matches("foo@example.com").into_iter().collect(); -/// assert_eq!(vec![0, 1], matches); -/// -/// // Try again, but with text that only matches one of the regexes. 
-/// let matches: Vec<_> = set.matches("example.com").into_iter().collect(); -/// assert_eq!(vec![1], matches); -/// -/// // Try again, but with text that doesn't match any regex in the set. -/// let matches: Vec<_> = set.matches("example").into_iter().collect(); -/// assert!(matches.is_empty()); -/// ``` -} - -define_set! { - bytes, - set_bytes, - &[u8], - as_bytes_bytes, -/// ```rust -/// # use regex::bytes::RegexSet; -/// let set = RegexSet::new(&[ -/// r"[a-z]+@[a-z]+\.(com|org|net)", -/// r"[a-z]+\.(com|org|net)", -/// ]).unwrap(); -/// -/// // Ask whether any regexes in the set match. -/// assert!(set.is_match(b"foo@example.com")); -/// -/// // Identify which regexes in the set match. -/// let matches: Vec<_> = set.matches(b"foo@example.com").into_iter().collect(); -/// assert_eq!(vec![0, 1], matches); -/// -/// // Try again, but with text that only matches one of the regexes. -/// let matches: Vec<_> = set.matches(b"example.com").into_iter().collect(); -/// assert_eq!(vec![1], matches); -/// -/// // Try again, but with text that doesn't match any regex in the set. -/// let matches: Vec<_> = set.matches(b"example").into_iter().collect(); -/// assert!(matches.is_empty()); -/// ``` -} diff --git a/vendor/regex/src/re_trait.rs b/vendor/regex/src/re_trait.rs deleted file mode 100644 index 505810c..0000000 --- a/vendor/regex/src/re_trait.rs +++ /dev/null @@ -1,294 +0,0 @@ -use std::fmt; -use std::iter::FusedIterator; - -/// Slot is a single saved capture location. Note that there are two slots for -/// every capture in a regular expression (one slot each for the start and end -/// of the capture). -pub type Slot = Option<usize>; - -/// Locations represents the offsets of each capturing group in a regex for -/// a single match. -/// -/// Unlike `Captures`, a `Locations` value only stores offsets. -#[doc(hidden)] -#[derive(Clone, Debug)] -pub struct Locations(Vec<Slot>); - -impl Locations { - /// Returns the start and end positions of the Nth capture group. 
Returns - /// `None` if `i` is not a valid capture group or if the capture group did - /// not match anything. The positions returned are *always* byte indices - /// with respect to the original string matched. - pub fn pos(&self, i: usize) -> Option<(usize, usize)> { - let (s, e) = (i.checked_mul(2)?, i.checked_mul(2)?.checked_add(1)?); - match (self.0.get(s), self.0.get(e)) { - (Some(&Some(s)), Some(&Some(e))) => Some((s, e)), - _ => None, - } - } - - /// Creates an iterator of all the capture group positions in order of - /// appearance in the regular expression. Positions are byte indices - /// in terms of the original string matched. - pub fn iter(&self) -> SubCapturesPosIter<'_> { - SubCapturesPosIter { idx: 0, locs: self } - } - - /// Returns the total number of capturing groups. - /// - /// This is always at least `1` since every regex has at least `1` - /// capturing group that corresponds to the entire match. - pub fn len(&self) -> usize { - self.0.len() / 2 - } - - /// Return the individual slots as a slice. - pub(crate) fn as_slots(&mut self) -> &mut [Slot] { - &mut self.0 - } -} - -/// An iterator over capture group positions for a particular match of a -/// regular expression. -/// -/// Positions are byte indices in terms of the original string matched. -/// -/// `'c` is the lifetime of the captures. 
-#[derive(Clone, Debug)] -pub struct SubCapturesPosIter<'c> { - idx: usize, - locs: &'c Locations, -} - -impl<'c> Iterator for SubCapturesPosIter<'c> { - type Item = Option<(usize, usize)>; - - fn next(&mut self) -> Option<Option<(usize, usize)>> { - if self.idx >= self.locs.len() { - return None; - } - let x = match self.locs.pos(self.idx) { - None => Some(None), - Some((s, e)) => Some(Some((s, e))), - }; - self.idx += 1; - x - } - - fn size_hint(&self) -> (usize, Option<usize>) { - let len = self.locs.len() - self.idx; - (len, Some(len)) - } - - fn count(self) -> usize { - self.len() - } -} - -impl<'c> ExactSizeIterator for SubCapturesPosIter<'c> {} - -impl<'c> FusedIterator for SubCapturesPosIter<'c> {} - -/// `RegularExpression` describes types that can implement regex searching. -/// -/// This trait is my attempt at reducing code duplication and to standardize -/// the internal API. Specific duplication that is avoided are the `find` -/// and `capture` iterators, which are slightly tricky. -/// -/// It's not clear whether this trait is worth it, and it also isn't -/// clear whether it's useful as a public trait or not. Methods like -/// `next_after_empty` reak of bad design, but the rest of the methods seem -/// somewhat reasonable. One particular thing this trait would expose would be -/// the ability to start the search of a regex anywhere in a haystack, which -/// isn't possible in the current public API. -pub trait RegularExpression: Sized + fmt::Debug { - /// The type of the haystack. - type Text: ?Sized + fmt::Debug; - - /// The number of capture slots in the compiled regular expression. This is - /// always two times the number of capture groups (two slots per group). - fn slots_len(&self) -> usize; - - /// Allocates fresh space for all capturing groups in this regex. - fn locations(&self) -> Locations { - Locations(vec![None; self.slots_len()]) - } - - /// Returns the position of the next character after `i`. 
- /// - /// For example, a haystack with type `&[u8]` probably returns `i+1`, - /// whereas a haystack with type `&str` probably returns `i` plus the - /// length of the next UTF-8 sequence. - fn next_after_empty(&self, text: &Self::Text, i: usize) -> usize; - - /// Returns the location of the shortest match. - fn shortest_match_at( - &self, - text: &Self::Text, - start: usize, - ) -> Option<usize>; - - /// Returns whether the regex matches the text given. - fn is_match_at(&self, text: &Self::Text, start: usize) -> bool; - - /// Returns the leftmost-first match location if one exists. - fn find_at( - &self, - text: &Self::Text, - start: usize, - ) -> Option<(usize, usize)>; - - /// Returns the leftmost-first match location if one exists, and also - /// fills in any matching capture slot locations. - fn captures_read_at( - &self, - locs: &mut Locations, - text: &Self::Text, - start: usize, - ) -> Option<(usize, usize)>; - - /// Returns an iterator over all non-overlapping successive leftmost-first - /// matches. - fn find_iter(self, text: &Self::Text) -> Matches<'_, Self> { - Matches { re: self, text, last_end: 0, last_match: None } - } - - /// Returns an iterator over all non-overlapping successive leftmost-first - /// matches with captures. - fn captures_iter(self, text: &Self::Text) -> CaptureMatches<'_, Self> { - CaptureMatches(self.find_iter(text)) - } -} - -/// An iterator over all non-overlapping successive leftmost-first matches. -#[derive(Debug)] -pub struct Matches<'t, R> -where - R: RegularExpression, - R::Text: 't, -{ - re: R, - text: &'t R::Text, - last_end: usize, - last_match: Option<usize>, -} - -impl<'t, R> Matches<'t, R> -where - R: RegularExpression, - R::Text: 't, -{ - /// Return the text being searched. - pub fn text(&self) -> &'t R::Text { - self.text - } - - /// Return the underlying regex. 
- pub fn regex(&self) -> &R { - &self.re - } -} - -impl<'t, R> Iterator for Matches<'t, R> -where - R: RegularExpression, - R::Text: 't + AsRef<[u8]>, -{ - type Item = (usize, usize); - - fn next(&mut self) -> Option<(usize, usize)> { - if self.last_end > self.text.as_ref().len() { - return None; - } - let (s, e) = match self.re.find_at(self.text, self.last_end) { - None => return None, - Some((s, e)) => (s, e), - }; - if s == e { - // This is an empty match. To ensure we make progress, start - // the next search at the smallest possible starting position - // of the next match following this one. - self.last_end = self.re.next_after_empty(self.text, e); - // Don't accept empty matches immediately following a match. - // Just move on to the next match. - if Some(e) == self.last_match { - return self.next(); - } - } else { - self.last_end = e; - } - self.last_match = Some(e); - Some((s, e)) - } -} - -impl<'t, R> FusedIterator for Matches<'t, R> -where - R: RegularExpression, - R::Text: 't + AsRef<[u8]>, -{ -} - -/// An iterator over all non-overlapping successive leftmost-first matches with -/// captures. -#[derive(Debug)] -pub struct CaptureMatches<'t, R>(Matches<'t, R>) -where - R: RegularExpression, - R::Text: 't; - -impl<'t, R> CaptureMatches<'t, R> -where - R: RegularExpression, - R::Text: 't, -{ - /// Return the text being searched. - pub fn text(&self) -> &'t R::Text { - self.0.text() - } - - /// Return the underlying regex. 
- pub fn regex(&self) -> &R { - self.0.regex() - } -} - -impl<'t, R> Iterator for CaptureMatches<'t, R> -where - R: RegularExpression, - R::Text: 't + AsRef<[u8]>, -{ - type Item = Locations; - - fn next(&mut self) -> Option<Locations> { - if self.0.last_end > self.0.text.as_ref().len() { - return None; - } - let mut locs = self.0.re.locations(); - let (s, e) = match self.0.re.captures_read_at( - &mut locs, - self.0.text, - self.0.last_end, - ) { - None => return None, - Some((s, e)) => (s, e), - }; - if s == e { - self.0.last_end = self.0.re.next_after_empty(self.0.text, e); - if Some(e) == self.0.last_match { - return self.next(); - } - } else { - self.0.last_end = e; - } - self.0.last_match = Some(e); - Some(locs) - } -} - -impl<'t, R> FusedIterator for CaptureMatches<'t, R> -where - R: RegularExpression, - R::Text: 't + AsRef<[u8]>, -{ -} diff --git a/vendor/regex/src/re_unicode.rs b/vendor/regex/src/re_unicode.rs deleted file mode 100644 index 5768908..0000000 --- a/vendor/regex/src/re_unicode.rs +++ /dev/null @@ -1,1415 +0,0 @@ -use std::borrow::Cow; -use std::collections::HashMap; -use std::fmt; -use std::iter::FusedIterator; -use std::ops::{Index, Range}; -use std::str::FromStr; -use std::sync::Arc; - -use crate::find_byte::find_byte; - -use crate::error::Error; -use crate::exec::{Exec, ExecNoSyncStr}; -use crate::expand::expand_str; -use crate::re_builder::unicode::RegexBuilder; -use crate::re_trait::{self, RegularExpression, SubCapturesPosIter}; - -/// Escapes all regular expression meta characters in `text`. -/// -/// The string returned may be safely used as a literal in a regular -/// expression. -pub fn escape(text: &str) -> String { - regex_syntax::escape(text) -} - -/// Match represents a single match of a regex in a haystack. -/// -/// The lifetime parameter `'t` refers to the lifetime of the matched text. 
-#[derive(Copy, Clone, Eq, PartialEq)] -pub struct Match<'t> { - text: &'t str, - start: usize, - end: usize, -} - -impl<'t> Match<'t> { - /// Returns the starting byte offset of the match in the haystack. - #[inline] - pub fn start(&self) -> usize { - self.start - } - - /// Returns the ending byte offset of the match in the haystack. - #[inline] - pub fn end(&self) -> usize { - self.end - } - - /// Returns true if and only if this match has a length of zero. - #[inline] - pub fn is_empty(&self) -> bool { - self.start == self.end - } - - /// Returns the length, in bytes, of this match. - #[inline] - pub fn len(&self) -> usize { - self.end - self.start - } - - /// Returns the range over the starting and ending byte offsets of the - /// match in the haystack. - #[inline] - pub fn range(&self) -> Range<usize> { - self.start..self.end - } - - /// Returns the matched text. - #[inline] - pub fn as_str(&self) -> &'t str { - &self.text[self.range()] - } - - /// Creates a new match from the given haystack and byte offsets. - #[inline] - fn new(haystack: &'t str, start: usize, end: usize) -> Match<'t> { - Match { text: haystack, start, end } - } -} - -impl<'t> std::fmt::Debug for Match<'t> { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - f.debug_struct("Match") - .field("start", &self.start) - .field("end", &self.end) - .field("string", &self.as_str()) - .finish() - } -} - -impl<'t> From<Match<'t>> for &'t str { - fn from(m: Match<'t>) -> &'t str { - m.as_str() - } -} - -impl<'t> From<Match<'t>> for Range<usize> { - fn from(m: Match<'t>) -> Range<usize> { - m.range() - } -} - -/// A compiled regular expression for matching Unicode strings. -/// -/// It is represented as either a sequence of bytecode instructions (dynamic) -/// or as a specialized Rust function (native). It can be used to search, split -/// or replace text. All searching is done with an implicit `.*?` at the -/// beginning and end of an expression. 
To force an expression to match the -/// whole string (or a prefix or a suffix), you must use an anchor like `^` or -/// `$` (or `\A` and `\z`). -/// -/// While this crate will handle Unicode strings (whether in the regular -/// expression or in the search text), all positions returned are **byte -/// indices**. Every byte index is guaranteed to be at a Unicode code point -/// boundary. -/// -/// The lifetimes `'r` and `'t` in this crate correspond to the lifetime of a -/// compiled regular expression and text to search, respectively. -/// -/// The only methods that allocate new strings are the string replacement -/// methods. All other methods (searching and splitting) return borrowed -/// pointers into the string given. -/// -/// # Examples -/// -/// Find the location of a US phone number: -/// -/// ```rust -/// # use regex::Regex; -/// let re = Regex::new("[0-9]{3}-[0-9]{3}-[0-9]{4}").unwrap(); -/// let mat = re.find("phone: 111-222-3333").unwrap(); -/// assert_eq!((mat.start(), mat.end()), (7, 19)); -/// ``` -/// -/// # Using the `std::str::pattern` methods with `Regex` -/// -/// > **Note**: This section requires that this crate is compiled with the -/// > `pattern` Cargo feature enabled, which **requires nightly Rust**. -/// -/// Since `Regex` implements `Pattern`, you can use regexes with methods -/// defined on `&str`. For example, `is_match`, `find`, `find_iter` -/// and `split` can be replaced with `str::contains`, `str::find`, -/// `str::match_indices` and `str::split`. 
-/// -/// Here are some examples: -/// -/// ```rust,ignore -/// # use regex::Regex; -/// let re = Regex::new(r"\d+").unwrap(); -/// let haystack = "a111b222c"; -/// -/// assert!(haystack.contains(&re)); -/// assert_eq!(haystack.find(&re), Some(1)); -/// assert_eq!(haystack.match_indices(&re).collect::<Vec<_>>(), -/// vec![(1, "111"), (5, "222")]); -/// assert_eq!(haystack.split(&re).collect::<Vec<_>>(), vec!["a", "b", "c"]); -/// ``` -#[derive(Clone)] -pub struct Regex(Exec); - -impl fmt::Display for Regex { - /// Shows the original regular expression. - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}", self.as_str()) - } -} - -impl fmt::Debug for Regex { - /// Shows the original regular expression. - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Display::fmt(self, f) - } -} - -#[doc(hidden)] -impl From<Exec> for Regex { - fn from(exec: Exec) -> Regex { - Regex(exec) - } -} - -impl FromStr for Regex { - type Err = Error; - - /// Attempts to parse a string into a regular expression - fn from_str(s: &str) -> Result<Regex, Error> { - Regex::new(s) - } -} - -/// Core regular expression methods. -impl Regex { - /// Compiles a regular expression. Once compiled, it can be used repeatedly - /// to search, split or replace text in a string. - /// - /// If an invalid expression is given, then an error is returned. - pub fn new(re: &str) -> Result<Regex, Error> { - RegexBuilder::new(re).build() - } - - /// Returns true if and only if there is a match for the regex in the - /// string given. - /// - /// It is recommended to use this method if all you need to do is test - /// a match, since the underlying matching engine may be able to do less - /// work. 
- /// - /// # Example - /// - /// Test if some text contains at least one word with exactly 13 - /// Unicode word characters: - /// - /// ```rust - /// # use regex::Regex; - /// # fn main() { - /// let text = "I categorically deny having triskaidekaphobia."; - /// assert!(Regex::new(r"\b\w{13}\b").unwrap().is_match(text)); - /// # } - /// ``` - pub fn is_match(&self, text: &str) -> bool { - self.is_match_at(text, 0) - } - - /// Returns the start and end byte range of the leftmost-first match in - /// `text`. If no match exists, then `None` is returned. - /// - /// Note that this should only be used if you want to discover the position - /// of the match. Testing the existence of a match is faster if you use - /// `is_match`. - /// - /// # Example - /// - /// Find the start and end location of the first word with exactly 13 - /// Unicode word characters: - /// - /// ```rust - /// # use regex::Regex; - /// # fn main() { - /// let text = "I categorically deny having triskaidekaphobia."; - /// let mat = Regex::new(r"\b\w{13}\b").unwrap().find(text).unwrap(); - /// assert_eq!(mat.start(), 2); - /// assert_eq!(mat.end(), 15); - /// # } - /// ``` - pub fn find<'t>(&self, text: &'t str) -> Option<Match<'t>> { - self.find_at(text, 0) - } - - /// Returns an iterator for each successive non-overlapping match in - /// `text`, returning the start and end byte indices with respect to - /// `text`. 
- /// - /// # Example - /// - /// Find the start and end location of every word with exactly 13 Unicode - /// word characters: - /// - /// ```rust - /// # use regex::Regex; - /// # fn main() { - /// let text = "Retroactively relinquishing remunerations is reprehensible."; - /// for mat in Regex::new(r"\b\w{13}\b").unwrap().find_iter(text) { - /// println!("{:?}", mat); - /// } - /// # } - /// ``` - pub fn find_iter<'r, 't>(&'r self, text: &'t str) -> Matches<'r, 't> { - Matches(self.0.searcher_str().find_iter(text)) - } - - /// Returns the capture groups corresponding to the leftmost-first - /// match in `text`. Capture group `0` always corresponds to the entire - /// match. If no match is found, then `None` is returned. - /// - /// You should only use `captures` if you need access to the location of - /// capturing group matches. Otherwise, `find` is faster for discovering - /// the location of the overall match. - /// - /// # Examples - /// - /// Say you have some text with movie names and their release years, - /// like "'Citizen Kane' (1941)". It'd be nice if we could search for text - /// looking like that, while also extracting the movie name and its release - /// year separately. - /// - /// ```rust - /// # use regex::Regex; - /// # fn main() { - /// let re = Regex::new(r"'([^']+)'\s+\((\d{4})\)").unwrap(); - /// let text = "Not my favorite movie: 'Citizen Kane' (1941)."; - /// let caps = re.captures(text).unwrap(); - /// assert_eq!(caps.get(1).unwrap().as_str(), "Citizen Kane"); - /// assert_eq!(caps.get(2).unwrap().as_str(), "1941"); - /// assert_eq!(caps.get(0).unwrap().as_str(), "'Citizen Kane' (1941)"); - /// // You can also access the groups by index using the Index notation. - /// // Note that this will panic on an invalid index. - /// assert_eq!(&caps[1], "Citizen Kane"); - /// assert_eq!(&caps[2], "1941"); - /// assert_eq!(&caps[0], "'Citizen Kane' (1941)"); - /// # } - /// ``` - /// - /// Note that the full match is at capture group `0`. 
Each subsequent - /// capture group is indexed by the order of its opening `(`. - /// - /// We can make this example a bit clearer by using *named* capture groups: - /// - /// ```rust - /// # use regex::Regex; - /// # fn main() { - /// let re = Regex::new(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)") - /// .unwrap(); - /// let text = "Not my favorite movie: 'Citizen Kane' (1941)."; - /// let caps = re.captures(text).unwrap(); - /// assert_eq!(caps.name("title").unwrap().as_str(), "Citizen Kane"); - /// assert_eq!(caps.name("year").unwrap().as_str(), "1941"); - /// assert_eq!(caps.get(0).unwrap().as_str(), "'Citizen Kane' (1941)"); - /// // You can also access the groups by name using the Index notation. - /// // Note that this will panic on an invalid group name. - /// assert_eq!(&caps["title"], "Citizen Kane"); - /// assert_eq!(&caps["year"], "1941"); - /// assert_eq!(&caps[0], "'Citizen Kane' (1941)"); - /// - /// # } - /// ``` - /// - /// Here we name the capture groups, which we can access with the `name` - /// method or the `Index` notation with a `&str`. Note that the named - /// capture groups are still accessible with `get` or the `Index` notation - /// with a `usize`. - /// - /// The `0`th capture group is always unnamed, so it must always be - /// accessed with `get(0)` or `[0]`. - pub fn captures<'t>(&self, text: &'t str) -> Option<Captures<'t>> { - self.captures_at(text, 0) - } - - /// Returns an iterator over all the non-overlapping capture groups matched - /// in `text`. This is operationally the same as `find_iter`, except it - /// yields information about capturing group matches. 
- /// - /// # Example - /// - /// We can use this to find all movie titles and their release years in - /// some text, where the movie is formatted like "'Title' (xxxx)": - /// - /// ```rust - /// # use regex::Regex; - /// # fn main() { - /// let re = Regex::new(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)") - /// .unwrap(); - /// let text = "'Citizen Kane' (1941), 'The Wizard of Oz' (1939), 'M' (1931)."; - /// for caps in re.captures_iter(text) { - /// println!("Movie: {:?}, Released: {:?}", - /// &caps["title"], &caps["year"]); - /// } - /// // Output: - /// // Movie: Citizen Kane, Released: 1941 - /// // Movie: The Wizard of Oz, Released: 1939 - /// // Movie: M, Released: 1931 - /// # } - /// ``` - pub fn captures_iter<'r, 't>( - &'r self, - text: &'t str, - ) -> CaptureMatches<'r, 't> { - CaptureMatches(self.0.searcher_str().captures_iter(text)) - } - - /// Returns an iterator of substrings of `text` delimited by a match of the - /// regular expression. Namely, each element of the iterator corresponds to - /// text that *isn't* matched by the regular expression. - /// - /// This method will *not* copy the text given. - /// - /// # Example - /// - /// To split a string delimited by arbitrary amounts of spaces or tabs: - /// - /// ```rust - /// # use regex::Regex; - /// # fn main() { - /// let re = Regex::new(r"[ \t]+").unwrap(); - /// let fields: Vec<&str> = re.split("a b \t c\td e").collect(); - /// assert_eq!(fields, vec!["a", "b", "c", "d", "e"]); - /// # } - /// ``` - pub fn split<'r, 't>(&'r self, text: &'t str) -> Split<'r, 't> { - Split { finder: self.find_iter(text), last: 0 } - } - - /// Returns an iterator of at most `limit` substrings of `text` delimited - /// by a match of the regular expression. (A `limit` of `0` will return no - /// substrings.) Namely, each element of the iterator corresponds to text - /// that *isn't* matched by the regular expression. The remainder of the - /// string that is not split will be the last element in the iterator. 
- /// - /// This method will *not* copy the text given. - /// - /// # Example - /// - /// Get the first two words in some text: - /// - /// ```rust - /// # use regex::Regex; - /// # fn main() { - /// let re = Regex::new(r"\W+").unwrap(); - /// let fields: Vec<&str> = re.splitn("Hey! How are you?", 3).collect(); - /// assert_eq!(fields, vec!("Hey", "How", "are you?")); - /// # } - /// ``` - pub fn splitn<'r, 't>( - &'r self, - text: &'t str, - limit: usize, - ) -> SplitN<'r, 't> { - SplitN { splits: self.split(text), n: limit } - } - - /// Replaces the leftmost-first match with the replacement provided. - /// The replacement can be a regular string (where `$N` and `$name` are - /// expanded to match capture groups) or a function that takes the matches' - /// `Captures` and returns the replaced string. - /// - /// If no match is found, then a copy of the string is returned unchanged. - /// - /// # Replacement string syntax - /// - /// All instances of `$name` in the replacement text is replaced with the - /// corresponding capture group `name`. - /// - /// `name` may be an integer corresponding to the index of the - /// capture group (counted by order of opening parenthesis where `0` is the - /// entire match) or it can be a name (consisting of letters, digits or - /// underscores) corresponding to a named capture group. - /// - /// If `name` isn't a valid capture group (whether the name doesn't exist - /// or isn't a valid index), then it is replaced with the empty string. - /// - /// The longest possible name is used. e.g., `$1a` looks up the capture - /// group named `1a` and not the capture group at index `1`. To exert more - /// precise control over the name, use braces, e.g., `${1}a`. - /// - /// To write a literal `$` use `$$`. - /// - /// # Examples - /// - /// Note that this function is polymorphic with respect to the replacement. 
- /// In typical usage, this can just be a normal string: - /// - /// ```rust - /// # use regex::Regex; - /// # fn main() { - /// let re = Regex::new("[^01]+").unwrap(); - /// assert_eq!(re.replace("1078910", ""), "1010"); - /// # } - /// ``` - /// - /// But anything satisfying the `Replacer` trait will work. For example, - /// a closure of type `|&Captures| -> String` provides direct access to the - /// captures corresponding to a match. This allows one to access - /// capturing group matches easily: - /// - /// ```rust - /// # use regex::Regex; - /// # use regex::Captures; fn main() { - /// let re = Regex::new(r"([^,\s]+),\s+(\S+)").unwrap(); - /// let result = re.replace("Springsteen, Bruce", |caps: &Captures| { - /// format!("{} {}", &caps[2], &caps[1]) - /// }); - /// assert_eq!(result, "Bruce Springsteen"); - /// # } - /// ``` - /// - /// But this is a bit cumbersome to use all the time. Instead, a simple - /// syntax is supported that expands `$name` into the corresponding capture - /// group. Here's the last example, but using this expansion technique - /// with named capture groups: - /// - /// ```rust - /// # use regex::Regex; - /// # fn main() { - /// let re = Regex::new(r"(?P<last>[^,\s]+),\s+(?P<first>\S+)").unwrap(); - /// let result = re.replace("Springsteen, Bruce", "$first $last"); - /// assert_eq!(result, "Bruce Springsteen"); - /// # } - /// ``` - /// - /// Note that using `$2` instead of `$first` or `$1` instead of `$last` - /// would produce the same result. To write a literal `$` use `$$`. - /// - /// Sometimes the replacement string requires use of curly braces to - /// delineate a capture group replacement and surrounding literal text. 
- /// For example, if we wanted to join two words together with an - /// underscore: - /// - /// ```rust - /// # use regex::Regex; - /// # fn main() { - /// let re = Regex::new(r"(?P<first>\w+)\s+(?P<second>\w+)").unwrap(); - /// let result = re.replace("deep fried", "${first}_$second"); - /// assert_eq!(result, "deep_fried"); - /// # } - /// ``` - /// - /// Without the curly braces, the capture group name `first_` would be - /// used, and since it doesn't exist, it would be replaced with the empty - /// string. - /// - /// Finally, sometimes you just want to replace a literal string with no - /// regard for capturing group expansion. This can be done by wrapping a - /// byte string with `NoExpand`: - /// - /// ```rust - /// # use regex::Regex; - /// # fn main() { - /// use regex::NoExpand; - /// - /// let re = Regex::new(r"(?P<last>[^,\s]+),\s+(\S+)").unwrap(); - /// let result = re.replace("Springsteen, Bruce", NoExpand("$2 $last")); - /// assert_eq!(result, "$2 $last"); - /// # } - /// ``` - pub fn replace<'t, R: Replacer>( - &self, - text: &'t str, - rep: R, - ) -> Cow<'t, str> { - self.replacen(text, 1, rep) - } - - /// Replaces all non-overlapping matches in `text` with the replacement - /// provided. This is the same as calling `replacen` with `limit` set to - /// `0`. - /// - /// See the documentation for `replace` for details on how to access - /// capturing group matches in the replacement string. - pub fn replace_all<'t, R: Replacer>( - &self, - text: &'t str, - rep: R, - ) -> Cow<'t, str> { - self.replacen(text, 0, rep) - } - - /// Replaces at most `limit` non-overlapping matches in `text` with the - /// replacement provided. If `limit` is 0, then all non-overlapping matches - /// are replaced. - /// - /// See the documentation for `replace` for details on how to access - /// capturing group matches in the replacement string. 
- pub fn replacen<'t, R: Replacer>( - &self, - text: &'t str, - limit: usize, - mut rep: R, - ) -> Cow<'t, str> { - // If we know that the replacement doesn't have any capture expansions, - // then we can use the fast path. The fast path can make a tremendous - // difference: - // - // 1) We use `find_iter` instead of `captures_iter`. Not asking for - // captures generally makes the regex engines faster. - // 2) We don't need to look up all of the capture groups and do - // replacements inside the replacement string. We just push it - // at each match and be done with it. - if let Some(rep) = rep.no_expansion() { - let mut it = self.find_iter(text).enumerate().peekable(); - if it.peek().is_none() { - return Cow::Borrowed(text); - } - let mut new = String::with_capacity(text.len()); - let mut last_match = 0; - for (i, m) in it { - new.push_str(&text[last_match..m.start()]); - new.push_str(&rep); - last_match = m.end(); - if limit > 0 && i >= limit - 1 { - break; - } - } - new.push_str(&text[last_match..]); - return Cow::Owned(new); - } - - // The slower path, which we use if the replacement needs access to - // capture groups. - let mut it = self.captures_iter(text).enumerate().peekable(); - if it.peek().is_none() { - return Cow::Borrowed(text); - } - let mut new = String::with_capacity(text.len()); - let mut last_match = 0; - for (i, cap) in it { - // unwrap on 0 is OK because captures only reports matches - let m = cap.get(0).unwrap(); - new.push_str(&text[last_match..m.start()]); - rep.replace_append(&cap, &mut new); - last_match = m.end(); - if limit > 0 && i >= limit - 1 { - break; - } - } - new.push_str(&text[last_match..]); - Cow::Owned(new) - } -} - -/// Advanced or "lower level" search methods. -impl Regex { - /// Returns the end location of a match in the text given. - /// - /// This method may have the same performance characteristics as - /// `is_match`, except it provides an end location for a match. 
In - /// particular, the location returned *may be shorter* than the proper end - /// of the leftmost-first match that you would find via `Regex::find`. - /// - /// Note that it is not guaranteed that this routine finds the shortest or - /// "earliest" possible match. Instead, the main idea of this API is that - /// it returns the offset at the point at which the internal regex engine - /// has determined that a match has occurred. This may vary depending on - /// which internal regex engine is used, and thus, the offset itself may - /// change. - /// - /// # Example - /// - /// Typically, `a+` would match the entire first sequence of `a` in some - /// text, but `shortest_match` can give up as soon as it sees the first - /// `a`. - /// - /// ```rust - /// # use regex::Regex; - /// # fn main() { - /// let text = "aaaaa"; - /// let pos = Regex::new(r"a+").unwrap().shortest_match(text); - /// assert_eq!(pos, Some(1)); - /// # } - /// ``` - pub fn shortest_match(&self, text: &str) -> Option<usize> { - self.shortest_match_at(text, 0) - } - - /// Returns the same as `shortest_match`, but starts the search at the - /// given offset. - /// - /// The significance of the starting point is that it takes the surrounding - /// context into consideration. For example, the `\A` anchor can only match - /// when `start == 0`. - pub fn shortest_match_at( - &self, - text: &str, - start: usize, - ) -> Option<usize> { - self.0.searcher_str().shortest_match_at(text, start) - } - - /// Returns the same as is_match, but starts the search at the given - /// offset. - /// - /// The significance of the starting point is that it takes the surrounding - /// context into consideration. For example, the `\A` anchor can only - /// match when `start == 0`. - pub fn is_match_at(&self, text: &str, start: usize) -> bool { - self.0.searcher_str().is_match_at(text, start) - } - - /// Returns the same as find, but starts the search at the given - /// offset. 
- /// - /// The significance of the starting point is that it takes the surrounding - /// context into consideration. For example, the `\A` anchor can only - /// match when `start == 0`. - pub fn find_at<'t>( - &self, - text: &'t str, - start: usize, - ) -> Option<Match<'t>> { - self.0 - .searcher_str() - .find_at(text, start) - .map(|(s, e)| Match::new(text, s, e)) - } - - /// Returns the same as [`Regex::captures`], but starts the search at the - /// given offset. - /// - /// The significance of the starting point is that it takes the surrounding - /// context into consideration. For example, the `\A` anchor can only - /// match when `start == 0`. - pub fn captures_at<'t>( - &self, - text: &'t str, - start: usize, - ) -> Option<Captures<'t>> { - let mut locs = self.capture_locations(); - self.captures_read_at(&mut locs, text, start).map(move |_| Captures { - text, - locs: locs.0, - named_groups: self.0.capture_name_idx().clone(), - }) - } - - /// This is like `captures`, but uses - /// [`CaptureLocations`](struct.CaptureLocations.html) - /// instead of - /// [`Captures`](struct.Captures.html) in order to amortize allocations. - /// - /// To create a `CaptureLocations` value, use the - /// `Regex::capture_locations` method. - /// - /// This returns the overall match if this was successful, which is always - /// equivalence to the `0`th capture group. - pub fn captures_read<'t>( - &self, - locs: &mut CaptureLocations, - text: &'t str, - ) -> Option<Match<'t>> { - self.captures_read_at(locs, text, 0) - } - - /// Returns the same as captures, but starts the search at the given - /// offset and populates the capture locations given. - /// - /// The significance of the starting point is that it takes the surrounding - /// context into consideration. For example, the `\A` anchor can only - /// match when `start == 0`. 
- pub fn captures_read_at<'t>( - &self, - locs: &mut CaptureLocations, - text: &'t str, - start: usize, - ) -> Option<Match<'t>> { - self.0 - .searcher_str() - .captures_read_at(&mut locs.0, text, start) - .map(|(s, e)| Match::new(text, s, e)) - } - - /// An undocumented alias for `captures_read_at`. - /// - /// The `regex-capi` crate previously used this routine, so to avoid - /// breaking that crate, we continue to provide the name as an undocumented - /// alias. - #[doc(hidden)] - pub fn read_captures_at<'t>( - &self, - locs: &mut CaptureLocations, - text: &'t str, - start: usize, - ) -> Option<Match<'t>> { - self.captures_read_at(locs, text, start) - } -} - -/// Auxiliary methods. -impl Regex { - /// Returns the original string of this regex. - pub fn as_str(&self) -> &str { - &self.0.regex_strings()[0] - } - - /// Returns an iterator over the capture names. - pub fn capture_names(&self) -> CaptureNames<'_> { - CaptureNames(self.0.capture_names().iter()) - } - - /// Returns the number of captures. - pub fn captures_len(&self) -> usize { - self.0.capture_names().len() - } - - /// Returns the total number of capturing groups that appear in every - /// possible match. - /// - /// If the number of capture groups can vary depending on the match, then - /// this returns `None`. That is, a value is only returned when the number - /// of matching groups is invariant or "static." - /// - /// Note that like [`Regex::captures_len`], this **does** include the - /// implicit capturing group corresponding to the entire match. Therefore, - /// when a non-None value is returned, it is guaranteed to be at least `1`. - /// Stated differently, a return value of `Some(0)` is impossible. - /// - /// # Example - /// - /// This shows a few cases where a static number of capture groups is - /// available and a few cases where it is not. 
- /// - /// ``` - /// use regex::Regex; - /// - /// let len = |pattern| { - /// Regex::new(pattern).map(|re| re.static_captures_len()) - /// }; - /// - /// assert_eq!(Some(1), len("a")?); - /// assert_eq!(Some(2), len("(a)")?); - /// assert_eq!(Some(2), len("(a)|(b)")?); - /// assert_eq!(Some(3), len("(a)(b)|(c)(d)")?); - /// assert_eq!(None, len("(a)|b")?); - /// assert_eq!(None, len("a|(b)")?); - /// assert_eq!(None, len("(b)*")?); - /// assert_eq!(Some(2), len("(b)+")?); - /// - /// # Ok::<(), Box<dyn std::error::Error>>(()) - /// ``` - #[inline] - pub fn static_captures_len(&self) -> Option<usize> { - self.0.static_captures_len().map(|len| len.saturating_add(1)) - } - - /// Returns an empty set of capture locations that can be reused in - /// multiple calls to `captures_read` or `captures_read_at`. - pub fn capture_locations(&self) -> CaptureLocations { - CaptureLocations(self.0.searcher_str().locations()) - } - - /// An alias for `capture_locations` to preserve backward compatibility. - /// - /// The `regex-capi` crate uses this method, so to avoid breaking that - /// crate, we continue to export it as an undocumented API. - #[doc(hidden)] - pub fn locations(&self) -> CaptureLocations { - CaptureLocations(self.0.searcher_str().locations()) - } -} - -/// An iterator over the names of all possible captures. -/// -/// `None` indicates an unnamed capture; the first element (capture 0, the -/// whole matched region) is always unnamed. -/// -/// `'r` is the lifetime of the compiled regular expression. 
-#[derive(Clone, Debug)] -pub struct CaptureNames<'r>(::std::slice::Iter<'r, Option<String>>); - -impl<'r> Iterator for CaptureNames<'r> { - type Item = Option<&'r str>; - - fn next(&mut self) -> Option<Option<&'r str>> { - self.0 - .next() - .as_ref() - .map(|slot| slot.as_ref().map(|name| name.as_ref())) - } - - fn size_hint(&self) -> (usize, Option<usize>) { - self.0.size_hint() - } - - fn count(self) -> usize { - self.0.count() - } -} - -impl<'r> ExactSizeIterator for CaptureNames<'r> {} - -impl<'r> FusedIterator for CaptureNames<'r> {} - -/// Yields all substrings delimited by a regular expression match. -/// -/// `'r` is the lifetime of the compiled regular expression and `'t` is the -/// lifetime of the string being split. -#[derive(Debug)] -pub struct Split<'r, 't> { - finder: Matches<'r, 't>, - last: usize, -} - -impl<'r, 't> Iterator for Split<'r, 't> { - type Item = &'t str; - - fn next(&mut self) -> Option<&'t str> { - let text = self.finder.0.text(); - match self.finder.next() { - None => { - if self.last > text.len() { - None - } else { - let s = &text[self.last..]; - self.last = text.len() + 1; // Next call will return None - Some(s) - } - } - Some(m) => { - let matched = &text[self.last..m.start()]; - self.last = m.end(); - Some(matched) - } - } - } -} - -impl<'r, 't> FusedIterator for Split<'r, 't> {} - -/// Yields at most `N` substrings delimited by a regular expression match. -/// -/// The last substring will be whatever remains after splitting. -/// -/// `'r` is the lifetime of the compiled regular expression and `'t` is the -/// lifetime of the string being split. 
-#[derive(Debug)] -pub struct SplitN<'r, 't> { - splits: Split<'r, 't>, - n: usize, -} - -impl<'r, 't> Iterator for SplitN<'r, 't> { - type Item = &'t str; - - fn next(&mut self) -> Option<&'t str> { - if self.n == 0 { - return None; - } - - self.n -= 1; - if self.n > 0 { - return self.splits.next(); - } - - let text = self.splits.finder.0.text(); - if self.splits.last > text.len() { - // We've already returned all substrings. - None - } else { - // self.n == 0, so future calls will return None immediately - Some(&text[self.splits.last..]) - } - } - - fn size_hint(&self) -> (usize, Option<usize>) { - (0, Some(self.n)) - } -} - -impl<'r, 't> FusedIterator for SplitN<'r, 't> {} - -/// CaptureLocations is a low level representation of the raw offsets of each -/// submatch. -/// -/// You can think of this as a lower level -/// [`Captures`](struct.Captures.html), where this type does not support -/// named capturing groups directly and it does not borrow the text that these -/// offsets were matched on. -/// -/// Primarily, this type is useful when using the lower level `Regex` APIs -/// such as `read_captures`, which permits amortizing the allocation in which -/// capture match locations are stored. -/// -/// In order to build a value of this type, you'll need to call the -/// `capture_locations` method on the `Regex` being used to execute the search. -/// The value returned can then be reused in subsequent searches. -/// -/// # Example -/// -/// This example shows how to create and use `CaptureLocations` in a search. 
-/// -/// ``` -/// use regex::Regex; -/// -/// let re = Regex::new(r"(?<first>\w+)\s+(?<last>\w+)").unwrap(); -/// let mut locs = re.capture_locations(); -/// let m = re.captures_read(&mut locs, "Bruce Springsteen").unwrap(); -/// assert_eq!(0..17, m.range()); -/// assert_eq!(Some((0, 17)), locs.get(0)); -/// assert_eq!(Some((0, 5)), locs.get(1)); -/// assert_eq!(Some((6, 17)), locs.get(2)); -/// -/// // Asking for an invalid capture group always returns None. -/// assert_eq!(None, locs.get(3)); -/// assert_eq!(None, locs.get(34973498648)); -/// assert_eq!(None, locs.get(9944060567225171988)); -/// ``` -#[derive(Clone, Debug)] -pub struct CaptureLocations(re_trait::Locations); - -/// A type alias for `CaptureLocations` for backwards compatibility. -/// -/// Previously, we exported `CaptureLocations` as `Locations` in an -/// undocumented API. To prevent breaking that code (e.g., in `regex-capi`), -/// we continue re-exporting the same undocumented API. -#[doc(hidden)] -pub type Locations = CaptureLocations; - -impl CaptureLocations { - /// Returns the start and end positions of the Nth capture group. Returns - /// `None` if `i` is not a valid capture group or if the capture group did - /// not match anything. The positions returned are *always* byte indices - /// with respect to the original string matched. - #[inline] - pub fn get(&self, i: usize) -> Option<(usize, usize)> { - self.0.pos(i) - } - - /// Returns the total number of capture groups (even if they didn't match). - /// - /// This is always at least `1` since every regex has at least `1` - /// capturing group that corresponds to the entire match. - #[inline] - pub fn len(&self) -> usize { - self.0.len() - } - - /// An alias for the `get` method for backwards compatibility. - /// - /// Previously, we exported `get` as `pos` in an undocumented API. To - /// prevent breaking that code (e.g., in `regex-capi`), we continue - /// re-exporting the same undocumented API. 
- #[doc(hidden)] - #[inline] - pub fn pos(&self, i: usize) -> Option<(usize, usize)> { - self.get(i) - } -} - -/// Captures represents a group of captured strings for a single match. -/// -/// The 0th capture always corresponds to the entire match. Each subsequent -/// index corresponds to the next capture group in the regex. If a capture -/// group is named, then the matched string is *also* available via the `name` -/// method. (Note that the 0th capture is always unnamed and so must be -/// accessed with the `get` method.) -/// -/// Positions returned from a capture group are always byte indices. -/// -/// `'t` is the lifetime of the matched text. -pub struct Captures<'t> { - text: &'t str, - locs: re_trait::Locations, - named_groups: Arc<HashMap<String, usize>>, -} - -impl<'t> Captures<'t> { - /// Returns the match associated with the capture group at index `i`. If - /// `i` does not correspond to a capture group, or if the capture group - /// did not participate in the match, then `None` is returned. - /// - /// # Examples - /// - /// Get the text of the match with a default of an empty string if this - /// group didn't participate in the match: - /// - /// ```rust - /// # use regex::Regex; - /// let re = Regex::new(r"[a-z]+(?:([0-9]+)|([A-Z]+))").unwrap(); - /// let caps = re.captures("abc123").unwrap(); - /// - /// let text1 = caps.get(1).map_or("", |m| m.as_str()); - /// let text2 = caps.get(2).map_or("", |m| m.as_str()); - /// assert_eq!(text1, "123"); - /// assert_eq!(text2, ""); - /// ``` - pub fn get(&self, i: usize) -> Option<Match<'t>> { - self.locs.pos(i).map(|(s, e)| Match::new(self.text, s, e)) - } - - /// Returns the match for the capture group named `name`. If `name` isn't a - /// valid capture group or didn't match anything, then `None` is returned. 
- pub fn name(&self, name: &str) -> Option<Match<'t>> { - self.named_groups.get(name).and_then(|&i| self.get(i)) - } - - /// An iterator that yields all capturing matches in the order in which - /// they appear in the regex. If a particular capture group didn't - /// participate in the match, then `None` is yielded for that capture. - /// - /// The first match always corresponds to the overall match of the regex. - pub fn iter<'c>(&'c self) -> SubCaptureMatches<'c, 't> { - SubCaptureMatches { caps: self, it: self.locs.iter() } - } - - /// Expands all instances of `$name` in `replacement` to the corresponding - /// capture group `name`, and writes them to the `dst` buffer given. - /// - /// `name` may be an integer corresponding to the index of the capture - /// group (counted by order of opening parenthesis where `0` is the - /// entire match) or it can be a name (consisting of letters, digits or - /// underscores) corresponding to a named capture group. - /// - /// If `name` isn't a valid capture group (whether the name doesn't exist - /// or isn't a valid index), then it is replaced with the empty string. - /// - /// The longest possible name consisting of the characters `[_0-9A-Za-z]` - /// is used. e.g., `$1a` looks up the capture group named `1a` and not the - /// capture group at index `1`. To exert more precise control over the - /// name, or to refer to a capture group name that uses characters outside - /// of `[_0-9A-Za-z]`, use braces, e.g., `${1}a` or `${foo[bar].baz}`. When - /// using braces, any sequence of characters is permitted. If the sequence - /// does not refer to a capture group name in the corresponding regex, then - /// it is replaced with an empty string. - /// - /// To write a literal `$` use `$$`. - pub fn expand(&self, replacement: &str, dst: &mut String) { - expand_str(self, replacement, dst) - } - - /// Returns the total number of capture groups (even if they didn't match). 
- /// - /// This is always at least `1`, since every regex has at least one capture - /// group that corresponds to the full match. - #[inline] - pub fn len(&self) -> usize { - self.locs.len() - } -} - -impl<'t> fmt::Debug for Captures<'t> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_tuple("Captures").field(&CapturesDebug(self)).finish() - } -} - -struct CapturesDebug<'c, 't>(&'c Captures<'t>); - -impl<'c, 't> fmt::Debug for CapturesDebug<'c, 't> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - // We'd like to show something nice here, even if it means an - // allocation to build a reverse index. - let slot_to_name: HashMap<&usize, &String> = - self.0.named_groups.iter().map(|(a, b)| (b, a)).collect(); - let mut map = f.debug_map(); - for (slot, m) in self.0.locs.iter().enumerate() { - let m = m.map(|(s, e)| &self.0.text[s..e]); - if let Some(name) = slot_to_name.get(&slot) { - map.entry(&name, &m); - } else { - map.entry(&slot, &m); - } - } - map.finish() - } -} - -/// Get a group by index. -/// -/// `'t` is the lifetime of the matched text. -/// -/// The text can't outlive the `Captures` object if this method is -/// used, because of how `Index` is defined (normally `a[i]` is part -/// of `a` and can't outlive it); to do that, use `get()` instead. -/// -/// # Panics -/// -/// If there is no group at the given index. -impl<'t> Index<usize> for Captures<'t> { - type Output = str; - - fn index(&self, i: usize) -> &str { - self.get(i) - .map(|m| m.as_str()) - .unwrap_or_else(|| panic!("no group at index '{}'", i)) - } -} - -/// Get a group by name. -/// -/// `'t` is the lifetime of the matched text and `'i` is the lifetime -/// of the group name (the index). -/// -/// The text can't outlive the `Captures` object if this method is -/// used, because of how `Index` is defined (normally `a[i]` is part -/// of `a` and can't outlive it); to do that, use `name` instead. 
-/// -/// # Panics -/// -/// If there is no group named by the given value. -impl<'t, 'i> Index<&'i str> for Captures<'t> { - type Output = str; - - fn index<'a>(&'a self, name: &'i str) -> &'a str { - self.name(name) - .map(|m| m.as_str()) - .unwrap_or_else(|| panic!("no group named '{}'", name)) - } -} - -/// An iterator that yields all capturing matches in the order in which they -/// appear in the regex. -/// -/// If a particular capture group didn't participate in the match, then `None` -/// is yielded for that capture. The first match always corresponds to the -/// overall match of the regex. -/// -/// The lifetime `'c` corresponds to the lifetime of the `Captures` value, and -/// the lifetime `'t` corresponds to the originally matched text. -#[derive(Clone, Debug)] -pub struct SubCaptureMatches<'c, 't> { - caps: &'c Captures<'t>, - it: SubCapturesPosIter<'c>, -} - -impl<'c, 't> Iterator for SubCaptureMatches<'c, 't> { - type Item = Option<Match<'t>>; - - fn next(&mut self) -> Option<Option<Match<'t>>> { - self.it - .next() - .map(|cap| cap.map(|(s, e)| Match::new(self.caps.text, s, e))) - } - - fn size_hint(&self) -> (usize, Option<usize>) { - self.it.size_hint() - } - - fn count(self) -> usize { - self.it.count() - } -} - -impl<'c, 't> ExactSizeIterator for SubCaptureMatches<'c, 't> {} - -impl<'c, 't> FusedIterator for SubCaptureMatches<'c, 't> {} - -/// An iterator that yields all non-overlapping capture groups matching a -/// particular regular expression. -/// -/// The iterator stops when no more matches can be found. -/// -/// `'r` is the lifetime of the compiled regular expression and `'t` is the -/// lifetime of the matched string. 
-#[derive(Debug)] -pub struct CaptureMatches<'r, 't>( - re_trait::CaptureMatches<'t, ExecNoSyncStr<'r>>, -); - -impl<'r, 't> Iterator for CaptureMatches<'r, 't> { - type Item = Captures<'t>; - - fn next(&mut self) -> Option<Captures<'t>> { - self.0.next().map(|locs| Captures { - text: self.0.text(), - locs, - named_groups: self.0.regex().capture_name_idx().clone(), - }) - } -} - -impl<'r, 't> FusedIterator for CaptureMatches<'r, 't> {} - -/// An iterator over all non-overlapping matches for a particular string. -/// -/// The iterator yields a `Match` value. The iterator stops when no more -/// matches can be found. -/// -/// `'r` is the lifetime of the compiled regular expression and `'t` is the -/// lifetime of the matched string. -#[derive(Debug)] -pub struct Matches<'r, 't>(re_trait::Matches<'t, ExecNoSyncStr<'r>>); - -impl<'r, 't> Iterator for Matches<'r, 't> { - type Item = Match<'t>; - - fn next(&mut self) -> Option<Match<'t>> { - let text = self.0.text(); - self.0.next().map(|(s, e)| Match::new(text, s, e)) - } -} - -impl<'r, 't> FusedIterator for Matches<'r, 't> {} - -/// Replacer describes types that can be used to replace matches in a string. -/// -/// In general, users of this crate shouldn't need to implement this trait, -/// since implementations are already provided for `&str` along with other -/// variants of string types and `FnMut(&Captures) -> String` (or any -/// `FnMut(&Captures) -> T` where `T: AsRef<str>`), which covers most use cases. -pub trait Replacer { - /// Appends text to `dst` to replace the current match. - /// - /// The current match is represented by `caps`, which is guaranteed to - /// have a match at capture group `0`. - /// - /// For example, a no-op replacement would be - /// `dst.push_str(caps.get(0).unwrap().as_str())`. - fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String); - - /// Return a fixed unchanging replacement string. 
- /// - /// When doing replacements, if access to `Captures` is not needed (e.g., - /// the replacement byte string does not need `$` expansion), then it can - /// be beneficial to avoid finding sub-captures. - /// - /// In general, this is called once for every call to `replacen`. - fn no_expansion<'r>(&'r mut self) -> Option<Cow<'r, str>> { - None - } - - /// Return a `Replacer` that borrows and wraps this `Replacer`. - /// - /// This is useful when you want to take a generic `Replacer` (which might - /// not be cloneable) and use it without consuming it, so it can be used - /// more than once. - /// - /// # Example - /// - /// ``` - /// use regex::{Regex, Replacer}; - /// - /// fn replace_all_twice<R: Replacer>( - /// re: Regex, - /// src: &str, - /// mut rep: R, - /// ) -> String { - /// let dst = re.replace_all(src, rep.by_ref()); - /// let dst = re.replace_all(&dst, rep.by_ref()); - /// dst.into_owned() - /// } - /// ``` - fn by_ref<'r>(&'r mut self) -> ReplacerRef<'r, Self> { - ReplacerRef(self) - } -} - -/// By-reference adaptor for a `Replacer` -/// -/// Returned by [`Replacer::by_ref`](trait.Replacer.html#method.by_ref). 
-#[derive(Debug)] -pub struct ReplacerRef<'a, R: ?Sized>(&'a mut R); - -impl<'a, R: Replacer + ?Sized + 'a> Replacer for ReplacerRef<'a, R> { - fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { - self.0.replace_append(caps, dst) - } - fn no_expansion(&mut self) -> Option<Cow<'_, str>> { - self.0.no_expansion() - } -} - -impl<'a> Replacer for &'a str { - fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { - caps.expand(*self, dst); - } - - fn no_expansion(&mut self) -> Option<Cow<'_, str>> { - no_expansion(self) - } -} - -impl<'a> Replacer for &'a String { - fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { - self.as_str().replace_append(caps, dst) - } - - fn no_expansion(&mut self) -> Option<Cow<'_, str>> { - no_expansion(self) - } -} - -impl Replacer for String { - fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { - self.as_str().replace_append(caps, dst) - } - - fn no_expansion(&mut self) -> Option<Cow<'_, str>> { - no_expansion(self) - } -} - -impl<'a> Replacer for Cow<'a, str> { - fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { - self.as_ref().replace_append(caps, dst) - } - - fn no_expansion(&mut self) -> Option<Cow<'_, str>> { - no_expansion(self) - } -} - -impl<'a> Replacer for &'a Cow<'a, str> { - fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { - self.as_ref().replace_append(caps, dst) - } - - fn no_expansion(&mut self) -> Option<Cow<'_, str>> { - no_expansion(self) - } -} - -fn no_expansion<T: AsRef<str>>(t: &T) -> Option<Cow<'_, str>> { - let s = t.as_ref(); - match find_byte(b'$', s.as_bytes()) { - Some(_) => None, - None => Some(Cow::Borrowed(s)), - } -} - -impl<F, T> Replacer for F -where - F: FnMut(&Captures<'_>) -> T, - T: AsRef<str>, -{ - fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { - dst.push_str((*self)(caps).as_ref()); - } -} - -/// `NoExpand` indicates literal string replacement. 
-/// -/// It can be used with `replace` and `replace_all` to do a literal string -/// replacement without expanding `$name` to their corresponding capture -/// groups. This can be both convenient (to avoid escaping `$`, for example) -/// and performant (since capture groups don't need to be found). -/// -/// `'t` is the lifetime of the literal text. -#[derive(Clone, Debug)] -pub struct NoExpand<'t>(pub &'t str); - -impl<'t> Replacer for NoExpand<'t> { - fn replace_append(&mut self, _: &Captures<'_>, dst: &mut String) { - dst.push_str(self.0); - } - - fn no_expansion(&mut self) -> Option<Cow<'_, str>> { - Some(Cow::Borrowed(self.0)) - } -} diff --git a/vendor/regex/src/regex/bytes.rs b/vendor/regex/src/regex/bytes.rs new file mode 100644 index 0000000..ea4f7cd --- /dev/null +++ b/vendor/regex/src/regex/bytes.rs @@ -0,0 +1,2605 @@ +use alloc::{borrow::Cow, string::String, sync::Arc, vec::Vec}; + +use regex_automata::{meta, util::captures, Input, PatternID}; + +use crate::{bytes::RegexBuilder, error::Error}; + +/// A compiled regular expression for searching Unicode haystacks. +/// +/// A `Regex` can be used to search haystacks, split haystacks into substrings +/// or replace substrings in a haystack with a different substring. All +/// searching is done with an implicit `(?s:.)*?` at the beginning and end of +/// an pattern. To force an expression to match the whole string (or a prefix +/// or a suffix), you must use an anchor like `^` or `$` (or `\A` and `\z`). +/// +/// Like the `Regex` type in the parent module, matches with this regex return +/// byte offsets into the haystack. **Unlike** the parent `Regex` type, these +/// byte offsets may not correspond to UTF-8 sequence boundaries since the +/// regexes in this module can match arbitrary bytes. +/// +/// The only methods that allocate new byte strings are the string replacement +/// methods. All other methods (searching and splitting) return borrowed +/// references into the haystack given. 
+/// +/// # Example +/// +/// Find the offsets of a US phone number: +/// +/// ``` +/// use regex::bytes::Regex; +/// +/// let re = Regex::new("[0-9]{3}-[0-9]{3}-[0-9]{4}").unwrap(); +/// let m = re.find(b"phone: 111-222-3333").unwrap(); +/// assert_eq!(7..19, m.range()); +/// ``` +/// +/// # Example: extracting capture groups +/// +/// A common way to use regexes is with capture groups. That is, instead of +/// just looking for matches of an entire regex, parentheses are used to create +/// groups that represent part of the match. +/// +/// For example, consider a haystack with multiple lines, and each line has +/// three whitespace delimited fields where the second field is expected to be +/// a number and the third field a boolean. To make this convenient, we use +/// the [`Captures::extract`] API to put the strings that match each group +/// into a fixed size array: +/// +/// ``` +/// use regex::bytes::Regex; +/// +/// let hay = b" +/// rabbit 54 true +/// groundhog 2 true +/// does not match +/// fox 109 false +/// "; +/// let re = Regex::new(r"(?m)^\s*(\S+)\s+([0-9]+)\s+(true|false)\s*$").unwrap(); +/// let mut fields: Vec<(&[u8], i64, bool)> = vec![]; +/// for (_, [f1, f2, f3]) in re.captures_iter(hay).map(|caps| caps.extract()) { +/// // These unwraps are OK because our pattern is written in a way where +/// // all matches for f2 and f3 will be valid UTF-8. +/// let f2 = std::str::from_utf8(f2).unwrap(); +/// let f3 = std::str::from_utf8(f3).unwrap(); +/// fields.push((f1, f2.parse()?, f3.parse()?)); +/// } +/// assert_eq!(fields, vec![ +/// (&b"rabbit"[..], 54, true), +/// (&b"groundhog"[..], 2, true), +/// (&b"fox"[..], 109, false), +/// ]); +/// +/// # Ok::<(), Box<dyn std::error::Error>>(()) +/// ``` +/// +/// # Example: matching invalid UTF-8 +/// +/// One of the reasons for searching `&[u8]` haystacks is that the `&[u8]` +/// might not be valid UTF-8. Indeed, with a `bytes::Regex`, patterns that +/// match invalid UTF-8 are explicitly allowed. 
Here's one example that looks +/// for valid UTF-8 fields that might be separated by invalid UTF-8. In this +/// case, we use `(?s-u:.)`, which matches any byte. Attempting to use it in a +/// top-level `Regex` will result in the regex failing to compile. Notice also +/// that we use `.` with Unicode mode enabled, in which case, only valid UTF-8 +/// is matched. In this way, we can build one pattern where some parts only +/// match valid UTF-8 while other parts are more permissive. +/// +/// ``` +/// use regex::bytes::Regex; +/// +/// // F0 9F 92 A9 is the UTF-8 encoding for a Pile of Poo. +/// let hay = b"\xFF\xFFfoo\xFF\xFF\xFF\xF0\x9F\x92\xA9\xFF"; +/// // An equivalent to '(?s-u:.)' is '(?-u:[\x00-\xFF])'. +/// let re = Regex::new(r"(?s)(?-u:.)*?(?<f1>.+)(?-u:.)*?(?<f2>.+)").unwrap(); +/// let caps = re.captures(hay).unwrap(); +/// assert_eq!(&caps["f1"], &b"foo"[..]); +/// assert_eq!(&caps["f2"], "💩".as_bytes()); +/// ``` +#[derive(Clone)] +pub struct Regex { + pub(crate) meta: meta::Regex, + pub(crate) pattern: Arc<str>, +} + +impl core::fmt::Display for Regex { + /// Shows the original regular expression. + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, "{}", self.as_str()) + } +} + +impl core::fmt::Debug for Regex { + /// Shows the original regular expression. 
+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_tuple("Regex").field(&self.as_str()).finish() + } +} + +impl core::str::FromStr for Regex { + type Err = Error; + + /// Attempts to parse a string into a regular expression + fn from_str(s: &str) -> Result<Regex, Error> { + Regex::new(s) + } +} + +impl TryFrom<&str> for Regex { + type Error = Error; + + /// Attempts to parse a string into a regular expression + fn try_from(s: &str) -> Result<Regex, Error> { + Regex::new(s) + } +} + +impl TryFrom<String> for Regex { + type Error = Error; + + /// Attempts to parse a string into a regular expression + fn try_from(s: String) -> Result<Regex, Error> { + Regex::new(&s) + } +} + +/// Core regular expression methods. +impl Regex { + /// Compiles a regular expression. Once compiled, it can be used repeatedly + /// to search, split or replace substrings in a haystack. + /// + /// Note that regex compilation tends to be a somewhat expensive process, + /// and unlike higher level environments, compilation is not automatically + /// cached for you. One should endeavor to compile a regex once and then + /// reuse it. For example, it's a bad idea to compile the same regex + /// repeatedly in a loop. + /// + /// # Errors + /// + /// If an invalid pattern is given, then an error is returned. + /// An error is also returned if the pattern is valid, but would + /// produce a regex that is bigger than the configured size limit via + /// [`RegexBuilder::size_limit`]. (A reasonable size limit is enabled by + /// default.) + /// + /// # Example + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// // An Invalid pattern because of an unclosed parenthesis + /// assert!(Regex::new(r"foo(bar").is_err()); + /// // An invalid pattern because the regex would be too big + /// // because Unicode tends to inflate things. 
+ /// assert!(Regex::new(r"\w{1000}").is_err()); + /// // Disabling Unicode can make the regex much smaller, + /// // potentially by up to or more than an order of magnitude. + /// assert!(Regex::new(r"(?-u:\w){1000}").is_ok()); + /// ``` + pub fn new(re: &str) -> Result<Regex, Error> { + RegexBuilder::new(re).build() + } + + /// Returns true if and only if there is a match for the regex anywhere + /// in the haystack given. + /// + /// It is recommended to use this method if all you need to do is test + /// whether a match exists, since the underlying matching engine may be + /// able to do less work. + /// + /// # Example + /// + /// Test if some haystack contains at least one word with exactly 13 + /// Unicode word characters: + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"\b\w{13}\b").unwrap(); + /// let hay = b"I categorically deny having triskaidekaphobia."; + /// assert!(re.is_match(hay)); + /// ``` + #[inline] + pub fn is_match(&self, haystack: &[u8]) -> bool { + self.is_match_at(haystack, 0) + } + + /// This routine searches for the first match of this regex in the + /// haystack given, and if found, returns a [`Match`]. The `Match` + /// provides access to both the byte offsets of the match and the actual + /// substring that matched. + /// + /// Note that this should only be used if you want to find the entire + /// match. If instead you just want to test the existence of a match, + /// it's potentially faster to use `Regex::is_match(hay)` instead of + /// `Regex::find(hay).is_some()`. 
+ /// + /// # Example + /// + /// Find the first word with exactly 13 Unicode word characters: + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"\b\w{13}\b").unwrap(); + /// let hay = b"I categorically deny having triskaidekaphobia."; + /// let mat = re.find(hay).unwrap(); + /// assert_eq!(2..15, mat.range()); + /// assert_eq!(b"categorically", mat.as_bytes()); + /// ``` + #[inline] + pub fn find<'h>(&self, haystack: &'h [u8]) -> Option<Match<'h>> { + self.find_at(haystack, 0) + } + + /// Returns an iterator that yields successive non-overlapping matches in + /// the given haystack. The iterator yields values of type [`Match`]. + /// + /// # Time complexity + /// + /// Note that since `find_iter` runs potentially many searches on the + /// haystack and since each search has worst case `O(m * n)` time + /// complexity, the overall worst case time complexity for iteration is + /// `O(m * n^2)`. + /// + /// # Example + /// + /// Find every word with exactly 13 Unicode word characters: + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"\b\w{13}\b").unwrap(); + /// let hay = b"Retroactively relinquishing remunerations is reprehensible."; + /// let matches: Vec<_> = re.find_iter(hay).map(|m| m.as_bytes()).collect(); + /// assert_eq!(matches, vec![ + /// &b"Retroactively"[..], + /// &b"relinquishing"[..], + /// &b"remunerations"[..], + /// &b"reprehensible"[..], + /// ]); + /// ``` + #[inline] + pub fn find_iter<'r, 'h>(&'r self, haystack: &'h [u8]) -> Matches<'r, 'h> { + Matches { haystack, it: self.meta.find_iter(haystack) } + } + + /// This routine searches for the first match of this regex in the haystack + /// given, and if found, returns not only the overall match but also the + /// matches of each capture group in the regex. If no match is found, then + /// `None` is returned. + /// + /// Capture group `0` always corresponds to an implicit unnamed group that + /// includes the entire match. 
If a match is found, this group is always + /// present. Subsequent groups may be named and are numbered, starting + /// at 1, by the order in which the opening parenthesis appears in the + /// pattern. For example, in the pattern `(?<a>.(?<b>.))(?<c>.)`, `a`, + /// `b` and `c` correspond to capture group indices `1`, `2` and `3`, + /// respectively. + /// + /// You should only use `captures` if you need access to the capture group + /// matches. Otherwise, [`Regex::find`] is generally faster for discovering + /// just the overall match. + /// + /// # Example + /// + /// Say you have some haystack with movie names and their release years, + /// like "'Citizen Kane' (1941)". It'd be nice if we could search for + /// strings looking like that, while also extracting the movie name and its + /// release year separately. The example below shows how to do that. + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"'([^']+)'\s+\((\d{4})\)").unwrap(); + /// let hay = b"Not my favorite movie: 'Citizen Kane' (1941)."; + /// let caps = re.captures(hay).unwrap(); + /// assert_eq!(caps.get(0).unwrap().as_bytes(), b"'Citizen Kane' (1941)"); + /// assert_eq!(caps.get(1).unwrap().as_bytes(), b"Citizen Kane"); + /// assert_eq!(caps.get(2).unwrap().as_bytes(), b"1941"); + /// // You can also access the groups by index using the Index notation. + /// // Note that this will panic on an invalid index. In this case, these + /// // accesses are always correct because the overall regex will only + /// // match when these capture groups match. + /// assert_eq!(&caps[0], b"'Citizen Kane' (1941)"); + /// assert_eq!(&caps[1], b"Citizen Kane"); + /// assert_eq!(&caps[2], b"1941"); + /// ``` + /// + /// Note that the full match is at capture group `0`. Each subsequent + /// capture group is indexed by the order of its opening `(`. 
+ /// + /// We can make this example a bit clearer by using *named* capture groups: + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"'(?<title>[^']+)'\s+\((?<year>\d{4})\)").unwrap(); + /// let hay = b"Not my favorite movie: 'Citizen Kane' (1941)."; + /// let caps = re.captures(hay).unwrap(); + /// assert_eq!(caps.get(0).unwrap().as_bytes(), b"'Citizen Kane' (1941)"); + /// assert_eq!(caps.name("title").unwrap().as_bytes(), b"Citizen Kane"); + /// assert_eq!(caps.name("year").unwrap().as_bytes(), b"1941"); + /// // You can also access the groups by name using the Index notation. + /// // Note that this will panic on an invalid group name. In this case, + /// // these accesses are always correct because the overall regex will + /// // only match when these capture groups match. + /// assert_eq!(&caps[0], b"'Citizen Kane' (1941)"); + /// assert_eq!(&caps["title"], b"Citizen Kane"); + /// assert_eq!(&caps["year"], b"1941"); + /// ``` + /// + /// Here we name the capture groups, which we can access with the `name` + /// method or the `Index` notation with a `&str`. Note that the named + /// capture groups are still accessible with `get` or the `Index` notation + /// with a `usize`. + /// + /// The `0`th capture group is always unnamed, so it must always be + /// accessed with `get(0)` or `[0]`. 
+ /// + /// Finally, one other way to get the matched substrings is with the + /// [`Captures::extract`] API: + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"'([^']+)'\s+\((\d{4})\)").unwrap(); + /// let hay = b"Not my favorite movie: 'Citizen Kane' (1941)."; + /// let (full, [title, year]) = re.captures(hay).unwrap().extract(); + /// assert_eq!(full, b"'Citizen Kane' (1941)"); + /// assert_eq!(title, b"Citizen Kane"); + /// assert_eq!(year, b"1941"); + /// ``` + #[inline] + pub fn captures<'h>(&self, haystack: &'h [u8]) -> Option<Captures<'h>> { + self.captures_at(haystack, 0) + } + + /// Returns an iterator that yields successive non-overlapping matches in + /// the given haystack. The iterator yields values of type [`Captures`]. + /// + /// This is the same as [`Regex::find_iter`], but instead of only providing + /// access to the overall match, each value yielded includes access to the + /// matches of all capture groups in the regex. Reporting this extra match + /// data is potentially costly, so callers should only use `captures_iter` + /// over `find_iter` when they actually need access to the capture group + /// matches. + /// + /// # Time complexity + /// + /// Note that since `captures_iter` runs potentially many searches on the + /// haystack and since each search has worst case `O(m * n)` time + /// complexity, the overall worst case time complexity for iteration is + /// `O(m * n^2)`. + /// + /// # Example + /// + /// We can use this to find all movie titles and their release years in + /// some haystack, where the movie is formatted like "'Title' (xxxx)": + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"'([^']+)'\s+\(([0-9]{4})\)").unwrap(); + /// let hay = b"'Citizen Kane' (1941), 'The Wizard of Oz' (1939), 'M' (1931)."; + /// let mut movies = vec![]; + /// for (_, [title, year]) in re.captures_iter(hay).map(|c| c.extract()) { + /// // OK because [0-9]{4} can only match valid UTF-8.
+ /// let year = std::str::from_utf8(year).unwrap(); + /// movies.push((title, year.parse::<i64>()?)); + /// } + /// assert_eq!(movies, vec![ + /// (&b"Citizen Kane"[..], 1941), + /// (&b"The Wizard of Oz"[..], 1939), + /// (&b"M"[..], 1931), + /// ]); + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + /// + /// Or with named groups: + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"'(?<title>[^']+)'\s+\((?<year>[0-9]{4})\)").unwrap(); + /// let hay = b"'Citizen Kane' (1941), 'The Wizard of Oz' (1939), 'M' (1931)."; + /// let mut it = re.captures_iter(hay); + /// + /// let caps = it.next().unwrap(); + /// assert_eq!(&caps["title"], b"Citizen Kane"); + /// assert_eq!(&caps["year"], b"1941"); + /// + /// let caps = it.next().unwrap(); + /// assert_eq!(&caps["title"], b"The Wizard of Oz"); + /// assert_eq!(&caps["year"], b"1939"); + /// + /// let caps = it.next().unwrap(); + /// assert_eq!(&caps["title"], b"M"); + /// assert_eq!(&caps["year"], b"1931"); + /// ``` + #[inline] + pub fn captures_iter<'r, 'h>( + &'r self, + haystack: &'h [u8], + ) -> CaptureMatches<'r, 'h> { + CaptureMatches { haystack, it: self.meta.captures_iter(haystack) } + } + + /// Returns an iterator of substrings of the haystack given, delimited by a + /// match of the regex. Namely, each element of the iterator corresponds to + /// a part of the haystack that *isn't* matched by the regular expression. + /// + /// # Time complexity + /// + /// Since iterators over all matches requires running potentially many + /// searches on the haystack, and since each search has worst case + /// `O(m * n)` time complexity, the overall worst case time complexity for + /// this routine is `O(m * n^2)`. 
+ /// + /// # Example + /// + /// To split a string delimited by arbitrary amounts of spaces or tabs: + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"[ \t]+").unwrap(); + /// let hay = b"a b \t c\td e"; + /// let fields: Vec<&[u8]> = re.split(hay).collect(); + /// assert_eq!(fields, vec![ + /// &b"a"[..], &b"b"[..], &b"c"[..], &b"d"[..], &b"e"[..], + /// ]); + /// ``` + /// + /// # Example: more cases + /// + /// Basic usage: + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r" ").unwrap(); + /// let hay = b"Mary had a little lamb"; + /// let got: Vec<&[u8]> = re.split(hay).collect(); + /// assert_eq!(got, vec![ + /// &b"Mary"[..], &b"had"[..], &b"a"[..], &b"little"[..], &b"lamb"[..], + /// ]); + /// + /// let re = Regex::new(r"X").unwrap(); + /// let hay = b""; + /// let got: Vec<&[u8]> = re.split(hay).collect(); + /// assert_eq!(got, vec![&b""[..]]); + /// + /// let re = Regex::new(r"X").unwrap(); + /// let hay = b"lionXXtigerXleopard"; + /// let got: Vec<&[u8]> = re.split(hay).collect(); + /// assert_eq!(got, vec![ + /// &b"lion"[..], &b""[..], &b"tiger"[..], &b"leopard"[..], + /// ]); + /// + /// let re = Regex::new(r"::").unwrap(); + /// let hay = b"lion::tiger::leopard"; + /// let got: Vec<&[u8]> = re.split(hay).collect(); + /// assert_eq!(got, vec![&b"lion"[..], &b"tiger"[..], &b"leopard"[..]]); + /// ``` + /// + /// If a haystack contains multiple contiguous matches, you will end up + /// with empty spans yielded by the iterator: + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"X").unwrap(); + /// let hay = b"XXXXaXXbXc"; + /// let got: Vec<&[u8]> = re.split(hay).collect(); + /// assert_eq!(got, vec![ + /// &b""[..], &b""[..], &b""[..], &b""[..], + /// &b"a"[..], &b""[..], &b"b"[..], &b"c"[..], + /// ]); + /// + /// let re = Regex::new(r"/").unwrap(); + /// let hay = b"(///)"; + /// let got: Vec<&[u8]> = re.split(hay).collect(); + /// assert_eq!(got, 
vec![&b"("[..], &b""[..], &b""[..], &b")"[..]]); + /// ``` + /// + /// Separators at the start or end of a haystack are neighbored by empty + /// substrings. + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"0").unwrap(); + /// let hay = b"010"; + /// let got: Vec<&[u8]> = re.split(hay).collect(); + /// assert_eq!(got, vec![&b""[..], &b"1"[..], &b""[..]]); + /// ``` + /// + /// When the regex can match the empty string, it splits at every byte + /// position in the haystack. This includes between all UTF-8 code units. + /// (The top-level [`Regex::split`](crate::Regex::split) will only split + /// at valid UTF-8 boundaries.) + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"").unwrap(); + /// let hay = "☃".as_bytes(); + /// let got: Vec<&[u8]> = re.split(hay).collect(); + /// assert_eq!(got, vec![ + /// &[][..], &[b'\xE2'][..], &[b'\x98'][..], &[b'\x83'][..], &[][..], + /// ]); + /// ``` + /// + /// Contiguous separators (which commonly show up with whitespace) can lead to + /// possibly surprising behavior. For example, this code is correct: + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r" ").unwrap(); + /// let hay = b" a b c"; + /// let got: Vec<&[u8]> = re.split(hay).collect(); + /// assert_eq!(got, vec![ + /// &b""[..], &b""[..], &b""[..], &b""[..], + /// &b"a"[..], &b""[..], &b"b"[..], &b"c"[..], + /// ]); + /// ``` + /// + /// It does *not* give you `["a", "b", "c"]`. For that behavior, you'd want + /// to match contiguous space characters: + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r" +").unwrap(); + /// let hay = b" a b c"; + /// let got: Vec<&[u8]> = re.split(hay).collect(); + /// // N.B. This does still include a leading empty span because ' +' + /// // matches at the beginning of the haystack.
+ /// assert_eq!(got, vec![&b""[..], &b"a"[..], &b"b"[..], &b"c"[..]]); + /// ``` + #[inline] + pub fn split<'r, 'h>(&'r self, haystack: &'h [u8]) -> Split<'r, 'h> { + Split { haystack, it: self.meta.split(haystack) } + } + + /// Returns an iterator of at most `limit` substrings of the haystack + /// given, delimited by a match of the regex. (A `limit` of `0` will return + /// no substrings.) Namely, each element of the iterator corresponds to a + /// part of the haystack that *isn't* matched by the regular expression. + /// The remainder of the haystack that is not split will be the last + /// element in the iterator. + /// + /// # Time complexity + /// + /// Since iterators over all matches requires running potentially many + /// searches on the haystack, and since each search has worst case + /// `O(m * n)` time complexity, the overall worst case time complexity for + /// this routine is `O(m * n^2)`. + /// + /// Although note that the worst case time here has an upper bound given + /// by the `limit` parameter. + /// + /// # Example + /// + /// Get the first two words in some haystack: + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"\W+").unwrap(); + /// let hay = b"Hey! 
How are you?"; + /// let fields: Vec<&[u8]> = re.splitn(hay, 3).collect(); + /// assert_eq!(fields, vec![&b"Hey"[..], &b"How"[..], &b"are you?"[..]]); + /// ``` + /// + /// # Examples: more cases + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r" ").unwrap(); + /// let hay = b"Mary had a little lamb"; + /// let got: Vec<&[u8]> = re.splitn(hay, 3).collect(); + /// assert_eq!(got, vec![&b"Mary"[..], &b"had"[..], &b"a little lamb"[..]]); + /// + /// let re = Regex::new(r"X").unwrap(); + /// let hay = b""; + /// let got: Vec<&[u8]> = re.splitn(hay, 3).collect(); + /// assert_eq!(got, vec![&b""[..]]); + /// + /// let re = Regex::new(r"X").unwrap(); + /// let hay = b"lionXXtigerXleopard"; + /// let got: Vec<&[u8]> = re.splitn(hay, 3).collect(); + /// assert_eq!(got, vec![&b"lion"[..], &b""[..], &b"tigerXleopard"[..]]); + /// + /// let re = Regex::new(r"::").unwrap(); + /// let hay = b"lion::tiger::leopard"; + /// let got: Vec<&[u8]> = re.splitn(hay, 2).collect(); + /// assert_eq!(got, vec![&b"lion"[..], &b"tiger::leopard"[..]]); + /// + /// let re = Regex::new(r"X").unwrap(); + /// let hay = b"abcXdef"; + /// let got: Vec<&[u8]> = re.splitn(hay, 1).collect(); + /// assert_eq!(got, vec![&b"abcXdef"[..]]); + /// + /// let re = Regex::new(r"X").unwrap(); + /// let hay = b"abcdef"; + /// let got: Vec<&[u8]> = re.splitn(hay, 2).collect(); + /// assert_eq!(got, vec![&b"abcdef"[..]]); + /// + /// let re = Regex::new(r"X").unwrap(); + /// let hay = b"abcXdef"; + /// let got: Vec<&[u8]> = re.splitn(hay, 0).collect(); + /// assert!(got.is_empty()); + /// ``` + #[inline] + pub fn splitn<'r, 'h>( + &'r self, + haystack: &'h [u8], + limit: usize, + ) -> SplitN<'r, 'h> { + SplitN { haystack, it: self.meta.splitn(haystack, limit) } + } + + /// Replaces the leftmost-first match in the given haystack with the + /// replacement provided. 
The replacement can be a regular string (where + /// `$N` and `$name` are expanded to match capture groups) or a function + /// that takes a [`Captures`] and returns the replaced string. + /// + /// If no match is found, then the haystack is returned unchanged. In that + /// case, this implementation will likely return a `Cow::Borrowed` value + /// such that no allocation is performed. + /// + /// # Replacement string syntax + /// + /// All instances of `$ref` in the replacement string are replaced with + /// the substring corresponding to the capture group identified by `ref`. + /// + /// `ref` may be an integer corresponding to the index of the capture group + /// (counted by order of opening parenthesis where `0` is the entire match) + /// or it can be a name (consisting of letters, digits or underscores) + /// corresponding to a named capture group. + /// + /// If `ref` isn't a valid capture group (whether the name doesn't exist or + /// isn't a valid index), then it is replaced with the empty string. + /// + /// The longest possible name is used. For example, `$1a` looks up the + /// capture group named `1a` and not the capture group at index `1`. To + /// exert more precise control over the name, use braces, e.g., `${1}a`. + /// + /// To write a literal `$` use `$$`. + /// + /// # Example + /// + /// Note that this function is polymorphic with respect to the replacement. + /// In typical usage, this can just be a normal string: + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"[^01]+").unwrap(); + /// assert_eq!(re.replace(b"1078910", b""), &b"1010"[..]); + /// ``` + /// + /// But anything satisfying the [`Replacer`] trait will work. For example, + /// a closure of type `|&Captures| -> String` provides direct access to the + /// captures corresponding to a match. 
This allows one to access capturing + /// group matches easily: + /// + /// ``` + /// use regex::bytes::{Captures, Regex}; + /// + /// let re = Regex::new(r"([^,\s]+),\s+(\S+)").unwrap(); + /// let result = re.replace(b"Springsteen, Bruce", |caps: &Captures| { + /// let mut buf = vec![]; + /// buf.extend_from_slice(&caps[2]); + /// buf.push(b' '); + /// buf.extend_from_slice(&caps[1]); + /// buf + /// }); + /// assert_eq!(result, &b"Bruce Springsteen"[..]); + /// ``` + /// + /// But this is a bit cumbersome to use all the time. Instead, a simple + /// syntax is supported (as described above) that expands `$name` into the + /// corresponding capture group. Here's the last example, but using this + /// expansion technique with named capture groups: + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"(?<last>[^,\s]+),\s+(?<first>\S+)").unwrap(); + /// let result = re.replace(b"Springsteen, Bruce", b"$first $last"); + /// assert_eq!(result, &b"Bruce Springsteen"[..]); + /// ``` + /// + /// Note that using `$2` instead of `$first` or `$1` instead of `$last` + /// would produce the same result. To write a literal `$` use `$$`. + /// + /// Sometimes the replacement string requires use of curly braces to + /// delineate a capture group replacement when it is adjacent to some other + /// literal text. For example, if we wanted to join two words together with + /// an underscore: + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"(?<first>\w+)\s+(?<second>\w+)").unwrap(); + /// let result = re.replace(b"deep fried", b"${first}_$second"); + /// assert_eq!(result, &b"deep_fried"[..]); + /// ``` + /// + /// Without the curly braces, the capture group name `first_` would be + /// used, and since it doesn't exist, it would be replaced with the empty + /// string. + /// + /// Finally, sometimes you just want to replace a literal string with no + /// regard for capturing group expansion. 
This can be done by wrapping a + /// string with [`NoExpand`]: + /// + /// ``` + /// use regex::bytes::{NoExpand, Regex}; + /// + /// let re = Regex::new(r"(?<last>[^,\s]+),\s+(\S+)").unwrap(); + /// let result = re.replace(b"Springsteen, Bruce", NoExpand(b"$2 $last")); + /// assert_eq!(result, &b"$2 $last"[..]); + /// ``` + /// + /// Using `NoExpand` may also be faster, since the replacement string won't + /// need to be parsed for the `$` syntax. + #[inline] + pub fn replace<'h, R: Replacer>( + &self, + haystack: &'h [u8], + rep: R, + ) -> Cow<'h, [u8]> { + self.replacen(haystack, 1, rep) + } + + /// Replaces all non-overlapping matches in the haystack with the + /// replacement provided. This is the same as calling `replacen` with + /// `limit` set to `0`. + /// + /// The documentation for [`Regex::replace`] goes into more detail about + /// what kinds of replacement strings are supported. + /// + /// # Time complexity + /// + /// Since iterators over all matches requires running potentially many + /// searches on the haystack, and since each search has worst case + /// `O(m * n)` time complexity, the overall worst case time complexity for + /// this routine is `O(m * n^2)`. + /// + /// # Fallibility + /// + /// If you need to write a replacement routine where any individual + /// replacement might "fail," doing so with this API isn't really feasible + /// because there's no way to stop the search process if a replacement + /// fails. 
Instead, if you need this functionality, you should consider + /// implementing your own replacement routine: + /// + /// ``` + /// use regex::bytes::{Captures, Regex}; + /// + /// fn replace_all<E>( + /// re: &Regex, + /// haystack: &[u8], + /// replacement: impl Fn(&Captures) -> Result<Vec<u8>, E>, + /// ) -> Result<Vec<u8>, E> { + /// let mut new = Vec::with_capacity(haystack.len()); + /// let mut last_match = 0; + /// for caps in re.captures_iter(haystack) { + /// let m = caps.get(0).unwrap(); + /// new.extend_from_slice(&haystack[last_match..m.start()]); + /// new.extend_from_slice(&replacement(&caps)?); + /// last_match = m.end(); + /// } + /// new.extend_from_slice(&haystack[last_match..]); + /// Ok(new) + /// } + /// + /// // Let's replace each word with the number of bytes in that word. + /// // But if we see a word that is "too long," we'll give up. + /// let re = Regex::new(r"\w+").unwrap(); + /// let replacement = |caps: &Captures| -> Result<Vec<u8>, &'static str> { + /// if caps[0].len() >= 5 { + /// return Err("word too long"); + /// } + /// Ok(caps[0].len().to_string().into_bytes()) + /// }; + /// assert_eq!( + /// Ok(b"2 3 3 3?".to_vec()), + /// replace_all(&re, b"hi how are you?", &replacement), + /// ); + /// assert!(replace_all(&re, b"hi there", &replacement).is_err()); + /// ``` + /// + /// # Example + /// + /// This example shows how to flip the order of whitespace (excluding line + /// terminators) delimited fields, and normalizes the whitespace that + /// delimits the fields: + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"(?m)^(\S+)[\s--\r\n]+(\S+)$").unwrap(); + /// let hay = b" + /// Greetings 1973 + /// Wild\t1973 + /// BornToRun\t\t\t\t1975 + /// Darkness 1978 + /// TheRiver 1980 + /// "; + /// let new = re.replace_all(hay, b"$2 $1"); + /// assert_eq!(new, &b" + /// 1973 Greetings + /// 1973 Wild + /// 1975 BornToRun + /// 1978 Darkness + /// 1980 TheRiver + /// "[..]); + /// ``` + #[inline] + pub fn 
replace_all<'h, R: Replacer>( + &self, + haystack: &'h [u8], + rep: R, + ) -> Cow<'h, [u8]> { + self.replacen(haystack, 0, rep) + } + + /// Replaces at most `limit` non-overlapping matches in the haystack with + /// the replacement provided. If `limit` is `0`, then all non-overlapping + /// matches are replaced. That is, `Regex::replace_all(hay, rep)` is + /// equivalent to `Regex::replacen(hay, 0, rep)`. + /// + /// The documentation for [`Regex::replace`] goes into more detail about + /// what kinds of replacement strings are supported. + /// + /// # Time complexity + /// + /// Since iterators over all matches requires running potentially many + /// searches on the haystack, and since each search has worst case + /// `O(m * n)` time complexity, the overall worst case time complexity for + /// this routine is `O(m * n^2)`. + /// + /// Although note that the worst case time here has an upper bound given + /// by the `limit` parameter. + /// + /// # Fallibility + /// + /// See the corresponding section in the docs for [`Regex::replace_all`] + /// for tips on how to deal with a replacement routine that can fail. + /// + /// # Example + /// + /// This example shows how to flip the order of whitespace (excluding line + /// terminators) delimited fields, and normalizes the whitespace that + /// delimits the fields. But we only do it for the first two matches. 
+ /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"(?m)^(\S+)[\s--\r\n]+(\S+)$").unwrap(); + /// let hay = b" + /// Greetings 1973 + /// Wild\t1973 + /// BornToRun\t\t\t\t1975 + /// Darkness 1978 + /// TheRiver 1980 + /// "; + /// let new = re.replacen(hay, 2, b"$2 $1"); + /// assert_eq!(new, &b" + /// 1973 Greetings + /// 1973 Wild + /// BornToRun\t\t\t\t1975 + /// Darkness 1978 + /// TheRiver 1980 + /// "[..]); + /// ``` + #[inline] + pub fn replacen<'h, R: Replacer>( + &self, + haystack: &'h [u8], + limit: usize, + mut rep: R, + ) -> Cow<'h, [u8]> { + // If we know that the replacement doesn't have any capture expansions, + // then we can use the fast path. The fast path can make a tremendous + // difference: + // + // 1) We use `find_iter` instead of `captures_iter`. Not asking for + // captures generally makes the regex engines faster. + // 2) We don't need to look up all of the capture groups and do + // replacements inside the replacement string. We just push it + // at each match and be done with it. + if let Some(rep) = rep.no_expansion() { + let mut it = self.find_iter(haystack).enumerate().peekable(); + if it.peek().is_none() { + return Cow::Borrowed(haystack); + } + let mut new = Vec::with_capacity(haystack.len()); + let mut last_match = 0; + for (i, m) in it { + new.extend_from_slice(&haystack[last_match..m.start()]); + new.extend_from_slice(&rep); + last_match = m.end(); + if limit > 0 && i >= limit - 1 { + break; + } + } + new.extend_from_slice(&haystack[last_match..]); + return Cow::Owned(new); + } + + // The slower path, which we use if the replacement needs access to + // capture groups. 
+ let mut it = self.captures_iter(haystack).enumerate().peekable(); + if it.peek().is_none() { + return Cow::Borrowed(haystack); + } + let mut new = Vec::with_capacity(haystack.len()); + let mut last_match = 0; + for (i, cap) in it { + // unwrap on 0 is OK because captures only reports matches + let m = cap.get(0).unwrap(); + new.extend_from_slice(&haystack[last_match..m.start()]); + rep.replace_append(&cap, &mut new); + last_match = m.end(); + if limit > 0 && i >= limit - 1 { + break; + } + } + new.extend_from_slice(&haystack[last_match..]); + Cow::Owned(new) + } +} + +/// A group of advanced or "lower level" search methods. Some methods permit +/// starting the search at a position greater than `0` in the haystack. Other +/// methods permit reusing allocations, for example, when extracting the +/// matches for capture groups. +impl Regex { + /// Returns the end byte offset of the first match in the haystack given. + /// + /// This method may have the same performance characteristics as + /// `is_match`. Behaviorally, it doesn't just report whether a match + /// occurs, but also the end offset for a match. In particular, the offset + /// returned *may be shorter* than the proper end of the leftmost-first + /// match that you would find via [`Regex::find`]. + /// + /// Note that it is not guaranteed that this routine finds the shortest or + /// "earliest" possible match. Instead, the main idea of this API is that + /// it returns the offset at the point at which the internal regex engine + /// has determined that a match has occurred. This may vary depending on + /// which internal regex engine is used, and thus, the offset itself may + /// change based on internal heuristics. + /// + /// # Example + /// + /// Typically, `a+` would match the entire first sequence of `a` in some + /// haystack, but `shortest_match` *may* give up as soon as it sees the + /// first `a`.
+ /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"a+").unwrap(); + /// let offset = re.shortest_match(b"aaaaa").unwrap(); + /// assert_eq!(offset, 1); + /// ``` + #[inline] + pub fn shortest_match(&self, haystack: &[u8]) -> Option<usize> { + self.shortest_match_at(haystack, 0) + } + + /// Returns the same as `shortest_match`, but starts the search at the + /// given offset. + /// + /// The significance of the starting point is that it takes the surrounding + /// context into consideration. For example, the `\A` anchor can only match + /// when `start == 0`. + /// + /// If a match is found, the offset returned is relative to the beginning + /// of the haystack, not the beginning of the search. + /// + /// # Panics + /// + /// This panics when `start >= haystack.len() + 1`. + /// + /// # Example + /// + /// This example shows the significance of `start` by demonstrating how it + /// can be used to permit look-around assertions in a regex to take the + /// surrounding context into account. + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"\bchew\b").unwrap(); + /// let hay = b"eschew"; + /// // We get a match here, but it's probably not intended. + /// assert_eq!(re.shortest_match(&hay[2..]), Some(4)); + /// // No match because the assertions take the context into account. + /// assert_eq!(re.shortest_match_at(hay, 2), None); + /// ``` + #[inline] + pub fn shortest_match_at( + &self, + haystack: &[u8], + start: usize, + ) -> Option<usize> { + let input = + Input::new(haystack).earliest(true).span(start..haystack.len()); + self.meta.search_half(&input).map(|hm| hm.offset()) + } + + /// Returns the same as [`Regex::is_match`], but starts the search at the + /// given offset. + /// + /// The significance of the starting point is that it takes the surrounding + /// context into consideration. For example, the `\A` anchor can only + /// match when `start == 0`. 
+ /// + /// # Panics + /// + /// This panics when `start >= haystack.len() + 1`. + /// + /// # Example + /// + /// This example shows the significance of `start` by demonstrating how it + /// can be used to permit look-around assertions in a regex to take the + /// surrounding context into account. + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"\bchew\b").unwrap(); + /// let hay = b"eschew"; + /// // We get a match here, but it's probably not intended. + /// assert!(re.is_match(&hay[2..])); + /// // No match because the assertions take the context into account. + /// assert!(!re.is_match_at(hay, 2)); + /// ``` + #[inline] + pub fn is_match_at(&self, haystack: &[u8], start: usize) -> bool { + self.meta.is_match(Input::new(haystack).span(start..haystack.len())) + } + + /// Returns the same as [`Regex::find`], but starts the search at the given + /// offset. + /// + /// The significance of the starting point is that it takes the surrounding + /// context into consideration. For example, the `\A` anchor can only + /// match when `start == 0`. + /// + /// # Panics + /// + /// This panics when `start >= haystack.len() + 1`. + /// + /// # Example + /// + /// This example shows the significance of `start` by demonstrating how it + /// can be used to permit look-around assertions in a regex to take the + /// surrounding context into account. + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"\bchew\b").unwrap(); + /// let hay = b"eschew"; + /// // We get a match here, but it's probably not intended. + /// assert_eq!(re.find(&hay[2..]).map(|m| m.range()), Some(0..4)); + /// // No match because the assertions take the context into account. 
+ /// assert_eq!(re.find_at(hay, 2), None); + /// ``` + #[inline] + pub fn find_at<'h>( + &self, + haystack: &'h [u8], + start: usize, + ) -> Option<Match<'h>> { + let input = Input::new(haystack).span(start..haystack.len()); + self.meta.find(input).map(|m| Match::new(haystack, m.start(), m.end())) + } + + /// Returns the same as [`Regex::captures`], but starts the search at the + /// given offset. + /// + /// The significance of the starting point is that it takes the surrounding + /// context into consideration. For example, the `\A` anchor can only + /// match when `start == 0`. + /// + /// # Panics + /// + /// This panics when `start >= haystack.len() + 1`. + /// + /// # Example + /// + /// This example shows the significance of `start` by demonstrating how it + /// can be used to permit look-around assertions in a regex to take the + /// surrounding context into account. + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"\bchew\b").unwrap(); + /// let hay = b"eschew"; + /// // We get a match here, but it's probably not intended. + /// assert_eq!(&re.captures(&hay[2..]).unwrap()[0], b"chew"); + /// // No match because the assertions take the context into account. + /// assert!(re.captures_at(hay, 2).is_none()); + /// ``` + #[inline] + pub fn captures_at<'h>( + &self, + haystack: &'h [u8], + start: usize, + ) -> Option<Captures<'h>> { + let input = Input::new(haystack).span(start..haystack.len()); + let mut caps = self.meta.create_captures(); + self.meta.captures(input, &mut caps); + if caps.is_match() { + let static_captures_len = self.static_captures_len(); + Some(Captures { haystack, caps, static_captures_len }) + } else { + None + } + } + + /// This is like [`Regex::captures`], but writes the byte offsets of each + /// capture group match into the locations given. + /// + /// A [`CaptureLocations`] stores the same byte offsets as a [`Captures`], + /// but does *not* store a reference to the haystack. 
This makes its API + /// a bit lower level and less convenient. But in exchange, callers + /// may allocate their own `CaptureLocations` and reuse it for multiple + /// searches. This may be helpful if allocating a `Captures` shows up in a + /// profile as too costly. + /// + /// To create a `CaptureLocations` value, use the + /// [`Regex::capture_locations`] method. + /// + /// This also returns the overall match if one was found. When a match is + /// found, its offsets are also always stored in `locs` at index `0`. + /// + /// # Example + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"^([a-z]+)=(\S*)$").unwrap(); + /// let mut locs = re.capture_locations(); + /// assert!(re.captures_read(&mut locs, b"id=foo123").is_some()); + /// assert_eq!(Some((0, 9)), locs.get(0)); + /// assert_eq!(Some((0, 2)), locs.get(1)); + /// assert_eq!(Some((3, 9)), locs.get(2)); + /// ``` + #[inline] + pub fn captures_read<'h>( + &self, + locs: &mut CaptureLocations, + haystack: &'h [u8], + ) -> Option<Match<'h>> { + self.captures_read_at(locs, haystack, 0) + } + + /// Returns the same as [`Regex::captures_read`], but starts the search at + /// the given offset. + /// + /// The significance of the starting point is that it takes the surrounding + /// context into consideration. For example, the `\A` anchor can only + /// match when `start == 0`. + /// + /// # Panics + /// + /// This panics when `start >= haystack.len() + 1`. + /// + /// # Example + /// + /// This example shows the significance of `start` by demonstrating how it + /// can be used to permit look-around assertions in a regex to take the + /// surrounding context into account. + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"\bchew\b").unwrap(); + /// let hay = b"eschew"; + /// let mut locs = re.capture_locations(); + /// // We get a match here, but it's probably not intended. 
+ /// assert!(re.captures_read(&mut locs, &hay[2..]).is_some()); + /// // No match because the assertions take the context into account. + /// assert!(re.captures_read_at(&mut locs, hay, 2).is_none()); + /// ``` + #[inline] + pub fn captures_read_at<'h>( + &self, + locs: &mut CaptureLocations, + haystack: &'h [u8], + start: usize, + ) -> Option<Match<'h>> { + let input = Input::new(haystack).span(start..haystack.len()); + self.meta.search_captures(&input, &mut locs.0); + locs.0.get_match().map(|m| Match::new(haystack, m.start(), m.end())) + } + + /// An undocumented alias for `captures_read_at`. + /// + /// The `regex-capi` crate previously used this routine, so to avoid + /// breaking that crate, we continue to provide the name as an undocumented + /// alias. + #[doc(hidden)] + #[inline] + pub fn read_captures_at<'h>( + &self, + locs: &mut CaptureLocations, + haystack: &'h [u8], + start: usize, + ) -> Option<Match<'h>> { + self.captures_read_at(locs, haystack, start) + } +} + +/// Auxiliary methods. +impl Regex { + /// Returns the original string of this regex. + /// + /// # Example + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"foo\w+bar").unwrap(); + /// assert_eq!(re.as_str(), r"foo\w+bar"); + /// ``` + #[inline] + pub fn as_str(&self) -> &str { + &self.pattern + } + + /// Returns an iterator over the capture names in this regex. + /// + /// The iterator returned yields elements of type `Option<&str>`. That is, + /// the iterator yields values for all capture groups, even ones that are + /// unnamed. The order of the groups corresponds to the order of the group's + /// corresponding opening parenthesis. + /// + /// The first element of the iterator always yields the group corresponding + /// to the overall match, and this group is always unnamed. Therefore, the + /// iterator always yields at least one group. 
+ /// + /// # Example + /// + /// This shows basic usage with a mix of named and unnamed capture groups: + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"(?<a>.(?<b>.))(.)(?:.)(?<c>.)").unwrap(); + /// let mut names = re.capture_names(); + /// assert_eq!(names.next(), Some(None)); + /// assert_eq!(names.next(), Some(Some("a"))); + /// assert_eq!(names.next(), Some(Some("b"))); + /// assert_eq!(names.next(), Some(None)); + /// // the '(?:.)' group is non-capturing and so doesn't appear here! + /// assert_eq!(names.next(), Some(Some("c"))); + /// assert_eq!(names.next(), None); + /// ``` + /// + /// The iterator always yields at least one element, even for regexes with + /// no capture groups and even for regexes that can never match: + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"").unwrap(); + /// let mut names = re.capture_names(); + /// assert_eq!(names.next(), Some(None)); + /// assert_eq!(names.next(), None); + /// + /// let re = Regex::new(r"[a&&b]").unwrap(); + /// let mut names = re.capture_names(); + /// assert_eq!(names.next(), Some(None)); + /// assert_eq!(names.next(), None); + /// ``` + #[inline] + pub fn capture_names(&self) -> CaptureNames<'_> { + CaptureNames(self.meta.group_info().pattern_names(PatternID::ZERO)) + } + + /// Returns the number of capture groups in this regex. + /// + /// This includes all named and unnamed groups, including the implicit + /// unnamed group that is always present and corresponds to the entire + /// match. + /// + /// Since the implicit unnamed group is always included in this length, the + /// length returned is guaranteed to be greater than zero. 
+ /// + /// # Example + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"foo").unwrap(); + /// assert_eq!(1, re.captures_len()); + /// + /// let re = Regex::new(r"(foo)").unwrap(); + /// assert_eq!(2, re.captures_len()); + /// + /// let re = Regex::new(r"(?<a>.(?<b>.))(.)(?:.)(?<c>.)").unwrap(); + /// assert_eq!(5, re.captures_len()); + /// + /// let re = Regex::new(r"[a&&b]").unwrap(); + /// assert_eq!(1, re.captures_len()); + /// ``` + #[inline] + pub fn captures_len(&self) -> usize { + self.meta.group_info().group_len(PatternID::ZERO) + } + + /// Returns the total number of capturing groups that appear in every + /// possible match. + /// + /// If the number of capture groups can vary depending on the match, then + /// this returns `None`. That is, a value is only returned when the number + /// of matching groups is invariant or "static." + /// + /// Note that like [`Regex::captures_len`], this **does** include the + /// implicit capturing group corresponding to the entire match. Therefore, + /// when a non-None value is returned, it is guaranteed to be at least `1`. + /// Stated differently, a return value of `Some(0)` is impossible. + /// + /// # Example + /// + /// This shows a few cases where a static number of capture groups is + /// available and a few cases where it is not. 
+ /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let len = |pattern| { + /// Regex::new(pattern).map(|re| re.static_captures_len()) + /// }; + /// + /// assert_eq!(Some(1), len("a")?); + /// assert_eq!(Some(2), len("(a)")?); + /// assert_eq!(Some(2), len("(a)|(b)")?); + /// assert_eq!(Some(3), len("(a)(b)|(c)(d)")?); + /// assert_eq!(None, len("(a)|b")?); + /// assert_eq!(None, len("a|(b)")?); + /// assert_eq!(None, len("(b)*")?); + /// assert_eq!(Some(2), len("(b)+")?); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + #[inline] + pub fn static_captures_len(&self) -> Option<usize> { + self.meta.static_captures_len() + } + + /// Returns a freshly allocated set of capture locations that can + /// be reused in multiple calls to [`Regex::captures_read`] or + /// [`Regex::captures_read_at`]. + /// + /// # Example + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"(.)(.)(\w+)").unwrap(); + /// let mut locs = re.capture_locations(); + /// assert!(re.captures_read(&mut locs, b"Padron").is_some()); + /// assert_eq!(locs.get(0), Some((0, 6))); + /// assert_eq!(locs.get(1), Some((0, 1))); + /// assert_eq!(locs.get(2), Some((1, 2))); + /// assert_eq!(locs.get(3), Some((2, 6))); + /// ``` + #[inline] + pub fn capture_locations(&self) -> CaptureLocations { + CaptureLocations(self.meta.create_captures()) + } + + /// An alias for `capture_locations` to preserve backward compatibility. + /// + /// The `regex-capi` crate uses this method, so to avoid breaking that + /// crate, we continue to export it as an undocumented API. + #[doc(hidden)] + #[inline] + pub fn locations(&self) -> CaptureLocations { + self.capture_locations() + } +} + +/// Represents a single match of a regex in a haystack. +/// +/// A `Match` contains both the start and end byte offsets of the match and the +/// actual substring corresponding to the range of those byte offsets. It is +/// guaranteed that `start <= end`. 
When `start == end`, the match is empty. +/// +/// Unlike the top-level `Match` type, this `Match` type is produced by APIs +/// that search `&[u8]` haystacks. This means that the offsets in a `Match` can +/// point to anywhere in the haystack, including in a place that splits the +/// UTF-8 encoding of a Unicode scalar value. +/// +/// The lifetime parameter `'h` refers to the lifetime of the haystack that +/// this match was produced from. +/// +/// # Numbering +/// +/// The byte offsets in a `Match` form a half-open interval. That is, the +/// start of the range is inclusive and the end of the range is exclusive. +/// For example, given a haystack `abcFOOxyz` and a match of `FOO`, its byte +/// offset range starts at `3` and ends at `6`. `3` corresponds to `F` and +/// `6` corresponds to `x`, which is one past the end of the match. This +/// corresponds to the same kind of slicing that Rust uses. +/// +/// For more on why this was chosen over other schemes (aside from being +/// consistent with how Rust the language works), see [this discussion] and +/// [Dijkstra's note on a related topic][note]. +/// +/// [this discussion]: https://github.com/rust-lang/regex/discussions/866 +/// [note]: https://www.cs.utexas.edu/users/EWD/transcriptions/EWD08xx/EWD831.html +/// +/// # Example +/// +/// This example shows the value of each of the methods on `Match` for a +/// particular search. 
+/// +/// ``` +/// use regex::bytes::Regex; +/// +/// let re = Regex::new(r"\p{Greek}+").unwrap(); +/// let hay = "Greek: αβγδ".as_bytes(); +/// let m = re.find(hay).unwrap(); +/// assert_eq!(7, m.start()); +/// assert_eq!(15, m.end()); +/// assert!(!m.is_empty()); +/// assert_eq!(8, m.len()); +/// assert_eq!(7..15, m.range()); +/// assert_eq!("αβγδ".as_bytes(), m.as_bytes()); +/// ``` +#[derive(Copy, Clone, Eq, PartialEq)] +pub struct Match<'h> { + haystack: &'h [u8], + start: usize, + end: usize, +} + +impl<'h> Match<'h> { + /// Returns the byte offset of the start of the match in the haystack. The + /// start of the match corresponds to the position where the match begins + /// and includes the first byte in the match. + /// + /// It is guaranteed that `Match::start() <= Match::end()`. + /// + /// Unlike the top-level `Match` type, the start offset may appear anywhere + /// in the haystack. This includes between the code units of a UTF-8 + /// encoded Unicode scalar value. + #[inline] + pub fn start(&self) -> usize { + self.start + } + + /// Returns the byte offset of the end of the match in the haystack. The + /// end of the match corresponds to the byte immediately following the last + /// byte in the match. This means that `&slice[start..end]` works as one + /// would expect. + /// + /// It is guaranteed that `Match::start() <= Match::end()`. + /// + /// Unlike the top-level `Match` type, the end offset may appear anywhere + /// in the haystack. This includes between the code units of a UTF-8 + /// encoded Unicode scalar value. + #[inline] + pub fn end(&self) -> usize { + self.end + } + + /// Returns true if and only if this match has a length of zero. + /// + /// Note that an empty match can only occur when the regex itself can + /// match the empty string. Here are some examples of regexes that can + /// all match the empty string: `^`, `^$`, `\b`, `a?`, `a*`, `a{0}`, + /// `(foo|\d+|quux)?`. 
+ #[inline] + pub fn is_empty(&self) -> bool { + self.start == self.end + } + + /// Returns the length, in bytes, of this match. + #[inline] + pub fn len(&self) -> usize { + self.end - self.start + } + + /// Returns the range over the starting and ending byte offsets of the + /// match in the haystack. + #[inline] + pub fn range(&self) -> core::ops::Range<usize> { + self.start..self.end + } + + /// Returns the substring of the haystack that matched. + #[inline] + pub fn as_bytes(&self) -> &'h [u8] { + &self.haystack[self.range()] + } + + /// Creates a new match from the given haystack and byte offsets. + #[inline] + fn new(haystack: &'h [u8], start: usize, end: usize) -> Match<'h> { + Match { haystack, start, end } + } +} + +impl<'h> core::fmt::Debug for Match<'h> { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + let mut fmt = f.debug_struct("Match"); + fmt.field("start", &self.start).field("end", &self.end); + if let Ok(s) = core::str::from_utf8(self.as_bytes()) { + fmt.field("bytes", &s); + } else { + // FIXME: It would be nice if this could be printed as a string + // with invalid UTF-8 replaced with hex escapes. An alloc would + // probably be okay if that makes it easier, but regex-automata does + // (at time of writing) have internal routines that do this. So + // maybe we should expose them. + fmt.field("bytes", &self.as_bytes()); + } + fmt.finish() + } +} + +impl<'h> From<Match<'h>> for &'h [u8] { + fn from(m: Match<'h>) -> &'h [u8] { + m.as_bytes() + } +} + +impl<'h> From<Match<'h>> for core::ops::Range<usize> { + fn from(m: Match<'h>) -> core::ops::Range<usize> { + m.range() + } +} + +/// Represents the capture groups for a single match. +/// +/// Capture groups refer to parts of a regex enclosed in parentheses. They +/// can be optionally named. The purpose of capture groups is to be able to +/// reference different parts of a match based on the original pattern. 
In +/// essence, a `Captures` is a container of [`Match`] values for each group +/// that participated in a regex match. Each `Match` can be looked up by either +/// its capture group index or name (if it has one). +/// +/// For example, say you want to match the individual letters in a 5-letter +/// word: +/// +/// ```text +/// (?<first>\w)(\w)(?:\w)\w(?<last>\w) +/// ``` +/// +/// This regex has 4 capture groups: +/// +/// * The group at index `0` corresponds to the overall match. It is always +/// present in every match and never has a name. +/// * The group at index `1` with name `first` corresponding to the first +/// letter. +/// * The group at index `2` with no name corresponding to the second letter. +/// * The group at index `3` with name `last` corresponding to the fifth and +/// last letter. +/// +/// Notice that `(?:\w)` was not listed above as a capture group despite it +/// being enclosed in parentheses. That's because `(?:pattern)` is a special +/// syntax that permits grouping but *without* capturing. The reason for not +/// treating it as a capture is that tracking and reporting capture groups +/// requires additional state that may lead to slower searches. So using as few +/// capture groups as possible can help performance. (Although the difference +/// in performance of a couple of capture groups is likely immaterial.) +/// +/// Values with this type are created by [`Regex::captures`] or +/// [`Regex::captures_iter`]. +/// +/// `'h` is the lifetime of the haystack that these captures were matched from. 
+/// +/// # Example +/// +/// ``` +/// use regex::bytes::Regex; +/// +/// let re = Regex::new(r"(?<first>\w)(\w)(?:\w)\w(?<last>\w)").unwrap(); +/// let caps = re.captures(b"toady").unwrap(); +/// assert_eq!(b"toady", &caps[0]); +/// assert_eq!(b"t", &caps["first"]); +/// assert_eq!(b"o", &caps[2]); +/// assert_eq!(b"y", &caps["last"]); +/// ``` +pub struct Captures<'h> { + haystack: &'h [u8], + caps: captures::Captures, + static_captures_len: Option<usize>, +} + +impl<'h> Captures<'h> { + /// Returns the `Match` associated with the capture group at index `i`. If + /// `i` does not correspond to a capture group, or if the capture group did + /// not participate in the match, then `None` is returned. + /// + /// When `i == 0`, this is guaranteed to return a non-`None` value. + /// + /// # Examples + /// + /// Get the substring that matched with a default of an empty string if the + /// group didn't participate in the match: + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"[a-z]+(?:([0-9]+)|([A-Z]+))").unwrap(); + /// let caps = re.captures(b"abc123").unwrap(); + /// + /// let substr1 = caps.get(1).map_or(&b""[..], |m| m.as_bytes()); + /// let substr2 = caps.get(2).map_or(&b""[..], |m| m.as_bytes()); + /// assert_eq!(substr1, b"123"); + /// assert_eq!(substr2, b""); + /// ``` + #[inline] + pub fn get(&self, i: usize) -> Option<Match<'h>> { + self.caps + .get_group(i) + .map(|sp| Match::new(self.haystack, sp.start, sp.end)) + } + + /// Returns the `Match` associated with the capture group named `name`. If + /// `name` isn't a valid capture group or it refers to a group that didn't + /// match, then `None` is returned. + /// + /// Note that unlike `caps["name"]`, this returns a `Match` whose lifetime + /// matches the lifetime of the haystack in this `Captures` value. + /// Conversely, the substring returned by `caps["name"]` has a lifetime + /// of the `Captures` value, which is likely shorter than the lifetime of + /// the haystack. 
In some cases, it may be necessary to use this method to + /// access the matching substring instead of the `caps["name"]` notation. + /// + /// # Examples + /// + /// Get the substring that matched with a default of an empty string if the + /// group didn't participate in the match: + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new( + /// r"[a-z]+(?:(?<numbers>[0-9]+)|(?<letters>[A-Z]+))", + /// ).unwrap(); + /// let caps = re.captures(b"abc123").unwrap(); + /// + /// let numbers = caps.name("numbers").map_or(&b""[..], |m| m.as_bytes()); + /// let letters = caps.name("letters").map_or(&b""[..], |m| m.as_bytes()); + /// assert_eq!(numbers, b"123"); + /// assert_eq!(letters, b""); + /// ``` + #[inline] + pub fn name(&self, name: &str) -> Option<Match<'h>> { + self.caps + .get_group_by_name(name) + .map(|sp| Match::new(self.haystack, sp.start, sp.end)) + } + + /// This is a convenience routine for extracting the substrings + /// corresponding to matching capture groups. + /// + /// This returns a tuple where the first element corresponds to the full + /// substring of the haystack that matched the regex. The second element is + /// an array of substrings, with each corresponding to the substring + /// that matched for a particular capture group. + /// + /// # Panics + /// + /// This panics if the number of possible matching groups in this + /// `Captures` value is not fixed to `N` in all circumstances. + /// More precisely, this routine only works when `N` is exactly one less + /// than [`Regex::static_captures_len`] (which also counts the overall match). + /// + /// Stated more plainly, if the number of matching capture groups in a + /// regex can vary from match to match, then this function always panics. + /// + /// For example, `(a)(b)|(c)` could produce two matching capture groups + /// or one matching capture group for any given match. Therefore, one + /// cannot use `extract` with such a pattern. 
+ /// + /// But a pattern like `(a)(b)|(c)(d)` can be used with `extract` because + /// the number of capture groups in every match is always equivalent, + /// even if the capture _indices_ in each match are not. + /// + /// # Example + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"([0-9]{4})-([0-9]{2})-([0-9]{2})").unwrap(); + /// let hay = b"On 2010-03-14, I became a Tenneessee lamb."; + /// let Some((full, [year, month, day])) = + /// re.captures(hay).map(|caps| caps.extract()) else { return }; + /// assert_eq!(b"2010-03-14", full); + /// assert_eq!(b"2010", year); + /// assert_eq!(b"03", month); + /// assert_eq!(b"14", day); + /// ``` + /// + /// # Example: iteration + /// + /// This example shows how to use this method when iterating over all + /// `Captures` matches in a haystack. + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"([0-9]{4})-([0-9]{2})-([0-9]{2})").unwrap(); + /// let hay = b"1973-01-05, 1975-08-25 and 1980-10-18"; + /// + /// let mut dates: Vec<(&[u8], &[u8], &[u8])> = vec![]; + /// for (_, [y, m, d]) in re.captures_iter(hay).map(|c| c.extract()) { + /// dates.push((y, m, d)); + /// } + /// assert_eq!(dates, vec![ + /// (&b"1973"[..], &b"01"[..], &b"05"[..]), + /// (&b"1975"[..], &b"08"[..], &b"25"[..]), + /// (&b"1980"[..], &b"10"[..], &b"18"[..]), + /// ]); + /// ``` + /// + /// # Example: parsing different formats + /// + /// This API is particularly useful when you need to extract a particular + /// value that might occur in a different format. 
Consider, for example, + /// an identifier that might be in double quotes or single quotes: + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r#"id:(?:"([^"]+)"|'([^']+)')"#).unwrap(); + /// let hay = br#"The first is id:"foo" and the second is id:'bar'."#; + /// let mut ids = vec![]; + /// for (_, [id]) in re.captures_iter(hay).map(|c| c.extract()) { + /// ids.push(id); + /// } + /// assert_eq!(ids, vec![b"foo", b"bar"]); + /// ``` + pub fn extract<const N: usize>(&self) -> (&'h [u8], [&'h [u8]; N]) { + let len = self + .static_captures_len + .expect("number of capture groups can vary in a match") + .checked_sub(1) + .expect("number of groups is always greater than zero"); + assert_eq!(N, len, "asked for {} groups, but must ask for {}", N, len); + // The regex-automata variant of extract is a bit more permissive. + // It doesn't require the number of matching capturing groups to be + // static, and you can even request fewer groups than what's there. So + // this is guaranteed to never panic because we've asserted above that + // the user has requested precisely the number of groups that must be + // present in any match for this regex. + self.caps.extract_bytes(self.haystack) + } + + /// Expands all instances of `$ref` in `replacement` to the corresponding + /// capture group, and writes them to the `dst` buffer given. A `ref` can + /// be a capture group index or a name. If `ref` doesn't refer to a capture + /// group that participated in the match, then it is replaced with the + /// empty string. + /// + /// # Format + /// + /// The format of the replacement string supports two different kinds of + /// capture references: unbraced and braced. + /// + /// For the unbraced format, the format supported is `$ref` where `ref` + /// can be any sequence of characters in the class `[0-9A-Za-z_]`. `ref` is always + /// the longest possible parse. 
So for example, `$1a` corresponds to the + /// capture group named `1a` and not the capture group at index `1`. If + /// `ref` matches `^[0-9]+$`, then it is treated as a capture group index + /// itself and not a name. + /// + /// For the braced format, the format supported is `${ref}` where `ref` can + /// be any sequence of bytes except for `}`. If no closing brace occurs, + /// then it is not considered a capture reference. As with the unbraced + /// format, if `ref` matches `^[0-9]+$`, then it is treated as a capture + /// group index and not a name. + /// + /// The braced format is useful for exerting precise control over the name + /// of the capture reference. For example, `${1}a` corresponds to the + /// capture group reference `1` followed by the letter `a`, whereas `$1a` + /// (as mentioned above) corresponds to the capture group reference `1a`. + /// The braced format is also useful for expressing capture group names + /// that use characters not supported by the unbraced format. For example, + /// `${foo[bar].baz}` refers to the capture group named `foo[bar].baz`. + /// + /// If a capture group reference is found and it does not refer to a valid + /// capture group, then it will be replaced with the empty string. + /// + /// To write a literal `$`, use `$$`. + /// + /// # Example + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new( + /// r"(?<day>[0-9]{2})-(?<month>[0-9]{2})-(?<year>[0-9]{4})", + /// ).unwrap(); + /// let hay = b"On 14-03-2010, I became a Tenneessee lamb."; + /// let caps = re.captures(hay).unwrap(); + /// + /// let mut dst = vec![]; + /// caps.expand(b"year=$year, month=$month, day=$day", &mut dst); + /// assert_eq!(dst, b"year=2010, month=03, day=14"); + /// ``` + #[inline] + pub fn expand(&self, replacement: &[u8], dst: &mut Vec<u8>) { + self.caps.interpolate_bytes_into(self.haystack, replacement, dst); + } + + /// Returns an iterator over all capture groups. 
This includes both + /// matching and non-matching groups. + /// + /// The iterator always yields at least one matching group: the first group + /// (at index `0`) with no name. Subsequent groups are returned in the order + /// of their opening parenthesis in the regex. + /// + /// The elements yielded have type `Option<Match<'h>>`, where a non-`None` + /// value is present if the capture group matches. + /// + /// # Example + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"(\w)(\d)?(\w)").unwrap(); + /// let caps = re.captures(b"AZ").unwrap(); + /// + /// let mut it = caps.iter(); + /// assert_eq!(it.next().unwrap().map(|m| m.as_bytes()), Some(&b"AZ"[..])); + /// assert_eq!(it.next().unwrap().map(|m| m.as_bytes()), Some(&b"A"[..])); + /// assert_eq!(it.next().unwrap().map(|m| m.as_bytes()), None); + /// assert_eq!(it.next().unwrap().map(|m| m.as_bytes()), Some(&b"Z"[..])); + /// assert_eq!(it.next(), None); + /// ``` + #[inline] + pub fn iter<'c>(&'c self) -> SubCaptureMatches<'c, 'h> { + SubCaptureMatches { haystack: self.haystack, it: self.caps.iter() } + } + + /// Returns the total number of capture groups. This includes both + /// matching and non-matching groups. + /// + /// The length returned is always equivalent to the number of elements + /// yielded by [`Captures::iter`]. Consequently, the length is always + /// greater than zero since every `Captures` value always includes the + /// match for the entire regex. 
+ /// + /// # Example + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"(\w)(\d)?(\w)").unwrap(); + /// let caps = re.captures(b"AZ").unwrap(); + /// assert_eq!(caps.len(), 4); + /// ``` + #[inline] + pub fn len(&self) -> usize { + self.caps.group_len() + } +} + +impl<'h> core::fmt::Debug for Captures<'h> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + /// A little helper type to provide a nice map-like debug + /// representation for our capturing group spans. + /// + /// regex-automata has something similar, but it includes the pattern + /// ID in its debug output, which is confusing. It also doesn't include + /// the strings that match because a regex-automata `Captures` doesn't + /// borrow the haystack. + struct CapturesDebugMap<'a> { + caps: &'a Captures<'a>, + } + + impl<'a> core::fmt::Debug for CapturesDebugMap<'a> { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + let mut map = f.debug_map(); + let names = + self.caps.caps.group_info().pattern_names(PatternID::ZERO); + for (group_index, maybe_name) in names.enumerate() { + let key = Key(group_index, maybe_name); + match self.caps.get(group_index) { + None => map.entry(&key, &None::<()>), + Some(mat) => map.entry(&key, &Value(mat)), + }; + } + map.finish() + } + } + + struct Key<'a>(usize, Option<&'a str>); + + impl<'a> core::fmt::Debug for Key<'a> { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + write!(f, "{}", self.0)?; + if let Some(name) = self.1 { + write!(f, "/{:?}", name)?; + } + Ok(()) + } + } + + struct Value<'a>(Match<'a>); + + impl<'a> core::fmt::Debug for Value<'a> { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + use regex_automata::util::escape::DebugHaystack; + + write!( + f, + "{}..{}/{:?}", + self.0.start(), + self.0.end(), + DebugHaystack(self.0.as_bytes()) + ) + } + } + + f.debug_tuple("Captures") + .field(&CapturesDebugMap { caps: self }) + .finish() + } +} + 
+/// Get a matching capture group's haystack substring by index. +/// +/// The haystack substring returned can't outlive the `Captures` object if this +/// method is used, because of how `Index` is defined (normally `a[i]` is part +/// of `a` and can't outlive it). To work around this limitation, use +/// [`Captures::get`] instead. +/// +/// `'h` is the lifetime of the matched haystack, but the lifetime of the +/// `&[u8]` returned by this implementation is the lifetime of the `Captures` +/// value itself. +/// +/// # Panics +/// +/// If there is no matching group at the given index. +impl<'h> core::ops::Index<usize> for Captures<'h> { + type Output = [u8]; + + // The lifetime is written out to make it clear that the &[u8] returned + // does NOT have a lifetime equivalent to 'h. + fn index<'a>(&'a self, i: usize) -> &'a [u8] { + self.get(i) + .map(|m| m.as_bytes()) + .unwrap_or_else(|| panic!("no group at index '{}'", i)) + } +} + +/// Get a matching capture group's haystack substring by name. +/// +/// The haystack substring returned can't outlive the `Captures` object if this +/// method is used, because of how `Index` is defined (normally `a[i]` is part +/// of `a` and can't outlive it). To work around this limitation, use +/// [`Captures::name`] instead. +/// +/// `'h` is the lifetime of the matched haystack, but the lifetime of the +/// `&[u8]` returned by this implementation is the lifetime of the `Captures` +/// value itself. +/// +/// `'n` is the lifetime of the group name used to index the `Captures` value. +/// +/// # Panics +/// +/// If there is no matching group at the given name. +impl<'h, 'n> core::ops::Index<&'n str> for Captures<'h> { + type Output = [u8]; + + fn index<'a>(&'a self, name: &'n str) -> &'a [u8] { + self.name(name) + .map(|m| m.as_bytes()) + .unwrap_or_else(|| panic!("no group named '{}'", name)) + } +} + +/// A low level representation of the byte offsets of each capture group. 
+/// +/// You can think of this as a lower level [`Captures`], where this type does +/// not support named capturing groups directly and it does not borrow the +/// haystack that these offsets were matched on. +/// +/// Primarily, this type is useful when using the lower level `Regex` APIs such +/// as [`Regex::captures_read`], which permits amortizing the allocation in +/// which capture match offsets are stored. +/// +/// In order to build a value of this type, you'll need to call the +/// [`Regex::capture_locations`] method. The value returned can then be reused +/// in subsequent searches for that regex. Using it for other regexes may +/// result in a panic or otherwise incorrect results. +/// +/// # Example +/// +/// This example shows how to create and use `CaptureLocations` in a search. +/// +/// ``` +/// use regex::bytes::Regex; +/// +/// let re = Regex::new(r"(?<first>\w+)\s+(?<last>\w+)").unwrap(); +/// let mut locs = re.capture_locations(); +/// let m = re.captures_read(&mut locs, b"Bruce Springsteen").unwrap(); +/// assert_eq!(0..17, m.range()); +/// assert_eq!(Some((0, 17)), locs.get(0)); +/// assert_eq!(Some((0, 5)), locs.get(1)); +/// assert_eq!(Some((6, 17)), locs.get(2)); +/// +/// // Asking for an invalid capture group always returns None. +/// assert_eq!(None, locs.get(3)); +/// # // literals are too big for 32-bit usize: #1041 +/// # #[cfg(target_pointer_width = "64")] +/// assert_eq!(None, locs.get(34973498648)); +/// # #[cfg(target_pointer_width = "64")] +/// assert_eq!(None, locs.get(9944060567225171988)); +/// ``` +#[derive(Clone, Debug)] +pub struct CaptureLocations(captures::Captures); + +/// A type alias for `CaptureLocations` for backwards compatibility. +/// +/// Previously, we exported `CaptureLocations` as `Locations` in an +/// undocumented API. To prevent breaking that code (e.g., in `regex-capi`), +/// we continue re-exporting the same undocumented API. 
+#[doc(hidden)] +pub type Locations = CaptureLocations; + +impl CaptureLocations { + /// Returns the start and end byte offsets of the capture group at index + /// `i`. This returns `None` if `i` is not a valid capture group or if the + /// capture group did not match. + /// + /// # Example + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"(?<first>\w+)\s+(?<last>\w+)").unwrap(); + /// let mut locs = re.capture_locations(); + /// re.captures_read(&mut locs, b"Bruce Springsteen").unwrap(); + /// assert_eq!(Some((0, 17)), locs.get(0)); + /// assert_eq!(Some((0, 5)), locs.get(1)); + /// assert_eq!(Some((6, 17)), locs.get(2)); + /// ``` + #[inline] + pub fn get(&self, i: usize) -> Option<(usize, usize)> { + self.0.get_group(i).map(|sp| (sp.start, sp.end)) + } + + /// Returns the total number of capture groups (even if they didn't match). + /// That is, the length returned is unaffected by the result of a search. + /// + /// This is always at least `1` since every regex has at least `1` + /// capturing group that corresponds to the entire match. + /// + /// # Example + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"(?<first>\w+)\s+(?<last>\w+)").unwrap(); + /// let mut locs = re.capture_locations(); + /// assert_eq!(3, locs.len()); + /// re.captures_read(&mut locs, b"Bruce Springsteen").unwrap(); + /// assert_eq!(3, locs.len()); + /// ``` + /// + /// Notice that the length is always at least `1`, regardless of the regex: + /// + /// ``` + /// use regex::bytes::Regex; + /// + /// let re = Regex::new(r"").unwrap(); + /// let locs = re.capture_locations(); + /// assert_eq!(1, locs.len()); + /// + /// // [a&&b] is a regex that never matches anything. 
+ /// let re = Regex::new(r"[a&&b]").unwrap(); + /// let locs = re.capture_locations(); + /// assert_eq!(1, locs.len()); + /// ``` + #[inline] + pub fn len(&self) -> usize { + // self.0.group_len() returns 0 if the underlying captures doesn't + // represent a match, but the behavior guaranteed for this method is + // that the length doesn't change based on a match or not. + self.0.group_info().group_len(PatternID::ZERO) + } + + /// An alias for the `get` method for backwards compatibility. + /// + /// Previously, we exported `get` as `pos` in an undocumented API. To + /// prevent breaking that code (e.g., in `regex-capi`), we continue + /// re-exporting the same undocumented API. + #[doc(hidden)] + #[inline] + pub fn pos(&self, i: usize) -> Option<(usize, usize)> { + self.get(i) + } +} + +/// An iterator over all non-overlapping matches in a haystack. +/// +/// This iterator yields [`Match`] values. The iterator stops when no more +/// matches can be found. +/// +/// `'r` is the lifetime of the compiled regular expression and `'h` is the +/// lifetime of the haystack. +/// +/// This iterator is created by [`Regex::find_iter`]. +/// +/// # Time complexity +/// +/// Note that since an iterator runs potentially many searches on the haystack +/// and since each search has worst case `O(m * n)` time complexity, the +/// overall worst case time complexity for iteration is `O(m * n^2)`. +#[derive(Debug)] +pub struct Matches<'r, 'h> { + haystack: &'h [u8], + it: meta::FindMatches<'r, 'h>, +} + +impl<'r, 'h> Iterator for Matches<'r, 'h> { + type Item = Match<'h>; + + #[inline] + fn next(&mut self) -> Option<Match<'h>> { + self.it + .next() + .map(|sp| Match::new(self.haystack, sp.start(), sp.end())) + } + + #[inline] + fn count(self) -> usize { + // This can actually be up to 2x faster than calling `next()` until + // completion, because counting matches when using a DFA only requires + // finding the end of each match. 
But returning a `Match` via `next()` + // requires the start of each match which, with a DFA, requires a + // reverse forward scan to find it. + self.it.count() + } +} + +impl<'r, 'h> core::iter::FusedIterator for Matches<'r, 'h> {} + +/// An iterator over all non-overlapping capture matches in a haystack. +/// +/// This iterator yields [`Captures`] values. The iterator stops when no more +/// matches can be found. +/// +/// `'r` is the lifetime of the compiled regular expression and `'h` is the +/// lifetime of the matched string. +/// +/// This iterator is created by [`Regex::captures_iter`]. +/// +/// # Time complexity +/// +/// Note that since an iterator runs potentially many searches on the haystack +/// and since each search has worst case `O(m * n)` time complexity, the +/// overall worst case time complexity for iteration is `O(m * n^2)`. +#[derive(Debug)] +pub struct CaptureMatches<'r, 'h> { + haystack: &'h [u8], + it: meta::CapturesMatches<'r, 'h>, +} + +impl<'r, 'h> Iterator for CaptureMatches<'r, 'h> { + type Item = Captures<'h>; + + #[inline] + fn next(&mut self) -> Option<Captures<'h>> { + let static_captures_len = self.it.regex().static_captures_len(); + self.it.next().map(|caps| Captures { + haystack: self.haystack, + caps, + static_captures_len, + }) + } + + #[inline] + fn count(self) -> usize { + // This can actually be up to 2x faster than calling `next()` until + // completion, because counting matches when using a DFA only requires + // finding the end of each match. But returning a `Match` via `next()` + // requires the start of each match which, with a DFA, requires a + // reverse forward scan to find it. + self.it.count() + } +} + +impl<'r, 'h> core::iter::FusedIterator for CaptureMatches<'r, 'h> {} + +/// An iterator over all substrings delimited by a regex match. +/// +/// `'r` is the lifetime of the compiled regular expression and `'h` is the +/// lifetime of the byte string being split. 
+/// +/// This iterator is created by [`Regex::split`]. +/// +/// # Time complexity +/// +/// Note that since an iterator runs potentially many searches on the haystack +/// and since each search has worst case `O(m * n)` time complexity, the +/// overall worst case time complexity for iteration is `O(m * n^2)`. +#[derive(Debug)] +pub struct Split<'r, 'h> { + haystack: &'h [u8], + it: meta::Split<'r, 'h>, +} + +impl<'r, 'h> Iterator for Split<'r, 'h> { + type Item = &'h [u8]; + + #[inline] + fn next(&mut self) -> Option<&'h [u8]> { + self.it.next().map(|span| &self.haystack[span]) + } +} + +impl<'r, 'h> core::iter::FusedIterator for Split<'r, 'h> {} + +/// An iterator over at most `N` substrings delimited by a regex match. +/// +/// The last substring yielded by this iterator will be whatever remains after +/// `N-1` splits. +/// +/// `'r` is the lifetime of the compiled regular expression and `'h` is the +/// lifetime of the byte string being split. +/// +/// This iterator is created by [`Regex::splitn`]. +/// +/// # Time complexity +/// +/// Note that since an iterator runs potentially many searches on the haystack +/// and since each search has worst case `O(m * n)` time complexity, the +/// overall worst case time complexity for iteration is `O(m * n^2)`. +/// +/// Although note that the worst case time here has an upper bound given +/// by the `limit` parameter to [`Regex::splitn`]. +#[derive(Debug)] +pub struct SplitN<'r, 'h> { + haystack: &'h [u8], + it: meta::SplitN<'r, 'h>, +} + +impl<'r, 'h> Iterator for SplitN<'r, 'h> { + type Item = &'h [u8]; + + #[inline] + fn next(&mut self) -> Option<&'h [u8]> { + self.it.next().map(|span| &self.haystack[span]) + } + + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + self.it.size_hint() + } +} + +impl<'r, 'h> core::iter::FusedIterator for SplitN<'r, 'h> {} + +/// An iterator over the names of all capture groups in a regex. 
+/// +/// This iterator yields values of type `Option<&str>` in order of the opening +/// capture group parenthesis in the regex pattern. `None` is yielded for +/// groups with no name. The first element always corresponds to the implicit +/// and unnamed group for the overall match. +/// +/// `'r` is the lifetime of the compiled regular expression. +/// +/// This iterator is created by [`Regex::capture_names`]. +#[derive(Clone, Debug)] +pub struct CaptureNames<'r>(captures::GroupInfoPatternNames<'r>); + +impl<'r> Iterator for CaptureNames<'r> { + type Item = Option<&'r str>; + + #[inline] + fn next(&mut self) -> Option<Option<&'r str>> { + self.0.next() + } + + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + self.0.size_hint() + } + + #[inline] + fn count(self) -> usize { + self.0.count() + } +} + +impl<'r> ExactSizeIterator for CaptureNames<'r> {} + +impl<'r> core::iter::FusedIterator for CaptureNames<'r> {} + +/// An iterator over all group matches in a [`Captures`] value. +/// +/// This iterator yields values of type `Option<Match<'h>>`, where `'h` is the +/// lifetime of the haystack that the matches are for. The order of elements +/// yielded corresponds to the order of the opening parenthesis for the group +/// in the regex pattern. `None` is yielded for groups that did not participate +/// in the match. +/// +/// The first element always corresponds to the implicit group for the overall +/// match. Since this iterator is created by a [`Captures`] value, and a +/// `Captures` value is only created when a match occurs, it follows that the +/// first element yielded by this iterator is guaranteed to be non-`None`. +/// +/// The lifetime `'c` corresponds to the lifetime of the `Captures` value that +/// created this iterator, and the lifetime `'h` corresponds to the originally +/// matched haystack. 
+#[derive(Clone, Debug)] +pub struct SubCaptureMatches<'c, 'h> { + haystack: &'h [u8], + it: captures::CapturesPatternIter<'c>, +} + +impl<'c, 'h> Iterator for SubCaptureMatches<'c, 'h> { + type Item = Option<Match<'h>>; + + #[inline] + fn next(&mut self) -> Option<Option<Match<'h>>> { + self.it.next().map(|group| { + group.map(|sp| Match::new(self.haystack, sp.start, sp.end)) + }) + } + + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + self.it.size_hint() + } + + #[inline] + fn count(self) -> usize { + self.it.count() + } +} + +impl<'c, 'h> ExactSizeIterator for SubCaptureMatches<'c, 'h> {} + +impl<'c, 'h> core::iter::FusedIterator for SubCaptureMatches<'c, 'h> {} + +/// A trait for types that can be used to replace matches in a haystack. +/// +/// In general, users of this crate shouldn't need to implement this trait, +/// since implementations are already provided for `&[u8]` along with other +/// variants of byte string types, as well as `FnMut(&Captures) -> Vec<u8>` (or +/// any `FnMut(&Captures) -> T` where `T: AsRef<[u8]>`). Those cover most use +/// cases, but callers can implement this trait directly if necessary. +/// +/// # Example +/// +/// This example shows a basic implementation of the `Replacer` trait. This can +/// be done much more simply using the replacement byte string interpolation +/// support (e.g., `$first $last`), but this approach avoids needing to parse +/// the replacement byte string at all. 
+/// +/// ``` +/// use regex::bytes::{Captures, Regex, Replacer}; +/// +/// struct NameSwapper; +/// +/// impl Replacer for NameSwapper { +/// fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { +/// dst.extend_from_slice(&caps["first"]); +/// dst.extend_from_slice(b" "); +/// dst.extend_from_slice(&caps["last"]); +/// } +/// } +/// +/// let re = Regex::new(r"(?<last>[^,\s]+),\s+(?<first>\S+)").unwrap(); +/// let result = re.replace(b"Springsteen, Bruce", NameSwapper); +/// assert_eq!(result, &b"Bruce Springsteen"[..]); +/// ``` +pub trait Replacer { + /// Appends possibly empty data to `dst` to replace the current match. + /// + /// The current match is represented by `caps`, which is guaranteed to have + /// a match at capture group `0`. + /// + /// For example, a no-op replacement would be + /// `dst.extend_from_slice(&caps[0])`. + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>); + + /// Return a fixed unchanging replacement byte string. + /// + /// When doing replacements, if access to [`Captures`] is not needed (e.g., + /// the replacement byte string does not need `$` expansion), then it can + /// be beneficial to avoid finding sub-captures. + /// + /// In general, this is called once for every call to a replacement routine + /// such as [`Regex::replace_all`]. + fn no_expansion<'r>(&'r mut self) -> Option<Cow<'r, [u8]>> { + None + } + + /// Returns a type that implements `Replacer`, but that borrows and wraps + /// this `Replacer`. + /// + /// This is useful when you want to take a generic `Replacer` (which might + /// not be cloneable) and use it without consuming it, so it can be used + /// more than once. 
+ /// + /// # Example + /// + /// ``` + /// use regex::bytes::{Regex, Replacer}; + /// + /// fn replace_all_twice<R: Replacer>( + /// re: Regex, + /// src: &[u8], + /// mut rep: R, + /// ) -> Vec<u8> { + /// let dst = re.replace_all(src, rep.by_ref()); + /// let dst = re.replace_all(&dst, rep.by_ref()); + /// dst.into_owned() + /// } + /// ``` + fn by_ref<'r>(&'r mut self) -> ReplacerRef<'r, Self> { + ReplacerRef(self) + } +} + +impl<'a, const N: usize> Replacer for &'a [u8; N] { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { + caps.expand(&**self, dst); + } + + fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> { + no_expansion(self) + } +} + +impl<const N: usize> Replacer for [u8; N] { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { + caps.expand(&*self, dst); + } + + fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> { + no_expansion(self) + } +} + +impl<'a> Replacer for &'a [u8] { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { + caps.expand(*self, dst); + } + + fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> { + no_expansion(self) + } +} + +impl<'a> Replacer for &'a Vec<u8> { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { + caps.expand(*self, dst); + } + + fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> { + no_expansion(self) + } +} + +impl Replacer for Vec<u8> { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { + caps.expand(self, dst); + } + + fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> { + no_expansion(self) + } +} + +impl<'a> Replacer for Cow<'a, [u8]> { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { + caps.expand(self.as_ref(), dst); + } + + fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> { + no_expansion(self) + } +} + +impl<'a> Replacer for &'a Cow<'a, [u8]> { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { + caps.expand(self.as_ref(), dst); + } + + fn 
no_expansion(&mut self) -> Option<Cow<'_, [u8]>> { + no_expansion(self) + } +} + +impl<F, T> Replacer for F +where + F: FnMut(&Captures<'_>) -> T, + T: AsRef<[u8]>, +{ + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { + dst.extend_from_slice((*self)(caps).as_ref()); + } +} + +/// A by-reference adaptor for a [`Replacer`]. +/// +/// This permits reusing the same `Replacer` value in multiple calls to a +/// replacement routine like [`Regex::replace_all`]. +/// +/// This type is created by [`Replacer::by_ref`]. +#[derive(Debug)] +pub struct ReplacerRef<'a, R: ?Sized>(&'a mut R); + +impl<'a, R: Replacer + ?Sized + 'a> Replacer for ReplacerRef<'a, R> { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { + self.0.replace_append(caps, dst) + } + + fn no_expansion<'r>(&'r mut self) -> Option<Cow<'r, [u8]>> { + self.0.no_expansion() + } +} + +/// A helper type for forcing literal string replacement. +/// +/// It can be used with routines like [`Regex::replace`] and +/// [`Regex::replace_all`] to do a literal string replacement without expanding +/// `$name` to their corresponding capture groups. This can be both convenient +/// (to avoid escaping `$`, for example) and faster (since capture groups +/// don't need to be found). +/// +/// `'s` is the lifetime of the literal string to use. 
+/// +/// # Example +/// +/// ``` +/// use regex::bytes::{NoExpand, Regex}; +/// +/// let re = Regex::new(r"(?<last>[^,\s]+),\s+(\S+)").unwrap(); +/// let result = re.replace(b"Springsteen, Bruce", NoExpand(b"$2 $last")); +/// assert_eq!(result, &b"$2 $last"[..]); +/// ``` +#[derive(Clone, Debug)] +pub struct NoExpand<'s>(pub &'s [u8]); + +impl<'s> Replacer for NoExpand<'s> { + fn replace_append(&mut self, _: &Captures<'_>, dst: &mut Vec<u8>) { + dst.extend_from_slice(self.0); + } + + fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> { + Some(Cow::Borrowed(self.0)) + } +} + +/// Quickly checks the given replacement string for whether interpolation +/// should be done on it. It returns `None` if a `$` was found anywhere in the +/// given string, which suggests interpolation needs to be done. But if there's +/// no `$` anywhere, then interpolation definitely does not need to be done. In +/// that case, the given string is returned as a borrowed `Cow`. +/// +/// This is meant to be used to implement the `Replacer::no_expansion` method +/// in its various trait impls. +fn no_expansion<T: AsRef<[u8]>>(replacement: &T) -> Option<Cow<'_, [u8]>> { + let replacement = replacement.as_ref(); + match crate::find_byte::find_byte(b'$', replacement) { + Some(_) => None, + None => Some(Cow::Borrowed(replacement)), + } +} diff --git a/vendor/regex/src/regex/mod.rs b/vendor/regex/src/regex/mod.rs new file mode 100644 index 0000000..93fadec --- /dev/null +++ b/vendor/regex/src/regex/mod.rs @@ -0,0 +1,2 @@ +pub(crate) mod bytes; +pub(crate) mod string; diff --git a/vendor/regex/src/regex/string.rs b/vendor/regex/src/regex/string.rs new file mode 100644 index 0000000..824f45c --- /dev/null +++ b/vendor/regex/src/regex/string.rs @@ -0,0 +1,2587 @@ +use alloc::{borrow::Cow, string::String, sync::Arc}; + +use regex_automata::{meta, util::captures, Input, PatternID}; + +use crate::{error::Error, RegexBuilder}; + +/// A compiled regular expression for searching Unicode haystacks. 
+/// +/// A `Regex` can be used to search haystacks, split haystacks into substrings +/// or replace substrings in a haystack with a different substring. All +/// searching is done with an implicit `(?s:.)*?` at the beginning and end of +/// a pattern. To force an expression to match the whole string (or a prefix +/// or a suffix), you must use an anchor like `^` or `$` (or `\A` and `\z`). +/// +/// While this crate will handle Unicode strings (whether in the regular +/// expression or in the haystack), all positions returned are **byte +/// offsets**. Every byte offset is guaranteed to be at a Unicode code point +/// boundary. That is, all offsets returned by the `Regex` API are guaranteed +/// to be ranges that can slice a `&str` without panicking. If you want to +/// relax this requirement, then you must search `&[u8]` haystacks with a +/// [`bytes::Regex`](crate::bytes::Regex). +/// +/// The only methods that allocate new strings are the string replacement +/// methods. All other methods (searching and splitting) return borrowed +/// references into the haystack given. +/// +/// # Example +/// +/// Find the offsets of a US phone number: +/// +/// ``` +/// use regex::Regex; +/// +/// let re = Regex::new("[0-9]{3}-[0-9]{3}-[0-9]{4}").unwrap(); +/// let m = re.find("phone: 111-222-3333").unwrap(); +/// assert_eq!(7..19, m.range()); +/// ``` +/// +/// # Example: extracting capture groups +/// +/// A common way to use regexes is with capture groups. That is, instead of +/// just looking for matches of an entire regex, parentheses are used to create +/// groups that represent part of the match. +/// +/// For example, consider a haystack with multiple lines, and each line has +/// three whitespace delimited fields where the second field is expected to be +/// a number and the third field a boolean. 
To make this convenient, we use +/// the [`Captures::extract`] API to put the strings that match each group +/// into a fixed size array: +/// +/// ``` +/// use regex::Regex; +/// +/// let hay = " +/// rabbit 54 true +/// groundhog 2 true +/// does not match +/// fox 109 false +/// "; +/// let re = Regex::new(r"(?m)^\s*(\S+)\s+([0-9]+)\s+(true|false)\s*$").unwrap(); +/// let mut fields: Vec<(&str, i64, bool)> = vec![]; +/// for (_, [f1, f2, f3]) in re.captures_iter(hay).map(|caps| caps.extract()) { +/// fields.push((f1, f2.parse()?, f3.parse()?)); +/// } +/// assert_eq!(fields, vec![ +/// ("rabbit", 54, true), +/// ("groundhog", 2, true), +/// ("fox", 109, false), +/// ]); +/// +/// # Ok::<(), Box<dyn std::error::Error>>(()) +/// ``` +/// +/// # Example: searching with the `Pattern` trait +/// +/// **Note**: This section requires that this crate is compiled with the +/// `pattern` Cargo feature enabled, which **requires nightly Rust**. +/// +/// Since `Regex` implements `Pattern` from the standard library, one can +/// use regexes with methods defined on `&str`. For example, `is_match`, +/// `find`, `find_iter` and `split` can, in some cases, be replaced with +/// `str::contains`, `str::find`, `str::match_indices` and `str::split`. +/// +/// Here are some examples: +/// +/// ```ignore +/// use regex::Regex; +/// +/// let re = Regex::new(r"\d+").unwrap(); +/// let hay = "a111b222c"; +/// +/// assert!(hay.contains(&re)); +/// assert_eq!(hay.find(&re), Some(1)); +/// assert_eq!(hay.match_indices(&re).collect::<Vec<_>>(), vec![ +/// (1, "111"), +/// (5, "222"), +/// ]); +/// assert_eq!(hay.split(&re).collect::<Vec<_>>(), vec!["a", "b", "c"]); +/// ``` +#[derive(Clone)] +pub struct Regex { + pub(crate) meta: meta::Regex, + pub(crate) pattern: Arc<str>, +} + +impl core::fmt::Display for Regex { + /// Shows the original regular expression. 
+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, "{}", self.as_str()) + } +} + +impl core::fmt::Debug for Regex { + /// Shows the original regular expression. + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_tuple("Regex").field(&self.as_str()).finish() + } +} + +impl core::str::FromStr for Regex { + type Err = Error; + + /// Attempts to parse a string into a regular expression + fn from_str(s: &str) -> Result<Regex, Error> { + Regex::new(s) + } +} + +impl TryFrom<&str> for Regex { + type Error = Error; + + /// Attempts to parse a string into a regular expression + fn try_from(s: &str) -> Result<Regex, Error> { + Regex::new(s) + } +} + +impl TryFrom<String> for Regex { + type Error = Error; + + /// Attempts to parse a string into a regular expression + fn try_from(s: String) -> Result<Regex, Error> { + Regex::new(&s) + } +} + +/// Core regular expression methods. +impl Regex { + /// Compiles a regular expression. Once compiled, it can be used repeatedly + /// to search, split or replace substrings in a haystack. + /// + /// Note that regex compilation tends to be a somewhat expensive process, + /// and unlike higher level environments, compilation is not automatically + /// cached for you. One should endeavor to compile a regex once and then + /// reuse it. For example, it's a bad idea to compile the same regex + /// repeatedly in a loop. + /// + /// # Errors + /// + /// If an invalid pattern is given, then an error is returned. + /// An error is also returned if the pattern is valid, but would + /// produce a regex that is bigger than the configured size limit via + /// [`RegexBuilder::size_limit`]. (A reasonable size limit is enabled by + /// default.) 
+ /// + /// # Example + /// + /// ``` + /// use regex::Regex; + /// + /// // An Invalid pattern because of an unclosed parenthesis + /// assert!(Regex::new(r"foo(bar").is_err()); + /// // An invalid pattern because the regex would be too big + /// // because Unicode tends to inflate things. + /// assert!(Regex::new(r"\w{1000}").is_err()); + /// // Disabling Unicode can make the regex much smaller, + /// // potentially by up to or more than an order of magnitude. + /// assert!(Regex::new(r"(?-u:\w){1000}").is_ok()); + /// ``` + pub fn new(re: &str) -> Result<Regex, Error> { + RegexBuilder::new(re).build() + } + + /// Returns true if and only if there is a match for the regex anywhere + /// in the haystack given. + /// + /// It is recommended to use this method if all you need to do is test + /// whether a match exists, since the underlying matching engine may be + /// able to do less work. + /// + /// # Example + /// + /// Test if some haystack contains at least one word with exactly 13 + /// Unicode word characters: + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"\b\w{13}\b").unwrap(); + /// let hay = "I categorically deny having triskaidekaphobia."; + /// assert!(re.is_match(hay)); + /// ``` + #[inline] + pub fn is_match(&self, haystack: &str) -> bool { + self.is_match_at(haystack, 0) + } + + /// This routine searches for the first match of this regex in the + /// haystack given, and if found, returns a [`Match`]. The `Match` + /// provides access to both the byte offsets of the match and the actual + /// substring that matched. + /// + /// Note that this should only be used if you want to find the entire + /// match. If instead you just want to test the existence of a match, + /// it's potentially faster to use `Regex::is_match(hay)` instead of + /// `Regex::find(hay).is_some()`. 
+ /// + /// # Example + /// + /// Find the first word with exactly 13 Unicode word characters: + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"\b\w{13}\b").unwrap(); + /// let hay = "I categorically deny having triskaidekaphobia."; + /// let mat = re.find(hay).unwrap(); + /// assert_eq!(2..15, mat.range()); + /// assert_eq!("categorically", mat.as_str()); + /// ``` + #[inline] + pub fn find<'h>(&self, haystack: &'h str) -> Option<Match<'h>> { + self.find_at(haystack, 0) + } + + /// Returns an iterator that yields successive non-overlapping matches in + /// the given haystack. The iterator yields values of type [`Match`]. + /// + /// # Time complexity + /// + /// Note that since `find_iter` runs potentially many searches on the + /// haystack and since each search has worst case `O(m * n)` time + /// complexity, the overall worst case time complexity for iteration is + /// `O(m * n^2)`. + /// + /// # Example + /// + /// Find every word with exactly 13 Unicode word characters: + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"\b\w{13}\b").unwrap(); + /// let hay = "Retroactively relinquishing remunerations is reprehensible."; + /// let matches: Vec<_> = re.find_iter(hay).map(|m| m.as_str()).collect(); + /// assert_eq!(matches, vec![ + /// "Retroactively", + /// "relinquishing", + /// "remunerations", + /// "reprehensible", + /// ]); + /// ``` + #[inline] + pub fn find_iter<'r, 'h>(&'r self, haystack: &'h str) -> Matches<'r, 'h> { + Matches { haystack, it: self.meta.find_iter(haystack) } + } + + /// This routine searches for the first match of this regex in the haystack + /// given, and if found, returns not only the overall match but also the + /// matches of each capture group in the regex. If no match is found, then + /// `None` is returned. + /// + /// Capture group `0` always corresponds to an implicit unnamed group that + /// includes the entire match. If a match is found, this group is always + /// present. 
Subsequent groups may be named and are numbered, starting + /// at 1, by the order in which the opening parenthesis appears in the + /// pattern. For example, in the pattern `(?<a>.(?<b>.))(?<c>.)`, `a`, + /// `b` and `c` correspond to capture group indices `1`, `2` and `3`, + /// respectively. + /// + /// You should only use `captures` if you need access to the capture group + /// matches. Otherwise, [`Regex::find`] is generally faster for discovering + /// just the overall match. + /// + /// # Example + /// + /// Say you have some haystack with movie names and their release years, + /// like "'Citizen Kane' (1941)". It'd be nice if we could search for + /// substrings looking like that, while also extracting the movie name and + /// its release year separately. The example below shows how to do that. + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"'([^']+)'\s+\((\d{4})\)").unwrap(); + /// let hay = "Not my favorite movie: 'Citizen Kane' (1941)."; + /// let caps = re.captures(hay).unwrap(); + /// assert_eq!(caps.get(0).unwrap().as_str(), "'Citizen Kane' (1941)"); + /// assert_eq!(caps.get(1).unwrap().as_str(), "Citizen Kane"); + /// assert_eq!(caps.get(2).unwrap().as_str(), "1941"); + /// // You can also access the groups by index using the Index notation. + /// // Note that this will panic on an invalid index. In this case, these + /// // accesses are always correct because the overall regex will only + /// // match when these capture groups match. + /// assert_eq!(&caps[0], "'Citizen Kane' (1941)"); + /// assert_eq!(&caps[1], "Citizen Kane"); + /// assert_eq!(&caps[2], "1941"); + /// ``` + /// + /// Note that the full match is at capture group `0`. Each subsequent + /// capture group is indexed by the order of its opening `(`. 
+ /// + /// We can make this example a bit clearer by using *named* capture groups: + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"'(?<title>[^']+)'\s+\((?<year>\d{4})\)").unwrap(); + /// let hay = "Not my favorite movie: 'Citizen Kane' (1941)."; + /// let caps = re.captures(hay).unwrap(); + /// assert_eq!(caps.get(0).unwrap().as_str(), "'Citizen Kane' (1941)"); + /// assert_eq!(caps.name("title").unwrap().as_str(), "Citizen Kane"); + /// assert_eq!(caps.name("year").unwrap().as_str(), "1941"); + /// // You can also access the groups by name using the Index notation. + /// // Note that this will panic on an invalid group name. In this case, + /// // these accesses are always correct because the overall regex will + /// // only match when these capture groups match. + /// assert_eq!(&caps[0], "'Citizen Kane' (1941)"); + /// assert_eq!(&caps["title"], "Citizen Kane"); + /// assert_eq!(&caps["year"], "1941"); + /// ``` + /// + /// Here we name the capture groups, which we can access with the `name` + /// method or the `Index` notation with a `&str`. Note that the named + /// capture groups are still accessible with `get` or the `Index` notation + /// with a `usize`. + /// + /// The `0`th capture group is always unnamed, so it must always be + /// accessed with `get(0)` or `[0]`. 
+ /// + /// Finally, one other way to get the matched substrings is with the + /// [`Captures::extract`] API: + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"'([^']+)'\s+\((\d{4})\)").unwrap(); + /// let hay = "Not my favorite movie: 'Citizen Kane' (1941)."; + /// let (full, [title, year]) = re.captures(hay).unwrap().extract(); + /// assert_eq!(full, "'Citizen Kane' (1941)"); + /// assert_eq!(title, "Citizen Kane"); + /// assert_eq!(year, "1941"); + /// ``` + #[inline] + pub fn captures<'h>(&self, haystack: &'h str) -> Option<Captures<'h>> { + self.captures_at(haystack, 0) + } + + /// Returns an iterator that yields successive non-overlapping matches in + /// the given haystack. The iterator yields values of type [`Captures`]. + /// + /// This is the same as [`Regex::find_iter`], but instead of only providing + /// access to the overall match, each value yielded includes access to the + /// matches of all capture groups in the regex. Reporting this extra match + /// data is potentially costly, so callers should only use `captures_iter` + /// over `find_iter` when they actually need access to the capture group + /// matches. + /// + /// # Time complexity + /// + /// Note that since `captures_iter` runs potentially many searches on the + /// haystack and since each search has worst case `O(m * n)` time + /// complexity, the overall worst case time complexity for iteration is + /// `O(m * n^2)`. 
+ /// + /// # Example + /// + /// We can use this to find all movie titles and their release years in + /// some haystack, where the movie is formatted like "'Title' (xxxx)": + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"'([^']+)'\s+\(([0-9]{4})\)").unwrap(); + /// let hay = "'Citizen Kane' (1941), 'The Wizard of Oz' (1939), 'M' (1931)."; + /// let mut movies = vec![]; + /// for (_, [title, year]) in re.captures_iter(hay).map(|c| c.extract()) { + /// movies.push((title, year.parse::<i64>()?)); + /// } + /// assert_eq!(movies, vec![ + /// ("Citizen Kane", 1941), + /// ("The Wizard of Oz", 1939), + /// ("M", 1931), + /// ]); + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + /// + /// Or with named groups: + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"'(?<title>[^']+)'\s+\((?<year>[0-9]{4})\)").unwrap(); + /// let hay = "'Citizen Kane' (1941), 'The Wizard of Oz' (1939), 'M' (1931)."; + /// let mut it = re.captures_iter(hay); + /// + /// let caps = it.next().unwrap(); + /// assert_eq!(&caps["title"], "Citizen Kane"); + /// assert_eq!(&caps["year"], "1941"); + /// + /// let caps = it.next().unwrap(); + /// assert_eq!(&caps["title"], "The Wizard of Oz"); + /// assert_eq!(&caps["year"], "1939"); + /// + /// let caps = it.next().unwrap(); + /// assert_eq!(&caps["title"], "M"); + /// assert_eq!(&caps["year"], "1931"); + /// ``` + #[inline] + pub fn captures_iter<'r, 'h>( + &'r self, + haystack: &'h str, + ) -> CaptureMatches<'r, 'h> { + CaptureMatches { haystack, it: self.meta.captures_iter(haystack) } + } + + /// Returns an iterator of substrings of the haystack given, delimited by a + /// match of the regex. Namely, each element of the iterator corresponds to + /// a part of the haystack that *isn't* matched by the regular expression. 
+ /// + /// # Time complexity + /// + /// Since iterators over all matches requires running potentially many + /// searches on the haystack, and since each search has worst case + /// `O(m * n)` time complexity, the overall worst case time complexity for + /// this routine is `O(m * n^2)`. + /// + /// # Example + /// + /// To split a string delimited by arbitrary amounts of spaces or tabs: + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"[ \t]+").unwrap(); + /// let hay = "a b \t c\td e"; + /// let fields: Vec<&str> = re.split(hay).collect(); + /// assert_eq!(fields, vec!["a", "b", "c", "d", "e"]); + /// ``` + /// + /// # Example: more cases + /// + /// Basic usage: + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r" ").unwrap(); + /// let hay = "Mary had a little lamb"; + /// let got: Vec<&str> = re.split(hay).collect(); + /// assert_eq!(got, vec!["Mary", "had", "a", "little", "lamb"]); + /// + /// let re = Regex::new(r"X").unwrap(); + /// let hay = ""; + /// let got: Vec<&str> = re.split(hay).collect(); + /// assert_eq!(got, vec![""]); + /// + /// let re = Regex::new(r"X").unwrap(); + /// let hay = "lionXXtigerXleopard"; + /// let got: Vec<&str> = re.split(hay).collect(); + /// assert_eq!(got, vec!["lion", "", "tiger", "leopard"]); + /// + /// let re = Regex::new(r"::").unwrap(); + /// let hay = "lion::tiger::leopard"; + /// let got: Vec<&str> = re.split(hay).collect(); + /// assert_eq!(got, vec!["lion", "tiger", "leopard"]); + /// ``` + /// + /// If a haystack contains multiple contiguous matches, you will end up + /// with empty spans yielded by the iterator: + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"X").unwrap(); + /// let hay = "XXXXaXXbXc"; + /// let got: Vec<&str> = re.split(hay).collect(); + /// assert_eq!(got, vec!["", "", "", "", "a", "", "b", "c"]); + /// + /// let re = Regex::new(r"/").unwrap(); + /// let hay = "(///)"; + /// let got: Vec<&str> = re.split(hay).collect(); + 
/// assert_eq!(got, vec!["(", "", "", ")"]); + /// ``` + /// + /// Separators at the start or end of a haystack are neighbored by empty + /// substring. + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"0").unwrap(); + /// let hay = "010"; + /// let got: Vec<&str> = re.split(hay).collect(); + /// assert_eq!(got, vec!["", "1", ""]); + /// ``` + /// + /// When the empty string is used as a regex, it splits at every valid + /// UTF-8 boundary by default (which includes the beginning and end of the + /// haystack): + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"").unwrap(); + /// let hay = "rust"; + /// let got: Vec<&str> = re.split(hay).collect(); + /// assert_eq!(got, vec!["", "r", "u", "s", "t", ""]); + /// + /// // Splitting by an empty string is UTF-8 aware by default! + /// let re = Regex::new(r"").unwrap(); + /// let hay = "☃"; + /// let got: Vec<&str> = re.split(hay).collect(); + /// assert_eq!(got, vec!["", "☃", ""]); + /// ``` + /// + /// Contiguous separators (commonly shows up with whitespace), can lead to + /// possibly surprising behavior. For example, this code is correct: + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r" ").unwrap(); + /// let hay = " a b c"; + /// let got: Vec<&str> = re.split(hay).collect(); + /// assert_eq!(got, vec!["", "", "", "", "a", "", "b", "c"]); + /// ``` + /// + /// It does *not* give you `["a", "b", "c"]`. For that behavior, you'd want + /// to match contiguous space characters: + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r" +").unwrap(); + /// let hay = " a b c"; + /// let got: Vec<&str> = re.split(hay).collect(); + /// // N.B. This does still include a leading empty span because ' +' + /// // matches at the beginning of the haystack. 
+ /// assert_eq!(got, vec!["", "a", "b", "c"]); + /// ``` + #[inline] + pub fn split<'r, 'h>(&'r self, haystack: &'h str) -> Split<'r, 'h> { + Split { haystack, it: self.meta.split(haystack) } + } + + /// Returns an iterator of at most `limit` substrings of the haystack + /// given, delimited by a match of the regex. (A `limit` of `0` will return + /// no substrings.) Namely, each element of the iterator corresponds to a + /// part of the haystack that *isn't* matched by the regular expression. + /// The remainder of the haystack that is not split will be the last + /// element in the iterator. + /// + /// # Time complexity + /// + /// Since iterators over all matches requires running potentially many + /// searches on the haystack, and since each search has worst case + /// `O(m * n)` time complexity, the overall worst case time complexity for + /// this routine is `O(m * n^2)`. + /// + /// Although note that the worst case time here has an upper bound given + /// by the `limit` parameter. + /// + /// # Example + /// + /// Get the first two words in some haystack: + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"\W+").unwrap(); + /// let hay = "Hey! 
How are you?"; + /// let fields: Vec<&str> = re.splitn(hay, 3).collect(); + /// assert_eq!(fields, vec!["Hey", "How", "are you?"]); + /// ``` + /// + /// # Examples: more cases + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r" ").unwrap(); + /// let hay = "Mary had a little lamb"; + /// let got: Vec<&str> = re.splitn(hay, 3).collect(); + /// assert_eq!(got, vec!["Mary", "had", "a little lamb"]); + /// + /// let re = Regex::new(r"X").unwrap(); + /// let hay = ""; + /// let got: Vec<&str> = re.splitn(hay, 3).collect(); + /// assert_eq!(got, vec![""]); + /// + /// let re = Regex::new(r"X").unwrap(); + /// let hay = "lionXXtigerXleopard"; + /// let got: Vec<&str> = re.splitn(hay, 3).collect(); + /// assert_eq!(got, vec!["lion", "", "tigerXleopard"]); + /// + /// let re = Regex::new(r"::").unwrap(); + /// let hay = "lion::tiger::leopard"; + /// let got: Vec<&str> = re.splitn(hay, 2).collect(); + /// assert_eq!(got, vec!["lion", "tiger::leopard"]); + /// + /// let re = Regex::new(r"X").unwrap(); + /// let hay = "abcXdef"; + /// let got: Vec<&str> = re.splitn(hay, 1).collect(); + /// assert_eq!(got, vec!["abcXdef"]); + /// + /// let re = Regex::new(r"X").unwrap(); + /// let hay = "abcdef"; + /// let got: Vec<&str> = re.splitn(hay, 2).collect(); + /// assert_eq!(got, vec!["abcdef"]); + /// + /// let re = Regex::new(r"X").unwrap(); + /// let hay = "abcXdef"; + /// let got: Vec<&str> = re.splitn(hay, 0).collect(); + /// assert!(got.is_empty()); + /// ``` + #[inline] + pub fn splitn<'r, 'h>( + &'r self, + haystack: &'h str, + limit: usize, + ) -> SplitN<'r, 'h> { + SplitN { haystack, it: self.meta.splitn(haystack, limit) } + } + + /// Replaces the leftmost-first match in the given haystack with the + /// replacement provided. The replacement can be a regular string (where + /// `$N` and `$name` are expanded to match capture groups) or a function + /// that takes a [`Captures`] and returns the replaced string. 
+ /// + /// If no match is found, then the haystack is returned unchanged. In that + /// case, this implementation will likely return a `Cow::Borrowed` value + /// such that no allocation is performed. + /// + /// # Replacement string syntax + /// + /// All instances of `$ref` in the replacement string are replaced with + /// the substring corresponding to the capture group identified by `ref`. + /// + /// `ref` may be an integer corresponding to the index of the capture group + /// (counted by order of opening parenthesis where `0` is the entire match) + /// or it can be a name (consisting of letters, digits or underscores) + /// corresponding to a named capture group. + /// + /// If `ref` isn't a valid capture group (whether the name doesn't exist or + /// isn't a valid index), then it is replaced with the empty string. + /// + /// The longest possible name is used. For example, `$1a` looks up the + /// capture group named `1a` and not the capture group at index `1`. To + /// exert more precise control over the name, use braces, e.g., `${1}a`. + /// + /// To write a literal `$` use `$$`. + /// + /// # Example + /// + /// Note that this function is polymorphic with respect to the replacement. + /// In typical usage, this can just be a normal string: + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"[^01]+").unwrap(); + /// assert_eq!(re.replace("1078910", ""), "1010"); + /// ``` + /// + /// But anything satisfying the [`Replacer`] trait will work. For example, + /// a closure of type `|&Captures| -> String` provides direct access to the + /// captures corresponding to a match. 
This allows one to access capturing + /// group matches easily: + /// + /// ``` + /// use regex::{Captures, Regex}; + /// + /// let re = Regex::new(r"([^,\s]+),\s+(\S+)").unwrap(); + /// let result = re.replace("Springsteen, Bruce", |caps: &Captures| { + /// format!("{} {}", &caps[2], &caps[1]) + /// }); + /// assert_eq!(result, "Bruce Springsteen"); + /// ``` + /// + /// But this is a bit cumbersome to use all the time. Instead, a simple + /// syntax is supported (as described above) that expands `$name` into the + /// corresponding capture group. Here's the last example, but using this + /// expansion technique with named capture groups: + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"(?<last>[^,\s]+),\s+(?<first>\S+)").unwrap(); + /// let result = re.replace("Springsteen, Bruce", "$first $last"); + /// assert_eq!(result, "Bruce Springsteen"); + /// ``` + /// + /// Note that using `$2` instead of `$first` or `$1` instead of `$last` + /// would produce the same result. To write a literal `$` use `$$`. + /// + /// Sometimes the replacement string requires use of curly braces to + /// delineate a capture group replacement when it is adjacent to some other + /// literal text. For example, if we wanted to join two words together with + /// an underscore: + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"(?<first>\w+)\s+(?<second>\w+)").unwrap(); + /// let result = re.replace("deep fried", "${first}_$second"); + /// assert_eq!(result, "deep_fried"); + /// ``` + /// + /// Without the curly braces, the capture group name `first_` would be + /// used, and since it doesn't exist, it would be replaced with the empty + /// string. + /// + /// Finally, sometimes you just want to replace a literal string with no + /// regard for capturing group expansion. 
This can be done by wrapping a + /// string with [`NoExpand`]: + /// + /// ``` + /// use regex::{NoExpand, Regex}; + /// + /// let re = Regex::new(r"(?<last>[^,\s]+),\s+(\S+)").unwrap(); + /// let result = re.replace("Springsteen, Bruce", NoExpand("$2 $last")); + /// assert_eq!(result, "$2 $last"); + /// ``` + /// + /// Using `NoExpand` may also be faster, since the replacement string won't + /// need to be parsed for the `$` syntax. + #[inline] + pub fn replace<'h, R: Replacer>( + &self, + haystack: &'h str, + rep: R, + ) -> Cow<'h, str> { + self.replacen(haystack, 1, rep) + } + + /// Replaces all non-overlapping matches in the haystack with the + /// replacement provided. This is the same as calling `replacen` with + /// `limit` set to `0`. + /// + /// The documentation for [`Regex::replace`] goes into more detail about + /// what kinds of replacement strings are supported. + /// + /// # Time complexity + /// + /// Since iterators over all matches requires running potentially many + /// searches on the haystack, and since each search has worst case + /// `O(m * n)` time complexity, the overall worst case time complexity for + /// this routine is `O(m * n^2)`. + /// + /// # Fallibility + /// + /// If you need to write a replacement routine where any individual + /// replacement might "fail," doing so with this API isn't really feasible + /// because there's no way to stop the search process if a replacement + /// fails. 
Instead, if you need this functionality, you should consider + /// implementing your own replacement routine: + /// + /// ``` + /// use regex::{Captures, Regex}; + /// + /// fn replace_all<E>( + /// re: &Regex, + /// haystack: &str, + /// replacement: impl Fn(&Captures) -> Result<String, E>, + /// ) -> Result<String, E> { + /// let mut new = String::with_capacity(haystack.len()); + /// let mut last_match = 0; + /// for caps in re.captures_iter(haystack) { + /// let m = caps.get(0).unwrap(); + /// new.push_str(&haystack[last_match..m.start()]); + /// new.push_str(&replacement(&caps)?); + /// last_match = m.end(); + /// } + /// new.push_str(&haystack[last_match..]); + /// Ok(new) + /// } + /// + /// // Let's replace each word with the number of bytes in that word. + /// // But if we see a word that is "too long," we'll give up. + /// let re = Regex::new(r"\w+").unwrap(); + /// let replacement = |caps: &Captures| -> Result<String, &'static str> { + /// if caps[0].len() >= 5 { + /// return Err("word too long"); + /// } + /// Ok(caps[0].len().to_string()) + /// }; + /// assert_eq!( + /// Ok("2 3 3 3?".to_string()), + /// replace_all(&re, "hi how are you?", &replacement), + /// ); + /// assert!(replace_all(&re, "hi there", &replacement).is_err()); + /// ``` + /// + /// # Example + /// + /// This example shows how to flip the order of whitespace (excluding line + /// terminators) delimited fields, and normalizes the whitespace that + /// delimits the fields: + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"(?m)^(\S+)[\s--\r\n]+(\S+)$").unwrap(); + /// let hay = " + /// Greetings 1973 + /// Wild\t1973 + /// BornToRun\t\t\t\t1975 + /// Darkness 1978 + /// TheRiver 1980 + /// "; + /// let new = re.replace_all(hay, "$2 $1"); + /// assert_eq!(new, " + /// 1973 Greetings + /// 1973 Wild + /// 1975 BornToRun + /// 1978 Darkness + /// 1980 TheRiver + /// "); + /// ``` + #[inline] + pub fn replace_all<'h, R: Replacer>( + &self, + haystack: &'h str, + rep: 
R, + ) -> Cow<'h, str> { + self.replacen(haystack, 0, rep) + } + + /// Replaces at most `limit` non-overlapping matches in the haystack with + /// the replacement provided. If `limit` is `0`, then all non-overlapping + /// matches are replaced. That is, `Regex::replace_all(hay, rep)` is + /// equivalent to `Regex::replacen(hay, 0, rep)`. + /// + /// The documentation for [`Regex::replace`] goes into more detail about + /// what kinds of replacement strings are supported. + /// + /// # Time complexity + /// + /// Since iterators over all matches requires running potentially many + /// searches on the haystack, and since each search has worst case + /// `O(m * n)` time complexity, the overall worst case time complexity for + /// this routine is `O(m * n^2)`. + /// + /// Although note that the worst case time here has an upper bound given + /// by the `limit` parameter. + /// + /// # Fallibility + /// + /// See the corresponding section in the docs for [`Regex::replace_all`] + /// for tips on how to deal with a replacement routine that can fail. + /// + /// # Example + /// + /// This example shows how to flip the order of whitespace (excluding line + /// terminators) delimited fields, and normalizes the whitespace that + /// delimits the fields. But we only do it for the first two matches. + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"(?m)^(\S+)[\s--\r\n]+(\S+)$").unwrap(); + /// let hay = " + /// Greetings 1973 + /// Wild\t1973 + /// BornToRun\t\t\t\t1975 + /// Darkness 1978 + /// TheRiver 1980 + /// "; + /// let new = re.replacen(hay, 2, "$2 $1"); + /// assert_eq!(new, " + /// 1973 Greetings + /// 1973 Wild + /// BornToRun\t\t\t\t1975 + /// Darkness 1978 + /// TheRiver 1980 + /// "); + /// ``` + #[inline] + pub fn replacen<'h, R: Replacer>( + &self, + haystack: &'h str, + limit: usize, + mut rep: R, + ) -> Cow<'h, str> { + // If we know that the replacement doesn't have any capture expansions, + // then we can use the fast path. 
The fast path can make a tremendous + // difference: + // + // 1) We use `find_iter` instead of `captures_iter`. Not asking for + // captures generally makes the regex engines faster. + // 2) We don't need to look up all of the capture groups and do + // replacements inside the replacement string. We just push it + // at each match and be done with it. + if let Some(rep) = rep.no_expansion() { + let mut it = self.find_iter(haystack).enumerate().peekable(); + if it.peek().is_none() { + return Cow::Borrowed(haystack); + } + let mut new = String::with_capacity(haystack.len()); + let mut last_match = 0; + for (i, m) in it { + new.push_str(&haystack[last_match..m.start()]); + new.push_str(&rep); + last_match = m.end(); + if limit > 0 && i >= limit - 1 { + break; + } + } + new.push_str(&haystack[last_match..]); + return Cow::Owned(new); + } + + // The slower path, which we use if the replacement may need access to + // capture groups. + let mut it = self.captures_iter(haystack).enumerate().peekable(); + if it.peek().is_none() { + return Cow::Borrowed(haystack); + } + let mut new = String::with_capacity(haystack.len()); + let mut last_match = 0; + for (i, cap) in it { + // unwrap on 0 is OK because captures only reports matches + let m = cap.get(0).unwrap(); + new.push_str(&haystack[last_match..m.start()]); + rep.replace_append(&cap, &mut new); + last_match = m.end(); + if limit > 0 && i >= limit - 1 { + break; + } + } + new.push_str(&haystack[last_match..]); + Cow::Owned(new) + } +} + +/// A group of advanced or "lower level" search methods. Some methods permit +/// starting the search at a position greater than `0` in the haystack. Other +/// methods permit reusing allocations, for example, when extracting the +/// matches for capture groups. +impl Regex { + /// Returns the end byte offset of the first match in the haystack given. + /// + /// This method may have the same performance characteristics as + /// `is_match`. 
Behaviorlly, it doesn't just report whether it match + /// occurs, but also the end offset for a match. In particular, the offset + /// returned *may be shorter* than the proper end of the leftmost-first + /// match that you would find via [`Regex::find`]. + /// + /// Note that it is not guaranteed that this routine finds the shortest or + /// "earliest" possible match. Instead, the main idea of this API is that + /// it returns the offset at the point at which the internal regex engine + /// has determined that a match has occurred. This may vary depending on + /// which internal regex engine is used, and thus, the offset itself may + /// change based on internal heuristics. + /// + /// # Example + /// + /// Typically, `a+` would match the entire first sequence of `a` in some + /// haystack, but `shortest_match` *may* give up as soon as it sees the + /// first `a`. + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"a+").unwrap(); + /// let offset = re.shortest_match("aaaaa").unwrap(); + /// assert_eq!(offset, 1); + /// ``` + #[inline] + pub fn shortest_match(&self, haystack: &str) -> Option<usize> { + self.shortest_match_at(haystack, 0) + } + + /// Returns the same as [`Regex::shortest_match`], but starts the search at + /// the given offset. + /// + /// The significance of the starting point is that it takes the surrounding + /// context into consideration. For example, the `\A` anchor can only match + /// when `start == 0`. + /// + /// If a match is found, the offset returned is relative to the beginning + /// of the haystack, not the beginning of the search. + /// + /// # Panics + /// + /// This panics when `start >= haystack.len() + 1`. + /// + /// # Example + /// + /// This example shows the significance of `start` by demonstrating how it + /// can be used to permit look-around assertions in a regex to take the + /// surrounding context into account. 
+ /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"\bchew\b").unwrap(); + /// let hay = "eschew"; + /// // We get a match here, but it's probably not intended. + /// assert_eq!(re.shortest_match(&hay[2..]), Some(4)); + /// // No match because the assertions take the context into account. + /// assert_eq!(re.shortest_match_at(hay, 2), None); + /// ``` + #[inline] + pub fn shortest_match_at( + &self, + haystack: &str, + start: usize, + ) -> Option<usize> { + let input = + Input::new(haystack).earliest(true).span(start..haystack.len()); + self.meta.search_half(&input).map(|hm| hm.offset()) + } + + /// Returns the same as [`Regex::is_match`], but starts the search at the + /// given offset. + /// + /// The significance of the starting point is that it takes the surrounding + /// context into consideration. For example, the `\A` anchor can only + /// match when `start == 0`. + /// + /// # Panics + /// + /// This panics when `start >= haystack.len() + 1`. + /// + /// # Example + /// + /// This example shows the significance of `start` by demonstrating how it + /// can be used to permit look-around assertions in a regex to take the + /// surrounding context into account. + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"\bchew\b").unwrap(); + /// let hay = "eschew"; + /// // We get a match here, but it's probably not intended. + /// assert!(re.is_match(&hay[2..])); + /// // No match because the assertions take the context into account. + /// assert!(!re.is_match_at(hay, 2)); + /// ``` + #[inline] + pub fn is_match_at(&self, haystack: &str, start: usize) -> bool { + let input = + Input::new(haystack).earliest(true).span(start..haystack.len()); + self.meta.search_half(&input).is_some() + } + + /// Returns the same as [`Regex::find`], but starts the search at the given + /// offset. + /// + /// The significance of the starting point is that it takes the surrounding + /// context into consideration. 
For example, the `\A` anchor can only + /// match when `start == 0`. + /// + /// # Panics + /// + /// This panics when `start >= haystack.len() + 1`. + /// + /// # Example + /// + /// This example shows the significance of `start` by demonstrating how it + /// can be used to permit look-around assertions in a regex to take the + /// surrounding context into account. + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"\bchew\b").unwrap(); + /// let hay = "eschew"; + /// // We get a match here, but it's probably not intended. + /// assert_eq!(re.find(&hay[2..]).map(|m| m.range()), Some(0..4)); + /// // No match because the assertions take the context into account. + /// assert_eq!(re.find_at(hay, 2), None); + /// ``` + #[inline] + pub fn find_at<'h>( + &self, + haystack: &'h str, + start: usize, + ) -> Option<Match<'h>> { + let input = Input::new(haystack).span(start..haystack.len()); + self.meta + .search(&input) + .map(|m| Match::new(haystack, m.start(), m.end())) + } + + /// Returns the same as [`Regex::captures`], but starts the search at the + /// given offset. + /// + /// The significance of the starting point is that it takes the surrounding + /// context into consideration. For example, the `\A` anchor can only + /// match when `start == 0`. + /// + /// # Panics + /// + /// This panics when `start >= haystack.len() + 1`. + /// + /// # Example + /// + /// This example shows the significance of `start` by demonstrating how it + /// can be used to permit look-around assertions in a regex to take the + /// surrounding context into account. + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"\bchew\b").unwrap(); + /// let hay = "eschew"; + /// // We get a match here, but it's probably not intended. + /// assert_eq!(&re.captures(&hay[2..]).unwrap()[0], "chew"); + /// // No match because the assertions take the context into account. 
+ /// assert!(re.captures_at(hay, 2).is_none()); + /// ``` + #[inline] + pub fn captures_at<'h>( + &self, + haystack: &'h str, + start: usize, + ) -> Option<Captures<'h>> { + let input = Input::new(haystack).span(start..haystack.len()); + let mut caps = self.meta.create_captures(); + self.meta.search_captures(&input, &mut caps); + if caps.is_match() { + let static_captures_len = self.static_captures_len(); + Some(Captures { haystack, caps, static_captures_len }) + } else { + None + } + } + + /// This is like [`Regex::captures`], but writes the byte offsets of each + /// capture group match into the locations given. + /// + /// A [`CaptureLocations`] stores the same byte offsets as a [`Captures`], + /// but does *not* store a reference to the haystack. This makes its API + /// a bit lower level and less convenient. But in exchange, callers + /// may allocate their own `CaptureLocations` and reuse it for multiple + /// searches. This may be helpful if allocating a `Captures` shows up in a + /// profile as too costly. + /// + /// To create a `CaptureLocations` value, use the + /// [`Regex::capture_locations`] method. + /// + /// This also returns the overall match if one was found. When a match is + /// found, its offsets are also always stored in `locs` at index `0`. + /// + /// # Panics + /// + /// This routine may panic if the given `CaptureLocations` was not created + /// by this regex. 
+ /// + /// # Example + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"^([a-z]+)=(\S*)$").unwrap(); + /// let mut locs = re.capture_locations(); + /// assert!(re.captures_read(&mut locs, "id=foo123").is_some()); + /// assert_eq!(Some((0, 9)), locs.get(0)); + /// assert_eq!(Some((0, 2)), locs.get(1)); + /// assert_eq!(Some((3, 9)), locs.get(2)); + /// ``` + #[inline] + pub fn captures_read<'h>( + &self, + locs: &mut CaptureLocations, + haystack: &'h str, + ) -> Option<Match<'h>> { + self.captures_read_at(locs, haystack, 0) + } + + /// Returns the same as [`Regex::captures_read`], but starts the search at + /// the given offset. + /// + /// The significance of the starting point is that it takes the surrounding + /// context into consideration. For example, the `\A` anchor can only + /// match when `start == 0`. + /// + /// # Panics + /// + /// This panics when `start >= haystack.len() + 1`. + /// + /// This routine may also panic if the given `CaptureLocations` was not + /// created by this regex. + /// + /// # Example + /// + /// This example shows the significance of `start` by demonstrating how it + /// can be used to permit look-around assertions in a regex to take the + /// surrounding context into account. + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"\bchew\b").unwrap(); + /// let hay = "eschew"; + /// let mut locs = re.capture_locations(); + /// // We get a match here, but it's probably not intended. + /// assert!(re.captures_read(&mut locs, &hay[2..]).is_some()); + /// // No match because the assertions take the context into account. 
+ /// assert!(re.captures_read_at(&mut locs, hay, 2).is_none()); + /// ``` + #[inline] + pub fn captures_read_at<'h>( + &self, + locs: &mut CaptureLocations, + haystack: &'h str, + start: usize, + ) -> Option<Match<'h>> { + let input = Input::new(haystack).span(start..haystack.len()); + self.meta.search_captures(&input, &mut locs.0); + locs.0.get_match().map(|m| Match::new(haystack, m.start(), m.end())) + } + + /// An undocumented alias for `captures_read_at`. + /// + /// The `regex-capi` crate previously used this routine, so to avoid + /// breaking that crate, we continue to provide the name as an undocumented + /// alias. + #[doc(hidden)] + #[inline] + pub fn read_captures_at<'h>( + &self, + locs: &mut CaptureLocations, + haystack: &'h str, + start: usize, + ) -> Option<Match<'h>> { + self.captures_read_at(locs, haystack, start) + } +} + +/// Auxiliary methods. +impl Regex { + /// Returns the original string of this regex. + /// + /// # Example + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"foo\w+bar").unwrap(); + /// assert_eq!(re.as_str(), r"foo\w+bar"); + /// ``` + #[inline] + pub fn as_str(&self) -> &str { + &self.pattern + } + + /// Returns an iterator over the capture names in this regex. + /// + /// The iterator returned yields elements of type `Option<&str>`. That is, + /// the iterator yields values for all capture groups, even ones that are + /// unnamed. The order of the groups corresponds to the order of the group's + /// corresponding opening parenthesis. + /// + /// The first element of the iterator always yields the group corresponding + /// to the overall match, and this group is always unnamed. Therefore, the + /// iterator always yields at least one group. 
+ /// + /// # Example + /// + /// This shows basic usage with a mix of named and unnamed capture groups: + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"(?<a>.(?<b>.))(.)(?:.)(?<c>.)").unwrap(); + /// let mut names = re.capture_names(); + /// assert_eq!(names.next(), Some(None)); + /// assert_eq!(names.next(), Some(Some("a"))); + /// assert_eq!(names.next(), Some(Some("b"))); + /// assert_eq!(names.next(), Some(None)); + /// // the '(?:.)' group is non-capturing and so doesn't appear here! + /// assert_eq!(names.next(), Some(Some("c"))); + /// assert_eq!(names.next(), None); + /// ``` + /// + /// The iterator always yields at least one element, even for regexes with + /// no capture groups and even for regexes that can never match: + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"").unwrap(); + /// let mut names = re.capture_names(); + /// assert_eq!(names.next(), Some(None)); + /// assert_eq!(names.next(), None); + /// + /// let re = Regex::new(r"[a&&b]").unwrap(); + /// let mut names = re.capture_names(); + /// assert_eq!(names.next(), Some(None)); + /// assert_eq!(names.next(), None); + /// ``` + #[inline] + pub fn capture_names(&self) -> CaptureNames<'_> { + CaptureNames(self.meta.group_info().pattern_names(PatternID::ZERO)) + } + + /// Returns the number of captures groups in this regex. + /// + /// This includes all named and unnamed groups, including the implicit + /// unnamed group that is always present and corresponds to the entire + /// match. + /// + /// Since the implicit unnamed group is always included in this length, the + /// length returned is guaranteed to be greater than zero. 
+ /// + /// # Example + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"foo").unwrap(); + /// assert_eq!(1, re.captures_len()); + /// + /// let re = Regex::new(r"(foo)").unwrap(); + /// assert_eq!(2, re.captures_len()); + /// + /// let re = Regex::new(r"(?<a>.(?<b>.))(.)(?:.)(?<c>.)").unwrap(); + /// assert_eq!(5, re.captures_len()); + /// + /// let re = Regex::new(r"[a&&b]").unwrap(); + /// assert_eq!(1, re.captures_len()); + /// ``` + #[inline] + pub fn captures_len(&self) -> usize { + self.meta.group_info().group_len(PatternID::ZERO) + } + + /// Returns the total number of capturing groups that appear in every + /// possible match. + /// + /// If the number of capture groups can vary depending on the match, then + /// this returns `None`. That is, a value is only returned when the number + /// of matching groups is invariant or "static." + /// + /// Note that like [`Regex::captures_len`], this **does** include the + /// implicit capturing group corresponding to the entire match. Therefore, + /// when a non-None value is returned, it is guaranteed to be at least `1`. + /// Stated differently, a return value of `Some(0)` is impossible. + /// + /// # Example + /// + /// This shows a few cases where a static number of capture groups is + /// available and a few cases where it is not. 
+ /// + /// ``` + /// use regex::Regex; + /// + /// let len = |pattern| { + /// Regex::new(pattern).map(|re| re.static_captures_len()) + /// }; + /// + /// assert_eq!(Some(1), len("a")?); + /// assert_eq!(Some(2), len("(a)")?); + /// assert_eq!(Some(2), len("(a)|(b)")?); + /// assert_eq!(Some(3), len("(a)(b)|(c)(d)")?); + /// assert_eq!(None, len("(a)|b")?); + /// assert_eq!(None, len("a|(b)")?); + /// assert_eq!(None, len("(b)*")?); + /// assert_eq!(Some(2), len("(b)+")?); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + #[inline] + pub fn static_captures_len(&self) -> Option<usize> { + self.meta.static_captures_len() + } + + /// Returns a fresh allocated set of capture locations that can + /// be reused in multiple calls to [`Regex::captures_read`] or + /// [`Regex::captures_read_at`]. + /// + /// The returned locations can be used for any subsequent search for this + /// particular regex. There is no guarantee that it is correct to use for + /// other regexes, even if they have the same number of capture groups. + /// + /// # Example + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"(.)(.)(\w+)").unwrap(); + /// let mut locs = re.capture_locations(); + /// assert!(re.captures_read(&mut locs, "Padron").is_some()); + /// assert_eq!(locs.get(0), Some((0, 6))); + /// assert_eq!(locs.get(1), Some((0, 1))); + /// assert_eq!(locs.get(2), Some((1, 2))); + /// assert_eq!(locs.get(3), Some((2, 6))); + /// ``` + #[inline] + pub fn capture_locations(&self) -> CaptureLocations { + CaptureLocations(self.meta.create_captures()) + } + + /// An alias for `capture_locations` to preserve backward compatibility. + /// + /// The `regex-capi` crate used this method, so to avoid breaking that + /// crate, we continue to export it as an undocumented API. + #[doc(hidden)] + #[inline] + pub fn locations(&self) -> CaptureLocations { + self.capture_locations() + } +} + +/// Represents a single match of a regex in a haystack. 
+/// +/// A `Match` contains both the start and end byte offsets of the match and the +/// actual substring corresponding to the range of those byte offsets. It is +/// guaranteed that `start <= end`. When `start == end`, the match is empty. +/// +/// Since this `Match` can only be produced by the top-level `Regex` APIs +/// that only support searching UTF-8 encoded strings, the byte offsets for a +/// `Match` are guaranteed to fall on valid UTF-8 codepoint boundaries. That +/// is, slicing a `&str` with [`Match::range`] is guaranteed to never panic. +/// +/// Values with this type are created by [`Regex::find`] or +/// [`Regex::find_iter`]. Other APIs can create `Match` values too. For +/// example, [`Captures::get`]. +/// +/// The lifetime parameter `'h` refers to the lifetime of the matched of the +/// haystack that this match was produced from. +/// +/// # Numbering +/// +/// The byte offsets in a `Match` form a half-open interval. That is, the +/// start of the range is inclusive and the end of the range is exclusive. +/// For example, given a haystack `abcFOOxyz` and a match of `FOO`, its byte +/// offset range starts at `3` and ends at `6`. `3` corresponds to `F` and +/// `6` corresponds to `x`, which is one past the end of the match. This +/// corresponds to the same kind of slicing that Rust uses. +/// +/// For more on why this was chosen over other schemes (aside from being +/// consistent with how Rust the language works), see [this discussion] and +/// [Dijkstra's note on a related topic][note]. +/// +/// [this discussion]: https://github.com/rust-lang/regex/discussions/866 +/// [note]: https://www.cs.utexas.edu/users/EWD/transcriptions/EWD08xx/EWD831.html +/// +/// # Example +/// +/// This example shows the value of each of the methods on `Match` for a +/// particular search. 
+/// +/// ``` +/// use regex::Regex; +/// +/// let re = Regex::new(r"\p{Greek}+").unwrap(); +/// let hay = "Greek: αβγδ"; +/// let m = re.find(hay).unwrap(); +/// assert_eq!(7, m.start()); +/// assert_eq!(15, m.end()); +/// assert!(!m.is_empty()); +/// assert_eq!(8, m.len()); +/// assert_eq!(7..15, m.range()); +/// assert_eq!("αβγδ", m.as_str()); +/// ``` +#[derive(Copy, Clone, Eq, PartialEq)] +pub struct Match<'h> { + haystack: &'h str, + start: usize, + end: usize, +} + +impl<'h> Match<'h> { + /// Returns the byte offset of the start of the match in the haystack. The + /// start of the match corresponds to the position where the match begins + /// and includes the first byte in the match. + /// + /// It is guaranteed that `Match::start() <= Match::end()`. + /// + /// This is guaranteed to fall on a valid UTF-8 codepoint boundary. That + /// is, it will never be an offset that appears between the UTF-8 code + /// units of a UTF-8 encoded Unicode scalar value. Consequently, it is + /// always safe to slice the corresponding haystack using this offset. + #[inline] + pub fn start(&self) -> usize { + self.start + } + + /// Returns the byte offset of the end of the match in the haystack. The + /// end of the match corresponds to the byte immediately following the last + /// byte in the match. This means that `&slice[start..end]` works as one + /// would expect. + /// + /// It is guaranteed that `Match::start() <= Match::end()`. + /// + /// This is guaranteed to fall on a valid UTF-8 codepoint boundary. That + /// is, it will never be an offset that appears between the UTF-8 code + /// units of a UTF-8 encoded Unicode scalar value. Consequently, it is + /// always safe to slice the corresponding haystack using this offset. + #[inline] + pub fn end(&self) -> usize { + self.end + } + + /// Returns true if and only if this match has a length of zero. + /// + /// Note that an empty match can only occur when the regex itself can + /// match the empty string. 
Here are some examples of regexes that can + /// all match the empty string: `^`, `^$`, `\b`, `a?`, `a*`, `a{0}`, + /// `(foo|\d+|quux)?`. + #[inline] + pub fn is_empty(&self) -> bool { + self.start == self.end + } + + /// Returns the length, in bytes, of this match. + #[inline] + pub fn len(&self) -> usize { + self.end - self.start + } + + /// Returns the range over the starting and ending byte offsets of the + /// match in the haystack. + /// + /// It is always correct to slice the original haystack searched with this + /// range. That is, because the offsets are guaranteed to fall on valid + /// UTF-8 boundaries, the range returned is always valid. + #[inline] + pub fn range(&self) -> core::ops::Range<usize> { + self.start..self.end + } + + /// Returns the substring of the haystack that matched. + #[inline] + pub fn as_str(&self) -> &'h str { + &self.haystack[self.range()] + } + + /// Creates a new match from the given haystack and byte offsets. + #[inline] + fn new(haystack: &'h str, start: usize, end: usize) -> Match<'h> { + Match { haystack, start, end } + } +} + +impl<'h> core::fmt::Debug for Match<'h> { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + f.debug_struct("Match") + .field("start", &self.start) + .field("end", &self.end) + .field("string", &self.as_str()) + .finish() + } +} + +impl<'h> From<Match<'h>> for &'h str { + fn from(m: Match<'h>) -> &'h str { + m.as_str() + } +} + +impl<'h> From<Match<'h>> for core::ops::Range<usize> { + fn from(m: Match<'h>) -> core::ops::Range<usize> { + m.range() + } +} + +/// Represents the capture groups for a single match. +/// +/// Capture groups refer to parts of a regex enclosed in parentheses. They +/// can be optionally named. The purpose of capture groups is to be able to +/// reference different parts of a match based on the original pattern. In +/// essence, a `Captures` is a container of [`Match`] values for each group +/// that participated in a regex match. 
Each `Match` can be looked up by either +/// its capture group index or name (if it has one). +/// +/// For example, say you want to match the individual letters in a 5-letter +/// word: +/// +/// ```text +/// (?<first>\w)(\w)(?:\w)\w(?<last>\w) +/// ``` +/// +/// This regex has 4 capture groups: +/// +/// * The group at index `0` corresponds to the overall match. It is always +/// present in every match and never has a name. +/// * The group at index `1` with name `first` corresponding to the first +/// letter. +/// * The group at index `2` with no name corresponding to the second letter. +/// * The group at index `3` with name `last` corresponding to the fifth and +/// last letter. +/// +/// Notice that `(?:\w)` was not listed above as a capture group despite it +/// being enclosed in parentheses. That's because `(?:pattern)` is a special +/// syntax that permits grouping but *without* capturing. The reason for not +/// treating it as a capture is that tracking and reporting capture groups +/// requires additional state that may lead to slower searches. So using as few +/// capture groups as possible can help performance. (Although the difference +/// in performance of a couple of capture groups is likely immaterial.) +/// +/// Values with this type are created by [`Regex::captures`] or +/// [`Regex::captures_iter`]. +/// +/// `'h` is the lifetime of the haystack that these captures were matched from. +/// +/// # Example +/// +/// ``` +/// use regex::Regex; +/// +/// let re = Regex::new(r"(?<first>\w)(\w)(?:\w)\w(?<last>\w)").unwrap(); +/// let caps = re.captures("toady").unwrap(); +/// assert_eq!("toady", &caps[0]); +/// assert_eq!("t", &caps["first"]); +/// assert_eq!("o", &caps[2]); +/// assert_eq!("y", &caps["last"]); +/// ``` +pub struct Captures<'h> { + haystack: &'h str, + caps: captures::Captures, + static_captures_len: Option<usize>, +} + +impl<'h> Captures<'h> { + /// Returns the `Match` associated with the capture group at index `i`. 
If + /// `i` does not correspond to a capture group, or if the capture group did + /// not participate in the match, then `None` is returned. + /// + /// When `i == 0`, this is guaranteed to return a non-`None` value. + /// + /// # Examples + /// + /// Get the substring that matched with a default of an empty string if the + /// group didn't participate in the match: + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"[a-z]+(?:([0-9]+)|([A-Z]+))").unwrap(); + /// let caps = re.captures("abc123").unwrap(); + /// + /// let substr1 = caps.get(1).map_or("", |m| m.as_str()); + /// let substr2 = caps.get(2).map_or("", |m| m.as_str()); + /// assert_eq!(substr1, "123"); + /// assert_eq!(substr2, ""); + /// ``` + #[inline] + pub fn get(&self, i: usize) -> Option<Match<'h>> { + self.caps + .get_group(i) + .map(|sp| Match::new(self.haystack, sp.start, sp.end)) + } + + /// Returns the `Match` associated with the capture group named `name`. If + /// `name` isn't a valid capture group or it refers to a group that didn't + /// match, then `None` is returned. + /// + /// Note that unlike `caps["name"]`, this returns a `Match` whose lifetime + /// matches the lifetime of the haystack in this `Captures` value. + /// Conversely, the substring returned by `caps["name"]` has a lifetime + /// of the `Captures` value, which is likely shorter than the lifetime of + /// the haystack. In some cases, it may be necessary to use this method to + /// access the matching substring instead of the `caps["name"]` notation. 
+ /// + /// # Examples + /// + /// Get the substring that matched with a default of an empty string if the + /// group didn't participate in the match: + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new( + /// r"[a-z]+(?:(?<numbers>[0-9]+)|(?<letters>[A-Z]+))", + /// ).unwrap(); + /// let caps = re.captures("abc123").unwrap(); + /// + /// let numbers = caps.name("numbers").map_or("", |m| m.as_str()); + /// let letters = caps.name("letters").map_or("", |m| m.as_str()); + /// assert_eq!(numbers, "123"); + /// assert_eq!(letters, ""); + /// ``` + #[inline] + pub fn name(&self, name: &str) -> Option<Match<'h>> { + self.caps + .get_group_by_name(name) + .map(|sp| Match::new(self.haystack, sp.start, sp.end)) + } + + /// This is a convenience routine for extracting the substrings + /// corresponding to matching capture groups. + /// + /// This returns a tuple where the first element corresponds to the full + /// substring of the haystack that matched the regex. The second element is + /// an array of substrings, with each corresponding to the to the substring + /// that matched for a particular capture group. + /// + /// # Panics + /// + /// This panics if the number of possible matching groups in this + /// `Captures` value is not fixed to `N` in all circumstances. + /// More precisely, this routine only works when `N` is equivalent to + /// [`Regex::static_captures_len`]. + /// + /// Stated more plainly, if the number of matching capture groups in a + /// regex can vary from match to match, then this function always panics. + /// + /// For example, `(a)(b)|(c)` could produce two matching capture groups + /// or one matching capture group for any given match. Therefore, one + /// cannot use `extract` with such a pattern. + /// + /// But a pattern like `(a)(b)|(c)(d)` can be used with `extract` because + /// the number of capture groups in every match is always equivalent, + /// even if the capture _indices_ in each match are not. 
+ /// + /// # Example + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"([0-9]{4})-([0-9]{2})-([0-9]{2})").unwrap(); + /// let hay = "On 2010-03-14, I became a Tenneessee lamb."; + /// let Some((full, [year, month, day])) = + /// re.captures(hay).map(|caps| caps.extract()) else { return }; + /// assert_eq!("2010-03-14", full); + /// assert_eq!("2010", year); + /// assert_eq!("03", month); + /// assert_eq!("14", day); + /// ``` + /// + /// # Example: iteration + /// + /// This example shows how to use this method when iterating over all + /// `Captures` matches in a haystack. + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"([0-9]{4})-([0-9]{2})-([0-9]{2})").unwrap(); + /// let hay = "1973-01-05, 1975-08-25 and 1980-10-18"; + /// + /// let mut dates: Vec<(&str, &str, &str)> = vec![]; + /// for (_, [y, m, d]) in re.captures_iter(hay).map(|c| c.extract()) { + /// dates.push((y, m, d)); + /// } + /// assert_eq!(dates, vec![ + /// ("1973", "01", "05"), + /// ("1975", "08", "25"), + /// ("1980", "10", "18"), + /// ]); + /// ``` + /// + /// # Example: parsing different formats + /// + /// This API is particularly useful when you need to extract a particular + /// value that might occur in a different format. 
Consider, for example, + /// an identifier that might be in double quotes or single quotes: + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r#"id:(?:"([^"]+)"|'([^']+)')"#).unwrap(); + /// let hay = r#"The first is id:"foo" and the second is id:'bar'."#; + /// let mut ids = vec![]; + /// for (_, [id]) in re.captures_iter(hay).map(|c| c.extract()) { + /// ids.push(id); + /// } + /// assert_eq!(ids, vec!["foo", "bar"]); + /// ``` + pub fn extract<const N: usize>(&self) -> (&'h str, [&'h str; N]) { + let len = self + .static_captures_len + .expect("number of capture groups can vary in a match") + .checked_sub(1) + .expect("number of groups is always greater than zero"); + assert_eq!(N, len, "asked for {} groups, but must ask for {}", N, len); + // The regex-automata variant of extract is a bit more permissive. + // It doesn't require the number of matching capturing groups to be + // static, and you can even request fewer groups than what's there. So + // this is guaranteed to never panic because we've asserted above that + // the user has requested precisely the number of groups that must be + // present in any match for this regex. + self.caps.extract(self.haystack) + } + + /// Expands all instances of `$ref` in `replacement` to the corresponding + /// capture group, and writes them to the `dst` buffer given. A `ref` can + /// be a capture group index or a name. If `ref` doesn't refer to a capture + /// group that participated in the match, then it is replaced with the + /// empty string. + /// + /// # Format + /// + /// The format of the replacement string supports two different kinds of + /// capture references: unbraced and braced. + /// + /// For the unbraced format, the format supported is `$ref` where `name` + /// can be any character in the class `[0-9A-Za-z_]`. `ref` is always + /// the longest possible parse. So for example, `$1a` corresponds to the + /// capture group named `1a` and not the capture group at index `1`. 
If + /// `ref` matches `^[0-9]+$`, then it is treated as a capture group index + /// itself and not a name. + /// + /// For the braced format, the format supported is `${ref}` where `ref` can + /// be any sequence of bytes except for `}`. If no closing brace occurs, + /// then it is not considered a capture reference. As with the unbraced + /// format, if `ref` matches `^[0-9]+$`, then it is treated as a capture + /// group index and not a name. + /// + /// The braced format is useful for exerting precise control over the name + /// of the capture reference. For example, `${1}a` corresponds to the + /// capture group reference `1` followed by the letter `a`, where as `$1a` + /// (as mentioned above) corresponds to the capture group reference `1a`. + /// The braced format is also useful for expressing capture group names + /// that use characters not supported by the unbraced format. For example, + /// `${foo[bar].baz}` refers to the capture group named `foo[bar].baz`. + /// + /// If a capture group reference is found and it does not refer to a valid + /// capture group, then it will be replaced with the empty string. + /// + /// To write a literal `$`, use `$$`. + /// + /// # Example + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new( + /// r"(?<day>[0-9]{2})-(?<month>[0-9]{2})-(?<year>[0-9]{4})", + /// ).unwrap(); + /// let hay = "On 14-03-2010, I became a Tenneessee lamb."; + /// let caps = re.captures(hay).unwrap(); + /// + /// let mut dst = String::new(); + /// caps.expand("year=$year, month=$month, day=$day", &mut dst); + /// assert_eq!(dst, "year=2010, month=03, day=14"); + /// ``` + #[inline] + pub fn expand(&self, replacement: &str, dst: &mut String) { + self.caps.interpolate_string_into(self.haystack, replacement, dst); + } + + /// Returns an iterator over all capture groups. This includes both + /// matching and non-matching groups. 
+ /// + /// The iterator always yields at least one matching group: the first group + /// (at index `0`) with no name. Subsequent groups are returned in the order + /// of their opening parenthesis in the regex. + /// + /// The elements yielded have type `Option<Match<'h>>`, where a non-`None` + /// value is present if the capture group matches. + /// + /// # Example + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"(\w)(\d)?(\w)").unwrap(); + /// let caps = re.captures("AZ").unwrap(); + /// + /// let mut it = caps.iter(); + /// assert_eq!(it.next().unwrap().map(|m| m.as_str()), Some("AZ")); + /// assert_eq!(it.next().unwrap().map(|m| m.as_str()), Some("A")); + /// assert_eq!(it.next().unwrap().map(|m| m.as_str()), None); + /// assert_eq!(it.next().unwrap().map(|m| m.as_str()), Some("Z")); + /// assert_eq!(it.next(), None); + /// ``` + #[inline] + pub fn iter<'c>(&'c self) -> SubCaptureMatches<'c, 'h> { + SubCaptureMatches { haystack: self.haystack, it: self.caps.iter() } + } + + /// Returns the total number of capture groups. This includes both + /// matching and non-matching groups. + /// + /// The length returned is always equivalent to the number of elements + /// yielded by [`Captures::iter`]. Consequently, the length is always + /// greater than zero since every `Captures` value always includes the + /// match for the entire regex. + /// + /// # Example + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"(\w)(\d)?(\w)").unwrap(); + /// let caps = re.captures("AZ").unwrap(); + /// assert_eq!(caps.len(), 4); + /// ``` + #[inline] + pub fn len(&self) -> usize { + self.caps.group_len() + } +} + +impl<'h> core::fmt::Debug for Captures<'h> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + /// A little helper type to provide a nice map-like debug + /// representation for our capturing group spans. 
+ /// + /// regex-automata has something similar, but it includes the pattern + /// ID in its debug output, which is confusing. It also doesn't include + /// that strings that match because a regex-automata `Captures` doesn't + /// borrow the haystack. + struct CapturesDebugMap<'a> { + caps: &'a Captures<'a>, + } + + impl<'a> core::fmt::Debug for CapturesDebugMap<'a> { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + let mut map = f.debug_map(); + let names = + self.caps.caps.group_info().pattern_names(PatternID::ZERO); + for (group_index, maybe_name) in names.enumerate() { + let key = Key(group_index, maybe_name); + match self.caps.get(group_index) { + None => map.entry(&key, &None::<()>), + Some(mat) => map.entry(&key, &Value(mat)), + }; + } + map.finish() + } + } + + struct Key<'a>(usize, Option<&'a str>); + + impl<'a> core::fmt::Debug for Key<'a> { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + write!(f, "{}", self.0)?; + if let Some(name) = self.1 { + write!(f, "/{:?}", name)?; + } + Ok(()) + } + } + + struct Value<'a>(Match<'a>); + + impl<'a> core::fmt::Debug for Value<'a> { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + write!( + f, + "{}..{}/{:?}", + self.0.start(), + self.0.end(), + self.0.as_str() + ) + } + } + + f.debug_tuple("Captures") + .field(&CapturesDebugMap { caps: self }) + .finish() + } +} + +/// Get a matching capture group's haystack substring by index. +/// +/// The haystack substring returned can't outlive the `Captures` object if this +/// method is used, because of how `Index` is defined (normally `a[i]` is part +/// of `a` and can't outlive it). To work around this limitation, do that, use +/// [`Captures::get`] instead. +/// +/// `'h` is the lifetime of the matched haystack, but the lifetime of the +/// `&str` returned by this implementation is the lifetime of the `Captures` +/// value itself. 
+/// +/// # Panics +/// +/// If there is no matching group at the given index. +impl<'h> core::ops::Index<usize> for Captures<'h> { + type Output = str; + + // The lifetime is written out to make it clear that the &str returned + // does NOT have a lifetime equivalent to 'h. + fn index<'a>(&'a self, i: usize) -> &'a str { + self.get(i) + .map(|m| m.as_str()) + .unwrap_or_else(|| panic!("no group at index '{}'", i)) + } +} + +/// Get a matching capture group's haystack substring by name. +/// +/// The haystack substring returned can't outlive the `Captures` object if this +/// method is used, because of how `Index` is defined (normally `a[i]` is part +/// of `a` and can't outlive it). To work around this limitation, do that, use +/// [`Captures::name`] instead. +/// +/// `'h` is the lifetime of the matched haystack, but the lifetime of the +/// `&str` returned by this implementation is the lifetime of the `Captures` +/// value itself. +/// +/// `'n` is the lifetime of the group name used to index the `Captures` value. +/// +/// # Panics +/// +/// If there is no matching group at the given name. +impl<'h, 'n> core::ops::Index<&'n str> for Captures<'h> { + type Output = str; + + fn index<'a>(&'a self, name: &'n str) -> &'a str { + self.name(name) + .map(|m| m.as_str()) + .unwrap_or_else(|| panic!("no group named '{}'", name)) + } +} + +/// A low level representation of the byte offsets of each capture group. +/// +/// You can think of this as a lower level [`Captures`], where this type does +/// not support named capturing groups directly and it does not borrow the +/// haystack that these offsets were matched on. +/// +/// Primarily, this type is useful when using the lower level `Regex` APIs such +/// as [`Regex::captures_read`], which permits amortizing the allocation in +/// which capture match offsets are stored. +/// +/// In order to build a value of this type, you'll need to call the +/// [`Regex::capture_locations`] method. 
The value returned can then be reused +/// in subsequent searches for that regex. Using it for other regexes may +/// result in a panic or otherwise incorrect results. +/// +/// # Example +/// +/// This example shows how to create and use `CaptureLocations` in a search. +/// +/// ``` +/// use regex::Regex; +/// +/// let re = Regex::new(r"(?<first>\w+)\s+(?<last>\w+)").unwrap(); +/// let mut locs = re.capture_locations(); +/// let m = re.captures_read(&mut locs, "Bruce Springsteen").unwrap(); +/// assert_eq!(0..17, m.range()); +/// assert_eq!(Some((0, 17)), locs.get(0)); +/// assert_eq!(Some((0, 5)), locs.get(1)); +/// assert_eq!(Some((6, 17)), locs.get(2)); +/// +/// // Asking for an invalid capture group always returns None. +/// assert_eq!(None, locs.get(3)); +/// # // literals are too big for 32-bit usize: #1041 +/// # #[cfg(target_pointer_width = "64")] +/// assert_eq!(None, locs.get(34973498648)); +/// # #[cfg(target_pointer_width = "64")] +/// assert_eq!(None, locs.get(9944060567225171988)); +/// ``` +#[derive(Clone, Debug)] +pub struct CaptureLocations(captures::Captures); + +/// A type alias for `CaptureLocations` for backwards compatibility. +/// +/// Previously, we exported `CaptureLocations` as `Locations` in an +/// undocumented API. To prevent breaking that code (e.g., in `regex-capi`), +/// we continue re-exporting the same undocumented API. +#[doc(hidden)] +pub type Locations = CaptureLocations; + +impl CaptureLocations { + /// Returns the start and end byte offsets of the capture group at index + /// `i`. This returns `None` if `i` is not a valid capture group or if the + /// capture group did not match. 
+ /// + /// # Example + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"(?<first>\w+)\s+(?<last>\w+)").unwrap(); + /// let mut locs = re.capture_locations(); + /// re.captures_read(&mut locs, "Bruce Springsteen").unwrap(); + /// assert_eq!(Some((0, 17)), locs.get(0)); + /// assert_eq!(Some((0, 5)), locs.get(1)); + /// assert_eq!(Some((6, 17)), locs.get(2)); + /// ``` + #[inline] + pub fn get(&self, i: usize) -> Option<(usize, usize)> { + self.0.get_group(i).map(|sp| (sp.start, sp.end)) + } + + /// Returns the total number of capture groups (even if they didn't match). + /// That is, the length returned is unaffected by the result of a search. + /// + /// This is always at least `1` since every regex has at least `1` + /// capturing group that corresponds to the entire match. + /// + /// # Example + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"(?<first>\w+)\s+(?<last>\w+)").unwrap(); + /// let mut locs = re.capture_locations(); + /// assert_eq!(3, locs.len()); + /// re.captures_read(&mut locs, "Bruce Springsteen").unwrap(); + /// assert_eq!(3, locs.len()); + /// ``` + /// + /// Notice that the length is always at least `1`, regardless of the regex: + /// + /// ``` + /// use regex::Regex; + /// + /// let re = Regex::new(r"").unwrap(); + /// let locs = re.capture_locations(); + /// assert_eq!(1, locs.len()); + /// + /// // [a&&b] is a regex that never matches anything. + /// let re = Regex::new(r"[a&&b]").unwrap(); + /// let locs = re.capture_locations(); + /// assert_eq!(1, locs.len()); + /// ``` + #[inline] + pub fn len(&self) -> usize { + // self.0.group_len() returns 0 if the underlying captures doesn't + // represent a match, but the behavior guaranteed for this method is + // that the length doesn't change based on a match or not. + self.0.group_info().group_len(PatternID::ZERO) + } + + /// An alias for the `get` method for backwards compatibility. 
+ /// + /// Previously, we exported `get` as `pos` in an undocumented API. To + /// prevent breaking that code (e.g., in `regex-capi`), we continue + /// re-exporting the same undocumented API. + #[doc(hidden)] + #[inline] + pub fn pos(&self, i: usize) -> Option<(usize, usize)> { + self.get(i) + } +} + +/// An iterator over all non-overlapping matches in a haystack. +/// +/// This iterator yields [`Match`] values. The iterator stops when no more +/// matches can be found. +/// +/// `'r` is the lifetime of the compiled regular expression and `'h` is the +/// lifetime of the haystack. +/// +/// This iterator is created by [`Regex::find_iter`]. +/// +/// # Time complexity +/// +/// Note that since an iterator runs potentially many searches on the haystack +/// and since each search has worst case `O(m * n)` time complexity, the +/// overall worst case time complexity for iteration is `O(m * n^2)`. +#[derive(Debug)] +pub struct Matches<'r, 'h> { + haystack: &'h str, + it: meta::FindMatches<'r, 'h>, +} + +impl<'r, 'h> Iterator for Matches<'r, 'h> { + type Item = Match<'h>; + + #[inline] + fn next(&mut self) -> Option<Match<'h>> { + self.it + .next() + .map(|sp| Match::new(self.haystack, sp.start(), sp.end())) + } + + #[inline] + fn count(self) -> usize { + // This can actually be up to 2x faster than calling `next()` until + // completion, because counting matches when using a DFA only requires + // finding the end of each match. But returning a `Match` via `next()` + // requires the start of each match which, with a DFA, requires a + // reverse forward scan to find it. + self.it.count() + } +} + +impl<'r, 'h> core::iter::FusedIterator for Matches<'r, 'h> {} + +/// An iterator over all non-overlapping capture matches in a haystack. +/// +/// This iterator yields [`Captures`] values. The iterator stops when no more +/// matches can be found. +/// +/// `'r` is the lifetime of the compiled regular expression and `'h` is the +/// lifetime of the matched string. 
+/// +/// This iterator is created by [`Regex::captures_iter`]. +/// +/// # Time complexity +/// +/// Note that since an iterator runs potentially many searches on the haystack +/// and since each search has worst case `O(m * n)` time complexity, the +/// overall worst case time complexity for iteration is `O(m * n^2)`. +#[derive(Debug)] +pub struct CaptureMatches<'r, 'h> { + haystack: &'h str, + it: meta::CapturesMatches<'r, 'h>, +} + +impl<'r, 'h> Iterator for CaptureMatches<'r, 'h> { + type Item = Captures<'h>; + + #[inline] + fn next(&mut self) -> Option<Captures<'h>> { + let static_captures_len = self.it.regex().static_captures_len(); + self.it.next().map(|caps| Captures { + haystack: self.haystack, + caps, + static_captures_len, + }) + } + + #[inline] + fn count(self) -> usize { + // This can actually be up to 2x faster than calling `next()` until + // completion, because counting matches when using a DFA only requires + // finding the end of each match. But returning a `Match` via `next()` + // requires the start of each match which, with a DFA, requires a + // reverse forward scan to find it. + self.it.count() + } +} + +impl<'r, 'h> core::iter::FusedIterator for CaptureMatches<'r, 'h> {} + +/// An iterator over all substrings delimited by a regex match. +/// +/// `'r` is the lifetime of the compiled regular expression and `'h` is the +/// lifetime of the byte string being split. +/// +/// This iterator is created by [`Regex::split`]. +/// +/// # Time complexity +/// +/// Note that since an iterator runs potentially many searches on the haystack +/// and since each search has worst case `O(m * n)` time complexity, the +/// overall worst case time complexity for iteration is `O(m * n^2)`. 
+#[derive(Debug)] +pub struct Split<'r, 'h> { + haystack: &'h str, + it: meta::Split<'r, 'h>, +} + +impl<'r, 'h> Iterator for Split<'r, 'h> { + type Item = &'h str; + + #[inline] + fn next(&mut self) -> Option<&'h str> { + self.it.next().map(|span| &self.haystack[span]) + } +} + +impl<'r, 'h> core::iter::FusedIterator for Split<'r, 'h> {} + +/// An iterator over at most `N` substrings delimited by a regex match. +/// +/// The last substring yielded by this iterator will be whatever remains after +/// `N-1` splits. +/// +/// `'r` is the lifetime of the compiled regular expression and `'h` is the +/// lifetime of the byte string being split. +/// +/// This iterator is created by [`Regex::splitn`]. +/// +/// # Time complexity +/// +/// Note that since an iterator runs potentially many searches on the haystack +/// and since each search has worst case `O(m * n)` time complexity, the +/// overall worst case time complexity for iteration is `O(m * n^2)`. +/// +/// Although note that the worst case time here has an upper bound given +/// by the `limit` parameter to [`Regex::splitn`]. +#[derive(Debug)] +pub struct SplitN<'r, 'h> { + haystack: &'h str, + it: meta::SplitN<'r, 'h>, +} + +impl<'r, 'h> Iterator for SplitN<'r, 'h> { + type Item = &'h str; + + #[inline] + fn next(&mut self) -> Option<&'h str> { + self.it.next().map(|span| &self.haystack[span]) + } + + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + self.it.size_hint() + } +} + +impl<'r, 'h> core::iter::FusedIterator for SplitN<'r, 'h> {} + +/// An iterator over the names of all capture groups in a regex. +/// +/// This iterator yields values of type `Option<&str>` in order of the opening +/// capture group parenthesis in the regex pattern. `None` is yielded for +/// groups with no name. The first element always corresponds to the implicit +/// and unnamed group for the overall match. +/// +/// `'r` is the lifetime of the compiled regular expression. 
+/// +/// This iterator is created by [`Regex::capture_names`]. +#[derive(Clone, Debug)] +pub struct CaptureNames<'r>(captures::GroupInfoPatternNames<'r>); + +impl<'r> Iterator for CaptureNames<'r> { + type Item = Option<&'r str>; + + #[inline] + fn next(&mut self) -> Option<Option<&'r str>> { + self.0.next() + } + + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + self.0.size_hint() + } + + #[inline] + fn count(self) -> usize { + self.0.count() + } +} + +impl<'r> ExactSizeIterator for CaptureNames<'r> {} + +impl<'r> core::iter::FusedIterator for CaptureNames<'r> {} + +/// An iterator over all group matches in a [`Captures`] value. +/// +/// This iterator yields values of type `Option<Match<'h>>`, where `'h` is the +/// lifetime of the haystack that the matches are for. The order of elements +/// yielded corresponds to the order of the opening parenthesis for the group +/// in the regex pattern. `None` is yielded for groups that did not participate +/// in the match. +/// +/// The first element always corresponds to the implicit group for the overall +/// match. Since this iterator is created by a [`Captures`] value, and a +/// `Captures` value is only created when a match occurs, it follows that the +/// first element yielded by this iterator is guaranteed to be non-`None`. +/// +/// The lifetime `'c` corresponds to the lifetime of the `Captures` value that +/// created this iterator, and the lifetime `'h` corresponds to the originally +/// matched haystack. 
+#[derive(Clone, Debug)] +pub struct SubCaptureMatches<'c, 'h> { + haystack: &'h str, + it: captures::CapturesPatternIter<'c>, +} + +impl<'c, 'h> Iterator for SubCaptureMatches<'c, 'h> { + type Item = Option<Match<'h>>; + + #[inline] + fn next(&mut self) -> Option<Option<Match<'h>>> { + self.it.next().map(|group| { + group.map(|sp| Match::new(self.haystack, sp.start, sp.end)) + }) + } + + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + self.it.size_hint() + } + + #[inline] + fn count(self) -> usize { + self.it.count() + } +} + +impl<'c, 'h> ExactSizeIterator for SubCaptureMatches<'c, 'h> {} + +impl<'c, 'h> core::iter::FusedIterator for SubCaptureMatches<'c, 'h> {} + +/// A trait for types that can be used to replace matches in a haystack. +/// +/// In general, users of this crate shouldn't need to implement this trait, +/// since implementations are already provided for `&str` along with other +/// variants of string types, as well as `FnMut(&Captures) -> String` (or any +/// `FnMut(&Captures) -> T` where `T: AsRef<str>`). Those cover most use cases, +/// but callers can implement this trait directly if necessary. +/// +/// # Example +/// +/// This example shows a basic implementation of the `Replacer` trait. This +/// can be done much more simply using the replacement string interpolation +/// support (e.g., `$first $last`), but this approach avoids needing to parse +/// the replacement string at all. 
+/// +/// ``` +/// use regex::{Captures, Regex, Replacer}; +/// +/// struct NameSwapper; +/// +/// impl Replacer for NameSwapper { +/// fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { +/// dst.push_str(&caps["first"]); +/// dst.push_str(" "); +/// dst.push_str(&caps["last"]); +/// } +/// } +/// +/// let re = Regex::new(r"(?<last>[^,\s]+),\s+(?<first>\S+)").unwrap(); +/// let result = re.replace("Springsteen, Bruce", NameSwapper); +/// assert_eq!(result, "Bruce Springsteen"); +/// ``` +pub trait Replacer { + /// Appends possibly empty data to `dst` to replace the current match. + /// + /// The current match is represented by `caps`, which is guaranteed to + /// have a match at capture group `0`. + /// + /// For example, a no-op replacement would be `dst.push_str(&caps[0])`. + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String); + + /// Return a fixed unchanging replacement string. + /// + /// When doing replacements, if access to [`Captures`] is not needed (e.g., + /// the replacement string does not need `$` expansion), then it can be + /// beneficial to avoid finding sub-captures. + /// + /// In general, this is called once for every call to a replacement routine + /// such as [`Regex::replace_all`]. + fn no_expansion<'r>(&'r mut self) -> Option<Cow<'r, str>> { + None + } + + /// Returns a type that implements `Replacer`, but that borrows and wraps + /// this `Replacer`. + /// + /// This is useful when you want to take a generic `Replacer` (which might + /// not be cloneable) and use it without consuming it, so it can be used + /// more than once. 
+ /// + /// # Example + /// + /// ``` + /// use regex::{Regex, Replacer}; + /// + /// fn replace_all_twice<R: Replacer>( + /// re: Regex, + /// src: &str, + /// mut rep: R, + /// ) -> String { + /// let dst = re.replace_all(src, rep.by_ref()); + /// let dst = re.replace_all(&dst, rep.by_ref()); + /// dst.into_owned() + /// } + /// ``` + fn by_ref<'r>(&'r mut self) -> ReplacerRef<'r, Self> { + ReplacerRef(self) + } +} + +impl<'a> Replacer for &'a str { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { + caps.expand(*self, dst); + } + + fn no_expansion(&mut self) -> Option<Cow<'_, str>> { + no_expansion(self) + } +} + +impl<'a> Replacer for &'a String { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { + self.as_str().replace_append(caps, dst) + } + + fn no_expansion(&mut self) -> Option<Cow<'_, str>> { + no_expansion(self) + } +} + +impl Replacer for String { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { + self.as_str().replace_append(caps, dst) + } + + fn no_expansion(&mut self) -> Option<Cow<'_, str>> { + no_expansion(self) + } +} + +impl<'a> Replacer for Cow<'a, str> { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { + self.as_ref().replace_append(caps, dst) + } + + fn no_expansion(&mut self) -> Option<Cow<'_, str>> { + no_expansion(self) + } +} + +impl<'a> Replacer for &'a Cow<'a, str> { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { + self.as_ref().replace_append(caps, dst) + } + + fn no_expansion(&mut self) -> Option<Cow<'_, str>> { + no_expansion(self) + } +} + +impl<F, T> Replacer for F +where + F: FnMut(&Captures<'_>) -> T, + T: AsRef<str>, +{ + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { + dst.push_str((*self)(caps).as_ref()); + } +} + +/// A by-reference adaptor for a [`Replacer`]. +/// +/// This permits reusing the same `Replacer` value in multiple calls to a +/// replacement routine like [`Regex::replace_all`]. 
+/// +/// This type is created by [`Replacer::by_ref`]. +#[derive(Debug)] +pub struct ReplacerRef<'a, R: ?Sized>(&'a mut R); + +impl<'a, R: Replacer + ?Sized + 'a> Replacer for ReplacerRef<'a, R> { + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { + self.0.replace_append(caps, dst) + } + + fn no_expansion(&mut self) -> Option<Cow<'_, str>> { + self.0.no_expansion() + } +} + +/// A helper type for forcing literal string replacement. +/// +/// It can be used with routines like [`Regex::replace`] and +/// [`Regex::replace_all`] to do a literal string replacement without expanding +/// `$name` to their corresponding capture groups. This can be both convenient +/// (to avoid escaping `$`, for example) and faster (since capture groups +/// don't need to be found). +/// +/// `'s` is the lifetime of the literal string to use. +/// +/// # Example +/// +/// ``` +/// use regex::{NoExpand, Regex}; +/// +/// let re = Regex::new(r"(?<last>[^,\s]+),\s+(\S+)").unwrap(); +/// let result = re.replace("Springsteen, Bruce", NoExpand("$2 $last")); +/// assert_eq!(result, "$2 $last"); +/// ``` +#[derive(Clone, Debug)] +pub struct NoExpand<'s>(pub &'s str); + +impl<'s> Replacer for NoExpand<'s> { + fn replace_append(&mut self, _: &Captures<'_>, dst: &mut String) { + dst.push_str(self.0); + } + + fn no_expansion(&mut self) -> Option<Cow<'_, str>> { + Some(Cow::Borrowed(self.0)) + } +} + +/// Quickly checks the given replacement string for whether interpolation +/// should be done on it. It returns `None` if a `$` was found anywhere in the +/// given string, which suggests interpolation needs to be done. But if there's +/// no `$` anywhere, then interpolation definitely does not need to be done. In +/// that case, the given string is returned as a borrowed `Cow`. +/// +/// This is meant to be used to implement the `Replacer::no_expansion` method +/// in its various trait impls. 
+fn no_expansion<T: AsRef<str>>(replacement: &T) -> Option<Cow<'_, str>> { + let replacement = replacement.as_ref(); + match crate::find_byte::find_byte(b'$', replacement.as_bytes()) { + Some(_) => None, + None => Some(Cow::Borrowed(replacement)), + } +} diff --git a/vendor/regex/src/regexset/bytes.rs b/vendor/regex/src/regexset/bytes.rs new file mode 100644 index 0000000..1220a14 --- /dev/null +++ b/vendor/regex/src/regexset/bytes.rs @@ -0,0 +1,710 @@ +use alloc::string::String; + +use regex_automata::{meta, Input, PatternID, PatternSet, PatternSetIter}; + +use crate::{bytes::RegexSetBuilder, Error}; + +/// Match multiple, possibly overlapping, regexes in a single search. +/// +/// A regex set corresponds to the union of zero or more regular expressions. +/// That is, a regex set will match a haystack when at least one of its +/// constituent regexes matches. A regex set as its formulated here provides a +/// touch more power: it will also report *which* regular expressions in the +/// set match. Indeed, this is the key difference between regex sets and a +/// single `Regex` with many alternates, since only one alternate can match at +/// a time. +/// +/// For example, consider regular expressions to match email addresses and +/// domains: `[a-z]+@[a-z]+\.(com|org|net)` and `[a-z]+\.(com|org|net)`. If a +/// regex set is constructed from those regexes, then searching the haystack +/// `foo@example.com` will report both regexes as matching. Of course, one +/// could accomplish this by compiling each regex on its own and doing two +/// searches over the haystack. The key advantage of using a regex set is +/// that it will report the matching regexes using a *single pass through the +/// haystack*. If one has hundreds or thousands of regexes to match repeatedly +/// (like a URL router for a complex web application or a user agent matcher), +/// then a regex set *can* realize huge performance gains. 
+/// +/// Unlike the top-level [`RegexSet`](crate::RegexSet), this `RegexSet` +/// searches haystacks with type `&[u8]` instead of `&str`. Consequently, this +/// `RegexSet` is permitted to match invalid UTF-8. +/// +/// # Limitations +/// +/// Regex sets are limited to answering the following two questions: +/// +/// 1. Does any regex in the set match? +/// 2. If so, which regexes in the set match? +/// +/// As with the main [`Regex`][crate::bytes::Regex] type, it is cheaper to ask +/// (1) instead of (2) since the matching engines can stop after the first +/// match is found. +/// +/// You cannot directly extract [`Match`][crate::bytes::Match] or +/// [`Captures`][crate::bytes::Captures] objects from a regex set. If you need +/// these operations, the recommended approach is to compile each pattern in +/// the set independently and scan the exact same haystack a second time with +/// those independently compiled patterns: +/// +/// ``` +/// use regex::bytes::{Regex, RegexSet}; +/// +/// let patterns = ["foo", "bar"]; +/// // Both patterns will match different ranges of this string. +/// let hay = b"barfoo"; +/// +/// // Compile a set matching any of our patterns. +/// let set = RegexSet::new(patterns).unwrap(); +/// // Compile each pattern independently. +/// let regexes: Vec<_> = set +/// .patterns() +/// .iter() +/// .map(|pat| Regex::new(pat).unwrap()) +/// .collect(); +/// +/// // Match against the whole set first and identify the individual +/// // matching patterns. +/// let matches: Vec<&[u8]> = set +/// .matches(hay) +/// .into_iter() +/// // Dereference the match index to get the corresponding +/// // compiled pattern. +/// .map(|index| &regexes[index]) +/// // To get match locations or any other info, we then have to search the +/// // exact same haystack again, using our separately-compiled pattern. 
+/// .map(|re| re.find(hay).unwrap().as_bytes()) +/// .collect(); +/// +/// // Matches arrive in the order the constituent patterns were declared, +/// // not the order they appear in the haystack. +/// assert_eq!(vec![&b"foo"[..], &b"bar"[..]], matches); +/// ``` +/// +/// # Performance +/// +/// A `RegexSet` has the same performance characteristics as `Regex`. Namely, +/// search takes `O(m * n)` time, where `m` is proportional to the size of the +/// regex set and `n` is proportional to the length of the haystack. +/// +/// # Trait implementations +/// +/// The `Default` trait is implemented for `RegexSet`. The default value +/// is an empty set. An empty set can also be explicitly constructed via +/// [`RegexSet::empty`]. +/// +/// # Example +/// +/// This shows how the above two regexes (for matching email addresses and +/// domains) might work: +/// +/// ``` +/// use regex::bytes::RegexSet; +/// +/// let set = RegexSet::new(&[ +/// r"[a-z]+@[a-z]+\.(com|org|net)", +/// r"[a-z]+\.(com|org|net)", +/// ]).unwrap(); +/// +/// // Ask whether any regexes in the set match. +/// assert!(set.is_match(b"foo@example.com")); +/// +/// // Identify which regexes in the set match. +/// let matches: Vec<_> = set.matches(b"foo@example.com").into_iter().collect(); +/// assert_eq!(vec![0, 1], matches); +/// +/// // Try again, but with a haystack that only matches one of the regexes. +/// let matches: Vec<_> = set.matches(b"example.com").into_iter().collect(); +/// assert_eq!(vec![1], matches); +/// +/// // Try again, but with a haystack that doesn't match any regex in the set. 
+/// let matches: Vec<_> = set.matches(b"example").into_iter().collect(); +/// assert!(matches.is_empty()); +/// ``` +/// +/// Note that it would be possible to adapt the above example to using `Regex` +/// with an expression like: +/// +/// ```text +/// (?P<email>[a-z]+@(?P<email_domain>[a-z]+[.](com|org|net)))|(?P<domain>[a-z]+[.](com|org|net)) +/// ``` +/// +/// After a match, one could then inspect the capture groups to figure out +/// which alternates matched. The problem is that it is hard to make this +/// approach scale when there are many regexes since the overlap between each +/// alternate isn't always obvious to reason about. +#[derive(Clone)] +pub struct RegexSet { + pub(crate) meta: meta::Regex, + pub(crate) patterns: alloc::sync::Arc<[String]>, +} + +impl RegexSet { + /// Create a new regex set with the given regular expressions. + /// + /// This takes an iterator of `S`, where `S` is something that can produce + /// a `&str`. If any of the strings in the iterator are not valid regular + /// expressions, then an error is returned. + /// + /// # Example + /// + /// Create a new regex set from an iterator of strings: + /// + /// ``` + /// use regex::bytes::RegexSet; + /// + /// let set = RegexSet::new([r"\w+", r"\d+"]).unwrap(); + /// assert!(set.is_match(b"foo")); + /// ``` + pub fn new<I, S>(exprs: I) -> Result<RegexSet, Error> + where + S: AsRef<str>, + I: IntoIterator<Item = S>, + { + RegexSetBuilder::new(exprs).build() + } + + /// Create a new empty regex set. + /// + /// An empty regex never matches anything. + /// + /// This is a convenience function for `RegexSet::new([])`, but doesn't + /// require one to specify the type of the input. 
+ /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexSet; + /// + /// let set = RegexSet::empty(); + /// assert!(set.is_empty()); + /// // an empty set matches nothing + /// assert!(!set.is_match(b"")); + /// ``` + pub fn empty() -> RegexSet { + let empty: [&str; 0] = []; + RegexSetBuilder::new(empty).build().unwrap() + } + + /// Returns true if and only if one of the regexes in this set matches + /// the haystack given. + /// + /// This method should be preferred if you only need to test whether any + /// of the regexes in the set should match, but don't care about *which* + /// regexes matched. This is because the underlying matching engine will + /// quit immediately after seeing the first match instead of continuing to + /// find all matches. + /// + /// Note that as with searches using [`Regex`](crate::bytes::Regex), the + /// expression is unanchored by default. That is, if the regex does not + /// start with `^` or `\A`, or end with `$` or `\z`, then it is permitted + /// to match anywhere in the haystack. + /// + /// # Example + /// + /// Tests whether a set matches somewhere in a haystack: + /// + /// ``` + /// use regex::bytes::RegexSet; + /// + /// let set = RegexSet::new([r"\w+", r"\d+"]).unwrap(); + /// assert!(set.is_match(b"foo")); + /// assert!(!set.is_match("☃".as_bytes())); + /// ``` + #[inline] + pub fn is_match(&self, haystack: &[u8]) -> bool { + self.is_match_at(haystack, 0) + } + + /// Returns true if and only if one of the regexes in this set matches the + /// haystack given, with the search starting at the offset given. + /// + /// The significance of the starting point is that it takes the surrounding + /// context into consideration. For example, the `\A` anchor can only + /// match when `start == 0`. + /// + /// # Panics + /// + /// This panics when `start >= haystack.len() + 1`. + /// + /// # Example + /// + /// This example shows the significance of `start`. 
Namely, consider a + /// haystack `foobar` and a desire to execute a search starting at offset + /// `3`. You could search a substring explicitly, but then the look-around + /// assertions won't work correctly. Instead, you can use this method to + /// specify the start position of a search. + /// + /// ``` + /// use regex::bytes::RegexSet; + /// + /// let set = RegexSet::new([r"\bbar\b", r"(?m)^bar$"]).unwrap(); + /// let hay = b"foobar"; + /// // We get a match here, but it's probably not intended. + /// assert!(set.is_match(&hay[3..])); + /// // No match because the assertions take the context into account. + /// assert!(!set.is_match_at(hay, 3)); + /// ``` + #[inline] + pub fn is_match_at(&self, haystack: &[u8], start: usize) -> bool { + self.meta.is_match(Input::new(haystack).span(start..haystack.len())) + } + + /// Returns the set of regexes that match in the given haystack. + /// + /// The set returned contains the index of each regex that matches in + /// the given haystack. The index is in correspondence with the order of + /// regular expressions given to `RegexSet`'s constructor. + /// + /// The set can also be used to iterate over the matched indices. The order + /// of iteration is always ascending with respect to the matching indices. + /// + /// Note that as with searches using [`Regex`](crate::bytes::Regex), the + /// expression is unanchored by default. That is, if the regex does not + /// start with `^` or `\A`, or end with `$` or `\z`, then it is permitted + /// to match anywhere in the haystack. 
+ /// + /// # Example + /// + /// Tests which regular expressions match the given haystack: + /// + /// ``` + /// use regex::bytes::RegexSet; + /// + /// let set = RegexSet::new([ + /// r"\w+", + /// r"\d+", + /// r"\pL+", + /// r"foo", + /// r"bar", + /// r"barfoo", + /// r"foobar", + /// ]).unwrap(); + /// let matches: Vec<_> = set.matches(b"foobar").into_iter().collect(); + /// assert_eq!(matches, vec![0, 2, 3, 4, 6]); + /// + /// // You can also test whether a particular regex matched: + /// let matches = set.matches(b"foobar"); + /// assert!(!matches.matched(5)); + /// assert!(matches.matched(6)); + /// ``` + #[inline] + pub fn matches(&self, haystack: &[u8]) -> SetMatches { + self.matches_at(haystack, 0) + } + + /// Returns the set of regexes that match in the given haystack. + /// + /// The set returned contains the index of each regex that matches in + /// the given haystack. The index is in correspondence with the order of + /// regular expressions given to `RegexSet`'s constructor. + /// + /// The set can also be used to iterate over the matched indices. The order + /// of iteration is always ascending with respect to the matching indices. + /// + /// The significance of the starting point is that it takes the surrounding + /// context into consideration. For example, the `\A` anchor can only + /// match when `start == 0`. + /// + /// # Panics + /// + /// This panics when `start >= haystack.len() + 1`. + /// + /// # Example + /// + /// Tests which regular expressions match the given haystack: + /// + /// ``` + /// use regex::bytes::RegexSet; + /// + /// let set = RegexSet::new([r"\bbar\b", r"(?m)^bar$"]).unwrap(); + /// let hay = b"foobar"; + /// // We get matches here, but it's probably not intended. + /// let matches: Vec<_> = set.matches(&hay[3..]).into_iter().collect(); + /// assert_eq!(matches, vec![0, 1]); + /// // No matches because the assertions take the context into account. 
+ /// let matches: Vec<_> = set.matches_at(hay, 3).into_iter().collect(); + /// assert_eq!(matches, vec![]); + /// ``` + #[inline] + pub fn matches_at(&self, haystack: &[u8], start: usize) -> SetMatches { + let input = Input::new(haystack).span(start..haystack.len()); + let mut patset = PatternSet::new(self.meta.pattern_len()); + self.meta.which_overlapping_matches(&input, &mut patset); + SetMatches(patset) + } + + /// Returns the same as matches, but starts the search at the given + /// offset and stores the matches into the slice given. + /// + /// The significance of the starting point is that it takes the surrounding + /// context into consideration. For example, the `\A` anchor can only + /// match when `start == 0`. + /// + /// `matches` must have a length that is at least the number of regexes + /// in this set. + /// + /// This method returns true if and only if at least one member of + /// `matches` is true after executing the set against `haystack`. + #[doc(hidden)] + #[inline] + pub fn matches_read_at( + &self, + matches: &mut [bool], + haystack: &[u8], + start: usize, + ) -> bool { + // This is pretty dumb. We should try to fix this, but the + // regex-automata API doesn't provide a way to store matches in an + // arbitrary &mut [bool]. Thankfully, this API is doc(hidden) and + // thus not public... But regex-capi currently uses it. We should + // fix regex-capi to use a PatternSet, maybe? Not sure... PatternSet + // is in regex-automata, not regex. So maybe we should just accept a + // 'SetMatches', which is basically just a newtype around PatternSet. + let mut patset = PatternSet::new(self.meta.pattern_len()); + let mut input = Input::new(haystack); + input.set_start(start); + self.meta.which_overlapping_matches(&input, &mut patset); + for pid in patset.iter() { + matches[pid] = true; + } + !patset.is_empty() + } + + /// An alias for `matches_read_at` to preserve backward compatibility. 
+ /// + /// The `regex-capi` crate used this method, so to avoid breaking that + /// crate, we continue to export it as an undocumented API. + #[doc(hidden)] + #[inline] + pub fn read_matches_at( + &self, + matches: &mut [bool], + haystack: &[u8], + start: usize, + ) -> bool { + self.matches_read_at(matches, haystack, start) + } + + /// Returns the total number of regexes in this set. + /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexSet; + /// + /// assert_eq!(0, RegexSet::empty().len()); + /// assert_eq!(1, RegexSet::new([r"[0-9]"]).unwrap().len()); + /// assert_eq!(2, RegexSet::new([r"[0-9]", r"[a-z]"]).unwrap().len()); + /// ``` + #[inline] + pub fn len(&self) -> usize { + self.meta.pattern_len() + } + + /// Returns `true` if this set contains no regexes. + /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexSet; + /// + /// assert!(RegexSet::empty().is_empty()); + /// assert!(!RegexSet::new([r"[0-9]"]).unwrap().is_empty()); + /// ``` + #[inline] + pub fn is_empty(&self) -> bool { + self.meta.pattern_len() == 0 + } + + /// Returns the regex patterns that this regex set was constructed from. + /// + /// This function can be used to determine the pattern for a match. The + /// slice returned has exactly as many patterns as were given to this regex set, + /// and the order of the slice is the same as the order of the patterns + /// provided to the set. 
+ /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexSet; + /// + /// let set = RegexSet::new(&[ + /// r"\w+", + /// r"\d+", + /// r"\pL+", + /// r"foo", + /// r"bar", + /// r"barfoo", + /// r"foobar", + /// ]).unwrap(); + /// let matches: Vec<_> = set + /// .matches(b"foobar") + /// .into_iter() + /// .map(|index| &set.patterns()[index]) + /// .collect(); + /// assert_eq!(matches, vec![r"\w+", r"\pL+", r"foo", r"bar", r"foobar"]); + /// ``` + #[inline] + pub fn patterns(&self) -> &[String] { + &self.patterns + } +} + +impl Default for RegexSet { + fn default() -> Self { + RegexSet::empty() + } +} + +/// A set of matches returned by a regex set. +/// +/// Values of this type are constructed by [`RegexSet::matches`]. +#[derive(Clone, Debug)] +pub struct SetMatches(PatternSet); + +impl SetMatches { + /// Whether this set contains any matches. + /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexSet; + /// + /// let set = RegexSet::new(&[ + /// r"[a-z]+@[a-z]+\.(com|org|net)", + /// r"[a-z]+\.(com|org|net)", + /// ]).unwrap(); + /// let matches = set.matches(b"foo@example.com"); + /// assert!(matches.matched_any()); + /// ``` + #[inline] + pub fn matched_any(&self) -> bool { + !self.0.is_empty() + } + + /// Whether the regex at the given index matched. + /// + /// The index for a regex is determined by its insertion order upon the + /// initial construction of a `RegexSet`, starting at `0`. + /// + /// # Panics + /// + /// If `index` is greater than or equal to the number of regexes in the + /// original set that produced these matches. Equivalently, when `index` + /// is greater than or equal to [`SetMatches::len`]. 
+ /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexSet; + /// + /// let set = RegexSet::new([ + /// r"[a-z]+@[a-z]+\.(com|org|net)", + /// r"[a-z]+\.(com|org|net)", + /// ]).unwrap(); + /// let matches = set.matches(b"example.com"); + /// assert!(!matches.matched(0)); + /// assert!(matches.matched(1)); + /// ``` + #[inline] + pub fn matched(&self, index: usize) -> bool { + self.0.contains(PatternID::new_unchecked(index)) + } + + /// The total number of regexes in the set that created these matches. + /// + /// **WARNING:** This always returns the same value as [`RegexSet::len`]. + /// In particular, it does *not* return the number of elements yielded by + /// [`SetMatches::iter`]. The only way to determine the total number of + /// matched regexes is to iterate over them. + /// + /// # Example + /// + /// Notice that this method returns the total number of regexes in the + /// original set, and *not* the total number of regexes that matched. + /// + /// ``` + /// use regex::bytes::RegexSet; + /// + /// let set = RegexSet::new([ + /// r"[a-z]+@[a-z]+\.(com|org|net)", + /// r"[a-z]+\.(com|org|net)", + /// ]).unwrap(); + /// let matches = set.matches(b"example.com"); + /// // Total number of patterns that matched. + /// assert_eq!(1, matches.iter().count()); + /// // Total number of patterns in the set. + /// assert_eq!(2, matches.len()); + /// ``` + #[inline] + pub fn len(&self) -> usize { + self.0.capacity() + } + + /// Returns an iterator over the indices of the regexes that matched. + /// + /// This will always produce matches in ascending order, where the index + /// yielded corresponds to the index of the regex that matched with respect + /// to its position when initially building the set. 
+ /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexSet; + /// + /// let set = RegexSet::new([ + /// r"[0-9]", + /// r"[a-z]", + /// r"[A-Z]", + /// r"\p{Greek}", + /// ]).unwrap(); + /// let hay = "βa1".as_bytes(); + /// let matches: Vec<_> = set.matches(hay).iter().collect(); + /// assert_eq!(matches, vec![0, 1, 3]); + /// ``` + /// + /// Note that `SetMatches` also implements the `IntoIterator` trait, so + /// this method is not always needed. For example: + /// + /// ``` + /// use regex::bytes::RegexSet; + /// + /// let set = RegexSet::new([ + /// r"[0-9]", + /// r"[a-z]", + /// r"[A-Z]", + /// r"\p{Greek}", + /// ]).unwrap(); + /// let hay = "βa1".as_bytes(); + /// let mut matches = vec![]; + /// for index in set.matches(hay) { + /// matches.push(index); + /// } + /// assert_eq!(matches, vec![0, 1, 3]); + /// ``` + #[inline] + pub fn iter(&self) -> SetMatchesIter<'_> { + SetMatchesIter(self.0.iter()) + } +} + +impl IntoIterator for SetMatches { + type IntoIter = SetMatchesIntoIter; + type Item = usize; + + fn into_iter(self) -> Self::IntoIter { + let it = 0..self.0.capacity(); + SetMatchesIntoIter { patset: self.0, it } + } +} + +impl<'a> IntoIterator for &'a SetMatches { + type IntoIter = SetMatchesIter<'a>; + type Item = usize; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +/// An owned iterator over the set of matches from a regex set. +/// +/// This will always produce matches in ascending order of index, where the +/// index corresponds to the index of the regex that matched with respect to +/// its position when initially building the set. +/// +/// This iterator is created by calling `SetMatches::into_iter` via the +/// `IntoIterator` trait. This is automatically done in `for` loops. 
+/// +/// # Example +/// +/// ``` +/// use regex::bytes::RegexSet; +/// +/// let set = RegexSet::new([ +/// r"[0-9]", +/// r"[a-z]", +/// r"[A-Z]", +/// r"\p{Greek}", +/// ]).unwrap(); +/// let hay = "βa1".as_bytes(); +/// let mut matches = vec![]; +/// for index in set.matches(hay) { +/// matches.push(index); +/// } +/// assert_eq!(matches, vec![0, 1, 3]); +/// ``` +#[derive(Debug)] +pub struct SetMatchesIntoIter { + patset: PatternSet, + it: core::ops::Range<usize>, +} + +impl Iterator for SetMatchesIntoIter { + type Item = usize; + + fn next(&mut self) -> Option<usize> { + loop { + let id = self.it.next()?; + if self.patset.contains(PatternID::new_unchecked(id)) { + return Some(id); + } + } + } + + fn size_hint(&self) -> (usize, Option<usize>) { + self.it.size_hint() + } +} + +impl DoubleEndedIterator for SetMatchesIntoIter { + fn next_back(&mut self) -> Option<usize> { + loop { + let id = self.it.next_back()?; + if self.patset.contains(PatternID::new_unchecked(id)) { + return Some(id); + } + } + } +} + +impl core::iter::FusedIterator for SetMatchesIntoIter {} + +/// A borrowed iterator over the set of matches from a regex set. +/// +/// The lifetime `'a` refers to the lifetime of the [`SetMatches`] value that +/// created this iterator. +/// +/// This will always produce matches in ascending order, where the index +/// corresponds to the index of the regex that matched with respect to its +/// position when initially building the set. +/// +/// This iterator is created by the [`SetMatches::iter`] method. 
+#[derive(Clone, Debug)] +pub struct SetMatchesIter<'a>(PatternSetIter<'a>); + +impl<'a> Iterator for SetMatchesIter<'a> { + type Item = usize; + + fn next(&mut self) -> Option<usize> { + self.0.next().map(|pid| pid.as_usize()) + } + + fn size_hint(&self) -> (usize, Option<usize>) { + self.0.size_hint() + } +} + +impl<'a> DoubleEndedIterator for SetMatchesIter<'a> { + fn next_back(&mut self) -> Option<usize> { + self.0.next_back().map(|pid| pid.as_usize()) + } +} + +impl<'a> core::iter::FusedIterator for SetMatchesIter<'a> {} + +impl core::fmt::Debug for RegexSet { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, "RegexSet({:?})", self.patterns()) + } +} diff --git a/vendor/regex/src/regexset/mod.rs b/vendor/regex/src/regexset/mod.rs new file mode 100644 index 0000000..93fadec --- /dev/null +++ b/vendor/regex/src/regexset/mod.rs @@ -0,0 +1,2 @@ +pub(crate) mod bytes; +pub(crate) mod string; diff --git a/vendor/regex/src/regexset/string.rs b/vendor/regex/src/regexset/string.rs new file mode 100644 index 0000000..2a3e7b8 --- /dev/null +++ b/vendor/regex/src/regexset/string.rs @@ -0,0 +1,706 @@ +use alloc::string::String; + +use regex_automata::{meta, Input, PatternID, PatternSet, PatternSetIter}; + +use crate::{Error, RegexSetBuilder}; + +/// Match multiple, possibly overlapping, regexes in a single search. +/// +/// A regex set corresponds to the union of zero or more regular expressions. +/// That is, a regex set will match a haystack when at least one of its +/// constituent regexes matches. A regex set as its formulated here provides a +/// touch more power: it will also report *which* regular expressions in the +/// set match. Indeed, this is the key difference between regex sets and a +/// single `Regex` with many alternates, since only one alternate can match at +/// a time. 
+/// +/// For example, consider regular expressions to match email addresses and +/// domains: `[a-z]+@[a-z]+\.(com|org|net)` and `[a-z]+\.(com|org|net)`. If a +/// regex set is constructed from those regexes, then searching the haystack +/// `foo@example.com` will report both regexes as matching. Of course, one +/// could accomplish this by compiling each regex on its own and doing two +/// searches over the haystack. The key advantage of using a regex set is +/// that it will report the matching regexes using a *single pass through the +/// haystack*. If one has hundreds or thousands of regexes to match repeatedly +/// (like a URL router for a complex web application or a user agent matcher), +/// then a regex set *can* realize huge performance gains. +/// +/// # Limitations +/// +/// Regex sets are limited to answering the following two questions: +/// +/// 1. Does any regex in the set match? +/// 2. If so, which regexes in the set match? +/// +/// As with the main [`Regex`][crate::Regex] type, it is cheaper to ask (1) +/// instead of (2) since the matching engines can stop after the first match +/// is found. +/// +/// You cannot directly extract [`Match`][crate::Match] or +/// [`Captures`][crate::Captures] objects from a regex set. If you need these +/// operations, the recommended approach is to compile each pattern in the set +/// independently and scan the exact same haystack a second time with those +/// independently compiled patterns: +/// +/// ``` +/// use regex::{Regex, RegexSet}; +/// +/// let patterns = ["foo", "bar"]; +/// // Both patterns will match different ranges of this string. +/// let hay = "barfoo"; +/// +/// // Compile a set matching any of our patterns. +/// let set = RegexSet::new(patterns).unwrap(); +/// // Compile each pattern independently. 
+/// let regexes: Vec<_> = set +/// .patterns() +/// .iter() +/// .map(|pat| Regex::new(pat).unwrap()) +/// .collect(); +/// +/// // Match against the whole set first and identify the individual +/// // matching patterns. +/// let matches: Vec<&str> = set +/// .matches(hay) +/// .into_iter() +/// // Dereference the match index to get the corresponding +/// // compiled pattern. +/// .map(|index| &regexes[index]) +/// // To get match locations or any other info, we then have to search the +/// // exact same haystack again, using our separately-compiled pattern. +/// .map(|re| re.find(hay).unwrap().as_str()) +/// .collect(); +/// +/// // Matches arrive in the order the constituent patterns were declared, +/// // not the order they appear in the haystack. +/// assert_eq!(vec!["foo", "bar"], matches); +/// ``` +/// +/// # Performance +/// +/// A `RegexSet` has the same performance characteristics as `Regex`. Namely, +/// search takes `O(m * n)` time, where `m` is proportional to the size of the +/// regex set and `n` is proportional to the length of the haystack. +/// +/// # Trait implementations +/// +/// The `Default` trait is implemented for `RegexSet`. The default value +/// is an empty set. An empty set can also be explicitly constructed via +/// [`RegexSet::empty`]. +/// +/// # Example +/// +/// This shows how the above two regexes (for matching email addresses and +/// domains) might work: +/// +/// ``` +/// use regex::RegexSet; +/// +/// let set = RegexSet::new(&[ +/// r"[a-z]+@[a-z]+\.(com|org|net)", +/// r"[a-z]+\.(com|org|net)", +/// ]).unwrap(); +/// +/// // Ask whether any regexes in the set match. +/// assert!(set.is_match("foo@example.com")); +/// +/// // Identify which regexes in the set match. +/// let matches: Vec<_> = set.matches("foo@example.com").into_iter().collect(); +/// assert_eq!(vec![0, 1], matches); +/// +/// // Try again, but with a haystack that only matches one of the regexes. 
+/// let matches: Vec<_> = set.matches("example.com").into_iter().collect(); +/// assert_eq!(vec![1], matches); +/// +/// // Try again, but with a haystack that doesn't match any regex in the set. +/// let matches: Vec<_> = set.matches("example").into_iter().collect(); +/// assert!(matches.is_empty()); +/// ``` +/// +/// Note that it would be possible to adapt the above example to using `Regex` +/// with an expression like: +/// +/// ```text +/// (?P<email>[a-z]+@(?P<email_domain>[a-z]+[.](com|org|net)))|(?P<domain>[a-z]+[.](com|org|net)) +/// ``` +/// +/// After a match, one could then inspect the capture groups to figure out +/// which alternates matched. The problem is that it is hard to make this +/// approach scale when there are many regexes since the overlap between each +/// alternate isn't always obvious to reason about. +#[derive(Clone)] +pub struct RegexSet { + pub(crate) meta: meta::Regex, + pub(crate) patterns: alloc::sync::Arc<[String]>, +} + +impl RegexSet { + /// Create a new regex set with the given regular expressions. + /// + /// This takes an iterator of `S`, where `S` is something that can produce + /// a `&str`. If any of the strings in the iterator are not valid regular + /// expressions, then an error is returned. + /// + /// # Example + /// + /// Create a new regex set from an iterator of strings: + /// + /// ``` + /// use regex::RegexSet; + /// + /// let set = RegexSet::new([r"\w+", r"\d+"]).unwrap(); + /// assert!(set.is_match("foo")); + /// ``` + pub fn new<I, S>(exprs: I) -> Result<RegexSet, Error> + where + S: AsRef<str>, + I: IntoIterator<Item = S>, + { + RegexSetBuilder::new(exprs).build() + } + + /// Create a new empty regex set. + /// + /// An empty regex never matches anything. + /// + /// This is a convenience function for `RegexSet::new([])`, but doesn't + /// require one to specify the type of the input. 
+ /// + /// # Example + /// + /// ``` + /// use regex::RegexSet; + /// + /// let set = RegexSet::empty(); + /// assert!(set.is_empty()); + /// // an empty set matches nothing + /// assert!(!set.is_match("")); + /// ``` + pub fn empty() -> RegexSet { + let empty: [&str; 0] = []; + RegexSetBuilder::new(empty).build().unwrap() + } + + /// Returns true if and only if one of the regexes in this set matches + /// the haystack given. + /// + /// This method should be preferred if you only need to test whether any + /// of the regexes in the set should match, but don't care about *which* + /// regexes matched. This is because the underlying matching engine will + /// quit immediately after seeing the first match instead of continuing to + /// find all matches. + /// + /// Note that as with searches using [`Regex`](crate::Regex), the + /// expression is unanchored by default. That is, if the regex does not + /// start with `^` or `\A`, or end with `$` or `\z`, then it is permitted + /// to match anywhere in the haystack. + /// + /// # Example + /// + /// Tests whether a set matches somewhere in a haystack: + /// + /// ``` + /// use regex::RegexSet; + /// + /// let set = RegexSet::new([r"\w+", r"\d+"]).unwrap(); + /// assert!(set.is_match("foo")); + /// assert!(!set.is_match("☃")); + /// ``` + #[inline] + pub fn is_match(&self, haystack: &str) -> bool { + self.is_match_at(haystack, 0) + } + + /// Returns true if and only if one of the regexes in this set matches the + /// haystack given, with the search starting at the offset given. + /// + /// The significance of the starting point is that it takes the surrounding + /// context into consideration. For example, the `\A` anchor can only + /// match when `start == 0`. + /// + /// # Panics + /// + /// This panics when `start >= haystack.len() + 1`. + /// + /// # Example + /// + /// This example shows the significance of `start`. 
Namely, consider a + /// haystack `foobar` and a desire to execute a search starting at offset + /// `3`. You could search a substring explicitly, but then the look-around + /// assertions won't work correctly. Instead, you can use this method to + /// specify the start position of a search. + /// + /// ``` + /// use regex::RegexSet; + /// + /// let set = RegexSet::new([r"\bbar\b", r"(?m)^bar$"]).unwrap(); + /// let hay = "foobar"; + /// // We get a match here, but it's probably not intended. + /// assert!(set.is_match(&hay[3..])); + /// // No match because the assertions take the context into account. + /// assert!(!set.is_match_at(hay, 3)); + /// ``` + #[inline] + pub fn is_match_at(&self, haystack: &str, start: usize) -> bool { + self.meta.is_match(Input::new(haystack).span(start..haystack.len())) + } + + /// Returns the set of regexes that match in the given haystack. + /// + /// The set returned contains the index of each regex that matches in + /// the given haystack. The index is in correspondence with the order of + /// regular expressions given to `RegexSet`'s constructor. + /// + /// The set can also be used to iterate over the matched indices. The order + /// of iteration is always ascending with respect to the matching indices. + /// + /// Note that as with searches using [`Regex`](crate::Regex), the + /// expression is unanchored by default. That is, if the regex does not + /// start with `^` or `\A`, or end with `$` or `\z`, then it is permitted + /// to match anywhere in the haystack. 
+ /// + /// # Example + /// + /// Tests which regular expressions match the given haystack: + /// + /// ``` + /// use regex::RegexSet; + /// + /// let set = RegexSet::new([ + /// r"\w+", + /// r"\d+", + /// r"\pL+", + /// r"foo", + /// r"bar", + /// r"barfoo", + /// r"foobar", + /// ]).unwrap(); + /// let matches: Vec<_> = set.matches("foobar").into_iter().collect(); + /// assert_eq!(matches, vec![0, 2, 3, 4, 6]); + /// + /// // You can also test whether a particular regex matched: + /// let matches = set.matches("foobar"); + /// assert!(!matches.matched(5)); + /// assert!(matches.matched(6)); + /// ``` + #[inline] + pub fn matches(&self, haystack: &str) -> SetMatches { + self.matches_at(haystack, 0) + } + + /// Returns the set of regexes that match in the given haystack. + /// + /// The set returned contains the index of each regex that matches in + /// the given haystack. The index is in correspondence with the order of + /// regular expressions given to `RegexSet`'s constructor. + /// + /// The set can also be used to iterate over the matched indices. The order + /// of iteration is always ascending with respect to the matching indices. + /// + /// The significance of the starting point is that it takes the surrounding + /// context into consideration. For example, the `\A` anchor can only + /// match when `start == 0`. + /// + /// # Panics + /// + /// This panics when `start >= haystack.len() + 1`. + /// + /// # Example + /// + /// Tests which regular expressions match the given haystack: + /// + /// ``` + /// use regex::RegexSet; + /// + /// let set = RegexSet::new([r"\bbar\b", r"(?m)^bar$"]).unwrap(); + /// let hay = "foobar"; + /// // We get matches here, but it's probably not intended. + /// let matches: Vec<_> = set.matches(&hay[3..]).into_iter().collect(); + /// assert_eq!(matches, vec![0, 1]); + /// // No matches because the assertions take the context into account. 
+ /// let matches: Vec<_> = set.matches_at(hay, 3).into_iter().collect(); + /// assert_eq!(matches, vec![]); + /// ``` + #[inline] + pub fn matches_at(&self, haystack: &str, start: usize) -> SetMatches { + let input = Input::new(haystack).span(start..haystack.len()); + let mut patset = PatternSet::new(self.meta.pattern_len()); + self.meta.which_overlapping_matches(&input, &mut patset); + SetMatches(patset) + } + + /// Returns the same as matches, but starts the search at the given + /// offset and stores the matches into the slice given. + /// + /// The significance of the starting point is that it takes the surrounding + /// context into consideration. For example, the `\A` anchor can only + /// match when `start == 0`. + /// + /// `matches` must have a length that is at least the number of regexes + /// in this set. + /// + /// This method returns true if and only if at least one member of + /// `matches` is true after executing the set against `haystack`. + #[doc(hidden)] + #[inline] + pub fn matches_read_at( + &self, + matches: &mut [bool], + haystack: &str, + start: usize, + ) -> bool { + // This is pretty dumb. We should try to fix this, but the + // regex-automata API doesn't provide a way to store matches in an + // arbitrary &mut [bool]. Thankfully, this API is is doc(hidden) and + // thus not public... But regex-capi currently uses it. We should + // fix regex-capi to use a PatternSet, maybe? Not sure... PatternSet + // is in regex-automata, not regex. So maybe we should just accept a + // 'SetMatches', which is basically just a newtype around PatternSet. + let mut patset = PatternSet::new(self.meta.pattern_len()); + let mut input = Input::new(haystack); + input.set_start(start); + self.meta.which_overlapping_matches(&input, &mut patset); + for pid in patset.iter() { + matches[pid] = true; + } + !patset.is_empty() + } + + /// An alias for `matches_read_at` to preserve backward compatibility. 
+ /// + /// The `regex-capi` crate used this method, so to avoid breaking that + /// crate, we continue to export it as an undocumented API. + #[doc(hidden)] + #[inline] + pub fn read_matches_at( + &self, + matches: &mut [bool], + haystack: &str, + start: usize, + ) -> bool { + self.matches_read_at(matches, haystack, start) + } + + /// Returns the total number of regexes in this set. + /// + /// # Example + /// + /// ``` + /// use regex::RegexSet; + /// + /// assert_eq!(0, RegexSet::empty().len()); + /// assert_eq!(1, RegexSet::new([r"[0-9]"]).unwrap().len()); + /// assert_eq!(2, RegexSet::new([r"[0-9]", r"[a-z]"]).unwrap().len()); + /// ``` + #[inline] + pub fn len(&self) -> usize { + self.meta.pattern_len() + } + + /// Returns `true` if this set contains no regexes. + /// + /// # Example + /// + /// ``` + /// use regex::RegexSet; + /// + /// assert!(RegexSet::empty().is_empty()); + /// assert!(!RegexSet::new([r"[0-9]"]).unwrap().is_empty()); + /// ``` + #[inline] + pub fn is_empty(&self) -> bool { + self.meta.pattern_len() == 0 + } + + /// Returns the regex patterns that this regex set was constructed from. + /// + /// This function can be used to determine the pattern for a match. The + /// slice returned has exactly as many patterns givens to this regex set, + /// and the order of the slice is the same as the order of the patterns + /// provided to the set. 
+ /// + /// # Example + /// + /// ``` + /// use regex::RegexSet; + /// + /// let set = RegexSet::new(&[ + /// r"\w+", + /// r"\d+", + /// r"\pL+", + /// r"foo", + /// r"bar", + /// r"barfoo", + /// r"foobar", + /// ]).unwrap(); + /// let matches: Vec<_> = set + /// .matches("foobar") + /// .into_iter() + /// .map(|index| &set.patterns()[index]) + /// .collect(); + /// assert_eq!(matches, vec![r"\w+", r"\pL+", r"foo", r"bar", r"foobar"]); + /// ``` + #[inline] + pub fn patterns(&self) -> &[String] { + &self.patterns + } +} + +impl Default for RegexSet { + fn default() -> Self { + RegexSet::empty() + } +} + +/// A set of matches returned by a regex set. +/// +/// Values of this type are constructed by [`RegexSet::matches`]. +#[derive(Clone, Debug)] +pub struct SetMatches(PatternSet); + +impl SetMatches { + /// Whether this set contains any matches. + /// + /// # Example + /// + /// ``` + /// use regex::RegexSet; + /// + /// let set = RegexSet::new(&[ + /// r"[a-z]+@[a-z]+\.(com|org|net)", + /// r"[a-z]+\.(com|org|net)", + /// ]).unwrap(); + /// let matches = set.matches("foo@example.com"); + /// assert!(matches.matched_any()); + /// ``` + #[inline] + pub fn matched_any(&self) -> bool { + !self.0.is_empty() + } + + /// Whether the regex at the given index matched. + /// + /// The index for a regex is determined by its insertion order upon the + /// initial construction of a `RegexSet`, starting at `0`. + /// + /// # Panics + /// + /// If `index` is greater than or equal to the number of regexes in the + /// original set that produced these matches. Equivalently, when `index` + /// is greater than or equal to [`SetMatches::len`]. 
+ /// + /// # Example + /// + /// ``` + /// use regex::RegexSet; + /// + /// let set = RegexSet::new([ + /// r"[a-z]+@[a-z]+\.(com|org|net)", + /// r"[a-z]+\.(com|org|net)", + /// ]).unwrap(); + /// let matches = set.matches("example.com"); + /// assert!(!matches.matched(0)); + /// assert!(matches.matched(1)); + /// ``` + #[inline] + pub fn matched(&self, index: usize) -> bool { + self.0.contains(PatternID::new_unchecked(index)) + } + + /// The total number of regexes in the set that created these matches. + /// + /// **WARNING:** This always returns the same value as [`RegexSet::len`]. + /// In particular, it does *not* return the number of elements yielded by + /// [`SetMatches::iter`]. The only way to determine the total number of + /// matched regexes is to iterate over them. + /// + /// # Example + /// + /// Notice that this method returns the total number of regexes in the + /// original set, and *not* the total number of regexes that matched. + /// + /// ``` + /// use regex::RegexSet; + /// + /// let set = RegexSet::new([ + /// r"[a-z]+@[a-z]+\.(com|org|net)", + /// r"[a-z]+\.(com|org|net)", + /// ]).unwrap(); + /// let matches = set.matches("example.com"); + /// // Total number of patterns that matched. + /// assert_eq!(1, matches.iter().count()); + /// // Total number of patterns in the set. + /// assert_eq!(2, matches.len()); + /// ``` + #[inline] + pub fn len(&self) -> usize { + self.0.capacity() + } + + /// Returns an iterator over the indices of the regexes that matched. + /// + /// This will always produces matches in ascending order, where the index + /// yielded corresponds to the index of the regex that matched with respect + /// to its position when initially building the set. 
+ /// + /// # Example + /// + /// ``` + /// use regex::RegexSet; + /// + /// let set = RegexSet::new([ + /// r"[0-9]", + /// r"[a-z]", + /// r"[A-Z]", + /// r"\p{Greek}", + /// ]).unwrap(); + /// let hay = "βa1"; + /// let matches: Vec<_> = set.matches(hay).iter().collect(); + /// assert_eq!(matches, vec![0, 1, 3]); + /// ``` + /// + /// Note that `SetMatches` also implemnets the `IntoIterator` trait, so + /// this method is not always needed. For example: + /// + /// ``` + /// use regex::RegexSet; + /// + /// let set = RegexSet::new([ + /// r"[0-9]", + /// r"[a-z]", + /// r"[A-Z]", + /// r"\p{Greek}", + /// ]).unwrap(); + /// let hay = "βa1"; + /// let mut matches = vec![]; + /// for index in set.matches(hay) { + /// matches.push(index); + /// } + /// assert_eq!(matches, vec![0, 1, 3]); + /// ``` + #[inline] + pub fn iter(&self) -> SetMatchesIter<'_> { + SetMatchesIter(self.0.iter()) + } +} + +impl IntoIterator for SetMatches { + type IntoIter = SetMatchesIntoIter; + type Item = usize; + + fn into_iter(self) -> Self::IntoIter { + let it = 0..self.0.capacity(); + SetMatchesIntoIter { patset: self.0, it } + } +} + +impl<'a> IntoIterator for &'a SetMatches { + type IntoIter = SetMatchesIter<'a>; + type Item = usize; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +/// An owned iterator over the set of matches from a regex set. +/// +/// This will always produces matches in ascending order of index, where the +/// index corresponds to the index of the regex that matched with respect to +/// its position when initially building the set. +/// +/// This iterator is created by calling `SetMatches::into_iter` via the +/// `IntoIterator` trait. This is automatically done in `for` loops. 
+/// +/// # Example +/// +/// ``` +/// use regex::RegexSet; +/// +/// let set = RegexSet::new([ +/// r"[0-9]", +/// r"[a-z]", +/// r"[A-Z]", +/// r"\p{Greek}", +/// ]).unwrap(); +/// let hay = "βa1"; +/// let mut matches = vec![]; +/// for index in set.matches(hay) { +/// matches.push(index); +/// } +/// assert_eq!(matches, vec![0, 1, 3]); +/// ``` +#[derive(Debug)] +pub struct SetMatchesIntoIter { + patset: PatternSet, + it: core::ops::Range<usize>, +} + +impl Iterator for SetMatchesIntoIter { + type Item = usize; + + fn next(&mut self) -> Option<usize> { + loop { + let id = self.it.next()?; + if self.patset.contains(PatternID::new_unchecked(id)) { + return Some(id); + } + } + } + + fn size_hint(&self) -> (usize, Option<usize>) { + self.it.size_hint() + } +} + +impl DoubleEndedIterator for SetMatchesIntoIter { + fn next_back(&mut self) -> Option<usize> { + loop { + let id = self.it.next_back()?; + if self.patset.contains(PatternID::new_unchecked(id)) { + return Some(id); + } + } + } +} + +impl core::iter::FusedIterator for SetMatchesIntoIter {} + +/// A borrowed iterator over the set of matches from a regex set. +/// +/// The lifetime `'a` refers to the lifetime of the [`SetMatches`] value that +/// created this iterator. +/// +/// This will always produces matches in ascending order, where the index +/// corresponds to the index of the regex that matched with respect to its +/// position when initially building the set. +/// +/// This iterator is created by the [`SetMatches::iter`] method. 
+#[derive(Clone, Debug)] +pub struct SetMatchesIter<'a>(PatternSetIter<'a>); + +impl<'a> Iterator for SetMatchesIter<'a> { + type Item = usize; + + fn next(&mut self) -> Option<usize> { + self.0.next().map(|pid| pid.as_usize()) + } + + fn size_hint(&self) -> (usize, Option<usize>) { + self.0.size_hint() + } +} + +impl<'a> DoubleEndedIterator for SetMatchesIter<'a> { + fn next_back(&mut self) -> Option<usize> { + self.0.next_back().map(|pid| pid.as_usize()) + } +} + +impl<'a> core::iter::FusedIterator for SetMatchesIter<'a> {} + +impl core::fmt::Debug for RegexSet { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, "RegexSet({:?})", self.patterns()) + } +} diff --git a/vendor/regex/src/sparse.rs b/vendor/regex/src/sparse.rs deleted file mode 100644 index 98b7266..0000000 --- a/vendor/regex/src/sparse.rs +++ /dev/null @@ -1,84 +0,0 @@ -use std::fmt; -use std::ops::Deref; -use std::slice; - -/// A sparse set used for representing ordered NFA states. -/// -/// This supports constant time addition and membership testing. Clearing an -/// entire set can also be done in constant time. Iteration yields elements -/// in the order in which they were inserted. -/// -/// The data structure is based on: https://research.swtch.com/sparse -/// Note though that we don't actually use uninitialized memory. We generally -/// reuse allocations, so the initial allocation cost is bareable. However, -/// its other properties listed above are extremely useful. -#[derive(Clone)] -pub struct SparseSet { - /// Dense contains the instruction pointers in the order in which they - /// were inserted. - dense: Vec<usize>, - /// Sparse maps instruction pointers to their location in dense. - /// - /// An instruction pointer is in the set if and only if - /// sparse[ip] < dense.len() && ip == dense[sparse[ip]]. 
- sparse: Box<[usize]>, -} - -impl SparseSet { - pub fn new(size: usize) -> SparseSet { - SparseSet { - dense: Vec::with_capacity(size), - sparse: vec![0; size].into_boxed_slice(), - } - } - - pub fn len(&self) -> usize { - self.dense.len() - } - - pub fn is_empty(&self) -> bool { - self.dense.is_empty() - } - - pub fn capacity(&self) -> usize { - self.dense.capacity() - } - - pub fn insert(&mut self, value: usize) { - let i = self.len(); - assert!(i < self.capacity()); - self.dense.push(value); - self.sparse[value] = i; - } - - pub fn contains(&self, value: usize) -> bool { - let i = self.sparse[value]; - self.dense.get(i) == Some(&value) - } - - pub fn clear(&mut self) { - self.dense.clear(); - } -} - -impl fmt::Debug for SparseSet { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "SparseSet({:?})", self.dense) - } -} - -impl Deref for SparseSet { - type Target = [usize]; - - fn deref(&self) -> &Self::Target { - &self.dense - } -} - -impl<'a> IntoIterator for &'a SparseSet { - type Item = &'a usize; - type IntoIter = slice::Iter<'a, usize>; - fn into_iter(self) -> Self::IntoIter { - self.iter() - } -} diff --git a/vendor/regex/src/testdata/LICENSE b/vendor/regex/src/testdata/LICENSE deleted file mode 100644 index f47dbf4..0000000 --- a/vendor/regex/src/testdata/LICENSE +++ /dev/null @@ -1,19 +0,0 @@ -The following license covers testregex.c and all associated test data. 
- -Permission is hereby granted, free of charge, to any person obtaining a -copy of THIS SOFTWARE FILE (the "Software"), to deal in the Software -without restriction, including without limitation the rights to use, -copy, modify, merge, publish, distribute, and/or sell copies of the -Software, and to permit persons to whom the Software is furnished to do -so, subject to the following disclaimer: - -THIS SOFTWARE IS PROVIDED BY AT&T ``AS IS'' AND ANY EXPRESS OR IMPLIED -WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -IN NO EVENT SHALL AT&T BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/regex/src/testdata/README b/vendor/regex/src/testdata/README deleted file mode 100644 index 6efc2da..0000000 --- a/vendor/regex/src/testdata/README +++ /dev/null @@ -1,17 +0,0 @@ -Test data was taken from the Go distribution, which was in turn taken from the -testregex test suite: - - http://www2.research.att.com/~astopen/testregex/testregex.html - -The LICENSE in this directory corresponds to the LICENSE that the data was -released under. - -The tests themselves were modified for RE2/Go. A couple were modified further -by me (Andrew Gallant) (only in repetition.dat) so that RE2/Go would pass them. -(Yes, it seems like RE2/Go includes failing test cases.) This may or may not -have been a bad idea, but I think being consistent with an established Regex -library is worth something. 
- -Note that these files are read by 'scripts/regex-match-tests.py' and turned -into Rust tests found in 'regex_macros/tests/matches.rs'. - diff --git a/vendor/regex/src/utf8.rs b/vendor/regex/src/utf8.rs deleted file mode 100644 index 2dfd2c0..0000000 --- a/vendor/regex/src/utf8.rs +++ /dev/null @@ -1,264 +0,0 @@ -/// A few elementary UTF-8 encoding and decoding functions used by the matching -/// engines. -/// -/// In an ideal world, the matching engines operate on `&str` and we can just -/// lean on the standard library for all our UTF-8 needs. However, to support -/// byte based regexes (that can match on arbitrary bytes which may contain -/// UTF-8), we need to be capable of searching and decoding UTF-8 on a `&[u8]`. -/// The standard library doesn't really recognize this use case, so we have -/// to build it out ourselves. -/// -/// Should this be factored out into a separate crate? It seems independently -/// useful. There are other crates that already exist (e.g., `utf-8`) that have -/// overlapping use cases. Not sure what to do. -use std::char; - -const TAG_CONT: u8 = 0b1000_0000; -const TAG_TWO: u8 = 0b1100_0000; -const TAG_THREE: u8 = 0b1110_0000; -const TAG_FOUR: u8 = 0b1111_0000; - -/// Returns the smallest possible index of the next valid UTF-8 sequence -/// starting after `i`. -pub fn next_utf8(text: &[u8], i: usize) -> usize { - let b = match text.get(i) { - None => return i + 1, - Some(&b) => b, - }; - let inc = if b <= 0x7F { - 1 - } else if b <= 0b110_11111 { - 2 - } else if b <= 0b1110_1111 { - 3 - } else { - 4 - }; - i + inc -} - -/// Decode a single UTF-8 sequence into a single Unicode codepoint from `src`. -/// -/// If no valid UTF-8 sequence could be found, then `None` is returned. -/// Otherwise, the decoded codepoint and the number of bytes read is returned. -/// The number of bytes read (for a valid UTF-8 sequence) is guaranteed to be -/// 1, 2, 3 or 4. 
-/// -/// Note that a UTF-8 sequence is invalid if it is incorrect UTF-8, encodes a -/// codepoint that is out of range (surrogate codepoints are out of range) or -/// is not the shortest possible UTF-8 sequence for that codepoint. -#[inline] -pub fn decode_utf8(src: &[u8]) -> Option<(char, usize)> { - let b0 = match src.get(0) { - None => return None, - Some(&b) if b <= 0x7F => return Some((b as char, 1)), - Some(&b) => b, - }; - match b0 { - 0b110_00000..=0b110_11111 => { - if src.len() < 2 { - return None; - } - let b1 = src[1]; - if 0b11_000000 & b1 != TAG_CONT { - return None; - } - let cp = ((b0 & !TAG_TWO) as u32) << 6 | ((b1 & !TAG_CONT) as u32); - match cp { - 0x80..=0x7FF => char::from_u32(cp).map(|cp| (cp, 2)), - _ => None, - } - } - 0b1110_0000..=0b1110_1111 => { - if src.len() < 3 { - return None; - } - let (b1, b2) = (src[1], src[2]); - if 0b11_000000 & b1 != TAG_CONT { - return None; - } - if 0b11_000000 & b2 != TAG_CONT { - return None; - } - let cp = ((b0 & !TAG_THREE) as u32) << 12 - | ((b1 & !TAG_CONT) as u32) << 6 - | ((b2 & !TAG_CONT) as u32); - match cp { - // char::from_u32 will disallow surrogate codepoints. - 0x800..=0xFFFF => char::from_u32(cp).map(|cp| (cp, 3)), - _ => None, - } - } - 0b11110_000..=0b11110_111 => { - if src.len() < 4 { - return None; - } - let (b1, b2, b3) = (src[1], src[2], src[3]); - if 0b11_000000 & b1 != TAG_CONT { - return None; - } - if 0b11_000000 & b2 != TAG_CONT { - return None; - } - if 0b11_000000 & b3 != TAG_CONT { - return None; - } - let cp = ((b0 & !TAG_FOUR) as u32) << 18 - | ((b1 & !TAG_CONT) as u32) << 12 - | ((b2 & !TAG_CONT) as u32) << 6 - | ((b3 & !TAG_CONT) as u32); - match cp { - 0x10000..=0x0010_FFFF => char::from_u32(cp).map(|cp| (cp, 4)), - _ => None, - } - } - _ => None, - } -} - -/// Like `decode_utf8`, but decodes the last UTF-8 sequence in `src` instead -/// of the first. 
-pub fn decode_last_utf8(src: &[u8]) -> Option<(char, usize)> { - if src.is_empty() { - return None; - } - let mut start = src.len() - 1; - if src[start] <= 0x7F { - return Some((src[start] as char, 1)); - } - while start > src.len().saturating_sub(4) { - start -= 1; - if is_start_byte(src[start]) { - break; - } - } - match decode_utf8(&src[start..]) { - None => None, - Some((_, n)) if n < src.len() - start => None, - Some((cp, n)) => Some((cp, n)), - } -} - -fn is_start_byte(b: u8) -> bool { - b & 0b11_000000 != 0b1_0000000 -} - -#[cfg(test)] -mod tests { - use std::str; - - use quickcheck::quickcheck; - - use super::{ - decode_last_utf8, decode_utf8, TAG_CONT, TAG_FOUR, TAG_THREE, TAG_TWO, - }; - - #[test] - fn prop_roundtrip() { - fn p(given_cp: char) -> bool { - let mut tmp = [0; 4]; - let encoded_len = given_cp.encode_utf8(&mut tmp).len(); - let (got_cp, got_len) = decode_utf8(&tmp[..encoded_len]).unwrap(); - encoded_len == got_len && given_cp == got_cp - } - quickcheck(p as fn(char) -> bool) - } - - #[test] - fn prop_roundtrip_last() { - fn p(given_cp: char) -> bool { - let mut tmp = [0; 4]; - let encoded_len = given_cp.encode_utf8(&mut tmp).len(); - let (got_cp, got_len) = - decode_last_utf8(&tmp[..encoded_len]).unwrap(); - encoded_len == got_len && given_cp == got_cp - } - quickcheck(p as fn(char) -> bool) - } - - #[test] - fn prop_encode_matches_std() { - fn p(cp: char) -> bool { - let mut got = [0; 4]; - let n = cp.encode_utf8(&mut got).len(); - let expected = cp.to_string(); - &got[..n] == expected.as_bytes() - } - quickcheck(p as fn(char) -> bool) - } - - #[test] - fn prop_decode_matches_std() { - fn p(given_cp: char) -> bool { - let mut tmp = [0; 4]; - let n = given_cp.encode_utf8(&mut tmp).len(); - let (got_cp, _) = decode_utf8(&tmp[..n]).unwrap(); - let expected_cp = - str::from_utf8(&tmp[..n]).unwrap().chars().next().unwrap(); - got_cp == expected_cp - } - quickcheck(p as fn(char) -> bool) - } - - #[test] - fn prop_decode_last_matches_std() { - fn 
p(given_cp: char) -> bool { - let mut tmp = [0; 4]; - let n = given_cp.encode_utf8(&mut tmp).len(); - let (got_cp, _) = decode_last_utf8(&tmp[..n]).unwrap(); - let expected_cp = str::from_utf8(&tmp[..n]) - .unwrap() - .chars() - .rev() - .next() - .unwrap(); - got_cp == expected_cp - } - quickcheck(p as fn(char) -> bool) - } - - #[test] - fn reject_invalid() { - // Invalid start byte - assert_eq!(decode_utf8(&[0xFF]), None); - // Surrogate pair - assert_eq!(decode_utf8(&[0xED, 0xA0, 0x81]), None); - // Invalid continuation byte. - assert_eq!(decode_utf8(&[0xD4, 0xC2]), None); - // Bad lengths - assert_eq!(decode_utf8(&[0xC3]), None); // 2 bytes - assert_eq!(decode_utf8(&[0xEF, 0xBF]), None); // 3 bytes - assert_eq!(decode_utf8(&[0xF4, 0x8F, 0xBF]), None); // 4 bytes - // Not a minimal UTF-8 sequence - assert_eq!(decode_utf8(&[TAG_TWO, TAG_CONT | b'a']), None); - assert_eq!(decode_utf8(&[TAG_THREE, TAG_CONT, TAG_CONT | b'a']), None); - assert_eq!( - decode_utf8(&[TAG_FOUR, TAG_CONT, TAG_CONT, TAG_CONT | b'a',]), - None - ); - } - - #[test] - fn reject_invalid_last() { - // Invalid start byte - assert_eq!(decode_last_utf8(&[0xFF]), None); - // Surrogate pair - assert_eq!(decode_last_utf8(&[0xED, 0xA0, 0x81]), None); - // Bad lengths - assert_eq!(decode_last_utf8(&[0xC3]), None); // 2 bytes - assert_eq!(decode_last_utf8(&[0xEF, 0xBF]), None); // 3 bytes - assert_eq!(decode_last_utf8(&[0xF4, 0x8F, 0xBF]), None); // 4 bytes - // Not a minimal UTF-8 sequence - assert_eq!(decode_last_utf8(&[TAG_TWO, TAG_CONT | b'a']), None); - assert_eq!( - decode_last_utf8(&[TAG_THREE, TAG_CONT, TAG_CONT | b'a',]), - None - ); - assert_eq!( - decode_last_utf8( - &[TAG_FOUR, TAG_CONT, TAG_CONT, TAG_CONT | b'a',] - ), - None - ); - } -} diff --git a/vendor/regex/test b/vendor/regex/test index b10564f..48224c6 100755 --- a/vendor/regex/test +++ b/vendor/regex/test @@ -2,14 +2,23 @@ set -e +# cd to the directory containing this crate's Cargo.toml so that we don't need +# to pass 
--manifest-path to every `cargo` command. +cd "$(dirname "$0")" + # This is a convenience script for running a broad swath of tests across # features. We don't test the complete space, since the complete space is quite # large. Hopefully once we migrate the test suite to better infrastructure # (like regex-automata), we'll be able to test more of the space. -echo "===== DEFAULT FEATURES ===" +echo "===== DEFAULT FEATURES =====" cargo test -echo "===== DOC TESTS ===" +# no-std mode is annoyingly difficult to test. Currently, the integration tests +# don't run. So for now, we just test that library tests run. (There aren't +# many because `regex` is just a wrapper crate.) +cargo test --no-default-features --lib + +echo "===== DOC TESTS =====" cargo test --doc features=( @@ -21,10 +30,17 @@ features=( "std perf-dfa" "std perf-inline" "std perf-literal" + "std perf-dfa-full" + "std perf-onepass" + "std perf-backtrack" ) for f in "${features[@]}"; do - echo "===== FEATURE: $f (default) ===" - cargo test --test default --no-default-features --features "$f" - echo "===== FEATURE: $f (default-bytes) ===" - cargo test --test default-bytes --no-default-features --features "$f" + echo "===== FEATURE: $f =====" + cargo test --test integration --no-default-features --features "$f" done + +# And test the probably-forever-nightly-only 'pattern' feature... +if rustc --version | grep -q nightly; then + echo "===== FEATURE: std,pattern,unicode-perl =====" + cargo test --test integration --no-default-features --features std,pattern,unicode-perl +fi diff --git a/vendor/regex/testdata/README.md b/vendor/regex/testdata/README.md new file mode 100644 index 0000000..c3bc1ac --- /dev/null +++ b/vendor/regex/testdata/README.md @@ -0,0 +1,22 @@ +This directory contains a large suite of regex tests defined in a TOML format. +They are used to drive tests in `tests/lib.rs`, `regex-automata/tests/lib.rs` +and `regex-lite/tests/lib.rs`. 
+ +See the [`regex-test`][regex-test] crate documentation for an explanation of +the format and how it generates tests. + +The basic idea here is that we have many different regex engines but generally +one set of tests. We want to be able to run those tests (or most of them) on +every engine. Prior to `regex 1.9`, we used to do this with a hodge podge soup +of macros and a different test executable for each engine. It overall took a +longer time to compile, was harder to maintain and it made the test definitions +themselves less clear. + +In `regex 1.9`, when we moved over to `regex-automata`, the situation got a lot +worse because of an increase in the number of engines. So I devised an engine +independent format for testing regex patterns and their semantics. + +Note: the naming scheme used in these tests isn't terribly consistent. It would +be great to fix that. + +[regex-test]: https://docs.rs/regex-test diff --git a/vendor/regex/testdata/anchored.toml b/vendor/regex/testdata/anchored.toml new file mode 100644 index 0000000..0f2248d --- /dev/null +++ b/vendor/regex/testdata/anchored.toml @@ -0,0 +1,127 @@ +# These tests are specifically geared toward searches with 'anchored = true'. +# While they are interesting in their own right, they are particularly +# important for testing the one-pass DFA since the one-pass DFA can't work in +# unanchored contexts. +# +# Note that "anchored" in this context does not mean "^". Anchored searches are +# searches whose matches must begin at the start of the search, which may not +# be at the start of the haystack. That's why anchored searches---and there are +# some examples below---can still report multiple matches. This occurs when the +# matches are adjacent to one another. 
+ +[[test]] +name = "greedy" +regex = '(abc)+' +haystack = "abcabcabc" +matches = [ + [[0, 9], [6, 9]], +] +anchored = true + +# When a "earliest" search is used, greediness doesn't really exist because +# matches are reported as soon as they are known. +[[test]] +name = "greedy-earliest" +regex = '(abc)+' +haystack = "abcabcabc" +matches = [ + [[0, 3], [0, 3]], + [[3, 6], [3, 6]], + [[6, 9], [6, 9]], +] +anchored = true +search-kind = "earliest" + +[[test]] +name = "nongreedy" +regex = '(abc)+?' +haystack = "abcabcabc" +matches = [ + [[0, 3], [0, 3]], + [[3, 6], [3, 6]], + [[6, 9], [6, 9]], +] +anchored = true + +# When "all" semantics are used, non-greediness doesn't exist since the longest +# possible match is always taken. +[[test]] +name = "nongreedy-all" +regex = '(abc)+?' +haystack = "abcabcabc" +matches = [ + [[0, 9], [6, 9]], +] +anchored = true +match-kind = "all" + +[[test]] +name = "word-boundary-unicode-01" +regex = '\b\w+\b' +haystack = 'βββ☃' +matches = [[0, 6]] +anchored = true + +[[test]] +name = "word-boundary-nounicode-01" +regex = '\b\w+\b' +haystack = 'abcβ' +matches = [[0, 3]] +anchored = true +unicode = false + +# Tests that '.c' doesn't match 'abc' when performing an anchored search from +# the beginning of the haystack. This test found two different bugs in the +# PikeVM and the meta engine. +[[test]] +name = "no-match-at-start" +regex = '.c' +haystack = 'abc' +matches = [] +anchored = true + +# Like above, but at a non-zero start offset. +[[test]] +name = "no-match-at-start-bounds" +regex = '.c' +haystack = 'aabc' +bounds = [1, 4] +matches = [] +anchored = true + +# This is like no-match-at-start, but hits the "reverse inner" optimization +# inside the meta engine. (no-match-at-start hits the "reverse suffix" +# optimization.) +[[test]] +name = "no-match-at-start-reverse-inner" +regex = '.c[a-z]' +haystack = 'abcz' +matches = [] +anchored = true + +# Like above, but at a non-zero start offset. 
+[[test]] +name = "no-match-at-start-reverse-inner-bounds" +regex = '.c[a-z]' +haystack = 'aabcz' +bounds = [1, 5] +matches = [] +anchored = true + +# Same as no-match-at-start, but applies to the meta engine's "reverse +# anchored" optimization. +[[test]] +name = "no-match-at-start-reverse-anchored" +regex = '.c[a-z]$' +haystack = 'abcz' +matches = [] +anchored = true + +# Like above, but at a non-zero start offset. +[[test]] +name = "no-match-at-start-reverse-anchored-bounds" +regex = '.c[a-z]$' +haystack = 'aabcz' +bounds = [1, 5] +matches = [] +anchored = true diff --git a/vendor/regex/testdata/bytes.toml b/vendor/regex/testdata/bytes.toml new file mode 100644 index 0000000..346e369 --- /dev/null +++ b/vendor/regex/testdata/bytes.toml @@ -0,0 +1,235 @@ +# These are tests specifically crafted for regexes that can match arbitrary +# bytes. In some cases, we also test the Unicode variant as well, just because +# it's good sense to do so. But also, these tests aren't really about Unicode, +# but whether matches are only reported at valid UTF-8 boundaries. For most +# tests in this entire collection, utf8 = true. But for these tests, we use +# utf8 = false. 
+ +[[test]] +name = "word-boundary-ascii" +regex = ' \b' +haystack = " δ" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "word-boundary-unicode" +regex = ' \b' +haystack = " δ" +matches = [[0, 1]] +unicode = true +utf8 = false + +[[test]] +name = "word-boundary-ascii-not" +regex = ' \B' +haystack = " δ" +matches = [[0, 1]] +unicode = false +utf8 = false + +[[test]] +name = "word-boundary-unicode-not" +regex = ' \B' +haystack = " δ" +matches = [] +unicode = true +utf8 = false + +[[test]] +name = "perl-word-ascii" +regex = '\w+' +haystack = "aδ" +matches = [[0, 1]] +unicode = false +utf8 = false + +[[test]] +name = "perl-word-unicode" +regex = '\w+' +haystack = "aδ" +matches = [[0, 3]] +unicode = true +utf8 = false + +[[test]] +name = "perl-decimal-ascii" +regex = '\d+' +haystack = "1२३9" +matches = [[0, 1], [7, 8]] +unicode = false +utf8 = false + +[[test]] +name = "perl-decimal-unicode" +regex = '\d+' +haystack = "1२३9" +matches = [[0, 8]] +unicode = true +utf8 = false + +[[test]] +name = "perl-whitespace-ascii" +regex = '\s+' +haystack = " \u1680" +matches = [[0, 1]] +unicode = false +utf8 = false + +[[test]] +name = "perl-whitespace-unicode" +regex = '\s+' +haystack = " \u1680" +matches = [[0, 4]] +unicode = true +utf8 = false + +# The first `(.+)` matches two Unicode codepoints, but can't match the 5th +# byte, which isn't valid UTF-8. The second (byte based) `(.+)` takes over and +# matches. 
+[[test]] +name = "mixed-dot" +regex = '(.+)(?-u)(.+)' +haystack = '\xCE\x93\xCE\x94\xFF' +matches = [ + [[0, 5], [0, 4], [4, 5]], +] +unescape = true +unicode = true +utf8 = false + +[[test]] +name = "case-one-ascii" +regex = 'a' +haystack = "A" +matches = [[0, 1]] +case-insensitive = true +unicode = false +utf8 = false + +[[test]] +name = "case-one-unicode" +regex = 'a' +haystack = "A" +matches = [[0, 1]] +case-insensitive = true +unicode = true +utf8 = false + +[[test]] +name = "case-class-simple-ascii" +regex = '[a-z]+' +haystack = "AaAaA" +matches = [[0, 5]] +case-insensitive = true +unicode = false +utf8 = false + +[[test]] +name = "case-class-ascii" +regex = '[a-z]+' +haystack = "aA\u212AaA" +matches = [[0, 2], [5, 7]] +case-insensitive = true +unicode = false +utf8 = false + +[[test]] +name = "case-class-unicode" +regex = '[a-z]+' +haystack = "aA\u212AaA" +matches = [[0, 7]] +case-insensitive = true +unicode = true +utf8 = false + +[[test]] +name = "negate-ascii" +regex = '[^a]' +haystack = "δ" +matches = [[0, 1], [1, 2]] +unicode = false +utf8 = false + +[[test]] +name = "negate-unicode" +regex = '[^a]' +haystack = "δ" +matches = [[0, 2]] +unicode = true +utf8 = false + +# When utf8=true, this won't match, because the implicit '.*?' prefix is +# Unicode aware and will refuse to match through invalid UTF-8 bytes. 
+[[test]] +name = "dotstar-prefix-ascii" +regex = 'a' +haystack = '\xFFa' +matches = [[1, 2]] +unescape = true +unicode = false +utf8 = false + +[[test]] +name = "dotstar-prefix-unicode" +regex = 'a' +haystack = '\xFFa' +matches = [[1, 2]] +unescape = true +unicode = true +utf8 = false + +[[test]] +name = "null-bytes" +regex = '(?P<cstr>[^\x00]+)\x00' +haystack = 'foo\x00' +matches = [ + [[0, 4], [0, 3]], +] +unescape = true +unicode = false +utf8 = false + +[[test]] +name = "invalid-utf8-anchor-100" +regex = '\xCC?^' +haystack = '\x8d#;\x1a\xa4s3\x05foobarX\\\x0f0t\xe4\x9b\xa4' +matches = [[0, 0]] +unescape = true +unicode = false +utf8 = false + +[[test]] +name = "invalid-utf8-anchor-200" +regex = '^\xf7|4\xff\d\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a##########[] d\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a##########\[] #####\x80\S7|$' +haystack = '\x8d#;\x1a\xa4s3\x05foobarX\\\x0f0t\xe4\x9b\xa4' +matches = [[22, 22]] +unescape = true +unicode = false +utf8 = false + +[[test]] +name = "invalid-utf8-anchor-300" +regex = '^|ddp\xff\xffdddddlQd@\x80' +haystack = '\x8d#;\x1a\xa4s3\x05foobarX\\\x0f0t\xe4\x9b\xa4' +matches = [[0, 0]] +unescape = true +unicode = false +utf8 = false + +[[test]] +name = "word-boundary-ascii-100" +regex = '\Bx\B' +haystack = "áxβ" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "word-boundary-ascii-200" +regex = '\B' +haystack = "0\U0007EF5E" +matches = [[2, 2], [3, 3], [4, 4], [5, 5]] +unicode = false +utf8 = false diff --git a/vendor/regex/testdata/crazy.toml b/vendor/regex/testdata/crazy.toml new file mode 100644 index 0000000..aed46ea --- /dev/null +++ b/vendor/regex/testdata/crazy.toml @@ -0,0 +1,315 @@ +[[test]] +name = "nothing-empty" +regex = [] +haystack = "" +matches = [] + +[[test]] +name = "nothing-something" +regex = [] +haystack = "wat" +matches = [] + +[[test]] +name = "ranges" +regex = '(?-u)\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b' +haystack = "num: 255" +matches = [[5, 
8]] + +[[test]] +name = "ranges-not" +regex = '(?-u)\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b' +haystack = "num: 256" +matches = [] + +[[test]] +name = "float1" +regex = '[-+]?[0-9]*\.?[0-9]+' +haystack = "0.1" +matches = [[0, 3]] + +[[test]] +name = "float2" +regex = '[-+]?[0-9]*\.?[0-9]+' +haystack = "0.1.2" +matches = [[0, 3]] +match-limit = 1 + +[[test]] +name = "float3" +regex = '[-+]?[0-9]*\.?[0-9]+' +haystack = "a1.2" +matches = [[1, 4]] + +[[test]] +name = "float4" +regex = '[-+]?[0-9]*\.?[0-9]+' +haystack = "1.a" +matches = [[0, 1]] + +[[test]] +name = "float5" +regex = '^[-+]?[0-9]*\.?[0-9]+$' +haystack = "1.a" +matches = [] + +[[test]] +name = "email" +regex = '(?i-u)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b' +haystack = "mine is jam.slam@gmail.com " +matches = [[8, 26]] + +[[test]] +name = "email-not" +regex = '(?i-u)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b' +haystack = "mine is jam.slam@gmail " +matches = [] + +[[test]] +name = "email-big" +regex = '''[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?''' +haystack = "mine is jam.slam@gmail.com " +matches = [[8, 26]] + +[[test]] +name = "date1" +regex = '^(?:19|20)\d\d[- /.](?:0[1-9]|1[012])[- /.](?:0[1-9]|[12][0-9]|3[01])$' +haystack = "1900-01-01" +matches = [[0, 10]] +unicode = false + +[[test]] +name = "date2" +regex = '^(?:19|20)\d\d[- /.](?:0[1-9]|1[012])[- /.](?:0[1-9]|[12][0-9]|3[01])$' +haystack = "1900-00-01" +matches = [] +unicode = false + +[[test]] +name = "date3" +regex = '^(?:19|20)\d\d[- /.](?:0[1-9]|1[012])[- /.](?:0[1-9]|[12][0-9]|3[01])$' +haystack = "1900-13-01" +matches = [] +unicode = false + +[[test]] +name = "start-end-empty" +regex = '^$' +haystack = "" +matches = [[0, 0]] + +[[test]] +name = "start-end-empty-rev" +regex = '$^' +haystack = "" +matches = [[0, 0]] + +[[test]] +name = "start-end-empty-many-1" +regex = '^$^$^$' +haystack = "" +matches = [[0, 0]] + +[[test]] +name = 
"start-end-empty-many-2" +regex = '^^^$$$' +haystack = "" +matches = [[0, 0]] + +[[test]] +name = "start-end-empty-rep" +regex = '(?:^$)*' +haystack = "a\nb\nc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] + +[[test]] +name = "start-end-empty-rep-rev" +regex = '(?:$^)*' +haystack = "a\nb\nc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] + +[[test]] +name = "neg-class-letter" +regex = '[^ac]' +haystack = "acx" +matches = [[2, 3]] + +[[test]] +name = "neg-class-letter-comma" +regex = '[^a,]' +haystack = "a,x" +matches = [[2, 3]] + +[[test]] +name = "neg-class-letter-space" +regex = '[^a[:space:]]' +haystack = "a x" +matches = [[2, 3]] + +[[test]] +name = "neg-class-comma" +regex = '[^,]' +haystack = ",,x" +matches = [[2, 3]] + +[[test]] +name = "neg-class-space" +regex = '[^[:space:]]' +haystack = " a" +matches = [[1, 2]] + +[[test]] +name = "neg-class-space-comma" +regex = '[^,[:space:]]' +haystack = ", a" +matches = [[2, 3]] + +[[test]] +name = "neg-class-comma-space" +regex = '[^[:space:],]' +haystack = " ,a" +matches = [[2, 3]] + +[[test]] +name = "neg-class-ascii" +regex = '[^[:alpha:]Z]' +haystack = "A1" +matches = [[1, 2]] + +[[test]] +name = "lazy-many-many" +regex = '(?:(?:.*)*?)=' +haystack = "a=b" +matches = [[0, 2]] + +[[test]] +name = "lazy-many-optional" +regex = '(?:(?:.?)*?)=' +haystack = "a=b" +matches = [[0, 2]] + +[[test]] +name = "lazy-one-many-many" +regex = '(?:(?:.*)+?)=' +haystack = "a=b" +matches = [[0, 2]] + +[[test]] +name = "lazy-one-many-optional" +regex = '(?:(?:.?)+?)=' +haystack = "a=b" +matches = [[0, 2]] + +[[test]] +name = "lazy-range-min-many" +regex = '(?:(?:.*){1,}?)=' +haystack = "a=b" +matches = [[0, 2]] + +[[test]] +name = "lazy-range-many" +regex = '(?:(?:.*){1,2}?)=' +haystack = "a=b" +matches = [[0, 2]] + +[[test]] +name = "greedy-many-many" +regex = '(?:(?:.*)*)=' +haystack = "a=b" +matches = [[0, 2]] + +[[test]] +name = "greedy-many-optional" +regex = '(?:(?:.?)*)=' +haystack = "a=b" +matches 
= [[0, 2]] + +[[test]] +name = "greedy-one-many-many" +regex = '(?:(?:.*)+)=' +haystack = "a=b" +matches = [[0, 2]] + +[[test]] +name = "greedy-one-many-optional" +regex = '(?:(?:.?)+)=' +haystack = "a=b" +matches = [[0, 2]] + +[[test]] +name = "greedy-range-min-many" +regex = '(?:(?:.*){1,})=' +haystack = "a=b" +matches = [[0, 2]] + +[[test]] +name = "greedy-range-many" +regex = '(?:(?:.*){1,2})=' +haystack = "a=b" +matches = [[0, 2]] + +[[test]] +name = "empty1" +regex = '' +haystack = "" +matches = [[0, 0]] + +[[test]] +name = "empty2" +regex = '' +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty3" +regex = '(?:)' +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty4" +regex = '(?:)*' +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty5" +regex = '(?:)+' +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty6" +regex = '(?:)?' +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty7" +regex = '(?:)(?:)' +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty8" +regex = '(?:)+|z' +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty9" +regex = 'z|(?:)+' +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty10" +regex = '(?:)+|b' +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty11" +regex = 'b|(?:)+' +haystack = "abc" +matches = [[0, 0], [1, 2], [3, 3]] diff --git a/vendor/regex/testdata/crlf.toml b/vendor/regex/testdata/crlf.toml new file mode 100644 index 0000000..9e2d376 --- /dev/null +++ b/vendor/regex/testdata/crlf.toml @@ -0,0 +1,117 @@ +# This is a basic test that checks ^ and $ treat \r\n as a single line +# terminator. If ^ and $ only treated \n as a line terminator, then this would +# only match 'xyz' at the end of the haystack. 
+[[test]] +name = "basic" +regex = '(?mR)^[a-z]+$' +haystack = "abc\r\ndef\r\nxyz" +matches = [[0, 3], [5, 8], [10, 13]] + +# Tests that a CRLF-aware '^$' assertion does not match between CR and LF. +[[test]] +name = "start-end-non-empty" +regex = '(?mR)^$' +haystack = "abc\r\ndef\r\nxyz" +matches = [] + +# Tests that a CRLF-aware '^$' assertion matches the empty string, just like +# a non-CRLF-aware '^$' assertion. +[[test]] +name = "start-end-empty" +regex = '(?mR)^$' +haystack = "" +matches = [[0, 0]] + +# Tests that a CRLF-aware '^$' assertion matches the empty string preceding +# and following a line terminator. +[[test]] +name = "start-end-before-after" +regex = '(?mR)^$' +haystack = "\r\n" +matches = [[0, 0], [2, 2]] + +# Tests that a CRLF-aware '^' assertion does not split a line terminator. +[[test]] +name = "start-no-split" +regex = '(?mR)^' +haystack = "abc\r\ndef\r\nxyz" +matches = [[0, 0], [5, 5], [10, 10]] + +# Same as above, but with adjacent runs of line terminators. +[[test]] +name = "start-no-split-adjacent" +regex = '(?mR)^' +haystack = "\r\n\r\n\r\n" +matches = [[0, 0], [2, 2], [4, 4], [6, 6]] + +# Same as above, but with adjacent runs of just carriage returns. +[[test]] +name = "start-no-split-adjacent-cr" +regex = '(?mR)^' +haystack = "\r\r\r" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +# Same as above, but with adjacent runs of just line feeds. +[[test]] +name = "start-no-split-adjacent-lf" +regex = '(?mR)^' +haystack = "\n\n\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +# Tests that a CRLF-aware '$' assertion does not split a line terminator. +[[test]] +name = "end-no-split" +regex = '(?mR)$' +haystack = "abc\r\ndef\r\nxyz" +matches = [[3, 3], [8, 8], [13, 13]] + +# Same as above, but with adjacent runs of line terminators. +[[test]] +name = "end-no-split-adjacent" +regex = '(?mR)$' +haystack = "\r\n\r\n\r\n" +matches = [[0, 0], [2, 2], [4, 4], [6, 6]] + +# Same as above, but with adjacent runs of just carriage returns. 
+[[test]] +name = "end-no-split-adjacent-cr" +regex = '(?mR)$' +haystack = "\r\r\r" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +# Same as above, but with adjacent runs of just line feeds. +[[test]] +name = "end-no-split-adjacent-lf" +regex = '(?mR)$' +haystack = "\n\n\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +# Tests that '.' does not match either \r or \n when CRLF mode is enabled. Note +# that this doesn't require multi-line mode to be enabled. +[[test]] +name = "dot-no-crlf" +regex = '(?R).' +haystack = "\r\n\r\n\r\n" +matches = [] + +# This is a test that caught a bug in the one-pass DFA where it (amazingly) was +# using 'is_end_lf' instead of 'is_end_crlf' here. It was probably a copy & +# paste bug. We insert an empty capture group here because it provokes the meta +# regex engine to first find a match and then trip over a panic because the +# one-pass DFA erroneously says there is no match. +[[test]] +name = "onepass-wrong-crlf-with-capture" +regex = '(?Rm:().$)' +haystack = "ZZ\r" +matches = [[[1, 2], [1, 1]]] + +# This is like onepass-wrong-crlf-with-capture above, except it sets up the +# test so that it can be run by the one-pass DFA directly. (i.e., Make it +# anchored and start the search at the right place.) +[[test]] +name = "onepass-wrong-crlf-anchored" +regex = '(?Rm:.$)' +haystack = "ZZ\r" +matches = [[1, 2]] +anchored = true +bounds = [1, 3] diff --git a/vendor/regex/testdata/earliest.toml b/vendor/regex/testdata/earliest.toml new file mode 100644 index 0000000..9516893 --- /dev/null +++ b/vendor/regex/testdata/earliest.toml @@ -0,0 +1,52 @@ +[[test]] +name = "no-greedy-100" +regex = 'a+' +haystack = "aaa" +matches = [[0, 1], [1, 2], [2, 3]] +search-kind = "earliest" + +[[test]] +name = "no-greedy-200" +regex = 'abc+' +haystack = "zzzabccc" +matches = [[3, 6]] +search-kind = "earliest" + +[[test]] +name = "is-ungreedy" +regex = 'a+?' 
+haystack = "aaa" +matches = [[0, 1], [1, 2], [2, 3]] +search-kind = "earliest" + +[[test]] +name = "look-start-test" +regex = '^(abc|a)' +haystack = "abc" +matches = [ + [[0, 1], [0, 1]], +] +search-kind = "earliest" + +[[test]] +name = "look-end-test" +regex = '(abc|a)$' +haystack = "abc" +matches = [ + [[0, 3], [0, 3]], +] +search-kind = "earliest" + +[[test]] +name = "no-leftmost-first-100" +regex = 'abc|a' +haystack = "abc" +matches = [[0, 1]] +search-kind = "earliest" + +[[test]] +name = "no-leftmost-first-200" +regex = 'aba|a' +haystack = "aba" +matches = [[0, 1], [2, 3]] +search-kind = "earliest" diff --git a/vendor/regex/testdata/empty.toml b/vendor/regex/testdata/empty.toml new file mode 100644 index 0000000..7dfd802 --- /dev/null +++ b/vendor/regex/testdata/empty.toml @@ -0,0 +1,113 @@ +[[test]] +name = "100" +regex = "|b" +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "110" +regex = "b|" +haystack = "abc" +matches = [[0, 0], [1, 2], [3, 3]] + +[[test]] +name = "120" +regex = "|z" +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "130" +regex = "z|" +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "200" +regex = "|" +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "210" +regex = "||" +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "220" +regex = "||b" +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "230" +regex = "b||" +haystack = "abc" +matches = [[0, 0], [1, 2], [3, 3]] + +[[test]] +name = "240" +regex = "||z" +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "300" +regex = "(?:)|b" +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "310" +regex = "b|(?:)" +haystack = "abc" +matches = [[0, 0], [1, 2], [3, 3]] + +[[test]] +name = "320" +regex = "(?:|)" +haystack = "abc" +matches = [[0, 0], [1, 
1], [2, 2], [3, 3]] + +[[test]] +name = "330" +regex = "(?:|)|z" +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "400" +regex = "a(?:)|b" +haystack = "abc" +matches = [[0, 1], [1, 2]] + +[[test]] +name = "500" +regex = "" +haystack = "" +matches = [[0, 0]] + +[[test]] +name = "510" +regex = "" +haystack = "a" +matches = [[0, 0], [1, 1]] + +[[test]] +name = "520" +regex = "" +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "600" +regex = '(?:|a)*' +haystack = "aaa" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "610" +regex = '(?:|a)+' +haystack = "aaa" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] diff --git a/vendor/regex/testdata/expensive.toml b/vendor/regex/testdata/expensive.toml new file mode 100644 index 0000000..b70e42f --- /dev/null +++ b/vendor/regex/testdata/expensive.toml @@ -0,0 +1,23 @@ +# This file represent tests that may be expensive to run on some regex engines. +# For example, tests that build a full DFA ahead of time and minimize it can +# take a horrendously long time on regexes that are large (or result in an +# explosion in the number of states). We group these tests together so that +# such engines can simply skip these tests. + +# See: https://github.com/rust-lang/regex/issues/98 +[[test]] +name = "regression-many-repeat-no-stack-overflow" +regex = '^.{1,2500}' +haystack = "a" +matches = [[0, 1]] + +# This test is meant to blow the bounded backtracker's visited capacity. In +# order to do that, we need a somewhat sizeable regex. The purpose of this +# is to make sure there's at least one test that exercises this path in the +# backtracker. All other tests (at time of writing) are small enough that the +# backtracker can handle them fine. 
+[[test]] +name = "backtrack-blow-visited-capacity" +regex = '\pL{50}' +haystack = "abcdefghijklmnopqrstuvwxyabcdefghijklmnopqrstuvwxyabcdefghijklmnopqrstuvwxyabcdefghijklmnopqrstuvwxyabcdefghijklmnopqrstuvwxyabcdefghijklmnopqrstuvwxyZZ" +matches = [[0, 50], [50, 100], [100, 150]] diff --git a/vendor/regex/testdata/flags.toml b/vendor/regex/testdata/flags.toml new file mode 100644 index 0000000..30b412c --- /dev/null +++ b/vendor/regex/testdata/flags.toml @@ -0,0 +1,68 @@ +[[test]] +name = "1" +regex = "(?i)abc" +haystack = "ABC" +matches = [[0, 3]] + +[[test]] +name = "2" +regex = "(?i)a(?-i)bc" +haystack = "Abc" +matches = [[0, 3]] + +[[test]] +name = "3" +regex = "(?i)a(?-i)bc" +haystack = "ABC" +matches = [] + +[[test]] +name = "4" +regex = "(?is)a." +haystack = "A\n" +matches = [[0, 2]] + +[[test]] +name = "5" +regex = "(?is)a.(?-is)a." +haystack = "A\nab" +matches = [[0, 4]] + +[[test]] +name = "6" +regex = "(?is)a.(?-is)a." +haystack = "A\na\n" +matches = [] + +[[test]] +name = "7" +regex = "(?is)a.(?-is:a.)?" +haystack = "A\na\n" +matches = [[0, 2]] +match-limit = 1 + +[[test]] +name = "8" +regex = "(?U)a+" +haystack = "aa" +matches = [[0, 1]] +match-limit = 1 + +[[test]] +name = "9" +regex = "(?U)a+?" +haystack = "aa" +matches = [[0, 2]] + +[[test]] +name = "10" +regex = "(?U)(?-U)a+" +haystack = "aa" +matches = [[0, 2]] + +[[test]] +name = "11" +regex = '(?m)(?:^\d+$\n?)+' +haystack = "123\n456\n789" +matches = [[0, 11]] +unicode = false diff --git a/vendor/regex/testdata/fowler/basic.toml b/vendor/regex/testdata/fowler/basic.toml new file mode 100644 index 0000000..92b4e4c --- /dev/null +++ b/vendor/regex/testdata/fowler/basic.toml @@ -0,0 +1,1611 @@ +# !!! DO NOT EDIT !!! +# Automatically generated by 'regex-cli generate fowler'. +# Numbers in the test names correspond to the line number of the test from +# the original dat file. 
+ +[[test]] +name = "basic3" +regex = '''abracadabra$''' +haystack = '''abracadabracadabra''' +matches = [[[7, 18]]] +match-limit = 1 + +[[test]] +name = "basic4" +regex = '''a...b''' +haystack = '''abababbb''' +matches = [[[2, 7]]] +match-limit = 1 + +[[test]] +name = "basic5" +regex = '''XXXXXX''' +haystack = '''..XXXXXX''' +matches = [[[2, 8]]] +match-limit = 1 + +[[test]] +name = "basic6" +regex = '''\)''' +haystack = '''()''' +matches = [[[1, 2]]] +match-limit = 1 + +[[test]] +name = "basic7" +regex = '''a]''' +haystack = '''a]a''' +matches = [[[0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic9" +regex = '''\}''' +haystack = '''}''' +matches = [[[0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic10" +regex = '''\]''' +haystack = ''']''' +matches = [[[0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic12" +regex = ''']''' +haystack = ''']''' +matches = [[[0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic15" +regex = '''^a''' +haystack = '''ax''' +matches = [[[0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic16" +regex = '''\^a''' +haystack = '''a^a''' +matches = [[[1, 3]]] +match-limit = 1 + +[[test]] +name = "basic17" +regex = '''a\^''' +haystack = '''a^''' +matches = [[[0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic18" +regex = '''a$''' +haystack = '''aa''' +matches = [[[1, 2]]] +match-limit = 1 + +[[test]] +name = "basic19" +regex = '''a\$''' +haystack = '''a$''' +matches = [[[0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic20" +regex = '''^$''' +haystack = '''''' +matches = [[[0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic21" +regex = '''$^''' +haystack = '''''' +matches = [[[0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic22" +regex = '''a($)''' +haystack = '''aa''' +matches = [[[1, 2], [2, 2]]] +match-limit = 1 + +[[test]] +name = "basic23" +regex = '''a*(^a)''' +haystack = 
'''aa''' +matches = [[[0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic24" +regex = '''(..)*(...)*''' +haystack = '''a''' +matches = [[[0, 0], [], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic25" +regex = '''(..)*(...)*''' +haystack = '''abcd''' +matches = [[[0, 4], [2, 4], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic26" +regex = '''(ab|a)(bc|c)''' +haystack = '''abc''' +matches = [[[0, 3], [0, 2], [2, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic27" +regex = '''(ab)c|abc''' +haystack = '''abc''' +matches = [[[0, 3], [0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic28" +regex = '''a{0}b''' +haystack = '''ab''' +matches = [[[1, 2]]] +match-limit = 1 + +[[test]] +name = "basic29" +regex = '''(a*)(b?)(b+)b{3}''' +haystack = '''aaabbbbbbb''' +matches = [[[0, 10], [0, 3], [3, 4], [4, 7]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic30" +regex = '''(a*)(b{0,1})(b{1,})b{3}''' +haystack = '''aaabbbbbbb''' +matches = [[[0, 10], [0, 3], [3, 4], [4, 7]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic32" +regex = '''((a|a)|a)''' +haystack = '''a''' +matches = [[[0, 1], [0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic33" +regex = '''(a*)(a|aa)''' +haystack = '''aaaa''' +matches = [[[0, 4], [0, 3], [3, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic34" +regex = '''a*(a.|aa)''' +haystack = '''aaaa''' +matches = [[[0, 4], [2, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic35" +regex = '''a(b)|c(d)|a(e)f''' +haystack = '''aef''' +matches = [[[0, 3], [], [], [1, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic36" +regex = '''(a|b)?.*''' +haystack = '''b''' +matches = [[[0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic37" +regex = '''(a|b)c|a(b|c)''' +haystack = '''ac''' +matches = [[[0, 2], [0, 1], []]] +match-limit = 1 +anchored = true + 
+[[test]] +name = "basic38" +regex = '''(a|b)c|a(b|c)''' +haystack = '''ab''' +matches = [[[0, 2], [], [1, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic39" +regex = '''(a|b)*c|(a|ab)*c''' +haystack = '''abc''' +matches = [[[0, 3], [1, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic40" +regex = '''(a|b)*c|(a|ab)*c''' +haystack = '''xc''' +matches = [[[1, 2], [], []]] +match-limit = 1 + +[[test]] +name = "basic41" +regex = '''(.a|.b).*|.*(.a|.b)''' +haystack = '''xa''' +matches = [[[0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic42" +regex = '''a?(ab|ba)ab''' +haystack = '''abab''' +matches = [[[0, 4], [0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic43" +regex = '''a?(ac{0}b|ba)ab''' +haystack = '''abab''' +matches = [[[0, 4], [0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic44" +regex = '''ab|abab''' +haystack = '''abbabab''' +matches = [[[0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic45" +regex = '''aba|bab|bba''' +haystack = '''baaabbbaba''' +matches = [[[5, 8]]] +match-limit = 1 + +[[test]] +name = "basic46" +regex = '''aba|bab''' +haystack = '''baaabbbaba''' +matches = [[[6, 9]]] +match-limit = 1 + +[[test]] +name = "basic47" +regex = '''(aa|aaa)*|(a|aaaaa)''' +haystack = '''aa''' +matches = [[[0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic48" +regex = '''(a.|.a.)*|(a|.a...)''' +haystack = '''aa''' +matches = [[[0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic49" +regex = '''ab|a''' +haystack = '''xabc''' +matches = [[[1, 3]]] +match-limit = 1 + +[[test]] +name = "basic50" +regex = '''ab|a''' +haystack = '''xxabc''' +matches = [[[2, 4]]] +match-limit = 1 + +[[test]] +name = "basic51" +regex = '''(Ab|cD)*''' +haystack = '''aBcD''' +matches = [[[0, 4], [2, 4]]] +match-limit = 1 +anchored = true +case-insensitive = true + +[[test]] +name = "basic52" +regex = 
'''[^-]''' +haystack = '''--a''' +matches = [[[2, 3]]] +match-limit = 1 + +[[test]] +name = "basic53" +regex = '''[a-]*''' +haystack = '''--a''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic54" +regex = '''[a-m-]*''' +haystack = '''--amoma--''' +matches = [[[0, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic55" +regex = ''':::1:::0:|:::1:1:0:''' +haystack = ''':::0:::1:::1:::0:''' +matches = [[[8, 17]]] +match-limit = 1 + +[[test]] +name = "basic56" +regex = ''':::1:::0:|:::1:1:1:''' +haystack = ''':::0:::1:::1:::0:''' +matches = [[[8, 17]]] +match-limit = 1 + +[[test]] +name = "basic57" +regex = '''[[:upper:]]''' +haystack = '''A''' +matches = [[[0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic58" +regex = '''[[:lower:]]+''' +haystack = '''`az{''' +matches = [[[1, 3]]] +match-limit = 1 + +[[test]] +name = "basic59" +regex = '''[[:upper:]]+''' +haystack = '''@AZ[''' +matches = [[[1, 3]]] +match-limit = 1 + +[[test]] +name = "basic65" +regex = '''\n''' +haystack = '''\n''' +matches = [[[0, 1]]] +match-limit = 1 +anchored = true +unescape = true + +[[test]] +name = "basic66" +regex = '''\n''' +haystack = '''\n''' +matches = [[[0, 1]]] +match-limit = 1 +anchored = true +unescape = true + +[[test]] +name = "basic67" +regex = '''[^a]''' +haystack = '''\n''' +matches = [[[0, 1]]] +match-limit = 1 +anchored = true +unescape = true + +[[test]] +name = "basic68" +regex = '''\na''' +haystack = '''\na''' +matches = [[[0, 2]]] +match-limit = 1 +anchored = true +unescape = true + +[[test]] +name = "basic69" +regex = '''(a)(b)(c)''' +haystack = '''abc''' +matches = [[[0, 3], [0, 1], [1, 2], [2, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic70" +regex = '''xxx''' +haystack = '''xxx''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +# Test added by Rust regex project. 
+[[test]] +name = "basic72" +regex = '''(?:^|[ (,;])(?:(?:(?:[Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))(?:[^0-9]|$)''' +haystack = '''feb 6,''' +matches = [[[0, 6]]] +match-limit = 1 +anchored = true + +# Test added by Rust regex project. +[[test]] +name = "basic74" +regex = '''(?:^|[ (,;])(?:(?:(?:[Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))(?:[^0-9]|$)''' +haystack = '''2/7''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +# Test added by Rust regex project. +[[test]] +name = "basic76" +regex = '''(?:^|[ (,;])(?:(?:(?:[Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))(?:[^0-9]|$)''' +haystack = '''feb 1,Feb 6''' +matches = [[[5, 11]]] +match-limit = 1 + +# Test added by Rust regex project. +[[test]] +name = "basic78" +regex = '''(((?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:x))))))))))))))))))))))))))))))''' +haystack = '''x''' +matches = [[[0, 1], [0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +# Test added by Rust regex project. +[[test]] +name = "basic80" +regex = '''(((?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:x))))))))))))))))))))))))))))))*''' +haystack = '''xx''' +matches = [[[0, 2], [1, 2], [1, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic81" +regex = '''a?(ab|ba)*''' +haystack = '''ababababababababababababababababababababababababababababababababababababababababa''' +matches = [[[0, 81], [79, 81]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic82" +regex = '''abaa|abbaa|abbbaa|abbbbaa''' +haystack = '''ababbabbbabbbabbbbabbbbaa''' +matches = [[[18, 25]]] +match-limit = 1 + +[[test]] +name = "basic83" +regex = '''abaa|abbaa|abbbaa|abbbbaa''' +haystack = '''ababbabbbabbbabbbbabaa''' +matches = [[[18, 22]]] +match-limit = 1 + +[[test]] +name = "basic84" +regex = '''aaac|aabc|abac|abbc|baac|babc|bbac|bbbc''' +haystack = '''baaabbbabac''' +matches = [[[7, 11]]] +match-limit = 1 + +# Test added by Rust regex project. 
+[[test]] +name = "basic86" +regex = '''.*''' +haystack = '''\x01\x7f''' +matches = [[[0, 2]]] +match-limit = 1 +anchored = true +unescape = true + +[[test]] +name = "basic87" +regex = '''aaaa|bbbb|cccc|ddddd|eeeeee|fffffff|gggg|hhhh|iiiii|jjjjj|kkkkk|llll''' +haystack = '''XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa''' +matches = [[[53, 57]]] +match-limit = 1 + +[[test]] +name = "basic89" +regex = '''a*a*a*a*a*b''' +haystack = '''aaaaaaaaab''' +matches = [[[0, 10]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic90" +regex = '''^''' +haystack = '''''' +matches = [[[0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic91" +regex = '''$''' +haystack = '''''' +matches = [[[0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic92" +regex = '''^$''' +haystack = '''''' +matches = [[[0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic93" +regex = '''^a$''' +haystack = '''a''' +matches = [[[0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic94" +regex = '''abc''' +haystack = '''abc''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic95" +regex = '''abc''' +haystack = '''xabcy''' +matches = [[[1, 4]]] +match-limit = 1 + +[[test]] +name = "basic96" +regex = '''abc''' +haystack = '''ababc''' +matches = [[[2, 5]]] +match-limit = 1 + +[[test]] +name = "basic97" +regex = '''ab*c''' +haystack = '''abc''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic98" +regex = '''ab*bc''' +haystack = '''abc''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic99" +regex = '''ab*bc''' +haystack = '''abbc''' +matches = [[[0, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic100" +regex = '''ab*bc''' +haystack = '''abbbbc''' +matches = [[[0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic101" +regex = '''ab+bc''' +haystack = '''abbc''' +matches = [[[0, 4]]] +match-limit = 1 
+anchored = true + +[[test]] +name = "basic102" +regex = '''ab+bc''' +haystack = '''abbbbc''' +matches = [[[0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic103" +regex = '''ab?bc''' +haystack = '''abbc''' +matches = [[[0, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic104" +regex = '''ab?bc''' +haystack = '''abc''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic105" +regex = '''ab?c''' +haystack = '''abc''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic106" +regex = '''^abc$''' +haystack = '''abc''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic107" +regex = '''^abc''' +haystack = '''abcc''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic108" +regex = '''abc$''' +haystack = '''aabc''' +matches = [[[1, 4]]] +match-limit = 1 + +[[test]] +name = "basic109" +regex = '''^''' +haystack = '''abc''' +matches = [[[0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic110" +regex = '''$''' +haystack = '''abc''' +matches = [[[3, 3]]] +match-limit = 1 + +[[test]] +name = "basic111" +regex = '''a.c''' +haystack = '''abc''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic112" +regex = '''a.c''' +haystack = '''axc''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic113" +regex = '''a.*c''' +haystack = '''axyzc''' +matches = [[[0, 5]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic114" +regex = '''a[bc]d''' +haystack = '''abd''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic115" +regex = '''a[b-d]e''' +haystack = '''ace''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic116" +regex = '''a[b-d]''' +haystack = '''aac''' +matches = [[[1, 3]]] +match-limit = 1 + +[[test]] +name = "basic117" +regex = '''a[-b]''' +haystack = '''a-''' +matches = 
[[[0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic118" +regex = '''a[b-]''' +haystack = '''a-''' +matches = [[[0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic119" +regex = '''a]''' +haystack = '''a]''' +matches = [[[0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic120" +regex = '''a[]]b''' +haystack = '''a]b''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic121" +regex = '''a[^bc]d''' +haystack = '''aed''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic122" +regex = '''a[^-b]c''' +haystack = '''adc''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic123" +regex = '''a[^]b]c''' +haystack = '''adc''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic124" +regex = '''ab|cd''' +haystack = '''abc''' +matches = [[[0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic125" +regex = '''ab|cd''' +haystack = '''abcd''' +matches = [[[0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic126" +regex = '''a\(b''' +haystack = '''a(b''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic127" +regex = '''a\(*b''' +haystack = '''ab''' +matches = [[[0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic128" +regex = '''a\(*b''' +haystack = '''a((b''' +matches = [[[0, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic129" +regex = '''((a))''' +haystack = '''abc''' +matches = [[[0, 1], [0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic130" +regex = '''(a)b(c)''' +haystack = '''abc''' +matches = [[[0, 3], [0, 1], [2, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic131" +regex = '''a+b+c''' +haystack = '''aabbabc''' +matches = [[[4, 7]]] +match-limit = 1 + +[[test]] +name = "basic132" +regex = '''a*''' +haystack = '''aaa''' +matches = [[[0, 3]]] +match-limit = 1 
+anchored = true + +[[test]] +name = "basic133" +regex = '''(a*)*''' +haystack = '''-''' +matches = [[[0, 0], [0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic134" +regex = '''(a*)+''' +haystack = '''-''' +matches = [[[0, 0], [0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic135" +regex = '''(a*|b)*''' +haystack = '''-''' +matches = [[[0, 0], [0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic136" +regex = '''(a+|b)*''' +haystack = '''ab''' +matches = [[[0, 2], [1, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic137" +regex = '''(a+|b)+''' +haystack = '''ab''' +matches = [[[0, 2], [1, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic138" +regex = '''(a+|b)?''' +haystack = '''ab''' +matches = [[[0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic139" +regex = '''[^ab]*''' +haystack = '''cde''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic140" +regex = '''(^)*''' +haystack = '''-''' +matches = [[[0, 0], [0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic141" +regex = '''a*''' +haystack = '''''' +matches = [[[0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic142" +regex = '''([abc])*d''' +haystack = '''abbbcd''' +matches = [[[0, 6], [4, 5]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic143" +regex = '''([abc])*bcd''' +haystack = '''abcd''' +matches = [[[0, 4], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic144" +regex = '''a|b|c|d|e''' +haystack = '''e''' +matches = [[[0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic145" +regex = '''(a|b|c|d|e)f''' +haystack = '''ef''' +matches = [[[0, 2], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic146" +regex = '''((a*|b))*''' +haystack = '''-''' +matches = [[[0, 0], [0, 0], [0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic147" +regex = 
'''abcd*efg''' +haystack = '''abcdefg''' +matches = [[[0, 7]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic148" +regex = '''ab*''' +haystack = '''xabyabbbz''' +matches = [[[1, 3]]] +match-limit = 1 + +[[test]] +name = "basic149" +regex = '''ab*''' +haystack = '''xayabbbz''' +matches = [[[1, 2]]] +match-limit = 1 + +[[test]] +name = "basic150" +regex = '''(ab|cd)e''' +haystack = '''abcde''' +matches = [[[2, 5], [2, 4]]] +match-limit = 1 + +[[test]] +name = "basic151" +regex = '''[abhgefdc]ij''' +haystack = '''hij''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic152" +regex = '''(a|b)c*d''' +haystack = '''abcd''' +matches = [[[1, 4], [1, 2]]] +match-limit = 1 + +[[test]] +name = "basic153" +regex = '''(ab|ab*)bc''' +haystack = '''abc''' +matches = [[[0, 3], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic154" +regex = '''a([bc]*)c*''' +haystack = '''abc''' +matches = [[[0, 3], [1, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic155" +regex = '''a([bc]*)(c*d)''' +haystack = '''abcd''' +matches = [[[0, 4], [1, 3], [3, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic156" +regex = '''a([bc]+)(c*d)''' +haystack = '''abcd''' +matches = [[[0, 4], [1, 3], [3, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic157" +regex = '''a([bc]*)(c+d)''' +haystack = '''abcd''' +matches = [[[0, 4], [1, 2], [2, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic158" +regex = '''a[bcd]*dcdcde''' +haystack = '''adcdcde''' +matches = [[[0, 7]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic159" +regex = '''(ab|a)b*c''' +haystack = '''abc''' +matches = [[[0, 3], [0, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic160" +regex = '''((a)(b)c)(d)''' +haystack = '''abcd''' +matches = [[[0, 4], [0, 3], [0, 1], [1, 2], [3, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic161" +regex = '''[A-Za-z_][A-Za-z0-9_]*''' 
+haystack = '''alpha''' +matches = [[[0, 5]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic162" +regex = '''^a(bc+|b[eh])g|.h$''' +haystack = '''abh''' +matches = [[[1, 3], []]] +match-limit = 1 + +[[test]] +name = "basic163" +regex = '''(bc+d$|ef*g.|h?i(j|k))''' +haystack = '''effgz''' +matches = [[[0, 5], [0, 5], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic164" +regex = '''(bc+d$|ef*g.|h?i(j|k))''' +haystack = '''ij''' +matches = [[[0, 2], [0, 2], [1, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic165" +regex = '''(bc+d$|ef*g.|h?i(j|k))''' +haystack = '''reffgz''' +matches = [[[1, 6], [1, 6], []]] +match-limit = 1 + +[[test]] +name = "basic166" +regex = '''(((((((((a)))))))))''' +haystack = '''a''' +matches = [[[0, 1], [0, 1], [0, 1], [0, 1], [0, 1], [0, 1], [0, 1], [0, 1], [0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic167" +regex = '''multiple words''' +haystack = '''multiple words yeah''' +matches = [[[0, 14]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic168" +regex = '''(.*)c(.*)''' +haystack = '''abcde''' +matches = [[[0, 5], [0, 2], [3, 5]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic169" +regex = '''abcd''' +haystack = '''abcd''' +matches = [[[0, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic170" +regex = '''a(bc)d''' +haystack = '''abcd''' +matches = [[[0, 4], [1, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic171" +regex = '''a[\x01-\x03]?c''' +haystack = '''a\x02c''' +matches = [[[0, 3]]] +match-limit = 1 +anchored = true +unescape = true + +[[test]] +name = "basic172" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Muammar Qaddafi''' +matches = [[[0, 15], [], [10, 12]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic173" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Mo'ammar Gadhafi''' 
+matches = [[[0, 16], [], [11, 13]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic174" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Muammar Kaddafi''' +matches = [[[0, 15], [], [10, 12]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic175" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Muammar Qadhafi''' +matches = [[[0, 15], [], [10, 12]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic176" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Muammar Gadafi''' +matches = [[[0, 14], [], [10, 11]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic177" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Mu'ammar Qadafi''' +matches = [[[0, 15], [], [11, 12]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic178" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Moamar Gaddafi''' +matches = [[[0, 14], [], [9, 11]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic179" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Mu'ammar Qadhdhafi''' +matches = [[[0, 18], [], [13, 15]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic180" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Muammar Khaddafi''' +matches = [[[0, 16], [], [11, 13]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic181" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Muammar Ghaddafy''' +matches = [[[0, 16], [], [11, 13]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic182" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Muammar Ghadafi''' +matches = [[[0, 15], [], [11, 12]]] 
+match-limit = 1 +anchored = true + +[[test]] +name = "basic183" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Muammar Ghaddafi''' +matches = [[[0, 16], [], [11, 13]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic184" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Muamar Kaddafi''' +matches = [[[0, 14], [], [9, 11]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic185" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Muammar Quathafi''' +matches = [[[0, 16], [], [11, 13]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic186" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Muammar Gheddafi''' +matches = [[[0, 16], [], [11, 13]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic187" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Moammar Khadafy''' +matches = [[[0, 15], [], [11, 12]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic188" +regex = '''M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]''' +haystack = '''Moammar Qudhafi''' +matches = [[[0, 15], [], [10, 12]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic189" +regex = '''a+(b|c)*d+''' +haystack = '''aabcdd''' +matches = [[[0, 6], [3, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic190" +regex = '''^.+$''' +haystack = '''vivi''' +matches = [[[0, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic191" +regex = '''^(.+)$''' +haystack = '''vivi''' +matches = [[[0, 4], [0, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic192" +regex = '''^([^!.]+).att.com!(.+)$''' +haystack = '''gryphon.att.com!eby''' +matches = [[[0, 19], [0, 7], [16, 19]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic193" +regex = 
'''^([^!]+!)?([^!]+)$''' +haystack = '''bas''' +matches = [[[0, 3], [], [0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic194" +regex = '''^([^!]+!)?([^!]+)$''' +haystack = '''bar!bas''' +matches = [[[0, 7], [0, 4], [4, 7]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic195" +regex = '''^([^!]+!)?([^!]+)$''' +haystack = '''foo!bas''' +matches = [[[0, 7], [0, 4], [4, 7]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic196" +regex = '''^.+!([^!]+!)([^!]+)$''' +haystack = '''foo!bar!bas''' +matches = [[[0, 11], [4, 8], [8, 11]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic197" +regex = '''((foo)|(bar))!bas''' +haystack = '''bar!bas''' +matches = [[[0, 7], [0, 3], [], [0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic198" +regex = '''((foo)|(bar))!bas''' +haystack = '''foo!bar!bas''' +matches = [[[4, 11], [4, 7], [], [4, 7]]] +match-limit = 1 + +[[test]] +name = "basic199" +regex = '''((foo)|(bar))!bas''' +haystack = '''foo!bas''' +matches = [[[0, 7], [0, 3], [0, 3], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic200" +regex = '''((foo)|bar)!bas''' +haystack = '''bar!bas''' +matches = [[[0, 7], [0, 3], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic201" +regex = '''((foo)|bar)!bas''' +haystack = '''foo!bar!bas''' +matches = [[[4, 11], [4, 7], []]] +match-limit = 1 + +[[test]] +name = "basic202" +regex = '''((foo)|bar)!bas''' +haystack = '''foo!bas''' +matches = [[[0, 7], [0, 3], [0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic203" +regex = '''(foo|(bar))!bas''' +haystack = '''bar!bas''' +matches = [[[0, 7], [0, 3], [0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic204" +regex = '''(foo|(bar))!bas''' +haystack = '''foo!bar!bas''' +matches = [[[4, 11], [4, 7], [4, 7]]] +match-limit = 1 + +[[test]] +name = "basic205" +regex = '''(foo|(bar))!bas''' +haystack = '''foo!bas''' +matches = [[[0, 7], [0, 3], []]] 
+match-limit = 1 +anchored = true + +[[test]] +name = "basic206" +regex = '''(foo|bar)!bas''' +haystack = '''bar!bas''' +matches = [[[0, 7], [0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic207" +regex = '''(foo|bar)!bas''' +haystack = '''foo!bar!bas''' +matches = [[[4, 11], [4, 7]]] +match-limit = 1 + +[[test]] +name = "basic208" +regex = '''(foo|bar)!bas''' +haystack = '''foo!bas''' +matches = [[[0, 7], [0, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic209" +regex = '''^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$''' +haystack = '''foo!bar!bas''' +matches = [[[0, 11], [0, 11], [], [], [4, 8], [8, 11]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic210" +regex = '''^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$''' +haystack = '''bas''' +matches = [[[0, 3], [], [0, 3], [], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic211" +regex = '''^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$''' +haystack = '''bar!bas''' +matches = [[[0, 7], [0, 4], [4, 7], [], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic212" +regex = '''^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$''' +haystack = '''foo!bar!bas''' +matches = [[[0, 11], [], [], [4, 8], [8, 11]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic213" +regex = '''^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$''' +haystack = '''foo!bas''' +matches = [[[0, 7], [0, 4], [4, 7], [], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic214" +regex = '''^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$''' +haystack = '''bas''' +matches = [[[0, 3], [0, 3], [], [0, 3], [], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic215" +regex = '''^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$''' +haystack = '''bar!bas''' +matches = [[[0, 7], [0, 7], [0, 4], [4, 7], [], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic216" +regex = '''^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$''' +haystack = '''foo!bar!bas''' +matches = [[[0, 11], [0, 11], [], [], [4, 8], 
[8, 11]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic217" +regex = '''^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$''' +haystack = '''foo!bas''' +matches = [[[0, 7], [0, 7], [0, 4], [4, 7], [], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic218" +regex = '''.*(/XXX).*''' +haystack = '''/XXX''' +matches = [[[0, 4], [0, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic219" +regex = '''.*(\\XXX).*''' +haystack = '''\XXX''' +matches = [[[0, 4], [0, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic220" +regex = '''\\XXX''' +haystack = '''\XXX''' +matches = [[[0, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic221" +regex = '''.*(/000).*''' +haystack = '''/000''' +matches = [[[0, 4], [0, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic222" +regex = '''.*(\\000).*''' +haystack = '''\000''' +matches = [[[0, 4], [0, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "basic223" +regex = '''\\000''' +haystack = '''\000''' +matches = [[[0, 4]]] +match-limit = 1 +anchored = true + diff --git a/vendor/regex/testdata/fowler/dat/README b/vendor/regex/testdata/fowler/dat/README new file mode 100644 index 0000000..242a0e6 --- /dev/null +++ b/vendor/regex/testdata/fowler/dat/README @@ -0,0 +1,25 @@ +Test data was taken from the Go distribution, which was in turn taken from the +testregex test suite: + + http://web.archive.org/web/20150925124103/http://www2.research.att.com/~astopen/testregex/testregex.html + +Unfortunately, the original web site now appears dead, but the test data lives +on. + +The LICENSE in this directory corresponds to the LICENSE that the data was +originally released under. + +The tests themselves were modified for RE2/Go (and marked as such). A +couple were modified further by me (Andrew Gallant) and marked with 'Rust'. + +After some number of years, these tests were transformed into a TOML format +using the 'regex-cli generate fowler' command. 
To re-generate the +TOML files, run the following from the root of this repository: + + regex-cli generate fowler tests/data/fowler tests/data/fowler/dat/*.dat + +This assumes that you have 'regex-cli' installed. See 'regex-cli/README.md' +from the root of the repository for more information. + +This brings the Fowler tests into a more "sensible" structured format in which +other tests can be written such that they aren't write-only. diff --git a/vendor/regex/src/testdata/basic.dat b/vendor/regex/testdata/fowler/dat/basic.dat similarity index 87% rename from vendor/regex/src/testdata/basic.dat rename to vendor/regex/testdata/fowler/dat/basic.dat index 632e1bb..654a72b 100644 --- a/vendor/regex/src/testdata/basic.dat +++ b/vendor/regex/testdata/fowler/dat/basic.dat @@ -48,7 +48,7 @@ E (aa|aaa)*|(a|aaaaa) aa (0,2)(0,2) E (a.|.a.)*|(a|.a...) aa (0,2)(0,2) E ab|a xabc (1,3) E ab|a xxabc (2,4) -Ei (?-u)(Ab|cD)* aBcD (0,4)(2,4) +Ei (Ab|cD)* aBcD (0,4)(2,4) BE [^-] --a (2,3) BE [a-]* --a (0,3) BE [a-m-]* --amoma-- (0,4) @@ -68,16 +68,22 @@ BE$ [^a] \n (0,1) BE$ \na \na (0,2) E (a)(b)(c) abc (0,3)(0,1)(1,2)(2,3) BE xxx xxx (0,3) -E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) feb 6, (0,6) -E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) 2/7 (0,3) -E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) feb 1,Feb 6 (5,11) -E3 ((((((((((((((((((((((((((((((x)))))))))))))))))))))))))))))) x (0,1)(0,1)(0,1) -E3 ((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))* xx (0,2)(1,2)(1,2) +#E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) feb 6, (0,6) +E (?:^|[ (,;])(?:(?:(?:[Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))(?:[^0-9]|$) feb 6, (0,6) Rust +#E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) 2/7 (0,3) +E (?:^|[ (,;])(?:(?:(?:[Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))(?:[^0-9]|$) 2/7 (0,3) Rust +#E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) feb 1,Feb 6 (5,11) +E (?:^|[ (,;])(?:(?:(?:[Ff]eb[^ ]* *|0*2/|\* 
*/?)0*[6-7]))(?:[^0-9]|$) feb 1,Feb 6 (5,11) Rust +#E3 ((((((((((((((((((((((((((((((x)))))))))))))))))))))))))))))) x (0,1)(0,1)(0,1) +E (((?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:x)))))))))))))))))))))))))))))) x (0,1)(0,1)(0,1) Rust +#E3 ((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))* xx (0,2)(1,2)(1,2) +E (((?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:x))))))))))))))))))))))))))))))* xx (0,2)(1,2)(1,2) Rust E a?(ab|ba)* ababababababababababababababababababababababababababababababababababababababababa (0,81)(79,81) E abaa|abbaa|abbbaa|abbbbaa ababbabbbabbbabbbbabbbbaa (18,25) E abaa|abbaa|abbbaa|abbbbaa ababbabbbabbbabbbbabaa (18,22) E aaac|aabc|abac|abbc|baac|babc|bbac|bbbc baaabbbabac (7,11) -BE$ .* \x01\x7f (0,2) +#BE$ .* \x01\xff (0,2) +BE$ .* \x01\x7f (0,2) Rust E aaaa|bbbb|cccc|ddddd|eeeeee|fffffff|gggg|hhhh|iiiii|jjjjj|kkkkk|llll XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa (53,57) L aaaa\nbbbb\ncccc\nddddd\neeeeee\nfffffff\ngggg\nhhhh\niiiii\njjjjj\nkkkkk\nllll XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa NOMATCH E a*a*a*a*a*b aaaaaaaaab (0,10) @@ -124,24 +130,20 @@ E ((a)) abc (0,1)(0,1)(0,1) E (a)b(c) abc (0,3)(0,1)(2,3) E a+b+c aabbabc (4,7) E a* aaa (0,3) -#E (a*)* - (0,0)(0,0) -E (a*)* - (0,0)(?,?) RE2/Go +E (a*)* - (0,0)(0,0) E (a*)+ - (0,0)(0,0) -#E (a*|b)* - (0,0)(0,0) -E (a*|b)* - (0,0)(?,?) RE2/Go +E (a*|b)* - (0,0)(0,0) E (a+|b)* ab (0,2)(1,2) E (a+|b)+ ab (0,2)(1,2) E (a+|b)? ab (0,1)(0,1) BE [^ab]* cde (0,3) -#E (^)* - (0,0)(0,0) -E (^)* - (0,0)(?,?) RE2/Go +E (^)* - (0,0)(0,0) BE a* NULL (0,0) E ([abc])*d abbbcd (0,6)(4,5) E ([abc])*bcd abcd (0,4)(0,1) E a|b|c|d|e e (0,1) E (a|b|c|d|e)f ef (0,2)(0,1) -#E ((a*|b))* - (0,0)(0,0)(0,0) -E ((a*|b))* - (0,0)(?,?)(?,?) 
RE2/Go +E ((a*|b))* - (0,0)(0,0)(0,0) BE abcd*efg abcdefg (0,7) BE ab* xabyabbbz (1,3) BE ab* xayabbbz (1,2) diff --git a/vendor/regex/src/testdata/nullsubexpr.dat b/vendor/regex/testdata/fowler/dat/nullsubexpr.dat similarity index 81% rename from vendor/regex/src/testdata/nullsubexpr.dat rename to vendor/regex/testdata/fowler/dat/nullsubexpr.dat index 2e18fbb..a944306 100644 --- a/vendor/regex/src/testdata/nullsubexpr.dat +++ b/vendor/regex/testdata/fowler/dat/nullsubexpr.dat @@ -1,8 +1,7 @@ NOTE null subexpression matches : 2002-06-06 E (a*)* a (0,1)(0,1) -#E SAME x (0,0)(0,0) -E SAME x (0,0)(?,?) RE2/Go +E SAME x (0,0)(0,0) E SAME aaaaaa (0,6)(0,6) E SAME aaaaaax (0,6)(0,6) E (a*)+ a (0,1)(0,1) @@ -19,8 +18,7 @@ E SAME aaaaaa (0,6)(0,6) E SAME aaaaaax (0,6)(0,6) E ([a]*)* a (0,1)(0,1) -#E SAME x (0,0)(0,0) -E SAME x (0,0)(?,?) RE2/Go +E SAME x (0,0)(0,0) E SAME aaaaaa (0,6)(0,6) E SAME aaaaaax (0,6)(0,6) E ([a]*)+ a (0,1)(0,1) @@ -28,8 +26,7 @@ E SAME x (0,0)(0,0) E SAME aaaaaa (0,6)(0,6) E SAME aaaaaax (0,6)(0,6) E ([^b]*)* a (0,1)(0,1) -#E SAME b (0,0)(0,0) -E SAME b (0,0)(?,?) RE2/Go +E SAME b (0,0)(0,0) E SAME aaaaaa (0,6)(0,6) E SAME aaaaaab (0,6)(0,6) E ([ab]*)* a (0,1)(0,1) @@ -41,13 +38,12 @@ E SAME bbbbbb (0,6)(0,6) E SAME aaaabcde (0,5)(0,5) E ([^a]*)* b (0,1)(0,1) E SAME bbbbbb (0,6)(0,6) -#E SAME aaaaaa (0,0)(0,0) -E SAME aaaaaa (0,0)(?,?) RE2/Go +E SAME aaaaaa (0,0)(0,0) E ([^ab]*)* ccccxx (0,6)(0,6) -#E SAME ababab (0,0)(0,0) -E SAME ababab (0,0)(?,?) RE2/Go +E SAME ababab (0,0)(0,0) -E ((z)+|a)* zabcde (0,2)(1,2) +#E ((z)+|a)* zabcde (0,2)(1,2) +E ((z)+|a)* zabcde (0,2)(1,2)(0,1) Rust #{E a+? aaaaaa (0,1) no *? +? 
mimimal match ops #E (a) aaa (0,1)(0,1) @@ -65,8 +61,7 @@ B \(a*\)*\(x\)\(\1\) axa (0,3)(0,1)(1,2)(2,3) B \(a*\)*\(x\)\(\1\)\(x\) axax (0,4)(0,1)(1,2)(2,3)(3,4) B \(a*\)*\(x\)\(\1\)\(x\) axxa (0,3)(1,1)(1,2)(2,2)(2,3) -#E (a*)*(x) x (0,1)(0,0)(0,1) -E (a*)*(x) x (0,1)(?,?)(0,1) RE2/Go +E (a*)*(x) x (0,1)(0,0)(0,1) E (a*)*(x) ax (0,2)(0,1)(1,2) E (a*)*(x) axa (0,2)(0,1)(1,2) diff --git a/vendor/regex/src/testdata/repetition.dat b/vendor/regex/testdata/fowler/dat/repetition.dat similarity index 89% rename from vendor/regex/src/testdata/repetition.dat rename to vendor/regex/testdata/fowler/dat/repetition.dat index 3bb2121..cf0d838 100644 --- a/vendor/regex/src/testdata/repetition.dat +++ b/vendor/regex/testdata/fowler/dat/repetition.dat @@ -84,7 +84,7 @@ E ((..)|(.))* aaaaaa (0,6)(4,6)(4,6)(?,?) NOTE additional repetition tests graciously provided by Chris Kuklewicz www.haskell.org 2009-02-02 -# These test a bug in OS X / FreeBSD / NetBSD, and libtree. +# These test a bug in OS X / FreeBSD / NetBSD, and libtree. # Linux/GLIBC gets the {8,} and {8,8} wrong. :HA#100:E X(.?){0,}Y X1234567Y (0,9)(7,8) @@ -123,18 +123,24 @@ NOTE additional repetition tests graciously provided by Chris Kuklewicz www.hask # OS X / FreeBSD / NetBSD badly fail many of these, with impossible # results like (0,6)(4,5)(6,6). 
-:HA#260:E (a|ab|c|bcd){0,}(d*) ababcd (0,1)(0,1)(1,1) -:HA#261:E (a|ab|c|bcd){1,}(d*) ababcd (0,1)(0,1)(1,1) +#:HA#260:E (a|ab|c|bcd){0,}(d*) ababcd (0,6)(3,6)(6,6) +:HA#260:E (a|ab|c|bcd){0,}(d*) ababcd (0,1)(0,1)(1,1) Rust +#:HA#261:E (a|ab|c|bcd){1,}(d*) ababcd (0,6)(3,6)(6,6) +:HA#261:E (a|ab|c|bcd){1,}(d*) ababcd (0,1)(0,1)(1,1) Rust :HA#262:E (a|ab|c|bcd){2,}(d*) ababcd (0,6)(3,6)(6,6) :HA#263:E (a|ab|c|bcd){3,}(d*) ababcd (0,6)(3,6)(6,6) :HA#264:E (a|ab|c|bcd){4,}(d*) ababcd NOMATCH -:HA#265:E (a|ab|c|bcd){0,10}(d*) ababcd (0,1)(0,1)(1,1) -:HA#266:E (a|ab|c|bcd){1,10}(d*) ababcd (0,1)(0,1)(1,1) +#:HA#265:E (a|ab|c|bcd){0,10}(d*) ababcd (0,6)(3,6)(6,6) +:HA#265:E (a|ab|c|bcd){0,10}(d*) ababcd (0,1)(0,1)(1,1) Rust +#:HA#266:E (a|ab|c|bcd){1,10}(d*) ababcd (0,6)(3,6)(6,6) +:HA#266:E (a|ab|c|bcd){1,10}(d*) ababcd (0,1)(0,1)(1,1) Rust :HA#267:E (a|ab|c|bcd){2,10}(d*) ababcd (0,6)(3,6)(6,6) :HA#268:E (a|ab|c|bcd){3,10}(d*) ababcd (0,6)(3,6)(6,6) :HA#269:E (a|ab|c|bcd){4,10}(d*) ababcd NOMATCH -:HA#270:E (a|ab|c|bcd)*(d*) ababcd (0,1)(0,1)(1,1) -:HA#271:E (a|ab|c|bcd)+(d*) ababcd (0,1)(0,1)(1,1) +#:HA#270:E (a|ab|c|bcd)*(d*) ababcd (0,6)(3,6)(6,6) +:HA#270:E (a|ab|c|bcd)*(d*) ababcd (0,1)(0,1)(1,1) Rust +#:HA#271:E (a|ab|c|bcd)+(d*) ababcd (0,6)(3,6)(6,6) +:HA#271:E (a|ab|c|bcd)+(d*) ababcd (0,1)(0,1)(1,1) Rust # The above worked on Linux/GLIBC but the following often fail. # They also trip up OS X / FreeBSD / NetBSD: diff --git a/vendor/regex/testdata/fowler/nullsubexpr.toml b/vendor/regex/testdata/fowler/nullsubexpr.toml new file mode 100644 index 0000000..2f1f018 --- /dev/null +++ b/vendor/regex/testdata/fowler/nullsubexpr.toml @@ -0,0 +1,405 @@ +# !!! DO NOT EDIT !!! +# Automatically generated by 'regex-cli generate fowler'. +# Numbers in the test names correspond to the line number of the test from +# the original dat file. 
+ +[[test]] +name = "nullsubexpr3" +regex = '''(a*)*''' +haystack = '''a''' +matches = [[[0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr4" +regex = '''(a*)*''' +haystack = '''x''' +matches = [[[0, 0], [0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr5" +regex = '''(a*)*''' +haystack = '''aaaaaa''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr6" +regex = '''(a*)*''' +haystack = '''aaaaaax''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr7" +regex = '''(a*)+''' +haystack = '''a''' +matches = [[[0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr8" +regex = '''(a*)+''' +haystack = '''x''' +matches = [[[0, 0], [0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr9" +regex = '''(a*)+''' +haystack = '''aaaaaa''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr10" +regex = '''(a*)+''' +haystack = '''aaaaaax''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr11" +regex = '''(a+)*''' +haystack = '''a''' +matches = [[[0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr12" +regex = '''(a+)*''' +haystack = '''x''' +matches = [[[0, 0], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr13" +regex = '''(a+)*''' +haystack = '''aaaaaa''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr14" +regex = '''(a+)*''' +haystack = '''aaaaaax''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr15" +regex = '''(a+)+''' +haystack = '''a''' +matches = [[[0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr16" +regex = '''(a+)+''' +haystack = '''x''' +matches = [] +match-limit = 1 + +[[test]] +name = "nullsubexpr17" 
+regex = '''(a+)+''' +haystack = '''aaaaaa''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr18" +regex = '''(a+)+''' +haystack = '''aaaaaax''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr20" +regex = '''([a]*)*''' +haystack = '''a''' +matches = [[[0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr21" +regex = '''([a]*)*''' +haystack = '''x''' +matches = [[[0, 0], [0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr22" +regex = '''([a]*)*''' +haystack = '''aaaaaa''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr23" +regex = '''([a]*)*''' +haystack = '''aaaaaax''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr24" +regex = '''([a]*)+''' +haystack = '''a''' +matches = [[[0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr25" +regex = '''([a]*)+''' +haystack = '''x''' +matches = [[[0, 0], [0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr26" +regex = '''([a]*)+''' +haystack = '''aaaaaa''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr27" +regex = '''([a]*)+''' +haystack = '''aaaaaax''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr28" +regex = '''([^b]*)*''' +haystack = '''a''' +matches = [[[0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr29" +regex = '''([^b]*)*''' +haystack = '''b''' +matches = [[[0, 0], [0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr30" +regex = '''([^b]*)*''' +haystack = '''aaaaaa''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr31" +regex = '''([^b]*)*''' +haystack = '''aaaaaab''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 
+anchored = true + +[[test]] +name = "nullsubexpr32" +regex = '''([ab]*)*''' +haystack = '''a''' +matches = [[[0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr33" +regex = '''([ab]*)*''' +haystack = '''aaaaaa''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr34" +regex = '''([ab]*)*''' +haystack = '''ababab''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr35" +regex = '''([ab]*)*''' +haystack = '''bababa''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr36" +regex = '''([ab]*)*''' +haystack = '''b''' +matches = [[[0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr37" +regex = '''([ab]*)*''' +haystack = '''bbbbbb''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr38" +regex = '''([ab]*)*''' +haystack = '''aaaabcde''' +matches = [[[0, 5], [0, 5]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr39" +regex = '''([^a]*)*''' +haystack = '''b''' +matches = [[[0, 1], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr40" +regex = '''([^a]*)*''' +haystack = '''bbbbbb''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr41" +regex = '''([^a]*)*''' +haystack = '''aaaaaa''' +matches = [[[0, 0], [0, 0]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr42" +regex = '''([^ab]*)*''' +haystack = '''ccccxx''' +matches = [[[0, 6], [0, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr43" +regex = '''([^ab]*)*''' +haystack = '''ababab''' +matches = [[[0, 0], [0, 0]]] +match-limit = 1 +anchored = true + +# Test added by Rust regex project. 
+[[test]] +name = "nullsubexpr46" +regex = '''((z)+|a)*''' +haystack = '''zabcde''' +matches = [[[0, 2], [1, 2], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr64" +regex = '''(a*)*(x)''' +haystack = '''x''' +matches = [[[0, 1], [0, 0], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr65" +regex = '''(a*)*(x)''' +haystack = '''ax''' +matches = [[[0, 2], [0, 1], [1, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr66" +regex = '''(a*)*(x)''' +haystack = '''axa''' +matches = [[[0, 2], [0, 1], [1, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr68" +regex = '''(a*)+(x)''' +haystack = '''x''' +matches = [[[0, 1], [0, 0], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr69" +regex = '''(a*)+(x)''' +haystack = '''ax''' +matches = [[[0, 2], [0, 1], [1, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr70" +regex = '''(a*)+(x)''' +haystack = '''axa''' +matches = [[[0, 2], [0, 1], [1, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr72" +regex = '''(a*){2}(x)''' +haystack = '''x''' +matches = [[[0, 1], [0, 0], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr73" +regex = '''(a*){2}(x)''' +haystack = '''ax''' +matches = [[[0, 2], [1, 1], [1, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "nullsubexpr74" +regex = '''(a*){2}(x)''' +haystack = '''axa''' +matches = [[[0, 2], [1, 1], [1, 2]]] +match-limit = 1 +anchored = true + diff --git a/vendor/regex/testdata/fowler/repetition.toml b/vendor/regex/testdata/fowler/repetition.toml new file mode 100644 index 0000000..d6a7112 --- /dev/null +++ b/vendor/regex/testdata/fowler/repetition.toml @@ -0,0 +1,746 @@ +# !!! DO NOT EDIT !!! +# Automatically generated by 'regex-cli generate fowler'. +# Numbers in the test names correspond to the line number of the test from +# the original dat file. 
+ +[[test]] +name = "repetition10" +regex = '''((..)|(.))''' +haystack = '''''' +matches = [] +match-limit = 1 + +[[test]] +name = "repetition11" +regex = '''((..)|(.))((..)|(.))''' +haystack = '''''' +matches = [] +match-limit = 1 + +[[test]] +name = "repetition12" +regex = '''((..)|(.))((..)|(.))((..)|(.))''' +haystack = '''''' +matches = [] +match-limit = 1 + +[[test]] +name = "repetition14" +regex = '''((..)|(.)){1}''' +haystack = '''''' +matches = [] +match-limit = 1 + +[[test]] +name = "repetition15" +regex = '''((..)|(.)){2}''' +haystack = '''''' +matches = [] +match-limit = 1 + +[[test]] +name = "repetition16" +regex = '''((..)|(.)){3}''' +haystack = '''''' +matches = [] +match-limit = 1 + +[[test]] +name = "repetition18" +regex = '''((..)|(.))*''' +haystack = '''''' +matches = [[[0, 0], [], [], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition20" +regex = '''((..)|(.))''' +haystack = '''a''' +matches = [[[0, 1], [0, 1], [], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition21" +regex = '''((..)|(.))((..)|(.))''' +haystack = '''a''' +matches = [] +match-limit = 1 + +[[test]] +name = "repetition22" +regex = '''((..)|(.))((..)|(.))((..)|(.))''' +haystack = '''a''' +matches = [] +match-limit = 1 + +[[test]] +name = "repetition24" +regex = '''((..)|(.)){1}''' +haystack = '''a''' +matches = [[[0, 1], [0, 1], [], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition25" +regex = '''((..)|(.)){2}''' +haystack = '''a''' +matches = [] +match-limit = 1 + +[[test]] +name = "repetition26" +regex = '''((..)|(.)){3}''' +haystack = '''a''' +matches = [] +match-limit = 1 + +[[test]] +name = "repetition28" +regex = '''((..)|(.))*''' +haystack = '''a''' +matches = [[[0, 1], [0, 1], [], [0, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition30" +regex = '''((..)|(.))''' +haystack = '''aa''' +matches = [[[0, 2], [0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = 
"repetition31" +regex = '''((..)|(.))((..)|(.))''' +haystack = '''aa''' +matches = [[[0, 2], [0, 1], [], [0, 1], [1, 2], [], [1, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition32" +regex = '''((..)|(.))((..)|(.))((..)|(.))''' +haystack = '''aa''' +matches = [] +match-limit = 1 + +[[test]] +name = "repetition34" +regex = '''((..)|(.)){1}''' +haystack = '''aa''' +matches = [[[0, 2], [0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition35" +regex = '''((..)|(.)){2}''' +haystack = '''aa''' +matches = [[[0, 2], [1, 2], [], [1, 2]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition36" +regex = '''((..)|(.)){3}''' +haystack = '''aa''' +matches = [] +match-limit = 1 + +[[test]] +name = "repetition38" +regex = '''((..)|(.))*''' +haystack = '''aa''' +matches = [[[0, 2], [0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition40" +regex = '''((..)|(.))''' +haystack = '''aaa''' +matches = [[[0, 2], [0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition41" +regex = '''((..)|(.))((..)|(.))''' +haystack = '''aaa''' +matches = [[[0, 3], [0, 2], [0, 2], [], [2, 3], [], [2, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition42" +regex = '''((..)|(.))((..)|(.))((..)|(.))''' +haystack = '''aaa''' +matches = [[[0, 3], [0, 1], [], [0, 1], [1, 2], [], [1, 2], [2, 3], [], [2, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition44" +regex = '''((..)|(.)){1}''' +haystack = '''aaa''' +matches = [[[0, 2], [0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. 
+[[test]] +name = "repetition46" +regex = '''((..)|(.)){2}''' +haystack = '''aaa''' +matches = [[[0, 3], [2, 3], [0, 2], [2, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition47" +regex = '''((..)|(.)){3}''' +haystack = '''aaa''' +matches = [[[0, 3], [2, 3], [], [2, 3]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition50" +regex = '''((..)|(.))*''' +haystack = '''aaa''' +matches = [[[0, 3], [2, 3], [0, 2], [2, 3]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition52" +regex = '''((..)|(.))''' +haystack = '''aaaa''' +matches = [[[0, 2], [0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition53" +regex = '''((..)|(.))((..)|(.))''' +haystack = '''aaaa''' +matches = [[[0, 4], [0, 2], [0, 2], [], [2, 4], [2, 4], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition54" +regex = '''((..)|(.))((..)|(.))((..)|(.))''' +haystack = '''aaaa''' +matches = [[[0, 4], [0, 2], [0, 2], [], [2, 3], [], [2, 3], [3, 4], [], [3, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition56" +regex = '''((..)|(.)){1}''' +haystack = '''aaaa''' +matches = [[[0, 2], [0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition57" +regex = '''((..)|(.)){2}''' +haystack = '''aaaa''' +matches = [[[0, 4], [2, 4], [2, 4], []]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. 
+[[test]] +name = "repetition59" +regex = '''((..)|(.)){3}''' +haystack = '''aaaa''' +matches = [[[0, 4], [3, 4], [0, 2], [3, 4]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition61" +regex = '''((..)|(.))*''' +haystack = '''aaaa''' +matches = [[[0, 4], [2, 4], [2, 4], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition63" +regex = '''((..)|(.))''' +haystack = '''aaaaa''' +matches = [[[0, 2], [0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition64" +regex = '''((..)|(.))((..)|(.))''' +haystack = '''aaaaa''' +matches = [[[0, 4], [0, 2], [0, 2], [], [2, 4], [2, 4], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition65" +regex = '''((..)|(.))((..)|(.))((..)|(.))''' +haystack = '''aaaaa''' +matches = [[[0, 5], [0, 2], [0, 2], [], [2, 4], [2, 4], [], [4, 5], [], [4, 5]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition67" +regex = '''((..)|(.)){1}''' +haystack = '''aaaaa''' +matches = [[[0, 2], [0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition68" +regex = '''((..)|(.)){2}''' +haystack = '''aaaaa''' +matches = [[[0, 4], [2, 4], [2, 4], []]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition70" +regex = '''((..)|(.)){3}''' +haystack = '''aaaaa''' +matches = [[[0, 5], [4, 5], [2, 4], [4, 5]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. 
+[[test]] +name = "repetition73" +regex = '''((..)|(.))*''' +haystack = '''aaaaa''' +matches = [[[0, 5], [4, 5], [2, 4], [4, 5]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition75" +regex = '''((..)|(.))''' +haystack = '''aaaaaa''' +matches = [[[0, 2], [0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition76" +regex = '''((..)|(.))((..)|(.))''' +haystack = '''aaaaaa''' +matches = [[[0, 4], [0, 2], [0, 2], [], [2, 4], [2, 4], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition77" +regex = '''((..)|(.))((..)|(.))((..)|(.))''' +haystack = '''aaaaaa''' +matches = [[[0, 6], [0, 2], [0, 2], [], [2, 4], [2, 4], [], [4, 6], [4, 6], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition79" +regex = '''((..)|(.)){1}''' +haystack = '''aaaaaa''' +matches = [[[0, 2], [0, 2], [0, 2], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition80" +regex = '''((..)|(.)){2}''' +haystack = '''aaaaaa''' +matches = [[[0, 4], [2, 4], [2, 4], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition81" +regex = '''((..)|(.)){3}''' +haystack = '''aaaaaa''' +matches = [[[0, 6], [4, 6], [4, 6], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition83" +regex = '''((..)|(.))*''' +haystack = '''aaaaaa''' +matches = [[[0, 6], [4, 6], [4, 6], []]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive90" +regex = '''X(.?){0,}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [7, 8]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive91" +regex = '''X(.?){1,}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [7, 8]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive92" +regex = '''X(.?){2,}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [7, 8]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive93" +regex = '''X(.?){3,}Y''' +haystack = '''X1234567Y''' +matches = 
[[[0, 9], [7, 8]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive94" +regex = '''X(.?){4,}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [7, 8]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive95" +regex = '''X(.?){5,}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [7, 8]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive96" +regex = '''X(.?){6,}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [7, 8]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive97" +regex = '''X(.?){7,}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [7, 8]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive98" +regex = '''X(.?){8,}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [8, 8]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive100" +regex = '''X(.?){0,8}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [8, 8]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive102" +regex = '''X(.?){1,8}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [8, 8]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive104" +regex = '''X(.?){2,8}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [8, 8]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive106" +regex = '''X(.?){3,8}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [8, 8]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive108" +regex = '''X(.?){4,8}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [8, 8]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. 
+[[test]] +name = "repetition-expensive110" +regex = '''X(.?){5,8}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [8, 8]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive112" +regex = '''X(.?){6,8}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [8, 8]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive114" +regex = '''X(.?){7,8}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [8, 8]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive115" +regex = '''X(.?){8,8}Y''' +haystack = '''X1234567Y''' +matches = [[[0, 9], [8, 8]]] +match-limit = 1 +anchored = true + +# Test added by Rust regex project. +[[test]] +name = "repetition-expensive127" +regex = '''(a|ab|c|bcd){0,}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 1], [0, 1], [1, 1]]] +match-limit = 1 +anchored = true + +# Test added by Rust regex project. +[[test]] +name = "repetition-expensive129" +regex = '''(a|ab|c|bcd){1,}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 1], [0, 1], [1, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive130" +regex = '''(a|ab|c|bcd){2,}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [3, 6], [6, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive131" +regex = '''(a|ab|c|bcd){3,}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [3, 6], [6, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive132" +regex = '''(a|ab|c|bcd){4,}(d*)''' +haystack = '''ababcd''' +matches = [] +match-limit = 1 + +# Test added by Rust regex project. +[[test]] +name = "repetition-expensive134" +regex = '''(a|ab|c|bcd){0,10}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 1], [0, 1], [1, 1]]] +match-limit = 1 +anchored = true + +# Test added by Rust regex project. 
+[[test]] +name = "repetition-expensive136" +regex = '''(a|ab|c|bcd){1,10}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 1], [0, 1], [1, 1]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive137" +regex = '''(a|ab|c|bcd){2,10}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [3, 6], [6, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive138" +regex = '''(a|ab|c|bcd){3,10}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [3, 6], [6, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive139" +regex = '''(a|ab|c|bcd){4,10}(d*)''' +haystack = '''ababcd''' +matches = [] +match-limit = 1 + +# Test added by Rust regex project. +[[test]] +name = "repetition-expensive141" +regex = '''(a|ab|c|bcd)*(d*)''' +haystack = '''ababcd''' +matches = [[[0, 1], [0, 1], [1, 1]]] +match-limit = 1 +anchored = true + +# Test added by Rust regex project. +[[test]] +name = "repetition-expensive143" +regex = '''(a|ab|c|bcd)+(d*)''' +haystack = '''ababcd''' +matches = [[[0, 1], [0, 1], [1, 1]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive149" +regex = '''(ab|a|c|bcd){0,}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [4, 5], [5, 6]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive151" +regex = '''(ab|a|c|bcd){1,}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [4, 5], [5, 6]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive153" +regex = '''(ab|a|c|bcd){2,}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [4, 5], [5, 6]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. 
+[[test]] +name = "repetition-expensive155" +regex = '''(ab|a|c|bcd){3,}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [4, 5], [5, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive156" +regex = '''(ab|a|c|bcd){4,}(d*)''' +haystack = '''ababcd''' +matches = [] +match-limit = 1 + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive158" +regex = '''(ab|a|c|bcd){0,10}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [4, 5], [5, 6]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive160" +regex = '''(ab|a|c|bcd){1,10}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [4, 5], [5, 6]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive162" +regex = '''(ab|a|c|bcd){2,10}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [4, 5], [5, 6]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive164" +regex = '''(ab|a|c|bcd){3,10}(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [4, 5], [5, 6]]] +match-limit = 1 +anchored = true + +[[test]] +name = "repetition-expensive165" +regex = '''(ab|a|c|bcd){4,10}(d*)''' +haystack = '''ababcd''' +matches = [] +match-limit = 1 + +# Test added by RE2/Go project. +[[test]] +name = "repetition-expensive167" +regex = '''(ab|a|c|bcd)*(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [4, 5], [5, 6]]] +match-limit = 1 +anchored = true + +# Test added by RE2/Go project. 
+[[test]] +name = "repetition-expensive169" +regex = '''(ab|a|c|bcd)+(d*)''' +haystack = '''ababcd''' +matches = [[[0, 6], [4, 5], [5, 6]]] +match-limit = 1 +anchored = true + diff --git a/vendor/regex/testdata/iter.toml b/vendor/regex/testdata/iter.toml new file mode 100644 index 0000000..329b9f0 --- /dev/null +++ b/vendor/regex/testdata/iter.toml @@ -0,0 +1,143 @@ +[[test]] +name = "1" +regex = "a" +haystack = "aaa" +matches = [[0, 1], [1, 2], [2, 3]] + +[[test]] +name = "2" +regex = "a" +haystack = "aba" +matches = [[0, 1], [2, 3]] + +[[test]] +name = "empty1" +regex = '' +haystack = '' +matches = [[0, 0]] + +[[test]] +name = "empty2" +regex = '' +haystack = 'abc' +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty3" +regex = '(?:)' +haystack = 'abc' +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty4" +regex = '(?:)*' +haystack = 'abc' +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty5" +regex = '(?:)+' +haystack = 'abc' +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty6" +regex = '(?:)?' +haystack = 'abc' +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty7" +regex = '(?:)(?:)' +haystack = 'abc' +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty8" +regex = '(?:)+|z' +haystack = 'abc' +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty9" +regex = 'z|(?:)+' +haystack = 'abc' +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty10" +regex = '(?:)+|b' +haystack = 'abc' +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] + +[[test]] +name = "empty11" +regex = 'b|(?:)+' +haystack = 'abc' +matches = [[0, 0], [1, 2], [3, 3]] + +[[test]] +name = "start1" +regex = "^a" +haystack = "a" +matches = [[0, 1]] + +[[test]] +name = "start2" +regex = "^a" +haystack = "aa" +matches = [[0, 1]] + +[[test]] +name = "anchored1" +regex = "a" +haystack = "a" +matches = [[0, 1]] +anchored = true + +# This test is pretty subtle. 
It demonstrates the crucial difference between +# '^a' and 'a' compiled in 'anchored' mode. The former regex exclusively +# matches at the start of a haystack and nowhere else. The latter regex has +# no such restriction, but its automaton is constructed such that it lacks a +# `.*?` prefix. So it can actually produce matches at multiple locations. +# The anchored3 test drives this point home. +[[test]] +name = "anchored2" +regex = "a" +haystack = "aa" +matches = [[0, 1], [1, 2]] +anchored = true + +# Unlikely anchored2, this test stops matching anything after it sees `b` +# since it lacks a `.*?` prefix. Since it is looking for 'a' but sees 'b', it +# determines that there are no remaining matches. +[[test]] +name = "anchored3" +regex = "a" +haystack = "aaba" +matches = [[0, 1], [1, 2]] +anchored = true + +[[test]] +name = "nonempty-followedby-empty" +regex = 'abc|.*?' +haystack = "abczzz" +matches = [[0, 3], [4, 4], [5, 5], [6, 6]] + +[[test]] +name = "nonempty-followedby-oneempty" +regex = 'abc|.*?' +haystack = "abcz" +matches = [[0, 3], [4, 4]] + +[[test]] +name = "nonempty-followedby-onemixed" +regex = 'abc|.*?' +haystack = "abczabc" +matches = [[0, 3], [4, 7]] + +[[test]] +name = "nonempty-followedby-twomixed" +regex = 'abc|.*?' 
+haystack = "abczzabc" +matches = [[0, 3], [4, 4], [5, 8]] diff --git a/vendor/regex/testdata/leftmost-all.toml b/vendor/regex/testdata/leftmost-all.toml new file mode 100644 index 0000000..e3fd950 --- /dev/null +++ b/vendor/regex/testdata/leftmost-all.toml @@ -0,0 +1,25 @@ +[[test]] +name = "alt" +regex = 'foo|foobar' +haystack = "foobar" +matches = [[0, 6]] +match-kind = "all" +search-kind = "leftmost" + +[[test]] +name = "multi" +regex = ['foo', 'foobar'] +haystack = "foobar" +matches = [ + { id = 1, span = [0, 6] }, +] +match-kind = "all" +search-kind = "leftmost" + +[[test]] +name = "dotall" +regex = '(?s:.)' +haystack = "foobar" +matches = [[5, 6]] +match-kind = "all" +search-kind = "leftmost" diff --git a/vendor/regex/testdata/line-terminator.toml b/vendor/regex/testdata/line-terminator.toml new file mode 100644 index 0000000..a398daf --- /dev/null +++ b/vendor/regex/testdata/line-terminator.toml @@ -0,0 +1,109 @@ +# This tests that we can switch the line terminator to the NUL byte. +[[test]] +name = "nul" +regex = '(?m)^[a-z]+$' +haystack = '\x00abc\x00' +matches = [[1, 4]] +unescape = true +line-terminator = '\x00' + +# This tests that '.' will not match the configured line terminator, but will +# match \n. +[[test]] +name = "dot-changes-with-line-terminator" +regex = '.' +haystack = '\x00\n' +matches = [[1, 2]] +unescape = true +line-terminator = '\x00' + +# This tests that when we switch the line terminator, \n is no longer +# recognized as the terminator. +[[test]] +name = "not-line-feed" +regex = '(?m)^[a-z]+$' +haystack = '\nabc\n' +matches = [] +unescape = true +line-terminator = '\x00' + +# This tests that we can set the line terminator to a non-ASCII byte and have +# it behave as expected. +[[test]] +name = "non-ascii" +regex = '(?m)^[a-z]+$' +haystack = '\xFFabc\xFF' +matches = [[1, 4]] +unescape = true +line-terminator = '\xFF' +utf8 = false + +# This tests a tricky case where the line terminator is set to \r. 
This ensures +# that the StartLF look-behind assertion is tracked when computing the start +# state. +[[test]] +name = "carriage" +regex = '(?m)^[a-z]+' +haystack = 'ABC\rabc' +matches = [[4, 7]] +bounds = [4, 7] +unescape = true +line-terminator = '\r' + +# This tests that we can set the line terminator to a byte corresponding to a +# word character, and things work as expected. +[[test]] +name = "word-byte" +regex = '(?m)^[a-z]+$' +haystack = 'ZabcZ' +matches = [[1, 4]] +unescape = true +line-terminator = 'Z' + +# This tests that we can set the line terminator to a byte corresponding to a +# non-word character, and things work as expected. +[[test]] +name = "non-word-byte" +regex = '(?m)^[a-z]+$' +haystack = '%abc%' +matches = [[1, 4]] +unescape = true +line-terminator = '%' + +# This combines "set line terminator to a word byte" with a word boundary +# assertion, which should result in no match even though ^/$ matches. +[[test]] +name = "word-boundary" +regex = '(?m)^\b[a-z]+\b$' +haystack = 'ZabcZ' +matches = [] +unescape = true +line-terminator = 'Z' + +# Like 'word-boundary', but does an anchored search at the point where ^ +# matches, but where \b should not. +[[test]] +name = "word-boundary-at" +regex = '(?m)^\b[a-z]+\b$' +haystack = 'ZabcZ' +matches = [] +bounds = [1, 4] +anchored = true +unescape = true +line-terminator = 'Z' + +# Like 'word-boundary-at', but flips the word boundary to a negation. This +# in particular tests a tricky case in DFA engines, where they must consider +# explicitly that a starting configuration from a custom line terminator may +# also required setting the "is from word byte" flag on a state. Otherwise, +# it's treated as "not from a word byte," which would result in \B not matching +# here when it should. 
+[[test]] +name = "not-word-boundary-at" +regex = '(?m)^\B[a-z]+\B$' +haystack = 'ZabcZ' +matches = [[1, 4]] +bounds = [1, 4] +anchored = true +unescape = true +line-terminator = 'Z' diff --git a/vendor/regex/testdata/misc.toml b/vendor/regex/testdata/misc.toml new file mode 100644 index 0000000..c65531f --- /dev/null +++ b/vendor/regex/testdata/misc.toml @@ -0,0 +1,99 @@ +[[test]] +name = "ascii-literal" +regex = "a" +haystack = "a" +matches = [[0, 1]] + +[[test]] +name = "ascii-literal-not" +regex = "a" +haystack = "z" +matches = [] + +[[test]] +name = "ascii-literal-anchored" +regex = "a" +haystack = "a" +matches = [[0, 1]] +anchored = true + +[[test]] +name = "ascii-literal-anchored-not" +regex = "a" +haystack = "z" +matches = [] +anchored = true + +[[test]] +name = "anchor-start-end-line" +regex = '(?m)^bar$' +haystack = "foo\nbar\nbaz" +matches = [[4, 7]] + +[[test]] +name = "prefix-literal-match" +regex = '^abc' +haystack = "abc" +matches = [[0, 3]] + +[[test]] +name = "prefix-literal-match-ascii" +regex = '^abc' +haystack = "abc" +matches = [[0, 3]] +unicode = false +utf8 = false + +[[test]] +name = "prefix-literal-no-match" +regex = '^abc' +haystack = "zabc" +matches = [] + +[[test]] +name = "one-literal-edge" +regex = 'abc' +haystack = "xxxxxab" +matches = [] + +[[test]] +name = "terminates" +regex = 'a$' +haystack = "a" +matches = [[0, 1]] + +[[test]] +name = "suffix-100" +regex = '.*abcd' +haystack = "abcd" +matches = [[0, 4]] + +[[test]] +name = "suffix-200" +regex = '.*(?:abcd)+' +haystack = "abcd" +matches = [[0, 4]] + +[[test]] +name = "suffix-300" +regex = '.*(?:abcd)+' +haystack = "abcdabcd" +matches = [[0, 8]] + +[[test]] +name = "suffix-400" +regex = '.*(?:abcd)+' +haystack = "abcdxabcd" +matches = [[0, 9]] + +[[test]] +name = "suffix-500" +regex = '.*x(?:abcd)+' +haystack = "abcdxabcd" +matches = [[0, 9]] + +[[test]] +name = "suffix-600" +regex = '[^abcd]*x(?:abcd)+' +haystack = "abcdxabcd" +matches = [[4, 9]] diff --git 
a/vendor/regex/testdata/multiline.toml b/vendor/regex/testdata/multiline.toml new file mode 100644 index 0000000..3acc901 --- /dev/null +++ b/vendor/regex/testdata/multiline.toml @@ -0,0 +1,845 @@ +[[test]] +name = "basic1" +regex = '(?m)^[a-z]+$' +haystack = "abc\ndef\nxyz" +matches = [[0, 3], [4, 7], [8, 11]] + +[[test]] +name = "basic1-crlf" +regex = '(?Rm)^[a-z]+$' +haystack = "abc\ndef\nxyz" +matches = [[0, 3], [4, 7], [8, 11]] + +[[test]] +name = "basic1-crlf-cr" +regex = '(?Rm)^[a-z]+$' +haystack = "abc\rdef\rxyz" +matches = [[0, 3], [4, 7], [8, 11]] + +[[test]] +name = "basic2" +regex = '(?m)^$' +haystack = "abc\ndef\nxyz" +matches = [] + +[[test]] +name = "basic2-crlf" +regex = '(?Rm)^$' +haystack = "abc\ndef\nxyz" +matches = [] + +[[test]] +name = "basic2-crlf-cr" +regex = '(?Rm)^$' +haystack = "abc\rdef\rxyz" +matches = [] + +[[test]] +name = "basic3" +regex = '(?m)^' +haystack = "abc\ndef\nxyz" +matches = [[0, 0], [4, 4], [8, 8]] + +[[test]] +name = "basic3-crlf" +regex = '(?Rm)^' +haystack = "abc\ndef\nxyz" +matches = [[0, 0], [4, 4], [8, 8]] + +[[test]] +name = "basic3-crlf-cr" +regex = '(?Rm)^' +haystack = "abc\rdef\rxyz" +matches = [[0, 0], [4, 4], [8, 8]] + +[[test]] +name = "basic4" +regex = '(?m)$' +haystack = "abc\ndef\nxyz" +matches = [[3, 3], [7, 7], [11, 11]] + +[[test]] +name = "basic4-crlf" +regex = '(?Rm)$' +haystack = "abc\ndef\nxyz" +matches = [[3, 3], [7, 7], [11, 11]] + +[[test]] +name = "basic4-crlf-cr" +regex = '(?Rm)$' +haystack = "abc\rdef\rxyz" +matches = [[3, 3], [7, 7], [11, 11]] + +[[test]] +name = "basic5" +regex = '(?m)^[a-z]' +haystack = "abc\ndef\nxyz" +matches = [[0, 1], [4, 5], [8, 9]] + +[[test]] +name = "basic5-crlf" +regex = '(?Rm)^[a-z]' +haystack = "abc\ndef\nxyz" +matches = [[0, 1], [4, 5], [8, 9]] + +[[test]] +name = "basic5-crlf-cr" +regex = '(?Rm)^[a-z]' +haystack = "abc\rdef\rxyz" +matches = [[0, 1], [4, 5], [8, 9]] + +[[test]] +name = "basic6" +regex = '(?m)[a-z]^' +haystack = "abc\ndef\nxyz" +matches = [] + 
+[[test]] +name = "basic6-crlf" +regex = '(?Rm)[a-z]^' +haystack = "abc\ndef\nxyz" +matches = [] + +[[test]] +name = "basic6-crlf-cr" +regex = '(?Rm)[a-z]^' +haystack = "abc\rdef\rxyz" +matches = [] + +[[test]] +name = "basic7" +regex = '(?m)[a-z]$' +haystack = "abc\ndef\nxyz" +matches = [[2, 3], [6, 7], [10, 11]] + +[[test]] +name = "basic7-crlf" +regex = '(?Rm)[a-z]$' +haystack = "abc\ndef\nxyz" +matches = [[2, 3], [6, 7], [10, 11]] + +[[test]] +name = "basic7-crlf-cr" +regex = '(?Rm)[a-z]$' +haystack = "abc\rdef\rxyz" +matches = [[2, 3], [6, 7], [10, 11]] + +[[test]] +name = "basic8" +regex = '(?m)$[a-z]' +haystack = "abc\ndef\nxyz" +matches = [] + +[[test]] +name = "basic8-crlf" +regex = '(?Rm)$[a-z]' +haystack = "abc\ndef\nxyz" +matches = [] + +[[test]] +name = "basic8-crlf-cr" +regex = '(?Rm)$[a-z]' +haystack = "abc\rdef\rxyz" +matches = [] + +[[test]] +name = "basic9" +regex = '(?m)^$' +haystack = "" +matches = [[0, 0]] + +[[test]] +name = "basic9-crlf" +regex = '(?Rm)^$' +haystack = "" +matches = [[0, 0]] + +[[test]] +name = "repeat1" +regex = '(?m)(?:^$)*' +haystack = "a\nb\nc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] + +[[test]] +name = "repeat1-crlf" +regex = '(?Rm)(?:^$)*' +haystack = "a\nb\nc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] + +[[test]] +name = "repeat1-crlf-cr" +regex = '(?Rm)(?:^$)*' +haystack = "a\rb\rc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] + +[[test]] +name = "repeat1-no-multi" +regex = '(?:^$)*' +haystack = "a\nb\nc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] + +[[test]] +name = "repeat1-no-multi-crlf" +regex = '(?R)(?:^$)*' +haystack = "a\nb\nc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] + +[[test]] +name = "repeat1-no-multi-crlf-cr" +regex = '(?R)(?:^$)*' +haystack = "a\rb\rc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] + +[[test]] +name = "repeat2" +regex = '(?m)(?:^|a)+' +haystack = "a\naaa\n" +matches = [[0, 0], [2, 2], [3, 
5], [6, 6]] + +[[test]] +name = "repeat2-crlf" +regex = '(?Rm)(?:^|a)+' +haystack = "a\naaa\n" +matches = [[0, 0], [2, 2], [3, 5], [6, 6]] + +[[test]] +name = "repeat2-crlf-cr" +regex = '(?Rm)(?:^|a)+' +haystack = "a\raaa\r" +matches = [[0, 0], [2, 2], [3, 5], [6, 6]] + +[[test]] +name = "repeat2-no-multi" +regex = '(?:^|a)+' +haystack = "a\naaa\n" +matches = [[0, 0], [2, 5]] + +[[test]] +name = "repeat2-no-multi-crlf" +regex = '(?R)(?:^|a)+' +haystack = "a\naaa\n" +matches = [[0, 0], [2, 5]] + +[[test]] +name = "repeat2-no-multi-crlf-cr" +regex = '(?R)(?:^|a)+' +haystack = "a\raaa\r" +matches = [[0, 0], [2, 5]] + +[[test]] +name = "repeat3" +regex = '(?m)(?:^|a)*' +haystack = "a\naaa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 5], [6, 6]] + +[[test]] +name = "repeat3-crlf" +regex = '(?Rm)(?:^|a)*' +haystack = "a\naaa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 5], [6, 6]] + +[[test]] +name = "repeat3-crlf-cr" +regex = '(?Rm)(?:^|a)*' +haystack = "a\raaa\r" +matches = [[0, 0], [1, 1], [2, 2], [3, 5], [6, 6]] + +[[test]] +name = "repeat3-no-multi" +regex = '(?:^|a)*' +haystack = "a\naaa\n" +matches = [[0, 0], [1, 1], [2, 5], [6, 6]] + +[[test]] +name = "repeat3-no-multi-crlf" +regex = '(?R)(?:^|a)*' +haystack = "a\naaa\n" +matches = [[0, 0], [1, 1], [2, 5], [6, 6]] + +[[test]] +name = "repeat3-no-multi-crlf-cr" +regex = '(?R)(?:^|a)*' +haystack = "a\raaa\r" +matches = [[0, 0], [1, 1], [2, 5], [6, 6]] + +[[test]] +name = "repeat4" +regex = '(?m)(?:^|a+)' +haystack = "a\naaa\n" +matches = [[0, 0], [2, 2], [3, 5], [6, 6]] + +[[test]] +name = "repeat4-crlf" +regex = '(?Rm)(?:^|a+)' +haystack = "a\naaa\n" +matches = [[0, 0], [2, 2], [3, 5], [6, 6]] + +[[test]] +name = "repeat4-crlf-cr" +regex = '(?Rm)(?:^|a+)' +haystack = "a\raaa\r" +matches = [[0, 0], [2, 2], [3, 5], [6, 6]] + +[[test]] +name = "repeat4-no-multi" +regex = '(?:^|a+)' +haystack = "a\naaa\n" +matches = [[0, 0], [2, 5]] + +[[test]] +name = "repeat4-no-multi-crlf" +regex = '(?R)(?:^|a+)' +haystack = "a\naaa\n" 
+matches = [[0, 0], [2, 5]] + +[[test]] +name = "repeat4-no-multi-crlf-cr" +regex = '(?R)(?:^|a+)' +haystack = "a\raaa\r" +matches = [[0, 0], [2, 5]] + +[[test]] +name = "repeat5" +regex = '(?m)(?:^|a*)' +haystack = "a\naaa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 5], [6, 6]] + +[[test]] +name = "repeat5-crlf" +regex = '(?Rm)(?:^|a*)' +haystack = "a\naaa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 5], [6, 6]] + +[[test]] +name = "repeat5-crlf-cr" +regex = '(?Rm)(?:^|a*)' +haystack = "a\raaa\r" +matches = [[0, 0], [1, 1], [2, 2], [3, 5], [6, 6]] + +[[test]] +name = "repeat5-no-multi" +regex = '(?:^|a*)' +haystack = "a\naaa\n" +matches = [[0, 0], [1, 1], [2, 5], [6, 6]] + +[[test]] +name = "repeat5-no-multi-crlf" +regex = '(?R)(?:^|a*)' +haystack = "a\naaa\n" +matches = [[0, 0], [1, 1], [2, 5], [6, 6]] + +[[test]] +name = "repeat5-no-multi-crlf-cr" +regex = '(?R)(?:^|a*)' +haystack = "a\raaa\r" +matches = [[0, 0], [1, 1], [2, 5], [6, 6]] + +[[test]] +name = "repeat6" +regex = '(?m)(?:^[a-z])+' +haystack = "abc\ndef\nxyz" +matches = [[0, 1], [4, 5], [8, 9]] + +[[test]] +name = "repeat6-crlf" +regex = '(?Rm)(?:^[a-z])+' +haystack = "abc\ndef\nxyz" +matches = [[0, 1], [4, 5], [8, 9]] + +[[test]] +name = "repeat6-crlf-cr" +regex = '(?Rm)(?:^[a-z])+' +haystack = "abc\rdef\rxyz" +matches = [[0, 1], [4, 5], [8, 9]] + +[[test]] +name = "repeat6-no-multi" +regex = '(?:^[a-z])+' +haystack = "abc\ndef\nxyz" +matches = [[0, 1]] + +[[test]] +name = "repeat6-no-multi-crlf" +regex = '(?R)(?:^[a-z])+' +haystack = "abc\ndef\nxyz" +matches = [[0, 1]] + +[[test]] +name = "repeat6-no-multi-crlf-cr" +regex = '(?R)(?:^[a-z])+' +haystack = "abc\rdef\rxyz" +matches = [[0, 1]] + +[[test]] +name = "repeat7" +regex = '(?m)(?:^[a-z]{3}\n?)+' +haystack = "abc\ndef\nxyz" +matches = [[0, 11]] + +[[test]] +name = "repeat7-crlf" +regex = '(?Rm)(?:^[a-z]{3}\n?)+' +haystack = "abc\ndef\nxyz" +matches = [[0, 11]] + +[[test]] +name = "repeat7-crlf-cr" +regex = '(?Rm)(?:^[a-z]{3}\r?)+' +haystack = 
"abc\rdef\rxyz" +matches = [[0, 11]] + +[[test]] +name = "repeat7-no-multi" +regex = '(?:^[a-z]{3}\n?)+' +haystack = "abc\ndef\nxyz" +matches = [[0, 4]] + +[[test]] +name = "repeat7-no-multi-crlf" +regex = '(?R)(?:^[a-z]{3}\n?)+' +haystack = "abc\ndef\nxyz" +matches = [[0, 4]] + +[[test]] +name = "repeat7-no-multi-crlf-cr" +regex = '(?R)(?:^[a-z]{3}\r?)+' +haystack = "abc\rdef\rxyz" +matches = [[0, 4]] + +[[test]] +name = "repeat8" +regex = '(?m)(?:^[a-z]{3}\n?)*' +haystack = "abc\ndef\nxyz" +matches = [[0, 11]] + +[[test]] +name = "repeat8-crlf" +regex = '(?Rm)(?:^[a-z]{3}\n?)*' +haystack = "abc\ndef\nxyz" +matches = [[0, 11]] + +[[test]] +name = "repeat8-crlf-cr" +regex = '(?Rm)(?:^[a-z]{3}\r?)*' +haystack = "abc\rdef\rxyz" +matches = [[0, 11]] + +[[test]] +name = "repeat8-no-multi" +regex = '(?:^[a-z]{3}\n?)*' +haystack = "abc\ndef\nxyz" +matches = [[0, 4], [5, 5], [6, 6], [7, 7], [8, 8], [9, 9], [10, 10], [11, 11]] + +[[test]] +name = "repeat8-no-multi-crlf" +regex = '(?R)(?:^[a-z]{3}\n?)*' +haystack = "abc\ndef\nxyz" +matches = [[0, 4], [5, 5], [6, 6], [7, 7], [8, 8], [9, 9], [10, 10], [11, 11]] + +[[test]] +name = "repeat8-no-multi-crlf-cr" +regex = '(?R)(?:^[a-z]{3}\r?)*' +haystack = "abc\rdef\rxyz" +matches = [[0, 4], [5, 5], [6, 6], [7, 7], [8, 8], [9, 9], [10, 10], [11, 11]] + +[[test]] +name = "repeat9" +regex = '(?m)(?:\n?[a-z]{3}$)+' +haystack = "abc\ndef\nxyz" +matches = [[0, 11]] + +[[test]] +name = "repeat9-crlf" +regex = '(?Rm)(?:\n?[a-z]{3}$)+' +haystack = "abc\ndef\nxyz" +matches = [[0, 11]] + +[[test]] +name = "repeat9-crlf-cr" +regex = '(?Rm)(?:\r?[a-z]{3}$)+' +haystack = "abc\rdef\rxyz" +matches = [[0, 11]] + +[[test]] +name = "repeat9-no-multi" +regex = '(?:\n?[a-z]{3}$)+' +haystack = "abc\ndef\nxyz" +matches = [[7, 11]] + +[[test]] +name = "repeat9-no-multi-crlf" +regex = '(?R)(?:\n?[a-z]{3}$)+' +haystack = "abc\ndef\nxyz" +matches = [[7, 11]] + +[[test]] +name = "repeat9-no-multi-crlf-cr" +regex = '(?R)(?:\r?[a-z]{3}$)+' +haystack = 
"abc\rdef\rxyz" +matches = [[7, 11]] + +[[test]] +name = "repeat10" +regex = '(?m)(?:\n?[a-z]{3}$)*' +haystack = "abc\ndef\nxyz" +matches = [[0, 11]] + +[[test]] +name = "repeat10-crlf" +regex = '(?Rm)(?:\n?[a-z]{3}$)*' +haystack = "abc\ndef\nxyz" +matches = [[0, 11]] + +[[test]] +name = "repeat10-crlf-cr" +regex = '(?Rm)(?:\r?[a-z]{3}$)*' +haystack = "abc\rdef\rxyz" +matches = [[0, 11]] + +[[test]] +name = "repeat10-no-multi" +regex = '(?:\n?[a-z]{3}$)*' +haystack = "abc\ndef\nxyz" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 11]] + +[[test]] +name = "repeat10-no-multi-crlf" +regex = '(?R)(?:\n?[a-z]{3}$)*' +haystack = "abc\ndef\nxyz" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 11]] + +[[test]] +name = "repeat10-no-multi-crlf-cr" +regex = '(?R)(?:\r?[a-z]{3}$)*' +haystack = "abc\rdef\rxyz" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 11]] + +[[test]] +name = "repeat11" +regex = '(?m)^*' +haystack = "\naa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +[[test]] +name = "repeat11-crlf" +regex = '(?Rm)^*' +haystack = "\naa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +[[test]] +name = "repeat11-crlf-cr" +regex = '(?Rm)^*' +haystack = "\raa\r" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +[[test]] +name = "repeat11-no-multi" +regex = '^*' +haystack = "\naa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +[[test]] +name = "repeat11-no-multi-crlf" +regex = '(?R)^*' +haystack = "\naa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +[[test]] +name = "repeat11-no-multi-crlf-cr" +regex = '(?R)^*' +haystack = "\raa\r" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +[[test]] +name = "repeat12" +regex = '(?m)^+' +haystack = "\naa\n" +matches = [[0, 0], [1, 1], [4, 4]] + +[[test]] +name = "repeat12-crlf" +regex = '(?Rm)^+' +haystack = "\naa\n" +matches = [[0, 0], [1, 1], [4, 4]] + +[[test]] +name = "repeat12-crlf-cr" +regex = '(?Rm)^+' 
+haystack = "\raa\r" +matches = [[0, 0], [1, 1], [4, 4]] + +[[test]] +name = "repeat12-no-multi" +regex = '^+' +haystack = "\naa\n" +matches = [[0, 0]] + +[[test]] +name = "repeat12-no-multi-crlf" +regex = '(?R)^+' +haystack = "\naa\n" +matches = [[0, 0]] + +[[test]] +name = "repeat12-no-multi-crlf-cr" +regex = '(?R)^+' +haystack = "\raa\r" +matches = [[0, 0]] + +[[test]] +name = "repeat13" +regex = '(?m)$*' +haystack = "\naa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +[[test]] +name = "repeat13-crlf" +regex = '(?Rm)$*' +haystack = "\naa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +[[test]] +name = "repeat13-crlf-cr" +regex = '(?Rm)$*' +haystack = "\raa\r" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +[[test]] +name = "repeat13-no-multi" +regex = '$*' +haystack = "\naa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +[[test]] +name = "repeat13-no-multi-crlf" +regex = '(?R)$*' +haystack = "\naa\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +[[test]] +name = "repeat13-no-multi-crlf-cr" +regex = '(?R)$*' +haystack = "\raa\r" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +[[test]] +name = "repeat14" +regex = '(?m)$+' +haystack = "\naa\n" +matches = [[0, 0], [3, 3], [4, 4]] + +[[test]] +name = "repeat14-crlf" +regex = '(?Rm)$+' +haystack = "\naa\n" +matches = [[0, 0], [3, 3], [4, 4]] + +[[test]] +name = "repeat14-crlf-cr" +regex = '(?Rm)$+' +haystack = "\raa\r" +matches = [[0, 0], [3, 3], [4, 4]] + +[[test]] +name = "repeat14-no-multi" +regex = '$+' +haystack = "\naa\n" +matches = [[4, 4]] + +[[test]] +name = "repeat14-no-multi-crlf" +regex = '(?R)$+' +haystack = "\naa\n" +matches = [[4, 4]] + +[[test]] +name = "repeat14-no-multi-crlf-cr" +regex = '(?R)$+' +haystack = "\raa\r" +matches = [[4, 4]] + +[[test]] +name = "repeat15" +regex = '(?m)(?:$\n)+' +haystack = "\n\naaa\n\n" +matches = [[0, 2], [5, 7]] + +[[test]] +name = "repeat15-crlf" +regex = '(?Rm)(?:$\n)+' +haystack = "\n\naaa\n\n" +matches = [[0, 
2], [5, 7]] + +[[test]] +name = "repeat15-crlf-cr" +regex = '(?Rm)(?:$\r)+' +haystack = "\r\raaa\r\r" +matches = [[0, 2], [5, 7]] + +[[test]] +name = "repeat15-no-multi" +regex = '(?:$\n)+' +haystack = "\n\naaa\n\n" +matches = [] + +[[test]] +name = "repeat15-no-multi-crlf" +regex = '(?R)(?:$\n)+' +haystack = "\n\naaa\n\n" +matches = [] + +[[test]] +name = "repeat15-no-multi-crlf-cr" +regex = '(?R)(?:$\r)+' +haystack = "\r\raaa\r\r" +matches = [] + +[[test]] +name = "repeat16" +regex = '(?m)(?:$\n)*' +haystack = "\n\naaa\n\n" +matches = [[0, 2], [3, 3], [4, 4], [5, 7]] + +[[test]] +name = "repeat16-crlf" +regex = '(?Rm)(?:$\n)*' +haystack = "\n\naaa\n\n" +matches = [[0, 2], [3, 3], [4, 4], [5, 7]] + +[[test]] +name = "repeat16-crlf-cr" +regex = '(?Rm)(?:$\r)*' +haystack = "\r\raaa\r\r" +matches = [[0, 2], [3, 3], [4, 4], [5, 7]] + +[[test]] +name = "repeat16-no-multi" +regex = '(?:$\n)*' +haystack = "\n\naaa\n\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 7]] + +[[test]] +name = "repeat16-no-multi-crlf" +regex = '(?R)(?:$\n)*' +haystack = "\n\naaa\n\n" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 7]] + +[[test]] +name = "repeat16-no-multi-crlf-cr" +regex = '(?R)(?:$\r)*' +haystack = "\r\raaa\r\r" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 7]] + +[[test]] +name = "repeat17" +regex = '(?m)(?:$\n^)+' +haystack = "\n\naaa\n\n" +matches = [[0, 2], [5, 7]] + +[[test]] +name = "repeat17-crlf" +regex = '(?Rm)(?:$\n^)+' +haystack = "\n\naaa\n\n" +matches = [[0, 2], [5, 7]] + +[[test]] +name = "repeat17-crlf-cr" +regex = '(?Rm)(?:$\r^)+' +haystack = "\r\raaa\r\r" +matches = [[0, 2], [5, 7]] + +[[test]] +name = "repeat17-no-multi" +regex = '(?:$\n^)+' +haystack = "\n\naaa\n\n" +matches = [] + +[[test]] +name = "repeat17-no-multi-crlf" +regex = '(?R)(?:$\n^)+' +haystack = "\n\naaa\n\n" +matches = [] + +[[test]] +name = "repeat17-no-multi-crlf-cr" +regex = '(?R)(?:$\r^)+' +haystack = 
"\r\raaa\r\r" +matches = [] + +[[test]] +name = "repeat18" +regex = '(?m)(?:^|$)+' +haystack = "\n\naaa\n\n" +matches = [[0, 0], [1, 1], [2, 2], [5, 5], [6, 6], [7, 7]] + +[[test]] +name = "repeat18-crlf" +regex = '(?Rm)(?:^|$)+' +haystack = "\n\naaa\n\n" +matches = [[0, 0], [1, 1], [2, 2], [5, 5], [6, 6], [7, 7]] + +[[test]] +name = "repeat18-crlf-cr" +regex = '(?Rm)(?:^|$)+' +haystack = "\r\raaa\r\r" +matches = [[0, 0], [1, 1], [2, 2], [5, 5], [6, 6], [7, 7]] + +[[test]] +name = "repeat18-no-multi" +regex = '(?:^|$)+' +haystack = "\n\naaa\n\n" +matches = [[0, 0], [7, 7]] + +[[test]] +name = "repeat18-no-multi-crlf" +regex = '(?R)(?:^|$)+' +haystack = "\n\naaa\n\n" +matches = [[0, 0], [7, 7]] + +[[test]] +name = "repeat18-no-multi-crlf-cr" +regex = '(?R)(?:^|$)+' +haystack = "\r\raaa\r\r" +matches = [[0, 0], [7, 7]] + +[[test]] +name = "match-line-100" +regex = '(?m)^.+$' +haystack = "aa\naaaaaaaaaaaaaaaaaaa\n" +matches = [[0, 2], [3, 22]] + +[[test]] +name = "match-line-100-crlf" +regex = '(?Rm)^.+$' +haystack = "aa\naaaaaaaaaaaaaaaaaaa\n" +matches = [[0, 2], [3, 22]] + +[[test]] +name = "match-line-100-crlf-cr" +regex = '(?Rm)^.+$' +haystack = "aa\raaaaaaaaaaaaaaaaaaa\r" +matches = [[0, 2], [3, 22]] + +[[test]] +name = "match-line-200" +regex = '(?m)^.+$' +haystack = "aa\naaaaaaaaaaaaaaaaaaa\n" +matches = [[0, 2], [3, 22]] +unicode = false +utf8 = false + +[[test]] +name = "match-line-200-crlf" +regex = '(?Rm)^.+$' +haystack = "aa\naaaaaaaaaaaaaaaaaaa\n" +matches = [[0, 2], [3, 22]] +unicode = false +utf8 = false + +[[test]] +name = "match-line-200-crlf-cr" +regex = '(?Rm)^.+$' +haystack = "aa\raaaaaaaaaaaaaaaaaaa\r" +matches = [[0, 2], [3, 22]] +unicode = false +utf8 = false diff --git a/vendor/regex/testdata/no-unicode.toml b/vendor/regex/testdata/no-unicode.toml new file mode 100644 index 0000000..0ddac4c --- /dev/null +++ b/vendor/regex/testdata/no-unicode.toml @@ -0,0 +1,222 @@ +[[test]] +name = "invalid-utf8-literal1" +regex = '\xFF' +haystack = '\xFF' 
+matches = [[0, 1]] +unicode = false +utf8 = false +unescape = true + + +[[test]] +name = "mixed" +regex = '(?:.+)(?-u)(?:.+)' +haystack = '\xCE\x93\xCE\x94\xFF' +matches = [[0, 5]] +utf8 = false +unescape = true + + +[[test]] +name = "case1" +regex = "a" +haystack = "A" +matches = [[0, 1]] +case-insensitive = true +unicode = false + +[[test]] +name = "case2" +regex = "[a-z]+" +haystack = "AaAaA" +matches = [[0, 5]] +case-insensitive = true +unicode = false + +[[test]] +name = "case3" +regex = "[a-z]+" +haystack = "aA\u212AaA" +matches = [[0, 7]] +case-insensitive = true + +[[test]] +name = "case4" +regex = "[a-z]+" +haystack = "aA\u212AaA" +matches = [[0, 2], [5, 7]] +case-insensitive = true +unicode = false + + +[[test]] +name = "negate1" +regex = "[^a]" +haystack = "δ" +matches = [[0, 2]] + +[[test]] +name = "negate2" +regex = "[^a]" +haystack = "δ" +matches = [[0, 1], [1, 2]] +unicode = false +utf8 = false + + +[[test]] +name = "dotstar-prefix1" +regex = "a" +haystack = '\xFFa' +matches = [[1, 2]] +unicode = false +utf8 = false +unescape = true + +[[test]] +name = "dotstar-prefix2" +regex = "a" +haystack = '\xFFa' +matches = [[1, 2]] +utf8 = false +unescape = true + + +[[test]] +name = "null-bytes1" +regex = '[^\x00]+\x00' +haystack = 'foo\x00' +matches = [[0, 4]] +unicode = false +utf8 = false +unescape = true + + +[[test]] +name = "word-ascii" +regex = '\w+' +haystack = "aδ" +matches = [[0, 1]] +unicode = false + +[[test]] +name = "word-unicode" +regex = '\w+' +haystack = "aδ" +matches = [[0, 3]] + +[[test]] +name = "decimal-ascii" +regex = '\d+' +haystack = "1२३9" +matches = [[0, 1], [7, 8]] +unicode = false + +[[test]] +name = "decimal-unicode" +regex = '\d+' +haystack = "1२३9" +matches = [[0, 8]] + +[[test]] +name = "space-ascii" +regex = '\s+' +haystack = " \u1680" +matches = [[0, 1]] +unicode = false + +[[test]] +name = "space-unicode" +regex = '\s+' +haystack = " \u1680" +matches = [[0, 4]] + + +[[test]] +# See: 
https://github.com/rust-lang/regex/issues/484 +name = "iter1-bytes" +regex = '' +haystack = "☃" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] +utf8 = false + +[[test]] +# See: https://github.com/rust-lang/regex/issues/484 +name = "iter1-utf8" +regex = '' +haystack = "☃" +matches = [[0, 0], [3, 3]] + +[[test]] +# See: https://github.com/rust-lang/regex/issues/484 +# Note that iter2-utf8 doesn't make sense here, since the input isn't UTF-8. +name = "iter2-bytes" +regex = '' +haystack = 'b\xFFr' +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] +unescape = true +utf8 = false + + +# These test that unanchored prefixes can munch through invalid UTF-8 even when +# utf8 is enabled. +# +# This test actually reflects an interesting simplification in how the Thompson +# NFA is constructed. It used to be that the NFA could be built with an +# unanchored prefix that either matched any byte or _only_ matched valid UTF-8. +# But the latter turns out to be pretty precarious when it comes to prefilters, +# because if you search a haystack that contains invalid UTF-8 but have an +# unanchored prefix that requires UTF-8, then prefilters are no longer a valid +# optimization because you actually have to check that everything is valid +# UTF-8. +# +# Originally, I had thought that we needed a valid UTF-8 unanchored prefix in +# order to guarantee that we only match at valid UTF-8 boundaries. But this +# isn't actually true! There are really only two things to consider here: +# +# 1) Will a regex match split an encoded codepoint? No. Because by construction, +# we ensure that a MATCH state can only be reached by following valid UTF-8 (assuming +# all of the UTF-8 modes are enabled). +# +# 2) Will a regex match arbitrary bytes that aren't valid UTF-8? Again, no, +# assuming all of the UTF-8 modes are enabled. 
+[[test]] +name = "unanchored-invalid-utf8-match-100" +regex = '[a-z]' +haystack = '\xFFa\xFF' +matches = [[1, 2]] +unescape = true +utf8 = false + +# This test shows that we can still prevent a match from occurring by requiring +# that valid UTF-8 match by inserting our own unanchored prefix. Thus, if the +# behavior of not munching through invalid UTF-8 anywhere is needed, then it +# can be achieved thusly. +[[test]] +name = "unanchored-invalid-utf8-nomatch" +regex = '^(?s:.)*?[a-z]' +haystack = '\xFFa\xFF' +matches = [] +unescape = true +utf8 = false + +# This is a tricky test that makes sure we don't accidentally do a kind of +# unanchored search when we've requested that a regex engine not report +# empty matches that split a codepoint. This test caught a regression during +# development where the code for skipping over bad empty matches would do so +# even if the search should have been anchored. This is ultimately what led to +# making 'anchored' an 'Input' option, so that it was always clear what kind +# of search was being performed. (Before that, whether a search was anchored +# or not was a config knob on the regex engine.) This did wind up making DFAs +# a little more complex to configure (with their 'StartKind' knob), but it +# generally smoothed out everything else. +# +# Great example of a test whose failure motivated a sweeping API refactoring. +[[test]] +name = "anchored-iter-empty-utf8" +regex = '' +haystack = 'a☃z' +matches = [[0, 0], [1, 1]] +unescape = false +utf8 = true +anchored = true diff --git a/vendor/regex/testdata/overlapping.toml b/vendor/regex/testdata/overlapping.toml new file mode 100644 index 0000000..7bcd45a --- /dev/null +++ b/vendor/regex/testdata/overlapping.toml @@ -0,0 +1,280 @@ +# NOTE: We define a number of tests where the *match* kind is 'leftmost-first' +# but the *search* kind is 'overlapping'. This is a somewhat nonsensical +# combination and can produce odd results. 
Nevertheless, those results should +# be consistent so we test them here. (At the time of writing this note, I +# hadn't yet decided whether to make 'leftmost-first' with 'overlapping' result +# in unspecified behavior.) + +# This demonstrates how a full overlapping search is obvious quadratic. This +# regex reports a match for every substring in the haystack. +[[test]] +name = "ungreedy-dotstar-matches-everything-100" +regex = [".*?"] +haystack = "zzz" +matches = [ + { id = 0, span = [0, 0] }, + { id = 0, span = [1, 1] }, + { id = 0, span = [0, 1] }, + { id = 0, span = [2, 2] }, + { id = 0, span = [1, 2] }, + { id = 0, span = [0, 2] }, + { id = 0, span = [3, 3] }, + { id = 0, span = [2, 3] }, + { id = 0, span = [1, 3] }, + { id = 0, span = [0, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "greedy-dotstar-matches-everything-100" +regex = [".*"] +haystack = "zzz" +matches = [ + { id = 0, span = [0, 0] }, + { id = 0, span = [1, 1] }, + { id = 0, span = [0, 1] }, + { id = 0, span = [2, 2] }, + { id = 0, span = [1, 2] }, + { id = 0, span = [0, 2] }, + { id = 0, span = [3, 3] }, + { id = 0, span = [2, 3] }, + { id = 0, span = [1, 3] }, + { id = 0, span = [0, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "repetition-plus-leftmost-first-100" +regex = 'a+' +haystack = "aaa" +matches = [[0, 1], [1, 2], [0, 2], [2, 3], [1, 3], [0, 3]] +match-kind = "leftmost-first" +search-kind = "overlapping" + +[[test]] +name = "repetition-plus-leftmost-first-110" +regex = '☃+' +haystack = "☃☃☃" +matches = [[0, 3], [3, 6], [0, 6], [6, 9], [3, 9], [0, 9]] +match-kind = "leftmost-first" +search-kind = "overlapping" + +[[test]] +name = "repetition-plus-all-100" +regex = 'a+' +haystack = "aaa" +matches = [[0, 1], [1, 2], [0, 2], [2, 3], [1, 3], [0, 3]] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "repetition-plus-all-110" +regex = '☃+' +haystack = "☃☃☃" +matches = [[0, 3], [3, 6], [0, 6], [6, 9], [3, 9], 
[0, 9]] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "repetition-plus-leftmost-first-200" +regex = '(abc)+' +haystack = "zzabcabczzabc" +matches = [ + [[2, 5], [2, 5]], + [[5, 8], [5, 8]], + [[2, 8], [5, 8]], +] +match-kind = "leftmost-first" +search-kind = "overlapping" + +[[test]] +name = "repetition-plus-all-200" +regex = '(abc)+' +haystack = "zzabcabczzabc" +matches = [ + [[2, 5], [2, 5]], + [[5, 8], [5, 8]], + [[2, 8], [5, 8]], + [[10, 13], [10, 13]], +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "repetition-star-leftmost-first-100" +regex = 'a*' +haystack = "aaa" +matches = [ + [0, 0], + [1, 1], + [0, 1], + [2, 2], + [1, 2], + [0, 2], + [3, 3], + [2, 3], + [1, 3], + [0, 3], +] +match-kind = "leftmost-first" +search-kind = "overlapping" + +[[test]] +name = "repetition-star-all-100" +regex = 'a*' +haystack = "aaa" +matches = [ + [0, 0], + [1, 1], + [0, 1], + [2, 2], + [1, 2], + [0, 2], + [3, 3], + [2, 3], + [1, 3], + [0, 3], +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "repetition-star-leftmost-first-200" +regex = '(abc)*' +haystack = "zzabcabczzabc" +matches = [ + [[0, 0], []], +] +match-kind = "leftmost-first" +search-kind = "overlapping" + +[[test]] +name = "repetition-star-all-200" +regex = '(abc)*' +haystack = "zzabcabczzabc" +matches = [ + [[0, 0], []], + [[1, 1], []], + [[2, 2], []], + [[3, 3], []], + [[4, 4], []], + [[5, 5], []], + [[2, 5], [2, 5]], + [[6, 6], []], + [[7, 7], []], + [[8, 8], []], + [[5, 8], [5, 8]], + [[2, 8], [5, 8]], + [[9, 9], []], + [[10, 10], []], + [[11, 11], []], + [[12, 12], []], + [[13, 13], []], + [[10, 13], [10, 13]], +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "start-end-rep-leftmost-first" +regex = '(^$)*' +haystack = "abc" +matches = [ + [[0, 0], []], +] +match-kind = "leftmost-first" +search-kind = "overlapping" + +[[test]] +name = "start-end-rep-all" +regex = '(^$)*' +haystack = "abc" +matches = [ + [[0, 0], []], + 
[[1, 1], []], + [[2, 2], []], + [[3, 3], []], +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "alt-leftmost-first-100" +regex = 'abc|a' +haystack = "zzabcazzaabc" +matches = [[2, 3], [2, 5]] +match-kind = "leftmost-first" +search-kind = "overlapping" + +[[test]] +name = "alt-all-100" +regex = 'abc|a' +haystack = "zzabcazzaabc" +matches = [[2, 3], [2, 5], [5, 6], [8, 9], [9, 10], [9, 12]] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "empty-000" +regex = "" +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "empty-alt-000" +regex = "|b" +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [1, 2], [3, 3]] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "empty-alt-010" +regex = "b|" +haystack = "abc" +matches = [[0, 0], [1, 1], [2, 2], [1, 2], [3, 3]] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +# See: https://github.com/rust-lang/regex/issues/484 +name = "iter1-bytes" +regex = '' +haystack = "☃" +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] +utf8 = false +match-kind = "all" +search-kind = "overlapping" + +[[test]] +# See: https://github.com/rust-lang/regex/issues/484 +name = "iter1-utf8" +regex = '' +haystack = "☃" +matches = [[0, 0], [3, 3]] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "iter1-incomplete-utf8" +regex = '' +haystack = '\xE2\x98' # incomplete snowman +matches = [[0, 0], [1, 1], [2, 2]] +match-kind = "all" +search-kind = "overlapping" +unescape = true +utf8 = false + +[[test]] +name = "scratch" +regex = ['sam', 'samwise'] +haystack = "samwise" +matches = [ + { id = 0, span = [0, 3] }, +] +match-kind = "leftmost-first" +search-kind = "overlapping" diff --git a/vendor/regex/testdata/regex-lite.toml b/vendor/regex/testdata/regex-lite.toml new file mode 100644 index 0000000..1769d80 --- /dev/null +++ b/vendor/regex/testdata/regex-lite.toml @@ -0,0 +1,98 @@ +# These tests 
are specifically written to test the regex-lite crate. While it +# largely has the same semantics as the regex crate, there are some differences +# around Unicode support and UTF-8. +# +# To be clear, regex-lite supports far fewer patterns because of its lack of +# Unicode support, nested character classes and character class set operations. +# What we're talking about here are the patterns that both crates support but +# where the semantics might differ. + +# regex-lite uses ASCII definitions for Perl character classes. +[[test]] +name = "perl-class-decimal" +regex = '\d' +haystack = '᠕' +matches = [] +unicode = true + +# regex-lite uses ASCII definitions for Perl character classes. +[[test]] +name = "perl-class-space" +regex = '\s' +haystack = "\u2000" +matches = [] +unicode = true + +# regex-lite uses ASCII definitions for Perl character classes. +[[test]] +name = "perl-class-word" +regex = '\w' +haystack = 'δ' +matches = [] +unicode = true + +# regex-lite uses the ASCII definition of word for word boundary assertions. +[[test]] +name = "word-boundary" +regex = '\b' +haystack = 'δ' +matches = [] +unicode = true + +# regex-lite uses the ASCII definition of word for negated word boundary +# assertions. But note that it should still not split codepoints! +[[test]] +name = "word-boundary-negated" +regex = '\B' +haystack = 'δ' +matches = [[0, 0], [2, 2]] +unicode = true + +# While we're here, the empty regex---which matches at every +# position---shouldn't split a codepoint either. +[[test]] +name = "empty-no-split-codepoint" +regex = '' +haystack = '💩' +matches = [[0, 0], [4, 4]] +unicode = true + +# A dot always matches a full codepoint. +[[test]] +name = "dot-always-matches-codepoint" +regex = '.' +haystack = '💩' +matches = [[0, 4]] +unicode = false + +# A negated character class also always matches a full codepoint. 
+[[test]] +name = "negated-class-always-matches-codepoint" +regex = '[^a]' +haystack = '💩' +matches = [[0, 4]] +unicode = false + +# regex-lite only supports ASCII-aware case insensitive matching. +[[test]] +name = "case-insensitive-is-ascii-only" +regex = 's' +haystack = 'ſ' +matches = [] +unicode = true +case-insensitive = true + +# Negated word boundaries shouldn't split a codepoint, but they will match +# between invalid UTF-8. +# +# This test is only valid for a 'bytes' API, but that doesn't (yet) exist in +# regex-lite. This can't happen in the main API because &str can't contain +# invalid UTF-8. +# [[test]] +# name = "word-boundary-invalid-utf8" +# regex = '\B' +# haystack = '\xFF\xFF\xFF\xFF' +# unescape = true +# matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] +# unicode = true +# utf8 = false diff --git a/vendor/regex/testdata/regression.toml b/vendor/regex/testdata/regression.toml new file mode 100644 index 0000000..53b0701 --- /dev/null +++ b/vendor/regex/testdata/regression.toml @@ -0,0 +1,830 @@ +# See: https://github.com/rust-lang/regex/issues/48 +[[test]] +name = "invalid-regex-no-crash-100" +regex = '(*)' +haystack = "" +matches = [] +compiles = false + +# See: https://github.com/rust-lang/regex/issues/48 +[[test]] +name = "invalid-regex-no-crash-200" +regex = '(?:?)' +haystack = "" +matches = [] +compiles = false + +# See: https://github.com/rust-lang/regex/issues/48 +[[test]] +name = "invalid-regex-no-crash-300" +regex = '(?)' +haystack = "" +matches = [] +compiles = false + +# See: https://github.com/rust-lang/regex/issues/48 +[[test]] +name = "invalid-regex-no-crash-400" +regex = '*' +haystack = "" +matches = [] +compiles = false + +# See: https://github.com/rust-lang/regex/issues/75 +[[test]] +name = "unsorted-binary-search-100" +regex = '(?i-u)[a_]+' +haystack = "A_" +matches = [[0, 2]] + +# See: https://github.com/rust-lang/regex/issues/75 +[[test]] +name = "unsorted-binary-search-200" +regex = '(?i-u)[A_]+' +haystack = "a_" +matches = 
[[0, 2]] + +# See: https://github.com/rust-lang/regex/issues/76 +[[test]] +name = "unicode-case-lower-nocase-flag" +regex = '(?i)\p{Ll}+' +haystack = "ΛΘΓΔα" +matches = [[0, 10]] + +# See: https://github.com/rust-lang/regex/issues/99 +[[test]] +name = "negated-char-class-100" +regex = '(?i)[^x]' +haystack = "x" +matches = [] + +# See: https://github.com/rust-lang/regex/issues/99 +[[test]] +name = "negated-char-class-200" +regex = '(?i)[^x]' +haystack = "X" +matches = [] + +# See: https://github.com/rust-lang/regex/issues/101 +[[test]] +name = "ascii-word-underscore" +regex = '[[:word:]]' +haystack = "_" +matches = [[0, 1]] + +# See: https://github.com/rust-lang/regex/issues/129 +[[test]] +name = "captures-repeat" +regex = '([a-f]){2}(?P<foo>[x-z])' +haystack = "abx" +matches = [ + [[0, 3], [1, 2], [2, 3]], +] + +# See: https://github.com/rust-lang/regex/issues/153 +[[test]] +name = "alt-in-alt-100" +regex = 'ab?|$' +haystack = "az" +matches = [[0, 1], [2, 2]] + +# See: https://github.com/rust-lang/regex/issues/153 +[[test]] +name = "alt-in-alt-200" +regex = '^(?:.*?)(?:\n|\r\n?|$)' +haystack = "ab\rcd" +matches = [[0, 3]] + +# See: https://github.com/rust-lang/regex/issues/169 +[[test]] +name = "leftmost-first-prefix" +regex = 'z*azb' +haystack = "azb" +matches = [[0, 3]] + +# See: https://github.com/rust-lang/regex/issues/191 +[[test]] +name = "many-alternates" +regex = '1|2|3|4|5|6|7|8|9|10|int' +haystack = "int" +matches = [[0, 3]] + +# See: https://github.com/rust-lang/regex/issues/204 +[[test]] +name = "word-boundary-alone-100" +regex = '\b' +haystack = "Should this (work?)" +matches = [[0, 0], [6, 6], [7, 7], [11, 11], [13, 13], [17, 17]] + +# See: https://github.com/rust-lang/regex/issues/204 +[[test]] +name = "word-boundary-alone-200" +regex = '\b' +haystack = "a b c" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] + +# See: https://github.com/rust-lang/regex/issues/264 +[[test]] +name = "word-boundary-ascii-no-capture" +regex = '\B' +haystack = 
"\U00028F3E" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] +unicode = false +utf8 = false + +# See: https://github.com/rust-lang/regex/issues/264 +[[test]] +name = "word-boundary-ascii-capture" +regex = '(?:\B)' +haystack = "\U00028F3E" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] +unicode = false +utf8 = false + +# See: https://github.com/rust-lang/regex/issues/268 +[[test]] +name = "partial-anchor" +regex = '^a|b' +haystack = "ba" +matches = [[0, 1]] + +# See: https://github.com/rust-lang/regex/issues/271 +[[test]] +name = "endl-or-word-boundary" +regex = '(?m:$)|(?-u:\b)' +haystack = "\U0006084E" +matches = [[4, 4]] + +# See: https://github.com/rust-lang/regex/issues/271 +[[test]] +name = "zero-or-end" +regex = '(?i-u:\x00)|$' +haystack = "\U000E682F" +matches = [[4, 4]] + +# See: https://github.com/rust-lang/regex/issues/271 +[[test]] +name = "y-or-endl" +regex = '(?i-u:y)|(?m:$)' +haystack = "\U000B4331" +matches = [[4, 4]] + +# See: https://github.com/rust-lang/regex/issues/271 +[[test]] +name = "word-boundary-start-x" +regex = '(?u:\b)^(?-u:X)' +haystack = "X" +matches = [[0, 1]] + +# See: https://github.com/rust-lang/regex/issues/271 +[[test]] +name = "word-boundary-ascii-start-x" +regex = '(?-u:\b)^(?-u:X)' +haystack = "X" +matches = [[0, 1]] + +# See: https://github.com/rust-lang/regex/issues/271 +[[test]] +name = "end-not-word-boundary" +regex = '$\B' +haystack = "\U0005C124\U000B576C" +matches = [[8, 8]] +unicode = false +utf8 = false + +# See: https://github.com/rust-lang/regex/issues/280 +[[test]] +name = "partial-anchor-alternate-begin" +regex = '^a|z' +haystack = "yyyyya" +matches = [] + +# See: https://github.com/rust-lang/regex/issues/280 +[[test]] +name = "partial-anchor-alternate-end" +regex = 'a$|z' +haystack = "ayyyyy" +matches = [] + +# See: https://github.com/rust-lang/regex/issues/289 +[[test]] +name = "lits-unambiguous-100" +regex = '(?:ABC|CDA|BC)X' +haystack = "CDAX" +matches = [[0, 4]] + +# See: 
https://github.com/rust-lang/regex/issues/291 +[[test]] +name = "lits-unambiguous-200" +regex = '((IMG|CAM|MG|MB2)_|(DSCN|CIMG))(?P<n>[0-9]+)$' +haystack = "CIMG2341" +matches = [ + [[0, 8], [0, 4], [], [0, 4], [4, 8]], +] + +# See: https://github.com/rust-lang/regex/issues/303 +# +# 2022-09-19: This has now been "properly" fixed in that empty character +# classes are fully supported as something that can never match. This test +# used to be marked as 'compiles = false', but now it works. +[[test]] +name = "negated-full-byte-range" +regex = '[^\x00-\xFF]' +haystack = "" +matches = [] +compiles = true +unicode = false +utf8 = false + +# See: https://github.com/rust-lang/regex/issues/321 +[[test]] +name = "strange-anchor-non-complete-prefix" +regex = 'a^{2}' +haystack = "" +matches = [] + +# See: https://github.com/rust-lang/regex/issues/321 +[[test]] +name = "strange-anchor-non-complete-suffix" +regex = '${2}a' +haystack = "" +matches = [] + +# See: https://github.com/rust-lang/regex/issues/334 +# See: https://github.com/rust-lang/regex/issues/557 +[[test]] +name = "captures-after-dfa-premature-end-100" +regex = 'a(b*(X|$))?' +haystack = "abcbX" +matches = [ + [[0, 1], [], []], +] + +# See: https://github.com/rust-lang/regex/issues/334 +# See: https://github.com/rust-lang/regex/issues/557 +[[test]] +name = "captures-after-dfa-premature-end-200" +regex = 'a(bc*(X|$))?' +haystack = "abcbX" +matches = [ + [[0, 1], [], []], +] + +# See: https://github.com/rust-lang/regex/issues/334 +# See: https://github.com/rust-lang/regex/issues/557 +[[test]] +name = "captures-after-dfa-premature-end-300" +regex = '(aa$)?' +haystack = "aaz" +matches = [ + [[0, 0], []], + [[1, 1], []], + [[2, 2], []], + [[3, 3], []], +] + +# Plucked from "Why aren’t regular expressions a lingua franca? 
an empirical +# study on the re-use and portability of regular expressions", The ACM Joint +# European Software Engineering Conference and Symposium on the Foundations of +# Software Engineering (ESEC/FSE), 2019. +# +# Link: https://dl.acm.org/doi/pdf/10.1145/3338906.3338909 +[[test]] +name = "captures-after-dfa-premature-end-400" +regex = '(a)\d*\.?\d+\b' +haystack = "a0.0c" +matches = [ + [[0, 2], [0, 1]], +] + +# See: https://github.com/rust-lang/regex/issues/437 +[[test]] +name = "literal-panic" +regex = 'typename type\-parameter\-[0-9]+\-[0-9]+::.+' +haystack = "test" +matches = [] + +# See: https://github.com/rust-lang/regex/issues/527 +[[test]] +name = "empty-flag-expr" +regex = '(?:(?:(?x)))' +haystack = "" +matches = [[0, 0]] + +# See: https://github.com/rust-lang/regex/issues/533 +#[[tests]] +#name = "blank-matches-nothing-between-space-and-tab" +#regex = '[[:blank:]]' +#input = '\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F' +#match = false +#unescape = true + +# See: https://github.com/rust-lang/regex/issues/533 +#[[tests]] +#name = "blank-matches-nothing-between-space-and-tab-inverted" +#regex = '^[[:^blank:]]+$' +#input = '\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F' +#match = true +#unescape = true + +# See: https://github.com/rust-lang/regex/issues/555 +[[test]] +name = "invalid-repetition" +regex = '(?m){1,1}' +haystack = "" +matches = [] +compiles = false + +# See: https://github.com/rust-lang/regex/issues/640 +[[test]] +name = "flags-are-unset" +regex = '(?:(?i)foo)|Bar' +haystack = "foo Foo bar Bar" +matches = [[0, 3], [4, 7], [12, 15]] + +# Note that 'Ј' is not 'j', but cyrillic Je +# https://en.wikipedia.org/wiki/Je_(Cyrillic) +# +# See: https://github.com/rust-lang/regex/issues/659 +[[test]] +name = "empty-group-with-unicode" +regex = '(?:)Ј01' +haystack = 'zЈ01' +matches = [[1, 5]] + +# See: https://github.com/rust-lang/regex/issues/579 +[[test]] +name = 
"word-boundary-weird" +regex = '\b..\b' +haystack = "I have 12, he has 2!" +matches = [[0, 2], [7, 9], [9, 11], [11, 13], [17, 19]] + +# See: https://github.com/rust-lang/regex/issues/579 +[[test]] +name = "word-boundary-weird-ascii" +regex = '\b..\b' +haystack = "I have 12, he has 2!" +matches = [[0, 2], [7, 9], [9, 11], [11, 13], [17, 19]] +unicode = false +utf8 = false + +# See: https://github.com/rust-lang/regex/issues/579 +[[test]] +name = "word-boundary-weird-minimal-ascii" +regex = '\b..\b' +haystack = "az,,b" +matches = [[0, 2], [2, 4]] +unicode = false +utf8 = false + +# See: https://github.com/BurntSushi/ripgrep/issues/1203 +[[test]] +name = "reverse-suffix-100" +regex = '[0-4][0-4][0-4]000' +haystack = "153.230000" +matches = [[4, 10]] + +# See: https://github.com/BurntSushi/ripgrep/issues/1203 +[[test]] +name = "reverse-suffix-200" +regex = '[0-9][0-9][0-9]000' +haystack = "153.230000\n" +matches = [[4, 10]] + +# This is a tricky case for the reverse suffix optimization, because it +# finds the 'foobar' match but the reverse scan must fail to find a match by +# correctly dealing with the word boundary following the 'foobar' literal when +# computing the start state. +# +# This test exists because I tried to break the following assumption that +# is currently in the code: that if a suffix is found and the reverse scan +# succeeds, then it's guaranteed that there is an overall match. Namely, the +# 'is_match' routine does *not* do another forward scan in this case because of +# this assumption. 
+[[test]] +name = "reverse-suffix-300" +regex = '\w+foobar\b' +haystack = "xyzfoobarZ" +matches = [] +unicode = false +utf8 = false + +# See: https://github.com/BurntSushi/ripgrep/issues/1247 +[[test]] +name = "stops" +regex = '\bs(?:[ab])' +haystack = 's\xE4' +matches = [] +unescape = true +utf8 = false + +# See: https://github.com/BurntSushi/ripgrep/issues/1247 +[[test]] +name = "stops-ascii" +regex = '(?-u:\b)s(?:[ab])' +haystack = 's\xE4' +matches = [] +unescape = true +utf8 = false + +# See: https://github.com/rust-lang/regex/issues/850 +[[test]] +name = "adjacent-line-boundary-100" +regex = '(?m)^(?:[^ ]+?)$' +haystack = "line1\nline2" +matches = [[0, 5], [6, 11]] + +# Continued. +[[test]] +name = "adjacent-line-boundary-200" +regex = '(?m)^(?:[^ ]+?)$' +haystack = "A\nB" +matches = [[0, 1], [2, 3]] + +# There is no issue for this bug. +[[test]] +name = "anchored-prefix-100" +regex = '^a[[:^space:]]' +haystack = "a " +matches = [] + +# There is no issue for this bug. +[[test]] +name = "anchored-prefix-200" +regex = '^a[[:^space:]]' +haystack = "foo boo a" +matches = [] + +# There is no issue for this bug. +[[test]] +name = "anchored-prefix-300" +regex = '^-[a-z]' +haystack = "r-f" +matches = [] + +# Tests that a possible Aho-Corasick optimization works correctly. It only +# kicks in when we have a lot of literals. By "works correctly," we mean that +# leftmost-first match semantics are properly respected. That is, samwise +# should match, not sam. +# +# There is no issue for this bug. +[[test]] +name = "aho-corasick-100" +regex = 'samwise|sam|a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z' +haystack = "samwise" +matches = [[0, 7]] + +# See: https://github.com/rust-lang/regex/issues/921 +[[test]] +name = "interior-anchor-capture" +regex = '(a$)b$' +haystack = 'ab' +matches = [] + +# I found this bug in the course of adding some of the regexes that Ruff uses +# to rebar. 
It turns out that the lazy DFA was finding a match that was being +# rejected by the one-pass DFA. Yikes. I then minimized the regex and haystack. +# +# Source: https://github.com/charliermarsh/ruff/blob/a919041ddaa64cdf6f216f90dd0480dab69fd3ba/crates/ruff/src/rules/pycodestyle/rules/whitespace_around_keywords.rs#L52 +[[test]] +name = "ruff-whitespace-around-keywords" +regex = '^(a|ab)$' +haystack = "ab" +anchored = true +unicode = false +utf8 = true +matches = [[[0, 2], [0, 2]]] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-0" +regex = '(?:(?-u:\b)|(?u:h))+' +haystack = "h" +unicode = true +utf8 = false +matches = [[0, 0], [1, 1]] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-1" +regex = '(?u:\B)' +haystack = "鋸" +unicode = true +utf8 = false +matches = [] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-2" +regex = '(?:(?u:\b)|(?s-u:.))+' +haystack = "oB" +unicode = true +utf8 = false +matches = [[0, 0], [1, 2]] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-3" +regex = '(?:(?-u:\B)|(?su:.))+' +haystack = "\U000FEF80" +unicode = true +utf8 = false +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-3-utf8" +regex = '(?:(?-u:\B)|(?su:.))+' +haystack = "\U000FEF80" +unicode = true +utf8 = true +matches = [[0, 0], [4, 4]] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-4" +regex = '(?m:$)(?m:^)(?su:.)' +haystack = "\n‣" +unicode = true +utf8 = false +matches = [[0, 1]] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-5" +regex = '(?m:$)^(?m:^)' +haystack = "\n" +unicode = true +utf8 = false +matches = [[0, 0]] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-6" +regex = '(?P<kp>(?iu:do)(?m:$))*' +haystack = "dodo" +unicode = true +utf8 = false +matches = [ + 
[[0, 0], []], + [[1, 1], []], + [[2, 4], [2, 4]], +] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-7" +regex = '(?u:\B)' +haystack = "䡁" +unicode = true +utf8 = false +matches = [] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-8" +regex = '(?:(?-u:\b)|(?u:[\u{0}-W]))+' +haystack = "0" +unicode = true +utf8 = false +matches = [[0, 0], [1, 1]] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-9" +regex = '((?m:$)(?-u:\B)(?s-u:.)(?-u:\B)$)' +haystack = "\n\n" +unicode = true +utf8 = false +matches = [ + [[1, 2], [1, 2]], +] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-10" +regex = '(?m:$)(?m:$)^(?su:.)' +haystack = "\n\u0081¨\u200a" +unicode = true +utf8 = false +matches = [[0, 1]] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-11" +regex = '(?-u:\B)(?m:^)' +haystack = "0\n" +unicode = true +utf8 = false +matches = [[2, 2]] + +# From: https://github.com/rust-lang/regex/issues/429 +[[test]] +name = "i429-12" +regex = '(?:(?u:\b)|(?-u:.))+' +haystack = "0" +unicode = true +utf8 = false +matches = [[0, 0], [1, 1]] + +# From: https://github.com/rust-lang/regex/issues/969 +[[test]] +name = "i969" +regex = 'c.*d\z' +haystack = "ababcd" +bounds = [4, 6] +search-kind = "earliest" +matches = [[4, 6]] + +# I found this during the regex-automata migration. This is the fowler basic +# 154 test, but without anchored = true and without a match limit. +# +# This test caught a subtle bug in the hybrid reverse DFA search, where it +# would skip over the termination condition if it entered a start state. This +# was a double bug. Firstly, the reverse DFA shouldn't have had start states +# specialized in the first place, and thus it shouldn't have possible to detect +# that the DFA had entered a start state. The second bug was that the start +# state handling was incorrect by jumping over the termination condition. 
+[[test]] +name = "fowler-basic154-unanchored" +regex = '''a([bc]*)c*''' +haystack = '''abc''' +matches = [[[0, 3], [1, 3]]] + +# From: https://github.com/rust-lang/regex/issues/981 +# +# This was never really a problem in the new architecture because the +# regex-automata engines are far more principled about how they deal with +# look-around. (This was one of the many reasons I wanted to re-work the +# original regex crate engines.) +[[test]] +name = "word-boundary-interact-poorly-with-literal-optimizations" +regex = '(?i:(?:\b|_)win(?:32|64|dows)?(?:\b|_))' +haystack = 'ubi-Darwin-x86_64.tar.gz' +matches = [] + +# This was found during fuzz testing of regex. It provoked a panic in the meta +# engine as a result of the reverse suffix optimization. Namely, it hit a case +# where a suffix match was found, a corresponding reverse match was found, but +# the forward search turned up no match. The forward search should always match +# if the suffix and reverse search match. +# +# This in turn uncovered an inconsistency between the PikeVM and the DFA (lazy +# and fully compiled) engines. It was caused by a mishandling of the collection +# of NFA state IDs in the generic determinization code (which is why both types +# of DFA were impacted). Namely, when a fail state was encountered (that's the +# `[^\s\S]` in the pattern below), then it would just stop collecting states. +# But that's not correct since a later state could lead to a match. +[[test]] +name = "impossible-branch" +regex = '.*[^\s\S]A|B' +haystack = "B" +matches = [[0, 1]] + +# This was found during fuzz testing in regex-lite. The regex crate never +# suffered from this bug, but it causes regex-lite to incorrectly compile +# captures. +[[test]] +name = "captures-wrong-order" +regex = '(a){0}(a)' +haystack = 'a' +matches = [[[0, 1], [], [0, 1]]] + +# This tests a bug in how quit states are handled in the DFA. 
At some point +# during development, the DFAs were tweaked slightly such that if they hit +# a quit state (which means, they hit a byte that the caller configured should +# stop the search), then it might not return an error necessarily. Namely, if a +# match had already been found, then it would be returned instead of an error. +# +# But this is actually wrong! Why? Because even though a match had been found, +# it wouldn't be fully correct to return it once a quit state has been seen +# because you can't determine whether the match offset returned is the correct +# greedy/leftmost-first match. Since you can't complete the search as requested +# by the caller, the DFA should just stop and return an error. +# +# Interestingly, this does seem to produce an unavoidable difference between +# 'try_is_match().unwrap()' and 'try_find().unwrap().is_some()' for the DFAs. +# The former will stop immediately once a match is known to occur and return +# 'Ok(true)', where as the latter could find the match but quit with an +# 'Err(..)' first. +# +# Thankfully, I believe this inconsistency between 'is_match()' and 'find()' +# cannot be observed in the higher level meta regex API because it specifically +# will try another engine that won't fail in the case of a DFA failing. +# +# This regression happened in the regex crate rewrite, but before anything got +# released. +[[test]] +name = "negated-unicode-word-boundary-dfa-fail" +regex = '\B.*' +haystack = "!\u02D7" +matches = [[0, 3]] + +# This failure was found in the *old* regex crate (prior to regex 1.9), but +# I didn't investigate why. My best guess is that it's a literal optimization +# bug. It didn't occur in the rewrite. +[[test]] +name = "missed-match" +regex = 'e..+e.ee>' +haystack = 'Zeee.eZZZZZZZZeee>eeeeeee>' +matches = [[1, 26]] + +# This test came from the 'ignore' crate and tripped a bug in how accelerated +# DFA states were handled in an overlapping search. 
+[[test]] +name = "regex-to-glob" +regex = ['(?-u)^path1/[^/]*$'] +haystack = "path1/foo" +matches = [[0, 9]] +utf8 = false +match-kind = "all" +search-kind = "overlapping" + +# See: https://github.com/rust-lang/regex/issues/1060 +[[test]] +name = "reverse-inner-plus-shorter-than-expected" +regex = '(?:(\d+)[:.])?(\d{1,2})[:.](\d{2})' +haystack = '102:12:39' +matches = [[[0, 9], [0, 3], [4, 6], [7, 9]]] + +# Like reverse-inner-plus-shorter-than-expected, but using a far simpler regex +# to demonstrate the extent of the rot. Sigh. +# +# See: https://github.com/rust-lang/regex/issues/1060 +[[test]] +name = "reverse-inner-short" +regex = '(?:([0-9][0-9][0-9]):)?([0-9][0-9]):([0-9][0-9])' +haystack = '102:12:39' +matches = [[[0, 9], [0, 3], [4, 6], [7, 9]]] + +# This regression test was found via the RegexSet APIs. It triggered a +# particular code path where a regex was compiled with 'All' match semantics +# (to support overlapping search), but got funneled down into a standard +# leftmost search when calling 'is_match'. This is fine on its own, but the +# leftmost search will use a prefilter and that's where this went awry. +# +# Namely, since 'All' semantics were used, the aho-corasick prefilter was +# incorrectly compiled with 'Standard' semantics. This was wrong because +# 'Standard' immediately attempts to report a match at every position, even if +# that would mean reporting a match past the leftmost match before reporting +# the leftmost match. This breaks the prefilter contract of never having false +# negatives and leads overall to the engine not finding a match. +# +# See: https://github.com/rust-lang/regex/issues/1070 +[[test]] +name = "prefilter-with-aho-corasick-standard-semantics" +regex = '(?m)^ *v [0-9]' +haystack = 'v 0' +matches = [ + { id = 0, spans = [[0, 3]] }, +] +match-kind = "all" +search-kind = "overlapping" +unicode = true +utf8 = true + +# This tests that the PikeVM and the meta regex agree on a particular regex. 
+# This test previously failed when the ad hoc engines inside the meta engine +# did not handle quit states correctly. Namely, the Unicode word boundary here +# combined with a non-ASCII codepoint provokes the quit state. The ad hoc +# engines were previously returning a match even after entering the quit state +# if a match had been previously detected, but this is incorrect. The reason +# is that if a quit state is found, then the search must give up *immediately* +# because it prevents the search from finding the "proper" leftmost-first +# match. If it instead returns a match that has been found, it risks reporting +# an improper match, as it did in this case. +# +# See: https://github.com/rust-lang/regex/issues/1046 +[[test]] +name = "non-prefix-literal-quit-state" +regex = '.+\b\n' +haystack = "β77\n" +matches = [[0, 5]] + +# This is a regression test for some errant HIR interval set operations that +# were made in the regex-syntax 0.8.0 release and then reverted in 0.8.1. The +# issue here is that the HIR produced from the regex had out-of-order ranges. +# +# See: https://github.com/rust-lang/regex/issues/1103 +# Ref: https://github.com/rust-lang/regex/pull/1051 +# Ref: https://github.com/rust-lang/regex/pull/1102 +[[test]] +name = "hir-optimization-out-of-order-class" +regex = '^[[:alnum:]./-]+$' +haystack = "a-b" +matches = [[0, 3]] + +# This is a regression test for an improper reverse suffix optimization. This +# occurred when I "broadened" the applicability of the optimization to include +# multiple possible literal suffixes instead of only sticking to a non-empty +# longest common suffix. It turns out that, at least given how the reverse +# suffix optimization works, we need to stick to the longest common suffix for +# now. 
+# +# See: https://github.com/rust-lang/regex/issues/1110 +# See also: https://github.com/astral-sh/ruff/pull/7980 +[[test]] +name = 'improper-reverse-suffix-optimization' +regex = '(\\N\{[^}]+})|([{}])' +haystack = 'hiya \N{snowman} bye' +matches = [[[5, 16], [5, 16], []]] diff --git a/vendor/regex/testdata/set.toml b/vendor/regex/testdata/set.toml new file mode 100644 index 0000000..049e8a8 --- /dev/null +++ b/vendor/regex/testdata/set.toml @@ -0,0 +1,641 @@ +# Basic multi-regex tests. + +[[test]] +name = "basic10" +regex = ["a", "a"] +haystack = "a" +matches = [ + { id = 0, span = [0, 1] }, + { id = 1, span = [0, 1] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic10-leftmost-first" +regex = ["a", "a"] +haystack = "a" +matches = [ + { id = 0, span = [0, 1] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" + +[[test]] +name = "basic20" +regex = ["a", "a"] +haystack = "ba" +matches = [ + { id = 0, span = [1, 2] }, + { id = 1, span = [1, 2] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic30" +regex = ["a", "b"] +haystack = "a" +matches = [ + { id = 0, span = [0, 1] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic40" +regex = ["a", "b"] +haystack = "b" +matches = [ + { id = 1, span = [0, 1] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic50" +regex = ["a|b", "b|a"] +haystack = "b" +matches = [ + { id = 0, span = [0, 1] }, + { id = 1, span = [0, 1] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic60" +regex = ["foo", "oo"] +haystack = "foo" +matches = [ + { id = 0, span = [0, 3] }, + { id = 1, span = [1, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic60-leftmost-first" +regex = ["foo", "oo"] +haystack = "foo" +matches = [ + { id = 0, span = [0, 3] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" + +[[test]] +name = "basic61" +regex = ["oo", 
"foo"] +haystack = "foo" +matches = [ + { id = 1, span = [0, 3] }, + { id = 0, span = [1, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic61-leftmost-first" +regex = ["oo", "foo"] +haystack = "foo" +matches = [ + { id = 1, span = [0, 3] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" + +[[test]] +name = "basic70" +regex = ["abcd", "bcd", "cd", "d"] +haystack = "abcd" +matches = [ + { id = 0, span = [0, 4] }, + { id = 1, span = [1, 4] }, + { id = 2, span = [2, 4] }, + { id = 3, span = [3, 4] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic71" +regex = ["bcd", "cd", "d", "abcd"] +haystack = "abcd" +matches = [ + { id = 3, span = [0, 4] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" + +[[test]] +name = "basic80" +regex = ["^foo", "bar$"] +haystack = "foo" +matches = [ + { id = 0, span = [0, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic81" +regex = ["^foo", "bar$"] +haystack = "foo bar" +matches = [ + { id = 0, span = [0, 3] }, + { id = 1, span = [4, 7] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic82" +regex = ["^foo", "bar$"] +haystack = "bar" +matches = [ + { id = 1, span = [0, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic90" +regex = ["[a-z]+$", "foo"] +haystack = "01234 foo" +matches = [ + { id = 0, span = [8, 9] }, + { id = 0, span = [7, 9] }, + { id = 0, span = [6, 9] }, + { id = 1, span = [6, 9] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic91" +regex = ["[a-z]+$", "foo"] +haystack = "foo 01234" +matches = [ + { id = 1, span = [0, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic100" +regex = [".*?", "a"] +haystack = "zzza" +matches = [ + { id = 0, span = [0, 0] }, + { id = 0, span = [1, 1] }, + { id = 0, span = [0, 1] }, + { id = 0, span = [2, 2] }, + { id = 0, span = [1, 2] }, + { 
id = 0, span = [0, 2] }, + { id = 0, span = [3, 3] }, + { id = 0, span = [2, 3] }, + { id = 0, span = [1, 3] }, + { id = 0, span = [0, 3] }, + { id = 0, span = [4, 4] }, + { id = 0, span = [3, 4] }, + { id = 0, span = [2, 4] }, + { id = 0, span = [1, 4] }, + { id = 0, span = [0, 4] }, + { id = 1, span = [3, 4] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic101" +regex = [".*", "a"] +haystack = "zzza" +matches = [ + { id = 0, span = [0, 0] }, + { id = 0, span = [1, 1] }, + { id = 0, span = [0, 1] }, + { id = 0, span = [2, 2] }, + { id = 0, span = [1, 2] }, + { id = 0, span = [0, 2] }, + { id = 0, span = [3, 3] }, + { id = 0, span = [2, 3] }, + { id = 0, span = [1, 3] }, + { id = 0, span = [0, 3] }, + { id = 0, span = [4, 4] }, + { id = 0, span = [3, 4] }, + { id = 0, span = [2, 4] }, + { id = 0, span = [1, 4] }, + { id = 0, span = [0, 4] }, + { id = 1, span = [3, 4] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic102" +regex = [".*", "a"] +haystack = "zzz" +matches = [ + { id = 0, span = [0, 0] }, + { id = 0, span = [1, 1] }, + { id = 0, span = [0, 1] }, + { id = 0, span = [2, 2] }, + { id = 0, span = [1, 2] }, + { id = 0, span = [0, 2] }, + { id = 0, span = [3, 3] }, + { id = 0, span = [2, 3] }, + { id = 0, span = [1, 3] }, + { id = 0, span = [0, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic110" +regex = ['\ba\b'] +haystack = "hello a bye" +matches = [ + { id = 0, span = [6, 7] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic111" +regex = ['\ba\b', '\be\b'] +haystack = "hello a bye e" +matches = [ + { id = 0, span = [6, 7] }, + { id = 1, span = [12, 13] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic120" +regex = ["a"] +haystack = "a" +matches = [ + { id = 0, span = [0, 1] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic121" +regex = [".*a"] +haystack = "a" 
+matches = [ + { id = 0, span = [0, 1] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic122" +regex = [".*a", "β"] +haystack = "β" +matches = [ + { id = 1, span = [0, 2] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "basic130" +regex = ["ab", "b"] +haystack = "ba" +matches = [ + { id = 1, span = [0, 1] }, +] +match-kind = "all" +search-kind = "overlapping" + +# These test cases where one of the regexes matches the empty string. + +[[test]] +name = "empty10" +regex = ["", "a"] +haystack = "abc" +matches = [ + { id = 0, span = [0, 0] }, + { id = 1, span = [0, 1] }, + { id = 0, span = [1, 1] }, + { id = 0, span = [2, 2] }, + { id = 0, span = [3, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "empty10-leftmost-first" +regex = ["", "a"] +haystack = "abc" +matches = [ + { id = 0, span = [0, 0] }, + { id = 0, span = [1, 1] }, + { id = 0, span = [2, 2] }, + { id = 0, span = [3, 3] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" + +[[test]] +name = "empty11" +regex = ["a", ""] +haystack = "abc" +matches = [ + { id = 1, span = [0, 0] }, + { id = 0, span = [0, 1] }, + { id = 1, span = [1, 1] }, + { id = 1, span = [2, 2] }, + { id = 1, span = [3, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "empty11-leftmost-first" +regex = ["a", ""] +haystack = "abc" +matches = [ + { id = 0, span = [0, 1] }, + { id = 1, span = [2, 2] }, + { id = 1, span = [3, 3] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" + +[[test]] +name = "empty20" +regex = ["", "b"] +haystack = "abc" +matches = [ + { id = 0, span = [0, 0] }, + { id = 0, span = [1, 1] }, + { id = 1, span = [1, 2] }, + { id = 0, span = [2, 2] }, + { id = 0, span = [3, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "empty20-leftmost-first" +regex = ["", "b"] +haystack = "abc" +matches = [ + { id = 0, span = [0, 0] }, + { id = 0, span = [1, 1] }, + { id = 
0, span = [2, 2] }, + { id = 0, span = [3, 3] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" + +[[test]] +name = "empty21" +regex = ["b", ""] +haystack = "abc" +matches = [ + { id = 1, span = [0, 0] }, + { id = 1, span = [1, 1] }, + { id = 0, span = [1, 2] }, + { id = 1, span = [2, 2] }, + { id = 1, span = [3, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "empty21-leftmost-first" +regex = ["b", ""] +haystack = "abc" +matches = [ + { id = 1, span = [0, 0] }, + { id = 0, span = [1, 2] }, + { id = 1, span = [3, 3] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" + +[[test]] +name = "empty22" +regex = ["(?:)", "b"] +haystack = "abc" +matches = [ + { id = 0, span = [0, 0] }, + { id = 0, span = [1, 1] }, + { id = 1, span = [1, 2] }, + { id = 0, span = [2, 2] }, + { id = 0, span = [3, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "empty23" +regex = ["b", "(?:)"] +haystack = "abc" +matches = [ + { id = 1, span = [0, 0] }, + { id = 1, span = [1, 1] }, + { id = 0, span = [1, 2] }, + { id = 1, span = [2, 2] }, + { id = 1, span = [3, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "empty30" +regex = ["", "z"] +haystack = "abc" +matches = [ + { id = 0, span = [0, 0] }, + { id = 0, span = [1, 1] }, + { id = 0, span = [2, 2] }, + { id = 0, span = [3, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "empty30-leftmost-first" +regex = ["", "z"] +haystack = "abc" +matches = [ + { id = 0, span = [0, 0] }, + { id = 0, span = [1, 1] }, + { id = 0, span = [2, 2] }, + { id = 0, span = [3, 3] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" + +[[test]] +name = "empty31" +regex = ["z", ""] +haystack = "abc" +matches = [ + { id = 1, span = [0, 0] }, + { id = 1, span = [1, 1] }, + { id = 1, span = [2, 2] }, + { id = 1, span = [3, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = 
"empty31-leftmost-first" +regex = ["z", ""] +haystack = "abc" +matches = [ + { id = 1, span = [0, 0] }, + { id = 1, span = [1, 1] }, + { id = 1, span = [2, 2] }, + { id = 1, span = [3, 3] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" + +[[test]] +name = "empty40" +regex = ["c(?:)", "b"] +haystack = "abc" +matches = [ + { id = 1, span = [1, 2] }, + { id = 0, span = [2, 3] }, +] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "empty40-leftmost-first" +regex = ["c(?:)", "b"] +haystack = "abc" +matches = [ + { id = 1, span = [1, 2] }, + { id = 0, span = [2, 3] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" + +# These test cases where there are no matches. + +[[test]] +name = "nomatch10" +regex = ["a", "a"] +haystack = "b" +matches = [] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "nomatch20" +regex = ["^foo", "bar$"] +haystack = "bar foo" +matches = [] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "nomatch30" +regex = [] +haystack = "a" +matches = [] +match-kind = "all" +search-kind = "overlapping" + +[[test]] +name = "nomatch40" +regex = ["^rooted$", '\.log$'] +haystack = "notrooted" +matches = [] +match-kind = "all" +search-kind = "overlapping" + +# These test multi-regex searches with capture groups. +# +# NOTE: I wrote these tests in the course of developing a first class API for +# overlapping capturing group matches, but ultimately removed that API because +# the semantics for overlapping matches aren't totally clear. However, I've +# left the tests because I believe the semantics for these patterns are clear +# and because we can still test our "which patterns matched" APIs with them. 
+ +[[test]] +name = "caps-010" +regex = ['^(\w+) (\w+)$', '^(\S+) (\S+)$'] +haystack = "Bruce Springsteen" +matches = [ + { id = 0, spans = [[0, 17], [0, 5], [6, 17]] }, + { id = 1, spans = [[0, 17], [0, 5], [6, 17]] }, +] +match-kind = "all" +search-kind = "overlapping" +unicode = false +utf8 = false + +[[test]] +name = "caps-020" +regex = ['^(\w+) (\w+)$', '^[A-Z](\S+) [A-Z](\S+)$'] +haystack = "Bruce Springsteen" +matches = [ + { id = 0, spans = [[0, 17], [0, 5], [6, 17]] }, + { id = 1, spans = [[0, 17], [1, 5], [7, 17]] }, +] +match-kind = "all" +search-kind = "overlapping" +unicode = false +utf8 = false + +[[test]] +name = "caps-030" +regex = ['^(\w+) (\w+)$', '^([A-Z])(\S+) ([A-Z])(\S+)$'] +haystack = "Bruce Springsteen" +matches = [ + { id = 0, spans = [[0, 17], [0, 5], [6, 17]] }, + { id = 1, spans = [[0, 17], [0, 1], [1, 5], [6, 7], [7, 17]] }, +] +match-kind = "all" +search-kind = "overlapping" +unicode = false +utf8 = false + +[[test]] +name = "caps-110" +regex = ['(\w+) (\w+)', '(\S+) (\S+)'] +haystack = "Bruce Springsteen" +matches = [ + { id = 0, spans = [[0, 17], [0, 5], [6, 17]] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" +unicode = false +utf8 = false + +[[test]] +name = "caps-120" +regex = ['(\w+) (\w+)', '(\S+) (\S+)'] +haystack = "&ruce $pringsteen" +matches = [ + { id = 1, spans = [[0, 17], [0, 5], [6, 17]] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" +unicode = false +utf8 = false + +[[test]] +name = "caps-121" +regex = ['(\w+) (\w+)', '(\S+) (\S+)'] +haystack = "&ruce $pringsteen Foo Bar" +matches = [ + { id = 1, spans = [[0, 17], [0, 5], [6, 17]] }, + { id = 0, spans = [[18, 25], [18, 21], [22, 25]] }, +] +match-kind = "leftmost-first" +search-kind = "leftmost" +unicode = false +utf8 = false diff --git a/vendor/regex/testdata/substring.toml b/vendor/regex/testdata/substring.toml new file mode 100644 index 0000000..69595ce --- /dev/null +++ b/vendor/regex/testdata/substring.toml @@ -0,0 +1,36 @@ +# These 
tests check that regex engines perform as expected when the search is +# instructed to only search a substring of a haystack instead of the entire +# haystack. This tends to exercise interesting edge cases that are otherwise +# difficult to provoke. (But not necessarily impossible. Regex search iterators +# for example, make use of the "search just a substring" APIs by changing the +# starting position of a search to the end position of the previous match.) + +[[test]] +name = "unicode-word-start" +regex = '\b[0-9]+\b' +haystack = "β123" +bounds = { start = 2, end = 5 } +matches = [] + +[[test]] +name = "unicode-word-end" +regex = '\b[0-9]+\b' +haystack = "123β" +bounds = { start = 0, end = 3 } +matches = [] + +[[test]] +name = "ascii-word-start" +regex = '\b[0-9]+\b' +haystack = "β123" +bounds = { start = 2, end = 5 } +matches = [[2, 5]] +unicode = false + +[[test]] +name = "ascii-word-end" +regex = '\b[0-9]+\b' +haystack = "123β" +bounds = { start = 0, end = 3 } +matches = [[0, 3]] +unicode = false diff --git a/vendor/regex/testdata/unicode.toml b/vendor/regex/testdata/unicode.toml new file mode 100644 index 0000000..f4ac76b --- /dev/null +++ b/vendor/regex/testdata/unicode.toml @@ -0,0 +1,517 @@ +# Basic Unicode literal support. +[[test]] +name = "literal1" +regex = '☃' +haystack = "☃" +matches = [[0, 3]] + +[[test]] +name = "literal2" +regex = '☃+' +haystack = "☃" +matches = [[0, 3]] + +[[test]] +name = "literal3" +regex = '☃+' +haystack = "☃" +matches = [[0, 3]] +case-insensitive = true + +[[test]] +name = "literal4" +regex = 'Δ' +haystack = "δ" +matches = [[0, 2]] +case-insensitive = true + +# Unicode word boundaries. 
+[[test]] +name = "wb-100" +regex = '\d\b' +haystack = "6δ" +matches = [] + +[[test]] +name = "wb-200" +regex = '\d\b' +haystack = "6 " +matches = [[0, 1]] + +[[test]] +name = "wb-300" +regex = '\d\B' +haystack = "6δ" +matches = [[0, 1]] + +[[test]] +name = "wb-400" +regex = '\d\B' +haystack = "6 " +matches = [] + +# Unicode character class support. +[[test]] +name = "class1" +regex = '[☃Ⅰ]+' +haystack = "☃" +matches = [[0, 3]] + +[[test]] +name = "class2" +regex = '\pN' +haystack = "Ⅰ" +matches = [[0, 3]] + +[[test]] +name = "class3" +regex = '\pN+' +haystack = "Ⅰ1Ⅱ2" +matches = [[0, 8]] + +[[test]] +name = "class4" +regex = '\PN+' +haystack = "abⅠ" +matches = [[0, 2]] + +[[test]] +name = "class5" +regex = '[\PN]+' +haystack = "abⅠ" +matches = [[0, 2]] + +[[test]] +name = "class6" +regex = '[^\PN]+' +haystack = "abⅠ" +matches = [[2, 5]] + +[[test]] +name = "class7" +regex = '\p{Lu}+' +haystack = "ΛΘΓΔα" +matches = [[0, 8]] + +[[test]] +name = "class8" +regex = '\p{Lu}+' +haystack = "ΛΘΓΔα" +matches = [[0, 10]] +case-insensitive = true + +[[test]] +name = "class9" +regex = '\pL+' +haystack = "ΛΘΓΔα" +matches = [[0, 10]] + +[[test]] +name = "class10" +regex = '\p{Ll}+' +haystack = "ΛΘΓΔα" +matches = [[8, 10]] + +# Unicode aware "Perl" character classes. 
+[[test]] +name = "perl1" +regex = '\w+' +haystack = "dδd" +matches = [[0, 4]] + +[[test]] +name = "perl2" +regex = '\w+' +haystack = "⥡" +matches = [] + +[[test]] +name = "perl3" +regex = '\W+' +haystack = "⥡" +matches = [[0, 3]] + +[[test]] +name = "perl4" +regex = '\d+' +haystack = "1२३9" +matches = [[0, 8]] + +[[test]] +name = "perl5" +regex = '\d+' +haystack = "Ⅱ" +matches = [] + +[[test]] +name = "perl6" +regex = '\D+' +haystack = "Ⅱ" +matches = [[0, 3]] + +[[test]] +name = "perl7" +regex = '\s+' +haystack = " " +matches = [[0, 3]] + +[[test]] +name = "perl8" +regex = '\s+' +haystack = "☃" +matches = [] + +[[test]] +name = "perl9" +regex = '\S+' +haystack = "☃" +matches = [[0, 3]] + +# Specific tests for Unicode general category classes. +[[test]] +name = "class-gencat1" +regex = '\p{Cased_Letter}' +haystack = "A" +matches = [[0, 3]] + +[[test]] +name = "class-gencat2" +regex = '\p{Close_Punctuation}' +haystack = "❯" +matches = [[0, 3]] + +[[test]] +name = "class-gencat3" +regex = '\p{Connector_Punctuation}' +haystack = "⁀" +matches = [[0, 3]] + +[[test]] +name = "class-gencat4" +regex = '\p{Control}' +haystack = "\u009F" +matches = [[0, 2]] + +[[test]] +name = "class-gencat5" +regex = '\p{Currency_Symbol}' +haystack = "£" +matches = [[0, 3]] + +[[test]] +name = "class-gencat6" +regex = '\p{Dash_Punctuation}' +haystack = "〰" +matches = [[0, 3]] + +[[test]] +name = "class-gencat7" +regex = '\p{Decimal_Number}' +haystack = "𑓙" +matches = [[0, 4]] + +[[test]] +name = "class-gencat8" +regex = '\p{Enclosing_Mark}' +haystack = "\uA672" +matches = [[0, 3]] + +[[test]] +name = "class-gencat9" +regex = '\p{Final_Punctuation}' +haystack = "⸡" +matches = [[0, 3]] + +[[test]] +name = "class-gencat10" +regex = '\p{Format}' +haystack = "\U000E007F" +matches = [[0, 4]] + +[[test]] +name = "class-gencat11" +regex = '\p{Initial_Punctuation}' +haystack = "⸜" +matches = [[0, 3]] + +[[test]] +name = "class-gencat12" +regex = '\p{Letter}' +haystack = "Έ" +matches = [[0, 2]] + 
+[[test]] +name = "class-gencat13" +regex = '\p{Letter_Number}' +haystack = "ↂ" +matches = [[0, 3]] + +[[test]] +name = "class-gencat14" +regex = '\p{Line_Separator}' +haystack = "\u2028" +matches = [[0, 3]] + +[[test]] +name = "class-gencat15" +regex = '\p{Lowercase_Letter}' +haystack = "ϛ" +matches = [[0, 2]] + +[[test]] +name = "class-gencat16" +regex = '\p{Mark}' +haystack = "\U000E01EF" +matches = [[0, 4]] + +[[test]] +name = "class-gencat17" +regex = '\p{Math}' +haystack = "⋿" +matches = [[0, 3]] + +[[test]] +name = "class-gencat18" +regex = '\p{Modifier_Letter}' +haystack = "𖭃" +matches = [[0, 4]] + +[[test]] +name = "class-gencat19" +regex = '\p{Modifier_Symbol}' +haystack = "🏿" +matches = [[0, 4]] + +[[test]] +name = "class-gencat20" +regex = '\p{Nonspacing_Mark}' +haystack = "\U0001E94A" +matches = [[0, 4]] + +[[test]] +name = "class-gencat21" +regex = '\p{Number}' +haystack = "⓿" +matches = [[0, 3]] + +[[test]] +name = "class-gencat22" +regex = '\p{Open_Punctuation}' +haystack = "⦅" +matches = [[0, 3]] + +[[test]] +name = "class-gencat23" +regex = '\p{Other}' +haystack = "\u0BC9" +matches = [[0, 3]] + +[[test]] +name = "class-gencat24" +regex = '\p{Other_Letter}' +haystack = "ꓷ" +matches = [[0, 3]] + +[[test]] +name = "class-gencat25" +regex = '\p{Other_Number}' +haystack = "㉏" +matches = [[0, 3]] + +[[test]] +name = "class-gencat26" +regex = '\p{Other_Punctuation}' +haystack = "𞥞" +matches = [[0, 4]] + +[[test]] +name = "class-gencat27" +regex = '\p{Other_Symbol}' +haystack = "⅌" +matches = [[0, 3]] + +[[test]] +name = "class-gencat28" +regex = '\p{Paragraph_Separator}' +haystack = "\u2029" +matches = [[0, 3]] + +[[test]] +name = "class-gencat29" +regex = '\p{Private_Use}' +haystack = "\U0010FFFD" +matches = [[0, 4]] + +[[test]] +name = "class-gencat30" +regex = '\p{Punctuation}' +haystack = "𑁍" +matches = [[0, 4]] + +[[test]] +name = "class-gencat31" +regex = '\p{Separator}' +haystack = "\u3000" +matches = [[0, 3]] + +[[test]] +name = "class-gencat32" 
+regex = '\p{Space_Separator}' +haystack = "\u205F" +matches = [[0, 3]] + +[[test]] +name = "class-gencat33" +regex = '\p{Spacing_Mark}' +haystack = "\U00016F7E" +matches = [[0, 4]] + +[[test]] +name = "class-gencat34" +regex = '\p{Symbol}' +haystack = "⯈" +matches = [[0, 3]] + +[[test]] +name = "class-gencat35" +regex = '\p{Titlecase_Letter}' +haystack = "ῼ" +matches = [[0, 3]] + +[[test]] +name = "class-gencat36" +regex = '\p{Unassigned}' +haystack = "\U0010FFFF" +matches = [[0, 4]] + +[[test]] +name = "class-gencat37" +regex = '\p{Uppercase_Letter}' +haystack = "Ꝋ" +matches = [[0, 3]] + + +# Tests for Unicode emoji properties. +[[test]] +name = "class-emoji1" +regex = '\p{Emoji}' +haystack = "\u23E9" +matches = [[0, 3]] + +[[test]] +name = "class-emoji2" +regex = '\p{emoji}' +haystack = "\U0001F21A" +matches = [[0, 4]] + +[[test]] +name = "class-emoji3" +regex = '\p{extendedpictographic}' +haystack = "\U0001FA6E" +matches = [[0, 4]] + +[[test]] +name = "class-emoji4" +regex = '\p{extendedpictographic}' +haystack = "\U0001FFFD" +matches = [[0, 4]] + + +# Tests for Unicode grapheme cluster properties. +[[test]] +name = "class-gcb1" +regex = '\p{grapheme_cluster_break=prepend}' +haystack = "\U00011D46" +matches = [[0, 4]] + +[[test]] +name = "class-gcb2" +regex = '\p{gcb=regional_indicator}' +haystack = "\U0001F1E6" +matches = [[0, 4]] + +[[test]] +name = "class-gcb3" +regex = '\p{gcb=ri}' +haystack = "\U0001F1E7" +matches = [[0, 4]] + +[[test]] +name = "class-gcb4" +regex = '\p{regionalindicator}' +haystack = "\U0001F1FF" +matches = [[0, 4]] + +[[test]] +name = "class-gcb5" +regex = '\p{gcb=lvt}' +haystack = "\uC989" +matches = [[0, 3]] + +[[test]] +name = "class-gcb6" +regex = '\p{gcb=zwj}' +haystack = "\u200D" +matches = [[0, 3]] + +# Tests for Unicode word boundary properties. 
+[[test]] +name = "class-word-break1" +regex = '\p{word_break=Hebrew_Letter}' +haystack = "\uFB46" +matches = [[0, 3]] + +[[test]] +name = "class-word-break2" +regex = '\p{wb=hebrewletter}' +haystack = "\uFB46" +matches = [[0, 3]] + +[[test]] +name = "class-word-break3" +regex = '\p{wb=ExtendNumLet}' +haystack = "\uFF3F" +matches = [[0, 3]] + +[[test]] +name = "class-word-break4" +regex = '\p{wb=WSegSpace}' +haystack = "\u3000" +matches = [[0, 3]] + +[[test]] +name = "class-word-break5" +regex = '\p{wb=numeric}' +haystack = "\U0001E950" +matches = [[0, 4]] + +# Tests for Unicode sentence boundary properties. +[[test]] +name = "class-sentence-break1" +regex = '\p{sentence_break=Lower}' +haystack = "\u0469" +matches = [[0, 2]] + +[[test]] +name = "class-sentence-break2" +regex = '\p{sb=lower}' +haystack = "\u0469" +matches = [[0, 2]] + +[[test]] +name = "class-sentence-break3" +regex = '\p{sb=Close}' +haystack = "\uFF60" +matches = [[0, 3]] + +[[test]] +name = "class-sentence-break4" +regex = '\p{sb=Close}' +haystack = "\U0001F677" +matches = [[0, 4]] + +[[test]] +name = "class-sentence-break5" +regex = '\p{sb=SContinue}' +haystack = "\uFF64" +matches = [[0, 3]] diff --git a/vendor/regex/testdata/utf8.toml b/vendor/regex/testdata/utf8.toml new file mode 100644 index 0000000..39e284b --- /dev/null +++ b/vendor/regex/testdata/utf8.toml @@ -0,0 +1,399 @@ +# These test the UTF-8 modes expose by regex-automata. Namely, when utf8 is +# true, then we promise that the haystack is valid UTF-8. (Otherwise behavior +# is unspecified.) This also corresponds to building the regex engine with the +# following two guarantees: +# +# 1) For any non-empty match reported, its span is guaranteed to correspond to +# valid UTF-8. +# 2) All empty or zero-width matches reported must never split a UTF-8 +# encoded codepoint. If the haystack has invalid UTF-8, then this results in +# unspecified behavior. 
+# +# The (2) is in particular what we focus our testing on since (1) is generally +# guaranteed by regex-syntax's AST-to-HIR translator and is well tested there. +# The thing with (2) is that it can't be described in the HIR, so the regex +# engines have to handle that case. Thus, we test it here. +# +# Note that it is possible to build a regex that has property (1) but not +# (2), and vice versa. This is done by building the HIR with 'utf8=true' but +# building the Thompson NFA with 'utf8=false'. We don't test that here because +# the harness doesn't expose a way to enable or disable UTF-8 mode with that +# granularity. Instead, those combinations are lightly tested via doc examples. +# That's not to say that (1) without (2) is uncommon. Indeed, ripgrep uses it +# because it cannot guarantee that its haystack is valid UTF-8. + +# This tests that an empty regex doesn't split a codepoint. +[[test]] +name = "empty-utf8yes" +regex = '' +haystack = '☃' +matches = [[0, 0], [3, 3]] +unicode = true +utf8 = true + +# Tests the overlapping case of the above. +[[test]] +name = "empty-utf8yes-overlapping" +regex = '' +haystack = '☃' +matches = [[0, 0], [3, 3]] +unicode = true +utf8 = true +match-kind = "all" +search-kind = "overlapping" + +# This tests that an empty regex DOES split a codepoint when utf=false. +[[test]] +name = "empty-utf8no" +regex = '' +haystack = '☃' +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] +unicode = true +utf8 = false + +# Tests the overlapping case of the above. +[[test]] +name = "empty-utf8no-overlapping" +regex = '' +haystack = '☃' +matches = [[0, 0], [1, 1], [2, 2], [3, 3]] +unicode = true +utf8 = false +match-kind = "all" +search-kind = "overlapping" + +# This tests that an empty regex doesn't split a codepoint, even if we give +# it bounds entirely within the codepoint. +# +# This is one of the trickier cases and is what motivated the current UTF-8 +# mode design. 
In particular, at one point, this test failed the 'is_match' +# variant of the test but not 'find'. This is because the 'is_match' code path +# is specifically optimized for "was a match found" rather than "where is the +# match." In the former case, you don't really care about the empty-vs-non-empty +# matches, and thus, the codepoint splitting filtering logic wasn't getting +# applied. (In multiple ways across multiple regex engines.) In this way, you +# can wind up with a situation where 'is_match' says "yes," but 'find' says, +# "I didn't find anything." Which is... not great. +# +# I could have decided to say that providing boundaries that themselves split +# a codepoint would have unspecified behavior. But I couldn't quite convince +# myself that such boundaries were the only way to get an inconsistency between +# 'is_match' and 'find'. +# +# Note that I also tried to come up with a test like this that fails without +# using `bounds`. Specifically, a test where 'is_match' and 'find' disagree. +# But I couldn't do it, and I'm tempted to conclude it is impossible. The +# fundamental problem is that you need to simultaneously produce an empty match +# that splits a codepoint while *not* matching before or after the codepoint. +[[test]] +name = "empty-utf8yes-bounds" +regex = '' +haystack = '𝛃' +bounds = [1, 3] +matches = [] +unicode = true +utf8 = true + +# Tests the overlapping case of the above. +[[test]] +name = "empty-utf8yes-bounds-overlapping" +regex = '' +haystack = '𝛃' +bounds = [1, 3] +matches = [] +unicode = true +utf8 = true +match-kind = "all" +search-kind = "overlapping" + +# This tests that an empty regex splits a codepoint when the bounds are +# entirely within the codepoint. +[[test]] +name = "empty-utf8no-bounds" +regex = '' +haystack = '𝛃' +bounds = [1, 3] +matches = [[1, 1], [2, 2], [3, 3]] +unicode = true +utf8 = false + +# Tests the overlapping case of the above. 
+[[test]] +name = "empty-utf8no-bounds-overlapping" +regex = '' +haystack = '𝛃' +bounds = [1, 3] +matches = [[1, 1], [2, 2], [3, 3]] +unicode = true +utf8 = false +match-kind = "all" +search-kind = "overlapping" + +# In this test, we anchor the search. Since the start position is also a UTF-8 +# boundary, we get a match. +[[test]] +name = "empty-utf8yes-anchored" +regex = '' +haystack = '𝛃' +matches = [[0, 0]] +anchored = true +unicode = true +utf8 = true + +# Tests the overlapping case of the above. +[[test]] +name = "empty-utf8yes-anchored-overlapping" +regex = '' +haystack = '𝛃' +matches = [[0, 0]] +anchored = true +unicode = true +utf8 = true +match-kind = "all" +search-kind = "overlapping" + +# Same as above, except with UTF-8 mode disabled. It almost doesn't change the +# result, except for the fact that since this is an anchored search and we +# always find all matches, the test harness will keep reporting matches until +# none are found. Because it's anchored, matches will be reported so long as +# they are directly adjacent. Since with UTF-8 mode the next anchored search +# after the match at [0, 0] fails, iteration stops (and doesn't find the last +# match at [4, 4]). +[[test]] +name = "empty-utf8no-anchored" +regex = '' +haystack = '𝛃' +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] +anchored = true +unicode = true +utf8 = false + +# Tests the overlapping case of the above. +# +# Note that overlapping anchored searches are a little weird, and it's not +# totally clear what their semantics ought to be. For now, we just test the +# current behavior of our test shim that implements overlapping search. (This +# is one of the reasons why we don't really expose regex-level overlapping +# searches.) +[[test]] +name = "empty-utf8no-anchored-overlapping" +regex = '' +haystack = '𝛃' +matches = [[0, 0]] +anchored = true +unicode = true +utf8 = false +match-kind = "all" +search-kind = "overlapping" + +# In this test, we anchor the search, but also set bounds. 
The bounds start the +# search in the middle of a codepoint, so there should never be a match. +[[test]] +name = "empty-utf8yes-anchored-bounds" +regex = '' +haystack = '𝛃' +matches = [] +bounds = [1, 3] +anchored = true +unicode = true +utf8 = true + +# Tests the overlapping case of the above. +[[test]] +name = "empty-utf8yes-anchored-bounds-overlapping" +regex = '' +haystack = '𝛃' +matches = [] +bounds = [1, 3] +anchored = true +unicode = true +utf8 = true +match-kind = "all" +search-kind = "overlapping" + +# Same as above, except with UTF-8 mode disabled. Without UTF-8 mode enabled, +# matching within a codepoint is allowed. And remember, as in the anchored test +# above with UTF-8 mode disabled, iteration will report all adjacent matches. +# The matches at [0, 0] and [4, 4] are not included because of the bounds of +# the search. +[[test]] +name = "empty-utf8no-anchored-bounds" +regex = '' +haystack = '𝛃' +bounds = [1, 3] +matches = [[1, 1], [2, 2], [3, 3]] +anchored = true +unicode = true +utf8 = false + +# Tests the overlapping case of the above. +# +# Note that overlapping anchored searches are a little weird, and it's not +# totally clear what their semantics ought to be. For now, we just test the +# current behavior of our test shim that implements overlapping search. (This +# is one of the reasons why we don't really expose regex-level overlapping +# searches.) +[[test]] +name = "empty-utf8no-anchored-bounds-overlapping" +regex = '' +haystack = '𝛃' +bounds = [1, 3] +matches = [[1, 1]] +anchored = true +unicode = true +utf8 = false +match-kind = "all" +search-kind = "overlapping" + +# This tests that we find the match at the end of the string when the bounds +# exclude the first match. +[[test]] +name = "empty-utf8yes-startbound" +regex = '' +haystack = '𝛃' +bounds = [1, 4] +matches = [[4, 4]] +unicode = true +utf8 = true + +# Tests the overlapping case of the above. 
+[[test]] +name = "empty-utf8yes-startbound-overlapping" +regex = '' +haystack = '𝛃' +bounds = [1, 4] +matches = [[4, 4]] +unicode = true +utf8 = true +match-kind = "all" +search-kind = "overlapping" + +# Same as above, except since UTF-8 mode is disabled, we also find the matches +# inbetween that split the codepoint. +[[test]] +name = "empty-utf8no-startbound" +regex = '' +haystack = '𝛃' +bounds = [1, 4] +matches = [[1, 1], [2, 2], [3, 3], [4, 4]] +unicode = true +utf8 = false + +# Tests the overlapping case of the above. +[[test]] +name = "empty-utf8no-startbound-overlapping" +regex = '' +haystack = '𝛃' +bounds = [1, 4] +matches = [[1, 1], [2, 2], [3, 3], [4, 4]] +unicode = true +utf8 = false +match-kind = "all" +search-kind = "overlapping" + +# This tests that we don't find any matches in an anchored search, even when +# the bounds include a match (at the end). +[[test]] +name = "empty-utf8yes-anchored-startbound" +regex = '' +haystack = '𝛃' +bounds = [1, 4] +matches = [] +anchored = true +unicode = true +utf8 = true + +# Tests the overlapping case of the above. +[[test]] +name = "empty-utf8yes-anchored-startbound-overlapping" +regex = '' +haystack = '𝛃' +bounds = [1, 4] +matches = [] +anchored = true +unicode = true +utf8 = true +match-kind = "all" +search-kind = "overlapping" + +# Same as above, except since UTF-8 mode is disabled, we also find the matches +# inbetween that split the codepoint. Even though this is an anchored search, +# since the matches are adjacent, we find all of them. +[[test]] +name = "empty-utf8no-anchored-startbound" +regex = '' +haystack = '𝛃' +bounds = [1, 4] +matches = [[1, 1], [2, 2], [3, 3], [4, 4]] +anchored = true +unicode = true +utf8 = false + +# Tests the overlapping case of the above. +# +# Note that overlapping anchored searches are a little weird, and it's not +# totally clear what their semantics ought to be. For now, we just test the +# current behavior of our test shim that implements overlapping search. 
(This +# is one of the reasons why we don't really expose regex-level overlapping +# searches.) +[[test]] +name = "empty-utf8no-anchored-startbound-overlapping" +regex = '' +haystack = '𝛃' +bounds = [1, 4] +matches = [[1, 1]] +anchored = true +unicode = true +utf8 = false +match-kind = "all" +search-kind = "overlapping" + +# This tests that we find the match at the end of the haystack in UTF-8 mode +# when our bounds only include the empty string at the end of the haystack. +[[test]] +name = "empty-utf8yes-anchored-endbound" +regex = '' +haystack = '𝛃' +bounds = [4, 4] +matches = [[4, 4]] +anchored = true +unicode = true +utf8 = true + +# Tests the overlapping case of the above. +[[test]] +name = "empty-utf8yes-anchored-endbound-overlapping" +regex = '' +haystack = '𝛃' +bounds = [4, 4] +matches = [[4, 4]] +anchored = true +unicode = true +utf8 = true +match-kind = "all" +search-kind = "overlapping" + +# Same as above, but with UTF-8 mode disabled. Results remain the same since +# the only possible match does not split a codepoint. +[[test]] +name = "empty-utf8no-anchored-endbound" +regex = '' +haystack = '𝛃' +bounds = [4, 4] +matches = [[4, 4]] +anchored = true +unicode = true +utf8 = false + +# Tests the overlapping case of the above. +[[test]] +name = "empty-utf8no-anchored-endbound-overlapping" +regex = '' +haystack = '𝛃' +bounds = [4, 4] +matches = [[4, 4]] +anchored = true +unicode = true +utf8 = false +match-kind = "all" +search-kind = "overlapping" diff --git a/vendor/regex/testdata/word-boundary-special.toml b/vendor/regex/testdata/word-boundary-special.toml new file mode 100644 index 0000000..2b5a2a0 --- /dev/null +++ b/vendor/regex/testdata/word-boundary-special.toml @@ -0,0 +1,687 @@ +# These tests are for the "special" word boundary assertions. That is, +# \b{start}, \b{end}, \b{start-half}, \b{end-half}. These are specialty +# assertions for more niche use cases, but hitting those cases without these +# assertions is difficult. 
For example, \b{start-half} and \b{end-half} are +# used to implement the -w/--word-regexp flag in a grep program. + +# Tests for (?-u:\b{start}) + +[[test]] +name = "word-start-ascii-010" +regex = '\b{start}' +haystack = "a" +matches = [[0, 0]] +unicode = false + +[[test]] +name = "word-start-ascii-020" +regex = '\b{start}' +haystack = "a " +matches = [[0, 0]] +unicode = false + +[[test]] +name = "word-start-ascii-030" +regex = '\b{start}' +haystack = " a " +matches = [[1, 1]] +unicode = false + +[[test]] +name = "word-start-ascii-040" +regex = '\b{start}' +haystack = "" +matches = [] +unicode = false + +[[test]] +name = "word-start-ascii-050" +regex = '\b{start}' +haystack = "ab" +matches = [[0, 0]] +unicode = false + +[[test]] +name = "word-start-ascii-060" +regex = '\b{start}' +haystack = "𝛃" +matches = [] +unicode = false + +[[test]] +name = "word-start-ascii-060-bounds" +regex = '\b{start}' +haystack = "𝛃" +bounds = [2, 3] +matches = [] +unicode = false + +[[test]] +name = "word-start-ascii-070" +regex = '\b{start}' +haystack = " 𝛃 " +matches = [] +unicode = false + +[[test]] +name = "word-start-ascii-080" +regex = '\b{start}' +haystack = "𝛃𐆀" +matches = [] +unicode = false + +[[test]] +name = "word-start-ascii-090" +regex = '\b{start}' +haystack = "𝛃b" +matches = [[4, 4]] +unicode = false + +[[test]] +name = "word-start-ascii-110" +regex = '\b{start}' +haystack = "b𝛃" +matches = [[0, 0]] +unicode = false + +# Tests for (?-u:\b{end}) + +[[test]] +name = "word-end-ascii-010" +regex = '\b{end}' +haystack = "a" +matches = [[1, 1]] +unicode = false + +[[test]] +name = "word-end-ascii-020" +regex = '\b{end}' +haystack = "a " +matches = [[1, 1]] +unicode = false + +[[test]] +name = "word-end-ascii-030" +regex = '\b{end}' +haystack = " a " +matches = [[2, 2]] +unicode = false + +[[test]] +name = "word-end-ascii-040" +regex = '\b{end}' +haystack = "" +matches = [] +unicode = false + +[[test]] +name = "word-end-ascii-050" +regex = '\b{end}' +haystack = "ab" +matches = 
[[2, 2]] +unicode = false + +[[test]] +name = "word-end-ascii-060" +regex = '\b{end}' +haystack = "𝛃" +matches = [] +unicode = false + +[[test]] +name = "word-end-ascii-060-bounds" +regex = '\b{end}' +haystack = "𝛃" +bounds = [2, 3] +matches = [] +unicode = false + +[[test]] +name = "word-end-ascii-070" +regex = '\b{end}' +haystack = " 𝛃 " +matches = [] +unicode = false + +[[test]] +name = "word-end-ascii-080" +regex = '\b{end}' +haystack = "𝛃𐆀" +matches = [] +unicode = false + +[[test]] +name = "word-end-ascii-090" +regex = '\b{end}' +haystack = "𝛃b" +matches = [[5, 5]] +unicode = false + +[[test]] +name = "word-end-ascii-110" +regex = '\b{end}' +haystack = "b𝛃" +matches = [[1, 1]] +unicode = false + +# Tests for \b{start} + +[[test]] +name = "word-start-unicode-010" +regex = '\b{start}' +haystack = "a" +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-start-unicode-020" +regex = '\b{start}' +haystack = "a " +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-start-unicode-030" +regex = '\b{start}' +haystack = " a " +matches = [[1, 1]] +unicode = true + +[[test]] +name = "word-start-unicode-040" +regex = '\b{start}' +haystack = "" +matches = [] +unicode = true + +[[test]] +name = "word-start-unicode-050" +regex = '\b{start}' +haystack = "ab" +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-start-unicode-060" +regex = '\b{start}' +haystack = "𝛃" +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-start-unicode-060-bounds" +regex = '\b{start}' +haystack = "𝛃" +bounds = [2, 3] +matches = [] +unicode = true + +[[test]] +name = "word-start-unicode-070" +regex = '\b{start}' +haystack = " 𝛃 " +matches = [[1, 1]] +unicode = true + +[[test]] +name = "word-start-unicode-080" +regex = '\b{start}' +haystack = "𝛃𐆀" +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-start-unicode-090" +regex = '\b{start}' +haystack = "𝛃b" +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-start-unicode-110" +regex = '\b{start}' 
+haystack = "b𝛃" +matches = [[0, 0]] +unicode = true + +# Tests for \b{end} + +[[test]] +name = "word-end-unicode-010" +regex = '\b{end}' +haystack = "a" +matches = [[1, 1]] +unicode = true + +[[test]] +name = "word-end-unicode-020" +regex = '\b{end}' +haystack = "a " +matches = [[1, 1]] +unicode = true + +[[test]] +name = "word-end-unicode-030" +regex = '\b{end}' +haystack = " a " +matches = [[2, 2]] +unicode = true + +[[test]] +name = "word-end-unicode-040" +regex = '\b{end}' +haystack = "" +matches = [] +unicode = true + +[[test]] +name = "word-end-unicode-050" +regex = '\b{end}' +haystack = "ab" +matches = [[2, 2]] +unicode = true + +[[test]] +name = "word-end-unicode-060" +regex = '\b{end}' +haystack = "𝛃" +matches = [[4, 4]] +unicode = true + +[[test]] +name = "word-end-unicode-060-bounds" +regex = '\b{end}' +haystack = "𝛃" +bounds = [2, 3] +matches = [] +unicode = true + +[[test]] +name = "word-end-unicode-070" +regex = '\b{end}' +haystack = " 𝛃 " +matches = [[5, 5]] +unicode = true + +[[test]] +name = "word-end-unicode-080" +regex = '\b{end}' +haystack = "𝛃𐆀" +matches = [[4, 4]] +unicode = true + +[[test]] +name = "word-end-unicode-090" +regex = '\b{end}' +haystack = "𝛃b" +matches = [[5, 5]] +unicode = true + +[[test]] +name = "word-end-unicode-110" +regex = '\b{end}' +haystack = "b𝛃" +matches = [[5, 5]] +unicode = true + +# Tests for (?-u:\b{start-half}) + +[[test]] +name = "word-start-half-ascii-010" +regex = '\b{start-half}' +haystack = "a" +matches = [[0, 0]] +unicode = false + +[[test]] +name = "word-start-half-ascii-020" +regex = '\b{start-half}' +haystack = "a " +matches = [[0, 0], [2, 2]] +unicode = false + +[[test]] +name = "word-start-half-ascii-030" +regex = '\b{start-half}' +haystack = " a " +matches = [[0, 0], [1, 1], [3, 3]] +unicode = false + +[[test]] +name = "word-start-half-ascii-040" +regex = '\b{start-half}' +haystack = "" +matches = [[0, 0]] +unicode = false + +[[test]] +name = "word-start-half-ascii-050" +regex = '\b{start-half}' 
+haystack = "ab" +matches = [[0, 0]] +unicode = false + +[[test]] +name = "word-start-half-ascii-060" +regex = '\b{start-half}' +haystack = "𝛃" +matches = [[0, 0], [4, 4]] +unicode = false + +[[test]] +name = "word-start-half-ascii-060-noutf8" +regex = '\b{start-half}' +haystack = "𝛃" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]] +unicode = false +utf8 = false + +[[test]] +name = "word-start-half-ascii-060-bounds" +regex = '\b{start-half}' +haystack = "𝛃" +bounds = [2, 3] +matches = [] +unicode = false + +[[test]] +name = "word-start-half-ascii-070" +regex = '\b{start-half}' +haystack = " 𝛃 " +matches = [[0, 0], [1, 1], [5, 5], [6, 6]] +unicode = false + +[[test]] +name = "word-start-half-ascii-080" +regex = '\b{start-half}' +haystack = "𝛃𐆀" +matches = [[0, 0], [4, 4], [8, 8]] +unicode = false + +[[test]] +name = "word-start-half-ascii-090" +regex = '\b{start-half}' +haystack = "𝛃b" +matches = [[0, 0], [4, 4]] +unicode = false + +[[test]] +name = "word-start-half-ascii-110" +regex = '\b{start-half}' +haystack = "b𝛃" +matches = [[0, 0], [5, 5]] +unicode = false + +# Tests for (?-u:\b{end-half}) + +[[test]] +name = "word-end-half-ascii-010" +regex = '\b{end-half}' +haystack = "a" +matches = [[1, 1]] +unicode = false + +[[test]] +name = "word-end-half-ascii-020" +regex = '\b{end-half}' +haystack = "a " +matches = [[1, 1], [2, 2]] +unicode = false + +[[test]] +name = "word-end-half-ascii-030" +regex = '\b{end-half}' +haystack = " a " +matches = [[0, 0], [2, 2], [3, 3]] +unicode = false + +[[test]] +name = "word-end-half-ascii-040" +regex = '\b{end-half}' +haystack = "" +matches = [[0, 0]] +unicode = false + +[[test]] +name = "word-end-half-ascii-050" +regex = '\b{end-half}' +haystack = "ab" +matches = [[2, 2]] +unicode = false + +[[test]] +name = "word-end-half-ascii-060" +regex = '\b{end-half}' +haystack = "𝛃" +matches = [[0, 0], [4, 4]] +unicode = false + +[[test]] +name = "word-end-half-ascii-060-bounds" +regex = '\b{end-half}' +haystack = "𝛃" +bounds = [2, 3] 
+matches = [] +unicode = false + +[[test]] +name = "word-end-half-ascii-070" +regex = '\b{end-half}' +haystack = " 𝛃 " +matches = [[0, 0], [1, 1], [5, 5], [6, 6]] +unicode = false + +[[test]] +name = "word-end-half-ascii-080" +regex = '\b{end-half}' +haystack = "𝛃𐆀" +matches = [[0, 0], [4, 4], [8, 8]] +unicode = false + +[[test]] +name = "word-end-half-ascii-090" +regex = '\b{end-half}' +haystack = "𝛃b" +matches = [[0, 0], [5, 5]] +unicode = false + +[[test]] +name = "word-end-half-ascii-110" +regex = '\b{end-half}' +haystack = "b𝛃" +matches = [[1, 1], [5, 5]] +unicode = false + +# Tests for \b{start-half} + +[[test]] +name = "word-start-half-unicode-010" +regex = '\b{start-half}' +haystack = "a" +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-start-half-unicode-020" +regex = '\b{start-half}' +haystack = "a " +matches = [[0, 0], [2, 2]] +unicode = true + +[[test]] +name = "word-start-half-unicode-030" +regex = '\b{start-half}' +haystack = " a " +matches = [[0, 0], [1, 1], [3, 3]] +unicode = true + +[[test]] +name = "word-start-half-unicode-040" +regex = '\b{start-half}' +haystack = "" +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-start-half-unicode-050" +regex = '\b{start-half}' +haystack = "ab" +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-start-half-unicode-060" +regex = '\b{start-half}' +haystack = "𝛃" +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-start-half-unicode-060-bounds" +regex = '\b{start-half}' +haystack = "𝛃" +bounds = [2, 3] +matches = [] +unicode = true + +[[test]] +name = "word-start-half-unicode-070" +regex = '\b{start-half}' +haystack = " 𝛃 " +matches = [[0, 0], [1, 1], [6, 6]] +unicode = true + +[[test]] +name = "word-start-half-unicode-080" +regex = '\b{start-half}' +haystack = "𝛃𐆀" +matches = [[0, 0], [8, 8]] +unicode = true + +[[test]] +name = "word-start-half-unicode-090" +regex = '\b{start-half}' +haystack = "𝛃b" +matches = [[0, 0]] +unicode = true + +[[test]] +name = 
"word-start-half-unicode-110" +regex = '\b{start-half}' +haystack = "b𝛃" +matches = [[0, 0]] +unicode = true + +# Tests for \b{end-half} + +[[test]] +name = "word-end-half-unicode-010" +regex = '\b{end-half}' +haystack = "a" +matches = [[1, 1]] +unicode = true + +[[test]] +name = "word-end-half-unicode-020" +regex = '\b{end-half}' +haystack = "a " +matches = [[1, 1], [2, 2]] +unicode = true + +[[test]] +name = "word-end-half-unicode-030" +regex = '\b{end-half}' +haystack = " a " +matches = [[0, 0], [2, 2], [3, 3]] +unicode = true + +[[test]] +name = "word-end-half-unicode-040" +regex = '\b{end-half}' +haystack = "" +matches = [[0, 0]] +unicode = true + +[[test]] +name = "word-end-half-unicode-050" +regex = '\b{end-half}' +haystack = "ab" +matches = [[2, 2]] +unicode = true + +[[test]] +name = "word-end-half-unicode-060" +regex = '\b{end-half}' +haystack = "𝛃" +matches = [[4, 4]] +unicode = true + +[[test]] +name = "word-end-half-unicode-060-bounds" +regex = '\b{end-half}' +haystack = "𝛃" +bounds = [2, 3] +matches = [] +unicode = true + +[[test]] +name = "word-end-half-unicode-070" +regex = '\b{end-half}' +haystack = " 𝛃 " +matches = [[0, 0], [5, 5], [6, 6]] +unicode = true + +[[test]] +name = "word-end-half-unicode-080" +regex = '\b{end-half}' +haystack = "𝛃𐆀" +matches = [[4, 4], [8, 8]] +unicode = true + +[[test]] +name = "word-end-half-unicode-090" +regex = '\b{end-half}' +haystack = "𝛃b" +matches = [[5, 5]] +unicode = true + +[[test]] +name = "word-end-half-unicode-110" +regex = '\b{end-half}' +haystack = "b𝛃" +matches = [[5, 5]] +unicode = true + +# Specialty tests. + +# Since \r is special cased in the start state computation (to deal with CRLF +# mode), this test ensures that the correct start state is computed when the +# pattern starts with a half word boundary assertion. 
+[[test]] +name = "word-start-half-ascii-carriage" +regex = '\b{start-half}[a-z]+' +haystack = 'ABC\rabc' +matches = [[4, 7]] +bounds = [4, 7] +unescape = true + +# Since \n is also special cased in the start state computation, this test +# ensures that the correct start state is computed when the pattern starts with +# a half word boundary assertion. +[[test]] +name = "word-start-half-ascii-linefeed" +regex = '\b{start-half}[a-z]+' +haystack = 'ABC\nabc' +matches = [[4, 7]] +bounds = [4, 7] +unescape = true + +# Like the carriage return test above, but with a custom line terminator. +[[test]] +name = "word-start-half-ascii-customlineterm" +regex = '\b{start-half}[a-z]+' +haystack = 'ABC!abc' +matches = [[4, 7]] +bounds = [4, 7] +unescape = true +line-terminator = '!' diff --git a/vendor/regex/testdata/word-boundary.toml b/vendor/regex/testdata/word-boundary.toml new file mode 100644 index 0000000..1d86fc9 --- /dev/null +++ b/vendor/regex/testdata/word-boundary.toml @@ -0,0 +1,781 @@ +# Some of these are cribbed from RE2's test suite. + +# These test \b. Below are tests for \B. 
+[[test]] +name = "wb1" +regex = '\b' +haystack = "" +matches = [] +unicode = false + +[[test]] +name = "wb2" +regex = '\b' +haystack = "a" +matches = [[0, 0], [1, 1]] +unicode = false + +[[test]] +name = "wb3" +regex = '\b' +haystack = "ab" +matches = [[0, 0], [2, 2]] +unicode = false + +[[test]] +name = "wb4" +regex = '^\b' +haystack = "ab" +matches = [[0, 0]] +unicode = false + +[[test]] +name = "wb5" +regex = '\b$' +haystack = "ab" +matches = [[2, 2]] +unicode = false + +[[test]] +name = "wb6" +regex = '^\b$' +haystack = "ab" +matches = [] +unicode = false + +[[test]] +name = "wb7" +regex = '\bbar\b' +haystack = "nobar bar foo bar" +matches = [[6, 9], [14, 17]] +unicode = false + +[[test]] +name = "wb8" +regex = 'a\b' +haystack = "faoa x" +matches = [[3, 4]] +unicode = false + +[[test]] +name = "wb9" +regex = '\bbar' +haystack = "bar x" +matches = [[0, 3]] +unicode = false + +[[test]] +name = "wb10" +regex = '\bbar' +haystack = "foo\nbar x" +matches = [[4, 7]] +unicode = false + +[[test]] +name = "wb11" +regex = 'bar\b' +haystack = "foobar" +matches = [[3, 6]] +unicode = false + +[[test]] +name = "wb12" +regex = 'bar\b' +haystack = "foobar\nxxx" +matches = [[3, 6]] +unicode = false + +[[test]] +name = "wb13" +regex = '(?:foo|bar|[A-Z])\b' +haystack = "foo" +matches = [[0, 3]] +unicode = false + +[[test]] +name = "wb14" +regex = '(?:foo|bar|[A-Z])\b' +haystack = "foo\n" +matches = [[0, 3]] +unicode = false + +[[test]] +name = "wb15" +regex = '\b(?:foo|bar|[A-Z])' +haystack = "foo" +matches = [[0, 3]] +unicode = false + +[[test]] +name = "wb16" +regex = '\b(?:foo|bar|[A-Z])\b' +haystack = "X" +matches = [[0, 1]] +unicode = false + +[[test]] +name = "wb17" +regex = '\b(?:foo|bar|[A-Z])\b' +haystack = "XY" +matches = [] +unicode = false + +[[test]] +name = "wb18" +regex = '\b(?:foo|bar|[A-Z])\b' +haystack = "bar" +matches = [[0, 3]] +unicode = false + +[[test]] +name = "wb19" +regex = '\b(?:foo|bar|[A-Z])\b' +haystack = "foo" +matches = [[0, 3]] +unicode = false + 
+[[test]] +name = "wb20" +regex = '\b(?:foo|bar|[A-Z])\b' +haystack = "foo\n" +matches = [[0, 3]] +unicode = false + +[[test]] +name = "wb21" +regex = '\b(?:foo|bar|[A-Z])\b' +haystack = "ffoo bbar N x" +matches = [[10, 11]] +unicode = false + +[[test]] +name = "wb22" +regex = '\b(?:fo|foo)\b' +haystack = "fo" +matches = [[0, 2]] +unicode = false + +[[test]] +name = "wb23" +regex = '\b(?:fo|foo)\b' +haystack = "foo" +matches = [[0, 3]] +unicode = false + +[[test]] +name = "wb24" +regex = '\b\b' +haystack = "" +matches = [] +unicode = false + +[[test]] +name = "wb25" +regex = '\b\b' +haystack = "a" +matches = [[0, 0], [1, 1]] +unicode = false + +[[test]] +name = "wb26" +regex = '\b$' +haystack = "" +matches = [] +unicode = false + +[[test]] +name = "wb27" +regex = '\b$' +haystack = "x" +matches = [[1, 1]] +unicode = false + +[[test]] +name = "wb28" +regex = '\b$' +haystack = "y x" +matches = [[3, 3]] +unicode = false + +[[test]] +name = "wb29" +regex = '(?-u:\b).$' +haystack = "x" +matches = [[0, 1]] + +[[test]] +name = "wb30" +regex = '^\b(?:fo|foo)\b' +haystack = "fo" +matches = [[0, 2]] +unicode = false + +[[test]] +name = "wb31" +regex = '^\b(?:fo|foo)\b' +haystack = "foo" +matches = [[0, 3]] +unicode = false + +[[test]] +name = "wb32" +regex = '^\b$' +haystack = "" +matches = [] +unicode = false + +[[test]] +name = "wb33" +regex = '^\b$' +haystack = "x" +matches = [] +unicode = false + +[[test]] +name = "wb34" +regex = '^(?-u:\b).$' +haystack = "x" +matches = [[0, 1]] + +[[test]] +name = "wb35" +regex = '^(?-u:\b).(?-u:\b)$' +haystack = "x" +matches = [[0, 1]] + +[[test]] +name = "wb36" +regex = '^^^^^\b$$$$$' +haystack = "" +matches = [] +unicode = false + +[[test]] +name = "wb37" +regex = '^^^^^(?-u:\b).$$$$$' +haystack = "x" +matches = [[0, 1]] + +[[test]] +name = "wb38" +regex = '^^^^^\b$$$$$' +haystack = "x" +matches = [] +unicode = false + +[[test]] +name = "wb39" +regex = '^^^^^(?-u:\b\b\b).(?-u:\b\b\b)$$$$$' +haystack = "x" +matches = [[0, 1]] + 
+[[test]] +name = "wb40" +regex = '(?-u:\b).+(?-u:\b)' +haystack = "$$abc$$" +matches = [[2, 5]] + +[[test]] +name = "wb41" +regex = '\b' +haystack = "a b c" +matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] +unicode = false + +[[test]] +name = "wb42" +regex = '\bfoo\b' +haystack = "zzz foo zzz" +matches = [[4, 7]] +unicode = false + +[[test]] +name = "wb43" +regex = '\b^' +haystack = "ab" +matches = [[0, 0]] +unicode = false + +[[test]] +name = "wb44" +regex = '$\b' +haystack = "ab" +matches = [[2, 2]] +unicode = false + + +# Tests for \B. Note that \B is not allowed if UTF-8 mode is enabled, so we +# have to disable it for most of these tests. This is because \B can match at +# non-UTF-8 boundaries. +[[test]] +name = "nb1" +regex = '\Bfoo\B' +haystack = "n foo xfoox that" +matches = [[7, 10]] +unicode = false +utf8 = false + +[[test]] +name = "nb2" +regex = 'a\B' +haystack = "faoa x" +matches = [[1, 2]] +unicode = false +utf8 = false + +[[test]] +name = "nb3" +regex = '\Bbar' +haystack = "bar x" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb4" +regex = '\Bbar' +haystack = "foo\nbar x" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb5" +regex = 'bar\B' +haystack = "foobar" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb6" +regex = 'bar\B' +haystack = "foobar\nxxx" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb7" +regex = '(?:foo|bar|[A-Z])\B' +haystack = "foox" +matches = [[0, 3]] +unicode = false +utf8 = false + +[[test]] +name = "nb8" +regex = '(?:foo|bar|[A-Z])\B' +haystack = "foo\n" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb9" +regex = '\B' +haystack = "" +matches = [[0, 0]] +unicode = false +utf8 = false + +[[test]] +name = "nb10" +regex = '\B' +haystack = "x" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb11" +regex = '\B(?:foo|bar|[A-Z])' +haystack = "foo" +matches = [] +unicode = false +utf8 = false + +[[test]] +name 
= "nb12" +regex = '\B(?:foo|bar|[A-Z])\B' +haystack = "xXy" +matches = [[1, 2]] +unicode = false +utf8 = false + +[[test]] +name = "nb13" +regex = '\B(?:foo|bar|[A-Z])\B' +haystack = "XY" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb14" +regex = '\B(?:foo|bar|[A-Z])\B' +haystack = "XYZ" +matches = [[1, 2]] +unicode = false +utf8 = false + +[[test]] +name = "nb15" +regex = '\B(?:foo|bar|[A-Z])\B' +haystack = "abara" +matches = [[1, 4]] +unicode = false +utf8 = false + +[[test]] +name = "nb16" +regex = '\B(?:foo|bar|[A-Z])\B' +haystack = "xfoo_" +matches = [[1, 4]] +unicode = false +utf8 = false + +[[test]] +name = "nb17" +regex = '\B(?:foo|bar|[A-Z])\B' +haystack = "xfoo\n" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb18" +regex = '\B(?:foo|bar|[A-Z])\B' +haystack = "foo bar vNX" +matches = [[9, 10]] +unicode = false +utf8 = false + +[[test]] +name = "nb19" +regex = '\B(?:fo|foo)\B' +haystack = "xfoo" +matches = [[1, 3]] +unicode = false +utf8 = false + +[[test]] +name = "nb20" +regex = '\B(?:foo|fo)\B' +haystack = "xfooo" +matches = [[1, 4]] +unicode = false +utf8 = false + +[[test]] +name = "nb21" +regex = '\B\B' +haystack = "" +matches = [[0, 0]] +unicode = false +utf8 = false + +[[test]] +name = "nb22" +regex = '\B\B' +haystack = "x" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb23" +regex = '\B$' +haystack = "" +matches = [[0, 0]] +unicode = false +utf8 = false + +[[test]] +name = "nb24" +regex = '\B$' +haystack = "x" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb25" +regex = '\B$' +haystack = "y x" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb26" +regex = '\B.$' +haystack = "x" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb27" +regex = '^\B(?:fo|foo)\B' +haystack = "fo" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb28" +regex = '^\B(?:fo|foo)\B' +haystack = "fo" +matches = [] +unicode = false +utf8 = false 
+ +[[test]] +name = "nb29" +regex = '^\B' +haystack = "" +matches = [[0, 0]] +unicode = false +utf8 = false + +[[test]] +name = "nb30" +regex = '^\B' +haystack = "x" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb31" +regex = '^\B\B' +haystack = "" +matches = [[0, 0]] +unicode = false +utf8 = false + +[[test]] +name = "nb32" +regex = '^\B\B' +haystack = "x" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb33" +regex = '^\B$' +haystack = "" +matches = [[0, 0]] +unicode = false +utf8 = false + +[[test]] +name = "nb34" +regex = '^\B$' +haystack = "x" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb35" +regex = '^\B.$' +haystack = "x" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb36" +regex = '^\B.\B$' +haystack = "x" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb37" +regex = '^^^^^\B$$$$$' +haystack = "" +matches = [[0, 0]] +unicode = false +utf8 = false + +[[test]] +name = "nb38" +regex = '^^^^^\B.$$$$$' +haystack = "x" +matches = [] +unicode = false +utf8 = false + +[[test]] +name = "nb39" +regex = '^^^^^\B$$$$$' +haystack = "x" +matches = [] +unicode = false +utf8 = false + + +# unicode1* and unicode2* work for both Unicode and ASCII because all matches +# are reported as byte offsets, and « and » do not correspond to word +# boundaries at either the character or byte level. +[[test]] +name = "unicode1" +regex = '\bx\b' +haystack = "«x" +matches = [[2, 3]] + +[[test]] +name = "unicode1-only-ascii" +regex = '\bx\b' +haystack = "«x" +matches = [[2, 3]] +unicode = false + +[[test]] +name = "unicode2" +regex = '\bx\b' +haystack = "x»" +matches = [[0, 1]] + +[[test]] +name = "unicode2-only-ascii" +regex = '\bx\b' +haystack = "x»" +matches = [[0, 1]] +unicode = false + +# ASCII word boundaries are completely oblivious to Unicode characters, so +# even though β is a character, an ASCII \b treats it as a word boundary +# when it is adjacent to another ASCII character. 
(The ASCII \b only looks +# at the leading byte of β.) For Unicode \b, the tests are precisely inverted. +[[test]] +name = "unicode3" +regex = '\bx\b' +haystack = 'áxβ' +matches = [] + +[[test]] +name = "unicode3-only-ascii" +regex = '\bx\b' +haystack = 'áxβ' +matches = [[2, 3]] +unicode = false + +[[test]] +name = "unicode4" +regex = '\Bx\B' +haystack = 'áxβ' +matches = [[2, 3]] + +[[test]] +name = "unicode4-only-ascii" +regex = '\Bx\B' +haystack = 'áxβ' +matches = [] +unicode = false +utf8 = false + +# The same as above, but with \b instead of \B as a sanity check. +[[test]] +name = "unicode5" +regex = '\b' +haystack = "0\U0007EF5E" +matches = [[0, 0], [1, 1]] + +[[test]] +name = "unicode5-only-ascii" +regex = '\b' +haystack = "0\U0007EF5E" +matches = [[0, 0], [1, 1]] +unicode = false +utf8 = false + +[[test]] +name = "unicode5-noutf8" +regex = '\b' +haystack = '0\xFF\xFF\xFF\xFF' +matches = [[0, 0], [1, 1]] +unescape = true +utf8 = false + +[[test]] +name = "unicode5-noutf8-only-ascii" +regex = '\b' +haystack = '0\xFF\xFF\xFF\xFF' +matches = [[0, 0], [1, 1]] +unescape = true +unicode = false +utf8 = false + +# Weird special case to ensure that ASCII \B treats each individual code unit +# as a non-word byte. (The specific codepoint is irrelevant. It's an arbitrary +# codepoint that uses 4 bytes in its UTF-8 encoding and is not a member of the +# \w character class.) +[[test]] +name = "unicode5-not" +regex = '\B' +haystack = "0\U0007EF5E" +matches = [[5, 5]] + +[[test]] +name = "unicode5-not-only-ascii" +regex = '\B' +haystack = "0\U0007EF5E" +matches = [[2, 2], [3, 3], [4, 4], [5, 5]] +unicode = false +utf8 = false + +# This gets no matches since \B only matches in the presence of valid UTF-8 +# when Unicode is enabled, even when UTF-8 mode is disabled. 
+[[test]] +name = "unicode5-not-noutf8" +regex = '\B' +haystack = '0\xFF\xFF\xFF\xFF' +matches = [] +unescape = true +utf8 = false + +# But this DOES get matches since \B in ASCII mode only looks at individual +# bytes. +[[test]] +name = "unicode5-not-noutf8-only-ascii" +regex = '\B' +haystack = '0\xFF\xFF\xFF\xFF' +matches = [[2, 2], [3, 3], [4, 4], [5, 5]] +unescape = true +unicode = false +utf8 = false + +# Some tests of no particular significance. +[[test]] +name = "unicode6" +regex = '\b[0-9]+\b' +haystack = "foo 123 bar 456 quux 789" +matches = [[4, 7], [12, 15], [21, 24]] + +[[test]] +name = "unicode7" +regex = '\b[0-9]+\b' +haystack = "foo 123 bar a456 quux 789" +matches = [[4, 7], [22, 25]] + +[[test]] +name = "unicode8" +regex = '\b[0-9]+\b' +haystack = "foo 123 bar 456a quux 789" +matches = [[4, 7], [22, 25]] + +# A variant of the problem described here: +# https://github.com/google/re2/blob/89567f5de5b23bb5ad0c26cbafc10bdc7389d1fa/re2/dfa.cc#L658-L667 +[[test]] +name = "alt-with-assertion-repetition" +regex = '(?:\b|%)+' +haystack = "z%" +bounds = [1, 2] +anchored = true +matches = [[1, 1]] diff --git a/vendor/regex/tests/api.rs b/vendor/regex/tests/api.rs deleted file mode 100644 index c7250a8..0000000 --- a/vendor/regex/tests/api.rs +++ /dev/null @@ -1,234 +0,0 @@ -#[test] -fn empty_regex_empty_match() { - let re = regex!(""); - assert_eq!(vec![(0, 0)], findall!(re, "")); -} - -#[test] -fn empty_regex_nonempty_match() { - let re = regex!(""); - assert_eq!(vec![(0, 0), (1, 1), (2, 2), (3, 3)], findall!(re, "abc")); -} - -#[test] -fn one_zero_length_match() { - let re = regex!(r"[0-9]*"); - assert_eq!(vec![(0, 0), (1, 2), (3, 4)], findall!(re, "a1b2")); -} - -#[test] -fn many_zero_length_match() { - let re = regex!(r"[0-9]*"); - assert_eq!( - vec![(0, 0), (1, 2), (3, 3), (4, 4), (5, 6)], - findall!(re, "a1bbb2") - ); -} - -#[test] -fn many_sequential_zero_length_match() { - let re = regex!(r"[0-9]?"); - assert_eq!( - vec![(0, 0), (1, 2), (2, 3), (4, 5), 
(6, 6)], - findall!(re, "a12b3c") - ); -} - -#[test] -fn quoted_bracket_set() { - let re = regex!(r"([\x{5b}\x{5d}])"); - assert_eq!(vec![(0, 1), (1, 2)], findall!(re, "[]")); - let re = regex!(r"([\[\]])"); - assert_eq!(vec![(0, 1), (1, 2)], findall!(re, "[]")); -} - -#[test] -fn first_range_starts_with_left_bracket() { - let re = regex!(r"([\[-z])"); - assert_eq!(vec![(0, 1), (1, 2)], findall!(re, "[]")); -} - -#[test] -fn range_ends_with_escape() { - let re = regex!(r"([\[-\x{5d}])"); - assert_eq!(vec![(0, 1), (1, 2)], findall!(re, "[]")); -} - -#[test] -fn empty_match_find_iter() { - let re = regex!(r".*?"); - assert_eq!(vec![(0, 0), (1, 1), (2, 2), (3, 3)], findall!(re, "abc")); -} - -#[test] -fn empty_match_captures_iter() { - let re = regex!(r".*?"); - let ms: Vec<_> = re - .captures_iter(text!("abc")) - .map(|c| c.get(0).unwrap()) - .map(|m| (m.start(), m.end())) - .collect(); - assert_eq!(ms, vec![(0, 0), (1, 1), (2, 2), (3, 3)]); -} - -#[test] -fn capture_names() { - let re = regex!(r"(.)(?P<a>.)"); - assert_eq!(3, re.captures_len()); - assert_eq!((3, Some(3)), re.capture_names().size_hint()); - assert_eq!( - vec![None, None, Some("a")], - re.capture_names().collect::<Vec<_>>() - ); -} - -#[test] -fn regex_string() { - assert_eq!(r"[a-zA-Z0-9]+", regex!(r"[a-zA-Z0-9]+").as_str()); - assert_eq!(r"[a-zA-Z0-9]+", &format!("{}", regex!(r"[a-zA-Z0-9]+"))); - assert_eq!(r"[a-zA-Z0-9]+", &format!("{:?}", regex!(r"[a-zA-Z0-9]+"))); -} - -#[test] -fn capture_index() { - let re = regex!(r"^(?P<name>.+)$"); - let cap = re.captures(t!("abc")).unwrap(); - assert_eq!(&cap[0], t!("abc")); - assert_eq!(&cap[1], t!("abc")); - assert_eq!(&cap["name"], t!("abc")); -} - -#[test] -#[should_panic] -#[cfg_attr(all(target_env = "msvc", target_pointer_width = "32"), ignore)] -fn capture_index_panic_usize() { - let re = regex!(r"^(?P<name>.+)$"); - let cap = re.captures(t!("abc")).unwrap(); - let _ = cap[2]; -} - -#[test] -#[should_panic] -#[cfg_attr(all(target_env = "msvc", 
target_pointer_width = "32"), ignore)] -fn capture_index_panic_name() { - let re = regex!(r"^(?P<name>.+)$"); - let cap = re.captures(t!("abc")).unwrap(); - let _ = cap["bad name"]; -} - -#[test] -fn capture_index_lifetime() { - // This is a test of whether the types on `caps["..."]` are general - // enough. If not, this will fail to typecheck. - fn inner(s: &str) -> usize { - let re = regex!(r"(?P<number>[0-9]+)"); - let caps = re.captures(t!(s)).unwrap(); - caps["number"].len() - } - assert_eq!(3, inner("123")); -} - -#[test] -fn capture_misc() { - let re = regex!(r"(.)(?P<a>a)?(.)(?P<b>.)"); - let cap = re.captures(t!("abc")).unwrap(); - - assert_eq!(5, cap.len()); - - assert_eq!((0, 3), { - let m = cap.get(0).unwrap(); - (m.start(), m.end()) - }); - assert_eq!(None, cap.get(2)); - assert_eq!((2, 3), { - let m = cap.get(4).unwrap(); - (m.start(), m.end()) - }); - - assert_eq!(t!("abc"), match_text!(cap.get(0).unwrap())); - assert_eq!(None, cap.get(2)); - assert_eq!(t!("c"), match_text!(cap.get(4).unwrap())); - - assert_eq!(None, cap.name("a")); - assert_eq!(t!("c"), match_text!(cap.name("b").unwrap())); -} - -#[test] -fn sub_capture_matches() { - let re = regex!(r"([a-z])(([a-z])|([0-9]))"); - let cap = re.captures(t!("a5")).unwrap(); - let subs: Vec<_> = cap.iter().collect(); - - assert_eq!(5, subs.len()); - assert!(subs[0].is_some()); - assert!(subs[1].is_some()); - assert!(subs[2].is_some()); - assert!(subs[3].is_none()); - assert!(subs[4].is_some()); - - assert_eq!(t!("a5"), match_text!(subs[0].unwrap())); - assert_eq!(t!("a"), match_text!(subs[1].unwrap())); - assert_eq!(t!("5"), match_text!(subs[2].unwrap())); - assert_eq!(t!("5"), match_text!(subs[4].unwrap())); -} - -expand!(expand1, r"(?-u)(?P<foo>\w+)", "abc", "$foo", "abc"); -expand!(expand2, r"(?-u)(?P<foo>\w+)", "abc", "$0", "abc"); -expand!(expand3, r"(?-u)(?P<foo>\w+)", "abc", "$1", "abc"); -expand!(expand4, r"(?-u)(?P<foo>\w+)", "abc", "$$1", "$1"); -expand!(expand5, r"(?-u)(?P<foo>\w+)", "abc", 
"$$foo", "$foo"); -expand!(expand6, r"(?-u)(?P<a>\w+)\s+(?P<b>\d+)", "abc 123", "$b$a", "123abc"); -expand!(expand7, r"(?-u)(?P<a>\w+)\s+(?P<b>\d+)", "abc 123", "z$bz$az", "z"); -expand!( - expand8, - r"(?-u)(?P<a>\w+)\s+(?P<b>\d+)", - "abc 123", - ".$b.$a.", - ".123.abc." -); -expand!( - expand9, - r"(?-u)(?P<a>\w+)\s+(?P<b>\d+)", - "abc 123", - " $b $a ", - " 123 abc " -); -expand!(expand10, r"(?-u)(?P<a>\w+)\s+(?P<b>\d+)", "abc 123", "$bz$az", ""); - -expand!(expand_name1, r"%(?P<Z>[a-z]+)", "%abc", "$Z%", "abc%"); -expand!(expand_name2, r"\[(?P<Z>[a-z]+)", "[abc", "$Z[", "abc["); -expand!(expand_name3, r"\{(?P<Z>[a-z]+)", "{abc", "$Z{", "abc{"); -expand!(expand_name4, r"\}(?P<Z>[a-z]+)", "}abc", "$Z}", "abc}"); -expand!(expand_name5, r"%([a-z]+)", "%abc", "$1a%", "%"); -expand!(expand_name6, r"%([a-z]+)", "%abc", "${1}a%", "abca%"); -expand!(expand_name7, r"\[(?P<Z[>[a-z]+)", "[abc", "${Z[}[", "abc["); -expand!(expand_name8, r"\[(?P<Z[>[a-z]+)", "[abc", "${foo}[", "["); -expand!(expand_name9, r"\[(?P<Z[>[a-z]+)", "[abc", "${1a}[", "["); -expand!(expand_name10, r"\[(?P<Z[>[a-z]+)", "[abc", "${#}[", "["); -expand!(expand_name11, r"\[(?P<Z[>[a-z]+)", "[abc", "${$$}[", "["); - -split!( - split1, - r"(?-u)\s+", - "a b\nc\td\n\t e", - &[t!("a"), t!("b"), t!("c"), t!("d"), t!("e")] -); -split!( - split2, - r"(?-u)\b", - "a b c", - &[t!(""), t!("a"), t!(" "), t!("b"), t!(" "), t!("c"), t!("")] -); -split!(split3, r"a$", "a", &[t!(""), t!("")]); -split!(split_none, r"-", r"a", &[t!("a")]); -split!(split_trailing_blank, r"-", r"a-", &[t!("a"), t!("")]); -split!(split_trailing_blanks, r"-", r"a--", &[t!("a"), t!(""), t!("")]); -split!(split_empty, r"-", r"", &[t!("")]); - -splitn!(splitn_below_limit, r"-", r"a", 2, &[t!("a")]); -splitn!(splitn_at_limit, r"-", r"a-b", 2, &[t!("a"), t!("b")]); -splitn!(splitn_above_limit, r"-", r"a-b-c", 2, &[t!("a"), t!("b-c")]); -splitn!(splitn_zero_limit, r"-", r"a-b", 0, empty_vec!()); -splitn!(splitn_trailing_blank, r"-", r"a-", 2, 
&[t!("a"), t!("")]); -splitn!(splitn_trailing_separator, r"-", r"a--", 2, &[t!("a"), t!("-")]); -splitn!(splitn_empty, r"-", r"", 1, &[t!("")]); diff --git a/vendor/regex/tests/api_str.rs b/vendor/regex/tests/api_str.rs deleted file mode 100644 index 480116d..0000000 --- a/vendor/regex/tests/api_str.rs +++ /dev/null @@ -1,34 +0,0 @@ -// These tests don't really make sense with the bytes API, so we only test them -// on the Unicode API. - -#[test] -fn empty_match_unicode_find_iter() { - // Tests that we still yield byte ranges at valid UTF-8 sequence boundaries - // even when we're susceptible to empty width matches. - let re = regex!(r".*?"); - assert_eq!( - vec![(0, 0), (3, 3), (4, 4), (7, 7), (8, 8)], - findall!(re, "Ⅰ1Ⅱ2") - ); -} - -#[test] -fn empty_match_unicode_captures_iter() { - // Same as empty_match_unicode_find_iter, but tests capture iteration. - let re = regex!(r".*?"); - let ms: Vec<_> = re - .captures_iter(text!("Ⅰ1Ⅱ2")) - .map(|c| c.get(0).unwrap()) - .map(|m| (m.start(), m.end())) - .collect(); - assert_eq!(vec![(0, 0), (3, 3), (4, 4), (7, 7), (8, 8)], ms); -} - -#[test] -fn match_as_str() { - let re = regex!(r"fo+"); - let caps = re.captures("barfoobar").unwrap(); - assert_eq!(caps.get(0).map(|m| m.as_str()), Some("foo")); - assert_eq!(caps.get(0).map(From::from), Some("foo")); - assert_eq!(caps.get(0).map(Into::into), Some("foo")); -} diff --git a/vendor/regex/tests/bytes.rs b/vendor/regex/tests/bytes.rs deleted file mode 100644 index d05f138..0000000 --- a/vendor/regex/tests/bytes.rs +++ /dev/null @@ -1,107 +0,0 @@ -// These are tests specifically crafted for regexes that can match arbitrary -// bytes. - -// A silly wrapper to make it possible to write and match raw bytes. 
-struct R<'a>(&'a [u8]); -impl<'a> R<'a> { - fn as_bytes(&self) -> &'a [u8] { - self.0 - } -} - -mat!(word_boundary, r"(?-u) \b", " δ", None); -#[cfg(feature = "unicode-perl")] -mat!(word_boundary_unicode, r" \b", " δ", Some((0, 1))); -mat!(word_not_boundary, r"(?-u) \B", " δ", Some((0, 1))); -#[cfg(feature = "unicode-perl")] -mat!(word_not_boundary_unicode, r" \B", " δ", None); - -mat!(perl_w_ascii, r"(?-u)\w+", "aδ", Some((0, 1))); -#[cfg(feature = "unicode-perl")] -mat!(perl_w_unicode, r"\w+", "aδ", Some((0, 3))); -mat!(perl_d_ascii, r"(?-u)\d+", "1२३9", Some((0, 1))); -#[cfg(feature = "unicode-perl")] -mat!(perl_d_unicode, r"\d+", "1२३9", Some((0, 8))); -mat!(perl_s_ascii, r"(?-u)\s+", " \u{1680}", Some((0, 1))); -#[cfg(feature = "unicode-perl")] -mat!(perl_s_unicode, r"\s+", " \u{1680}", Some((0, 4))); - -// The first `(.+)` matches two Unicode codepoints, but can't match the 5th -// byte, which isn't valid UTF-8. The second (byte based) `(.+)` takes over and -// matches. -mat!( - mixed1, - r"(.+)(?-u)(.+)", - R(b"\xCE\x93\xCE\x94\xFF"), - Some((0, 5)), - Some((0, 4)), - Some((4, 5)) -); - -mat!(case_ascii_one, r"(?i-u)a", "A", Some((0, 1))); -mat!(case_ascii_class, r"(?i-u)[a-z]+", "AaAaA", Some((0, 5))); -#[cfg(feature = "unicode-case")] -mat!(case_unicode, r"(?i)[a-z]+", "aA\u{212A}aA", Some((0, 7))); -mat!(case_not_unicode, r"(?i-u)[a-z]+", "aA\u{212A}aA", Some((0, 2))); - -mat!(negate_unicode, r"[^a]", "δ", Some((0, 2))); -mat!(negate_not_unicode, r"(?-u)[^a]", "δ", Some((0, 1))); - -// This doesn't match in a normal Unicode regex because the implicit preceding -// `.*?` is Unicode aware. -mat!(dotstar_prefix_not_unicode1, r"(?-u)a", R(b"\xFFa"), Some((1, 2))); -mat!(dotstar_prefix_not_unicode2, r"a", R(b"\xFFa"), Some((1, 2))); - -// Have fun with null bytes. -mat!( - null_bytes, - r"(?-u)(?P<cstr>[^\x00]+)\x00", - R(b"foo\x00"), - Some((0, 4)), - Some((0, 3)) -); - -// Test that lookahead operators work properly in the face of invalid UTF-8. 
-// See: https://github.com/rust-lang/regex/issues/277 -matiter!( - invalidutf8_anchor1, - r"(?-u)\xcc?^", - R(b"\x8d#;\x1a\xa4s3\x05foobarX\\\x0f0t\xe4\x9b\xa4"), - (0, 0) -); -matiter!( - invalidutf8_anchor2, - r"(?-u)^\xf7|4\xff\d\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a##########[] d\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a##########\[] #####\x80\S7|$", - R(b"\x8d#;\x1a\xa4s3\x05foobarX\\\x0f0t\xe4\x9b\xa4"), - (22, 22) -); -matiter!( - invalidutf8_anchor3, - r"(?-u)^|ddp\xff\xffdddddlQd@\x80", - R(b"\x8d#;\x1a\xa4s3\x05foobarX\\\x0f0t\xe4\x9b\xa4"), - (0, 0) -); - -// See https://github.com/rust-lang/regex/issues/303 -#[test] -fn negated_full_byte_range() { - assert!(::regex::bytes::Regex::new(r#"(?-u)[^\x00-\xff]"#).is_err()); -} - -matiter!(word_boundary_ascii1, r"(?-u:\B)x(?-u:\B)", "áxβ"); -matiter!( - word_boundary_ascii2, - r"(?-u:\B)", - "0\u{7EF5E}", - (2, 2), - (3, 3), - (4, 4), - (5, 5) -); - -// See: https://github.com/rust-lang/regex/issues/264 -mat!(ascii_boundary_no_capture, r"(?-u)\B", "\u{28f3e}", Some((0, 0))); -mat!(ascii_boundary_capture, r"(?-u)(\B)", "\u{28f3e}", Some((0, 0))); - -// See: https://github.com/rust-lang/regex/issues/271 -mat!(end_not_wb, r"$(?-u:\B)", "\u{5c124}\u{b576c}", Some((8, 8))); diff --git a/vendor/regex/tests/consistent.rs b/vendor/regex/tests/consistent.rs deleted file mode 100644 index 722f2a5..0000000 --- a/vendor/regex/tests/consistent.rs +++ /dev/null @@ -1,238 +0,0 @@ -use regex::internal::ExecBuilder; - -/// Given a regex, check if all of the backends produce the same -/// results on a number of different inputs. -/// -/// For now this just throws quickcheck at the problem, which -/// is not very good because it only really tests half of the -/// problem space. It is pretty unlikely that a random string -/// will match any given regex, so this will probably just -/// be checking that the different backends fail in the same -/// way. 
This is still worthwhile to test, but is definitely not -/// the whole story. -/// -/// TODO(ethan): In order to cover the other half of the problem -/// space, we should generate a random matching string by inspecting -/// the AST of the input regex. The right way to do this probably -/// involves adding a custom Arbitrary instance around a couple -/// of newtypes. That way we can respect the quickcheck size hinting -/// and shrinking and whatnot. -pub fn backends_are_consistent(re: &str) -> Result<u64, String> { - let standard_backends = vec![ - ( - "bounded_backtracking_re", - ExecBuilder::new(re) - .bounded_backtracking() - .build() - .map(|exec| exec.into_regex()) - .map_err(|err| format!("{}", err))?, - ), - ( - "pikevm_re", - ExecBuilder::new(re) - .nfa() - .build() - .map(|exec| exec.into_regex()) - .map_err(|err| format!("{}", err))?, - ), - ( - "default_re", - ExecBuilder::new(re) - .build() - .map(|exec| exec.into_regex()) - .map_err(|err| format!("{}", err))?, - ), - ]; - - let utf8bytes_backends = vec![ - ( - "bounded_backtracking_utf8bytes_re", - ExecBuilder::new(re) - .bounded_backtracking() - .bytes(true) - .build() - .map(|exec| exec.into_regex()) - .map_err(|err| format!("{}", err))?, - ), - ( - "pikevm_utf8bytes_re", - ExecBuilder::new(re) - .nfa() - .bytes(true) - .build() - .map(|exec| exec.into_regex()) - .map_err(|err| format!("{}", err))?, - ), - ( - "default_utf8bytes_re", - ExecBuilder::new(re) - .bytes(true) - .build() - .map(|exec| exec.into_regex()) - .map_err(|err| format!("{}", err))?, - ), - ]; - - let bytes_backends = vec![ - ( - "bounded_backtracking_bytes_re", - ExecBuilder::new(re) - .bounded_backtracking() - .only_utf8(false) - .build() - .map(|exec| exec.into_byte_regex()) - .map_err(|err| format!("{}", err))?, - ), - ( - "pikevm_bytes_re", - ExecBuilder::new(re) - .nfa() - .only_utf8(false) - .build() - .map(|exec| exec.into_byte_regex()) - .map_err(|err| format!("{}", err))?, - ), - ( - "default_bytes_re", - 
ExecBuilder::new(re) - .only_utf8(false) - .build() - .map(|exec| exec.into_byte_regex()) - .map_err(|err| format!("{}", err))?, - ), - ]; - - Ok(string_checker::check_backends(&standard_backends)? - + string_checker::check_backends(&utf8bytes_backends)? - + bytes_checker::check_backends(&bytes_backends)?) -} - -// -// A consistency checker parameterized by the input type (&str or &[u8]). -// - -macro_rules! checker { - ($module_name:ident, $regex_type:path, $mk_input:expr) => { - mod $module_name { - use quickcheck; - use quickcheck::{Arbitrary, TestResult}; - - pub fn check_backends( - backends: &[(&str, $regex_type)], - ) -> Result<u64, String> { - let mut total_passed = 0; - for regex in backends[1..].iter() { - total_passed += quickcheck_regex_eq(&backends[0], regex)?; - } - - Ok(total_passed) - } - - fn quickcheck_regex_eq( - &(name1, ref re1): &(&str, $regex_type), - &(name2, ref re2): &(&str, $regex_type), - ) -> Result<u64, String> { - quickcheck::QuickCheck::new() - .quicktest(RegexEqualityTest::new( - re1.clone(), - re2.clone(), - )) - .map_err(|err| { - format!( - "{}(/{}/) and {}(/{}/) are inconsistent.\ - QuickCheck Err: {:?}", - name1, re1, name2, re2, err - ) - }) - } - - struct RegexEqualityTest { - re1: $regex_type, - re2: $regex_type, - } - impl RegexEqualityTest { - fn new(re1: $regex_type, re2: $regex_type) -> Self { - RegexEqualityTest { re1: re1, re2: re2 } - } - } - - impl quickcheck::Testable for RegexEqualityTest { - fn result(&self, gen: &mut quickcheck::Gen) -> TestResult { - let input = $mk_input(gen); - let input = &input; - - if self.re1.find(&input) != self.re2.find(input) { - return TestResult::error(format!( - "find mismatch input={:?}", - input - )); - } - - let cap1 = self.re1.captures(input); - let cap2 = self.re2.captures(input); - match (cap1, cap2) { - (None, None) => {} - (Some(cap1), Some(cap2)) => { - for (c1, c2) in cap1.iter().zip(cap2.iter()) { - if c1 != c2 { - return TestResult::error(format!( - "captures mismatch 
input={:?}", - input - )); - } - } - } - _ => { - return TestResult::error(format!( - "captures mismatch input={:?}", - input - )) - } - } - - let fi1 = self.re1.find_iter(input); - let fi2 = self.re2.find_iter(input); - for (m1, m2) in fi1.zip(fi2) { - if m1 != m2 { - return TestResult::error(format!( - "find_iter mismatch input={:?}", - input - )); - } - } - - let ci1 = self.re1.captures_iter(input); - let ci2 = self.re2.captures_iter(input); - for (cap1, cap2) in ci1.zip(ci2) { - for (c1, c2) in cap1.iter().zip(cap2.iter()) { - if c1 != c2 { - return TestResult::error(format!( - "captures_iter mismatch input={:?}", - input - )); - } - } - } - - let s1 = self.re1.split(input); - let s2 = self.re2.split(input); - for (chunk1, chunk2) in s1.zip(s2) { - if chunk1 != chunk2 { - return TestResult::error(format!( - "split mismatch input={:?}", - input - )); - } - } - - TestResult::from_bool(true) - } - } - } // mod - }; // rule case -} // macro_rules! - -checker!(string_checker, ::regex::Regex, |gen| String::arbitrary(gen)); -checker!(bytes_checker, ::regex::bytes::Regex, |gen| Vec::<u8>::arbitrary( - gen -)); diff --git a/vendor/regex/tests/crates_regex.rs b/vendor/regex/tests/crates_regex.rs deleted file mode 100644 index 200ec27..0000000 --- a/vendor/regex/tests/crates_regex.rs +++ /dev/null @@ -1,3287 +0,0 @@ -// DO NOT EDIT. Automatically generated by 'scripts/scrape_crates_io.py' -// on 2018-06-20 09:56:32.820354. 
- -// autoshutdown-0.1.0: r"\s*(\d+)(\w)\s*" -consistent!(autoshutdown_0, r"\s*(\d+)(\w)\s*"); - -// epub-1.1.1: r"/" -consistent!(epub_0, r"/"); - -// rpi-info-0.2.0: "^Revision\t+: ([0-9a-fA-F]+)" -consistent!(rpi_info_0, "^Revision\t+: ([0-9a-fA-F]+)"); - -// rpi-info-0.2.0: "Serial\t+: ([0-9a-fA-F]+)" -consistent!(rpi_info_1, "Serial\t+: ([0-9a-fA-F]+)"); - -// pnet_macros-0.21.0: r"^u([0-9]+)(be|le|he)?$" -consistent!(pnet_macros_0, r"^u([0-9]+)(be|le|he)?$"); - -// iban_validate-1.0.3: r"^[A-Z]{2}\d{2}[A-Z\d]{1,30}$" -consistent!(iban_validate_0, r"^[A-Z]{2}\d{2}[A-Z\d]{1,30}$"); - -// markifier-0.1.0: r".*\[(?P<percent>.+)%.*\].*" -consistent!(markifier_0, r".*\[(?P<percent>.+)%.*\].*"); - -// mallumo-0.3.0: r"(#include) (\S*)(.*)" -consistent!(mallumo_0, r"(#include) (\S*)(.*)"); - -// mallumo-0.3.0: r"(ERROR: \d+:)(\d+)(: )(.+)" -consistent!(mallumo_1, r"(ERROR: \d+:)(\d+)(: )(.+)"); - -// mallumo-0.3.0: r"(\d+\()(\d+)(?:\) : )(.+)" -consistent!(mallumo_2, r"(\d+\()(\d+)(?:\) : )(.+)"); - -// magnet_more-0.0.1: r"(.+?)(\[.*?\])?" 
-consistent!(magnet_more_0, r"(.+?)(\[.*?\])?"); - -// magnet_app-0.0.1: r":(?P<k>[a-zA-Z_]+)" -consistent!(magnet_app_0, r":(?P<k>[a-zA-Z_]+)"); - -// yubibomb-0.2.0: r"^\d{6}(?:\s*,\s*\d{6})*$" -consistent!(yubibomb_0, r"^\d{6}(?:\s*,\s*\d{6})*$"); - -// multirust-rs-0.0.4: r"[\\/]([^\\/?]+)(\?.*)?$" -consistent!(multirust_rs_0, r"[\\/]([^\\/?]+)(\?.*)?$"); - -// hueclient-0.3.2: "\"[a-z]*\":null" -consistent!(hueclient_0, "\"[a-z]*\":null"); - -// hueclient-0.3.2: ",+" -consistent!(hueclient_1, ",+"); - -// hueclient-0.3.2: ",\\}" -consistent!(hueclient_2, ",\\}"); - -// hueclient-0.3.2: "\\{," -consistent!(hueclient_3, "\\{,"); - -// aerial-0.1.0: r"[a-zA-Z_\$][a-zA-Z_0-9]*" -consistent!(aerial_0, r"[a-zA-Z_\$][a-zA-Z_0-9]*"); - -// aerial-0.1.0: r"thi[sng]+" -consistent!(aerial_1, r"thi[sng]+"); - -// rvue-0.1.0: r"(.+)\s+\((.+?)\)" -consistent!(rvue_0, r"(.+)\s+\((.+?)\)"); - -// rvue-0.1.0: r"([\d\.]+)\s*out\s*of\s*([\d\.]+)" -consistent!(rvue_1, r"([\d\.]+)\s*out\s*of\s*([\d\.]+)"); - -// rvue-0.1.0: r"^([\d\.]+)\s*(?:\(\))?$" -consistent!(rvue_2, r"^([\d\.]+)\s*(?:\(\))?$"); - -// rvue-0.1.0: r"([\d\.]+)\s*Points\s*Possible" -consistent!(rvue_3, r"([\d\.]+)\s*Points\s*Possible"); - -// rvue-0.1.0: r"([\d\.]+)\s*/\s*([\d\.]+)" -consistent!(rvue_4, r"([\d\.]+)\s*/\s*([\d\.]+)"); - -// rvsim-0.1.0: r"_?([_a-z0-9]+)\s*:\s*([_a-z0-9]+)\s*[,)]" -consistent!(rvsim_0, r"_?([_a-z0-9]+)\s*:\s*([_a-z0-9]+)\s*[,)]"); - -// nereon-0.1.4: "(.*[^\\\\])\\{\\}(.*)" -consistent!(nereon_0, "(.*[^\\\\])\\{\\}(.*)"); - -// next_episode-0.3.0: r"((?i)^(.+).s(\d+)e(\d+).*)$" -consistent!(next_episode_0, r"((?i)^(.+).s(\d+)e(\d+).*)$"); - -// migrant_lib-0.19.2: r"[^a-z0-9-]+" -consistent!(migrant_lib_0, r"[^a-z0-9-]+"); - -// migrant_lib-0.19.2: r"[0-9]{14}_[a-z0-9-]+" -consistent!(migrant_lib_1, r"[0-9]{14}_[a-z0-9-]+"); - -// migrant_lib-0.19.2: r"([0-9]{14}_)?[a-z0-9-]+" -consistent!(migrant_lib_2, r"([0-9]{14}_)?[a-z0-9-]+"); - -// minipre-0.2.0: "$_" -consistent!(minipre_0, 
"$_"); - -// minifier-0.0.13: r">\s+<" -consistent!(minifier_0, r">\s+<"); - -// minifier-0.0.13: r"\s{2,}|[\r\n]" -consistent!(minifier_1, r"\s{2,}|[\r\n]"); - -// minifier-0.0.13: r"<(style|script)[\w|\s].*?>" -consistent!(minifier_2, r"<(style|script)[\w|\s].*?>"); - -// minifier-0.0.13: "<!--(.|\n)*?-->" -consistent!(minifier_3, "<!--(.|\n)*?-->"); - -// minifier-0.0.13: r"<\w.*?>" -consistent!(minifier_4, r"<\w.*?>"); - -// minifier-0.0.13: r" \s+|\s +" -consistent!(minifier_5, r" \s+|\s +"); - -// minifier-0.0.13: r"\w\s+\w" -consistent!(minifier_6, r"\w\s+\w"); - -// minifier-0.0.13: r"'\s+>" -consistent!(minifier_7, r"'\s+>"); - -// minifier-0.0.13: r"\d\s+>" -consistent!(minifier_8, r"\d\s+>"); - -// ggp-rs-0.1.2: r"(?P<relation>\([^)]+\))|(?P<prop>[a-zA-Z0-9_]+)" -consistent!(ggp_rs_0, r"(?P<relation>\([^)]+\))|(?P<prop>[a-zA-Z0-9_]+)"); - -// ggp-rs-0.1.2: r"\((.*)\)." -consistent!(ggp_rs_1, r"\((.*)\)."); - -// poe-superfilter-0.2.0: "[A-Za-z0-9_]" -consistent!(poe_superfilter_0, "[A-Za-z0-9_]"); - -// poke-a-mango-0.5.0: r"(\d+)x(\d+)" -consistent!(poke_a_mango_0, r"(\d+)x(\d+)"); - -// pop3-rs-0.1.0: r"(?P<nmsg>\d+) (?P<size>\d+)" -consistent!(pop3_rs_0, r"(?P<nmsg>\d+) (?P<size>\d+)"); - -// pop3-rs-0.1.0: r"(?P<msgid>\d+) (?P<uidl>[\x21-\x7E]{1,70})" -consistent!(pop3_rs_1, r"(?P<msgid>\d+) (?P<uidl>[\x21-\x7E]{1,70})"); - -// pop3-rs-0.1.0: r"(<.*>)\r\n$" -consistent!(pop3_rs_2, r"(<.*>)\r\n$"); - -// pop3-rs-0.1.0: r"^(?P<status>\+OK|-ERR) (?P<statustext>.*)" -consistent!(pop3_rs_3, r"^(?P<status>\+OK|-ERR) (?P<statustext>.*)"); - -// pop3-1.0.6: r"^\.\r\n$" -consistent!(pop3_0, r"^\.\r\n$"); - -// pop3-1.0.6: r"\+OK(.*)" -consistent!(pop3_1, r"\+OK(.*)"); - -// pop3-1.0.6: r"-ERR(.*)" -consistent!(pop3_2, r"-ERR(.*)"); - -// pop3-1.0.6: r"\+OK (\d+) (\d+)\r\n" -consistent!(pop3_3, r"\+OK (\d+) (\d+)\r\n"); - -// pop3-1.0.6: r"(\d+) ([\x21-\x7e]+)\r\n" -consistent!(pop3_4, r"(\d+) ([\x21-\x7e]+)\r\n"); - -// pop3-1.0.6: r"\+OK (\d+) 
([\x21-\x7e]+)\r\n" -consistent!(pop3_5, r"\+OK (\d+) ([\x21-\x7e]+)\r\n"); - -// pop3-1.0.6: r"(\d+) (\d+)\r\n" -consistent!(pop3_6, r"(\d+) (\d+)\r\n"); - -// pop3-1.0.6: r"\+OK (\d+) (\d+)\r\n" -consistent!(pop3_7, r"\+OK (\d+) (\d+)\r\n"); - -// polk-1.1.3: "github:(\\w+)/?(\\w+)?" -consistent!(polk_0, "github:(\\w+)/?(\\w+)?"); - -// geochunk-0.1.5: "^[0-9]{5}" -consistent!(geochunk_0, "^[0-9]{5}"); - -// generic-dns-update-1.1.4: r"((?:(?:0|1[\d]{0,2}|2(?:[0-4]\d?|5[0-5]?|[6-9])?|[3-9]\d?)\.){3}(?:0|1[\d]{0,2}|2(?:[0-4]\d?|5[0-5]?|[6-9])?|[3-9]\d?))" -consistent!(generic_dns_update_0, r"((?:(?:0|1[\d]{0,2}|2(?:[0-4]\d?|5[0-5]?|[6-9])?|[3-9]\d?)\.){3}(?:0|1[\d]{0,2}|2(?:[0-4]\d?|5[0-5]?|[6-9])?|[3-9]\d?))"); - -// generic-dns-update-1.1.4: r"((([0-9A-Fa-f]{1,4}:){7}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){6}:[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){5}:([0-9A-Fa-f]{1,4}:)?[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){4}:([0-9A-Fa-f]{1,4}:){0,2}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){3}:([0-9A-Fa-f]{1,4}:){0,3}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){2}:([0-9A-Fa-f]{1,4}:){0,4}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){6}((\d((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\d)\.){3}(\d((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\d))|(([0-9A-Fa-f]{1,4}:){0,5}:((\d((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\d)\.){3}(\d((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\d))|(::([0-9A-Fa-f]{1,4}:){0,5}((\d((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\d)\.){3}(\d((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\d))|([0-9A-Fa-f]{1,4}::([0-9A-Fa-f]{1,4}:){0,5}[0-9A-Fa-f]{1,4})|(::([0-9A-Fa-f]{1,4}:){0,6}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){1,7}:))" -consistent!(generic_dns_update_1, 
r"((([0-9A-Fa-f]{1,4}:){7}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){6}:[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){5}:([0-9A-Fa-f]{1,4}:)?[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){4}:([0-9A-Fa-f]{1,4}:){0,2}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){3}:([0-9A-Fa-f]{1,4}:){0,3}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){2}:([0-9A-Fa-f]{1,4}:){0,4}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){6}((\d((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\d)\.){3}(\d((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\d))|(([0-9A-Fa-f]{1,4}:){0,5}:((\d((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\d)\.){3}(\d((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\d))|(::([0-9A-Fa-f]{1,4}:){0,5}((\d((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\d)\.){3}(\d((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\d))|([0-9A-Fa-f]{1,4}::([0-9A-Fa-f]{1,4}:){0,5}[0-9A-Fa-f]{1,4})|(::([0-9A-Fa-f]{1,4}:){0,6}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){1,7}:))"); - -// generic-dns-update-1.1.4: r"<value><string>([0-9.]*)</string></value>" -consistent!( - generic_dns_update_2, - r"<value><string>([0-9.]*)</string></value>" -); - -// generic-dns-update-1.1.4: r"<int>([0-9]+)</int>" -consistent!(generic_dns_update_3, r"<int>([0-9]+)</int>"); - -// generic-dns-update-1.1.4: r"<int>([0-9]+)</int>" -consistent!(generic_dns_update_4, r"<int>([0-9]+)</int>"); - -// generic-dns-update-1.1.4: r"<boolean>([0-1]*)</boolean>" -consistent!(generic_dns_update_5, r"<boolean>([0-1]*)</boolean>"); - -// generate-nix-pkg-0.3.0: r"(\d*)\.(\d*)\.(\d*)(-(\S*))?" -consistent!(generate_nix_pkg_0, r"(\d*)\.(\d*)\.(\d*)(-(\S*))?"); - -// generate-nix-pkg-0.3.0: r"^(\S*) (\d*)\.(\d*)\.(\d*)(-(\S*))?" 
-consistent!(generate_nix_pkg_1, r"^(\S*) (\d*)\.(\d*)\.(\d*)(-(\S*))?"); - -// genact-0.6.0: r"arch/([a-z0-9_])+/" -consistent!(genact_0, r"arch/([a-z0-9_])+/"); - -// genact-0.6.0: r"arch/([a-z0-9_])+/" -consistent!(genact_1, r"arch/([a-z0-9_])+/"); - -// cron_rs-0.1.6: r"^\s*((\*(/\d+)?)|[0-9-,/]+)(\s+((\*(/\d+)?)|[0-9-,/]+)){4,5}\s*$" -consistent!( - cron_rs_0, - r"^\s*((\*(/\d+)?)|[0-9-,/]+)(\s+((\*(/\d+)?)|[0-9-,/]+)){4,5}\s*$" -); - -// systemfd-0.3.0: r"^([a-zA-Z]+)::(.+)$" -consistent!(systemfd_0, r"^([a-zA-Z]+)::(.+)$"); - -// symbolic-debuginfo-5.0.2: "__?hidden#\\d+_" -consistent!(symbolic_debuginfo_0, "__?hidden#\\d+_"); - -// symbolic-minidump-5.0.2: r"^Linux ([^ ]+) (.*) \w+(?: GNU/Linux)?$" -consistent!(symbolic_minidump_0, r"^Linux ([^ ]+) (.*) \w+(?: GNU/Linux)?$"); - -// graphql-idl-parser-0.1.1: "^(?u:\\#)(?u:[\t-\r - \u{85}-\u{85}\u{a0}-\u{a0}\u{1680}-\u{1680}\u{2000}-\u{200a}\u{2028}-\u{2029}\u{202f}-\u{202f}\u{205f}-\u{205f}\u{3000}-\u{3000}])*(?u:.)+" -consistent!(graphql_idl_parser_0, "^(?u:\\#)(?u:[\t-\r - \u{85}-\u{85}\u{a0}-\u{a0}\u{1680}-\u{1680}\u{2000}-\u{200a}\u{2028}-\u{2029}\u{202f}-\u{202f}\u{205f}-\u{205f}\u{3000}-\u{3000}])*(?u:.)+"); - -// graphql-idl-parser-0.1.1: "^(?u:=)(?u:[\t-\r - \u{85}-\u{85}\u{a0}-\u{a0}\u{1680}-\u{1680}\u{2000}-\u{200a}\u{2028}-\u{2029}\u{202f}-\u{202f}\u{205f}-\u{205f}\u{3000}-\u{3000}])*(?u:.)+" -consistent!(graphql_idl_parser_1, "^(?u:=)(?u:[\t-\r - \u{85}-\u{85}\u{a0}-\u{a0}\u{1680}-\u{1680}\u{2000}-\u{200a}\u{2028}-\u{2029}\u{202f}-\u{202f}\u{205f}-\u{205f}\u{3000}-\u{3000}])*(?u:.)+"); - -// graphql-idl-parser-0.1.1: "^(?u:[A-Z_-_a-z])(?u:[0-9A-Z_-_a-z])*" -consistent!(graphql_idl_parser_2, "^(?u:[A-Z_-_a-z])(?u:[0-9A-Z_-_a-z])*"); - -// graphql-idl-parser-0.1.1: "^(?u:!)" -consistent!(graphql_idl_parser_3, "^(?u:!)"); - -// graphql-idl-parser-0.1.1: "^(?u:\\()" -consistent!(graphql_idl_parser_4, "^(?u:\\()"); - -// graphql-idl-parser-0.1.1: "^(?u:\\))" -consistent!(graphql_idl_parser_5, 
"^(?u:\\))"); - -// graphql-idl-parser-0.1.1: "^(?u:,)" -consistent!(graphql_idl_parser_6, "^(?u:,)"); - -// graphql-idl-parser-0.1.1: "^(?u::)" -consistent!(graphql_idl_parser_7, "^(?u::)"); - -// graphql-idl-parser-0.1.1: "^(?u:@)" -consistent!(graphql_idl_parser_8, "^(?u:@)"); - -// graphql-idl-parser-0.1.1: "^(?u:\\[)" -consistent!(graphql_idl_parser_9, "^(?u:\\[)"); - -// graphql-idl-parser-0.1.1: "^(?u:\\])" -consistent!(graphql_idl_parser_10, "^(?u:\\])"); - -// graphql-idl-parser-0.1.1: "^(?u:enum)" -consistent!(graphql_idl_parser_11, "^(?u:enum)"); - -// graphql-idl-parser-0.1.1: "^(?u:implements)" -consistent!(graphql_idl_parser_12, "^(?u:implements)"); - -// graphql-idl-parser-0.1.1: "^(?u:input)" -consistent!(graphql_idl_parser_13, "^(?u:input)"); - -// graphql-idl-parser-0.1.1: "^(?u:interface)" -consistent!(graphql_idl_parser_14, "^(?u:interface)"); - -// graphql-idl-parser-0.1.1: "^(?u:scalar)" -consistent!(graphql_idl_parser_15, "^(?u:scalar)"); - -// graphql-idl-parser-0.1.1: "^(?u:type)" -consistent!(graphql_idl_parser_16, "^(?u:type)"); - -// graphql-idl-parser-0.1.1: "^(?u:union)" -consistent!(graphql_idl_parser_17, "^(?u:union)"); - -// graphql-idl-parser-0.1.1: "^(?u:\\{)" -consistent!(graphql_idl_parser_18, "^(?u:\\{)"); - -// graphql-idl-parser-0.1.1: "^(?u:\\})" -consistent!(graphql_idl_parser_19, "^(?u:\\})"); - -// grimoire-0.1.0: r"(?s)/\*(?P<config>.*?)\*/" -consistent!(grimoire_0, r"(?s)/\*(?P<config>.*?)\*/"); - -// phonenumber-0.2.0+8.9.0: r"[\d]+(?:[~\x{2053}\x{223C}\x{FF5E}][\d]+)?" 
-consistent!(phonenumber_0, r"[\d]+(?:[~\x{2053}\x{223C}\x{FF5E}][\d]+)?"); - -// phonenumber-0.2.0+8.9.0: r"[, \[\]]" -consistent!(phonenumber_1, r"[, \[\]]"); - -// phonenumber-0.2.0+8.9.0: r"[\\/] *x" -consistent!(phonenumber_2, r"[\\/] *x"); - -// phonenumber-0.2.0+8.9.0: r"[[\P{N}&&\P{L}]&&[^#]]+$" -consistent!(phonenumber_3, r"[[\P{N}&&\P{L}]&&[^#]]+$"); - -// phonenumber-0.2.0+8.9.0: r"(?:.*?[A-Za-z]){3}.*" -consistent!(phonenumber_4, r"(?:.*?[A-Za-z]){3}.*"); - -// phonenumber-0.2.0+8.9.0: r"(\D+)" -consistent!(phonenumber_5, r"(\D+)"); - -// phonenumber-0.2.0+8.9.0: r"(\$\d)" -consistent!(phonenumber_6, r"(\$\d)"); - -// phonenumber-0.2.0+8.9.0: r"\(?\$1\)?" -consistent!(phonenumber_7, r"\(?\$1\)?"); - -// phone_number-0.1.0: r"\D" -consistent!(phone_number_0, r"\D"); - -// phone_number-0.1.0: r"^0+" -consistent!(phone_number_1, r"^0+"); - -// phone_number-0.1.0: r"^89" -consistent!(phone_number_2, r"^89"); - -// phone_number-0.1.0: r"^8+" -consistent!(phone_number_3, r"^8+"); - -// phile-0.1.4: r"^ *(\^_*\^) *$" -consistent!(phile_0, r"^ *(\^_*\^) *$"); - -// phile-0.1.4: r"^[_\p{XID_Start}]$" -consistent!(phile_1, r"^[_\p{XID_Start}]$"); - -// phile-0.1.4: r"^\p{XID_Continue}$" -consistent!(phile_2, r"^\p{XID_Continue}$"); - -// uritemplate-0.1.2: "%25(?P<hex>[0-9a-fA-F][0-9a-fA-F])" -consistent!(uritemplate_0, "%25(?P<hex>[0-9a-fA-F][0-9a-fA-F])"); - -// urdf-rs-0.4.2: "^package://(\\w+)/" -consistent!(urdf_rs_0, "^package://(\\w+)/"); - -// url-match-0.1.7: r"(?P<key>[?&.])" -consistent!(url_match_0, r"(?P<key>[?&.])"); - -// url-match-0.1.7: r":(?P<key>[a-zA-Z0-9_-]+)" -consistent!(url_match_1, r":(?P<key>[a-zA-Z0-9_-]+)"); - -// tsm-sys-0.1.0: r"hello world" -consistent!(tsm_sys_0, r"hello world"); - -// deb-version-0.1.0: "^(?:(?:(?:\\d+:).+)|(?:[^:]+))$" -consistent!(deb_version_0, "^(?:(?:(?:\\d+:).+)|(?:[^:]+))$"); - -// debcargo-2.1.0: r"^(?i)(a|an|the)\s+" -consistent!(debcargo_0, r"^(?i)(a|an|the)\s+"); - -// debcargo-2.1.0: 
r"^(?i)(rust\s+)?(implementation|library|tool|crate)\s+(of|to|for)\s+" -consistent!( - debcargo_1, - r"^(?i)(rust\s+)?(implementation|library|tool|crate)\s+(of|to|for)\s+" -); - -// feaders-0.2.0: r"^.*\.h$" -consistent!(feaders_0, r"^.*\.h$"); - -// feaders-0.2.0: r"^.*\.c$" -consistent!(feaders_1, r"^.*\.c$"); - -// feaders-0.2.0: r"^.*\.hpp$" -consistent!(feaders_2, r"^.*\.hpp$"); - -// feaders-0.2.0: r"^.*\.cc$" -consistent!(feaders_3, r"^.*\.cc$"); - -// feaders-0.2.0: r"^.*\.cpp$" -consistent!(feaders_4, r"^.*\.cpp$"); - -// hyperscan-0.1.6: r"CPtr\(\w+\)" -consistent!(hyperscan_0, r"CPtr\(\w+\)"); - -// hyperscan-0.1.6: r"^Version:\s(\d\.\d\.\d)\sFeatures:\s+(\w+)?\sMode:\s(\w+)$" -consistent!( - hyperscan_1, - r"^Version:\s(\d\.\d\.\d)\sFeatures:\s+(\w+)?\sMode:\s(\w+)$" -); - -// hyperscan-0.1.6: r"RawDatabase<Block>\{db: \w+\}" -consistent!(hyperscan_2, r"RawDatabase<Block>\{db: \w+\}"); - -// hyperscan-0.1.6: r"RawSerializedDatabase\{p: \w+, len: \d+\}" -consistent!(hyperscan_3, r"RawSerializedDatabase\{p: \w+, len: \d+\}"); - -// ucd-parse-0.1.1: r"[0-9A-F]+" -consistent!(ucd_parse_0, r"[0-9A-F]+"); - -// afsort-0.2.0: r".*" -consistent!(afsort_0, r".*"); - -// afsort-0.2.0: r".*" -consistent!(afsort_1, r".*"); - -// afsort-0.2.0: r".*" -consistent!(afsort_2, r".*"); - -// afsort-0.2.0: r".*" -consistent!(afsort_3, r".*"); - -// afsort-0.2.0: r".*" -consistent!(afsort_4, r".*"); - -// afsort-0.2.0: r".*" -consistent!(afsort_5, r".*"); - -// afsort-0.2.0: r"^[a-z]+$" -consistent!(afsort_6, r"^[a-z]+$"); - -// afsort-0.2.0: r"^[a-z]+$" -consistent!(afsort_7, r"^[a-z]+$"); - -// tin-summer-1.21.4: r"(\.git|\.pijul|_darcs|\.hg)$" -consistent!(tin_summer_0, r"(\.git|\.pijul|_darcs|\.hg)$"); - -// tin-drummer-1.0.1: r".*?\.(a|la|lo|o|ll|keter|bc|dyn_o|d|rlib|crate|min\.js|hi|dyn_hi|S|jsexe|webapp|js\.externs|ibc|toc|aux|fdb_latexmk|fls|egg-info|whl|js_a|js_hi|jld|ji|js_o|so.*|dump-.*|vmb|crx|orig|elmo|elmi|pyc|mod|p_hi|p_o|prof|tix)$" 
-consistent!(tin_drummer_0, r".*?\.(a|la|lo|o|ll|keter|bc|dyn_o|d|rlib|crate|min\.js|hi|dyn_hi|S|jsexe|webapp|js\.externs|ibc|toc|aux|fdb_latexmk|fls|egg-info|whl|js_a|js_hi|jld|ji|js_o|so.*|dump-.*|vmb|crx|orig|elmo|elmi|pyc|mod|p_hi|p_o|prof|tix)$"); - -// tin-drummer-1.0.1: r".*?\.(stats|conf|h|out|cache.*|dat|pc|info|\.js)$" -consistent!( - tin_drummer_1, - r".*?\.(stats|conf|h|out|cache.*|dat|pc|info|\.js)$" -); - -// tin-drummer-1.0.1: r".*?\.(exe|a|la|o|ll|keter|bc|dyn_o|d|rlib|crate|min\.js|hi|dyn_hi|jsexe|webapp|js\.externs|ibc|toc|aux|fdb_latexmk|fls|egg-info|whl|js_a|js_hi|jld|ji|js_o|so.*|dump-.*|vmb|crx|orig|elmo|elmi|pyc|mod|p_hi|p_o|prof|tix)$" -consistent!(tin_drummer_2, r".*?\.(exe|a|la|o|ll|keter|bc|dyn_o|d|rlib|crate|min\.js|hi|dyn_hi|jsexe|webapp|js\.externs|ibc|toc|aux|fdb_latexmk|fls|egg-info|whl|js_a|js_hi|jld|ji|js_o|so.*|dump-.*|vmb|crx|orig|elmo|elmi|pyc|mod|p_hi|p_o|prof|tix)$"); - -// tin-drummer-1.0.1: r".*?\.(stats|conf|h|out|cache.*|\.js)$" -consistent!(tin_drummer_3, r".*?\.(stats|conf|h|out|cache.*|\.js)$"); - -// tin-drummer-1.0.1: r"(\.git|\.pijul|_darcs|\.hg)$" -consistent!(tin_drummer_4, r"(\.git|\.pijul|_darcs|\.hg)$"); - -// tin-drummer-1.0.1: r".*?\.(dyn_o|out|d|hi|dyn_hi|dump-.*|p_hi|p_o|prof|tix)$" -consistent!( - tin_drummer_5, - r".*?\.(dyn_o|out|d|hi|dyn_hi|dump-.*|p_hi|p_o|prof|tix)$" -); - -// tin-drummer-1.0.1: r".*?\.(ibc)$" -consistent!(tin_drummer_6, r".*?\.(ibc)$"); - -// tin-drummer-1.0.1: r"\.stack-work|dist-newstyle" -consistent!(tin_drummer_7, r"\.stack-work|dist-newstyle"); - -// timmy-0.3.0: r"_NET_WM_PID\(CARDINAL\) = (\d+)" -consistent!(timmy_0, r"_NET_WM_PID\(CARDINAL\) = (\d+)"); - -// timmy-0.3.0: r"today|yesterday|now" -consistent!(timmy_1, r"today|yesterday|now"); - -// timmy-0.3.0: r"(?P<day>\d{1,2})/(?P<month>\d{1,2})(/(?P<year>\d{4}|\d{2}))?" -consistent!( - timmy_2, - r"(?P<day>\d{1,2})/(?P<month>\d{1,2})(/(?P<year>\d{4}|\d{2}))?" 
-); - -// timmy-0.3.0: r"(?P<n>\d+) (days?|ds?)(?P<ago>( ago)?)" -consistent!(timmy_3, r"(?P<n>\d+) (days?|ds?)(?P<ago>( ago)?)"); - -// timmy-0.3.0: r"(?P<hr>\d{2}):(?P<mins>\d{2})" -consistent!(timmy_4, r"(?P<hr>\d{2}):(?P<mins>\d{2})"); - -// tinfo-0.5.0: r"^(\d+): \d+ windows \(.*\) \[\d+x\d+\]( \(attached\))?" -consistent!( - tinfo_0, - r"^(\d+): \d+ windows \(.*\) \[\d+x\d+\]( \(attached\))?" -); - -// tinfo-0.5.0: r"^(\d+):(\d+): (.*) \((\d+) panes\) \[(\d+)x(\d+)\]" -consistent!(tinfo_1, r"^(\d+):(\d+): (.*) \((\d+) panes\) \[(\d+)x(\d+)\]"); - -// timespan-0.0.4: r"(?:\\\{start\\\}|\\\{end\\\})" -consistent!(timespan_0, r"(?:\\\{start\\\}|\\\{end\\\})"); - -// timespan-0.0.4: r"(.*)\s+-\s+(.*)" -consistent!(timespan_1, r"(.*)\s+-\s+(.*)"); - -// timespan-0.0.4: r"(.*)\s+(\w+)$" -consistent!(timespan_2, r"(.*)\s+(\w+)$"); - -// timespan-0.0.4: r"(.*)\s+(\w+)$" -consistent!(timespan_3, r"(.*)\s+(\w+)$"); - -// timespan-0.0.4: r"(.*)\s+-\s+(.*)" -consistent!(timespan_4, r"(.*)\s+-\s+(.*)"); - -// titlecase-0.10.0: r"[[:lower:]]" -consistent!(titlecase_0, r"[[:lower:]]"); - -// tight-0.1.3: r"^\d+ (day|week|month|year)s?$" -consistent!(tight_0, r"^\d+ (day|week|month|year)s?$"); - -// tight-0.1.3: r"^\d+ (day|week|month|year)s?$" -consistent!(tight_1, r"^\d+ (day|week|month|year)s?$"); - -// yaml-0.2.1: r"^[-+]?(0|[1-9][0-9_]*)$" -consistent!(yaml_0, r"^[-+]?(0|[1-9][0-9_]*)$"); - -// yaml-0.2.1: r"^([-+]?)0o?([0-7_]+)$" -consistent!(yaml_1, r"^([-+]?)0o?([0-7_]+)$"); - -// yaml-0.2.1: r"^([-+]?)0x([0-9a-fA-F_]+)$" -consistent!(yaml_2, r"^([-+]?)0x([0-9a-fA-F_]+)$"); - -// yaml-0.2.1: r"^([-+]?)0b([0-1_]+)$" -consistent!(yaml_3, r"^([-+]?)0b([0-1_]+)$"); - -// yaml-0.2.1: r"^([-+]?)(\.[0-9]+|[0-9]+(\.[0-9]*)?([eE][-+]?[0-9]+)?)$" -consistent!( - yaml_4, - r"^([-+]?)(\.[0-9]+|[0-9]+(\.[0-9]*)?([eE][-+]?[0-9]+)?)$" -); - -// yaml-0.2.1: r"^[+]?(\.inf|\.Inf|\.INF)$" -consistent!(yaml_5, r"^[+]?(\.inf|\.Inf|\.INF)$"); - -// yaml-0.2.1: r"^-(\.inf|\.Inf|\.INF)$" 
-consistent!(yaml_6, r"^-(\.inf|\.Inf|\.INF)$"); - -// yaml-0.2.1: r"^(\.nan|\.NaN|\.NAN)$" -consistent!(yaml_7, r"^(\.nan|\.NaN|\.NAN)$"); - -// yaml-0.2.1: r"^(null|Null|NULL|~)$" -consistent!(yaml_8, r"^(null|Null|NULL|~)$"); - -// yaml-0.2.1: r"^(true|True|TRUE|yes|Yes|YES)$" -consistent!(yaml_9, r"^(true|True|TRUE|yes|Yes|YES)$"); - -// yaml-0.2.1: r"^(false|False|FALSE|no|No|NO)$" -consistent!(yaml_10, r"^(false|False|FALSE|no|No|NO)$"); - -// kefia-0.1.0: r"(?m)^(\S+)/(\S+) (\S+)(?: \((.*)\))?$" -consistent!(kefia_0, r"(?m)^(\S+)/(\S+) (\S+)(?: \((.*)\))?$"); - -// risp-0.7.0: "^(\\s+|;.*?(\n|$))+" -consistent!(risp_0, "^(\\s+|;.*?(\n|$))+"); - -// risp-0.7.0: "^\".*?\"" -consistent!(risp_1, "^\".*?\""); - -// risp-0.7.0: r"^[^\s\{\}()\[\]]+" -consistent!(risp_2, r"^[^\s\{\}()\[\]]+"); - -// risp-0.7.0: r"^-?\d+" -consistent!(risp_3, r"^-?\d+"); - -// ripgrep-0.8.1: "^([0-9]+)([KMG])?$" -consistent!(ripgrep_0, "^([0-9]+)([KMG])?$"); - -// riquid-0.0.1: r"^\w+" -consistent!(riquid_0, r"^\w+"); - -// riquid-0.0.1: r"^\d+" -consistent!(riquid_1, r"^\d+"); - -// recursive_disassembler-2.1.2: r"\A(0x)?([a-fA-F0-9]+)\z" -consistent!(recursive_disassembler_0, r"\A(0x)?([a-fA-F0-9]+)\z"); - -// remake-0.1.0: r"^[a-zA-Z_][a-zA-Z0-9_]*" -consistent!(remake_0, r"^[a-zA-Z_][a-zA-Z0-9_]*"); - -// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)" -consistent!(regex_decode_0, r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)"); - -// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)" -consistent!(regex_decode_1, r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)"); - -// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)" -consistent!(regex_decode_2, r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)"); - -// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)" -consistent!(regex_decode_3, r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)"); - -// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)" -consistent!(regex_decode_4, 
r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)"); - -// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)" -consistent!(regex_decode_5, r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)"); - -// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{2})\)" -consistent!(regex_decode_6, r"'(?P<title>[^']+)'\s+\((?P<year>\d{2})\)"); - -// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)" -consistent!(regex_decode_7, r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)"); - -// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)" -consistent!(regex_decode_8, r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)"); - -// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})?\)" -consistent!(regex_decode_9, r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})?\)"); - -// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})?\)" -consistent!(regex_decode_10, r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})?\)"); - -// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})?\)" -consistent!(regex_decode_11, r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})?\)"); - -// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})?\)" -consistent!(regex_decode_12, r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})?\)"); - -// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})?\)" -consistent!(regex_decode_13, r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})?\)"); - -// regex-cache-0.2.0: "[0-9]{3}-[0-9]{3}-[0-9]{4}" -consistent!(regex_cache_0, "[0-9]{3}-[0-9]{3}-[0-9]{4}"); - -// regex-cache-0.2.0: r"^\d+$" -consistent!(regex_cache_1, r"^\d+$"); - -// regex-cache-0.2.0: r"^[a-z]+$" -consistent!(regex_cache_2, r"^[a-z]+$"); - -// regex-cache-0.2.0: r"^\d+$" -consistent!(regex_cache_3, r"^\d+$"); - -// regex-cache-0.2.0: r"^\d+$" -consistent!(regex_cache_4, r"^\d+$"); - -// regex_dfa-0.5.0: r"\d{4}-\d{2}-\d{2}" -consistent!(regex_dfa_0, r"\d{4}-\d{2}-\d{2}"); - -// reaper-2.0.0: r"^[0-9\p{L} _\\.]{3,16}$" -consistent!(reaper_0, r"^[0-9\p{L} _\\.]{3,16}$"); - -// retdec-0.1.0: 
r"^attachment; filename=(.+)$" -consistent!(retdec_0, r"^attachment; filename=(.+)$"); - -// renvsubst-0.1.2: r"(\\)(?P<head>\$[0-9A-Za-z_{])" -consistent!(renvsubst_0, r"(\\)(?P<head>\$[0-9A-Za-z_{])"); - -// renvsubst-0.1.2: r"\$([[:word:]]+)" -consistent!(renvsubst_1, r"\$([[:word:]]+)"); - -// renvsubst-0.1.2: r"\$\{([[:word:]]+)\}" -consistent!(renvsubst_2, r"\$\{([[:word:]]+)\}"); - -// rexpect-0.3.0: r"'[a-z]+'" -consistent!(rexpect_0, r"'[a-z]+'"); - -// rexpect-0.3.0: r"^\d{4}-\d{2}-\d{2}$" -consistent!(rexpect_1, r"^\d{4}-\d{2}-\d{2}$"); - -// rexpect-0.3.0: r"-\d{2}-" -consistent!(rexpect_2, r"-\d{2}-"); - -// luther-0.1.0: "^a(b|c)c*$" -consistent!(luther_0, "^a(b|c)c*$"); - -// little_boxes-1.6.0: r"(\x9B|\x1B\[)[0-?]*[ -/]*[@-~]" -consistent!(little_boxes_0, r"(\x9B|\x1B\[)[0-?]*[ -/]*[@-~]"); - -// libimagentrytag-0.8.0: "^[a-zA-Z]([a-zA-Z0-9_-]*)$" -consistent!(libimagentrytag_0, "^[a-zA-Z]([a-zA-Z0-9_-]*)$"); - -// libimaginteraction-0.8.0: r"^[Yy](\n?)$" -consistent!(libimaginteraction_0, r"^[Yy](\n?)$"); - -// libimaginteraction-0.8.0: r"^[Nn](\n?)$" -consistent!(libimaginteraction_1, r"^[Nn](\n?)$"); - -// libimagutil-0.8.0: "^(?P<KEY>([^=]*))=(.*)$" -consistent!(libimagutil_0, "^(?P<KEY>([^=]*))=(.*)$"); - -// libimagutil-0.8.0: "(.*)=(\"(?P<QVALUE>([^\"]*))\"|(?P<VALUE>(.*)))$" -consistent!(libimagutil_1, "(.*)=(\"(?P<QVALUE>([^\"]*))\"|(?P<VALUE>(.*)))$"); - -// linux_ip-0.1.0: r"\s+" -consistent!(linux_ip_0, r"\s+"); - -// linux_ip-0.1.0: r"\s*[\n\r]+\s*" -consistent!(linux_ip_1, r"\s*[\n\r]+\s*"); - -// linux_ip-0.1.0: r"^([0-9a-fA-F\.:/]+)\s+dev\s+([a-z0-9\.]+)\s*(.*)$" -consistent!(linux_ip_2, r"^([0-9a-fA-F\.:/]+)\s+dev\s+([a-z0-9\.]+)\s*(.*)$"); - -// linux_ip-0.1.0: r"^([0-9a-fA-F\.:/]+|default)\s+via\s+([a-z0-9\.:]+)\s+dev\s+([a-z0-9\.]+)\s*(.*)$" -consistent!(linux_ip_3, r"^([0-9a-fA-F\.:/]+|default)\s+via\s+([a-z0-9\.:]+)\s+dev\s+([a-z0-9\.]+)\s*(.*)$"); - -// linux_ip-0.1.0: r"^(blackhole)\s+([0-9a-fA-F\.:/]+)$" 
-consistent!(linux_ip_4, r"^(blackhole)\s+([0-9a-fA-F\.:/]+)$"); - -// linux_ip-0.1.0: r"^(unreachable)\s+([0-9a-fA-F\.:/]+)\s+dev\s+([a-z0-9\.]+)\s+(.*)$" -consistent!( - linux_ip_5, - r"^(unreachable)\s+([0-9a-fA-F\.:/]+)\s+dev\s+([a-z0-9\.]+)\s+(.*)$" -); - -// linux_ip-0.1.0: r"\s*[\n\r]+\s*" -consistent!(linux_ip_6, r"\s*[\n\r]+\s*"); - -// linux_ip-0.1.0: r"^\d+:\s+([a-zA-Z0-9\.-]+)(@\S+)*:\s+(.*)$" -consistent!(linux_ip_7, r"^\d+:\s+([a-zA-Z0-9\.-]+)(@\S+)*:\s+(.*)$"); - -// linux_ip-0.1.0: r"\s*link/ether\s+([a-f0-9:]+)\s+.*" -consistent!(linux_ip_8, r"\s*link/ether\s+([a-f0-9:]+)\s+.*"); - -// linux_ip-0.1.0: r"\s*inet[6]*\s+([0-9a-f:\./]+)\s+.*" -consistent!(linux_ip_9, r"\s*inet[6]*\s+([0-9a-f:\./]+)\s+.*"); - -// linky-0.1.4: r"[^\w -]" -consistent!(linky_0, r"[^\w -]"); - -// linky-0.1.4: r"^(.*):(\d+): [^ ]* ([^ ]*)$" -consistent!(linky_1, r"^(.*):(\d+): [^ ]* ([^ ]*)$"); - -// limonite-0.2.1: r"^(\d{4}-\d{2}-\d{2})-(\d{3})-(.+)$" -consistent!(limonite_0, r"^(\d{4}-\d{2}-\d{2})-(\d{3})-(.+)$"); - -// process-queue-0.1.1: r"^[a-zA-Z]+$" -consistent!(process_queue_0, r"^[a-zA-Z]+$"); - -// pronghorn-0.1.2: r"^\{([a-zA-Z_]+)\}$" -consistent!(pronghorn_0, r"^\{([a-zA-Z_]+)\}$"); - -// protocol-ftp-client-0.1.1: "(?m:^(\\d{3}) (.+)\r$)" -consistent!(protocol_ftp_client_0, "(?m:^(\\d{3}) (.+)\r$)"); - -// protocol-ftp-client-0.1.1: "\"(.+)\"" -consistent!(protocol_ftp_client_1, "\"(.+)\""); - -// protocol-ftp-client-0.1.1: "(\\w+) [Tt]ype: (\\w+)" -consistent!(protocol_ftp_client_2, "(\\w+) [Tt]ype: (\\w+)"); - -// protocol-ftp-client-0.1.1: "(?m:^(\\d{3})-.+\r$)" -consistent!(protocol_ftp_client_3, "(?m:^(\\d{3})-.+\r$)"); - -// protocol-ftp-client-0.1.1: "Entering Passive Mode \\((\\d+),(\\d+),(\\d+),(\\d+),(\\d+),(\\d+)\\)" -consistent!( - protocol_ftp_client_4, - "Entering Passive Mode \\((\\d+),(\\d+),(\\d+),(\\d+),(\\d+),(\\d+)\\)" -); - -// protocol-ftp-client-0.1.1: "(?m:^(.+)\r$)" -consistent!(protocol_ftp_client_5, "(?m:^(.+)\r$)"); - -// 
protocol-ftp-client-0.1.1: "^([d-])(?:[rwx-]{3}){3} +\\d+ +\\w+ +\\w+ +(\\d+) +(.+) +(.+)$" -consistent!( - protocol_ftp_client_6, - "^([d-])(?:[rwx-]{3}){3} +\\d+ +\\w+ +\\w+ +(\\d+) +(.+) +(.+)$" -); - -// article-date-extractor-0.1.1: r"([\./\-_]{0,1}(19|20)\d{2})[\./\-_]{0,1}(([0-3]{0,1}[0-9][\./\-_])|(\w{3,5}[\./\-_]))([0-3]{0,1}[0-9][\./\-]{0,1})" -consistent!(article_date_extractor_0, r"([\./\-_]{0,1}(19|20)\d{2})[\./\-_]{0,1}(([0-3]{0,1}[0-9][\./\-_])|(\w{3,5}[\./\-_]))([0-3]{0,1}[0-9][\./\-]{0,1})"); - -// article-date-extractor-0.1.1: r"(?i)publishdate|pubdate|timestamp|article_date|articledate|date" -consistent!( - article_date_extractor_1, - r"(?i)publishdate|pubdate|timestamp|article_date|articledate|date" -); - -// arthas_plugin-0.1.1: r"type\((.*)\)" -consistent!(arthas_plugin_0, r"type\((.*)\)"); - -// arthas_plugin-0.1.1: r"Vec<(.*)>" -consistent!(arthas_plugin_1, r"Vec<(.*)>"); - -// arthas_plugin-0.1.1: r"Option<(.*)>" -consistent!(arthas_plugin_2, r"Option<(.*)>"); - -// arthas_plugin-0.1.1: r"HashMap<[a-z0-9A-Z]+, *(.*)>" -consistent!(arthas_plugin_3, r"HashMap<[a-z0-9A-Z]+, *(.*)>"); - -// arthas_derive-0.1.0: "Vec *< *(.*) *>" -consistent!(arthas_derive_0, "Vec *< *(.*) *>"); - -// arthas_derive-0.1.0: r"Option *< *(.*) *>" -consistent!(arthas_derive_1, r"Option *< *(.*) *>"); - -// arthas_derive-0.1.0: r"HashMap *< *[a-z0-9A-Z]+ *, *(.*) *>" -consistent!(arthas_derive_2, r"HashMap *< *[a-z0-9A-Z]+ *, *(.*) *>"); - -// arpabet-0.2.0: r"^([\w\-\(\)\.']+)\s+([^\s].*)\s*$" -consistent!(arpabet_0, r"^([\w\-\(\)\.']+)\s+([^\s].*)\s*$"); - -// arpabet-0.2.0: r"^;;;\s+" -consistent!(arpabet_1, r"^;;;\s+"); - -// glossy_codegen-0.2.0: r"/\*.*?\*/|//.*" -consistent!(glossy_codegen_0, r"/\*.*?\*/|//.*"); - -// glossy_codegen-0.2.0: "^\\s*#\\s*include\\s+<([:print:]+)>\\s*$" -consistent!(glossy_codegen_1, "^\\s*#\\s*include\\s+<([:print:]+)>\\s*$"); - -// glossy_codegen-0.2.0: "^\\s*#\\s*include\\s+\"([:print:]+)\"\\s*$" -consistent!(glossy_codegen_2, 
"^\\s*#\\s*include\\s+\"([:print:]+)\"\\s*$"); - -// glossy_codegen-0.2.0: r"^\s*#\s*version\s+(\d+)" -consistent!(glossy_codegen_3, r"^\s*#\s*version\s+(\d+)"); - -// glossy_codegen-0.2.0: r"^\s*$" -consistent!(glossy_codegen_4, r"^\s*$"); - -// gluster-1.0.1: r"(?P<addr>via \S+)" -consistent!(gluster_0, r"(?P<addr>via \S+)"); - -// gluster-1.0.1: r"(?P<src>src \S+)" -consistent!(gluster_1, r"(?P<src>src \S+)"); - -// gl_helpers-0.1.7: r"(.*)\[\d+\]" -consistent!(gl_helpers_0, r"(.*)\[\d+\]"); - -// gl_helpers-0.1.7: r"(\d+).(\d+)" -consistent!(gl_helpers_1, r"(\d+).(\d+)"); - -// glr-parser-0.0.1: r"(?P<c>[\\\.\+\*\?\(\)\|\[\]\{\}\^\$])" -consistent!(glr_parser_0, r"(?P<c>[\\\.\+\*\?\(\)\|\[\]\{\}\^\$])"); - -// glr-parser-0.0.1: r"^\w+$" -consistent!(glr_parser_1, r"^\w+$"); - -// glr-parser-0.0.1: "'[^']+'" -consistent!(glr_parser_2, "'[^']+'"); - -// hoodlum-0.5.0: r"(?m)//.*" -consistent!(hoodlum_0, r"(?m)//.*"); - -// form-checker-0.2.2: r"^1\d{10}$" -consistent!(form_checker_0, r"^1\d{10}$"); - -// form-checker-0.2.2: r"(?i)^[\w.%+-]+@(?:[A-Z0-9-]+\.)+[A-Z]{2,4}$" -consistent!(form_checker_1, r"(?i)^[\w.%+-]+@(?:[A-Z0-9-]+\.)+[A-Z]{2,4}$"); - -// wikibase-0.2.0: r"(?P<user_agent>[a-zA-Z0-9-_]+/[0-9\.]+)" -consistent!(wikibase_0, r"(?P<user_agent>[a-zA-Z0-9-_]+/[0-9\.]+)"); - -// wifiscanner-0.3.6: r"Cell [0-9]{2,} - Address:" -consistent!(wifiscanner_0, r"Cell [0-9]{2,} - Address:"); - -// wifiscanner-0.3.6: r"([0-9a-zA-Z]{1}[0-9a-zA-Z]{1}[:]{1}){5}[0-9a-zA-Z]{1}[0-9a-zA-Z]{1}" -consistent!( - wifiscanner_1, - r"([0-9a-zA-Z]{1}[0-9a-zA-Z]{1}[:]{1}){5}[0-9a-zA-Z]{1}[0-9a-zA-Z]{1}" -); - -// wifiscanner-0.3.6: r"Signal level=(\d+)/100" -consistent!(wifiscanner_2, r"Signal level=(\d+)/100"); - -// bbcode-1.0.2: r"(?s)\[b\](.*?)\[/b\]" -consistent!(bbcode_0, r"(?s)\[b\](.*?)\[/b\]"); - -// bbcode-1.0.2: r"(?s)\[i\](.*?)\[/i\]" -consistent!(bbcode_1, r"(?s)\[i\](.*?)\[/i\]"); - -// bbcode-1.0.2: r"(?s)\[u\](.*?)\[/u\]" -consistent!(bbcode_2, 
r"(?s)\[u\](.*?)\[/u\]"); - -// bbcode-1.0.2: r"(?s)\[s\](.*?)\[/s\]" -consistent!(bbcode_3, r"(?s)\[s\](.*?)\[/s\]"); - -// bbcode-1.0.2: r"(?s)\[size=(\d+)](.*?)\[/size\]" -consistent!(bbcode_4, r"(?s)\[size=(\d+)](.*?)\[/size\]"); - -// bbcode-1.0.2: r"(?s)\[color=(.+)](.*?)\[/color\]" -consistent!(bbcode_5, r"(?s)\[color=(.+)](.*?)\[/color\]"); - -// bbcode-1.0.2: r"(?s)\[center\](.*?)\[/center\]" -consistent!(bbcode_6, r"(?s)\[center\](.*?)\[/center\]"); - -// bbcode-1.0.2: r"(?s)\[left\](.*?)\[/left\]" -consistent!(bbcode_7, r"(?s)\[left\](.*?)\[/left\]"); - -// bbcode-1.0.2: r"(?s)\[right\](.*?)\[/right\]" -consistent!(bbcode_8, r"(?s)\[right\](.*?)\[/right\]"); - -// bbcode-1.0.2: r"(?s)\[table\](.*?)\[/table\]" -consistent!(bbcode_9, r"(?s)\[table\](.*?)\[/table\]"); - -// bbcode-1.0.2: r"(?s)\[td\](.*?)\[/td\]" -consistent!(bbcode_10, r"(?s)\[td\](.*?)\[/td\]"); - -// bbcode-1.0.2: r"(?s)\[tr\](.*?)\[/tr\]" -consistent!(bbcode_11, r"(?s)\[tr\](.*?)\[/tr\]"); - -// bbcode-1.0.2: r"(?s)\[th\](.*?)\[/th\]" -consistent!(bbcode_12, r"(?s)\[th\](.*?)\[/th\]"); - -// bbcode-1.0.2: r"(?s)\[url\](.*?)\[/url\]" -consistent!(bbcode_13, r"(?s)\[url\](.*?)\[/url\]"); - -// bbcode-1.0.2: r"(?s)\[url=(.+)\](.*?)\[/url\]" -consistent!(bbcode_14, r"(?s)\[url=(.+)\](.*?)\[/url\]"); - -// bbcode-1.0.2: r"(?s)\[quote\](.*?)\[/quote\]" -consistent!(bbcode_15, r"(?s)\[quote\](.*?)\[/quote\]"); - -// bbcode-1.0.2: r"(?s)\[quote=(.+)\](.*?)\[/quote\]" -consistent!(bbcode_16, r"(?s)\[quote=(.+)\](.*?)\[/quote\]"); - -// bbcode-1.0.2: r"(?s)\[img=(\d+)x(\d+)(\b.*)?\](.*?)\[/img\]" -consistent!(bbcode_17, r"(?s)\[img=(\d+)x(\d+)(\b.*)?\](.*?)\[/img\]"); - -// bbcode-1.0.2: r"(?s)\[img=(.+)(\b.*)?\](.*?)\[/img\]" -consistent!(bbcode_18, r"(?s)\[img=(.+)(\b.*)?\](.*?)\[/img\]"); - -// bbcode-1.0.2: r"(?s)\[img(\b.*)?\](.*?)\[/img\]" -consistent!(bbcode_19, r"(?s)\[img(\b.*)?\](.*?)\[/img\]"); - -// bbcode-1.0.2: r"(?s)\[ol\](.*?)\[/ol\]" -consistent!(bbcode_20, 
r"(?s)\[ol\](.*?)\[/ol\]"); - -// bbcode-1.0.2: r"(?s)\[ul\](.*?)\[/ul\]" -consistent!(bbcode_21, r"(?s)\[ul\](.*?)\[/ul\]"); - -// bbcode-1.0.2: r"(?s)\[list\](.*?)\[/list\]" -consistent!(bbcode_22, r"(?s)\[list\](.*?)\[/list\]"); - -// bbcode-1.0.2: r"(?s)\[youtube\](.*?)\[/youtube\]" -consistent!(bbcode_23, r"(?s)\[youtube\](.*?)\[/youtube\]"); - -// bbcode-1.0.2: r"(?s)\[youtube=(\d+)x(\d+)\](.*?)\[/youtube\]" -consistent!(bbcode_24, r"(?s)\[youtube=(\d+)x(\d+)\](.*?)\[/youtube\]"); - -// bbcode-1.0.2: r"(?s)\[li\](.*?)\[/li\]" -consistent!(bbcode_25, r"(?s)\[li\](.*?)\[/li\]"); - -// block-utils-0.5.0: r"loop\d+" -consistent!(block_utils_0, r"loop\d+"); - -// block-utils-0.5.0: r"ram\d+" -consistent!(block_utils_1, r"ram\d+"); - -// block-utils-0.5.0: r"md\d+" -consistent!(block_utils_2, r"md\d+"); - -// kvvliveapi-0.1.0: r"^([1-9]) min$" -consistent!(kvvliveapi_0, r"^([1-9]) min$"); - -// rfc822_sanitizer-0.3.3: r"(\d{2}):(\d{2}):(\d{2})" -consistent!(rfc822_sanitizer_0, r"(\d{2}):(\d{2}):(\d{2})"); - -// rfc822_sanitizer-0.3.3: r"(\d{1,2}):(\d{1,2}):(\d{1,2})" -consistent!(rfc822_sanitizer_1, r"(\d{1,2}):(\d{1,2}):(\d{1,2})"); - -// faker-0.0.4: r"[2-9]" -consistent!(faker_0, r"[2-9]"); - -// faker-0.0.4: r"[1-9]" -consistent!(faker_1, r"[1-9]"); - -// faker-0.0.4: r"[0-9]" -consistent!(faker_2, r"[0-9]"); - -// faker-0.0.4: r"\d{10}" -consistent!(faker_3, r"\d{10}"); - -// faker-0.0.4: r"\d{1}" -consistent!(faker_4, r"\d{1}"); - -// faker-0.0.4: r"^\w+" -consistent!(faker_5, r"^\w+"); - -// faker-0.0.4: r"^\w+" -consistent!(faker_6, r"^\w+"); - -// faker-0.0.4: r"^(\w+\.? ?){2,3}$" -consistent!(faker_7, r"^(\w+\.? 
?){2,3}$"); - -// faker-0.0.4: r"^[A-Z][a-z]+\.?$" -consistent!(faker_8, r"^[A-Z][a-z]+\.?$"); - -// faker-0.0.4: r"^[A-Z][A-Za-z]*\.?$" -consistent!(faker_9, r"^[A-Z][A-Za-z]*\.?$"); - -// faker-0.0.4: r"http://lorempixel.com/100/100/\w+" -consistent!(faker_10, r"http://lorempixel.com/100/100/\w+"); - -// faker-0.0.4: r"http://lorempixel.com/100/100/cats" -consistent!(faker_11, r"http://lorempixel.com/100/100/cats"); - -// fancy-regex-0.1.0: "(?i:ß)" -consistent!(fancy_regex_0, "(?i:ß)"); - -// fancy-regex-0.1.0: "(?i:\\x{0587})" -consistent!(fancy_regex_1, "(?i:\\x{0587})"); - -// fancy-regex-0.1.0: "^\\\\([!-/:-@\\[-`\\{-~aftnrv]|[0-7]{1,3}|x[0-9a-fA-F]{2}|x\\{[0-9a-fA-F]{1,6}\\})" -consistent!(fancy_regex_2, "^\\\\([!-/:-@\\[-`\\{-~aftnrv]|[0-7]{1,3}|x[0-9a-fA-F]{2}|x\\{[0-9a-fA-F]{1,6}\\})"); - -// fancy-prompt-0.1.5: r"/([^/])[^/]+/" -consistent!(fancy_prompt_0, r"/([^/])[^/]+/"); - -// fancy-prompt-0.1.5: r"^([^:]+):.*?(?::([^:]+))?$" -consistent!(fancy_prompt_1, r"^([^:]+):.*?(?::([^:]+))?$"); - -// fanta-0.2.0: r"^(/?__\w+__)/(.*)" -consistent!(fanta_0, r"^(/?__\w+__)/(.*)"); - -// fanta-cli-0.1.1: r"(.)([A-Z])" -consistent!(fanta_cli_0, r"(.)([A-Z])"); - -// fanta-cli-0.1.1: "\\{:[^\\s]+\\}" -consistent!(fanta_cli_1, "\\{:[^\\s]+\\}"); - -// amethyst_tools-0.7.1: "(?P<last>[^\r])\n" -consistent!(amethyst_tools_0, "(?P<last>[^\r])\n"); - -// amigo-0.3.1: r"^-?\d+(\.\d)?" -consistent!(amigo_0, r"^-?\d+(\.\d)?"); - -// amigo-0.3.1: r"^[a-zA-Z_]+[\w-]*[!?_]?" 
-consistent!(amigo_1, r"^[a-zA-Z_]+[\w-]*[!?_]?"); - -// amigo-0.3.1: r"^\(" -consistent!(amigo_2, r"^\("); - -// amigo-0.3.1: r"^\)" -consistent!(amigo_3, r"^\)"); - -// amigo-0.3.1: r"^\s+" -consistent!(amigo_4, r"^\s+"); - -// ethcore-logger-1.12.0: "\x1b\\[[^m]+m" -consistent!(ethcore_logger_0, "\x1b\\[[^m]+m"); - -// dash2html-1.0.1: r"__.*?__" -consistent!(dash2html_0, r"__.*?__"); - -// dash2html-1.0.1: r"(?i)@(?:time|clipboard|cursor|date)" -consistent!(dash2html_1, r"(?i)@(?:time|clipboard|cursor|date)"); - -// os_type-2.0.0: r"^Microsoft Windows \[Version\s(\d+\.\d+\.\d+)\]$" -consistent!(os_type_0, r"^Microsoft Windows \[Version\s(\d+\.\d+\.\d+)\]$"); - -// os_type-2.0.0: r"ProductName:\s([\w\s]+)\n" -consistent!(os_type_1, r"ProductName:\s([\w\s]+)\n"); - -// os_type-2.0.0: r"ProductVersion:\s(\w+\.\w+\.\w+)" -consistent!(os_type_2, r"ProductVersion:\s(\w+\.\w+\.\w+)"); - -// os_type-2.0.0: r"BuildVersion:\s(\w+)" -consistent!(os_type_3, r"BuildVersion:\s(\w+)"); - -// os_type-2.0.0: r"(\w+) Linux release" -consistent!(os_type_4, r"(\w+) Linux release"); - -// os_type-2.0.0: r"release\s([\w\.]+)" -consistent!(os_type_5, r"release\s([\w\.]+)"); - -// os_type-2.0.0: r"Distributor ID:\s(\w+)" -consistent!(os_type_6, r"Distributor ID:\s(\w+)"); - -// os_type-2.0.0: r"Release:\s([\w\.]+)" -consistent!(os_type_7, r"Release:\s([\w\.]+)"); - -// bindgen-0.37.0: r"typename type\-parameter\-\d+\-\d+::.+" -consistent!(bindgen_0, r"typename type\-parameter\-\d+\-\d+::.+"); - -// imap-0.8.1: "^+(.*)\r\n" -consistent!(imap_0, "^+(.*)\r\n"); - -// image-base64-0.1.0: r"^ffd8ffe0" -consistent!(image_base64_0, r"^ffd8ffe0"); - -// image-base64-0.1.0: r"^89504e47" -consistent!(image_base64_1, r"^89504e47"); - -// image-base64-0.1.0: r"^47494638" -consistent!(image_base64_2, r"^47494638"); - -// json-pointer-0.3.2: "^(/([^/~]|~[01])*)*$" -consistent!(json_pointer_0, "^(/([^/~]|~[01])*)*$"); - -// json-pointer-0.3.2: "^#(/([^/~%]|~[01]|%[0-9a-fA-F]{2})*)*$" 
-consistent!(json_pointer_1, "^#(/([^/~%]|~[01]|%[0-9a-fA-F]{2})*)*$"); - -// mysql_common-0.7.0: r"^5.5.5-(\d{1,2})\.(\d{1,2})\.(\d{1,3})-MariaDB" -consistent!(mysql_common_0, r"^5.5.5-(\d{1,2})\.(\d{1,2})\.(\d{1,3})-MariaDB"); - -// mysql_common-0.7.0: r"^(\d{1,2})\.(\d{1,2})\.(\d{1,3})(.*)" -consistent!(mysql_common_1, r"^(\d{1,2})\.(\d{1,2})\.(\d{1,3})(.*)"); - -// government_id-0.1.0: r"^[0-9]{4}[0-9A-Z]{2}[0-9]{3}$" -consistent!(government_id_0, r"^[0-9]{4}[0-9A-Z]{2}[0-9]{3}$"); - -// ohmers-0.1.1: r"UniqueIndexViolation: (\w+)" -consistent!(ohmers_0, r"UniqueIndexViolation: (\w+)"); - -// eliza-1.0.0: r"(.*) you are (.*)" -consistent!(eliza_0, r"(.*) you are (.*)"); - -// eliza-1.0.0: r"(.*) you are (.*)" -consistent!(eliza_1, r"(.*) you are (.*)"); - -// eliza-1.0.0: r"(.*) you are (.*)" -consistent!(eliza_2, r"(.*) you are (.*)"); - -// chema-0.0.5: "^\\s*\\*" -consistent!(chema_0, "^\\s*\\*"); - -// chema-0.0.5: "^\\s*@(\\w+)\\s+(.*)" -consistent!(chema_1, "^\\s*@(\\w+)\\s+(.*)"); - -// chord3-0.3.0: r"^\s*#" -consistent!(chord3_0, r"^\s*#"); - -// chord3-0.3.0: r"\{(?P<cmd>\w+)(?::?\s*(?P<arg>.*))?\}" -consistent!(chord3_1, r"\{(?P<cmd>\w+)(?::?\s*(?P<arg>.*))?\}"); - -// chord3-0.3.0: r"\{(eot|end_of_tab):?\s*" -consistent!(chord3_2, r"\{(eot|end_of_tab):?\s*"); - -// chord3-0.3.0: r"([^\[]*)(?:\[([^\]]*)\])?" 
-consistent!(chord3_3, r"([^\[]*)(?:\[([^\]]*)\])?"); - -// checkmail-0.1.1: "^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$" -consistent!(checkmail_0, "^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$"); - -// cntk-0.2.1: r"\b\w\w+\b" -consistent!(cntk_0, r"\b\w\w+\b"); - -// cntk-0.2.1: r"\b\w\w+\b" -consistent!(cntk_1, r"\b\w\w+\b"); - -// cniguru-0.1.0: r"\(id: (\d+)\)" -consistent!(cniguru_0, r"\(id: (\d+)\)"); - -// upm_lib-0.3.0: r"^(\d+)\.(\d+)\.(\d+)(?:-([\dA-Za-z-]+(?:\.[\dA-Za-z-]+)*))?(?:\+([\dA-Za-z-]+(?:\.[\dA-Za-z-]+)*))?$" -consistent!(upm_lib_0, r"^(\d+)\.(\d+)\.(\d+)(?:-([\dA-Za-z-]+(?:\.[\dA-Za-z-]+)*))?(?:\+([\dA-Za-z-]+(?:\.[\dA-Za-z-]+)*))?$"); - -// avro-0.2.1: r"^\s*(\*+(\s+))?" -consistent!(avro_0, r"^\s*(\*+(\s+))?"); - -// avro-0.2.1: r"^\s*(\*+)?" -consistent!(avro_1, r"^\s*(\*+)?"); - -// nomi-0.0.2: "[0-9]+" -consistent!(nomi_0, "[0-9]+"); - -// nodes-0.1.0: "([0-9]+)@(?:nodes|n)?:([^@]+)?" -consistent!(nodes_0, "([0-9]+)@(?:nodes|n)?:([^@]+)?"); - -// not-stakkr-1.0.0: r"(?i)in (\d+) (second|minute|hour|day|week)s?" 
-consistent!(not_stakkr_0, r"(?i)in (\d+) (second|minute|hour|day|week)s?"); - -// notetxt-0.0.1: "^([A-Za-z0-9 -_:]+)\n-+\n" -consistent!(notetxt_0, "^([A-Za-z0-9 -_:]+)\n-+\n"); - -// nail-0.1.0-pre.0: r"^-?[0-9]+(\.[0-9]+)?([eE]-?[0-9]+)?$" -consistent!(nail_0, r"^-?[0-9]+(\.[0-9]+)?([eE]-?[0-9]+)?$"); - -// nail-0.1.0-pre.0: r"^-?[0-9]+$" -consistent!(nail_1, r"^-?[0-9]+$"); - -// askalono-0.2.0: r"[^\w\s\pP]+" -consistent!(askalono_0, r"[^\w\s\pP]+"); - -// askalono-0.2.0: r"(?x)[ \t\p{Zs} \\ / \| \x2044 ]+" -consistent!(askalono_1, r"(?x)[ \t\p{Zs} \\ / \| \x2044 ]+"); - -// askalono-0.2.0: r"\p{Pd}+" -consistent!(askalono_2, r"\p{Pd}+"); - -// askalono-0.2.0: r"\p{Ps}+" -consistent!(askalono_3, r"\p{Ps}+"); - -// askalono-0.2.0: r"\p{Pe}+" -consistent!(askalono_4, r"\p{Pe}+"); - -// askalono-0.2.0: r"\p{Pc}+" -consistent!(askalono_5, r"\p{Pc}+"); - -// askalono-0.2.0: r"[©Ⓒⓒ]" -consistent!(askalono_6, r"[©Ⓒⓒ]"); - -// askalono-0.2.0: r"[\r\n\v\f]" -consistent!(askalono_7, r"[\r\n\v\f]"); - -// askalono-0.2.0: r"\n{3,}" -consistent!(askalono_8, r"\n{3,}"); - -// askalono-0.2.0: r"[^\w\s]+" -consistent!(askalono_9, r"[^\w\s]+"); - -// askalono-0.2.0: r"\s+" -consistent!(askalono_10, r"\s+"); - -// assembunny_plus-0.0.3: r"[^0-9a-zA-Z_]" -consistent!(assembunny_plus_0, r"[^0-9a-zA-Z_]"); - -// assembunny_plus-0.0.3: r"[0-9]" -consistent!(assembunny_plus_1, r"[0-9]"); - -// salt-compressor-0.4.0: r"(?m)^Minion (\S*) did not respond\. No job will be sent\.$" -consistent!( - salt_compressor_0, - r"(?m)^Minion (\S*) did not respond\. 
No job will be sent\.$" -); - -// sabisabi-0.4.1: r"</?[^>]+?>" -consistent!(sabisabi_0, r"</?[^>]+?>"); - -// sabisabi-0.4.1: r"\([^)]*\)" -consistent!(sabisabi_1, r"\([^)]*\)"); - -// sassers-0.13.5-h28: "@import \"([^\"]*)\";" -consistent!(sassers_0, "@import \"([^\"]*)\";"); - -// shadowsocks-0.6.2: r"[A-Za-z\d-]{1,63}$" -consistent!(shadowsocks_0, r"[A-Za-z\d-]{1,63}$"); - -// shkeleton-0.1.5: "[abc]+" -consistent!(shkeleton_0, "[abc]+"); - -// shellwords-0.1.0: r"([^A-Za-z0-9_\-.,:/@\n])" -consistent!(shellwords_0, r"([^A-Za-z0-9_\-.,:/@\n])"); - -// shellwords-0.1.0: r"\n" -consistent!(shellwords_1, r"\n"); - -// shush-0.1.5: "(?P<num>[0-9]+)(?P<units>[dhms])" -consistent!(shush_0, "(?P<num>[0-9]+)(?P<units>[dhms])"); - -// woothee-0.8.0: r"(?:Chrome|CrMo|CriOS)/([.0-9]+)" -consistent!(woothee_0, r"(?:Chrome|CrMo|CriOS)/([.0-9]+)"); - -// woothee-0.8.0: r"Vivaldi/([.0-9]+)" -consistent!(woothee_1, r"Vivaldi/([.0-9]+)"); - -// woothee-0.8.0: r"Firefox/([.0-9]+)" -consistent!(woothee_2, r"Firefox/([.0-9]+)"); - -// woothee-0.8.0: r"^Mozilla/[.0-9]+ \((?:Mobile|Tablet);(?:.*;)? rv:([.0-9]+)\) Gecko/[.0-9]+ Firefox/[.0-9]+$" -consistent!(woothee_3, r"^Mozilla/[.0-9]+ \((?:Mobile|Tablet);(?:.*;)? 
rv:([.0-9]+)\) Gecko/[.0-9]+ Firefox/[.0-9]+$"); - -// woothee-0.8.0: r"FxiOS/([.0-9]+)" -consistent!(woothee_4, r"FxiOS/([.0-9]+)"); - -// woothee-0.8.0: r"\(([^;)]+);FOMA;" -consistent!(woothee_5, r"\(([^;)]+);FOMA;"); - -// woothee-0.8.0: r"jig browser[^;]+; ([^);]+)" -consistent!(woothee_6, r"jig browser[^;]+; ([^);]+)"); - -// woothee-0.8.0: r"(?i)rss(?:reader|bar|[-_ /;()]|[ +]*/)" -consistent!(woothee_7, r"(?i)rss(?:reader|bar|[-_ /;()]|[ +]*/)"); - -// woothee-0.8.0: r"(?i)(?:bot|crawler|spider)(?:[-_ ./;@()]|$)" -consistent!(woothee_8, r"(?i)(?:bot|crawler|spider)(?:[-_ ./;@()]|$)"); - -// woothee-0.8.0: r"(?i)(?:feed|web) ?parser" -consistent!(woothee_9, r"(?i)(?:feed|web) ?parser"); - -// woothee-0.8.0: r"(?i)watch ?dog" -consistent!(woothee_10, r"(?i)watch ?dog"); - -// woothee-0.8.0: r"Edge/([.0-9]+)" -consistent!(woothee_11, r"Edge/([.0-9]+)"); - -// woothee-0.8.0: r"MSIE ([.0-9]+);" -consistent!(woothee_12, r"MSIE ([.0-9]+);"); - -// woothee-0.8.0: r"Version/([.0-9]+)" -consistent!(woothee_13, r"Version/([.0-9]+)"); - -// woothee-0.8.0: r"Opera[/ ]([.0-9]+)" -consistent!(woothee_14, r"Opera[/ ]([.0-9]+)"); - -// woothee-0.8.0: r"OPR/([.0-9]+)" -consistent!(woothee_15, r"OPR/([.0-9]+)"); - -// woothee-0.8.0: r"Version/([.0-9]+)" -consistent!(woothee_16, r"Version/([.0-9]+)"); - -// woothee-0.8.0: r"(?:SoftBank|Vodafone|J-PHONE)/[.0-9]+/([^ /;()]+)" -consistent!(woothee_17, r"(?:SoftBank|Vodafone|J-PHONE)/[.0-9]+/([^ /;()]+)"); - -// woothee-0.8.0: r"Trident/([.0-9]+);" -consistent!(woothee_18, r"Trident/([.0-9]+);"); - -// woothee-0.8.0: r" rv:([.0-9]+)" -consistent!(woothee_19, r" rv:([.0-9]+)"); - -// woothee-0.8.0: r"IEMobile/([.0-9]+);" -consistent!(woothee_20, r"IEMobile/([.0-9]+);"); - -// woothee-0.8.0: r"(?:WILLCOM|DDIPOCKET);[^/]+/([^ /;()]+)" -consistent!(woothee_21, r"(?:WILLCOM|DDIPOCKET);[^/]+/([^ /;()]+)"); - -// woothee-0.8.0: r"Windows ([ .a-zA-Z0-9]+)[;\\)]" -consistent!(woothee_22, r"Windows ([ .a-zA-Z0-9]+)[;\\)]"); - -// 
woothee-0.8.0: r"^Phone(?: OS)? ([.0-9]+)" -consistent!(woothee_23, r"^Phone(?: OS)? ([.0-9]+)"); - -// woothee-0.8.0: r"iP(hone;|ad;|od) .*like Mac OS X" -consistent!(woothee_24, r"iP(hone;|ad;|od) .*like Mac OS X"); - -// woothee-0.8.0: r"Version/([.0-9]+)" -consistent!(woothee_25, r"Version/([.0-9]+)"); - -// woothee-0.8.0: r"rv:(\d+\.\d+\.\d+)" -consistent!(woothee_26, r"rv:(\d+\.\d+\.\d+)"); - -// woothee-0.8.0: r"FreeBSD ([^;\)]+);" -consistent!(woothee_27, r"FreeBSD ([^;\)]+);"); - -// woothee-0.8.0: r"CrOS ([^\)]+)\)" -consistent!(woothee_28, r"CrOS ([^\)]+)\)"); - -// woothee-0.8.0: r"Android[- ](\d+\.\d+(?:\.\d+)?)" -consistent!(woothee_29, r"Android[- ](\d+\.\d+(?:\.\d+)?)"); - -// woothee-0.8.0: r"PSP \(PlayStation Portable\); ([.0-9]+)\)" -consistent!(woothee_30, r"PSP \(PlayStation Portable\); ([.0-9]+)\)"); - -// woothee-0.8.0: r"PLAYSTATION 3;? ([.0-9]+)\)" -consistent!(woothee_31, r"PLAYSTATION 3;? ([.0-9]+)\)"); - -// woothee-0.8.0: r"PlayStation Vita ([.0-9]+)\)" -consistent!(woothee_32, r"PlayStation Vita ([.0-9]+)\)"); - -// woothee-0.8.0: r"PlayStation 4 ([.0-9]+)\)" -consistent!(woothee_33, r"PlayStation 4 ([.0-9]+)\)"); - -// woothee-0.8.0: r"BB10(?:.+)Version/([.0-9]+) " -consistent!(woothee_34, r"BB10(?:.+)Version/([.0-9]+) "); - -// woothee-0.8.0: r"BlackBerry(?:\d+)/([.0-9]+) " -consistent!(woothee_35, r"BlackBerry(?:\d+)/([.0-9]+) "); - -// woothee-0.8.0: r"; CPU(?: iPhone)? OS (\d+_\d+(?:_\d+)?) like Mac OS X" -consistent!( - woothee_36, - r"; CPU(?: iPhone)? OS (\d+_\d+(?:_\d+)?) 
like Mac OS X" -); - -// woothee-0.8.0: r"Mac OS X (10[._]\d+(?:[._]\d+)?)(?:\)|;)" -consistent!(woothee_37, r"Mac OS X (10[._]\d+(?:[._]\d+)?)(?:\)|;)"); - -// woothee-0.8.0: r"^(?:Apache-HttpClient/|Jakarta Commons-HttpClient/|Java/)" -consistent!( - woothee_38, - r"^(?:Apache-HttpClient/|Jakarta Commons-HttpClient/|Java/)" -); - -// woothee-0.8.0: r"[- ]HttpClient(/|$)" -consistent!(woothee_39, r"[- ]HttpClient(/|$)"); - -// woothee-0.8.0: r"^(?:PHP|WordPress|CakePHP|PukiWiki|PECL::HTTP)(?:/| |$)" -consistent!( - woothee_40, - r"^(?:PHP|WordPress|CakePHP|PukiWiki|PECL::HTTP)(?:/| |$)" -); - -// woothee-0.8.0: r"(?:PEAR HTTP_Request|HTTP_Request)(?: class|2)" -consistent!(woothee_41, r"(?:PEAR HTTP_Request|HTTP_Request)(?: class|2)"); - -// woothee-0.8.0: r"(?:Rome Client |UnwindFetchor/|ia_archiver |Summify |PostRank/)" -consistent!( - woothee_42, - r"(?:Rome Client |UnwindFetchor/|ia_archiver |Summify |PostRank/)" -); - -// woothee-0.8.0: r"Sleipnir/([.0-9]+)" -consistent!(woothee_43, r"Sleipnir/([.0-9]+)"); - -// word_replace-0.0.3: r"@@[a-z|A-Z|\d]+@@" -consistent!(word_replace_0, r"@@[a-z|A-Z|\d]+@@"); - -// wordcount-0.1.0: r"\w+" -consistent!(wordcount_0, r"\w+"); - -// just-0.3.12: "^([^=]+)=(.*)$" -consistent!(just_0, "^([^=]+)=(.*)$"); - -// emote-0.1.0: r":[a-zA-Z_]+?:" -consistent!(emote_0, r":[a-zA-Z_]+?:"); - -// emojicons-1.0.1: r":([a-zA-Z0-9_+-]+):" -consistent!(emojicons_0, r":([a-zA-Z0-9_+-]+):"); - -// git2_codecommit-0.1.2: r"git-codecommit\.([a-z0-9-]+)\.amazonaws\.com" -consistent!( - git2_codecommit_0, - r"git-codecommit\.([a-z0-9-]+)\.amazonaws\.com" -); - -// git-workarea-3.1.2: r"^submodule\.(?P<name>.*)\.(?P<key>[^=]*)=(?P<value>.*)$" -consistent!( - git_workarea_0, - r"^submodule\.(?P<name>.*)\.(?P<key>[^=]*)=(?P<value>.*)$" -); - -// git-shell-enforce-directory-1.0.0: r"^(?P<command>git-(?:receive|upload)-pack) '(?P<path>.+)'$" -consistent!( - git_shell_enforce_directory_0, - r"^(?P<command>git-(?:receive|upload)-pack) 
'(?P<path>.+)'$" -); - -// git-journal-1.6.3: r"[ \n]:(.*?):" -consistent!(git_journal_0, r"[ \n]:(.*?):"); - -// git-find-0.3.2: r"^git@(?P<host>[[:alnum:]\._-]+):(?P<path>[[:alnum:]\._\-/]+).git$" -consistent!( - git_find_0, - r"^git@(?P<host>[[:alnum:]\._-]+):(?P<path>[[:alnum:]\._\-/]+).git$" -); - -// gitlab-api-0.6.0: r"private_token=\w{20}" -consistent!(gitlab_api_0, r"private_token=\w{20}"); - -// td-client-0.7.0: "^(http://|https://)" -consistent!(td_client_0, "^(http://|https://)"); - -// karaconv-0.3.0: r"--(?P<type>[a-zA-Z]+)-- (?P<contents>.*)" -consistent!(karaconv_0, r"--(?P<type>[a-zA-Z]+)-- (?P<contents>.*)"); - -// katana-1.0.2: r"(?P<comp>et al\.)(?:\.)" -consistent!(katana_0, r"(?P<comp>et al\.)(?:\.)"); - -// katana-1.0.2: r"\.{3}" -consistent!(katana_1, r"\.{3}"); - -// katana-1.0.2: r"(?P<number>[0-9]+)\.(?P<decimal>[0-9]+)" -consistent!(katana_2, r"(?P<number>[0-9]+)\.(?P<decimal>[0-9]+)"); - -// katana-1.0.2: r"\s\.(?P<nums>[0-9]+)" -consistent!(katana_3, r"\s\.(?P<nums>[0-9]+)"); - -// katana-1.0.2: r"(?:[A-Za-z]\.){2,}" -consistent!(katana_4, r"(?:[A-Za-z]\.){2,}"); - -// katana-1.0.2: r"(?P<init>[A-Z])(?P<point>\.)" -consistent!(katana_5, r"(?P<init>[A-Z])(?P<point>\.)"); - -// katana-1.0.2: r"(?P<title>[A-Z][a-z]{1,3})(\.)" -consistent!(katana_6, r"(?P<title>[A-Z][a-z]{1,3})(\.)"); - -// katana-1.0.2: r"&==&(?P<p>[.!?])" -consistent!(katana_7, r"&==&(?P<p>[.!?])"); - -// katana-1.0.2: r"&\^&(?P<p>[.!?])" -consistent!(katana_8, r"&\^&(?P<p>[.!?])"); - -// katana-1.0.2: r"&\*\*&(?P<p>[.!?])" -consistent!(katana_9, r"&\*\*&(?P<p>[.!?])"); - -// katana-1.0.2: r"&=&(?P<p>[.!?])" -consistent!(katana_10, r"&=&(?P<p>[.!?])"); - -// katana-1.0.2: r"&##&(?P<p>[.!?])" -consistent!(katana_11, r"&##&(?P<p>[.!?])"); - -// katana-1.0.2: r"&\$&(?P<p>[.!?])" -consistent!(katana_12, r"&\$&(?P<p>[.!?])"); - -// kailua_syntax-1.1.0: r"@(?:_|\d+(?:/\d+(?:-\d+)?)?)" -consistent!(kailua_syntax_0, r"@(?:_|\d+(?:/\d+(?:-\d+)?)?)"); - -// kailua_syntax-1.1.0: 
r"<(\d+)>" -consistent!(kailua_syntax_1, r"<(\d+)>"); - -// ftp-3.0.1: r"\((\d+),(\d+),(\d+),(\d+),(\d+),(\d+)\)" -consistent!(ftp_0, r"\((\d+),(\d+),(\d+),(\d+),(\d+),(\d+)\)"); - -// ftp-3.0.1: r"\b(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})\b" -consistent!(ftp_1, r"\b(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})\b"); - -// ftp-3.0.1: r"\s+(\d+)\s*$" -consistent!(ftp_2, r"\s+(\d+)\s*$"); - -// vat-0.1.0: r"<countryCode>(.*?)</countryCode>" -consistent!(vat_0, r"<countryCode>(.*?)</countryCode>"); - -// vat-0.1.0: r"<vatNumber>(.*?)</vatNumber>" -consistent!(vat_1, r"<vatNumber>(.*?)</vatNumber>"); - -// vat-0.1.0: r"<name>(.*?)</name>" -consistent!(vat_2, r"<name>(.*?)</name>"); - -// vat-0.1.0: r"<address>(?s)(.*?)(?-s)</address>" -consistent!(vat_3, r"<address>(?s)(.*?)(?-s)</address>"); - -// vat-0.1.0: r"<valid>(true|false)</valid>" -consistent!(vat_4, r"<valid>(true|false)</valid>"); - -// vat-0.1.0: r"^ATU\d{8}$" -consistent!(vat_5, r"^ATU\d{8}$"); - -// vat-0.1.0: r"^BE0?\d{9, 10}$" -consistent!(vat_6, r"^BE0?\d{9, 10}$"); - -// vat-0.1.0: r"^BG\d{9,10}$" -consistent!(vat_7, r"^BG\d{9,10}$"); - -// vat-0.1.0: r"^HR\d{11}$" -consistent!(vat_8, r"^HR\d{11}$"); - -// vat-0.1.0: r"^CY\d{8}[A-Z]$" -consistent!(vat_9, r"^CY\d{8}[A-Z]$"); - -// vat-0.1.0: r"^CZ\d{8,10}$" -consistent!(vat_10, r"^CZ\d{8,10}$"); - -// vat-0.1.0: r"^DK\d{8}$" -consistent!(vat_11, r"^DK\d{8}$"); - -// vat-0.1.0: r"^EE\d{9}$" -consistent!(vat_12, r"^EE\d{9}$"); - -// vat-0.1.0: r"^FI\d{8}$" -consistent!(vat_13, r"^FI\d{8}$"); - -// vat-0.1.0: r"^FR[A-HJ-NP-Z0-9][A-HJ-NP-Z0-9]\d{9}$" -consistent!(vat_14, r"^FR[A-HJ-NP-Z0-9][A-HJ-NP-Z0-9]\d{9}$"); - -// vat-0.1.0: r"^DE\d{9}$" -consistent!(vat_15, r"^DE\d{9}$"); - -// vat-0.1.0: r"^EL\d{9}$" -consistent!(vat_16, r"^EL\d{9}$"); - -// vat-0.1.0: r"^HU\d{8}$" -consistent!(vat_17, r"^HU\d{8}$"); - -// vat-0.1.0: r"^IE\d[A-Z0-9\+\*]\d{5}[A-Z]{1,2}$" -consistent!(vat_18, r"^IE\d[A-Z0-9\+\*]\d{5}[A-Z]{1,2}$"); - -// vat-0.1.0: r"^IT\d{11}$" 
-consistent!(vat_19, r"^IT\d{11}$"); - -// vat-0.1.0: r"^LV\d{11}$" -consistent!(vat_20, r"^LV\d{11}$"); - -// vat-0.1.0: r"^LT(\d{9}|\d{12})$" -consistent!(vat_21, r"^LT(\d{9}|\d{12})$"); - -// vat-0.1.0: r"^LU\d{8}$" -consistent!(vat_22, r"^LU\d{8}$"); - -// vat-0.1.0: r"^MT\d{8}$" -consistent!(vat_23, r"^MT\d{8}$"); - -// vat-0.1.0: r"^NL\d{9}B\d{2}$" -consistent!(vat_24, r"^NL\d{9}B\d{2}$"); - -// vat-0.1.0: r"^PL\d{10}$" -consistent!(vat_25, r"^PL\d{10}$"); - -// vat-0.1.0: r"^PT\d{9}$" -consistent!(vat_26, r"^PT\d{9}$"); - -// vat-0.1.0: r"^RO\d{2,10}$" -consistent!(vat_27, r"^RO\d{2,10}$"); - -// vat-0.1.0: r"^SK\d{10}$" -consistent!(vat_28, r"^SK\d{10}$"); - -// vat-0.1.0: r"^SI\d{8}$" -consistent!(vat_29, r"^SI\d{8}$"); - -// vat-0.1.0: r"^ES[A-Z0-9]\d{7}[A-Z0-9]$" -consistent!(vat_30, r"^ES[A-Z0-9]\d{7}[A-Z0-9]$"); - -// vat-0.1.0: r"^SE\d{10}01$" -consistent!(vat_31, r"^SE\d{10}01$"); - -// vat-0.1.0: r"^(GB(GD|HA)\d{3}|GB\d{9}|GB\d{12})$" -consistent!(vat_32, r"^(GB(GD|HA)\d{3}|GB\d{9}|GB\d{12})$"); - -// eve-0.1.1: r"\{\{(.*)\}\}" -consistent!(eve_0, r"\{\{(.*)\}\}"); - -// egc-0.1.2: "^mio" -consistent!(egc_0, "^mio"); - -// pew-0.2.3: "" -consistent!(pew_0, ""); - -// pew-0.2.3: "" -consistent!(pew_1, ""); - -// mob-0.4.3: "y" -consistent!(mob_0, "y"); - -// lit-0.2.8: "@([a-z]+)" -consistent!(lit_0, "@([a-z]+)"); - -// lit-0.2.8: "([A-Z-]+):(.*)" -consistent!(lit_1, "([A-Z-]+):(.*)"); - -// lit-0.2.8: "^[a-zA-Z_][a-zA-Z0-9_]*$" -consistent!(lit_2, "^[a-zA-Z_][a-zA-Z0-9_]*$"); - -// avm-1.0.1: r"\d+\.\d+\.\d+" -consistent!(avm_0, r"\d+\.\d+\.\d+"); - -// avm-1.0.1: r"\d+\.\d+\.\d+" -consistent!(avm_1, r"\d+\.\d+\.\d+"); - -// orm-0.2.0: r"^Vec<(.+)>$" -consistent!(orm_0, r"^Vec<(.+)>$"); - -// sgf-0.1.5: r"\\(\r\n|\n\r|\n|\r)" -consistent!(sgf_0, r"\\(\r\n|\n\r|\n|\r)"); - -// sgf-0.1.5: r"\\(.)" -consistent!(sgf_1, r"\\(.)"); - -// sgf-0.1.5: r"\r\n|\n\r|\n|\r" -consistent!(sgf_2, r"\r\n|\n\r|\n|\r"); - -// sgf-0.1.5: r"([\]\\:])" 
-consistent!(sgf_3, r"([\]\\:])"); - -// dok-0.2.0: "^Bearer realm=\"(.+?)\",service=\"(.+?)\",scope=\"(.+?)\"$" -consistent!( - dok_0, - "^Bearer realm=\"(.+?)\",service=\"(.+?)\",scope=\"(.+?)\"$" -); - -// d20-0.1.0: r"([+-]?\s*\d+[dD]\d+|[+-]?\s*\d+)" -consistent!(d20_0, r"([+-]?\s*\d+[dD]\d+|[+-]?\s*\d+)"); - -// dvb-0.3.0: "E" -consistent!(dvb_0, "E"); - -// dvb-0.3.0: "^F" -consistent!(dvb_1, "^F"); - -// dvb-0.3.0: "^S" -consistent!(dvb_2, "^S"); - -// ger-0.2.0: r"Change-Id: (I[a-f0-9]{40})$" -consistent!(ger_0, r"Change-Id: (I[a-f0-9]{40})$"); - -// ger-0.2.0: r"(refs|ref|fix|fixes|close|closes)\s+([A-Z]{2,5}-[0-9]{1,5})$" -consistent!( - ger_1, - r"(refs|ref|fix|fixes|close|closes)\s+([A-Z]{2,5}-[0-9]{1,5})$" -); - -// n5-0.2.1: r"(\d+)(\.(\d+))?(\.(\d+))?(.*)" -consistent!(n5_0, r"(\d+)(\.(\d+))?(\.(\d+))?(.*)"); - -// po-0.1.4: r"[A-Za-z0-9]" -consistent!(po_0, r"[A-Za-z0-9]"); - -// carnix-0.8.5: "path is (‘|')?([^’'\n]*)(’|')?" -consistent!(carnix_0, "path is (‘|')?([^’'\n]*)(’|')?"); - -// carnix-0.8.5: r"^(\S*) (\d*)\.(\d*)\.(\d*)(-(\S*))?(.*)?" -consistent!(carnix_1, r"^(\S*) (\d*)\.(\d*)\.(\d*)(-(\S*))?(.*)?"); - -// carnix-0.8.5: r"(\d*)\.(\d*)\.(\d*)(-(\S*))?" -consistent!(carnix_2, r"(\d*)\.(\d*)\.(\d*)(-(\S*))?"); - -// carnix-0.8.5: r"(\S*)-(\d*)\.(\d*)\.(\d*)(-(\S*))?" 
-consistent!(carnix_3, r"(\S*)-(\d*)\.(\d*)\.(\d*)(-(\S*))?"); - -// caseless-0.2.1: r"^# CaseFolding-(\d+)\.(\d+)\.(\d+).txt$" -consistent!(caseless_0, r"^# CaseFolding-(\d+)\.(\d+)\.(\d+).txt$"); - -// caseless-0.2.1: r"^([0-9A-F]+); [CF]; ([0-9A-F ]+);" -consistent!(caseless_1, r"^([0-9A-F]+); [CF]; ([0-9A-F ]+);"); - -// cabot-0.2.0: "\r?\n\r?\n" -consistent!(cabot_0, "\r?\n\r?\n"); - -// cabot-0.2.0: "\r?\n" -consistent!(cabot_1, "\r?\n"); - -// card-validate-2.2.1: r"^600" -consistent!(card_validate_0, r"^600"); - -// card-validate-2.2.1: r"^5019" -consistent!(card_validate_1, r"^5019"); - -// card-validate-2.2.1: r"^4" -consistent!(card_validate_2, r"^4"); - -// card-validate-2.2.1: r"^(5[1-5]|2[2-7])" -consistent!(card_validate_3, r"^(5[1-5]|2[2-7])"); - -// card-validate-2.2.1: r"^3[47]" -consistent!(card_validate_4, r"^3[47]"); - -// card-validate-2.2.1: r"^3[0689]" -consistent!(card_validate_5, r"^3[0689]"); - -// card-validate-2.2.1: r"^6([045]|22)" -consistent!(card_validate_6, r"^6([045]|22)"); - -// card-validate-2.2.1: r"^(62|88)" -consistent!(card_validate_7, r"^(62|88)"); - -// card-validate-2.2.1: r"^35" -consistent!(card_validate_8, r"^35"); - -// card-validate-2.2.1: r"^[0-9]+$" -consistent!(card_validate_9, r"^[0-9]+$"); - -// cargo-testify-0.3.0: r"\d{1,} passed.*filtered out" -consistent!(cargo_testify_0, r"\d{1,} passed.*filtered out"); - -// cargo-testify-0.3.0: r"error(:|\[).*" -consistent!(cargo_testify_1, r"error(:|\[).*"); - -// cargo-wix-0.0.5: r"<(.*?)>" -consistent!(cargo_wix_0, r"<(.*?)>"); - -// cargo-wix-0.0.5: r"<(.*?)>" -consistent!(cargo_wix_1, r"<(.*?)>"); - -// cargo-wix-0.0.5: r"<(.*?)>" -consistent!(cargo_wix_2, r"<(.*?)>"); - -// cargo-wix-0.0.5: r"<(.*?)>" -consistent!(cargo_wix_3, r"<(.*?)>"); - -// cargo-incremental-0.1.23: r"(?m)^incremental: re-using (\d+) out of (\d+) modules$" -consistent!( - cargo_incremental_0, - r"(?m)^incremental: re-using (\d+) out of (\d+) modules$" -); - -// cargo-incremental-0.1.23: 
"(?m)(warning|error): (.*)\n --> ([^:]:\\d+:\\d+)$" -consistent!( - cargo_incremental_1, - "(?m)(warning|error): (.*)\n --> ([^:]:\\d+:\\d+)$" -); - -// cargo-incremental-0.1.23: r"(?m)^test (.*) \.\.\. (\w+)" -consistent!(cargo_incremental_2, r"(?m)^test (.*) \.\.\. (\w+)"); - -// cargo-incremental-0.1.23: r"(?m)(\d+) passed; (\d+) failed; (\d+) ignored; \d+ measured" -consistent!( - cargo_incremental_3, - r"(?m)(\d+) passed; (\d+) failed; (\d+) ignored; \d+ measured" -); - -// cargo-testjs-0.1.2: r"^[^-]+-[0-9a-f]+\.js$" -consistent!(cargo_testjs_0, r"^[^-]+-[0-9a-f]+\.js$"); - -// cargo-tarpaulin-0.6.2: r"\s*//" -consistent!(cargo_tarpaulin_0, r"\s*//"); - -// cargo-tarpaulin-0.6.2: r"/\*" -consistent!(cargo_tarpaulin_1, r"/\*"); - -// cargo-tarpaulin-0.6.2: r"\*/" -consistent!(cargo_tarpaulin_2, r"\*/"); - -// cargo-culture-kit-0.1.0: r"^fo" -consistent!(cargo_culture_kit_0, r"^fo"); - -// cargo-screeps-0.1.3: "\\s+" -consistent!(cargo_screeps_0, "\\s+"); - -// cargo-brew-0.1.4: r"`(\S+) v([0-9.]+)" -consistent!(cargo_brew_0, r"`(\S+) v([0-9.]+)"); - -// cargo-release-0.10.2: "^\\[.+\\]" -consistent!(cargo_release_0, "^\\[.+\\]"); - -// cargo-release-0.10.2: "^\\[\\[.+\\]\\]" -consistent!(cargo_release_1, "^\\[\\[.+\\]\\]"); - -// cargo-edit-0.3.0-beta.1: r"^https://github.com/([-_0-9a-zA-Z]+)/([-_0-9a-zA-Z]+)(/|.git)?$" -consistent!( - cargo_edit_0, - r"^https://github.com/([-_0-9a-zA-Z]+)/([-_0-9a-zA-Z]+)(/|.git)?$" -); - -// cargo-edit-0.3.0-beta.1: r"^https://gitlab.com/([-_0-9a-zA-Z]+)/([-_0-9a-zA-Z]+)(/|.git)?$" -consistent!( - cargo_edit_1, - r"^https://gitlab.com/([-_0-9a-zA-Z]+)/([-_0-9a-zA-Z]+)(/|.git)?$" -); - -// cargo-disassemble-0.1.1: ".*" -consistent!(cargo_disassemble_0, ".*"); - -// cargo-demangle-0.1.2: r"(?m)(?P<symbol>_ZN[0-9]+.*E)" -consistent!(cargo_demangle_0, r"(?m)(?P<symbol>_ZN[0-9]+.*E)"); - -// cargo-coverage-annotations-0.1.5: r"^\s*\}(?:\)*;?|\s*else\s*\{)$" -consistent!(cargo_coverage_annotations_0, 
r"^\s*\}(?:\)*;?|\s*else\s*\{)$"); - -// cargo-urlcrate-1.0.1: "[\u{001b}\u{009b}][\\[()#;?]*(?:[0-9]{1,4}(?:;[0-9]{0,4})*)?[0-9A-PRZcf-nqry=><]" -consistent!(cargo_urlcrate_0, "[\u{001b}\u{009b}][\\[()#;?]*(?:[0-9]{1,4}(?:;[0-9]{0,4})*)?[0-9A-PRZcf-nqry=><]"); - -// cargo-script-0.2.8: r"^\s*\*( |$)" -consistent!(cargo_script_0, r"^\s*\*( |$)"); - -// cargo-script-0.2.8: r"^(\s+)" -consistent!(cargo_script_1, r"^(\s+)"); - -// cargo-script-0.2.8: r"/\*|\*/" -consistent!(cargo_script_2, r"/\*|\*/"); - -// cargo-script-0.2.8: r"^\s*//!" -consistent!(cargo_script_3, r"^\s*//!"); - -// cargo-script-0.2.8: r"^#![^\[].*?(\r\n|\n)" -consistent!(cargo_script_4, r"^#![^\[].*?(\r\n|\n)"); - -// cargo-update-1.5.2: r"cargo-install-update\.exe-v.+" -consistent!(cargo_update_0, r"cargo-install-update\.exe-v.+"); - -// canteen-0.4.1: r"^<(?:(int|uint|str|float|path):)?([\w_][a-zA-Z0-9_]*)>$" -consistent!( - canteen_0, - r"^<(?:(int|uint|str|float|path):)?([\w_][a-zA-Z0-9_]*)>$" -); - -// thruster-cli-0.1.3: r"(.)([A-Z])" -consistent!(thruster_cli_0, r"(.)([A-Z])"); - -// thieves-cant-0.1.0: "([Z]+)$" -consistent!(thieves_cant_0, "([Z]+)$"); - -// codeowners-0.1.3: r"^@\S+/\S+" -consistent!(codeowners_0, r"^@\S+/\S+"); - -// codeowners-0.1.3: r"^@\S+" -consistent!(codeowners_1, r"^@\S+"); - -// codeowners-0.1.3: r"^\S+@\S+" -consistent!(codeowners_2, r"^\S+@\S+"); - -// conserve-0.4.2: r"^b0000 {21} complete 20[-0-9T:+]+\s +\d+s\n$" -consistent!(conserve_0, r"^b0000 {21} complete 20[-0-9T:+]+\s +\d+s\n$"); - -// commodore-0.3.0: r"(?P<greeting>\S+?) (?P<name>\S+?)$" -consistent!(commodore_0, r"(?P<greeting>\S+?) 
(?P<name>\S+?)$"); - -// corollary-0.3.0: r"([ \t]*)```haskell([\s\S]*?)```" -consistent!(corollary_0, r"([ \t]*)```haskell([\s\S]*?)```"); - -// corollary-0.3.0: r"\b((?:a|b|t)\d*)\b" -consistent!(corollary_1, r"\b((?:a|b|t)\d*)\b"); - -// colorizex-0.1.3: "NB" -consistent!(colorizex_0, "NB"); - -// colorstring-0.0.1: r"(?i)\[[a-z0-9_-]+\]" -consistent!(colorstring_0, r"(?i)\[[a-z0-9_-]+\]"); - -// colorstring-0.0.1: r"^(?i)(\[[a-z0-9_-]+\])+" -consistent!(colorstring_1, r"^(?i)(\[[a-z0-9_-]+\])+"); - -// cosmogony-0.3.0: "name:(.+)" -consistent!(cosmogony_0, "name:(.+)"); - -// cobalt-bin-0.12.1: r"(?m:^ {0,3}\[[^\]]+\]:.+$)" -consistent!(cobalt_bin_0, r"(?m:^ {0,3}\[[^\]]+\]:.+$)"); - -// comrak-0.2.12: r"[^\p{L}\p{M}\p{N}\p{Pc} -]" -consistent!(comrak_0, r"[^\p{L}\p{M}\p{N}\p{Pc} -]"); - -// content-blocker-0.2.3: "" -consistent!(content_blocker_0, ""); - -// content-blocker-0.2.3: "(?i)hi" -consistent!(content_blocker_1, "(?i)hi"); - -// content-blocker-0.2.3: "http[s]?://domain.org" -consistent!(content_blocker_2, "http[s]?://domain.org"); - -// content-blocker-0.2.3: "(?i)http[s]?://domain.org" -consistent!(content_blocker_3, "(?i)http[s]?://domain.org"); - -// content-blocker-0.2.3: "http://domain.org" -consistent!(content_blocker_4, "http://domain.org"); - -// content-blocker-0.2.3: "http://domain.org" -consistent!(content_blocker_5, "http://domain.org"); - -// content-blocker-0.2.3: "ad.html" -consistent!(content_blocker_6, "ad.html"); - -// content-blocker-0.2.3: "ad.html" -consistent!(content_blocker_7, "ad.html"); - -// content-blocker-0.2.3: "http://domain.org" -consistent!(content_blocker_8, "http://domain.org"); - -// content-blocker-0.2.3: "http://domain.org/nocookies.sjs" -consistent!(content_blocker_9, "http://domain.org/nocookies.sjs"); - -// content-blocker-0.2.3: "http://domain.org/nocookies.sjs" -consistent!(content_blocker_10, "http://domain.org/nocookies.sjs"); - -// content-blocker-0.2.3: "http://domain.org/hideme.jpg" 
-consistent!(content_blocker_11, "http://domain.org/hideme.jpg"); - -// content-blocker-0.2.3: "http://domain.org/ok.html" -consistent!(content_blocker_12, "http://domain.org/ok.html"); - -// content-blocker-0.2.3: "http://domain.org/ok.html\\?except_this=1" -consistent!(content_blocker_13, "http://domain.org/ok.html\\?except_this=1"); - -// victoria-dom-0.1.2: "[A-Za-z0-9=]" -consistent!(victoria_dom_0, "[A-Za-z0-9=]"); - -// numbat-1.0.0: r"^nsq://" -consistent!(numbat_0, r"^nsq://"); - -// airkorea-0.1.2: r"[\s\t\r\n]" -consistent!(airkorea_0, r"[\s\t\r\n]"); - -// airkorea-0.1.2: r"([\{\[,])|([\}\]])" -consistent!(airkorea_1, r"([\{\[,])|([\}\]])"); - -// airkorea-0.1.2: r"[^.\d]+$" -consistent!(airkorea_2, r"[^.\d]+$"); - -// rofl-0.0.1: r"\b" -// consistent!(rofl_0, r"\b"); - -// rogcat-0.2.15: r"--------- beginning of.*" -consistent!(rogcat_0, r"--------- beginning of.*"); - -// rogcat-0.2.15: r"a|e|i|o|u" -consistent!(rogcat_1, r"a|e|i|o|u"); - -// rogcat-0.2.15: r"^(\d+)([kMG])$" -consistent!(rogcat_2, r"^(\d+)([kMG])$"); - -// media_filename-0.1.4: "\\.([A-Za-z0-9]{2,4})$" -consistent!(media_filename_0, "\\.([A-Za-z0-9]{2,4})$"); - -// media_filename-0.1.4: "([0-9]{3,4}p|[0-9]{3,4}x[0-9]{3,4})" -consistent!(media_filename_1, "([0-9]{3,4}p|[0-9]{3,4}x[0-9]{3,4})"); - -// media_filename-0.1.4: "(?:^\\[([^]]+)\\]|- ?([^-]+)$)" -consistent!(media_filename_2, "(?:^\\[([^]]+)\\]|- ?([^-]+)$)"); - -// media_filename-0.1.4: "(?:[eE]([0-9]{2,3})|[^0-9A-Za-z]([0-9]{2,3})(?:v[0-9])?[^0-9A-Za-z])" -consistent!( - media_filename_3, - "(?:[eE]([0-9]{2,3})|[^0-9A-Za-z]([0-9]{2,3})(?:v[0-9])?[^0-9A-Za-z])" -); - -// media_filename-0.1.4: "[sS]([0-9]{1,2})" -consistent!(media_filename_4, "[sS]([0-9]{1,2})"); - -// media_filename-0.1.4: "((?i)(?:PPV.)?[HP]DTV|(?:HD)?CAM|BRRIP|[^a-z]TS[^a-z]|(?:PPV )?WEB.?DL(?: DVDRip)?|HDRip|DVDRip|CamRip|W[EB]BRip|BluRay|BD|DVD|DvDScr|hdtv)" -consistent!(media_filename_5, "((?i)(?:PPV.)?[HP]DTV|(?:HD)?CAM|BRRIP|[^a-z]TS[^a-z]|(?:PPV 
)?WEB.?DL(?: DVDRip)?|HDRip|DVDRip|CamRip|W[EB]BRip|BluRay|BD|DVD|DvDScr|hdtv)"); - -// media_filename-0.1.4: "((19[0-9]|20[01])[0-9])" -consistent!(media_filename_6, "((19[0-9]|20[01])[0-9])"); - -// media_filename-0.1.4: "((?i)xvid|x264|h\\.?264)" -consistent!(media_filename_7, "((?i)xvid|x264|h\\.?264)"); - -// media_filename-0.1.4: "((?i)MP3|DD5\\.?1|Dual[- ]Audio|LiNE|DTS|AAC(?:\\.?2\\.0)?|AC3(?:\\.5\\.1)?)" -consistent!(media_filename_8, "((?i)MP3|DD5\\.?1|Dual[- ]Audio|LiNE|DTS|AAC(?:\\.?2\\.0)?|AC3(?:\\.5\\.1)?)"); - -// media_filename-0.1.4: "\\[([0-9A-F]{8})\\]" -consistent!(media_filename_9, "\\[([0-9A-F]{8})\\]"); - -// termimage-0.3.2: r"(\d+)[xX](\d+)" -consistent!(termimage_0, r"(\d+)[xX](\d+)"); - -// teensy-0.1.0: r".*(\d{4}-\d{2}-\d{2}).*" -consistent!(teensy_0, r".*(\d{4}-\d{2}-\d{2}).*"); - -// telescreen-0.1.3: r"<@(.+)>" -consistent!(telescreen_0, r"<@(.+)>"); - -// tempus_fugit-0.4.4: r"^(\d+)" -consistent!(tempus_fugit_0, r"^(\d+)"); - -// fselect-0.4.1: "(\\?|\\.|\\*|\\[|\\]|\\(|\\)|\\^|\\$)" -consistent!(fselect_0, "(\\?|\\.|\\*|\\[|\\]|\\(|\\)|\\^|\\$)"); - -// fselect-0.4.1: "(%|_|\\?|\\.|\\*|\\[|\\]|\\(|\\)|\\^|\\$)" -consistent!(fselect_1, "(%|_|\\?|\\.|\\*|\\[|\\]|\\(|\\)|\\^|\\$)"); - -// fs_eventbridge-0.1.0: r"^([A-Z]+)(?:\s(.+))?\s*" -consistent!(fs_eventbridge_0, r"^([A-Z]+)(?:\s(.+))?\s*"); - -// joseki-0.0.1: r"(\w{1,2})\[(.+?)\]" -consistent!(joseki_0, r"(\w{1,2})\[(.+?)\]"); - -// tweetr-0.2.1: r"(?i)in (\d+) (second|minute|hour|day|week)s?" 
-consistent!(tweetr_0, r"(?i)in (\d+) (second|minute|hour|day|week)s?"); - -// bullet_core-0.1.1: "^(?u:[0-9])+" -consistent!(bullet_core_0, "^(?u:[0-9])+"); - -// bullet_core-0.1.1: "^(?u:[0-9])+(?u:\\.)(?u:[0-9])+" -consistent!(bullet_core_1, "^(?u:[0-9])+(?u:\\.)(?u:[0-9])+"); - -// bullet_core-0.1.1: "^(?u:[A-Za-zª-ªµ-µº-ºÀ-ÖØ-öø-ˁˆ-ˑˠ-ˤˬ-ˬˮ-ˮͰ-ʹͶ-ͷͺ-ͽͿ-ͿΆ-ΆΈ-ΊΌ-ΌΎ-ΡΣ-ϵϷ-ҁҊ-ԯԱ-Ֆՙ-ՙա-ևא-תװ-ײؠ-يٮ-ٯٱ-ۓە-ەۥ-ۦۮ-ۯۺ-ۼۿ-ۿܐ-ܐܒ-ܯݍ-ޥޱ-ޱߊ-ߪߴ-ߵߺ-ߺࠀ-ࠕࠚ-ࠚࠤ-ࠤࠨ-ࠨࡀ-ࡘࢠ-ࢴऄ-हऽ-ऽॐ-ॐक़-ॡॱ-ঀঅ-ঌএ-ঐও-নপ-রল-লশ-হঽ-ঽৎ-ৎড়-ঢ়য়-ৡৰ-ৱਅ-ਊਏ-ਐਓ-ਨਪ-ਰਲ-ਲ਼ਵ-ਸ਼ਸ-ਹਖ਼-ੜਫ਼-ਫ਼ੲ-ੴઅ-ઍએ-ઑઓ-નપ-રલ-ળવ-હઽ-ઽૐ-ૐૠ-ૡૹ-ૹଅ-ଌଏ-ଐଓ-ନପ-ରଲ-ଳଵ-ହଽ-ଽଡ଼-ଢ଼ୟ-ୡୱ-ୱஃ-ஃஅ-ஊஎ-ஐஒ-கங-சஜ-ஜஞ-டண-தந-பம-ஹௐ-ௐఅ-ఌఎ-ఐఒ-నప-హఽ-ఽౘ-ౚౠ-ౡಅ-ಌಎ-ಐಒ-ನಪ-ಳವ-ಹಽ-ಽೞ-ೞೠ-ೡೱ-ೲഅ-ഌഎ-ഐഒ-ഺഽ-ഽൎ-ൎൟ-ൡൺ-ൿඅ-ඖක-නඳ-රල-ලව-ෆก-ะา-ำเ-ๆກ-ຂຄ-ຄງ-ຈຊ-ຊຍ-ຍດ-ທນ-ຟມ-ຣລ-ລວ-ວສ-ຫອ-ະາ-ຳຽ-ຽເ-ໄໆ-ໆໜ-ໟༀ-ༀཀ-ཇཉ-ཬྈ-ྌက-ဪဿ-ဿၐ-ၕၚ-ၝၡ-ၡၥ-ၦၮ-ၰၵ-ႁႎ-ႎႠ-ჅჇ-ჇჍ-Ⴭა-ჺჼ-ቈቊ-ቍቐ-ቖቘ-ቘቚ-ቝበ-ኈኊ-ኍነ-ኰኲ-ኵኸ-ኾዀ-ዀዂ-ዅወ-ዖዘ-ጐጒ-ጕጘ-ፚᎀ-ᎏᎠ-Ᏽᏸ-ᏽᐁ-ᙬᙯ-ᙿᚁ-ᚚᚠ-ᛪᛱ-ᛸᜀ-ᜌᜎ-ᜑᜠ-ᜱᝀ-ᝑᝠ-ᝬᝮ-ᝰក-ឳៗ-ៗៜ-ៜᠠ-ᡷᢀ-ᢨᢪ-ᢪᢰ-ᣵᤀ-ᤞᥐ-ᥭᥰ-ᥴᦀ-ᦫᦰ-ᧉᨀ-ᨖᨠ-ᩔᪧ-ᪧᬅ-ᬳᭅ-ᭋᮃ-ᮠᮮ-ᮯᮺ-ᯥᰀ-ᰣᱍ-ᱏᱚ-ᱽᳩ-ᳬᳮ-ᳱᳵ-ᳶᴀ-ᶿḀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙ-ὙὛ-ὛὝ-ὝὟ-ώᾀ-ᾴᾶ-ᾼι-ιῂ-ῄῆ-ῌῐ-ΐῖ-Ίῠ-Ῥῲ-ῴῶ-ῼⁱ-ⁱⁿ-ⁿₐ-ₜℂ-ℂℇ-ℇℊ-ℓℕ-ℕℙ-ℝℤ-ℤΩ-Ωℨ-ℨK-ℭℯ-ℹℼ-ℿⅅ-ⅉⅎ-ⅎↃ-ↄⰀ-Ⱞⰰ-ⱞⱠ-ⳤⳫ-ⳮⳲ-ⳳⴀ-ⴥⴧ-ⴧⴭ-ⴭⴰ-ⵧⵯ-ⵯⶀ-ⶖⶠ-ⶦⶨ-ⶮⶰ-ⶶⶸ-ⶾⷀ-ⷆⷈ-ⷎⷐ-ⷖⷘ-ⷞⸯ-ⸯ々-〆〱-〵〻-〼ぁ-ゖゝ-ゟァ-ヺー-ヿㄅ-ㄭㄱ-ㆎㆠ-ㆺㇰ-ㇿ㐀-䶵一-鿕ꀀ-ꒌꓐ-ꓽꔀ-ꘌꘐ-ꘟꘪ-ꘫꙀ-ꙮꙿ-ꚝꚠ-ꛥꜗ-ꜟꜢ-ꞈꞋ-ꞭꞰ-ꞷꟷ-ꠁꠃ-ꠅꠇ-ꠊꠌ-ꠢꡀ-ꡳꢂ-ꢳꣲ-ꣷꣻ-ꣻꣽ-ꣽꤊ-ꤥꤰ-ꥆꥠ-ꥼꦄ-ꦲꧏ-ꧏꧠ-ꧤꧦ-ꧯꧺ-ꧾꨀ-ꨨꩀ-ꩂꩄ-ꩋꩠ-ꩶꩺ-ꩺꩾ-ꪯꪱ-ꪱꪵ-ꪶꪹ-ꪽꫀ-ꫀꫂ-ꫂꫛ-ꫝꫠ-ꫪꫲ-ꫴꬁ-ꬆꬉ-ꬎꬑ-ꬖꬠ-ꬦꬨ-ꬮꬰ-ꭚꭜ-ꭥꭰ-ꯢ가-힣ힰ-ퟆퟋ-ퟻ豈-舘並-龎ff-stﬓ-ﬗיִ-יִײַ-ﬨשׁ-זּטּ-לּמּ-מּנּ-סּףּ-פּצּ-ﮱﯓ-ﴽﵐ-ﶏﶒ-ﷇﷰ-ﷻﹰ-ﹴﹶ-ﻼA-Za-zヲ-하-ᅦᅧ-ᅬᅭ-ᅲᅳ-ᅵ𐀀-𐀋𐀍-𐀦𐀨-𐀺𐀼-𐀽𐀿-𐁍𐁐-𐁝𐂀-𐃺𐊀-𐊜𐊠-𐋐𐌀-𐌟𐌰-𐍀𐍂-𐍉𐍐-𐍵𐎀-𐎝𐎠-𐏃𐏈-𐏏𐐀-𐒝𐔀-𐔧𐔰-𐕣𐘀-𐜶𐝀-𐝕𐝠-𐝧𐠀-𐠅𐠈-𐠈𐠊-𐠵𐠷-𐠸𐠼-𐠼𐠿-𐡕𐡠-𐡶𐢀-𐢞𐣠-𐣲𐣴-𐣵𐤀-𐤕𐤠-𐤹𐦀-𐦷𐦾-𐦿𐨀-𐨀𐨐-𐨓𐨕-𐨗𐨙-𐨳𐩠-𐩼𐪀-𐪜𐫀-𐫇𐫉-𐫤𐬀-𐬵𐭀-𐭕𐭠-𐭲𐮀-𐮑𐰀-𐱈𐲀-𐲲𐳀-𐳲𑀃-𑀷𑂃-𑂯𑃐-𑃨𑄃-𑄦𑅐-𑅲𑅶-𑅶𑆃-𑆲𑇁-𑇄𑇚-𑇚𑇜-𑇜𑈀-𑈑𑈓-𑈫𑊀-𑊆𑊈-𑊈𑊊-𑊍𑊏-𑊝𑊟-𑊨𑊰-𑋞𑌅-𑌌𑌏-𑌐𑌓-𑌨𑌪-𑌰𑌲-𑌳𑌵-𑌹𑌽-𑌽𑍐-𑍐𑍝-𑍡𑒀-𑒯𑓄-𑓅𑓇-𑓇𑖀-𑖮𑗘-𑗛𑘀-𑘯𑙄-𑙄𑚀-𑚪𑜀-𑜙𑢠-𑣟𑣿-𑣿𑫀-𑫸𒀀-𒎙𒒀-𒕃𓀀-𓐮𔐀-𔙆𖠀-𖨸𖩀-𖩞𖫐-𖫭𖬀-𖬯𖭀-𖭃𖭣-𖭷𖭽-𖮏𖼀-𖽄𖽐-𖽐𖾓-𖾟𛀀-𛀁𛰀-𛱪𛱰-𛱼𛲀-𛲈𛲐-𛲙𝐀-𝑔𝑖-𝒜𝒞-𝒟𝒢-𝒢𝒥-𝒦𝒩-𝒬𝒮-𝒹𝒻-𝒻𝒽-𝓃𝓅-𝔅𝔇-𝔊𝔍-𝔔𝔖-𝔜𝔞-𝔹𝔻-𝔾𝕀-𝕄𝕆-𝕆𝕊-𝕐𝕒-𝚥𝚨-𝛀𝛂-𝛚𝛜-𝛺𝛼-𝜔𝜖-𝜴𝜶-𝝎𝝐-𝝮𝝰-𝞈𝞊-𝞨𝞪-𝟂𝟄-𝟋𞠀-𞣄𞸀-𞸃𞸅-𞸟𞸡-𞸢𞸤-𞸤𞸧-𞸧𞸩-𞸲𞸴-𞸷𞸹-𞸹𞸻-𞸻𞹂-𞹂𞹇-𞹇𞹉-𞹉𞹋-𞹋𞹍-𞹏𞹑-𞹒𞹔-𞹔𞹗-𞹗𞹙-𞹙𞹛-𞹛𞹝-𞹝𞹟-𞹟𞹡-𞹢𞹤-𞹤𞹧-𞹪𞹬-𞹲𞹴-𞹷𞹹-𞹼𞹾-𞹾𞺀-𞺉𞺋-𞺛𞺡-𞺣𞺥-𞺩𞺫-𞺻𠀀-𪛖𪜀-𫜴𫝀-𫠝𫠠-𬺡丽-
𪘀])+" -consistent!(bullet_core_2, "^(?u:[A-Za-zª-ªµ-µº-ºÀ-ÖØ-öø-ˁˆ-ˑˠ-ˤˬ-ˬˮ-ˮͰ-ʹͶ-ͷͺ-ͽͿ-ͿΆ-ΆΈ-ΊΌ-ΌΎ-ΡΣ-ϵϷ-ҁҊ-ԯԱ-Ֆՙ-ՙա-ևא-תװ-ײؠ-يٮ-ٯٱ-ۓە-ەۥ-ۦۮ-ۯۺ-ۼۿ-ۿܐ-ܐܒ-ܯݍ-ޥޱ-ޱߊ-ߪߴ-ߵߺ-ߺࠀ-ࠕࠚ-ࠚࠤ-ࠤࠨ-ࠨࡀ-ࡘࢠ-ࢴऄ-हऽ-ऽॐ-ॐक़-ॡॱ-ঀঅ-ঌএ-ঐও-নপ-রল-লশ-হঽ-ঽৎ-ৎড়-ঢ়য়-ৡৰ-ৱਅ-ਊਏ-ਐਓ-ਨਪ-ਰਲ-ਲ਼ਵ-ਸ਼ਸ-ਹਖ਼-ੜਫ਼-ਫ਼ੲ-ੴઅ-ઍએ-ઑઓ-નપ-રલ-ળવ-હઽ-ઽૐ-ૐૠ-ૡૹ-ૹଅ-ଌଏ-ଐଓ-ନପ-ରଲ-ଳଵ-ହଽ-ଽଡ଼-ଢ଼ୟ-ୡୱ-ୱஃ-ஃஅ-ஊஎ-ஐஒ-கங-சஜ-ஜஞ-டண-தந-பம-ஹௐ-ௐఅ-ఌఎ-ఐఒ-నప-హఽ-ఽౘ-ౚౠ-ౡಅ-ಌಎ-ಐಒ-ನಪ-ಳವ-ಹಽ-ಽೞ-ೞೠ-ೡೱ-ೲഅ-ഌഎ-ഐഒ-ഺഽ-ഽൎ-ൎൟ-ൡൺ-ൿඅ-ඖක-නඳ-රල-ලව-ෆก-ะา-ำเ-ๆກ-ຂຄ-ຄງ-ຈຊ-ຊຍ-ຍດ-ທນ-ຟມ-ຣລ-ລວ-ວສ-ຫອ-ະາ-ຳຽ-ຽເ-ໄໆ-ໆໜ-ໟༀ-ༀཀ-ཇཉ-ཬྈ-ྌက-ဪဿ-ဿၐ-ၕၚ-ၝၡ-ၡၥ-ၦၮ-ၰၵ-ႁႎ-ႎႠ-ჅჇ-ჇჍ-Ⴭა-ჺჼ-ቈቊ-ቍቐ-ቖቘ-ቘቚ-ቝበ-ኈኊ-ኍነ-ኰኲ-ኵኸ-ኾዀ-ዀዂ-ዅወ-ዖዘ-ጐጒ-ጕጘ-ፚᎀ-ᎏᎠ-Ᏽᏸ-ᏽᐁ-ᙬᙯ-ᙿᚁ-ᚚᚠ-ᛪᛱ-ᛸᜀ-ᜌᜎ-ᜑᜠ-ᜱᝀ-ᝑᝠ-ᝬᝮ-ᝰក-ឳៗ-ៗៜ-ៜᠠ-ᡷᢀ-ᢨᢪ-ᢪᢰ-ᣵᤀ-ᤞᥐ-ᥭᥰ-ᥴᦀ-ᦫᦰ-ᧉᨀ-ᨖᨠ-ᩔᪧ-ᪧᬅ-ᬳᭅ-ᭋᮃ-ᮠᮮ-ᮯᮺ-ᯥᰀ-ᰣᱍ-ᱏᱚ-ᱽᳩ-ᳬᳮ-ᳱᳵ-ᳶᴀ-ᶿḀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙ-ὙὛ-ὛὝ-ὝὟ-ώᾀ-ᾴᾶ-ᾼι-ιῂ-ῄῆ-ῌῐ-ΐῖ-Ίῠ-Ῥῲ-ῴῶ-ῼⁱ-ⁱⁿ-ⁿₐ-ₜℂ-ℂℇ-ℇℊ-ℓℕ-ℕℙ-ℝℤ-ℤΩ-Ωℨ-ℨK-ℭℯ-ℹℼ-ℿⅅ-ⅉⅎ-ⅎↃ-ↄⰀ-Ⱞⰰ-ⱞⱠ-ⳤⳫ-ⳮⳲ-ⳳⴀ-ⴥⴧ-ⴧⴭ-ⴭⴰ-ⵧⵯ-ⵯⶀ-ⶖⶠ-ⶦⶨ-ⶮⶰ-ⶶⶸ-ⶾⷀ-ⷆⷈ-ⷎⷐ-ⷖⷘ-ⷞⸯ-ⸯ々-〆〱-〵〻-〼ぁ-ゖゝ-ゟァ-ヺー-ヿㄅ-ㄭㄱ-ㆎㆠ-ㆺㇰ-ㇿ㐀-䶵一-鿕ꀀ-ꒌꓐ-ꓽꔀ-ꘌꘐ-ꘟꘪ-ꘫꙀ-ꙮꙿ-ꚝꚠ-ꛥꜗ-ꜟꜢ-ꞈꞋ-ꞭꞰ-ꞷꟷ-ꠁꠃ-ꠅꠇ-ꠊꠌ-ꠢꡀ-ꡳꢂ-ꢳꣲ-ꣷꣻ-ꣻꣽ-ꣽꤊ-ꤥꤰ-ꥆꥠ-ꥼꦄ-ꦲꧏ-ꧏꧠ-ꧤꧦ-ꧯꧺ-ꧾꨀ-ꨨꩀ-ꩂꩄ-ꩋꩠ-ꩶꩺ-ꩺꩾ-ꪯꪱ-ꪱꪵ-ꪶꪹ-ꪽꫀ-ꫀꫂ-ꫂꫛ-ꫝꫠ-ꫪꫲ-ꫴꬁ-ꬆꬉ-ꬎꬑ-ꬖꬠ-ꬦꬨ-ꬮꬰ-ꭚꭜ-ꭥꭰ-ꯢ가-힣ힰ-ퟆퟋ-ퟻ豈-舘並-龎ff-stﬓ-ﬗיִ-יִײַ-ﬨשׁ-זּטּ-לּמּ-מּנּ-סּףּ-פּצּ-ﮱﯓ-ﴽﵐ-ﶏﶒ-ﷇﷰ-ﷻﹰ-ﹴﹶ-ﻼA-Za-zヲ-하-ᅦᅧ-ᅬᅭ-ᅲᅳ-ᅵ𐀀-𐀋𐀍-𐀦𐀨-𐀺𐀼-𐀽𐀿-𐁍𐁐-𐁝𐂀-𐃺𐊀-𐊜𐊠-𐋐𐌀-𐌟𐌰-𐍀𐍂-𐍉𐍐-𐍵𐎀-𐎝𐎠-𐏃𐏈-𐏏𐐀-𐒝𐔀-𐔧𐔰-𐕣𐘀-𐜶𐝀-𐝕𐝠-𐝧𐠀-𐠅𐠈-𐠈𐠊-𐠵𐠷-𐠸𐠼-𐠼𐠿-𐡕𐡠-𐡶𐢀-𐢞𐣠-𐣲𐣴-𐣵𐤀-𐤕𐤠-𐤹𐦀-𐦷𐦾-𐦿𐨀-𐨀𐨐-𐨓𐨕-𐨗𐨙-𐨳𐩠-𐩼𐪀-𐪜𐫀-𐫇𐫉-𐫤𐬀-𐬵𐭀-𐭕𐭠-𐭲𐮀-𐮑𐰀-𐱈𐲀-𐲲𐳀-𐳲𑀃-𑀷𑂃-𑂯𑃐-𑃨𑄃-𑄦𑅐-𑅲𑅶-𑅶𑆃-𑆲𑇁-𑇄𑇚-𑇚𑇜-𑇜𑈀-𑈑𑈓-𑈫𑊀-𑊆𑊈-𑊈𑊊-𑊍𑊏-𑊝𑊟-𑊨𑊰-𑋞𑌅-𑌌𑌏-𑌐𑌓-𑌨𑌪-𑌰𑌲-𑌳𑌵-𑌹𑌽-𑌽𑍐-𑍐𑍝-𑍡𑒀-𑒯𑓄-𑓅𑓇-𑓇𑖀-𑖮𑗘-𑗛𑘀-𑘯𑙄-𑙄𑚀-𑚪𑜀-𑜙𑢠-𑣟𑣿-𑣿𑫀-𑫸𒀀-𒎙𒒀-𒕃𓀀-𓐮𔐀-𔙆𖠀-𖨸𖩀-𖩞𖫐-𖫭𖬀-𖬯𖭀-𖭃𖭣-𖭷𖭽-𖮏𖼀-𖽄𖽐-𖽐𖾓-𖾟𛀀-𛀁𛰀-𛱪𛱰-𛱼𛲀-𛲈𛲐-𛲙𝐀-𝑔𝑖-𝒜𝒞-𝒟𝒢-𝒢𝒥-𝒦𝒩-𝒬𝒮-𝒹𝒻-𝒻𝒽-𝓃𝓅-𝔅𝔇-𝔊𝔍-𝔔𝔖-𝔜𝔞-𝔹𝔻-𝔾𝕀-𝕄𝕆-𝕆𝕊-𝕐𝕒-𝚥𝚨-𝛀𝛂-𝛚𝛜-𝛺𝛼-𝜔𝜖-𝜴𝜶-𝝎𝝐-𝝮𝝰-𝞈𝞊-𝞨𝞪-𝟂𝟄-𝟋𞠀-𞣄𞸀-𞸃𞸅-𞸟𞸡-𞸢𞸤-𞸤𞸧-𞸧𞸩-𞸲𞸴-𞸷𞸹-𞸹𞸻-𞸻𞹂-𞹂𞹇-𞹇𞹉-𞹉𞹋-𞹋𞹍-𞹏𞹑-𞹒𞹔-𞹔𞹗-𞹗𞹙-𞹙𞹛-𞹛𞹝-𞹝𞹟-𞹟𞹡-𞹢𞹤-𞹤𞹧-𞹪𞹬-𞹲𞹴-𞹷𞹹-𞹼𞹾-𞹾𞺀-𞺉𞺋-𞺛𞺡-𞺣𞺥-𞺩𞺫-𞺻𠀀-𪛖𪜀-𫜴𫝀-𫠝𫠠-𬺡丽-𪘀])+"); - -// bullet_core-0.1.1: 
"^(?u:d/d)((?u:[A-Za-zª-ªµ-µº-ºÀ-ÖØ-öø-ˁˆ-ˑˠ-ˤˬ-ˬˮ-ˮͰ-ʹͶ-ͷͺ-ͽͿ-ͿΆ-ΆΈ-ΊΌ-ΌΎ-ΡΣ-ϵϷ-ҁҊ-ԯԱ-Ֆՙ-ՙա-ևא-תװ-ײؠ-يٮ-ٯٱ-ۓە-ەۥ-ۦۮ-ۯۺ-ۼۿ-ۿܐ-ܐܒ-ܯݍ-ޥޱ-ޱߊ-ߪߴ-ߵߺ-ߺࠀ-ࠕࠚ-ࠚࠤ-ࠤࠨ-ࠨࡀ-ࡘࢠ-ࢴऄ-हऽ-ऽॐ-ॐक़-ॡॱ-ঀঅ-ঌএ-ঐও-নপ-রল-লশ-হঽ-ঽৎ-ৎড়-ঢ়য়-ৡৰ-ৱਅ-ਊਏ-ਐਓ-ਨਪ-ਰਲ-ਲ਼ਵ-ਸ਼ਸ-ਹਖ਼-ੜਫ਼-ਫ਼ੲ-ੴઅ-ઍએ-ઑઓ-નપ-રલ-ળવ-હઽ-ઽૐ-ૐૠ-ૡૹ-ૹଅ-ଌଏ-ଐଓ-ନପ-ରଲ-ଳଵ-ହଽ-ଽଡ଼-ଢ଼ୟ-ୡୱ-ୱஃ-ஃஅ-ஊஎ-ஐஒ-கங-சஜ-ஜஞ-டண-தந-பம-ஹௐ-ௐఅ-ఌఎ-ఐఒ-నప-హఽ-ఽౘ-ౚౠ-ౡಅ-ಌಎ-ಐಒ-ನಪ-ಳವ-ಹಽ-ಽೞ-ೞೠ-ೡೱ-ೲഅ-ഌഎ-ഐഒ-ഺഽ-ഽൎ-ൎൟ-ൡൺ-ൿඅ-ඖක-නඳ-රල-ලව-ෆก-ะา-ำเ-ๆກ-ຂຄ-ຄງ-ຈຊ-ຊຍ-ຍດ-ທນ-ຟມ-ຣລ-ລວ-ວສ-ຫອ-ະາ-ຳຽ-ຽເ-ໄໆ-ໆໜ-ໟༀ-ༀཀ-ཇཉ-ཬྈ-ྌက-ဪဿ-ဿၐ-ၕၚ-ၝၡ-ၡၥ-ၦၮ-ၰၵ-ႁႎ-ႎႠ-ჅჇ-ჇჍ-Ⴭა-ჺჼ-ቈቊ-ቍቐ-ቖቘ-ቘቚ-ቝበ-ኈኊ-ኍነ-ኰኲ-ኵኸ-ኾዀ-ዀዂ-ዅወ-ዖዘ-ጐጒ-ጕጘ-ፚᎀ-ᎏᎠ-Ᏽᏸ-ᏽᐁ-ᙬᙯ-ᙿᚁ-ᚚᚠ-ᛪᛱ-ᛸᜀ-ᜌᜎ-ᜑᜠ-ᜱᝀ-ᝑᝠ-ᝬᝮ-ᝰក-ឳៗ-ៗៜ-ៜᠠ-ᡷᢀ-ᢨᢪ-ᢪᢰ-ᣵᤀ-ᤞᥐ-ᥭᥰ-ᥴᦀ-ᦫᦰ-ᧉᨀ-ᨖᨠ-ᩔᪧ-ᪧᬅ-ᬳᭅ-ᭋᮃ-ᮠᮮ-ᮯᮺ-ᯥᰀ-ᰣᱍ-ᱏᱚ-ᱽᳩ-ᳬᳮ-ᳱᳵ-ᳶᴀ-ᶿḀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙ-ὙὛ-ὛὝ-ὝὟ-ώᾀ-ᾴᾶ-ᾼι-ιῂ-ῄῆ-ῌῐ-ΐῖ-Ίῠ-Ῥῲ-ῴῶ-ῼⁱ-ⁱⁿ-ⁿₐ-ₜℂ-ℂℇ-ℇℊ-ℓℕ-ℕℙ-ℝℤ-ℤΩ-Ωℨ-ℨK-ℭℯ-ℹℼ-ℿⅅ-ⅉⅎ-ⅎↃ-ↄⰀ-Ⱞⰰ-ⱞⱠ-ⳤⳫ-ⳮⳲ-ⳳⴀ-ⴥⴧ-ⴧⴭ-ⴭⴰ-ⵧⵯ-ⵯⶀ-ⶖⶠ-ⶦⶨ-ⶮⶰ-ⶶⶸ-ⶾⷀ-ⷆⷈ-ⷎⷐ-ⷖⷘ-ⷞⸯ-ⸯ々-〆〱-〵〻-〼ぁ-ゖゝ-ゟァ-ヺー-ヿㄅ-ㄭㄱ-ㆎㆠ-ㆺㇰ-ㇿ㐀-䶵一-鿕ꀀ-ꒌꓐ-ꓽꔀ-ꘌꘐ-ꘟꘪ-ꘫꙀ-ꙮꙿ-ꚝꚠ-ꛥꜗ-ꜟꜢ-ꞈꞋ-ꞭꞰ-ꞷꟷ-ꠁꠃ-ꠅꠇ-ꠊꠌ-ꠢꡀ-ꡳꢂ-ꢳꣲ-ꣷꣻ-ꣻꣽ-ꣽꤊ-ꤥꤰ-ꥆꥠ-ꥼꦄ-ꦲꧏ-ꧏꧠ-ꧤꧦ-ꧯꧺ-ꧾꨀ-ꨨꩀ-ꩂꩄ-ꩋꩠ-ꩶꩺ-ꩺꩾ-ꪯꪱ-ꪱꪵ-ꪶꪹ-ꪽꫀ-ꫀꫂ-ꫂꫛ-ꫝꫠ-ꫪꫲ-ꫴꬁ-ꬆꬉ-ꬎꬑ-ꬖꬠ-ꬦꬨ-ꬮꬰ-ꭚꭜ-ꭥꭰ-ꯢ가-힣ힰ-ퟆퟋ-ퟻ豈-舘並-龎ff-stﬓ-ﬗיִ-יִײַ-ﬨשׁ-זּטּ-לּמּ-מּנּ-סּףּ-פּצּ-ﮱﯓ-ﴽﵐ-ﶏﶒ-ﷇﷰ-ﷻﹰ-ﹴﹶ-ﻼA-Za-zヲ-하-ᅦᅧ-ᅬᅭ-ᅲᅳ-ᅵ𐀀-𐀋𐀍-𐀦𐀨-𐀺𐀼-𐀽𐀿-𐁍𐁐-𐁝𐂀-𐃺𐊀-𐊜𐊠-𐋐𐌀-𐌟𐌰-𐍀𐍂-𐍉𐍐-𐍵𐎀-𐎝𐎠-𐏃𐏈-𐏏𐐀-𐒝𐔀-𐔧𐔰-𐕣𐘀-𐜶𐝀-𐝕𐝠-𐝧𐠀-𐠅𐠈-𐠈𐠊-𐠵𐠷-𐠸𐠼-𐠼𐠿-𐡕𐡠-𐡶𐢀-𐢞𐣠-𐣲𐣴-𐣵𐤀-𐤕𐤠-𐤹𐦀-𐦷𐦾-𐦿𐨀-𐨀𐨐-𐨓𐨕-𐨗𐨙-𐨳𐩠-𐩼𐪀-𐪜𐫀-𐫇𐫉-𐫤𐬀-𐬵𐭀-𐭕𐭠-𐭲𐮀-𐮑𐰀-𐱈𐲀-𐲲𐳀-𐳲𑀃-𑀷𑂃-𑂯𑃐-𑃨𑄃-𑄦𑅐-𑅲𑅶-𑅶𑆃-𑆲𑇁-𑇄𑇚-𑇚𑇜-𑇜𑈀-𑈑𑈓-𑈫𑊀-𑊆𑊈-𑊈𑊊-𑊍𑊏-𑊝𑊟-𑊨𑊰-𑋞𑌅-𑌌𑌏-𑌐𑌓-𑌨𑌪-𑌰𑌲-𑌳𑌵-𑌹𑌽-𑌽𑍐-𑍐𑍝-𑍡𑒀-𑒯𑓄-𑓅𑓇-𑓇𑖀-𑖮𑗘-𑗛𑘀-𑘯𑙄-𑙄𑚀-𑚪𑜀-𑜙𑢠-𑣟𑣿-𑣿𑫀-𑫸𒀀-𒎙𒒀-𒕃𓀀-𓐮𔐀-𔙆𖠀-𖨸𖩀-𖩞𖫐-𖫭𖬀-𖬯𖭀-𖭃𖭣-𖭷𖭽-𖮏𖼀-𖽄𖽐-𖽐𖾓-𖾟𛀀-𛀁𛰀-𛱪𛱰-𛱼𛲀-𛲈𛲐-𛲙𝐀-𝑔𝑖-𝒜𝒞-𝒟𝒢-𝒢𝒥-𝒦𝒩-𝒬𝒮-𝒹𝒻-𝒻𝒽-𝓃𝓅-𝔅𝔇-𝔊𝔍-𝔔𝔖-𝔜𝔞-𝔹𝔻-𝔾𝕀-𝕄𝕆-𝕆𝕊-𝕐𝕒-𝚥𝚨-𝛀𝛂-𝛚𝛜-𝛺𝛼-𝜔𝜖-𝜴𝜶-𝝎𝝐-𝝮𝝰-𝞈𝞊-𝞨𝞪-𝟂𝟄-𝟋𞠀-𞣄𞸀-𞸃𞸅-𞸟𞸡-𞸢𞸤-𞸤𞸧-𞸧𞸩-𞸲𞸴-𞸷𞸹-𞸹𞸻-𞸻𞹂-𞹂𞹇-𞹇𞹉-𞹉𞹋-𞹋𞹍-𞹏𞹑-𞹒𞹔-𞹔𞹗-𞹗𞹙-𞹙𞹛-𞹛𞹝-𞹝𞹟-𞹟𞹡-𞹢𞹤-𞹤𞹧-𞹪𞹬-𞹲𞹴-𞹷𞹹-𞹼𞹾-𞹾𞺀-𞺉𞺋-𞺛𞺡-𞺣𞺥-𞺩𞺫-𞺻𠀀-𪛖𪜀-𫜴𫝀-𫠝𫠠-𬺡丽-𪘀])+)" -consistent!(bullet_core_3, 
"^(?u:d/d)((?u:[A-Za-zª-ªµ-µº-ºÀ-ÖØ-öø-ˁˆ-ˑˠ-ˤˬ-ˬˮ-ˮͰ-ʹͶ-ͷͺ-ͽͿ-ͿΆ-ΆΈ-ΊΌ-ΌΎ-ΡΣ-ϵϷ-ҁҊ-ԯԱ-Ֆՙ-ՙա-ևא-תװ-ײؠ-يٮ-ٯٱ-ۓە-ەۥ-ۦۮ-ۯۺ-ۼۿ-ۿܐ-ܐܒ-ܯݍ-ޥޱ-ޱߊ-ߪߴ-ߵߺ-ߺࠀ-ࠕࠚ-ࠚࠤ-ࠤࠨ-ࠨࡀ-ࡘࢠ-ࢴऄ-हऽ-ऽॐ-ॐक़-ॡॱ-ঀঅ-ঌএ-ঐও-নপ-রল-লশ-হঽ-ঽৎ-ৎড়-ঢ়য়-ৡৰ-ৱਅ-ਊਏ-ਐਓ-ਨਪ-ਰਲ-ਲ਼ਵ-ਸ਼ਸ-ਹਖ਼-ੜਫ਼-ਫ਼ੲ-ੴઅ-ઍએ-ઑઓ-નપ-રલ-ળવ-હઽ-ઽૐ-ૐૠ-ૡૹ-ૹଅ-ଌଏ-ଐଓ-ନପ-ରଲ-ଳଵ-ହଽ-ଽଡ଼-ଢ଼ୟ-ୡୱ-ୱஃ-ஃஅ-ஊஎ-ஐஒ-கங-சஜ-ஜஞ-டண-தந-பம-ஹௐ-ௐఅ-ఌఎ-ఐఒ-నప-హఽ-ఽౘ-ౚౠ-ౡಅ-ಌಎ-ಐಒ-ನಪ-ಳವ-ಹಽ-ಽೞ-ೞೠ-ೡೱ-ೲഅ-ഌഎ-ഐഒ-ഺഽ-ഽൎ-ൎൟ-ൡൺ-ൿඅ-ඖක-නඳ-රල-ලව-ෆก-ะา-ำเ-ๆກ-ຂຄ-ຄງ-ຈຊ-ຊຍ-ຍດ-ທນ-ຟມ-ຣລ-ລວ-ວສ-ຫອ-ະາ-ຳຽ-ຽເ-ໄໆ-ໆໜ-ໟༀ-ༀཀ-ཇཉ-ཬྈ-ྌက-ဪဿ-ဿၐ-ၕၚ-ၝၡ-ၡၥ-ၦၮ-ၰၵ-ႁႎ-ႎႠ-ჅჇ-ჇჍ-Ⴭა-ჺჼ-ቈቊ-ቍቐ-ቖቘ-ቘቚ-ቝበ-ኈኊ-ኍነ-ኰኲ-ኵኸ-ኾዀ-ዀዂ-ዅወ-ዖዘ-ጐጒ-ጕጘ-ፚᎀ-ᎏᎠ-Ᏽᏸ-ᏽᐁ-ᙬᙯ-ᙿᚁ-ᚚᚠ-ᛪᛱ-ᛸᜀ-ᜌᜎ-ᜑᜠ-ᜱᝀ-ᝑᝠ-ᝬᝮ-ᝰក-ឳៗ-ៗៜ-ៜᠠ-ᡷᢀ-ᢨᢪ-ᢪᢰ-ᣵᤀ-ᤞᥐ-ᥭᥰ-ᥴᦀ-ᦫᦰ-ᧉᨀ-ᨖᨠ-ᩔᪧ-ᪧᬅ-ᬳᭅ-ᭋᮃ-ᮠᮮ-ᮯᮺ-ᯥᰀ-ᰣᱍ-ᱏᱚ-ᱽᳩ-ᳬᳮ-ᳱᳵ-ᳶᴀ-ᶿḀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙ-ὙὛ-ὛὝ-ὝὟ-ώᾀ-ᾴᾶ-ᾼι-ιῂ-ῄῆ-ῌῐ-ΐῖ-Ίῠ-Ῥῲ-ῴῶ-ῼⁱ-ⁱⁿ-ⁿₐ-ₜℂ-ℂℇ-ℇℊ-ℓℕ-ℕℙ-ℝℤ-ℤΩ-Ωℨ-ℨK-ℭℯ-ℹℼ-ℿⅅ-ⅉⅎ-ⅎↃ-ↄⰀ-Ⱞⰰ-ⱞⱠ-ⳤⳫ-ⳮⳲ-ⳳⴀ-ⴥⴧ-ⴧⴭ-ⴭⴰ-ⵧⵯ-ⵯⶀ-ⶖⶠ-ⶦⶨ-ⶮⶰ-ⶶⶸ-ⶾⷀ-ⷆⷈ-ⷎⷐ-ⷖⷘ-ⷞⸯ-ⸯ々-〆〱-〵〻-〼ぁ-ゖゝ-ゟァ-ヺー-ヿㄅ-ㄭㄱ-ㆎㆠ-ㆺㇰ-ㇿ㐀-䶵一-鿕ꀀ-ꒌꓐ-ꓽꔀ-ꘌꘐ-ꘟꘪ-ꘫꙀ-ꙮꙿ-ꚝꚠ-ꛥꜗ-ꜟꜢ-ꞈꞋ-ꞭꞰ-ꞷꟷ-ꠁꠃ-ꠅꠇ-ꠊꠌ-ꠢꡀ-ꡳꢂ-ꢳꣲ-ꣷꣻ-ꣻꣽ-ꣽꤊ-ꤥꤰ-ꥆꥠ-ꥼꦄ-ꦲꧏ-ꧏꧠ-ꧤꧦ-ꧯꧺ-ꧾꨀ-ꨨꩀ-ꩂꩄ-ꩋꩠ-ꩶꩺ-ꩺꩾ-ꪯꪱ-ꪱꪵ-ꪶꪹ-ꪽꫀ-ꫀꫂ-ꫂꫛ-ꫝꫠ-ꫪꫲ-ꫴꬁ-ꬆꬉ-ꬎꬑ-ꬖꬠ-ꬦꬨ-ꬮꬰ-ꭚꭜ-ꭥꭰ-ꯢ가-힣ힰ-ퟆퟋ-ퟻ豈-舘並-龎ff-stﬓ-ﬗיִ-יִײַ-ﬨשׁ-זּטּ-לּמּ-מּנּ-סּףּ-פּצּ-ﮱﯓ-ﴽﵐ-ﶏﶒ-ﷇﷰ-ﷻﹰ-ﹴﹶ-ﻼA-Za-zヲ-하-ᅦᅧ-ᅬᅭ-ᅲᅳ-ᅵ𐀀-𐀋𐀍-𐀦𐀨-𐀺𐀼-𐀽𐀿-𐁍𐁐-𐁝𐂀-𐃺𐊀-𐊜𐊠-𐋐𐌀-𐌟𐌰-𐍀𐍂-𐍉𐍐-𐍵𐎀-𐎝𐎠-𐏃𐏈-𐏏𐐀-𐒝𐔀-𐔧𐔰-𐕣𐘀-𐜶𐝀-𐝕𐝠-𐝧𐠀-𐠅𐠈-𐠈𐠊-𐠵𐠷-𐠸𐠼-𐠼𐠿-𐡕𐡠-𐡶𐢀-𐢞𐣠-𐣲𐣴-𐣵𐤀-𐤕𐤠-𐤹𐦀-𐦷𐦾-𐦿𐨀-𐨀𐨐-𐨓𐨕-𐨗𐨙-𐨳𐩠-𐩼𐪀-𐪜𐫀-𐫇𐫉-𐫤𐬀-𐬵𐭀-𐭕𐭠-𐭲𐮀-𐮑𐰀-𐱈𐲀-𐲲𐳀-𐳲𑀃-𑀷𑂃-𑂯𑃐-𑃨𑄃-𑄦𑅐-𑅲𑅶-𑅶𑆃-𑆲𑇁-𑇄𑇚-𑇚𑇜-𑇜𑈀-𑈑𑈓-𑈫𑊀-𑊆𑊈-𑊈𑊊-𑊍𑊏-𑊝𑊟-𑊨𑊰-𑋞𑌅-𑌌𑌏-𑌐𑌓-𑌨𑌪-𑌰𑌲-𑌳𑌵-𑌹𑌽-𑌽𑍐-𑍐𑍝-𑍡𑒀-𑒯𑓄-𑓅𑓇-𑓇𑖀-𑖮𑗘-𑗛𑘀-𑘯𑙄-𑙄𑚀-𑚪𑜀-𑜙𑢠-𑣟𑣿-𑣿𑫀-𑫸𒀀-𒎙𒒀-𒕃𓀀-𓐮𔐀-𔙆𖠀-𖨸𖩀-𖩞𖫐-𖫭𖬀-𖬯𖭀-𖭃𖭣-𖭷𖭽-𖮏𖼀-𖽄𖽐-𖽐𖾓-𖾟𛀀-𛀁𛰀-𛱪𛱰-𛱼𛲀-𛲈𛲐-𛲙𝐀-𝑔𝑖-𝒜𝒞-𝒟𝒢-𝒢𝒥-𝒦𝒩-𝒬𝒮-𝒹𝒻-𝒻𝒽-𝓃𝓅-𝔅𝔇-𝔊𝔍-𝔔𝔖-𝔜𝔞-𝔹𝔻-𝔾𝕀-𝕄𝕆-𝕆𝕊-𝕐𝕒-𝚥𝚨-𝛀𝛂-𝛚𝛜-𝛺𝛼-𝜔𝜖-𝜴𝜶-𝝎𝝐-𝝮𝝰-𝞈𝞊-𝞨𝞪-𝟂𝟄-𝟋𞠀-𞣄𞸀-𞸃𞸅-𞸟𞸡-𞸢𞸤-𞸤𞸧-𞸧𞸩-𞸲𞸴-𞸷𞸹-𞸹𞸻-𞸻𞹂-𞹂𞹇-𞹇𞹉-𞹉𞹋-𞹋𞹍-𞹏𞹑-𞹒𞹔-𞹔𞹗-𞹗𞹙-𞹙𞹛-𞹛𞹝-𞹝𞹟-𞹟𞹡-𞹢𞹤-𞹤𞹧-𞹪𞹬-𞹲𞹴-𞹷𞹹-𞹼𞹾-𞹾𞺀-𞺉𞺋-𞺛𞺡-𞺣𞺥-𞺩𞺫-𞺻𠀀-𪛖𪜀-𫜴𫝀-𫠝𫠠-𬺡丽-𪘀])+)"); - -// bullet_core-0.1.1: "^(?u:\\()" -consistent!(bullet_core_4, "^(?u:\\()"); - -// bullet_core-0.1.1: "^(?u:\\))" -consistent!(bullet_core_5, "^(?u:\\))"); - -// bullet_core-0.1.1: "^(?u:\\*)" -consistent!(bullet_core_6, "^(?u:\\*)"); - -// bullet_core-0.1.1: "^(?u:\\+)" 
-consistent!(bullet_core_7, "^(?u:\\+)"); - -// bullet_core-0.1.1: "^(?u:,)" -consistent!(bullet_core_8, "^(?u:,)"); - -// bullet_core-0.1.1: "^(?u:\\-)" -consistent!(bullet_core_9, "^(?u:\\-)"); - -// bullet_core-0.1.1: "^(?u:/)" -consistent!(bullet_core_10, "^(?u:/)"); - -// bullet_core-0.1.1: "^(?u:\\[)" -consistent!(bullet_core_11, "^(?u:\\[)"); - -// bullet_core-0.1.1: "^(?u:\\])" -consistent!(bullet_core_12, "^(?u:\\])"); - -// bullet_core-0.1.1: "^(?u:\\^)" -consistent!(bullet_core_13, "^(?u:\\^)"); - -// bullet_core-0.1.1: "^(?u:·)" -consistent!(bullet_core_14, "^(?u:·)"); - -// actix-web-0.6.13: "//+" -consistent!(actix_web_0, "//+"); - -// actix-web-0.6.13: "//+" -consistent!(actix_web_1, "//+"); - -// althea_kernel_interface-0.1.0: r"(\S*) .* (\S*) (REACHABLE|STALE|DELAY)" -consistent!( - althea_kernel_interface_0, - r"(\S*) .* (\S*) (REACHABLE|STALE|DELAY)" -); - -// althea_kernel_interface-0.1.0: r"-s (.*) --ip6-dst (.*)/.* bcnt = (.*)" -consistent!( - althea_kernel_interface_1, - r"-s (.*) --ip6-dst (.*)/.* bcnt = (.*)" -); - -// alcibiades-0.3.0: r"\buci(?:\s|$)" -consistent!(alcibiades_0, r"\buci(?:\s|$)"); - -// ruma-identifiers-0.11.0: r"\A[a-z0-9._=-]+\z" -consistent!(ruma_identifiers_0, r"\A[a-z0-9._=-]+\z"); - -// rusqbin-0.2.3: r"/rusqbins/((?i)[A-F0-9]{8}\-[A-F0-9]{4}\-4[A-F0-9]{3}\-[89AB][A-F0-9]{3}\-[A-F0-9]{12})$" -consistent!(rusqbin_0, r"/rusqbins/((?i)[A-F0-9]{8}\-[A-F0-9]{4}\-4[A-F0-9]{3}\-[89AB][A-F0-9]{3}\-[A-F0-9]{12})$"); - -// rusqbin-0.2.3: r"/rusqbins/((?i)[A-F0-9]{8}\-[A-F0-9]{4}\-4[A-F0-9]{3}\-[89AB][A-F0-9]{3}\-[A-F0-9]{12})/requests/?$" -consistent!(rusqbin_1, r"/rusqbins/((?i)[A-F0-9]{8}\-[A-F0-9]{4}\-4[A-F0-9]{3}\-[89AB][A-F0-9]{3}\-[A-F0-9]{12})/requests/?$"); - -// rust-install-0.0.4: r"^(nightly|beta|stable)(?:-(\d{4}-\d{2}-\d{2}))?$" -consistent!( - rust_install_0, - r"^(nightly|beta|stable)(?:-(\d{4}-\d{2}-\d{2}))?$" -); - -// rust_inbox-0.0.5: "^+(.*)\r\n" -consistent!(rust_inbox_0, "^+(.*)\r\n"); - -// 
rust_inbox-0.0.5: r"^\* CAPABILITY (.*)\r\n" -consistent!(rust_inbox_1, r"^\* CAPABILITY (.*)\r\n"); - -// rust_inbox-0.0.5: r"^([a-zA-Z0-9]+) (OK|NO|BAD)(.*)" -consistent!(rust_inbox_2, r"^([a-zA-Z0-9]+) (OK|NO|BAD)(.*)"); - -// rust_inbox-0.0.5: r"^\* (\d+) EXISTS\r\n" -consistent!(rust_inbox_3, r"^\* (\d+) EXISTS\r\n"); - -// rust_inbox-0.0.5: r"^\* (\d+) RECENT\r\n" -consistent!(rust_inbox_4, r"^\* (\d+) RECENT\r\n"); - -// rust_inbox-0.0.5: r"^\* FLAGS (.+)\r\n" -consistent!(rust_inbox_5, r"^\* FLAGS (.+)\r\n"); - -// rust_inbox-0.0.5: r"^\* OK \[UNSEEN (\d+)\](.*)\r\n" -consistent!(rust_inbox_6, r"^\* OK \[UNSEEN (\d+)\](.*)\r\n"); - -// rust_inbox-0.0.5: r"^\* OK \[UIDVALIDITY (\d+)\](.*)\r\n" -consistent!(rust_inbox_7, r"^\* OK \[UIDVALIDITY (\d+)\](.*)\r\n"); - -// rust_inbox-0.0.5: r"^\* OK \[UIDNEXT (\d+)\](.*)\r\n" -consistent!(rust_inbox_8, r"^\* OK \[UIDNEXT (\d+)\](.*)\r\n"); - -// rust_inbox-0.0.5: r"^\* OK \[PERMANENTFLAGS (.+)\](.*)\r\n" -consistent!(rust_inbox_9, r"^\* OK \[PERMANENTFLAGS (.+)\](.*)\r\n"); - -// rustml-0.0.7: r"^[a-z]+ (\d+)$" -consistent!(rustml_0, r"^[a-z]+ (\d+)$"); - -// rustml-0.0.7: r"^[a-z]+ (\d+)$" -consistent!(rustml_1, r"^[a-z]+ (\d+)$"); - -// rustml-0.0.7: r"^[a-z]+ (\d+)$" -consistent!(rustml_2, r"^[a-z]+ (\d+)$"); - -// rustfmt-0.10.0: r"([^\\](\\\\)*)\\[\n\r][[:space:]]*" -consistent!(rustfmt_0, r"([^\\](\\\\)*)\\[\n\r][[:space:]]*"); - -// rustfmt-core-0.4.0: r"(^\s*$)|(^\s*//\s*rustfmt-[^:]+:\s*\S+)" -consistent!(rustfmt_core_0, r"(^\s*$)|(^\s*//\s*rustfmt-[^:]+:\s*\S+)"); - -// rustfmt-core-0.4.0: r"^## `([^`]+)`" -consistent!(rustfmt_core_1, r"^## `([^`]+)`"); - -// rustfmt-core-0.4.0: r"([^\\](\\\\)*)\\[\n\r][[:space:]]*" -consistent!(rustfmt_core_2, r"([^\\](\\\\)*)\\[\n\r][[:space:]]*"); - -// rustfmt-core-0.4.0: r"\s;" -consistent!(rustfmt_core_3, r"\s;"); - -// rust-enum-derive-0.4.0: r"^(0x)?([:digit:]+)$" -consistent!(rust_enum_derive_0, r"^(0x)?([:digit:]+)$"); - -// rust-enum-derive-0.4.0: 
r"^([:digit:]+)[:space:]*<<[:space:]*([:digit:]+)$" -consistent!( - rust_enum_derive_1, - r"^([:digit:]+)[:space:]*<<[:space:]*([:digit:]+)$" -); - -// rust-enum-derive-0.4.0: r"^[:space:]*([[:alnum:]_]+)([:space:]*=[:space:]*([:graph:]+))?[:space:]*," -consistent!(rust_enum_derive_2, r"^[:space:]*([[:alnum:]_]+)([:space:]*=[:space:]*([:graph:]+))?[:space:]*,"); - -// rust-enum-derive-0.4.0: r"^#define[:space:]+([:graph:]+)[:space:]+([:graph:]+)" -consistent!( - rust_enum_derive_3, - r"^#define[:space:]+([:graph:]+)[:space:]+([:graph:]+)" -); - -// rustsourcebundler-0.2.0: r"^\s*pub mod (.+);$" -consistent!(rustsourcebundler_0, r"^\s*pub mod (.+);$"); - -// rustsourcebundler-0.2.0: r"^\s*pub mod (.+);$" -consistent!(rustsourcebundler_1, r"^\s*pub mod (.+);$"); - -// rustfmt-nightly-0.8.2: r"([^\\](\\\\)*)\\[\n\r][[:space:]]*" -consistent!(rustfmt_nightly_0, r"([^\\](\\\\)*)\\[\n\r][[:space:]]*"); - -// rustfmt-nightly-0.8.2: r"\s;" -consistent!(rustfmt_nightly_1, r"\s;"); - -// rustache-0.1.0: r"(?s)(.*?)([ \t\r\n]*)(\{\{(\{?\S?\s*?[\w\.\s]*.*?\s*?\}?)\}\})([ \t\r\n]*)" -consistent!(rustache_0, r"(?s)(.*?)([ \t\r\n]*)(\{\{(\{?\S?\s*?[\w\.\s]*.*?\s*?\}?)\}\})([ \t\r\n]*)"); - -// rustfilt-0.2.0: r"_ZN[\$\._[:alnum:]]*" -consistent!(rustfilt_0, r"_ZN[\$\._[:alnum:]]*"); - -// rustache-lists-0.1.2: r"(?s)(.*?)([ \t\r\n]*)(\{\{(\{?\S?\s*?[\w\.\s]*.*?\s*?\}?)\}\})([ \t\r\n]*)" -consistent!(rustache_lists_0, r"(?s)(.*?)([ \t\r\n]*)(\{\{(\{?\S?\s*?[\w\.\s]*.*?\s*?\}?)\}\})([ \t\r\n]*)"); - -// rural-0.7.3: "(.+)=(.+)" -consistent!(rural_0, "(.+)=(.+)"); - -// rural-0.7.3: "(.*):(.+)" -consistent!(rural_1, "(.*):(.+)"); - -// rural-0.7.3: "(.+):=(.+)" -consistent!(rural_2, "(.+):=(.+)"); - -// rural-0.7.3: "(.*)==(.+)" -consistent!(rural_3, "(.*)==(.+)"); - -// rusoto_credential-0.11.0: r"^\[([^\]]+)\]$" -consistent!(rusoto_credential_0, r"^\[([^\]]+)\]$"); - -// rumblebars-0.3.0: "([:blank:]*)$" -consistent!(rumblebars_0, "([:blank:]*)$"); - -// rumblebars-0.3.0: 
"(\r?\n)[:blank:]*(\\{\\{~?[#!/](?:\\}?[^}])*\\}\\})[:blank:]*(:?\r?\n)?\\z" -consistent!(rumblebars_1, "(\r?\n)[:blank:]*(\\{\\{~?[#!/](?:\\}?[^}])*\\}\\})[:blank:]*(:?\r?\n)?\\z"); - -// rumblebars-0.3.0: "(\r?\n[:blank:]*)(\\{\\{~?>(?:\\}?[^}])*\\}\\})[:blank:]*(:?\r?\n)?\\z" -consistent!( - rumblebars_2, - "(\r?\n[:blank:]*)(\\{\\{~?>(?:\\}?[^}])*\\}\\})[:blank:]*(:?\r?\n)?\\z" -); - -// rumblebars-0.3.0: "((?:[:blank:]|\r?\n)*)(\r?\n)[:blank:]*$" -consistent!(rumblebars_3, "((?:[:blank:]|\r?\n)*)(\r?\n)[:blank:]*$"); - -// rumblebars-0.3.0: "^([:blank:]*\r?\n)(.*)" -consistent!(rumblebars_4, "^([:blank:]*\r?\n)(.*)"); - -// diesel_cli-1.3.1: r"(?P<stamp>[\d-]*)_hello" -consistent!(diesel_cli_0, r"(?P<stamp>[\d-]*)_hello"); - -// dishub-0.1.1: r"(\d+)s" -consistent!(dishub_0, r"(\d+)s"); - -// spreadsheet_textconv-0.1.0: r"\n" -consistent!(spreadsheet_textconv_0, r"\n"); - -// spreadsheet_textconv-0.1.0: r"\r" -consistent!(spreadsheet_textconv_1, r"\r"); - -// spreadsheet_textconv-0.1.0: r"\t" -consistent!(spreadsheet_textconv_2, r"\t"); - -// split_aud-0.1.0: r"DELAY (-?\d+)ms" -consistent!(split_aud_0, r"DELAY (-?\d+)ms"); - -// split_aud-0.1.0: r"Trim\((\d+), ?(\d+)\)" -consistent!(split_aud_1, r"Trim\((\d+), ?(\d+)\)"); - -// spotrust-0.0.5: r"spotify:[a-z]+:[a-zA-Z0-9]+" -consistent!(spotrust_0, r"spotify:[a-z]+:[a-zA-Z0-9]+"); - -// spaceslugs-0.1.0: r"[^\x00-\x7F]" -consistent!(spaceslugs_0, r"[^\x00-\x7F]"); - -// spaceslugs-0.1.0: r"[']+" -consistent!(spaceslugs_1, r"[']+"); - -// spaceslugs-0.1.0: r"\W+" -consistent!(spaceslugs_2, r"\W+"); - -// spaceslugs-0.1.0: r"[ ]+" -consistent!(spaceslugs_3, r"[ ]+"); - -// space_email_api-0.1.1: "PHPSESSID=([0-9a-f]+)" -consistent!(space_email_api_0, "PHPSESSID=([0-9a-f]+)"); - -// lorikeet-0.7.0: "[^0-9.,]" -consistent!(lorikeet_0, "[^0-9.,]"); - -// claude-0.3.0: r"^(?:\b|(-)?)(\p{Currency_Symbol})?((?:(?:\d{1,3}[\.,])+\d{3})|\d+)(?:[\.,](\d{2}))?\b$" -consistent!(claude_0, 
r"^(?:\b|(-)?)(\p{Currency_Symbol})?((?:(?:\d{1,3}[\.,])+\d{3})|\d+)(?:[\.,](\d{2}))?\b$"); - -// clam-0.1.6: r"<%=\s*(.+?)\s*%>" -consistent!(clam_0, r"<%=\s*(.+?)\s*%>"); - -// classifier-0.0.3: r"(\s)" -consistent!(classifier_0, r"(\s)"); - -// click-0.3.2: r"(-----BEGIN .*-----\n)((?:(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)*\n)+)(-----END .*-----)" -consistent!(click_0, r"(-----BEGIN .*-----\n)((?:(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)*\n)+)(-----END .*-----)"); - -// click-0.3.2: r"-----BEGIN PRIVATE KEY-----" -consistent!(click_1, r"-----BEGIN PRIVATE KEY-----"); - -// ultrastar-txt-0.1.2: r"#([A-Z3a-z]*):(.*)" -consistent!(ultrastar_txt_0, r"#([A-Z3a-z]*):(.*)"); - -// ultrastar-txt-0.1.2: "^-\\s?(-?[0-9]+)\\s*$" -consistent!(ultrastar_txt_1, "^-\\s?(-?[0-9]+)\\s*$"); - -// ultrastar-txt-0.1.2: "^-\\s?(-?[0-9]+)\\s+(-?[0-9]+)" -consistent!(ultrastar_txt_2, "^-\\s?(-?[0-9]+)\\s+(-?[0-9]+)"); - -// ultrastar-txt-0.1.2: "^(.)\\s*(-?[0-9]+)\\s+(-?[0-9]+)\\s+(-?[0-9]+)\\s?(.*)" -consistent!( - ultrastar_txt_3, - "^(.)\\s*(-?[0-9]+)\\s+(-?[0-9]+)\\s+(-?[0-9]+)\\s?(.*)" -); - -// ultrastar-txt-0.1.2: "^P\\s?(-?[0-9]+)" -consistent!(ultrastar_txt_4, "^P\\s?(-?[0-9]+)"); - -// db-accelerate-2.0.0: r"^template\.add($|\..+$)" -consistent!(db_accelerate_0, r"^template\.add($|\..+$)"); - -// db-accelerate-2.0.0: r"^template\.sub($|\..+$)" -consistent!(db_accelerate_1, r"^template\.sub($|\..+$)"); - -// sterling-0.3.0: r"(\d+)([cegps])" -consistent!(sterling_0, r"(\d+)([cegps])"); - -// stache-0.2.0: r"[^\w]" -consistent!(stache_0, r"[^\w]"); - -// strukt-0.1.0: "\"([<>]?)([xcbB\\?hHiIlLqQfdspP]*)\"" -consistent!(strukt_0, "\"([<>]?)([xcbB\\?hHiIlLqQfdspP]*)\""); - -// steamid-ng-0.3.1: r"^STEAM_([0-4]):([0-1]):([0-9]{1,10})$" -consistent!(steamid_ng_0, r"^STEAM_([0-4]):([0-1]):([0-9]{1,10})$"); - -// steamid-ng-0.3.1: r"^\[([AGMPCgcLTIUai]):([0-4]):([0-9]{1,10})(:([0-9]+))?\]$" -consistent!( - steamid_ng_1, - 
r"^\[([AGMPCgcLTIUai]):([0-4]):([0-9]{1,10})(:([0-9]+))?\]$" -); - -// strscan-0.1.1: r"^\w+" -consistent!(strscan_0, r"^\w+"); - -// strscan-0.1.1: r"^\s+" -consistent!(strscan_1, r"^\s+"); - -// strscan-0.1.1: r"^\w+" -consistent!(strscan_2, r"^\w+"); - -// strscan-0.1.1: r"^\s+" -consistent!(strscan_3, r"^\s+"); - -// strscan-0.1.1: r"^(\w+)\s+" -consistent!(strscan_4, r"^(\w+)\s+"); - -// tk-carbon-0.2.0: r"^([a-zA-Z0-9\.-]+)(?:\s+(\d+))$" -consistent!(tk_carbon_0, r"^([a-zA-Z0-9\.-]+)(?:\s+(\d+))$"); - -// tk-carbon-0.2.0: r"^([a-zA-Z0-9\.-]+)(?:\s+(\d+))$" -consistent!(tk_carbon_1, r"^([a-zA-Z0-9\.-]+)(?:\s+(\d+))$"); - -// evalrs-0.0.10: r"extern\s+crate\s+([a-z0-9_]+)\s*;(\s*//(.+))?" -consistent!(evalrs_0, r"extern\s+crate\s+([a-z0-9_]+)\s*;(\s*//(.+))?"); - -// evalrs-0.0.10: r"(?m)^# " -consistent!(evalrs_1, r"(?m)^# "); - -// evalrs-0.0.10: r"(?m)^\s*fn +main *\( *\)" -consistent!(evalrs_2, r"(?m)^\s*fn +main *\( *\)"); - -// evalrs-0.0.10: r"(extern\s+crate\s+[a-z0-9_]+\s*;)" -consistent!(evalrs_3, r"(extern\s+crate\s+[a-z0-9_]+\s*;)"); - -// gate_build-0.5.0: "(.*)_t([0-9]+)" -consistent!(gate_build_0, "(.*)_t([0-9]+)"); - -// rake-0.1.1: r"[^\P{P}-]|\s+-\s+" -consistent!(rake_0, r"[^\P{P}-]|\s+-\s+"); - -// rafy-0.2.1: r"^.*(?:(?:youtu\.be/|v/|vi/|u/w/|embed/)|(?:(?:watch)?\?v(?:i)?=|\&v(?:i)?=))([^#\&\?]*).*" -consistent!(rafy_0, r"^.*(?:(?:youtu\.be/|v/|vi/|u/w/|embed/)|(?:(?:watch)?\?v(?:i)?=|\&v(?:i)?=))([^#\&\?]*).*"); - -// raven-0.2.1: r"^(?P<protocol>.*?)://(?P<public_key>.*?):(?P<secret_key>.*?)@(?P<host>.*?)/(?P<path>.*/)?(?P<project_id>.*)$" -consistent!(raven_0, r"^(?P<protocol>.*?)://(?P<public_key>.*?):(?P<secret_key>.*?)@(?P<host>.*?)/(?P<path>.*/)?(?P<project_id>.*)$"); - -// rargs-0.2.0: r"\{[[:space:]]*[^{}]*[[:space:]]*\}" -consistent!(rargs_0, r"\{[[:space:]]*[^{}]*[[:space:]]*\}"); - -// rargs-0.2.0: r"^\{[[:space:]]*(?P<name>[[:word:]]*)[[:space:]]*\}$" -consistent!(rargs_1, 
r"^\{[[:space:]]*(?P<name>[[:word:]]*)[[:space:]]*\}$"); - -// rargs-0.2.0: r"^\{[[:space:]]*(?P<num>-?\d+)[[:space:]]*\}$" -consistent!(rargs_2, r"^\{[[:space:]]*(?P<num>-?\d+)[[:space:]]*\}$"); - -// rargs-0.2.0: r"^\{(?P<left>-?\d*)?\.\.(?P<right>-?\d*)?(?::(?P<sep>.*))?\}$" -consistent!( - rargs_3, - r"^\{(?P<left>-?\d*)?\.\.(?P<right>-?\d*)?(?::(?P<sep>.*))?\}$" -); - -// rargs-0.2.0: r"(.*?)[[:space:]]+|(.*?)$" -consistent!(rargs_4, r"(.*?)[[:space:]]+|(.*?)$"); - -// indradb-lib-0.15.0: r"[a-zA-Z0-9]{8}" -consistent!(indradb_lib_0, r"[a-zA-Z0-9]{8}"); - -// fungi-lang-0.1.50: r"::" -consistent!(fungi_lang_0, r"::"); - -// nickel-0.10.1: "/hello/(?P<name>[a-zA-Z]+)" -consistent!(nickel_0, "/hello/(?P<name>[a-zA-Z]+)"); - -// nickel-0.10.1: "/hello/(?P<name>[a-zA-Z]+)" -consistent!(nickel_1, "/hello/(?P<name>[a-zA-Z]+)"); - -// pact_verifier-0.4.0: r"\{(\w+)\}" -consistent!(pact_verifier_0, r"\{(\w+)\}"); - -// pact_matching-0.4.1: "application/.*json" -consistent!(pact_matching_0, "application/.*json"); - -// pact_matching-0.4.1: "application/json.*" -consistent!(pact_matching_1, "application/json.*"); - -// pact_matching-0.4.1: "application/.*xml" -consistent!(pact_matching_2, "application/.*xml"); - -// pangu-0.2.0: "([\"'\\(\\[\\{{<\u{201c}])(\\s*)(.+?)(\\s*)([\"'\\)\\]\\}}>\u{201d}])" -consistent!( - pangu_0, - "([\"'\\(\\[\\{{<\u{201c}])(\\s*)(.+?)(\\s*)([\"'\\)\\]\\}}>\u{201d}])" -); - -// pangu-0.2.0: "([\\(\\[\\{{<\u{201c}]+)(\\s*)(.+?)(\\s*)([\\)\\]\\}}>\u{201d}]+)" -consistent!( - pangu_1, - "([\\(\\[\\{{<\u{201c}]+)(\\s*)(.+?)(\\s*)([\\)\\]\\}}>\u{201d}]+)" -); - -// parser-haskell-0.2.0: r"\{-[\s\S]*?-\}" -consistent!(parser_haskell_0, r"\{-[\s\S]*?-\}"); - -// parser-haskell-0.2.0: r"(?m);+\s*$" -consistent!(parser_haskell_1, r"(?m);+\s*$"); - -// parser-haskell-0.2.0: r"(?m)^#(if|ifn?def|endif|else|include|elif).*" -consistent!(parser_haskell_2, r"(?m)^#(if|ifn?def|endif|else|include|elif).*"); - -// parser-haskell-0.2.0: 
r"'([^'\\]|\\[A-Z]{1,3}|\\.)'" -consistent!(parser_haskell_3, r"'([^'\\]|\\[A-Z]{1,3}|\\.)'"); - -// parser-haskell-0.2.0: r"forall\s+(.*?)\." -consistent!(parser_haskell_4, r"forall\s+(.*?)\."); - -// html2md-0.2.1: "\\s{2,}" -consistent!(html2md_0, "\\s{2,}"); - -// html2md-0.2.1: "\\n{2,}" -consistent!(html2md_1, "\\n{2,}"); - -// html2md-0.2.1: "(?m)(\\S) $" -consistent!(html2md_2, "(?m)(\\S) $"); - -// html2md-0.2.1: "(?m)^[-*] " -consistent!(html2md_3, "(?m)^[-*] "); - -// ovpnfile-0.1.2: r"#.*$" -consistent!(ovpnfile_0, r"#.*$"); - -// ovpnfile-0.1.2: r"^<(\S+)>" -consistent!(ovpnfile_1, r"^<(\S+)>"); - -// ovpnfile-0.1.2: r"^</(\S+)>" -consistent!(ovpnfile_2, r"^</(\S+)>"); - -// screenruster-saver-fractal-0.1.1: r"#([:xdigit:]{2})([:xdigit:]{2})([:xdigit:]{2})" -consistent!( - screenruster_saver_fractal_0, - r"#([:xdigit:]{2})([:xdigit:]{2})([:xdigit:]{2})" -); - -// scarlet-0.2.2: r"rgb\((?: *(\d{1,3}),)(?: *(\d{1,3}),)(?: *(\d{1,3}))\)" -consistent!( - scarlet_0, - r"rgb\((?: *(\d{1,3}),)(?: *(\d{1,3}),)(?: *(\d{1,3}))\)" -); - -// cpp_to_rust_generator-0.2.0: r"^([\w:]+)<(.+)>$" -consistent!(cpp_to_rust_generator_0, r"^([\w:]+)<(.+)>$"); - -// cpp_to_rust_generator-0.2.0: r"^type-parameter-(\d+)-(\d+)$" -consistent!(cpp_to_rust_generator_1, r"^type-parameter-(\d+)-(\d+)$"); - -// cpp_to_rust_generator-0.2.0: r"^([\w~]+)<[^<>]+>$" -consistent!(cpp_to_rust_generator_2, r"^([\w~]+)<[^<>]+>$"); - -// cpp_to_rust_generator-0.2.0: r"(signals|Q_SIGNALS)\s*:" -consistent!(cpp_to_rust_generator_3, r"(signals|Q_SIGNALS)\s*:"); - -// cpp_to_rust_generator-0.2.0: r"(slots|Q_SLOTS)\s*:" -consistent!(cpp_to_rust_generator_4, r"(slots|Q_SLOTS)\s*:"); - -// cpp_to_rust_generator-0.2.0: r"(public|protected|private)\s*:" -consistent!(cpp_to_rust_generator_5, r"(public|protected|private)\s*:"); - -// cpp_to_rust-0.5.3: r"^([\w:]+)<(.+)>$" -consistent!(cpp_to_rust_0, r"^([\w:]+)<(.+)>$"); - -// cpp_to_rust-0.5.3: r"^type-parameter-(\d+)-(\d+)$" -consistent!(cpp_to_rust_1, 
r"^type-parameter-(\d+)-(\d+)$"); - -// cpp_to_rust-0.5.3: r"^([\w~]+)<[^<>]+>$" -consistent!(cpp_to_rust_2, r"^([\w~]+)<[^<>]+>$"); - -// cpp_to_rust-0.5.3: r"(signals|Q_SIGNALS)\s*:" -consistent!(cpp_to_rust_3, r"(signals|Q_SIGNALS)\s*:"); - -// cpp_to_rust-0.5.3: r"(slots|Q_SLOTS)\s*:" -consistent!(cpp_to_rust_4, r"(slots|Q_SLOTS)\s*:"); - -// cpp_to_rust-0.5.3: r"(public|protected|private)\s*:" -consistent!(cpp_to_rust_5, r"(public|protected|private)\s*:"); - -// fritzbox_logs-0.2.0: "(\\d{2}\\.\\d{2}\\.\\d{2}) (\\d{2}:\\d{2}:\\d{2}) (.*)" -consistent!( - fritzbox_logs_0, - "(\\d{2}\\.\\d{2}\\.\\d{2}) (\\d{2}:\\d{2}:\\d{2}) (.*)" -); - -// fractal-matrix-api-3.29.0: r"mxc://(?P<server>[^/]+)/(?P<media>.+)" -consistent!(fractal_matrix_api_0, r"mxc://(?P<server>[^/]+)/(?P<media>.+)"); - -// smtp2go-0.1.4: r"^api-[a-zA-Z0-9]{32}$" -consistent!(smtp2go_0, r"^api-[a-zA-Z0-9]{32}$"); - -// pusher-0.3.1: r"^[-a-zA-Z0-9_=@,.;]+$" -consistent!(pusher_0, r"^[-a-zA-Z0-9_=@,.;]+$"); - -// pusher-0.3.1: r"\A\d+\.\d+\z" -consistent!(pusher_1, r"\A\d+\.\d+\z"); - -// bakervm-0.9.0: r"^\.(.+?) +?(.+)$" -consistent!(bakervm_0, r"^\.(.+?) +?(.+)$"); - -// bakervm-0.9.0: r"^\.([^\s]+)$" -consistent!(bakervm_1, r"^\.([^\s]+)$"); - -// bakervm-0.9.0: r"^include! +([^\s]+)$" -consistent!(bakervm_2, r"^include! 
+([^\s]+)$"); - -// bakervm-0.9.0: r"^@(\d+)$" -consistent!(bakervm_3, r"^@(\d+)$"); - -// bakervm-0.9.0: r"^true|false$" -consistent!(bakervm_4, r"^true|false$"); - -// bakervm-0.9.0: r"^(-?\d+)?\.[0-9]+$" -consistent!(bakervm_5, r"^(-?\d+)?\.[0-9]+$"); - -// bakervm-0.9.0: r"^(-?\d+)?$" -consistent!(bakervm_6, r"^(-?\d+)?$"); - -// bakervm-0.9.0: r"^#([0-9abcdefABCDEF]{6})$" -consistent!(bakervm_7, r"^#([0-9abcdefABCDEF]{6})$"); - -// bakervm-0.9.0: r"^'(.)'$" -consistent!(bakervm_8, r"^'(.)'$"); - -// bakervm-0.9.0: r"^\$vi\((\d+)\)$" -consistent!(bakervm_9, r"^\$vi\((\d+)\)$"); - -// bakervm-0.9.0: r"^\$key\((\d+)\)$" -consistent!(bakervm_10, r"^\$key\((\d+)\)$"); - -// banana-0.0.2: "(?P<type>[A-Z^']+) (?P<route>[^']+) HTTP/(?P<http>[^']+)" -consistent!( - banana_0, - "(?P<type>[A-Z^']+) (?P<route>[^']+) HTTP/(?P<http>[^']+)" -); - -// serial-key-2.0.0: r"[A-F0-9]{8}" -consistent!(serial_key_0, r"[A-F0-9]{8}"); - -// serde-hjson-0.8.1: "[\\\\\"\x00-\x1f\x7f-\u{9f}\u{00ad}\u{0600}-\u{0604}\u{070f}\u{17b4}\u{17b5}\u{200c}-\u{200f}\u{2028}-\u{202f}\u{2060}-\u{206f}\u{feff}\u{fff0}-\u{ffff}]" -consistent!(serde_hjson_0, "[\\\\\"\x00-\x1f\x7f-\u{9f}\u{00ad}\u{0600}-\u{0604}\u{070f}\u{17b4}\u{17b5}\u{200c}-\u{200f}\u{2028}-\u{202f}\u{2060}-\u{206f}\u{feff}\u{fff0}-\u{ffff}]"); - -// serde-hjson-0.8.1: "[\x00-\x1f\x7f-\u{9f}\u{00ad}\u{0600}-\u{0604}\u{070f}\u{17b4}\u{17b5}\u{200c}-\u{200f}\u{2028}-\u{202f}\u{2060}-\u{206f}\u{feff}\u{fff0}-\u{ffff}]" -consistent!(serde_hjson_1, "[\x00-\x1f\x7f-\u{9f}\u{00ad}\u{0600}-\u{0604}\u{070f}\u{17b4}\u{17b5}\u{200c}-\u{200f}\u{2028}-\u{202f}\u{2060}-\u{206f}\u{feff}\u{fff0}-\u{ffff}]"); - -// serde-hjson-0.8.1: "'''|[\x00-\x09\x0b\x0c\x0e-\x1f\x7f-\u{9f}\u{00ad}\u{0600}-\u{0604}\u{070f}\u{17b4}\u{17b5}\u{200c}-\u{200f}\u{2028}-\u{202f}\u{2060}-\u{206f}\u{feff}\u{fff0}-\u{ffff}]" -consistent!(serde_hjson_2, 
"'''|[\x00-\x09\x0b\x0c\x0e-\x1f\x7f-\u{9f}\u{00ad}\u{0600}-\u{0604}\u{070f}\u{17b4}\u{17b5}\u{200c}-\u{200f}\u{2028}-\u{202f}\u{2060}-\u{206f}\u{feff}\u{fff0}-\u{ffff}]"); - -// serde-odbc-0.1.0: r"/todos/(?P<id>\d+)" -consistent!(serde_odbc_0, r"/todos/(?P<id>\d+)"); - -// sentry-0.6.0: r"^(?:_<)?([a-zA-Z0-9_]+?)(?:\.\.|::)" -consistent!(sentry_0, r"^(?:_<)?([a-zA-Z0-9_]+?)(?:\.\.|::)"); - -// sentiment-0.1.1: r"[^a-zA-Z0 -]+" -consistent!(sentiment_0, r"[^a-zA-Z0 -]+"); - -// sentiment-0.1.1: r" {2,}" -consistent!(sentiment_1, r" {2,}"); - -// verilog-0.0.1: r"(?m)//.*" -consistent!(verilog_0, r"(?m)//.*"); - -// verex-0.2.2: "(?P<robot>C3PO)" -consistent!(verex_0, "(?P<robot>C3PO)"); - -// handlebars-0.32.4: ">|<|\"|&" -consistent!(handlebars_0, ">|<|\"|&"); - -// haikunator-0.1.2: r"^\w+-\w+-[0123456789]{4}$" -consistent!(haikunator_0, r"^\w+-\w+-[0123456789]{4}$"); - -// haikunator-0.1.2: r"^\w+@\w+@[0123456789]{4}$" -consistent!(haikunator_1, r"^\w+@\w+@[0123456789]{4}$"); - -// haikunator-0.1.2: r"^\w+-\w+-[0123456789abcdef]{4}$" -consistent!(haikunator_2, r"^\w+-\w+-[0123456789abcdef]{4}$"); - -// haikunator-0.1.2: r"^\w+-\w+-[0123456789忠犬ハチ公]{10}$" -consistent!(haikunator_3, r"^\w+-\w+-[0123456789忠犬ハチ公]{10}$"); - -// haikunator-0.1.2: r"^\w+-\w+$" -consistent!(haikunator_4, r"^\w+-\w+$"); - -// haikunator-0.1.2: r"^\w+-\w+-[foo]{4}$" -consistent!(haikunator_5, r"^\w+-\w+-[foo]{4}$"); - -// haikunator-0.1.2: r"^\w+-\w+-[0123456789忠犬ハチ公]{5}$" -consistent!(haikunator_6, r"^\w+-\w+-[0123456789忠犬ハチ公]{5}$"); - -// bobbin-cli-0.8.3: r"(.*)" -consistent!(bobbin_cli_0, r"(.*)"); - -// bobbin-cli-0.8.3: r"rustc (.*)" -consistent!(bobbin_cli_1, r"rustc (.*)"); - -// bobbin-cli-0.8.3: r"cargo (.*)" -consistent!(bobbin_cli_2, r"cargo (.*)"); - -// bobbin-cli-0.8.3: r"xargo (.*)\n" -consistent!(bobbin_cli_3, r"xargo (.*)\n"); - -// bobbin-cli-0.8.3: r"Open On-Chip Debugger (.*)" -consistent!(bobbin_cli_4, r"Open On-Chip Debugger (.*)"); - -// bobbin-cli-0.8.3: 
r"arm-none-eabi-gcc \(GNU Tools for ARM Embedded Processors[^\)]*\) (.*)" -consistent!( - bobbin_cli_5, - r"arm-none-eabi-gcc \(GNU Tools for ARM Embedded Processors[^\)]*\) (.*)" -); - -// bobbin-cli-0.8.3: r"(?m).*\nBasic Open Source SAM-BA Application \(BOSSA\) Version (.*)\n" -consistent!( - bobbin_cli_6, - r"(?m).*\nBasic Open Source SAM-BA Application \(BOSSA\) Version (.*)\n" -); - -// bobbin-cli-0.8.3: r"(?m)SEGGER J-Link Commander (.*)\n" -consistent!(bobbin_cli_7, r"(?m)SEGGER J-Link Commander (.*)\n"); - -// bobbin-cli-0.8.3: r"(?m)Teensy Loader, Command Line, Version (.*)\n" -consistent!(bobbin_cli_8, r"(?m)Teensy Loader, Command Line, Version (.*)\n"); - -// bobbin-cli-0.8.3: r"dfu-util (.*)\n" -consistent!(bobbin_cli_9, r"dfu-util (.*)\n"); - -// borsholder-0.9.1: r"^/static/[\w.]+$" -consistent!(borsholder_0, r"^/static/[\w.]+$"); - -// borsholder-0.9.1: r"^/timeline/([0-9]+)$" -consistent!(borsholder_1, r"^/timeline/([0-9]+)$"); - -// fblog-1.0.1: "\u{001B}\\[[\\d;]*[^\\d;]" -consistent!(fblog_0, "\u{001B}\\[[\\d;]*[^\\d;]"); - -// fblog-1.0.1: "\u{001B}\\[[\\d;]*[^\\d;]" -consistent!(fblog_1, "\u{001B}\\[[\\d;]*[^\\d;]"); - -// toml-query-0.6.0: r"^\[\d+\]$" -consistent!(toml_query_0, r"^\[\d+\]$"); - -// todo-txt-1.1.0: r" (?P<key>[^\s]+):(?P<value>[^\s^/]+)" -consistent!(todo_txt_0, r" (?P<key>[^\s]+):(?P<value>[^\s^/]+)"); - -// findr-0.1.5: r"\band\b" -consistent!(findr_0, r"\band\b"); - -// findr-0.1.5: r"\bor\b" -consistent!(findr_1, r"\bor\b"); - -// findr-0.1.5: r"\bnot\b" -consistent!(findr_2, r"\bnot\b"); - -// file-sniffer-3.0.1: r".*?\.(a|la|lo|o|ll|keter|bc|dyn_o|out|d|rlib|crate|min\.js|hi|dyn_hi|S|jsexe|webapp|js\.externs|ibc|toc|aux|fdb_latexmk|fls|egg-info|whl|js_a|js_hi|jld|ji|js_o|so.*|dump-.*|vmb|crx|orig|elmo|elmi|pyc|mod|p_hi|p_o|prof|tix)$" -consistent!(file_sniffer_0, 
r".*?\.(a|la|lo|o|ll|keter|bc|dyn_o|out|d|rlib|crate|min\.js|hi|dyn_hi|S|jsexe|webapp|js\.externs|ibc|toc|aux|fdb_latexmk|fls|egg-info|whl|js_a|js_hi|jld|ji|js_o|so.*|dump-.*|vmb|crx|orig|elmo|elmi|pyc|mod|p_hi|p_o|prof|tix)$"); - -// file-sniffer-3.0.1: r".*?\.(stats|conf|h|cache.*|dat|pc|info)$" -consistent!(file_sniffer_1, r".*?\.(stats|conf|h|cache.*|dat|pc|info)$"); - -// file-sniffer-3.0.1: r".*?\.(exe|a|la|o|ll|keter|bc|dyn_o|out|d|rlib|crate|min\.js|hi|dyn_hi|jsexe|webapp|js\.externs|ibc|toc|aux|fdb_latexmk|fls|egg-info|whl|js_a|js_hi|jld|ji|js_o|so.*|dump-.*|vmb|crx|orig|elmo|elmi|pyc|mod|p_hi|p_o|prof|tix)$" -consistent!(file_sniffer_2, r".*?\.(exe|a|la|o|ll|keter|bc|dyn_o|out|d|rlib|crate|min\.js|hi|dyn_hi|jsexe|webapp|js\.externs|ibc|toc|aux|fdb_latexmk|fls|egg-info|whl|js_a|js_hi|jld|ji|js_o|so.*|dump-.*|vmb|crx|orig|elmo|elmi|pyc|mod|p_hi|p_o|prof|tix)$"); - -// file-sniffer-3.0.1: r".*?\.(stats|conf|h|cache.*)$" -consistent!(file_sniffer_3, r".*?\.(stats|conf|h|cache.*)$"); - -// file-sniffer-3.0.1: r"(\.git|\.pijul|_darcs|\.hg)$" -consistent!(file_sniffer_4, r"(\.git|\.pijul|_darcs|\.hg)$"); - -// file_logger-0.1.0: "test" -consistent!(file_logger_0, "test"); - -// file_scanner-0.2.0: r"foo" -consistent!(file_scanner_0, r"foo"); - -// file_scanner-0.2.0: r"a+b" -consistent!(file_scanner_1, r"a+b"); - -// file_scanner-0.2.0: r"a[ab]*b" -consistent!(file_scanner_2, r"a[ab]*b"); - -// file_scanner-0.2.0: r"\s+" -consistent!(file_scanner_3, r"\s+"); - -// file_scanner-0.2.0: r"\s+" -consistent!(file_scanner_4, r"\s+"); - -// cellsplit-0.2.1: r"^\s*([^\s]+) %cellsplit<\d+>$" -consistent!(cellsplit_0, r"^\s*([^\s]+) %cellsplit<\d+>$"); - -// cellsplit-0.2.1: r"^\s*([^\s]+) %cellsplit<\d+>$" -consistent!(cellsplit_1, r"^\s*([^\s]+) %cellsplit<\d+>$"); - -// aterm-0.20.0: r"^[+\-]?[0-9]+" -consistent!(aterm_0, r"^[+\-]?[0-9]+"); - -// aterm-0.20.0: r"^[+\-]?[0-9]+\.[0-9]*([eE][+\-]?[0-9]+)?" 
-consistent!(aterm_1, r"^[+\-]?[0-9]+\.[0-9]*([eE][+\-]?[0-9]+)?"); - -// atarashii_imap-0.3.0: r"^[*] OK" -consistent!(atarashii_imap_0, r"^[*] OK"); - -// atarashii_imap-0.3.0: r"FLAGS\s\((.+)\)" -consistent!(atarashii_imap_1, r"FLAGS\s\((.+)\)"); - -// atarashii_imap-0.3.0: r"\[PERMANENTFLAGS\s\((.+)\)\]" -consistent!(atarashii_imap_2, r"\[PERMANENTFLAGS\s\((.+)\)\]"); - -// atarashii_imap-0.3.0: r"\[UIDVALIDITY\s(\d+)\]" -consistent!(atarashii_imap_3, r"\[UIDVALIDITY\s(\d+)\]"); - -// atarashii_imap-0.3.0: r"(\d+)\sEXISTS" -consistent!(atarashii_imap_4, r"(\d+)\sEXISTS"); - -// atarashii_imap-0.3.0: r"(\d+)\sRECENT" -consistent!(atarashii_imap_5, r"(\d+)\sRECENT"); - -// atarashii_imap-0.3.0: r"\[UNSEEN\s(\d+)\]" -consistent!(atarashii_imap_6, r"\[UNSEEN\s(\d+)\]"); - -// atarashii_imap-0.3.0: r"\[UIDNEXT\s(\d+)\]" -consistent!(atarashii_imap_7, r"\[UIDNEXT\s(\d+)\]"); - -// editorconfig-1.0.0: r"\\(\{|\})" -consistent!(editorconfig_0, r"\\(\{|\})"); - -// editorconfig-1.0.0: r"(^|[^\\])\\\|" -consistent!(editorconfig_1, r"(^|[^\\])\\\|"); - -// editorconfig-1.0.0: r"\[([^\]]*)$" -consistent!(editorconfig_2, r"\[([^\]]*)$"); - -// editorconfig-1.0.0: r"\[(.*/.*)\]" -consistent!(editorconfig_3, r"\[(.*/.*)\]"); - -// editorconfig-1.0.0: r"\{(-?\d+\\\.\\\.-?\d+)\}" -consistent!(editorconfig_4, r"\{(-?\d+\\\.\\\.-?\d+)\}"); - -// editorconfig-1.0.0: r"\{([^,]+)\}" -consistent!(editorconfig_5, r"\{([^,]+)\}"); - -// editorconfig-1.0.0: r"\{(([^\}].*)?(,|\|)(.*[^\\])?)\}" -consistent!(editorconfig_6, r"\{(([^\}].*)?(,|\|)(.*[^\\])?)\}"); - -// editorconfig-1.0.0: r"^/" -consistent!(editorconfig_7, r"^/"); - -// editorconfig-1.0.0: r"(^|[^\\])(\{|\})" -consistent!(editorconfig_8, r"(^|[^\\])(\{|\})"); - -// edmunge-1.0.0: "^#!.*\n" -consistent!(edmunge_0, "^#!.*\n"); - -// unicode_names2_macros-0.2.0: r"\\N\{(.*?)(?:\}|$)" -consistent!(unicode_names2_macros_0, r"\\N\{(.*?)(?:\}|$)"); - -// unidiff-0.2.1: r"^--- (?P<filename>[^\t\n]+)(?:\t(?P<timestamp>[^\n]+))?" 
-consistent!( - unidiff_0, - r"^--- (?P<filename>[^\t\n]+)(?:\t(?P<timestamp>[^\n]+))?" -); - -// unidiff-0.2.1: r"^\+\+\+ (?P<filename>[^\t\n]+)(?:\t(?P<timestamp>[^\n]+))?" -consistent!( - unidiff_1, - r"^\+\+\+ (?P<filename>[^\t\n]+)(?:\t(?P<timestamp>[^\n]+))?" -); - -// unidiff-0.2.1: r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)" -consistent!(unidiff_2, r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)"); - -// unidiff-0.2.1: r"^(?P<line_type>[- \n\+\\]?)(?P<value>.*)" -consistent!(unidiff_3, r"^(?P<line_type>[- \n\+\\]?)(?P<value>.*)"); - -// slippy-map-tiles-0.13.1: "/?(?P<zoom>[0-9]?[0-9])/(?P<x>[0-9]{1,10})/(?P<y>[0-9]{1,10})(\\.[a-zA-Z]{3,4})?$" -consistent!(slippy_map_tiles_0, "/?(?P<zoom>[0-9]?[0-9])/(?P<x>[0-9]{1,10})/(?P<y>[0-9]{1,10})(\\.[a-zA-Z]{3,4})?$"); - -// slippy-map-tiles-0.13.1: r"^(?P<minlon>-?[0-9]{1,3}(\.[0-9]{1,10})?) (?P<minlat>-?[0-9]{1,3}(\.[0-9]{1,10})?) (?P<maxlon>-?[0-9]{1,3}(\.[0-9]{1,10})?) (?P<maxlat>-?[0-9]{1,3}(\.[0-9]{1,10})?)$" -consistent!(slippy_map_tiles_1, r"^(?P<minlon>-?[0-9]{1,3}(\.[0-9]{1,10})?) (?P<minlat>-?[0-9]{1,3}(\.[0-9]{1,10})?) (?P<maxlon>-?[0-9]{1,3}(\.[0-9]{1,10})?) 
(?P<maxlat>-?[0-9]{1,3}(\.[0-9]{1,10})?)$"); - -// slippy-map-tiles-0.13.1: r"^(?P<minlon>-?[0-9]{1,3}(\.[0-9]{1,10})?),(?P<minlat>-?[0-9]{1,3}(\.[0-9]{1,10})?),(?P<maxlon>-?[0-9]{1,3}(\.[0-9]{1,10})?),(?P<maxlat>-?[0-9]{1,3}(\.[0-9]{1,10})?)$" -consistent!(slippy_map_tiles_2, r"^(?P<minlon>-?[0-9]{1,3}(\.[0-9]{1,10})?),(?P<minlat>-?[0-9]{1,3}(\.[0-9]{1,10})?),(?P<maxlon>-?[0-9]{1,3}(\.[0-9]{1,10})?),(?P<maxlat>-?[0-9]{1,3}(\.[0-9]{1,10})?)$"); - -// sonos-0.1.2: r"^https?://(.+?):1400/xml" -consistent!(sonos_0, r"^https?://(.+?):1400/xml"); - -// validator_derive-0.7.0: r"^[a-z]{2}$" -consistent!(validator_derive_0, r"^[a-z]{2}$"); - -// validator_derive-0.7.0: r"[a-z]{2}" -consistent!(validator_derive_1, r"[a-z]{2}"); - -// validator_derive-0.7.0: r"[a-z]{2}" -consistent!(validator_derive_2, r"[a-z]{2}"); - -// nginx-config-0.8.0: r"one of \d+ options" -consistent!(nginx_config_0, r"one of \d+ options"); - -// waltz-0.4.0: r"[\s,]" -consistent!(waltz_0, r"[\s,]"); - -// warheadhateus-0.2.1: r"^aws_access_key_id = (.*)" -consistent!(warheadhateus_0, r"^aws_access_key_id = (.*)"); - -// warheadhateus-0.2.1: r"^aws_secret_access_key = (.*)" -consistent!(warheadhateus_1, r"^aws_secret_access_key = (.*)"); - -// warheadhateus-0.2.1: r"^aws_access_key_id = (.*)" -consistent!(warheadhateus_2, r"^aws_access_key_id = (.*)"); - -// warheadhateus-0.2.1: r"^aws_secret_access_key = (.*)" -consistent!(warheadhateus_3, r"^aws_secret_access_key = (.*)"); - -// jieba-rs-0.2.2: r"([\u{4E00}-\u{9FD5}a-zA-Z0-9+#&\._%]+)" -consistent!(jieba_rs_0, r"([\u{4E00}-\u{9FD5}a-zA-Z0-9+#&\._%]+)"); - -// jieba-rs-0.2.2: r"(\r\n|\s)" -consistent!(jieba_rs_1, r"(\r\n|\s)"); - -// jieba-rs-0.2.2: "([\u{4E00}-\u{9FD5}]+)" -consistent!(jieba_rs_2, "([\u{4E00}-\u{9FD5}]+)"); - -// jieba-rs-0.2.2: r"[^a-zA-Z0-9+#\n]" -consistent!(jieba_rs_3, r"[^a-zA-Z0-9+#\n]"); - -// jieba-rs-0.2.2: r"([\u{4E00}-\u{9FD5}]+)" -consistent!(jieba_rs_4, r"([\u{4E00}-\u{9FD5}]+)"); - -// jieba-rs-0.2.2: 
r"([a-zA-Z0-9]+(?:.\d+)?%?)" -consistent!(jieba_rs_5, r"([a-zA-Z0-9]+(?:.\d+)?%?)"); - -// lalrpop-0.15.2: r"Span\([0-9 ,]*\)" -consistent!(lalrpop_0, r"Span\([0-9 ,]*\)"); - -// lalrpop-snap-0.15.2: r"Span\([0-9 ,]*\)" -consistent!(lalrpop_snap_0, r"Span\([0-9 ,]*\)"); - -// nlp-tokenize-0.1.0: r"[\S]+" -consistent!(nlp_tokenize_0, r"[\S]+"); - -// kbgpg-0.1.2: "[[:xdigit:]][70]" -consistent!(kbgpg_0, "[[:xdigit:]][70]"); - -// cdbd-0.1.1: r"^((?P<address>.*):)?(?P<port>\d+)$" -consistent!(cdbd_0, r"^((?P<address>.*):)?(?P<port>\d+)$"); - -// mbutiles-0.1.1: r"[\w\s=+-/]+\((\{(.|\n)*\})\);?" -consistent!(mbutiles_0, r"[\w\s=+-/]+\((\{(.|\n)*\})\);?"); - -// extrahop-0.2.5: r"^-\d+(?:ms|s|m|h|d|w|y)?$" -consistent!(extrahop_0, r"^-\d+(?:ms|s|m|h|d|w|y)?$"); - -// pippin-0.1.0: "^((?:.*)-)?ss(0|[1-9][0-9]*)\\.pip$" -consistent!(pippin_0, "^((?:.*)-)?ss(0|[1-9][0-9]*)\\.pip$"); - -// pippin-0.1.0: "^((?:.*)-)?ss(0|[1-9][0-9]*)-cl(0|[1-9][0-9]*)\\.piplog$" -consistent!( - pippin_1, - "^((?:.*)-)?ss(0|[1-9][0-9]*)-cl(0|[1-9][0-9]*)\\.piplog$" -); - -// pippin-0.1.0: "^((?:.*)-)?ss(0|[1-9][0-9]*)\\.pip$" -consistent!(pippin_2, "^((?:.*)-)?ss(0|[1-9][0-9]*)\\.pip$"); - -// pippin-0.1.0: "^((?:.*)-)?ss(0|[1-9][0-9]*)-cl(0|[1-9][0-9]*)\\.piplog$" -consistent!( - pippin_3, - "^((?:.*)-)?ss(0|[1-9][0-9]*)-cl(0|[1-9][0-9]*)\\.piplog$" -); - -// pippin-0.1.0: "^.*pn(0|[1-9][0-9]*)(-ss(0|[1-9][0-9]*)(\\.pip|-cl(0|[1-9][0-9]*)\\.piplog))?$" -consistent!(pippin_4, "^.*pn(0|[1-9][0-9]*)(-ss(0|[1-9][0-9]*)(\\.pip|-cl(0|[1-9][0-9]*)\\.piplog))?$"); - -// pippin-0.1.0: "^(.*)-ss(?:0|[1-9][0-9]*)(?:\\.pip|-cl(?:0|[1-9][0-9]*)\\.piplog)$" -consistent!( - pippin_5, - "^(.*)-ss(?:0|[1-9][0-9]*)(?:\\.pip|-cl(?:0|[1-9][0-9]*)\\.piplog)$" -); - -// pinyin-0.3.0: r"(?i)[āáǎàēéěèōóǒòīíǐìūúǔùüǘǚǜńň]" -consistent!( - pinyin_0, - r"(?i)[āáǎàēéěèōóǒòīíǐìūúǔùüǘǚǜńň]" -); - -// pinyin-0.3.0: r"([aeoiuvnm])([0-4])$" -consistent!(pinyin_1, r"([aeoiuvnm])([0-4])$"); - -// duration-parser-0.2.0: 
r"(?P<value>\d+)(?P<units>[a-z])" -consistent!(duration_parser_0, r"(?P<value>\d+)(?P<units>[a-z])"); - -// dutree-0.2.7: r"^\d+\D?$" -consistent!(dutree_0, r"^\d+\D?$"); - -// djangohashers-0.3.0: r"^[A-Za-z0-9]*$" -consistent!(djangohashers_0, r"^[A-Za-z0-9]*$"); - -// rtag-0.3.5: r"^[A-Z][A-Z0-9]{2,}$" -consistent!(rtag_0, r"^[A-Z][A-Z0-9]{2,}$"); - -// rtag-0.3.5: r"^http://www\.emusic\.com" -consistent!(rtag_1, r"^http://www\.emusic\.com"); - -// rtag-0.3.5: r"^[A-Z][A-Z0-9]{2,}" -consistent!(rtag_2, r"^[A-Z][A-Z0-9]{2,}"); - -// rtag-0.3.5: r"(^[\x{0}|\x{feff}|\x{fffe}]*|[\x{0}|\x{feff}|\x{fffe}]*$)" -consistent!( - rtag_3, - r"(^[\x{0}|\x{feff}|\x{fffe}]*|[\x{0}|\x{feff}|\x{fffe}]*$)" -); - -// rtow-0.1.0: r"(\d+)[xX](\d+)" -consistent!(rtow_0, r"(\d+)[xX](\d+)"); - -// pleingres-sql-plugin-0.1.0: r"\$([a-zA-Z0-9_]+)" -consistent!(pleingres_sql_plugin_0, r"\$([a-zA-Z0-9_]+)"); - -// dono-2.0.0: "[\\n]+" -consistent!(dono_0, "[\\n]+"); - -// dono-2.0.0: "(?m)^\\n" -consistent!(dono_1, "(?m)^\\n"); - -// dono-2.0.0: "(?m)^\\n" -consistent!(dono_2, "(?m)^\\n"); - -// ssb-common-0.3.0: r"^[0-9A-Za-z\+/]{43}=\.ed25519$" -consistent!(ssb_common_0, r"^[0-9A-Za-z\+/]{43}=\.ed25519$"); - -// ssb-common-0.3.0: r"^[0-9A-Za-z\+/]{86}==\.ed25519$" -consistent!(ssb_common_1, r"^[0-9A-Za-z\+/]{86}==\.ed25519$"); - -// ssb-common-0.3.0: r"^[0-9A-Za-z\+/]{43}=\.sha256$" -consistent!(ssb_common_2, r"^[0-9A-Za-z\+/]{43}=\.sha256$"); - -// mozversion-0.1.3: r"^(?P<major>\d+)\.(?P<minor>\d+)(?:\.(?P<patch>\d+))?(?:(?P<pre0>[a-z]+)(?P<pre1>\d*))?$" -consistent!(mozversion_0, r"^(?P<major>\d+)\.(?P<minor>\d+)(?:\.(?P<patch>\d+))?(?:(?P<pre0>[a-z]+)(?P<pre1>\d*))?$"); - -// monger-0.5.6: r"^(\d+)\.(\d+)$" -consistent!(monger_0, r"^(\d+)\.(\d+)$"); - -// mongo_rub-0.0.2: r"^[rv]2\.6" -consistent!(mongo_rub_0, r"^[rv]2\.6"); - -// flow-0.3.5: "body value" -consistent!(flow_0, "body value"); - -// flow-0.3.5: "start marker" -consistent!(flow_1, "start marker"); - -// flow-0.3.5: "end 
marker" -consistent!(flow_2, "end marker"); - -// flow-0.3.5: "body value" -consistent!(flow_3, "body value"); - -// vobsub-0.2.3: "^([A-Za-z/ ]+): (.*)" -consistent!(vobsub_0, "^([A-Za-z/ ]+): (.*)"); - -// voidmap-1.1.2: r"#([^\s=]+)*" -consistent!(voidmap_0, r"#([^\s=]+)*"); - -// voidmap-1.1.2: r"#(\S+)*" -consistent!(voidmap_1, r"#(\S+)*"); - -// voidmap-1.1.2: r"#prio=(\d+)" -consistent!(voidmap_2, r"#prio=(\d+)"); - -// voidmap-1.1.2: r"\[(\S+)\]" -consistent!(voidmap_3, r"\[(\S+)\]"); - -// voidmap-1.1.2: r"#limit=(\d+)" -consistent!(voidmap_4, r"#limit=(\d+)"); - -// voidmap-1.1.2: r"#tagged=(\S+)" -consistent!(voidmap_5, r"#tagged=(\S+)"); - -// voidmap-1.1.2: r"#rev\b" -consistent!(voidmap_6, r"#rev\b"); - -// voidmap-1.1.2: r"#done\b" -consistent!(voidmap_7, r"#done\b"); - -// voidmap-1.1.2: r"#open\b" -consistent!(voidmap_8, r"#open\b"); - -// voidmap-1.1.2: r"#since=(\S+)" -consistent!(voidmap_9, r"#since=(\S+)"); - -// voidmap-1.1.2: r"#until=(\S+)" -consistent!(voidmap_10, r"#until=(\S+)"); - -// voidmap-1.1.2: r"#plot=(\S+)" -consistent!(voidmap_11, r"#plot=(\S+)"); - -// voidmap-1.1.2: r"#n=(\d+)" -consistent!(voidmap_12, r"#n=(\d+)"); - -// voidmap-1.1.2: r"(\S+)" -consistent!(voidmap_13, r"(\S+)"); - -// voidmap-1.1.2: r"(?P<y>\d+)y" -consistent!(voidmap_14, r"(?P<y>\d+)y"); - -// voidmap-1.1.2: r"(?P<m>\d+)m" -consistent!(voidmap_15, r"(?P<m>\d+)m"); - -// voidmap-1.1.2: r"(?P<w>\d+)w" -consistent!(voidmap_16, r"(?P<w>\d+)w"); - -// voidmap-1.1.2: r"(?P<d>\d+)d" -consistent!(voidmap_17, r"(?P<d>\d+)d"); - -// voidmap-1.1.2: r"(?P<h>\d+)h" -consistent!(voidmap_18, r"(?P<h>\d+)h"); - -// voidmap-1.1.2: r"C-(.)" -consistent!(voidmap_19, r"C-(.)"); - -// qt_generator-0.2.0: r"^\.\./qt[^/]+/" -consistent!(qt_generator_0, r"^\.\./qt[^/]+/"); - -// qt_generator-0.2.0: "(href|src)=\"([^\"]*)\"" -consistent!(qt_generator_1, "(href|src)=\"([^\"]*)\""); - -// kryptos-0.6.1: r"[01]{5}" -consistent!(kryptos_0, r"[01]{5}"); - -// cifar_10_loader-0.2.0: 
"data_batch_[1-5].bin" -consistent!(cifar_10_loader_0, "data_batch_[1-5].bin"); - -// cifar_10_loader-0.2.0: "test_batch.bin" -consistent!(cifar_10_loader_1, "test_batch.bin"); - -// circadian-0.6.0: r"^\d+.\d+s$" -consistent!(circadian_0, r"^\d+.\d+s$"); - -// circadian-0.6.0: r"^\d+:\d+$" -consistent!(circadian_1, r"^\d+:\d+$"); - -// circadian-0.6.0: r"^\d+:\d+m$" -consistent!(circadian_2, r"^\d+:\d+m$"); - -// cicada-0.8.1: r"!!" -consistent!(cicada_0, r"!!"); - -// cicada-0.8.1: r"^([^`]*)`([^`]+)`(.*)$" -consistent!(cicada_1, r"^([^`]*)`([^`]+)`(.*)$"); - -// cicada-0.8.1: r"\*+" -consistent!(cicada_2, r"\*+"); - -// cicada-0.8.1: r"([^\$]*)\$\{?([A-Za-z0-9\?\$_]+)\}?(.*)" -consistent!(cicada_3, r"([^\$]*)\$\{?([A-Za-z0-9\?\$_]+)\}?(.*)"); - -// cicada-0.8.1: r"^ *alias +([a-zA-Z0-9_\.-]+)=(.*)$" -consistent!(cicada_4, r"^ *alias +([a-zA-Z0-9_\.-]+)=(.*)$"); - -// vterm-sys-0.1.0: r"hi" -consistent!(vterm_sys_0, r"hi"); - -// skim-0.5.0: r".*?\t" -consistent!(skim_0, r".*?\t"); - -// skim-0.5.0: r".*?[\t ]" -consistent!(skim_1, r".*?[\t ]"); - -// skim-0.5.0: r"(\{-?[0-9.,q]*?})" -consistent!(skim_2, r"(\{-?[0-9.,q]*?})"); - -// skim-0.5.0: r"[ \t\n]+" -consistent!(skim_3, r"[ \t\n]+"); - -// skim-0.5.0: r"[ \t\n]+" -consistent!(skim_4, r"[ \t\n]+"); - -// skim-0.5.0: r"([^ |]+( +\| +[^ |]*)+)|( +)" -consistent!(skim_5, r"([^ |]+( +\| +[^ |]*)+)|( +)"); - -// skim-0.5.0: r" +\| +" -consistent!(skim_6, r" +\| +"); - -// skim-0.5.0: r"^(?P<left>-?\d+)?(?P<sep>\.\.)?(?P<right>-?\d+)?$" -consistent!(skim_7, r"^(?P<left>-?\d+)?(?P<sep>\.\.)?(?P<right>-?\d+)?$"); - -// skim-0.5.0: "," -consistent!(skim_8, ","); - -// skim-0.5.0: ".*?," -consistent!(skim_9, ".*?,"); - -// skim-0.5.0: ".*?," -consistent!(skim_10, ".*?,"); - -// skim-0.5.0: "," -consistent!(skim_11, ","); - -// skim-0.5.0: r"\x1B\[(?:([0-9]+;[0-9]+[Hf])|([0-9]+[ABCD])|(s|u|2J|K)|([0-9;]*m)|(=[0-9]+[hI]))" -consistent!(skim_12, 
r"\x1B\[(?:([0-9]+;[0-9]+[Hf])|([0-9]+[ABCD])|(s|u|2J|K)|([0-9;]*m)|(=[0-9]+[hI]))"); - -// egg-mode-text-1.14.7: r"[-_./]\z" -consistent!(egg_mode_text_0, r"[-_./]\z"); - -// java-properties-1.1.1: "^[ \t\r\n\x0c]*[#!]" -consistent!(java_properties_0, "^[ \t\r\n\x0c]*[#!]"); - -// java-properties-1.1.1: r"^[ \t\x0c]*[#!][^\r\n]*$" -consistent!(java_properties_1, r"^[ \t\x0c]*[#!][^\r\n]*$"); - -// java-properties-1.1.1: r"^([ \t\x0c]*[:=][ \t\x0c]*|[ \t\x0c]+)$" -consistent!(java_properties_2, r"^([ \t\x0c]*[:=][ \t\x0c]*|[ \t\x0c]+)$"); - -// ipaddress-0.1.2: r":.+\." -consistent!(ipaddress_0, r":.+\."); - -// ipaddress-0.1.2: r"\." -consistent!(ipaddress_1, r"\."); - -// ipaddress-0.1.2: r":" -consistent!(ipaddress_2, r":"); - -// iptables-0.2.2: r"v(\d+)\.(\d+)\.(\d+)" -consistent!(iptables_0, r"v(\d+)\.(\d+)\.(\d+)"); - -// rsure-0.8.1: r"^([^-]+)-(.*)\.dat\.gz$" -consistent!(rsure_0, r"^([^-]+)-(.*)\.dat\.gz$"); - -// rs-jsonpath-0.1.0: "^(.*?)(<=|<|==|>=|>)(.*?)$" -consistent!(rs_jsonpath_0, "^(.*?)(<=|<|==|>=|>)(.*?)$"); - -// oatie-0.3.0: r"(\n|^)(\w+):([\n\w\W]+?)(\n(?:\w)|(\n\]))" -consistent!(oatie_0, r"(\n|^)(\w+):([\n\w\W]+?)(\n(?:\w)|(\n\]))"); - -// weld-0.2.0: "#.*$" -consistent!(weld_0, "#.*$"); - -// weld-0.2.0: r"^[A-Za-z$_][A-Za-z0-9$_]*$" -consistent!(weld_1, r"^[A-Za-z$_][A-Za-z0-9$_]*$"); - -// weld-0.2.0: r"^[0-9]+[cC]$" -consistent!(weld_2, r"^[0-9]+[cC]$"); - -// weld-0.2.0: r"^0b[0-1]+[cC]$" -consistent!(weld_3, r"^0b[0-1]+[cC]$"); - -// weld-0.2.0: r"^0x[0-9a-fA-F]+[cC]$" -consistent!(weld_4, r"^0x[0-9a-fA-F]+[cC]$"); - -// weld-0.2.0: r"^[0-9]+$" -consistent!(weld_5, r"^[0-9]+$"); - -// weld-0.2.0: r"^0b[0-1]+$" -consistent!(weld_6, r"^0b[0-1]+$"); - -// weld-0.2.0: r"^0x[0-9a-fA-F]+$" -consistent!(weld_7, r"^0x[0-9a-fA-F]+$"); - -// weld-0.2.0: r"^[0-9]+[lL]$" -consistent!(weld_8, r"^[0-9]+[lL]$"); - -// weld-0.2.0: r"^0b[0-1]+[lL]$" -consistent!(weld_9, r"^0b[0-1]+[lL]$"); - -// weld-0.2.0: r"^0x[0-9a-fA-F]+[lL]$" 
-consistent!(weld_10, r"^0x[0-9a-fA-F]+[lL]$"); - -// webgl_generator-0.1.0: "([(, ])enum\\b" -consistent!(webgl_generator_0, "([(, ])enum\\b"); - -// webgl_generator-0.1.0: "\\bAcquireResourcesCallback\\b" -consistent!(webgl_generator_1, "\\bAcquireResourcesCallback\\b"); - -// weave-0.2.0: r"^(\d+)(,(\d+))?([acd]).*$" -consistent!(weave_0, r"^(\d+)(,(\d+))?([acd]).*$"); - -// wemo-0.0.12: r"<BinaryState>(\d)(\|-?\d+)*</BinaryState>" -consistent!(wemo_0, r"<BinaryState>(\d)(\|-?\d+)*</BinaryState>"); - -// webscale-0.9.4: r"(http[s]?://[^\s]+)" -consistent!(webscale_0, r"(http[s]?://[^\s]+)"); - -// svgrep-1.1.0: r"^\d+.*$" -consistent!(svgrep_0, r"^\d+.*$"); - -// ignore-0.4.2: r"^[\pL\pN]+$" -consistent!(ignore_0, r"^[\pL\pN]+$"); - -// ommui_string_patterns-0.1.2: r"^([A-Za-z][0-9A-Za-z_]*)?$" -consistent!(ommui_string_patterns_0, r"^([A-Za-z][0-9A-Za-z_]*)?$"); - -// ommui_string_patterns-0.1.2: r"^(\S+(?:.*\S)?)?$" -consistent!(ommui_string_patterns_1, r"^(\S+(?:.*\S)?)?$"); - -// opcua-types-0.3.0: "^(?P<min>[0-9]{1,10})(:(?P<max>[0-9]{1,10}))?$" -consistent!(opcua_types_0, "^(?P<min>[0-9]{1,10})(:(?P<max>[0-9]{1,10}))?$"); - -// opcua-types-0.3.0: r"^(ns=(?P<ns>[0-9]+);)?(?P<t>[isgb])=(?P<v>.+)$" -consistent!(opcua_types_1, r"^(ns=(?P<ns>[0-9]+);)?(?P<t>[isgb])=(?P<v>.+)$"); - -// open_read_later-1.1.1: r"^(.+?)\s*:\s*(.+)$" -consistent!(open_read_later_0, r"^(.+?)\s*:\s*(.+)$"); - -// youtube-downloader-0.1.0: r"^.*(?:(?:youtu\.be/|v/|vi/|u/w/|embed/)|(?:(?:watch)?\?v(?:i)?=|\&v(?:i)?=))([^#\&\?]*).*" -consistent!(youtube_downloader_0, r"^.*(?:(?:youtu\.be/|v/|vi/|u/w/|embed/)|(?:(?:watch)?\?v(?:i)?=|\&v(?:i)?=))([^#\&\?]*).*"); - -// yobot-0.1.1: "." -consistent!(yobot_0, "."); - -// yobot-0.1.1: r"." -consistent!(yobot_1, r"."); - -// yobot-0.1.1: r".+" -consistent!(yobot_2, r".+"); - -// yobot-0.1.1: r"." 
-consistent!(yobot_3, r"."); - -// ubiquity-0.1.5: r"foo" -consistent!(ubiquity_0, r"foo"); - -// ubiquity-0.1.5: r"/target/" -consistent!(ubiquity_1, r"/target/"); - -// ubiquity-0.1.5: r".DS_Store" -consistent!(ubiquity_2, r".DS_Store"); - -// qasm-1.0.0: r"//.*" -consistent!(qasm_0, r"//.*"); - -// drill-0.3.5: r"\{\{ *([a-z\._]+) *\}\}" -consistent!(drill_0, r"\{\{ *([a-z\._]+) *\}\}"); - -// queryst-2.0.0: r"^([^\]\[]+)" -consistent!(queryst_0, r"^([^\]\[]+)"); - -// queryst-2.0.0: r"(\[[^\]\[]*\])" -consistent!(queryst_1, r"(\[[^\]\[]*\])"); - -// qui-vive-0.1.0: r"^/(\w+)$" -consistent!(qui_vive_0, r"^/(\w+)$"); - -// qui-vive-0.1.0: r"^/key$" -consistent!(qui_vive_1, r"^/key$"); - -// qui-vive-0.1.0: r"^/key/(\w+)$" -consistent!(qui_vive_2, r"^/key/(\w+)$"); - -// qui-vive-0.1.0: r"^/url$" -consistent!(qui_vive_3, r"^/url$"); - -// qui-vive-0.1.0: r"^/url/(\w+)$" -consistent!(qui_vive_4, r"^/url/(\w+)$"); - -// qui-vive-0.1.0: r"^/inv$" -consistent!(qui_vive_5, r"^/inv$"); - -// qui-vive-0.1.0: r"^/inv/(\w+)$" -consistent!(qui_vive_6, r"^/inv/(\w+)$"); - -// subdiff-0.1.0: r"\b" -// consistent!(subdiff_0, r"\b"); - -// substudy-0.4.5: r"^(\d+)/(\d+)$" -consistent!(substudy_0, r"^(\d+)/(\d+)$"); - -// substudy-0.4.5: r"\s+" -consistent!(substudy_1, r"\s+"); - -// substudy-0.4.5: r"<[a-z/][^>]*>" -consistent!(substudy_2, r"<[a-z/][^>]*>"); - -// substudy-0.4.5: r"(\([^)]*\)|♪[^♪]*♪|[A-Z]{2,} ?:)" -consistent!(substudy_3, r"(\([^)]*\)|♪[^♪]*♪|[A-Z]{2,} ?:)"); - -// substudy-0.4.5: r"\s+" -consistent!(substudy_4, r"\s+"); - -// isbnid-0.1.3: r"^(\d(-| )?){9}(x|X|\d|(\d(-| )?){3}\d)$" -consistent!(isbnid_0, r"^(\d(-| )?){9}(x|X|\d|(\d(-| )?){3}\d)$"); - -// isbnid-0.1.3: r"[^0-9X]" -consistent!(isbnid_1, r"[^0-9X]"); - -// ispc-0.3.5: r"Intel\(r\) SPMD Program Compiler \(ispc\), (\d+\.\d+\.\d+)" -consistent!( - ispc_0, - r"Intel\(r\) SPMD Program Compiler \(ispc\), (\d+\.\d+\.\d+)" -); diff --git a/vendor/regex/tests/crazy.rs b/vendor/regex/tests/crazy.rs 
deleted file mode 100644 index 293ac1a..0000000 --- a/vendor/regex/tests/crazy.rs +++ /dev/null @@ -1,459 +0,0 @@ -mat!(ascii_literal, r"a", "a", Some((0, 1))); - -// Some crazy expressions from regular-expressions.info. -mat!( - match_ranges, - r"(?-u)\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b", - "num: 255", - Some((5, 8)) -); -mat!( - match_ranges_not, - r"(?-u)\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b", - "num: 256", - None -); -mat!(match_float1, r"[-+]?[0-9]*\.?[0-9]+", "0.1", Some((0, 3))); -mat!(match_float2, r"[-+]?[0-9]*\.?[0-9]+", "0.1.2", Some((0, 3))); -mat!(match_float3, r"[-+]?[0-9]*\.?[0-9]+", "a1.2", Some((1, 4))); -mat!(match_float4, r"^[-+]?[0-9]*\.?[0-9]+$", "1.a", None); -mat!( - match_email, - r"(?i-u)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b", - "mine is jam.slam@gmail.com ", - Some((8, 26)) -); -mat!( - match_email_not, - r"(?i-u)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b", - "mine is jam.slam@gmail ", - None -); -mat!( - match_email_big, - r"[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?", - "mine is jam.slam@gmail.com ", - Some((8, 26)) -); -mat!( - match_date1, - r"(?-u)^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$", - "1900-01-01", - Some((0, 10)) -); -mat!( - match_date2, - r"(?-u)^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$", - "1900-00-01", - None -); -mat!( - match_date3, - r"(?-u)^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$", - "1900-13-01", - None -); - -// Do some crazy dancing with the start/end assertions. 
-matiter!(match_start_end_empty, r"^$", "", (0, 0)); -matiter!(match_start_end_empty_many_1, r"^$^$^$", "", (0, 0)); -matiter!(match_start_end_empty_many_2, r"^^^$$$", "", (0, 0)); -matiter!(match_start_end_empty_rev, r"$^", "", (0, 0)); -matiter!( - match_start_end_empty_rep, - r"(?:^$)*", - "a\nb\nc", - (0, 0), - (1, 1), - (2, 2), - (3, 3), - (4, 4), - (5, 5) -); -matiter!( - match_start_end_empty_rep_rev, - r"(?:$^)*", - "a\nb\nc", - (0, 0), - (1, 1), - (2, 2), - (3, 3), - (4, 4), - (5, 5) -); - -// Test negated character classes. -mat!(negclass_letters, r"[^ac]", "acx", Some((2, 3))); -mat!(negclass_letter_comma, r"[^a,]", "a,x", Some((2, 3))); -mat!(negclass_letter_space, r"[^a[:space:]]", "a x", Some((2, 3))); -mat!(negclass_comma, r"[^,]", ",,x", Some((2, 3))); -mat!(negclass_space, r"[^[:space:]]", " a", Some((1, 2))); -mat!(negclass_space_comma, r"[^,[:space:]]", ", a", Some((2, 3))); -mat!(negclass_comma_space, r"[^[:space:],]", " ,a", Some((2, 3))); -mat!(negclass_ascii, r"[^[:alpha:]Z]", "A1", Some((1, 2))); - -// Test that repeated empty expressions don't loop forever. -mat!(lazy_many_many, r"((?:.*)*?)=", "a=b", Some((0, 2))); -mat!(lazy_many_optional, r"((?:.?)*?)=", "a=b", Some((0, 2))); -mat!(lazy_one_many_many, r"((?:.*)+?)=", "a=b", Some((0, 2))); -mat!(lazy_one_many_optional, r"((?:.?)+?)=", "a=b", Some((0, 2))); -mat!(lazy_range_min_many, r"((?:.*){1,}?)=", "a=b", Some((0, 2))); -mat!(lazy_range_many, r"((?:.*){1,2}?)=", "a=b", Some((0, 2))); -mat!(greedy_many_many, r"((?:.*)*)=", "a=b", Some((0, 2))); -mat!(greedy_many_optional, r"((?:.?)*)=", "a=b", Some((0, 2))); -mat!(greedy_one_many_many, r"((?:.*)+)=", "a=b", Some((0, 2))); -mat!(greedy_one_many_optional, r"((?:.?)+)=", "a=b", Some((0, 2))); -mat!(greedy_range_min_many, r"((?:.*){1,})=", "a=b", Some((0, 2))); -mat!(greedy_range_many, r"((?:.*){1,2})=", "a=b", Some((0, 2))); - -// Test that we handle various flavors of empty expressions. 
-matiter!(match_empty1, r"", "", (0, 0)); -matiter!(match_empty2, r"", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty3, r"()", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty4, r"()*", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty5, r"()+", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty6, r"()?", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty7, r"()()", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty8, r"()+|z", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty9, r"z|()+", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty10, r"()+|b", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty11, r"b|()+", "abc", (0, 0), (1, 2), (3, 3)); -matiter!(match_empty12, r"|b", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty13, r"b|", "abc", (0, 0), (1, 2), (3, 3)); -matiter!(match_empty14, r"|z", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty15, r"z|", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty16, r"|", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty17, r"||", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty18, r"||z", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty19, r"(?:)|b", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty20, r"b|(?:)", "abc", (0, 0), (1, 2), (3, 3)); -matiter!(match_empty21, r"(?:|)", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty22, r"(?:|)|z", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty23, r"a(?:)|b", "abc", (0, 1), (1, 2)); - -// Test that the DFA can handle pathological cases. -// (This should result in the DFA's cache being flushed too frequently, which -// should cause it to quit and fall back to the NFA algorithm.) 
-#[test] -fn dfa_handles_pathological_case() { - fn ones_and_zeroes(count: usize) -> String { - use rand::rngs::SmallRng; - use rand::{Rng, SeedableRng}; - - let mut rng = SmallRng::from_entropy(); - let mut s = String::new(); - for _ in 0..count { - if rng.gen() { - s.push('1'); - } else { - s.push('0'); - } - } - s - } - - let re = regex!(r"[01]*1[01]{20}$"); - let text = { - let mut pieces = ones_and_zeroes(100_000); - pieces.push('1'); - pieces.push_str(&ones_and_zeroes(20)); - pieces - }; - assert!(re.is_match(text!(&*text))); -} - -#[test] -fn nest_limit_makes_it_parse() { - use regex::RegexBuilder; - - RegexBuilder::new( - r#"(?-u) - 2(?: - [45]\d{3}| - 7(?: - 1[0-267]| - 2[0-289]| - 3[0-29]| - 4[01]| - 5[1-3]| - 6[013]| - 7[0178]| - 91 - )| - 8(?: - 0[125]| - [139][1-6]| - 2[0157-9]| - 41| - 6[1-35]| - 7[1-5]| - 8[1-8]| - 90 - )| - 9(?: - 0[0-2]| - 1[0-4]| - 2[568]| - 3[3-6]| - 5[5-7]| - 6[0167]| - 7[15]| - 8[0146-9] - ) - )\d{4}| - 3(?: - 12?[5-7]\d{2}| - 0(?: - 2(?: - [025-79]\d| - [348]\d{1,2} - )| - 3(?: - [2-4]\d| - [56]\d? - ) - )| - 2(?: - 1\d{2}| - 2(?: - [12]\d| - [35]\d{1,2}| - 4\d? 
- ) - )| - 3(?: - 1\d{2}| - 2(?: - [2356]\d| - 4\d{1,2} - ) - )| - 4(?: - 1\d{2}| - 2(?: - 2\d{1,2}| - [47]| - 5\d{2} - ) - )| - 5(?: - 1\d{2}| - 29 - )| - [67]1\d{2}| - 8(?: - 1\d{2}| - 2(?: - 2\d{2}| - 3| - 4\d - ) - ) - )\d{3}| - 4(?: - 0(?: - 2(?: - [09]\d| - 7 - )| - 33\d{2} - )| - 1\d{3}| - 2(?: - 1\d{2}| - 2(?: - [25]\d?| - [348]\d| - [67]\d{1,2} - ) - )| - 3(?: - 1\d{2}(?: - \d{2} - )?| - 2(?: - [045]\d| - [236-9]\d{1,2} - )| - 32\d{2} - )| - 4(?: - [18]\d{2}| - 2(?: - [2-46]\d{2}| - 3 - )| - 5[25]\d{2} - )| - 5(?: - 1\d{2}| - 2(?: - 3\d| - 5 - ) - )| - 6(?: - [18]\d{2}| - 2(?: - 3(?: - \d{2} - )?| - [46]\d{1,2}| - 5\d{2}| - 7\d - )| - 5(?: - 3\d?| - 4\d| - [57]\d{1,2}| - 6\d{2}| - 8 - ) - )| - 71\d{2}| - 8(?: - [18]\d{2}| - 23\d{2}| - 54\d{2} - )| - 9(?: - [18]\d{2}| - 2[2-5]\d{2}| - 53\d{1,2} - ) - )\d{3}| - 5(?: - 02[03489]\d{2}| - 1\d{2}| - 2(?: - 1\d{2}| - 2(?: - 2(?: - \d{2} - )?| - [457]\d{2} - ) - )| - 3(?: - 1\d{2}| - 2(?: - [37](?: - \d{2} - )?| - [569]\d{2} - ) - )| - 4(?: - 1\d{2}| - 2[46]\d{2} - )| - 5(?: - 1\d{2}| - 26\d{1,2} - )| - 6(?: - [18]\d{2}| - 2| - 53\d{2} - )| - 7(?: - 1| - 24 - )\d{2}| - 8(?: - 1| - 26 - )\d{2}| - 91\d{2} - )\d{3}| - 6(?: - 0(?: - 1\d{2}| - 2(?: - 3\d{2}| - 4\d{1,2} - ) - )| - 2(?: - 2[2-5]\d{2}| - 5(?: - [3-5]\d{2}| - 7 - )| - 8\d{2} - )| - 3(?: - 1| - 2[3478] - )\d{2}| - 4(?: - 1| - 2[34] - )\d{2}| - 5(?: - 1| - 2[47] - )\d{2}| - 6(?: - [18]\d{2}| - 6(?: - 2(?: - 2\d| - [34]\d{2} - )| - 5(?: - [24]\d{2}| - 3\d| - 5\d{1,2} - ) - ) - )| - 72[2-5]\d{2}| - 8(?: - 1\d{2}| - 2[2-5]\d{2} - )| - 9(?: - 1\d{2}| - 2[2-6]\d{2} - ) - )\d{3}| - 7(?: - (?: - 02| - [3-589]1| - 6[12]| - 72[24] - )\d{2}| - 21\d{3}| - 32 - )\d{3}| - 8(?: - (?: - 4[12]| - [5-7]2| - 1\d? 
- )| - (?: - 0| - 3[12]| - [5-7]1| - 217 - )\d - )\d{4}| - 9(?: - [35]1| - (?: - [024]2| - 81 - )\d| - (?: - 1| - [24]1 - )\d{2} - )\d{3} - "#, - ) - .build() - .unwrap(); -} diff --git a/vendor/regex/tests/flags.rs b/vendor/regex/tests/flags.rs deleted file mode 100644 index c33b82d..0000000 --- a/vendor/regex/tests/flags.rs +++ /dev/null @@ -1,31 +0,0 @@ -mat!(match_flag_case, "(?-u)(?i)abc", "ABC", Some((0, 3))); -mat!(match_flag_weird_case, "(?-u)(?i)a(?-i)bc", "Abc", Some((0, 3))); -mat!(match_flag_weird_case_not, "(?-u)(?i)a(?-i)bc", "ABC", None); -mat!(match_flag_case_dotnl, "(?-u)(?is)a(?u:.)", "A\n", Some((0, 2))); -mat!( - match_flag_case_dotnl_toggle, - "(?-u)(?is)a(?u:.)(?-is)a(?u:.)", - "A\nab", - Some((0, 4)) -); -mat!( - match_flag_case_dotnl_toggle_not, - "(?-u)(?is)a(?u:.)(?-is)a(?u:.)", - "A\na\n", - None -); -mat!( - match_flag_case_dotnl_toggle_ok, - "(?-u)(?is)a(?u:.)(?-is:a(?u:.))?", - "A\na\n", - Some((0, 2)) -); -mat!( - match_flag_multi, - r"(?-u)(?m)(?:^\d+$\n?)+", - "123\n456\n789", - Some((0, 11)) -); -mat!(match_flag_ungreedy, "(?U)a+", "aa", Some((0, 1))); -mat!(match_flag_ungreedy_greedy, "(?U)a+?", "aa", Some((0, 2))); -mat!(match_flag_ungreedy_noop, "(?U)(?-U)a+", "aa", Some((0, 2))); diff --git a/vendor/regex/tests/fowler.rs b/vendor/regex/tests/fowler.rs deleted file mode 100644 index 7f56a75..0000000 --- a/vendor/regex/tests/fowler.rs +++ /dev/null @@ -1,1588 +0,0 @@ -// DO NOT EDIT. Automatically generated by 'scripts/regex-match-tests.py' -// on 2019-09-02 11:07:37.849994. 
- -// Tests from basic.dat -mat!(match_basic_3, r"abracadabra$", r"abracadabracadabra", Some((7, 18))); -mat!(match_basic_4, r"a...b", r"abababbb", Some((2, 7))); -mat!(match_basic_5, r"XXXXXX", r"..XXXXXX", Some((2, 8))); -mat!(match_basic_6, r"\)", r"()", Some((1, 2))); -mat!(match_basic_7, r"a]", r"a]a", Some((0, 2))); -mat!(match_basic_9, r"\}", r"}", Some((0, 1))); -mat!(match_basic_10, r"\]", r"]", Some((0, 1))); -mat!(match_basic_12, r"]", r"]", Some((0, 1))); -mat!(match_basic_15, r"^a", r"ax", Some((0, 1))); -mat!(match_basic_16, r"\^a", r"a^a", Some((1, 3))); -mat!(match_basic_17, r"a\^", r"a^", Some((0, 2))); -mat!(match_basic_18, r"a$", r"aa", Some((1, 2))); -mat!(match_basic_19, r"a\$", r"a$", Some((0, 2))); -mat!(match_basic_20, r"^$", r"", Some((0, 0))); -mat!(match_basic_21, r"$^", r"", Some((0, 0))); -mat!(match_basic_22, r"a($)", r"aa", Some((1, 2)), Some((2, 2))); -mat!(match_basic_23, r"a*(^a)", r"aa", Some((0, 1)), Some((0, 1))); -mat!(match_basic_24, r"(..)*(...)*", r"a", Some((0, 0))); -mat!(match_basic_25, r"(..)*(...)*", r"abcd", Some((0, 4)), Some((2, 4))); -mat!( - match_basic_26, - r"(ab|a)(bc|c)", - r"abc", - Some((0, 3)), - Some((0, 2)), - Some((2, 3)) -); -mat!(match_basic_27, r"(ab)c|abc", r"abc", Some((0, 3)), Some((0, 2))); -mat!(match_basic_28, r"a{0}b", r"ab", Some((1, 2))); -mat!( - match_basic_29, - r"(a*)(b?)(b+)b{3}", - r"aaabbbbbbb", - Some((0, 10)), - Some((0, 3)), - Some((3, 4)), - Some((4, 7)) -); -mat!( - match_basic_30, - r"(a*)(b{0,1})(b{1,})b{3}", - r"aaabbbbbbb", - Some((0, 10)), - Some((0, 3)), - Some((3, 4)), - Some((4, 7)) -); -mat!( - match_basic_32, - r"((a|a)|a)", - r"a", - Some((0, 1)), - Some((0, 1)), - Some((0, 1)) -); -mat!( - match_basic_33, - r"(a*)(a|aa)", - r"aaaa", - Some((0, 4)), - Some((0, 3)), - Some((3, 4)) -); -mat!(match_basic_34, r"a*(a.|aa)", r"aaaa", Some((0, 4)), Some((2, 4))); -mat!( - match_basic_35, - r"a(b)|c(d)|a(e)f", - r"aef", - Some((0, 3)), - None, - None, - Some((1, 2)) -); 
-mat!(match_basic_36, r"(a|b)?.*", r"b", Some((0, 1)), Some((0, 1))); -mat!(match_basic_37, r"(a|b)c|a(b|c)", r"ac", Some((0, 2)), Some((0, 1))); -mat!( - match_basic_38, - r"(a|b)c|a(b|c)", - r"ab", - Some((0, 2)), - None, - Some((1, 2)) -); -mat!(match_basic_39, r"(a|b)*c|(a|ab)*c", r"abc", Some((0, 3)), Some((1, 2))); -mat!(match_basic_40, r"(a|b)*c|(a|ab)*c", r"xc", Some((1, 2))); -mat!( - match_basic_41, - r"(.a|.b).*|.*(.a|.b)", - r"xa", - Some((0, 2)), - Some((0, 2)) -); -mat!(match_basic_42, r"a?(ab|ba)ab", r"abab", Some((0, 4)), Some((0, 2))); -mat!(match_basic_43, r"a?(ac{0}b|ba)ab", r"abab", Some((0, 4)), Some((0, 2))); -mat!(match_basic_44, r"ab|abab", r"abbabab", Some((0, 2))); -mat!(match_basic_45, r"aba|bab|bba", r"baaabbbaba", Some((5, 8))); -mat!(match_basic_46, r"aba|bab", r"baaabbbaba", Some((6, 9))); -mat!( - match_basic_47, - r"(aa|aaa)*|(a|aaaaa)", - r"aa", - Some((0, 2)), - Some((0, 2)) -); -mat!( - match_basic_48, - r"(a.|.a.)*|(a|.a...)", - r"aa", - Some((0, 2)), - Some((0, 2)) -); -mat!(match_basic_49, r"ab|a", r"xabc", Some((1, 3))); -mat!(match_basic_50, r"ab|a", r"xxabc", Some((2, 4))); -mat!( - match_basic_51, - r"(?i)(?-u)(Ab|cD)*", - r"aBcD", - Some((0, 4)), - Some((2, 4)) -); -mat!(match_basic_52, r"[^-]", r"--a", Some((2, 3))); -mat!(match_basic_53, r"[a-]*", r"--a", Some((0, 3))); -mat!(match_basic_54, r"[a-m-]*", r"--amoma--", Some((0, 4))); -mat!( - match_basic_55, - r":::1:::0:|:::1:1:0:", - r":::0:::1:::1:::0:", - Some((8, 17)) -); -mat!( - match_basic_56, - r":::1:::0:|:::1:1:1:", - r":::0:::1:::1:::0:", - Some((8, 17)) -); -mat!(match_basic_57, r"[[:upper:]]", r"A", Some((0, 1))); -mat!(match_basic_58, r"[[:lower:]]+", r"`az{", Some((1, 3))); -mat!(match_basic_59, r"[[:upper:]]+", r"@AZ[", Some((1, 3))); -mat!( - match_basic_65, - r" -", - r" -", - Some((0, 1)) -); -mat!( - match_basic_66, - r" -", - r" -", - Some((0, 1)) -); -mat!( - match_basic_67, - r"[^a]", - r" -", - Some((0, 1)) -); -mat!( - match_basic_68, - r" -a", - 
r" -a", - Some((0, 2)) -); -mat!( - match_basic_69, - r"(a)(b)(c)", - r"abc", - Some((0, 3)), - Some((0, 1)), - Some((1, 2)), - Some((2, 3)) -); -mat!(match_basic_70, r"xxx", r"xxx", Some((0, 3))); -mat!( - match_basic_71, - r"(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)", - r"feb 6,", - Some((0, 6)) -); -mat!( - match_basic_72, - r"(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)", - r"2/7", - Some((0, 3)) -); -mat!( - match_basic_73, - r"(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)", - r"feb 1,Feb 6", - Some((5, 11)) -); -mat!( - match_basic_74, - r"((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))", - r"x", - Some((0, 1)), - Some((0, 1)), - Some((0, 1)) -); -mat!( - match_basic_75, - r"((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))*", - r"xx", - Some((0, 2)), - Some((1, 2)), - Some((1, 2)) -); -mat!( - match_basic_76, - r"a?(ab|ba)*", - r"ababababababababababababababababababababababababababababababababababababababababa", - Some((0, 81)), - Some((79, 81)) -); -mat!( - match_basic_77, - r"abaa|abbaa|abbbaa|abbbbaa", - r"ababbabbbabbbabbbbabbbbaa", - Some((18, 25)) -); -mat!( - match_basic_78, - r"abaa|abbaa|abbbaa|abbbbaa", - r"ababbabbbabbbabbbbabaa", - Some((18, 22)) -); -mat!( - match_basic_79, - r"aaac|aabc|abac|abbc|baac|babc|bbac|bbbc", - r"baaabbbabac", - Some((7, 11)) -); -mat!(match_basic_80, r".*", r"", Some((0, 2))); -mat!( - match_basic_81, - r"aaaa|bbbb|cccc|ddddd|eeeeee|fffffff|gggg|hhhh|iiiii|jjjjj|kkkkk|llll", - r"XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa", - Some((53, 57)) -); -mat!(match_basic_83, r"a*a*a*a*a*b", r"aaaaaaaaab", Some((0, 10))); -mat!(match_basic_84, r"^", r"", Some((0, 0))); -mat!(match_basic_85, r"$", r"", Some((0, 0))); -mat!(match_basic_86, r"^$", r"", Some((0, 0))); -mat!(match_basic_87, r"^a$", r"a", Some((0, 1))); -mat!(match_basic_88, r"abc", r"abc", Some((0, 3))); -mat!(match_basic_89, r"abc", r"xabcy", Some((1, 4))); -mat!(match_basic_90, 
r"abc", r"ababc", Some((2, 5))); -mat!(match_basic_91, r"ab*c", r"abc", Some((0, 3))); -mat!(match_basic_92, r"ab*bc", r"abc", Some((0, 3))); -mat!(match_basic_93, r"ab*bc", r"abbc", Some((0, 4))); -mat!(match_basic_94, r"ab*bc", r"abbbbc", Some((0, 6))); -mat!(match_basic_95, r"ab+bc", r"abbc", Some((0, 4))); -mat!(match_basic_96, r"ab+bc", r"abbbbc", Some((0, 6))); -mat!(match_basic_97, r"ab?bc", r"abbc", Some((0, 4))); -mat!(match_basic_98, r"ab?bc", r"abc", Some((0, 3))); -mat!(match_basic_99, r"ab?c", r"abc", Some((0, 3))); -mat!(match_basic_100, r"^abc$", r"abc", Some((0, 3))); -mat!(match_basic_101, r"^abc", r"abcc", Some((0, 3))); -mat!(match_basic_102, r"abc$", r"aabc", Some((1, 4))); -mat!(match_basic_103, r"^", r"abc", Some((0, 0))); -mat!(match_basic_104, r"$", r"abc", Some((3, 3))); -mat!(match_basic_105, r"a.c", r"abc", Some((0, 3))); -mat!(match_basic_106, r"a.c", r"axc", Some((0, 3))); -mat!(match_basic_107, r"a.*c", r"axyzc", Some((0, 5))); -mat!(match_basic_108, r"a[bc]d", r"abd", Some((0, 3))); -mat!(match_basic_109, r"a[b-d]e", r"ace", Some((0, 3))); -mat!(match_basic_110, r"a[b-d]", r"aac", Some((1, 3))); -mat!(match_basic_111, r"a[-b]", r"a-", Some((0, 2))); -mat!(match_basic_112, r"a[b-]", r"a-", Some((0, 2))); -mat!(match_basic_113, r"a]", r"a]", Some((0, 2))); -mat!(match_basic_114, r"a[]]b", r"a]b", Some((0, 3))); -mat!(match_basic_115, r"a[^bc]d", r"aed", Some((0, 3))); -mat!(match_basic_116, r"a[^-b]c", r"adc", Some((0, 3))); -mat!(match_basic_117, r"a[^]b]c", r"adc", Some((0, 3))); -mat!(match_basic_118, r"ab|cd", r"abc", Some((0, 2))); -mat!(match_basic_119, r"ab|cd", r"abcd", Some((0, 2))); -mat!(match_basic_120, r"a\(b", r"a(b", Some((0, 3))); -mat!(match_basic_121, r"a\(*b", r"ab", Some((0, 2))); -mat!(match_basic_122, r"a\(*b", r"a((b", Some((0, 4))); -mat!( - match_basic_123, - r"((a))", - r"abc", - Some((0, 1)), - Some((0, 1)), - Some((0, 1)) -); -mat!( - match_basic_124, - r"(a)b(c)", - r"abc", - Some((0, 3)), - Some((0, 1)), - 
Some((2, 3)) -); -mat!(match_basic_125, r"a+b+c", r"aabbabc", Some((4, 7))); -mat!(match_basic_126, r"a*", r"aaa", Some((0, 3))); -mat!(match_basic_128, r"(a*)*", r"-", Some((0, 0)), None); -mat!(match_basic_129, r"(a*)+", r"-", Some((0, 0)), Some((0, 0))); -mat!(match_basic_131, r"(a*|b)*", r"-", Some((0, 0)), None); -mat!(match_basic_132, r"(a+|b)*", r"ab", Some((0, 2)), Some((1, 2))); -mat!(match_basic_133, r"(a+|b)+", r"ab", Some((0, 2)), Some((1, 2))); -mat!(match_basic_134, r"(a+|b)?", r"ab", Some((0, 1)), Some((0, 1))); -mat!(match_basic_135, r"[^ab]*", r"cde", Some((0, 3))); -mat!(match_basic_137, r"(^)*", r"-", Some((0, 0)), None); -mat!(match_basic_138, r"a*", r"", Some((0, 0))); -mat!(match_basic_139, r"([abc])*d", r"abbbcd", Some((0, 6)), Some((4, 5))); -mat!(match_basic_140, r"([abc])*bcd", r"abcd", Some((0, 4)), Some((0, 1))); -mat!(match_basic_141, r"a|b|c|d|e", r"e", Some((0, 1))); -mat!(match_basic_142, r"(a|b|c|d|e)f", r"ef", Some((0, 2)), Some((0, 1))); -mat!(match_basic_144, r"((a*|b))*", r"-", Some((0, 0)), None, None); -mat!(match_basic_145, r"abcd*efg", r"abcdefg", Some((0, 7))); -mat!(match_basic_146, r"ab*", r"xabyabbbz", Some((1, 3))); -mat!(match_basic_147, r"ab*", r"xayabbbz", Some((1, 2))); -mat!(match_basic_148, r"(ab|cd)e", r"abcde", Some((2, 5)), Some((2, 4))); -mat!(match_basic_149, r"[abhgefdc]ij", r"hij", Some((0, 3))); -mat!(match_basic_150, r"(a|b)c*d", r"abcd", Some((1, 4)), Some((1, 2))); -mat!(match_basic_151, r"(ab|ab*)bc", r"abc", Some((0, 3)), Some((0, 1))); -mat!(match_basic_152, r"a([bc]*)c*", r"abc", Some((0, 3)), Some((1, 3))); -mat!( - match_basic_153, - r"a([bc]*)(c*d)", - r"abcd", - Some((0, 4)), - Some((1, 3)), - Some((3, 4)) -); -mat!( - match_basic_154, - r"a([bc]+)(c*d)", - r"abcd", - Some((0, 4)), - Some((1, 3)), - Some((3, 4)) -); -mat!( - match_basic_155, - r"a([bc]*)(c+d)", - r"abcd", - Some((0, 4)), - Some((1, 2)), - Some((2, 4)) -); -mat!(match_basic_156, r"a[bcd]*dcdcde", r"adcdcde", Some((0, 7))); 
-mat!(match_basic_157, r"(ab|a)b*c", r"abc", Some((0, 3)), Some((0, 2))); -mat!( - match_basic_158, - r"((a)(b)c)(d)", - r"abcd", - Some((0, 4)), - Some((0, 3)), - Some((0, 1)), - Some((1, 2)), - Some((3, 4)) -); -mat!(match_basic_159, r"[A-Za-z_][A-Za-z0-9_]*", r"alpha", Some((0, 5))); -mat!(match_basic_160, r"^a(bc+|b[eh])g|.h$", r"abh", Some((1, 3))); -mat!( - match_basic_161, - r"(bc+d$|ef*g.|h?i(j|k))", - r"effgz", - Some((0, 5)), - Some((0, 5)) -); -mat!( - match_basic_162, - r"(bc+d$|ef*g.|h?i(j|k))", - r"ij", - Some((0, 2)), - Some((0, 2)), - Some((1, 2)) -); -mat!( - match_basic_163, - r"(bc+d$|ef*g.|h?i(j|k))", - r"reffgz", - Some((1, 6)), - Some((1, 6)) -); -mat!( - match_basic_164, - r"(((((((((a)))))))))", - r"a", - Some((0, 1)), - Some((0, 1)), - Some((0, 1)), - Some((0, 1)), - Some((0, 1)), - Some((0, 1)), - Some((0, 1)), - Some((0, 1)), - Some((0, 1)), - Some((0, 1)) -); -mat!( - match_basic_165, - r"multiple words", - r"multiple words yeah", - Some((0, 14)) -); -mat!( - match_basic_166, - r"(.*)c(.*)", - r"abcde", - Some((0, 5)), - Some((0, 2)), - Some((3, 5)) -); -mat!(match_basic_167, r"abcd", r"abcd", Some((0, 4))); -mat!(match_basic_168, r"a(bc)d", r"abcd", Some((0, 4)), Some((1, 3))); -mat!(match_basic_169, r"a[-]?c", r"ac", Some((0, 3))); -mat!( - match_basic_170, - r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - r"Muammar Qaddafi", - Some((0, 15)), - None, - Some((10, 12)) -); -mat!( - match_basic_171, - r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - r"Mo'ammar Gadhafi", - Some((0, 16)), - None, - Some((11, 13)) -); -mat!( - match_basic_172, - r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - r"Muammar Kaddafi", - Some((0, 15)), - None, - Some((10, 12)) -); -mat!( - match_basic_173, - r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - r"Muammar Qadhafi", - Some((0, 15)), - None, - Some((10, 12)) -); -mat!( - match_basic_174, - r"M[ou]'?am+[ae]r 
.*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - r"Muammar Gadafi", - Some((0, 14)), - None, - Some((10, 11)) -); -mat!( - match_basic_175, - r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - r"Mu'ammar Qadafi", - Some((0, 15)), - None, - Some((11, 12)) -); -mat!( - match_basic_176, - r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - r"Moamar Gaddafi", - Some((0, 14)), - None, - Some((9, 11)) -); -mat!( - match_basic_177, - r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - r"Mu'ammar Qadhdhafi", - Some((0, 18)), - None, - Some((13, 15)) -); -mat!( - match_basic_178, - r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - r"Muammar Khaddafi", - Some((0, 16)), - None, - Some((11, 13)) -); -mat!( - match_basic_179, - r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - r"Muammar Ghaddafy", - Some((0, 16)), - None, - Some((11, 13)) -); -mat!( - match_basic_180, - r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - r"Muammar Ghadafi", - Some((0, 15)), - None, - Some((11, 12)) -); -mat!( - match_basic_181, - r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - r"Muammar Ghaddafi", - Some((0, 16)), - None, - Some((11, 13)) -); -mat!( - match_basic_182, - r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - r"Muamar Kaddafi", - Some((0, 14)), - None, - Some((9, 11)) -); -mat!( - match_basic_183, - r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - r"Muammar Quathafi", - Some((0, 16)), - None, - Some((11, 13)) -); -mat!( - match_basic_184, - r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - r"Muammar Gheddafi", - Some((0, 16)), - None, - Some((11, 13)) -); -mat!( - match_basic_185, - r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - r"Moammar Khadafy", - Some((0, 15)), - None, - Some((11, 12)) -); -mat!( - match_basic_186, - r"M[ou]'?am+[ae]r 
.*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - r"Moammar Qudhafi", - Some((0, 15)), - None, - Some((10, 12)) -); -mat!(match_basic_187, r"a+(b|c)*d+", r"aabcdd", Some((0, 6)), Some((3, 4))); -mat!(match_basic_188, r"^.+$", r"vivi", Some((0, 4))); -mat!(match_basic_189, r"^(.+)$", r"vivi", Some((0, 4)), Some((0, 4))); -mat!( - match_basic_190, - r"^([^!.]+).att.com!(.+)$", - r"gryphon.att.com!eby", - Some((0, 19)), - Some((0, 7)), - Some((16, 19)) -); -mat!( - match_basic_191, - r"^([^!]+!)?([^!]+)$", - r"bas", - Some((0, 3)), - None, - Some((0, 3)) -); -mat!( - match_basic_192, - r"^([^!]+!)?([^!]+)$", - r"bar!bas", - Some((0, 7)), - Some((0, 4)), - Some((4, 7)) -); -mat!( - match_basic_193, - r"^([^!]+!)?([^!]+)$", - r"foo!bas", - Some((0, 7)), - Some((0, 4)), - Some((4, 7)) -); -mat!( - match_basic_194, - r"^.+!([^!]+!)([^!]+)$", - r"foo!bar!bas", - Some((0, 11)), - Some((4, 8)), - Some((8, 11)) -); -mat!( - match_basic_195, - r"((foo)|(bar))!bas", - r"bar!bas", - Some((0, 7)), - Some((0, 3)), - None, - Some((0, 3)) -); -mat!( - match_basic_196, - r"((foo)|(bar))!bas", - r"foo!bar!bas", - Some((4, 11)), - Some((4, 7)), - None, - Some((4, 7)) -); -mat!( - match_basic_197, - r"((foo)|(bar))!bas", - r"foo!bas", - Some((0, 7)), - Some((0, 3)), - Some((0, 3)) -); -mat!( - match_basic_198, - r"((foo)|bar)!bas", - r"bar!bas", - Some((0, 7)), - Some((0, 3)) -); -mat!( - match_basic_199, - r"((foo)|bar)!bas", - r"foo!bar!bas", - Some((4, 11)), - Some((4, 7)) -); -mat!( - match_basic_200, - r"((foo)|bar)!bas", - r"foo!bas", - Some((0, 7)), - Some((0, 3)), - Some((0, 3)) -); -mat!( - match_basic_201, - r"(foo|(bar))!bas", - r"bar!bas", - Some((0, 7)), - Some((0, 3)), - Some((0, 3)) -); -mat!( - match_basic_202, - r"(foo|(bar))!bas", - r"foo!bar!bas", - Some((4, 11)), - Some((4, 7)), - Some((4, 7)) -); -mat!( - match_basic_203, - r"(foo|(bar))!bas", - r"foo!bas", - Some((0, 7)), - Some((0, 3)) -); -mat!( - match_basic_204, - r"(foo|bar)!bas", - r"bar!bas", - Some((0, 
7)), - Some((0, 3)) -); -mat!( - match_basic_205, - r"(foo|bar)!bas", - r"foo!bar!bas", - Some((4, 11)), - Some((4, 7)) -); -mat!( - match_basic_206, - r"(foo|bar)!bas", - r"foo!bas", - Some((0, 7)), - Some((0, 3)) -); -mat!( - match_basic_207, - r"^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", - r"foo!bar!bas", - Some((0, 11)), - Some((0, 11)), - None, - None, - Some((4, 8)), - Some((8, 11)) -); -mat!( - match_basic_208, - r"^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", - r"bas", - Some((0, 3)), - None, - Some((0, 3)) -); -mat!( - match_basic_209, - r"^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", - r"bar!bas", - Some((0, 7)), - Some((0, 4)), - Some((4, 7)) -); -mat!( - match_basic_210, - r"^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", - r"foo!bar!bas", - Some((0, 11)), - None, - None, - Some((4, 8)), - Some((8, 11)) -); -mat!( - match_basic_211, - r"^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", - r"foo!bas", - Some((0, 7)), - Some((0, 4)), - Some((4, 7)) -); -mat!( - match_basic_212, - r"^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", - r"bas", - Some((0, 3)), - Some((0, 3)), - None, - Some((0, 3)) -); -mat!( - match_basic_213, - r"^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", - r"bar!bas", - Some((0, 7)), - Some((0, 7)), - Some((0, 4)), - Some((4, 7)) -); -mat!( - match_basic_214, - r"^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", - r"foo!bar!bas", - Some((0, 11)), - Some((0, 11)), - None, - None, - Some((4, 8)), - Some((8, 11)) -); -mat!( - match_basic_215, - r"^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", - r"foo!bas", - Some((0, 7)), - Some((0, 7)), - Some((0, 4)), - Some((4, 7)) -); -mat!(match_basic_216, r".*(/XXX).*", r"/XXX", Some((0, 4)), Some((0, 4))); -mat!(match_basic_217, r".*(\\XXX).*", r"\XXX", Some((0, 4)), Some((0, 4))); -mat!(match_basic_218, r"\\XXX", r"\XXX", Some((0, 4))); -mat!(match_basic_219, r".*(/000).*", r"/000", Some((0, 4)), Some((0, 4))); -mat!(match_basic_220, r".*(\\000).*", r"\000", Some((0, 4)), Some((0, 4))); -mat!(match_basic_221, r"\\000", r"\000", Some((0, 4))); - 
-// Tests from nullsubexpr.dat -mat!(match_nullsubexpr_3, r"(a*)*", r"a", Some((0, 1)), Some((0, 1))); -mat!(match_nullsubexpr_5, r"(a*)*", r"x", Some((0, 0)), None); -mat!(match_nullsubexpr_6, r"(a*)*", r"aaaaaa", Some((0, 6)), Some((0, 6))); -mat!(match_nullsubexpr_7, r"(a*)*", r"aaaaaax", Some((0, 6)), Some((0, 6))); -mat!(match_nullsubexpr_8, r"(a*)+", r"a", Some((0, 1)), Some((0, 1))); -mat!(match_nullsubexpr_9, r"(a*)+", r"x", Some((0, 0)), Some((0, 0))); -mat!(match_nullsubexpr_10, r"(a*)+", r"aaaaaa", Some((0, 6)), Some((0, 6))); -mat!(match_nullsubexpr_11, r"(a*)+", r"aaaaaax", Some((0, 6)), Some((0, 6))); -mat!(match_nullsubexpr_12, r"(a+)*", r"a", Some((0, 1)), Some((0, 1))); -mat!(match_nullsubexpr_13, r"(a+)*", r"x", Some((0, 0))); -mat!(match_nullsubexpr_14, r"(a+)*", r"aaaaaa", Some((0, 6)), Some((0, 6))); -mat!(match_nullsubexpr_15, r"(a+)*", r"aaaaaax", Some((0, 6)), Some((0, 6))); -mat!(match_nullsubexpr_16, r"(a+)+", r"a", Some((0, 1)), Some((0, 1))); -mat!(match_nullsubexpr_17, r"(a+)+", r"x", None); -mat!(match_nullsubexpr_18, r"(a+)+", r"aaaaaa", Some((0, 6)), Some((0, 6))); -mat!(match_nullsubexpr_19, r"(a+)+", r"aaaaaax", Some((0, 6)), Some((0, 6))); -mat!(match_nullsubexpr_21, r"([a]*)*", r"a", Some((0, 1)), Some((0, 1))); -mat!(match_nullsubexpr_23, r"([a]*)*", r"x", Some((0, 0)), None); -mat!(match_nullsubexpr_24, r"([a]*)*", r"aaaaaa", Some((0, 6)), Some((0, 6))); -mat!(match_nullsubexpr_25, r"([a]*)*", r"aaaaaax", Some((0, 6)), Some((0, 6))); -mat!(match_nullsubexpr_26, r"([a]*)+", r"a", Some((0, 1)), Some((0, 1))); -mat!(match_nullsubexpr_27, r"([a]*)+", r"x", Some((0, 0)), Some((0, 0))); -mat!(match_nullsubexpr_28, r"([a]*)+", r"aaaaaa", Some((0, 6)), Some((0, 6))); -mat!(match_nullsubexpr_29, r"([a]*)+", r"aaaaaax", Some((0, 6)), Some((0, 6))); -mat!(match_nullsubexpr_30, r"([^b]*)*", r"a", Some((0, 1)), Some((0, 1))); -mat!(match_nullsubexpr_32, r"([^b]*)*", r"b", Some((0, 0)), None); -mat!(match_nullsubexpr_33, r"([^b]*)*", 
r"aaaaaa", Some((0, 6)), Some((0, 6))); -mat!( - match_nullsubexpr_34, - r"([^b]*)*", - r"aaaaaab", - Some((0, 6)), - Some((0, 6)) -); -mat!(match_nullsubexpr_35, r"([ab]*)*", r"a", Some((0, 1)), Some((0, 1))); -mat!(match_nullsubexpr_36, r"([ab]*)*", r"aaaaaa", Some((0, 6)), Some((0, 6))); -mat!(match_nullsubexpr_37, r"([ab]*)*", r"ababab", Some((0, 6)), Some((0, 6))); -mat!(match_nullsubexpr_38, r"([ab]*)*", r"bababa", Some((0, 6)), Some((0, 6))); -mat!(match_nullsubexpr_39, r"([ab]*)*", r"b", Some((0, 1)), Some((0, 1))); -mat!(match_nullsubexpr_40, r"([ab]*)*", r"bbbbbb", Some((0, 6)), Some((0, 6))); -mat!( - match_nullsubexpr_41, - r"([ab]*)*", - r"aaaabcde", - Some((0, 5)), - Some((0, 5)) -); -mat!(match_nullsubexpr_42, r"([^a]*)*", r"b", Some((0, 1)), Some((0, 1))); -mat!(match_nullsubexpr_43, r"([^a]*)*", r"bbbbbb", Some((0, 6)), Some((0, 6))); -mat!(match_nullsubexpr_45, r"([^a]*)*", r"aaaaaa", Some((0, 0)), None); -mat!( - match_nullsubexpr_46, - r"([^ab]*)*", - r"ccccxx", - Some((0, 6)), - Some((0, 6)) -); -mat!(match_nullsubexpr_48, r"([^ab]*)*", r"ababab", Some((0, 0)), None); -mat!( - match_nullsubexpr_50, - r"((z)+|a)*", - r"zabcde", - Some((0, 2)), - Some((1, 2)) -); -mat!( - match_nullsubexpr_69, - r"(a*)*(x)", - r"x", - Some((0, 1)), - None, - Some((0, 1)) -); -mat!( - match_nullsubexpr_70, - r"(a*)*(x)", - r"ax", - Some((0, 2)), - Some((0, 1)), - Some((1, 2)) -); -mat!( - match_nullsubexpr_71, - r"(a*)*(x)", - r"axa", - Some((0, 2)), - Some((0, 1)), - Some((1, 2)) -); -mat!( - match_nullsubexpr_73, - r"(a*)+(x)", - r"x", - Some((0, 1)), - Some((0, 0)), - Some((0, 1)) -); -mat!( - match_nullsubexpr_74, - r"(a*)+(x)", - r"ax", - Some((0, 2)), - Some((0, 1)), - Some((1, 2)) -); -mat!( - match_nullsubexpr_75, - r"(a*)+(x)", - r"axa", - Some((0, 2)), - Some((0, 1)), - Some((1, 2)) -); -mat!( - match_nullsubexpr_77, - r"(a*){2}(x)", - r"x", - Some((0, 1)), - Some((0, 0)), - Some((0, 1)) -); -mat!( - match_nullsubexpr_78, - r"(a*){2}(x)", - r"ax", - 
Some((0, 2)), - Some((1, 1)), - Some((1, 2)) -); -mat!( - match_nullsubexpr_79, - r"(a*){2}(x)", - r"axa", - Some((0, 2)), - Some((1, 1)), - Some((1, 2)) -); - -// Tests from repetition.dat -mat!(match_repetition_10, r"((..)|(.))", r"", None); -mat!(match_repetition_11, r"((..)|(.))((..)|(.))", r"", None); -mat!(match_repetition_12, r"((..)|(.))((..)|(.))((..)|(.))", r"", None); -mat!(match_repetition_14, r"((..)|(.)){1}", r"", None); -mat!(match_repetition_15, r"((..)|(.)){2}", r"", None); -mat!(match_repetition_16, r"((..)|(.)){3}", r"", None); -mat!(match_repetition_18, r"((..)|(.))*", r"", Some((0, 0))); -mat!( - match_repetition_20, - r"((..)|(.))", - r"a", - Some((0, 1)), - Some((0, 1)), - None, - Some((0, 1)) -); -mat!(match_repetition_21, r"((..)|(.))((..)|(.))", r"a", None); -mat!(match_repetition_22, r"((..)|(.))((..)|(.))((..)|(.))", r"a", None); -mat!( - match_repetition_24, - r"((..)|(.)){1}", - r"a", - Some((0, 1)), - Some((0, 1)), - None, - Some((0, 1)) -); -mat!(match_repetition_25, r"((..)|(.)){2}", r"a", None); -mat!(match_repetition_26, r"((..)|(.)){3}", r"a", None); -mat!( - match_repetition_28, - r"((..)|(.))*", - r"a", - Some((0, 1)), - Some((0, 1)), - None, - Some((0, 1)) -); -mat!( - match_repetition_30, - r"((..)|(.))", - r"aa", - Some((0, 2)), - Some((0, 2)), - Some((0, 2)), - None -); -mat!( - match_repetition_31, - r"((..)|(.))((..)|(.))", - r"aa", - Some((0, 2)), - Some((0, 1)), - None, - Some((0, 1)), - Some((1, 2)), - None, - Some((1, 2)) -); -mat!(match_repetition_32, r"((..)|(.))((..)|(.))((..)|(.))", r"aa", None); -mat!( - match_repetition_34, - r"((..)|(.)){1}", - r"aa", - Some((0, 2)), - Some((0, 2)), - Some((0, 2)), - None -); -mat!( - match_repetition_35, - r"((..)|(.)){2}", - r"aa", - Some((0, 2)), - Some((1, 2)), - None, - Some((1, 2)) -); -mat!(match_repetition_36, r"((..)|(.)){3}", r"aa", None); -mat!( - match_repetition_38, - r"((..)|(.))*", - r"aa", - Some((0, 2)), - Some((0, 2)), - Some((0, 2)), - None -); -mat!( - 
match_repetition_40, - r"((..)|(.))", - r"aaa", - Some((0, 2)), - Some((0, 2)), - Some((0, 2)), - None -); -mat!( - match_repetition_41, - r"((..)|(.))((..)|(.))", - r"aaa", - Some((0, 3)), - Some((0, 2)), - Some((0, 2)), - None, - Some((2, 3)), - None, - Some((2, 3)) -); -mat!( - match_repetition_42, - r"((..)|(.))((..)|(.))((..)|(.))", - r"aaa", - Some((0, 3)), - Some((0, 1)), - None, - Some((0, 1)), - Some((1, 2)), - None, - Some((1, 2)), - Some((2, 3)), - None, - Some((2, 3)) -); -mat!( - match_repetition_44, - r"((..)|(.)){1}", - r"aaa", - Some((0, 2)), - Some((0, 2)), - Some((0, 2)), - None -); -mat!( - match_repetition_46, - r"((..)|(.)){2}", - r"aaa", - Some((0, 3)), - Some((2, 3)), - Some((0, 2)), - Some((2, 3)) -); -mat!( - match_repetition_47, - r"((..)|(.)){3}", - r"aaa", - Some((0, 3)), - Some((2, 3)), - None, - Some((2, 3)) -); -mat!( - match_repetition_50, - r"((..)|(.))*", - r"aaa", - Some((0, 3)), - Some((2, 3)), - Some((0, 2)), - Some((2, 3)) -); -mat!( - match_repetition_52, - r"((..)|(.))", - r"aaaa", - Some((0, 2)), - Some((0, 2)), - Some((0, 2)), - None -); -mat!( - match_repetition_53, - r"((..)|(.))((..)|(.))", - r"aaaa", - Some((0, 4)), - Some((0, 2)), - Some((0, 2)), - None, - Some((2, 4)), - Some((2, 4)), - None -); -mat!( - match_repetition_54, - r"((..)|(.))((..)|(.))((..)|(.))", - r"aaaa", - Some((0, 4)), - Some((0, 2)), - Some((0, 2)), - None, - Some((2, 3)), - None, - Some((2, 3)), - Some((3, 4)), - None, - Some((3, 4)) -); -mat!( - match_repetition_56, - r"((..)|(.)){1}", - r"aaaa", - Some((0, 2)), - Some((0, 2)), - Some((0, 2)), - None -); -mat!( - match_repetition_57, - r"((..)|(.)){2}", - r"aaaa", - Some((0, 4)), - Some((2, 4)), - Some((2, 4)), - None -); -mat!( - match_repetition_59, - r"((..)|(.)){3}", - r"aaaa", - Some((0, 4)), - Some((3, 4)), - Some((0, 2)), - Some((3, 4)) -); -mat!( - match_repetition_61, - r"((..)|(.))*", - r"aaaa", - Some((0, 4)), - Some((2, 4)), - Some((2, 4)), - None -); -mat!( - match_repetition_63, - 
r"((..)|(.))", - r"aaaaa", - Some((0, 2)), - Some((0, 2)), - Some((0, 2)), - None -); -mat!( - match_repetition_64, - r"((..)|(.))((..)|(.))", - r"aaaaa", - Some((0, 4)), - Some((0, 2)), - Some((0, 2)), - None, - Some((2, 4)), - Some((2, 4)), - None -); -mat!( - match_repetition_65, - r"((..)|(.))((..)|(.))((..)|(.))", - r"aaaaa", - Some((0, 5)), - Some((0, 2)), - Some((0, 2)), - None, - Some((2, 4)), - Some((2, 4)), - None, - Some((4, 5)), - None, - Some((4, 5)) -); -mat!( - match_repetition_67, - r"((..)|(.)){1}", - r"aaaaa", - Some((0, 2)), - Some((0, 2)), - Some((0, 2)), - None -); -mat!( - match_repetition_68, - r"((..)|(.)){2}", - r"aaaaa", - Some((0, 4)), - Some((2, 4)), - Some((2, 4)), - None -); -mat!( - match_repetition_70, - r"((..)|(.)){3}", - r"aaaaa", - Some((0, 5)), - Some((4, 5)), - Some((2, 4)), - Some((4, 5)) -); -mat!( - match_repetition_73, - r"((..)|(.))*", - r"aaaaa", - Some((0, 5)), - Some((4, 5)), - Some((2, 4)), - Some((4, 5)) -); -mat!( - match_repetition_75, - r"((..)|(.))", - r"aaaaaa", - Some((0, 2)), - Some((0, 2)), - Some((0, 2)), - None -); -mat!( - match_repetition_76, - r"((..)|(.))((..)|(.))", - r"aaaaaa", - Some((0, 4)), - Some((0, 2)), - Some((0, 2)), - None, - Some((2, 4)), - Some((2, 4)), - None -); -mat!( - match_repetition_77, - r"((..)|(.))((..)|(.))((..)|(.))", - r"aaaaaa", - Some((0, 6)), - Some((0, 2)), - Some((0, 2)), - None, - Some((2, 4)), - Some((2, 4)), - None, - Some((4, 6)), - Some((4, 6)), - None -); -mat!( - match_repetition_79, - r"((..)|(.)){1}", - r"aaaaaa", - Some((0, 2)), - Some((0, 2)), - Some((0, 2)), - None -); -mat!( - match_repetition_80, - r"((..)|(.)){2}", - r"aaaaaa", - Some((0, 4)), - Some((2, 4)), - Some((2, 4)), - None -); -mat!( - match_repetition_81, - r"((..)|(.)){3}", - r"aaaaaa", - Some((0, 6)), - Some((4, 6)), - Some((4, 6)), - None -); -mat!( - match_repetition_83, - r"((..)|(.))*", - r"aaaaaa", - Some((0, 6)), - Some((4, 6)), - Some((4, 6)), - None -); -mat!( - match_repetition_90, - 
r"X(.?){0,}Y", - r"X1234567Y", - Some((0, 9)), - Some((7, 8)) -); -mat!( - match_repetition_91, - r"X(.?){1,}Y", - r"X1234567Y", - Some((0, 9)), - Some((7, 8)) -); -mat!( - match_repetition_92, - r"X(.?){2,}Y", - r"X1234567Y", - Some((0, 9)), - Some((7, 8)) -); -mat!( - match_repetition_93, - r"X(.?){3,}Y", - r"X1234567Y", - Some((0, 9)), - Some((7, 8)) -); -mat!( - match_repetition_94, - r"X(.?){4,}Y", - r"X1234567Y", - Some((0, 9)), - Some((7, 8)) -); -mat!( - match_repetition_95, - r"X(.?){5,}Y", - r"X1234567Y", - Some((0, 9)), - Some((7, 8)) -); -mat!( - match_repetition_96, - r"X(.?){6,}Y", - r"X1234567Y", - Some((0, 9)), - Some((7, 8)) -); -mat!( - match_repetition_97, - r"X(.?){7,}Y", - r"X1234567Y", - Some((0, 9)), - Some((7, 8)) -); -mat!( - match_repetition_98, - r"X(.?){8,}Y", - r"X1234567Y", - Some((0, 9)), - Some((8, 8)) -); -mat!( - match_repetition_100, - r"X(.?){0,8}Y", - r"X1234567Y", - Some((0, 9)), - Some((8, 8)) -); -mat!( - match_repetition_102, - r"X(.?){1,8}Y", - r"X1234567Y", - Some((0, 9)), - Some((8, 8)) -); -mat!( - match_repetition_104, - r"X(.?){2,8}Y", - r"X1234567Y", - Some((0, 9)), - Some((8, 8)) -); -mat!( - match_repetition_106, - r"X(.?){3,8}Y", - r"X1234567Y", - Some((0, 9)), - Some((8, 8)) -); -mat!( - match_repetition_108, - r"X(.?){4,8}Y", - r"X1234567Y", - Some((0, 9)), - Some((8, 8)) -); -mat!( - match_repetition_110, - r"X(.?){5,8}Y", - r"X1234567Y", - Some((0, 9)), - Some((8, 8)) -); -mat!( - match_repetition_112, - r"X(.?){6,8}Y", - r"X1234567Y", - Some((0, 9)), - Some((8, 8)) -); -mat!( - match_repetition_114, - r"X(.?){7,8}Y", - r"X1234567Y", - Some((0, 9)), - Some((8, 8)) -); -mat!( - match_repetition_115, - r"X(.?){8,8}Y", - r"X1234567Y", - Some((0, 9)), - Some((8, 8)) -); -mat!( - match_repetition_126, - r"(a|ab|c|bcd){0,}(d*)", - r"ababcd", - Some((0, 1)), - Some((0, 1)), - Some((1, 1)) -); -mat!( - match_repetition_127, - r"(a|ab|c|bcd){1,}(d*)", - r"ababcd", - Some((0, 1)), - Some((0, 1)), - Some((1, 1)) -); 
-mat!( - match_repetition_128, - r"(a|ab|c|bcd){2,}(d*)", - r"ababcd", - Some((0, 6)), - Some((3, 6)), - Some((6, 6)) -); -mat!( - match_repetition_129, - r"(a|ab|c|bcd){3,}(d*)", - r"ababcd", - Some((0, 6)), - Some((3, 6)), - Some((6, 6)) -); -mat!(match_repetition_130, r"(a|ab|c|bcd){4,}(d*)", r"ababcd", None); -mat!( - match_repetition_131, - r"(a|ab|c|bcd){0,10}(d*)", - r"ababcd", - Some((0, 1)), - Some((0, 1)), - Some((1, 1)) -); -mat!( - match_repetition_132, - r"(a|ab|c|bcd){1,10}(d*)", - r"ababcd", - Some((0, 1)), - Some((0, 1)), - Some((1, 1)) -); -mat!( - match_repetition_133, - r"(a|ab|c|bcd){2,10}(d*)", - r"ababcd", - Some((0, 6)), - Some((3, 6)), - Some((6, 6)) -); -mat!( - match_repetition_134, - r"(a|ab|c|bcd){3,10}(d*)", - r"ababcd", - Some((0, 6)), - Some((3, 6)), - Some((6, 6)) -); -mat!(match_repetition_135, r"(a|ab|c|bcd){4,10}(d*)", r"ababcd", None); -mat!( - match_repetition_136, - r"(a|ab|c|bcd)*(d*)", - r"ababcd", - Some((0, 1)), - Some((0, 1)), - Some((1, 1)) -); -mat!( - match_repetition_137, - r"(a|ab|c|bcd)+(d*)", - r"ababcd", - Some((0, 1)), - Some((0, 1)), - Some((1, 1)) -); -mat!( - match_repetition_143, - r"(ab|a|c|bcd){0,}(d*)", - r"ababcd", - Some((0, 6)), - Some((4, 5)), - Some((5, 6)) -); -mat!( - match_repetition_145, - r"(ab|a|c|bcd){1,}(d*)", - r"ababcd", - Some((0, 6)), - Some((4, 5)), - Some((5, 6)) -); -mat!( - match_repetition_147, - r"(ab|a|c|bcd){2,}(d*)", - r"ababcd", - Some((0, 6)), - Some((4, 5)), - Some((5, 6)) -); -mat!( - match_repetition_149, - r"(ab|a|c|bcd){3,}(d*)", - r"ababcd", - Some((0, 6)), - Some((4, 5)), - Some((5, 6)) -); -mat!(match_repetition_150, r"(ab|a|c|bcd){4,}(d*)", r"ababcd", None); -mat!( - match_repetition_152, - r"(ab|a|c|bcd){0,10}(d*)", - r"ababcd", - Some((0, 6)), - Some((4, 5)), - Some((5, 6)) -); -mat!( - match_repetition_154, - r"(ab|a|c|bcd){1,10}(d*)", - r"ababcd", - Some((0, 6)), - Some((4, 5)), - Some((5, 6)) -); -mat!( - match_repetition_156, - r"(ab|a|c|bcd){2,10}(d*)", - 
r"ababcd", - Some((0, 6)), - Some((4, 5)), - Some((5, 6)) -); -mat!( - match_repetition_158, - r"(ab|a|c|bcd){3,10}(d*)", - r"ababcd", - Some((0, 6)), - Some((4, 5)), - Some((5, 6)) -); -mat!(match_repetition_159, r"(ab|a|c|bcd){4,10}(d*)", r"ababcd", None); -mat!( - match_repetition_161, - r"(ab|a|c|bcd)*(d*)", - r"ababcd", - Some((0, 6)), - Some((4, 5)), - Some((5, 6)) -); -mat!( - match_repetition_163, - r"(ab|a|c|bcd)+(d*)", - r"ababcd", - Some((0, 6)), - Some((4, 5)), - Some((5, 6)) -); diff --git a/vendor/regex/tests/fuzz/mod.rs b/vendor/regex/tests/fuzz/mod.rs new file mode 100644 index 0000000..88c196a --- /dev/null +++ b/vendor/regex/tests/fuzz/mod.rs @@ -0,0 +1,166 @@ +// This set of tests is different from regression_fuzz in that the tests start +// from the fuzzer data directly. The test essentially duplicates the fuzz +// target. I wonder if there's a better way to set this up... Hmmm. I bet +// `cargo fuzz` has something where it can run a target against crash files and +// verify that they pass. + +// This case found by the fuzzer causes the meta engine to use the "reverse +// inner" literal strategy. That in turn uses a specialized search routine +// for the lazy DFA in order to avoid worst case quadratic behavior. That +// specialized search routine had a bug where it assumed that start state +// specialization was disabled. But this is indeed not the case, since it +// reuses the "general" lazy DFA for the full regex created as part of the core +// strategy, which might very well have start states specialized due to the +// existence of a prefilter. +// +// This is a somewhat weird case because if the core engine has a prefilter, +// then it's usually the case that the "reverse inner" optimization won't be +// pursued in that case. But there are some heuristics that try to detect +// whether a prefilter is "fast" or not. If it's not, then the meta engine will +// attempt the reverse inner optimization. And indeed, that's what happens +// here. 
So the reverse inner optimization ends up with a lazy DFA that has +// start states specialized. Ideally this wouldn't happen because specializing +// start states without a prefilter inside the DFA can be disastrous for +// performance by causing the DFA to ping-pong in and out of the special state +// handling. In this case, it's probably not a huge deal because the lazy +// DFA is only used for part of the matching where as the work horse is the +// prefilter found by the reverse inner optimization. +// +// We could maybe fix this by refactoring the meta engine to be a little more +// careful. For example, by attempting the optimizations before building the +// core engine. But this is perhaps a little tricky. +#[test] +fn meta_stopat_specialize_start_states() { + let data = include_bytes!( + "testdata/crash-8760b19b25d74e3603d4c643e9c7404fdd3631f9", + ); + let _ = run(data); +} + +// Same bug as meta_stopat_specialize_start_states, but minimized by the +// fuzzer. +#[test] +fn meta_stopat_specialize_start_states_min() { + let data = include_bytes!( + "testdata/minimized-from-8760b19b25d74e3603d4c643e9c7404fdd3631f9", + ); + let _ = run(data); +} + +// This input generated a pattern with a fail state (e.g., \P{any}, [^\s\S] +// or [a&&b]). But the fail state was in a branch, where a subsequent branch +// should have led to an overall match, but handling of the fail state +// prevented it from doing so. A hand-minimized version of this is '[^\s\S]A|B' +// on the haystack 'B'. That should yield a match of 'B'. +// +// The underlying cause was an issue in how DFA determinization handled fail +// states. The bug didn't impact the PikeVM or the bounded backtracker. 
+#[test] +fn fail_branch_prevents_match() { + let data = include_bytes!( + "testdata/crash-cd33b13df59ea9d74503986f9d32a270dd43cc04", + ); + let _ = run(data); +} + +// This input generated a pattern that contained a sub-expression like this: +// +// a{0}{50000} +// +// This turned out to provoke quadratic behavior in the NFA compiler. +// Basically, the NFA compiler works in two phases. The first phase builds +// a more complicated-but-simpler-to-construct sequence of NFA states that +// includes unconditional epsilon transitions. As part of converting this +// sequence to the "final" NFA, we remove those unconditional espilon +// transition. The code responsible for doing this follows every chain of +// these transitions and remaps the state IDs. The way we were doing this +// before resulted in re-following every subsequent part of the chain for each +// state in the chain, which ended up being quadratic behavior. We effectively +// memoized this, which fixed the performance bug. +#[test] +fn slow_big_empty_chain() { + let data = include_bytes!( + "testdata/slow-unit-9ca9cc9929fee1fcbb847a78384effb8b98ea18a", + ); + let _ = run(data); +} + +// A different case of slow_big_empty_chain. +#[test] +fn slow_big_empty_chain2() { + let data = include_bytes!( + "testdata/slow-unit-3ab758ea520027fefd3f00e1384d9aeef155739e", + ); + let _ = run(data); +} + +// A different case of slow_big_empty_chain. +#[test] +fn slow_big_empty_chain3() { + let data = include_bytes!( + "testdata/slow-unit-b8a052f4254802edbe5f569b6ce6e9b6c927e9d6", + ); + let _ = run(data); +} + +// A different case of slow_big_empty_chain. +#[test] +fn slow_big_empty_chain4() { + let data = include_bytes!( + "testdata/slow-unit-93c73a43581f205f9aaffd9c17e52b34b17becd0", + ); + let _ = run(data); +} + +// A different case of slow_big_empty_chain. 
+#[test] +fn slow_big_empty_chain5() { + let data = include_bytes!( + "testdata/slow-unit-5345fccadf3812c53c3ccc7af5aa2741b7b2106c", + ); + let _ = run(data); +} + +// A different case of slow_big_empty_chain. +#[test] +fn slow_big_empty_chain6() { + let data = include_bytes!( + "testdata/slow-unit-6bd643eec330166e4ada91da2d3f284268481085", + ); + let _ = run(data); +} + +// This fuzz input generated a pattern with a large repetition that would fail +// NFA compilation, but its HIR was small. (HIR doesn't expand repetitions.) +// But, the bounds were high enough that the minimum length calculation +// overflowed. We fixed this by using saturating arithmetic (and also checked +// arithmetic for the maximum length calculation). +// +// Incidentally, this was the only unguarded arithmetic operation performed in +// the HIR smart constructors. And the fuzzer found it. Hah. Nice. +#[test] +fn minimum_len_overflow() { + let data = include_bytes!( + "testdata/crash-7eb3351f0965e5d6c1cb98aa8585949ef96531ff", + ); + let _ = run(data); +} + +// This is the fuzz target function. We duplicate it here since this is the +// thing we use to interpret the data. It is ultimately what we want to +// succeed. +fn run(data: &[u8]) -> Option<()> { + if data.len() < 2 { + return None; + } + let mut split_at = usize::from(data[0]); + let data = std::str::from_utf8(&data[1..]).ok()?; + // Split data into a regex and haystack to search. 
+ let len = usize::try_from(data.chars().count()).ok()?; + split_at = std::cmp::max(split_at, 1) % len; + let char_index = data.char_indices().nth(split_at)?.0; + let (pattern, input) = data.split_at(char_index); + let re = regex::Regex::new(pattern).ok()?; + re.is_match(input); + Some(()) +} diff --git a/vendor/regex/tests/fuzz/testdata/crash-7eb3351f0965e5d6c1cb98aa8585949ef96531ff b/vendor/regex/tests/fuzz/testdata/crash-7eb3351f0965e5d6c1cb98aa8585949ef96531ff new file mode 100644 index 0000000..f7ffbc9 Binary files /dev/null and b/vendor/regex/tests/fuzz/testdata/crash-7eb3351f0965e5d6c1cb98aa8585949ef96531ff differ diff --git a/vendor/regex/tests/fuzz/testdata/crash-8760b19b25d74e3603d4c643e9c7404fdd3631f9 b/vendor/regex/tests/fuzz/testdata/crash-8760b19b25d74e3603d4c643e9c7404fdd3631f9 new file mode 100644 index 0000000..8674819 Binary files /dev/null and b/vendor/regex/tests/fuzz/testdata/crash-8760b19b25d74e3603d4c643e9c7404fdd3631f9 differ diff --git a/vendor/regex/tests/fuzz/testdata/crash-cd33b13df59ea9d74503986f9d32a270dd43cc04 b/vendor/regex/tests/fuzz/testdata/crash-cd33b13df59ea9d74503986f9d32a270dd43cc04 new file mode 100644 index 0000000..152769d Binary files /dev/null and b/vendor/regex/tests/fuzz/testdata/crash-cd33b13df59ea9d74503986f9d32a270dd43cc04 differ diff --git a/vendor/regex/tests/fuzz/testdata/minimized-from-8760b19b25d74e3603d4c643e9c7404fdd3631f9 b/vendor/regex/tests/fuzz/testdata/minimized-from-8760b19b25d74e3603d4c643e9c7404fdd3631f9 new file mode 100644 index 0000000..69663d5 Binary files /dev/null and b/vendor/regex/tests/fuzz/testdata/minimized-from-8760b19b25d74e3603d4c643e9c7404fdd3631f9 differ diff --git a/vendor/regex/tests/fuzz/testdata/slow-unit-3ab758ea520027fefd3f00e1384d9aeef155739e b/vendor/regex/tests/fuzz/testdata/slow-unit-3ab758ea520027fefd3f00e1384d9aeef155739e new file mode 100644 index 0000000..6c22803 Binary files /dev/null and 
b/vendor/regex/tests/fuzz/testdata/slow-unit-3ab758ea520027fefd3f00e1384d9aeef155739e differ diff --git a/vendor/regex/tests/fuzz/testdata/slow-unit-5345fccadf3812c53c3ccc7af5aa2741b7b2106c b/vendor/regex/tests/fuzz/testdata/slow-unit-5345fccadf3812c53c3ccc7af5aa2741b7b2106c new file mode 100644 index 0000000..0570f32 Binary files /dev/null and b/vendor/regex/tests/fuzz/testdata/slow-unit-5345fccadf3812c53c3ccc7af5aa2741b7b2106c differ diff --git a/vendor/regex/tests/fuzz/testdata/slow-unit-6bd643eec330166e4ada91da2d3f284268481085 b/vendor/regex/tests/fuzz/testdata/slow-unit-6bd643eec330166e4ada91da2d3f284268481085 new file mode 100644 index 0000000..182bc7f Binary files /dev/null and b/vendor/regex/tests/fuzz/testdata/slow-unit-6bd643eec330166e4ada91da2d3f284268481085 differ diff --git a/vendor/regex/tests/fuzz/testdata/slow-unit-93c73a43581f205f9aaffd9c17e52b34b17becd0 b/vendor/regex/tests/fuzz/testdata/slow-unit-93c73a43581f205f9aaffd9c17e52b34b17becd0 new file mode 100644 index 0000000..f939c33 Binary files /dev/null and b/vendor/regex/tests/fuzz/testdata/slow-unit-93c73a43581f205f9aaffd9c17e52b34b17becd0 differ diff --git a/vendor/regex/tests/fuzz/testdata/slow-unit-9ca9cc9929fee1fcbb847a78384effb8b98ea18a b/vendor/regex/tests/fuzz/testdata/slow-unit-9ca9cc9929fee1fcbb847a78384effb8b98ea18a new file mode 100644 index 0000000..a87de23 Binary files /dev/null and b/vendor/regex/tests/fuzz/testdata/slow-unit-9ca9cc9929fee1fcbb847a78384effb8b98ea18a differ diff --git a/vendor/regex/tests/fuzz/testdata/slow-unit-b8a052f4254802edbe5f569b6ce6e9b6c927e9d6 b/vendor/regex/tests/fuzz/testdata/slow-unit-b8a052f4254802edbe5f569b6ce6e9b6c927e9d6 new file mode 100644 index 0000000..dc33293 Binary files /dev/null and b/vendor/regex/tests/fuzz/testdata/slow-unit-b8a052f4254802edbe5f569b6ce6e9b6c927e9d6 differ diff --git a/vendor/regex/tests/lib.rs b/vendor/regex/tests/lib.rs new file mode 100644 index 0000000..b3f6942 --- /dev/null +++ b/vendor/regex/tests/lib.rs @@ -0,0 +1,58 
@@ +#![cfg_attr(feature = "pattern", feature(pattern))] + +mod fuzz; +mod misc; +mod regression; +mod regression_fuzz; +mod replace; +#[cfg(feature = "pattern")] +mod searcher; +mod suite_bytes; +mod suite_bytes_set; +mod suite_string; +mod suite_string_set; + +const BLACKLIST: &[&str] = &[ + // Nothing to blacklist yet! +]; + +fn suite() -> anyhow::Result<regex_test::RegexTests> { + let _ = env_logger::try_init(); + + let mut tests = regex_test::RegexTests::new(); + macro_rules! load { + ($name:expr) => {{ + const DATA: &[u8] = + include_bytes!(concat!("../testdata/", $name, ".toml")); + tests.load_slice($name, DATA)?; + }}; + } + + load!("anchored"); + load!("bytes"); + load!("crazy"); + load!("crlf"); + load!("earliest"); + load!("empty"); + load!("expensive"); + load!("flags"); + load!("iter"); + load!("leftmost-all"); + load!("line-terminator"); + load!("misc"); + load!("multiline"); + load!("no-unicode"); + load!("overlapping"); + load!("regression"); + load!("set"); + load!("substring"); + load!("unicode"); + load!("utf8"); + load!("word-boundary"); + load!("word-boundary-special"); + load!("fowler/basic"); + load!("fowler/nullsubexpr"); + load!("fowler/repetition"); + + Ok(tests) +} diff --git a/vendor/regex/tests/macros.rs b/vendor/regex/tests/macros.rs deleted file mode 100644 index e70e948..0000000 --- a/vendor/regex/tests/macros.rs +++ /dev/null @@ -1,160 +0,0 @@ -// Convenience macros. - -macro_rules! findall { - ($re:expr, $text:expr) => {{ - $re.find_iter(text!($text)) - .map(|m| (m.start(), m.end())).collect::<Vec<_>>() - }} -} - -// Macros for automatically producing tests. - -macro_rules! ismatch { - ($name:ident, $re:expr, $text:expr, $ismatch:expr) => { - #[test] - fn $name() { - let re = regex!($re); - assert_eq!($ismatch, re.is_match(text!($text))); - } - }; -} - -macro_rules! 
mat( - ($name:ident, $re:expr, $text:expr, $($loc:tt)+) => ( - #[test] - fn $name() { - let text = text!($text); - let expected: Vec<Option<_>> = vec![$($loc)+]; - let r = regex!($re); - let got: Vec<Option<_>> = match r.captures(text) { - Some(c) => { - assert!(r.is_match(text)); - assert!(r.shortest_match(text).is_some()); - r.capture_names() - .enumerate() - .map(|(i, _)| c.get(i).map(|m| (m.start(), m.end()))) - .collect() - } - None => vec![None], - }; - // The test set sometimes leave out capture groups, so truncate - // actual capture groups to match test set. - let mut sgot = &got[..]; - if sgot.len() > expected.len() { - sgot = &sgot[0..expected.len()] - } - if expected != sgot { - panic!("For RE '{}' against '{:?}', \ - expected '{:?}' but got '{:?}'", - $re, text, expected, sgot); - } - } - ); -); - -macro_rules! matiter( - ($name:ident, $re:expr, $text:expr) => ( - #[test] - fn $name() { - let text = text!($text); - let expected: Vec<(usize, usize)> = vec![]; - let r = regex!($re); - let got: Vec<_> = - r.find_iter(text).map(|m| (m.start(), m.end())).collect(); - if expected != got { - panic!("For RE '{}' against '{:?}', \ - expected '{:?}' but got '{:?}'", - $re, text, expected, got); - } - let captures_got: Vec<_> = - r.captures_iter(text) - .map(|c| c.get(0).unwrap()) - .map(|m| (m.start(), m.end())) - .collect(); - if captures_got != got { - panic!("For RE '{}' against '{:?}', \ - got '{:?}' using find_iter but got '{:?}' \ - using captures_iter", - $re, text, got, captures_got); - } - } - ); - ($name:ident, $re:expr, $text:expr, $($loc:tt)+) => ( - #[test] - fn $name() { - let text = text!($text); - let expected: Vec<_> = vec![$($loc)+]; - let r = regex!($re); - let got: Vec<_> = - r.find_iter(text).map(|m| (m.start(), m.end())).collect(); - if expected != got { - panic!("For RE '{}' against '{:?}', \ - expected '{:?}' but got '{:?}'", - $re, text, expected, got); - } - let captures_got: Vec<_> = - r.captures_iter(text) - .map(|c| 
c.get(0).unwrap()) - .map(|m| (m.start(), m.end())) - .collect(); - if captures_got != got { - panic!("For RE '{}' against '{:?}', \ - got '{:?}' using find_iter but got '{:?}' \ - using captures_iter", - $re, text, got, captures_got); - } - } - ); -); - -macro_rules! matset { - ($name:ident, $res:expr, $text:expr, $($match_index:expr),*) => { - #[test] - fn $name() { - let text = text!($text); - let set = regex_set!($res); - assert!(set.is_match(text)); - let expected = vec![$($match_index),*]; - let matches = set.matches(text); - assert!(matches.matched_any()); - let got: Vec<_> = matches.into_iter().collect(); - assert_eq!(expected, got); - } - } -} - -macro_rules! nomatset { - ($name:ident, $res:expr, $text:expr) => { - #[test] - fn $name() { - let text = text!($text); - let set = regex_set!($res); - assert!(!set.is_match(text)); - let matches = set.matches(text); - assert!(!matches.matched_any()); - assert_eq!(0, matches.into_iter().count()); - } - } -} - -macro_rules! split { - ($name:ident, $re:expr, $text:expr, $expected:expr) => { - #[test] - fn $name() { - let re = regex!($re); - let splitted: Vec<_> = re.split(t!($text)).collect(); - assert_eq!($expected, &*splitted); - } - } -} - -macro_rules! splitn { - ($name:ident, $re:expr, $text:expr, $limit:expr, $expected:expr) => { - #[test] - fn $name() { - let re = regex!($re); - let splitted: Vec<_> = re.splitn(t!($text), $limit).collect(); - assert_eq!($expected, &*splitted); - } - } -} diff --git a/vendor/regex/tests/macros_bytes.rs b/vendor/regex/tests/macros_bytes.rs deleted file mode 100644 index 3d6c8c3..0000000 --- a/vendor/regex/tests/macros_bytes.rs +++ /dev/null @@ -1,39 +0,0 @@ -// Macros for use in writing tests generic over &str/&[u8]. -macro_rules! text { ($text:expr) => { $text.as_bytes() } } -macro_rules! t { ($re:expr) => { text!($re) } } -macro_rules! match_text { ($text:expr) => { $text.as_bytes() } } -macro_rules! use_ { ($($path: tt)*) => { use regex::bytes::$($path)*; } } -macro_rules! 
empty_vec { () => { <Vec<&[u8]>>::new() } } -macro_rules! bytes { ($text:expr) => { $text } } - -macro_rules! no_expand { - ($text:expr) => {{ - use regex::bytes::NoExpand; - NoExpand(text!($text)) - }} -} - -macro_rules! show { - ($text:expr) => {{ - use std::ascii::escape_default; - let mut s = vec![]; - for &b in bytes!($text) { - s.extend(escape_default(b)); - } - String::from_utf8(s).unwrap() - }} -} - -macro_rules! expand { - ($name:ident, $re:expr, $text:expr, $expand:expr, $expected:expr) => { - #[test] - fn $name() { - let re = regex!($re); - let cap = re.captures(t!($text)).unwrap(); - - let mut got = vec![]; - cap.expand(t!($expand), &mut got); - assert_eq!(show!(t!($expected)), show!(&*got)); - } - } -} diff --git a/vendor/regex/tests/macros_str.rs b/vendor/regex/tests/macros_str.rs deleted file mode 100644 index 7b7eb11..0000000 --- a/vendor/regex/tests/macros_str.rs +++ /dev/null @@ -1,38 +0,0 @@ -// Macros for use in writing tests generic over &str/&[u8]. -macro_rules! text { ($text:expr) => { $text } } -macro_rules! t { ($text:expr) => { text!($text) } } -macro_rules! match_text { ($text:expr) => { $text.as_str() } } -macro_rules! use_ { ($($path: tt)*) => { use regex::$($path)*; } } -macro_rules! empty_vec { () => { <Vec<&str>>::new() } } -macro_rules! bytes { ($text:expr) => { std::str::from_utf8($text.as_ref()).unwrap() } } - -macro_rules! no_expand { - ($text:expr) => {{ - use regex::NoExpand; - NoExpand(text!($text)) - }} -} - -macro_rules! show { ($text:expr) => { $text } } - -// N.B. The expansion API for &str and &[u8] APIs differs slightly for now, -// but they should be unified in 1.0. Then we can move this macro back into -// tests/api.rs where it is used. ---AG -macro_rules! 
expand { - ($name:ident, $re:expr, $text:expr, $expand:expr, $expected:expr) => { - #[test] - fn $name() { - let re = regex!($re); - let cap = re.captures(t!($text)).unwrap(); - - let mut got = String::new(); - cap.expand(t!($expand), &mut got); - assert_eq!(show!(t!($expected)), show!(&*got)); - } - } -} - -#[cfg(feature = "pattern")] -macro_rules! searcher_expr { ($e:expr) => ($e) } -#[cfg(not(feature = "pattern"))] -macro_rules! searcher_expr { ($e:expr) => ({}) } diff --git a/vendor/regex/tests/misc.rs b/vendor/regex/tests/misc.rs index 314811e..91e7d28 100644 --- a/vendor/regex/tests/misc.rs +++ b/vendor/regex/tests/misc.rs @@ -1,4 +1,143 @@ -mat!(prefix_literal_match, r"^abc", r"abc", Some((0, 3))); -mat!(prefix_literal_nomatch, r"^abc", r"zabc", None); -mat!(one_literal_edge, r"abc", r"xxxxxab", None); -matiter!(terminates, r"a$", r"a", (0, 1)); +use regex::Regex; + +macro_rules! regex { + ($pattern:expr) => { + regex::Regex::new($pattern).unwrap() + }; +} + +#[test] +fn unclosed_group_error() { + let err = Regex::new(r"(").unwrap_err(); + let msg = err.to_string(); + assert!(msg.contains("unclosed group"), "error message: {:?}", msg); +} + +#[test] +fn regex_string() { + assert_eq!(r"[a-zA-Z0-9]+", regex!(r"[a-zA-Z0-9]+").as_str()); + assert_eq!(r"[a-zA-Z0-9]+", &format!("{}", regex!(r"[a-zA-Z0-9]+"))); + assert_eq!( + r#"Regex("[a-zA-Z0-9]+")"#, + &format!("{:?}", regex!(r"[a-zA-Z0-9]+")) + ); +} + +#[test] +fn capture_names() { + let re = regex!(r"(.)(?P<a>.)"); + assert_eq!(3, re.captures_len()); + assert_eq!((3, Some(3)), re.capture_names().size_hint()); + assert_eq!( + vec![None, None, Some("a")], + re.capture_names().collect::<Vec<_>>() + ); +} + +#[test] +fn capture_index() { + let re = regex!(r"^(?P<name>.+)$"); + let cap = re.captures("abc").unwrap(); + assert_eq!(&cap[0], "abc"); + assert_eq!(&cap[1], "abc"); + assert_eq!(&cap["name"], "abc"); +} + +#[test] +#[should_panic] +fn capture_index_panic_usize() { + let re = regex!(r"^(?P<name>.+)$"); + 
let cap = re.captures("abc").unwrap(); + let _ = cap[2]; +} + +#[test] +#[should_panic] +fn capture_index_panic_name() { + let re = regex!(r"^(?P<name>.+)$"); + let cap = re.captures("abc").unwrap(); + let _ = cap["bad name"]; +} + +#[test] +fn capture_index_lifetime() { + // This is a test of whether the types on `caps["..."]` are general + // enough. If not, this will fail to typecheck. + fn inner(s: &str) -> usize { + let re = regex!(r"(?P<number>[0-9]+)"); + let caps = re.captures(s).unwrap(); + caps["number"].len() + } + assert_eq!(3, inner("123")); +} + +#[test] +fn capture_misc() { + let re = regex!(r"(.)(?P<a>a)?(.)(?P<b>.)"); + let cap = re.captures("abc").unwrap(); + + assert_eq!(5, cap.len()); + + assert_eq!((0, 3), { + let m = cap.get(0).unwrap(); + (m.start(), m.end()) + }); + assert_eq!(None, cap.get(2)); + assert_eq!((2, 3), { + let m = cap.get(4).unwrap(); + (m.start(), m.end()) + }); + + assert_eq!("abc", cap.get(0).unwrap().as_str()); + assert_eq!(None, cap.get(2)); + assert_eq!("c", cap.get(4).unwrap().as_str()); + + assert_eq!(None, cap.name("a")); + assert_eq!("c", cap.name("b").unwrap().as_str()); +} + +#[test] +fn sub_capture_matches() { + let re = regex!(r"([a-z])(([a-z])|([0-9]))"); + let cap = re.captures("a5").unwrap(); + let subs: Vec<_> = cap.iter().collect(); + + assert_eq!(5, subs.len()); + assert!(subs[0].is_some()); + assert!(subs[1].is_some()); + assert!(subs[2].is_some()); + assert!(subs[3].is_none()); + assert!(subs[4].is_some()); + + assert_eq!("a5", subs[0].unwrap().as_str()); + assert_eq!("a", subs[1].unwrap().as_str()); + assert_eq!("5", subs[2].unwrap().as_str()); + assert_eq!("5", subs[4].unwrap().as_str()); +} + +// Test that the DFA can handle pathological cases. (This should result in the +// DFA's cache being flushed too frequently, which should cause it to quit and +// fall back to the NFA algorithm.) 
+#[test] +fn dfa_handles_pathological_case() { + fn ones_and_zeroes(count: usize) -> String { + let mut s = String::new(); + for i in 0..count { + if i % 3 == 0 { + s.push('1'); + } else { + s.push('0'); + } + } + s + } + + let re = regex!(r"[01]*1[01]{20}$"); + let text = { + let mut pieces = ones_and_zeroes(100_000); + pieces.push('1'); + pieces.push_str(&ones_and_zeroes(20)); + pieces + }; + assert!(re.is_match(&text)); +} diff --git a/vendor/regex/tests/multiline.rs b/vendor/regex/tests/multiline.rs deleted file mode 100644 index 62ee47b..0000000 --- a/vendor/regex/tests/multiline.rs +++ /dev/null @@ -1,144 +0,0 @@ -matiter!( - match_multi_1, - r"(?m)^[a-z]+$", - "abc\ndef\nxyz", - (0, 3), - (4, 7), - (8, 11) -); -matiter!(match_multi_2, r"(?m)^$", "abc\ndef\nxyz"); -matiter!(match_multi_3, r"(?m)^", "abc\ndef\nxyz", (0, 0), (4, 4), (8, 8)); -matiter!(match_multi_4, r"(?m)$", "abc\ndef\nxyz", (3, 3), (7, 7), (11, 11)); -matiter!( - match_multi_5, - r"(?m)^[a-z]", - "abc\ndef\nxyz", - (0, 1), - (4, 5), - (8, 9) -); -matiter!(match_multi_6, r"(?m)[a-z]^", "abc\ndef\nxyz"); -matiter!( - match_multi_7, - r"(?m)[a-z]$", - "abc\ndef\nxyz", - (2, 3), - (6, 7), - (10, 11) -); -matiter!(match_multi_8, r"(?m)$[a-z]", "abc\ndef\nxyz"); -matiter!(match_multi_9, r"(?m)^$", "", (0, 0)); - -matiter!( - match_multi_rep_1, - r"(?m)(?:^$)*", - "a\nb\nc", - (0, 0), - (1, 1), - (2, 2), - (3, 3), - (4, 4), - (5, 5) -); -matiter!( - match_multi_rep_2, - r"(?m)(?:^|a)+", - "a\naaa\n", - (0, 0), - (2, 2), - (3, 5), - (6, 6) -); -matiter!( - match_multi_rep_3, - r"(?m)(?:^|a)*", - "a\naaa\n", - (0, 1), - (2, 5), - (6, 6) -); -matiter!( - match_multi_rep_4, - r"(?m)(?:^[a-z])+", - "abc\ndef\nxyz", - (0, 1), - (4, 5), - (8, 9) -); -matiter!( - match_multi_rep_5, - r"(?m)(?:^[a-z]{3}\n?)+", - "abc\ndef\nxyz", - (0, 11) -); -matiter!( - match_multi_rep_6, - r"(?m)(?:^[a-z]{3}\n?)*", - "abc\ndef\nxyz", - (0, 11) -); -matiter!( - match_multi_rep_7, - r"(?m)(?:\n?[a-z]{3}$)+", - 
"abc\ndef\nxyz", - (0, 11) -); -matiter!( - match_multi_rep_8, - r"(?m)(?:\n?[a-z]{3}$)*", - "abc\ndef\nxyz", - (0, 11) -); -matiter!( - match_multi_rep_9, - r"(?m)^*", - "\naa\n", - (0, 0), - (1, 1), - (2, 2), - (3, 3), - (4, 4) -); -matiter!(match_multi_rep_10, r"(?m)^+", "\naa\n", (0, 0), (1, 1), (4, 4)); -matiter!( - match_multi_rep_11, - r"(?m)$*", - "\naa\n", - (0, 0), - (1, 1), - (2, 2), - (3, 3), - (4, 4) -); -matiter!(match_multi_rep_12, r"(?m)$+", "\naa\n", (0, 0), (3, 3), (4, 4)); -matiter!(match_multi_rep_13, r"(?m)(?:$\n)+", "\n\naaa\n\n", (0, 2), (5, 7)); -matiter!( - match_multi_rep_14, - r"(?m)(?:$\n)*", - "\n\naaa\n\n", - (0, 2), - (3, 3), - (4, 4), - (5, 7) -); -matiter!(match_multi_rep_15, r"(?m)(?:$\n^)+", "\n\naaa\n\n", (0, 2), (5, 7)); -matiter!( - match_multi_rep_16, - r"(?m)(?:^|$)+", - "\n\naaa\n\n", - (0, 0), - (1, 1), - (2, 2), - (5, 5), - (6, 6), - (7, 7) -); -matiter!( - match_multi_rep_17, - r"(?m)(?:$\n)*", - "\n\naaa\n\n", - (0, 2), - (3, 3), - (4, 4), - (5, 7) -); diff --git a/vendor/regex/tests/noparse.rs b/vendor/regex/tests/noparse.rs deleted file mode 100644 index 8ded1dc..0000000 --- a/vendor/regex/tests/noparse.rs +++ /dev/null @@ -1,45 +0,0 @@ -macro_rules! 
noparse( - ($name:ident, $re:expr) => ( - #[test] - fn $name() { - let re = $re; - match regex_new!(re) { - Err(_) => {}, - Ok(_) => panic!("Regex '{}' should cause a parse error.", re), - } - } - ); -); - -noparse!(fail_no_repeat_arg, "*"); -noparse!(fail_incomplete_escape, "\\"); -noparse!(fail_class_incomplete, "[A-"); -noparse!(fail_class_not_closed, "[A"); -noparse!(fail_class_no_begin, r"[\A]"); -noparse!(fail_class_no_end, r"[\z]"); -noparse!(fail_class_no_boundary, r"[\b]"); -noparse!(fail_open_paren, "("); -noparse!(fail_close_paren, ")"); -noparse!(fail_invalid_range, "[a-Z]"); -noparse!(fail_empty_capture_name, "(?P<>a)"); -noparse!(fail_bad_capture_name, "(?P<na-me>)"); -noparse!(fail_bad_flag, "(?a)a"); -noparse!(fail_too_big, "a{10000000}"); -noparse!(fail_counted_no_close, "a{1001"); -noparse!(fail_counted_decreasing, "a{2,1}"); -noparse!(fail_counted_nonnegative, "a{-1,1}"); -noparse!(fail_unfinished_cap, "(?"); -noparse!(fail_unfinished_escape, "\\"); -noparse!(fail_octal_digit, r"\8"); -noparse!(fail_hex_digit, r"\xG0"); -noparse!(fail_hex_short, r"\xF"); -noparse!(fail_hex_long_digits, r"\x{fffg}"); -noparse!(fail_flag_bad, "(?a)"); -noparse!(fail_flag_empty, "(?)"); -noparse!(fail_double_neg, "(?-i-i)"); -noparse!(fail_neg_empty, "(?i-)"); -noparse!(fail_dupe_named, "(?P<a>.)(?P<a>.)"); -noparse!(fail_range_end_no_class, "[a-[:lower:]]"); -noparse!(fail_range_end_no_begin, r"[a-\A]"); -noparse!(fail_range_end_no_end, r"[a-\z]"); -noparse!(fail_range_end_no_boundary, r"[a-\b]"); diff --git a/vendor/regex/tests/regression.rs b/vendor/regex/tests/regression.rs index e24fecb..a586701 100644 --- a/vendor/regex/tests/regression.rs +++ b/vendor/regex/tests/regression.rs @@ -1,235 +1,67 @@ +use regex::Regex; + +macro_rules! 
regex { + ($pattern:expr) => { + regex::Regex::new($pattern).unwrap() + }; +} + // See: https://github.com/rust-lang/regex/issues/48 #[test] fn invalid_regexes_no_crash() { - assert!(regex_new!("(*)").is_err()); - assert!(regex_new!("(?:?)").is_err()); - assert!(regex_new!("(?)").is_err()); - assert!(regex_new!("*").is_err()); + assert!(Regex::new("(*)").is_err()); + assert!(Regex::new("(?:?)").is_err()); + assert!(Regex::new("(?)").is_err()); + assert!(Regex::new("*").is_err()); } // See: https://github.com/rust-lang/regex/issues/98 #[test] fn regression_many_repeat_stack_overflow() { let re = regex!("^.{1,2500}"); - assert_eq!(vec![(0, 1)], findall!(re, "a")); + assert_eq!( + vec![0..1], + re.find_iter("a").map(|m| m.range()).collect::<Vec<_>>() + ); } // See: https://github.com/rust-lang/regex/issues/555 #[test] fn regression_invalid_repetition_expr() { - assert!(regex_new!("(?m){1,1}").is_err()); + assert!(Regex::new("(?m){1,1}").is_err()); } // See: https://github.com/rust-lang/regex/issues/527 #[test] fn regression_invalid_flags_expression() { - assert!(regex_new!("(((?x)))").is_ok()); + assert!(Regex::new("(((?x)))").is_ok()); } -// See: https://github.com/rust-lang/regex/issues/75 -mat!(regression_unsorted_binary_search_1, r"(?i-u)[a_]+", "A_", Some((0, 2))); -mat!(regression_unsorted_binary_search_2, r"(?i-u)[A_]+", "a_", Some((0, 2))); - -// See: https://github.com/rust-lang/regex/issues/99 -#[cfg(feature = "unicode-case")] -mat!(regression_negated_char_class_1, r"(?i)[^x]", "x", None); -#[cfg(feature = "unicode-case")] -mat!(regression_negated_char_class_2, r"(?i)[^x]", "X", None); - -// See: https://github.com/rust-lang/regex/issues/101 -mat!(regression_ascii_word_underscore, r"[[:word:]]", "_", Some((0, 1))); - // See: https://github.com/rust-lang/regex/issues/129 #[test] fn regression_captures_rep() { let re = regex!(r"([a-f]){2}(?P<foo>[x-z])"); - let caps = re.captures(text!("abx")).unwrap(); - assert_eq!(match_text!(caps.name("foo").unwrap()), 
text!("x")); + let caps = re.captures("abx").unwrap(); + assert_eq!(&caps["foo"], "x"); } -// See: https://github.com/rust-lang/regex/issues/153 -mat!(regression_alt_in_alt1, r"ab?|$", "az", Some((0, 1))); -mat!(regression_alt_in_alt2, r"^(.*?)(\n|\r\n?|$)", "ab\rcd", Some((0, 3))); - -// See: https://github.com/rust-lang/regex/issues/169 -mat!(regression_leftmost_first_prefix, r"z*azb", "azb", Some((0, 3))); - -// See: https://github.com/rust-lang/regex/issues/76 -#[cfg(all(feature = "unicode-case", feature = "unicode-gencat"))] -mat!(uni_case_lower_nocase_flag, r"(?i)\p{Ll}+", "ΛΘΓΔα", Some((0, 10))); - -// See: https://github.com/rust-lang/regex/issues/191 -mat!(many_alternates, r"1|2|3|4|5|6|7|8|9|10|int", "int", Some((0, 3))); - -// burntsushi was bad and didn't create an issue for this bug. -mat!(anchored_prefix1, r"^a[[:^space:]]", "a ", None); -mat!(anchored_prefix2, r"^a[[:^space:]]", "foo boo a ", None); -mat!(anchored_prefix3, r"^-[a-z]", "r-f", None); - -// See: https://github.com/rust-lang/regex/issues/204 -#[cfg(feature = "unicode-perl")] -split!( - split_on_word_boundary, - r"\b", - r"Should this (work?)", - &[ - t!(""), - t!("Should"), - t!(" "), - t!("this"), - t!(" ("), - t!("work"), - t!("?)") - ] -); -#[cfg(feature = "unicode-perl")] -matiter!( - word_boundary_dfa, - r"\b", - "a b c", - (0, 0), - (1, 1), - (2, 2), - (3, 3), - (4, 4), - (5, 5) -); - -// See: https://github.com/rust-lang/regex/issues/268 -matiter!(partial_anchor, r"^a|b", "ba", (0, 1)); - -// See: https://github.com/rust-lang/regex/issues/280 -ismatch!(partial_anchor_alternate_begin, r"^a|z", "yyyyya", false); -ismatch!(partial_anchor_alternate_end, r"a$|z", "ayyyyy", false); - -// See: https://github.com/rust-lang/regex/issues/289 -mat!(lits_unambiguous1, r"(ABC|CDA|BC)X", "CDAX", Some((0, 4))); - -// See: https://github.com/rust-lang/regex/issues/291 -mat!( - lits_unambiguous2, - r"((IMG|CAM|MG|MB2)_|(DSCN|CIMG))(?P<n>[0-9]+)$", - "CIMG2341", - Some((0, 8)), - Some((0, 4)), - 
None, - Some((0, 4)), - Some((4, 8)) -); - -// See: https://github.com/rust-lang/regex/issues/271 -mat!(endl_or_wb, r"(?m:$)|(?-u:\b)", "\u{6084e}", Some((4, 4))); -mat!(zero_or_end, r"(?i-u:\x00)|$", "\u{e682f}", Some((4, 4))); -mat!(y_or_endl, r"(?i-u:y)|(?m:$)", "\u{b4331}", Some((4, 4))); -#[cfg(feature = "unicode-perl")] -mat!(wb_start_x, r"(?u:\b)^(?-u:X)", "X", Some((0, 1))); - -// See: https://github.com/rust-lang/regex/issues/321 -ismatch!(strange_anchor_non_complete_prefix, r"a^{2}", "", false); -ismatch!(strange_anchor_non_complete_suffix, r"${2}a", "", false); - -// See: https://github.com/BurntSushi/ripgrep/issues/1203 -ismatch!(reverse_suffix1, r"[0-4][0-4][0-4]000", "153.230000", true); -ismatch!(reverse_suffix2, r"[0-9][0-9][0-9]000", "153.230000\n", true); -matiter!(reverse_suffix3, r"[0-9][0-9][0-9]000", "153.230000\n", (4, 10)); - -// See: https://github.com/rust-lang/regex/issues/334 -// See: https://github.com/rust-lang/regex/issues/557 -mat!( - captures_after_dfa_premature_end1, - r"a(b*(X|$))?", - "abcbX", - Some((0, 1)), - None, - None -); -mat!( - captures_after_dfa_premature_end2, - r"a(bc*(X|$))?", - "abcbX", - Some((0, 1)), - None, - None -); -mat!(captures_after_dfa_premature_end3, r"(aa$)?", "aaz", Some((0, 0))); - -// See: https://github.com/rust-lang/regex/issues/437 -ismatch!( - literal_panic, - r"typename type\-parameter\-[0-9]+\-[0-9]+::.+", - "test", - false -); - -// See: https://github.com/rust-lang/regex/issues/533 -ismatch!( - blank_matches_nothing_between_space_and_tab, - r"[[:blank:]]", - "\u{a}\u{b}\u{c}\u{d}\u{e}\u{f}\ - \u{10}\u{11}\u{12}\u{13}\u{14}\u{15}\u{16}\u{17}\ - \u{18}\u{19}\u{1a}\u{1b}\u{1c}\u{1d}\u{1e}\u{1f}", - false -); - -ismatch!( - inverted_blank_matches_everything_between_space_and_tab, - r"^[[:^blank:]]+$", - "\u{a}\u{b}\u{c}\u{d}\u{e}\u{f}\ - \u{10}\u{11}\u{12}\u{13}\u{14}\u{15}\u{16}\u{17}\ - \u{18}\u{19}\u{1a}\u{1b}\u{1c}\u{1d}\u{1e}\u{1f}", - true -); - -// Tests that our Aho-Corasick optimization 
works correctly. It only -// kicks in when we have >32 literals. By "works correctly," we mean that -// leftmost-first match semantics are properly respected. That is, samwise -// should match, not sam. -mat!( - ahocorasick1, - "samwise|sam|a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|\ - A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z", - "samwise", - Some((0, 7)) -); - // See: https://github.com/BurntSushi/ripgrep/issues/1247 -#[test] #[cfg(feature = "unicode-perl")] +#[test] fn regression_nfa_stops1() { - let re = ::regex::bytes::Regex::new(r"\bs(?:[ab])").unwrap(); + let re = regex::bytes::Regex::new(r"\bs(?:[ab])").unwrap(); assert_eq!(0, re.find_iter(b"s\xE4").count()); } -// See: https://github.com/rust-lang/regex/issues/640 -#[cfg(feature = "unicode-case")] -matiter!( - flags_are_unset, - r"((?i)foo)|Bar", - "foo Foo bar Bar", - (0, 3), - (4, 7), - (12, 15) -); - -// See: https://github.com/rust-lang/regex/issues/659 -// -// Note that 'Ј' is not 'j', but cyrillic Je -// https://en.wikipedia.org/wiki/Je_(Cyrillic) -ismatch!(empty_group_match, r"()Ј01", "zЈ01", true); -matiter!(empty_group_find, r"()Ј01", "zЈ01", (1, 5)); - -// See: https://github.com/rust-lang/regex/issues/862 -mat!(non_greedy_question_literal, r"ab??", "ab", Some((0, 1))); - // See: https://github.com/rust-lang/regex/issues/981 #[cfg(feature = "unicode")] #[test] fn regression_bad_word_boundary() { - let re = regex_new!(r#"(?i:(?:\b|_)win(?:32|64|dows)?(?:\b|_))"#).unwrap(); + let re = regex!(r#"(?i:(?:\b|_)win(?:32|64|dows)?(?:\b|_))"#); let hay = "ubi-Darwin-x86_64.tar.gz"; - assert!(!re.is_match(text!(hay))); + assert!(!re.is_match(hay)); let hay = "ubi-Windows-x86_64.zip"; - assert!(re.is_match(text!(hay))); + assert!(re.is_match(hay)); } // See: https://github.com/rust-lang/regex/issues/982 @@ -237,6 +69,26 @@ fn regression_bad_word_boundary() { #[test] fn regression_unicode_perl_not_enabled() { let pat = 
r"(\d+\s?(years|year|y))?\s?(\d+\s?(months|month|m))?\s?(\d+\s?(weeks|week|w))?\s?(\d+\s?(days|day|d))?\s?(\d+\s?(hours|hour|h))?"; - let re = regex_new!(pat); - assert!(re.is_ok()); + assert!(Regex::new(pat).is_ok()); +} + +// See: https://github.com/rust-lang/regex/issues/995 +#[test] +fn regression_big_regex_overflow() { + let pat = r" {2147483516}{2147483416}{5}"; + assert!(Regex::new(pat).is_err()); +} + +// See: https://github.com/rust-lang/regex/issues/999 +#[test] +fn regression_complete_literals_suffix_incorrect() { + let needles = vec![ + "aA", "bA", "cA", "dA", "eA", "fA", "gA", "hA", "iA", "jA", "kA", + "lA", "mA", "nA", "oA", "pA", "qA", "rA", "sA", "tA", "uA", "vA", + "wA", "xA", "yA", "zA", + ]; + let pattern = needles.join("|"); + let re = regex!(&pattern); + let hay = "FUBAR"; + assert_eq!(0, re.find_iter(hay).count()); } diff --git a/vendor/regex/tests/regression_fuzz.rs b/vendor/regex/tests/regression_fuzz.rs index 5f49530..f90ad4c 100644 --- a/vendor/regex/tests/regression_fuzz.rs +++ b/vendor/regex/tests/regression_fuzz.rs @@ -2,6 +2,14 @@ // can take quite a long time. Some of them take long enough that it's not // practical to run them in debug mode. :-/ +use regex::Regex; + +macro_rules! regex { + ($pattern:expr) => { + regex::Regex::new($pattern).unwrap() + }; +} + // See: https://oss-fuzz.com/testcase-detail/5673225499181056 // // Ignored by default since it takes too long in debug mode (almost a minute). 
@@ -14,8 +22,9 @@ fn fuzz1() { // See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=26505 // See: https://github.com/rust-lang/regex/issues/722 #[test] +#[cfg(feature = "unicode")] fn empty_any_errors_no_panic() { - assert!(regex_new!(r"\P{any}").is_err()); + assert!(Regex::new(r"\P{any}").is_ok()); } // This tests that a very large regex errors during compilation instead of @@ -27,7 +36,7 @@ fn empty_any_errors_no_panic() { #[test] fn big_regex_fails_to_compile() { let pat = "[\u{0}\u{e}\u{2}\\w~~>[l\t\u{0}]p?<]{971158}"; - assert!(regex_new!(pat).is_err()); + assert!(Regex::new(pat).is_err()); } // This was caught while on master but before a release went out(!). @@ -36,5 +45,17 @@ fn big_regex_fails_to_compile() { #[test] fn todo() { let pat = "(?:z|xx)@|xx"; - assert!(regex_new!(pat).is_ok()); + assert!(Regex::new(pat).is_ok()); +} + +// This was caused by the fuzzer, and then minimized by hand. +// +// This was caused by a bug in DFA determinization that mishandled NFA fail +// states. +#[test] +fn fail_branch_prevents_match() { + let pat = r".*[a&&b]A|B"; + let hay = "B"; + let re = Regex::new(pat).unwrap(); + assert!(re.is_match(hay)); } diff --git a/vendor/regex/tests/replace.rs b/vendor/regex/tests/replace.rs index f23c575..f26ae46 100644 --- a/vendor/regex/tests/replace.rs +++ b/vendor/regex/tests/replace.rs @@ -3,39 +3,32 @@ macro_rules! 
replace( $search:expr, $replace:expr, $result:expr) => ( #[test] fn $name() { - let re = regex!($re); - assert_eq!(re.$which(text!($search), $replace), text!($result)); + let re = regex::Regex::new($re).unwrap(); + assert_eq!(re.$which($search, $replace), $result); } ); ); -replace!(first, replace, r"[0-9]", "age: 26", t!("Z"), "age: Z6"); -replace!(plus, replace, r"[0-9]+", "age: 26", t!("Z"), "age: Z"); -replace!(all, replace_all, r"[0-9]", "age: 26", t!("Z"), "age: ZZ"); -replace!( - groups, - replace, - r"([^ ]+)[ ]+([^ ]+)", - "w1 w2", - t!("$2 $1"), - "w2 w1" -); +replace!(first, replace, r"[0-9]", "age: 26", "Z", "age: Z6"); +replace!(plus, replace, r"[0-9]+", "age: 26", "Z", "age: Z"); +replace!(all, replace_all, r"[0-9]", "age: 26", "Z", "age: ZZ"); +replace!(groups, replace, r"([^ ]+)[ ]+([^ ]+)", "w1 w2", "$2 $1", "w2 w1"); replace!( double_dollar, replace, r"([^ ]+)[ ]+([^ ]+)", "w1 w2", - t!("$2 $$1"), + "$2 $$1", "w2 $1" ); // replace!(adjacent_index, replace, -// r"([^aeiouy])ies$", "skies", t!("$1y"), "sky"); +// r"([^aeiouy])ies$", "skies", "$1y", "sky"); replace!( named, replace_all, r"(?P<first>[^ ]+)[ ]+(?P<last>[^ ]+)(?P<space>[ ]*)", "w1 w2 w3 w4", - t!("$last $first$space"), + "$last $first$space", "w2 w1 w4 w3" ); replace!( @@ -43,17 +36,17 @@ replace!( replace_all, "^[ \t]+|[ \t]+$", " \t trim me\t \t", - t!(""), + "", "trim me" ); -replace!(number_hypen, replace, r"(.)(.)", "ab", t!("$1-$2"), "a-b"); -// replace!(number_underscore, replace, r"(.)(.)", "ab", t!("$1_$2"), "a_b"); +replace!(number_hyphen, replace, r"(.)(.)", "ab", "$1-$2", "a-b"); +// replace!(number_underscore, replace, r"(.)(.)", "ab", "$1_$2", "a_b"); replace!( simple_expand, replace_all, r"([a-z]) ([a-z])", "a b", - t!("$2 $1"), + "$2 $1", "b a" ); replace!( @@ -61,7 +54,7 @@ replace!( replace_all, r"([a-z]+) ([a-z]+)", "a b", - t!("$$1"), + "$$1", "$1" ); replace!( @@ -69,7 +62,7 @@ replace!( replace_all, r"([a-z]+) ([a-z]+)", "a b", - t!("$2 $$c $1"), + "$2 $$c $1", "b 
$c a" ); replace!( @@ -77,7 +70,7 @@ replace!( replace, r"([^ ]+)[ ]+([^ ]+)", "w1 w2", - no_expand!("$2 $1"), + regex::NoExpand("$2 $1"), "$2 $1" ); replace!( @@ -85,18 +78,15 @@ replace!( replace, r"([^ ]+)[ ]+([^ ]+)", "w1 w2", - no_expand!("$$1"), + regex::NoExpand("$$1"), "$$1" ); -use_!(Captures); replace!( closure_returning_reference, replace, r"([0-9]+)", "age: 26", - |captures: &Captures<'_>| { - match_text!(captures.get(1).unwrap())[0..1].to_owned() - }, + |captures: &regex::Captures<'_>| { captures[1][0..1].to_owned() }, "age: 2" ); replace!( @@ -104,7 +94,7 @@ replace!( replace, r"[0-9]+", "age: 26", - |_captures: &Captures<'_>| t!("Z").to_owned(), + |_captures: &regex::Captures<'_>| "Z".to_owned(), "age: Z" ); @@ -114,12 +104,12 @@ replace!( replace_all, r"foo", "foobar", - t!(""), + "", "bar" ); // See https://github.com/rust-lang/regex/issues/393 -replace!(single_empty_match, replace, r"^", "bar", t!("foo"), "foobar"); +replace!(single_empty_match, replace, r"^", "bar", "foo", "foobar"); // See https://github.com/rust-lang/regex/issues/399 replace!( @@ -127,7 +117,7 @@ replace!( replace_all, r"(.)", "b", - t!("${1}a $1a"), + "${1}a $1a", "ba " ); @@ -136,7 +126,7 @@ replace!( replace, r"[0-9]", "age: 26", - t!("Z".to_string()), + "Z".to_string(), "age: Z6" ); replace!( @@ -144,7 +134,7 @@ replace!( replace, r"[0-9]", "age: 26", - t!(&"Z".to_string()), + &"Z".to_string(), "age: Z6" ); replace!( @@ -152,7 +142,7 @@ replace!( replace, r"[0-9]", "age: 26", - t!(std::borrow::Cow::<'_, str>::Borrowed("Z")), + std::borrow::Cow::<'_, str>::Borrowed("Z"), "age: Z6" ); replace!( @@ -160,7 +150,7 @@ replace!( replace, r"[0-9]", "age: 26", - t!(&std::borrow::Cow::<'_, str>::Borrowed("Z")), + &std::borrow::Cow::<'_, str>::Borrowed("Z"), "age: Z6" ); replace!( @@ -168,7 +158,7 @@ replace!( replace, r"[0-9]", "age: 26", - t!(std::borrow::Cow::<'_, str>::Owned("Z".to_string())), + std::borrow::Cow::<'_, str>::Owned("Z".to_string()), "age: Z6" ); replace!( @@ -176,73
+166,18 @@ replace!( replace, r"[0-9]", "age: 26", - t!(&std::borrow::Cow::<'_, str>::Owned("Z".to_string())), - "age: Z6" -); - -replace!( - impl_vec_u8, - replace, - r"[0-9]", - "age: 26", - bytes!(vec![b'Z']), - "age: Z6" -); -replace!( - impl_vec_u8_ref, - replace, - r"[0-9]", - "age: 26", - bytes!(&vec![b'Z']), - "age: Z6" -); -replace!( - impl_cow_slice_borrowed, - replace, - r"[0-9]", - "age: 26", - bytes!(std::borrow::Cow::<'_, [u8]>::Borrowed(&[b'Z'])), - "age: Z6" -); -replace!( - impl_cow_slice_borrowed_ref, - replace, - r"[0-9]", - "age: 26", - bytes!(&std::borrow::Cow::<'_, [u8]>::Borrowed(&[b'Z'])), - "age: Z6" -); -replace!( - impl_cow_slice_owned, - replace, - r"[0-9]", - "age: 26", - bytes!(std::borrow::Cow::<'_, [u8]>::Owned(vec![b'Z'])), - "age: Z6" -); -replace!( - impl_cow_slice_owned_ref, - replace, - r"[0-9]", - "age: 26", - bytes!(&std::borrow::Cow::<'_, [u8]>::Owned(vec![b'Z'])), + &std::borrow::Cow::<'_, str>::Owned("Z".to_string()), "age: Z6" ); #[test] fn replacen_no_captures() { - let re = regex!(r"[0-9]"); - assert_eq!( - re.replacen(text!("age: 1234"), 2, t!("Z")), - text!("age: ZZ34") - ); + let re = regex::Regex::new(r"[0-9]").unwrap(); + assert_eq!(re.replacen("age: 1234", 2, "Z"), "age: ZZ34"); } #[test] fn replacen_with_captures() { - let re = regex!(r"([0-9])"); - assert_eq!( - re.replacen(text!("age: 1234"), 2, t!("${1}Z")), - text!("age: 1Z2Z34") - ); + let re = regex::Regex::new(r"([0-9])").unwrap(); + assert_eq!(re.replacen("age: 1234", 2, "${1}Z"), "age: 1Z2Z34"); } diff --git a/vendor/regex/tests/searcher.rs b/vendor/regex/tests/searcher.rs index 3779f54..f6dae13 100644 --- a/vendor/regex/tests/searcher.rs +++ b/vendor/regex/tests/searcher.rs @@ -12,20 +12,18 @@ macro_rules! searcher { #[test] #[allow(unused_imports)] fn $name() { - searcher_expr! 
{{ - use std::str::pattern::{Pattern, Searcher}; - use std::str::pattern::SearchStep::{Match, Reject, Done}; - let re = regex!($re); - let mut se = re.into_searcher($haystack); - let mut got_steps = vec![]; - loop { - match se.next() { - Done => break, - step => { got_steps.push(step); } - } + use std::str::pattern::{Pattern, Searcher}; + use std::str::pattern::SearchStep::{Match, Reject, Done}; + let re = regex::Regex::new($re).unwrap(); + let mut se = re.into_searcher($haystack); + let mut got_steps = vec![]; + loop { + match se.next() { + Done => break, + step => { got_steps.push(step); } } - assert_eq!(got_steps, $expect_steps); - }} + } + assert_eq!(got_steps, $expect_steps); } ); } diff --git a/vendor/regex/tests/set.rs b/vendor/regex/tests/set.rs deleted file mode 100644 index d1144d6..0000000 --- a/vendor/regex/tests/set.rs +++ /dev/null @@ -1,74 +0,0 @@ -matset!(set1, &["a", "a"], "a", 0, 1); -matset!(set2, &["a", "a"], "ba", 0, 1); -matset!(set3, &["a", "b"], "a", 0); -matset!(set4, &["a", "b"], "b", 1); -matset!(set5, &["a|b", "b|a"], "b", 0, 1); -matset!(set6, &["foo", "oo"], "foo", 0, 1); -matset!(set7, &["^foo", "bar$"], "foo", 0); -matset!(set8, &["^foo", "bar$"], "foo bar", 0, 1); -matset!(set9, &["^foo", "bar$"], "bar", 1); -matset!(set10, &[r"[a-z]+$", "foo"], "01234 foo", 0, 1); -matset!(set11, &[r"[a-z]+$", "foo"], "foo 01234", 1); -matset!(set12, &[r".*?", "a"], "zzzzzza", 0, 1); -matset!(set13, &[r".*", "a"], "zzzzzza", 0, 1); -matset!(set14, &[r".*", "a"], "zzzzzz", 0); -matset!(set15, &[r"(?-u)\ba\b"], "hello a bye", 0); -matset!(set16, &["a"], "a", 0); -matset!(set17, &[".*a"], "a", 0); -matset!(set18, &["a", "β"], "β", 1); - -// regexes that match the empty string -matset!(setempty1, &["", "a"], "abc", 0, 1); -matset!(setempty2, &["", "b"], "abc", 0, 1); -matset!(setempty3, &["", "z"], "abc", 0); -matset!(setempty4, &["a", ""], "abc", 0, 1); -matset!(setempty5, &["b", ""], "abc", 0, 1); -matset!(setempty6, &["z", ""], "abc", 1); 
-matset!(setempty7, &["b", "(?:)"], "abc", 0, 1); -matset!(setempty8, &["(?:)", "b"], "abc", 0, 1); -matset!(setempty9, &["c(?:)", "b"], "abc", 0, 1); - -nomatset!(nset1, &["a", "a"], "b"); -nomatset!(nset2, &["^foo", "bar$"], "bar foo"); -nomatset!( - nset3, - { - let xs: &[&str] = &[]; - xs - }, - "a" -); -nomatset!(nset4, &[r"^rooted$", r"\.log$"], "notrooted"); - -// See: https://github.com/rust-lang/regex/issues/187 -#[test] -fn regression_subsequent_matches() { - let set = regex_set!(&["ab", "b"]); - let text = text!("ba"); - assert!(set.matches(text).matched(1)); - assert!(set.matches(text).matched(1)); -} - -#[test] -fn get_set_patterns() { - let set = regex_set!(&["a", "b"]); - assert_eq!(vec!["a", "b"], set.patterns()); -} - -#[test] -fn len_and_empty() { - let empty = regex_set!(&[""; 0]); - assert_eq!(empty.len(), 0); - assert!(empty.is_empty()); - - let not_empty = regex_set!(&["ab", "b"]); - assert_eq!(not_empty.len(), 2); - assert!(!not_empty.is_empty()); -} - -#[test] -fn default_set_is_empty() { - let set: regex::bytes::RegexSet = Default::default(); - assert_eq!(set.len(), 0); - assert!(set.is_empty()); -} diff --git a/vendor/regex/tests/shortest_match.rs b/vendor/regex/tests/shortest_match.rs deleted file mode 100644 index f8b4fed..0000000 --- a/vendor/regex/tests/shortest_match.rs +++ /dev/null @@ -1,14 +0,0 @@ -macro_rules! shortmat { - ($name:ident, $re:expr, $text:expr, $shortest_match:expr) => { - #[test] - fn $name() { - let text = text!($text); - let re = regex!($re); - assert_eq!($shortest_match, re.shortest_match(text)); - } - }; -} - -shortmat!(t01, r"a+", r"aa", Some(1)); -// Test that the reverse suffix optimization gets it right. 
-shortmat!(t02, r".*(?:abcd)+", r"abcdabcd", Some(4)); diff --git a/vendor/regex/tests/suffix_reverse.rs b/vendor/regex/tests/suffix_reverse.rs deleted file mode 100644 index 774c9e8..0000000 --- a/vendor/regex/tests/suffix_reverse.rs +++ /dev/null @@ -1,6 +0,0 @@ -mat!(t01, r".*abcd", r"abcd", Some((0, 4))); -mat!(t02, r".*(?:abcd)+", r"abcd", Some((0, 4))); -mat!(t03, r".*(?:abcd)+", r"abcdabcd", Some((0, 8))); -mat!(t04, r".*(?:abcd)+", r"abcdxabcd", Some((0, 9))); -mat!(t05, r".*x(?:abcd)+", r"abcdxabcd", Some((0, 9))); -mat!(t06, r"[^abcd]*x(?:abcd)+", r"abcdxabcd", Some((4, 9))); diff --git a/vendor/regex/tests/suite_bytes.rs b/vendor/regex/tests/suite_bytes.rs new file mode 100644 index 0000000..106d998 --- /dev/null +++ b/vendor/regex/tests/suite_bytes.rs @@ -0,0 +1,108 @@ +use { + anyhow::Result, + regex::bytes::{Regex, RegexBuilder}, + regex_test::{ + CompiledRegex, Match, RegexTest, Span, TestResult, TestRunner, + }, +}; + +/// Tests the default configuration of the hybrid NFA/DFA. 
+#[test] +fn default() -> Result<()> { + let mut runner = TestRunner::new()?; + runner + .expand(&["is_match", "find", "captures"], |test| test.compiles()) + .blacklist_iter(super::BLACKLIST) + .test_iter(crate::suite()?.iter(), compiler) + .assert(); + Ok(()) +} + +fn run_test(re: &Regex, test: &RegexTest) -> TestResult { + match test.additional_name() { + "is_match" => TestResult::matched(re.is_match(test.haystack())), + "find" => TestResult::matches( + re.find_iter(test.haystack()) + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|m| Match { + id: 0, + span: Span { start: m.start(), end: m.end() }, + }), + ), + "captures" => { + let it = re + .captures_iter(test.haystack()) + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|caps| testify_captures(&caps)); + TestResult::captures(it) + } + name => TestResult::fail(&format!("unrecognized test name: {}", name)), + } +} + +/// Converts the given regex test to a closure that searches with a +/// `bytes::Regex`. If the test configuration is unsupported, then a +/// `CompiledRegex` that skips the test is returned. +fn compiler( + test: &RegexTest, + _patterns: &[String], +) -> anyhow::Result<CompiledRegex> { + let skip = Ok(CompiledRegex::skip()); + + // We're only testing bytes::Regex here, which supports one pattern only. + let pattern = match test.regexes().len() { + 1 => &test.regexes()[0], + _ => return skip, + }; + // We only test is_match, find_iter and captures_iter. All of those are + // leftmost searches. + if !matches!(test.search_kind(), regex_test::SearchKind::Leftmost) { + return skip; + } + // The top-level single-pattern regex API always uses leftmost-first. + if !matches!(test.match_kind(), regex_test::MatchKind::LeftmostFirst) { + return skip; + } + // The top-level regex API always runs unanchored searches. ... But we can + // handle tests that are anchored but have only one match. 
+ if test.anchored() && test.match_limit() != Some(1) { + return skip; + } + // We don't support tests with explicit search bounds. We could probably + // support this by using the 'find_at' (and such) APIs. + let bounds = test.bounds(); + if !(bounds.start == 0 && bounds.end == test.haystack().len()) { + return skip; + } + // The bytes::Regex API specifically does not support enabling UTF-8 mode. + // It could I suppose, but currently it does not. That is, it permits + // matches to have offsets that split codepoints. + if test.utf8() { + return skip; + } + // If the test requires Unicode but the Unicode feature isn't enabled, + // skip it. This is a little aggressive, but the test suite doesn't + // have any easy way of communicating which Unicode features are needed. + if test.unicode() && !cfg!(feature = "unicode") { + return skip; + } + let re = RegexBuilder::new(pattern) + .case_insensitive(test.case_insensitive()) + .unicode(test.unicode()) + .line_terminator(test.line_terminator()) + .build()?; + Ok(CompiledRegex::compiled(move |test| run_test(&re, test))) +} + +/// Convert `Captures` into the test suite's capture values. +fn testify_captures( + caps: &regex::bytes::Captures<'_>, +) -> regex_test::Captures { + let spans = caps.iter().map(|group| { + group.map(|m| regex_test::Span { start: m.start(), end: m.end() }) + }); + // This unwrap is OK because we assume our 'caps' represents a match, and + // a match always gives a non-zero number of groups with the first group + // being non-None. + regex_test::Captures::new(0, spans).unwrap() +} diff --git a/vendor/regex/tests/suite_bytes_set.rs b/vendor/regex/tests/suite_bytes_set.rs new file mode 100644 index 0000000..899d24c --- /dev/null +++ b/vendor/regex/tests/suite_bytes_set.rs @@ -0,0 +1,71 @@ +use { + anyhow::Result, + regex::bytes::{RegexSet, RegexSetBuilder}, + regex_test::{CompiledRegex, RegexTest, TestResult, TestRunner}, +}; + +/// Tests the default configuration of the hybrid NFA/DFA.
+#[test] +fn default() -> Result<()> { + let mut runner = TestRunner::new()?; + runner + .expand(&["is_match", "which"], |test| test.compiles()) + .blacklist_iter(super::BLACKLIST) + .test_iter(crate::suite()?.iter(), compiler) + .assert(); + Ok(()) +} + +fn run_test(re: &RegexSet, test: &RegexTest) -> TestResult { + match test.additional_name() { + "is_match" => TestResult::matched(re.is_match(test.haystack())), + "which" => TestResult::which(re.matches(test.haystack()).iter()), + name => TestResult::fail(&format!("unrecognized test name: {}", name)), + } +} + +/// Converts the given regex test to a closure that searches with a +/// `bytes::Regex`. If the test configuration is unsupported, then a +/// `CompiledRegex` that skips the test is returned. +fn compiler( + test: &RegexTest, + _patterns: &[String], +) -> anyhow::Result<CompiledRegex> { + let skip = Ok(CompiledRegex::skip()); + + // The top-level RegexSet API only supports "overlapping" semantics. + if !matches!(test.search_kind(), regex_test::SearchKind::Overlapping) { + return skip; + } + // The top-level RegexSet API only supports "all" semantics. + if !matches!(test.match_kind(), regex_test::MatchKind::All) { + return skip; + } + // The top-level RegexSet API always runs unanchored searches. + if test.anchored() { + return skip; + } + // We don't support tests with explicit search bounds. + let bounds = test.bounds(); + if !(bounds.start == 0 && bounds.end == test.haystack().len()) { + return skip; + } + // The bytes::Regex API specifically does not support enabling UTF-8 mode. + // It could I suppose, but currently it does not. That is, it permits + // matches to have offsets that split codepoints. + if test.utf8() { + return skip; + } + // If the test requires Unicode but the Unicode feature isn't enabled, + // skip it. This is a little aggressive, but the test suite doesn't + // have any easy way of communicating which Unicode features are needed. 
+ if test.unicode() && !cfg!(feature = "unicode") { + return skip; + } + let re = RegexSetBuilder::new(test.regexes()) + .case_insensitive(test.case_insensitive()) + .unicode(test.unicode()) + .line_terminator(test.line_terminator()) + .build()?; + Ok(CompiledRegex::compiled(move |test| run_test(&re, test))) +} diff --git a/vendor/regex/tests/suite_string.rs b/vendor/regex/tests/suite_string.rs new file mode 100644 index 0000000..1e5bf0b --- /dev/null +++ b/vendor/regex/tests/suite_string.rs @@ -0,0 +1,114 @@ +use { + anyhow::Result, + regex::{Regex, RegexBuilder}, + regex_test::{ + CompiledRegex, Match, RegexTest, Span, TestResult, TestRunner, + }, +}; + +/// Tests the default configuration of the hybrid NFA/DFA. +#[test] +fn default() -> Result<()> { + let mut runner = TestRunner::new()?; + runner + .expand(&["is_match", "find", "captures"], |test| test.compiles()) + .blacklist_iter(super::BLACKLIST) + .test_iter(crate::suite()?.iter(), compiler) + .assert(); + Ok(()) +} + +fn run_test(re: &Regex, test: &RegexTest) -> TestResult { + let hay = match std::str::from_utf8(test.haystack()) { + Ok(hay) => hay, + Err(err) => { + return TestResult::fail(&format!( + "haystack is not valid UTF-8: {}", + err + )); + } + }; + match test.additional_name() { + "is_match" => TestResult::matched(re.is_match(hay)), + "find" => TestResult::matches( + re.find_iter(hay) + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|m| Match { + id: 0, + span: Span { start: m.start(), end: m.end() }, + }), + ), + "captures" => { + let it = re + .captures_iter(hay) + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|caps| testify_captures(&caps)); + TestResult::captures(it) + } + name => TestResult::fail(&format!("unrecognized test name: {}", name)), + } +} + +/// Converts the given regex test to a closure that searches with a +/// `bytes::Regex`. If the test configuration is unsupported, then a +/// `CompiledRegex` that skips the test is returned. 
+fn compiler( + test: &RegexTest, + _patterns: &[String], +) -> anyhow::Result<CompiledRegex> { + let skip = Ok(CompiledRegex::skip()); + + // We're only testing bytes::Regex here, which supports one pattern only. + let pattern = match test.regexes().len() { + 1 => &test.regexes()[0], + _ => return skip, + }; + // We only test is_match, find_iter and captures_iter. All of those are + // leftmost searches. + if !matches!(test.search_kind(), regex_test::SearchKind::Leftmost) { + return skip; + } + // The top-level single-pattern regex API always uses leftmost-first. + if !matches!(test.match_kind(), regex_test::MatchKind::LeftmostFirst) { + return skip; + } + // The top-level regex API always runs unanchored searches. ... But we can + // handle tests that are anchored but have only one match. + if test.anchored() && test.match_limit() != Some(1) { + return skip; + } + // We don't support tests with explicit search bounds. We could probably + // support this by using the 'find_at' (and such) APIs. + let bounds = test.bounds(); + if !(bounds.start == 0 && bounds.end == test.haystack().len()) { + return skip; + } + // The Regex API specifically does not support disabling UTF-8 mode because + // it can only search &str which is always valid UTF-8. + if !test.utf8() { + return skip; + } + // If the test requires Unicode but the Unicode feature isn't enabled, + // skip it. This is a little aggressive, but the test suite doesn't + // have any easy way of communicating which Unicode features are needed. + if test.unicode() && !cfg!(feature = "unicode") { + return skip; + } + let re = RegexBuilder::new(pattern) + .case_insensitive(test.case_insensitive()) + .unicode(test.unicode()) + .line_terminator(test.line_terminator()) + .build()?; + Ok(CompiledRegex::compiled(move |test| run_test(&re, test))) +} + +/// Convert `Captures` into the test suite's capture values. 
+fn testify_captures(caps: &regex::Captures<'_>) -> regex_test::Captures { + let spans = caps.iter().map(|group| { + group.map(|m| regex_test::Span { start: m.start(), end: m.end() }) + }); + // This unwrap is OK because we assume our 'caps' represents a match, and + // a match always gives a non-zero number of groups with the first group + // being non-None. + regex_test::Captures::new(0, spans).unwrap() +} diff --git a/vendor/regex/tests/suite_string_set.rs b/vendor/regex/tests/suite_string_set.rs new file mode 100644 index 0000000..dffdc70 --- /dev/null +++ b/vendor/regex/tests/suite_string_set.rs @@ -0,0 +1,79 @@ +use { + anyhow::Result, + regex::{RegexSet, RegexSetBuilder}, + regex_test::{CompiledRegex, RegexTest, TestResult, TestRunner}, +}; + +/// Tests the default configuration of the hybrid NFA/DFA. +#[test] +fn default() -> Result<()> { + let mut runner = TestRunner::new()?; + runner + .expand(&["is_match", "which"], |test| test.compiles()) + .blacklist_iter(super::BLACKLIST) + .test_iter(crate::suite()?.iter(), compiler) + .assert(); + Ok(()) +} + +fn run_test(re: &RegexSet, test: &RegexTest) -> TestResult { + let hay = match std::str::from_utf8(test.haystack()) { + Ok(hay) => hay, + Err(err) => { + return TestResult::fail(&format!( + "haystack is not valid UTF-8: {}", + err + )); + } + }; + match test.additional_name() { + "is_match" => TestResult::matched(re.is_match(hay)), + "which" => TestResult::which(re.matches(hay).iter()), + name => TestResult::fail(&format!("unrecognized test name: {}", name)), + } +} + +/// Converts the given regex test to a closure that searches with a +/// `bytes::Regex`. If the test configuration is unsupported, then a +/// `CompiledRegex` that skips the test is returned. +fn compiler( + test: &RegexTest, + _patterns: &[String], +) -> anyhow::Result<CompiledRegex> { + let skip = Ok(CompiledRegex::skip()); + + // The top-level RegexSet API only supports "overlapping" semantics.
+ if !matches!(test.search_kind(), regex_test::SearchKind::Overlapping) { + return skip; + } + // The top-level RegexSet API only supports "all" semantics. + if !matches!(test.match_kind(), regex_test::MatchKind::All) { + return skip; + } + // The top-level RegexSet API always runs unanchored searches. + if test.anchored() { + return skip; + } + // We don't support tests with explicit search bounds. + let bounds = test.bounds(); + if !(bounds.start == 0 && bounds.end == test.haystack().len()) { + return skip; + } + // The Regex API specifically does not support disabling UTF-8 mode because + // it can only search &str which is always valid UTF-8. + if !test.utf8() { + return skip; + } + // If the test requires Unicode but the Unicode feature isn't enabled, + // skip it. This is a little aggressive, but the test suite doesn't + // have any easy way of communicating which Unicode features are needed. + if test.unicode() && !cfg!(feature = "unicode") { + return skip; + } + let re = RegexSetBuilder::new(test.regexes()) + .case_insensitive(test.case_insensitive()) + .unicode(test.unicode()) + .line_terminator(test.line_terminator()) + .build()?; + Ok(CompiledRegex::compiled(move |test| run_test(&re, test))) +} diff --git a/vendor/regex/tests/test_backtrack.rs b/vendor/regex/tests/test_backtrack.rs deleted file mode 100644 index fb934e2..0000000 --- a/vendor/regex/tests/test_backtrack.rs +++ /dev/null @@ -1,56 +0,0 @@ -#![cfg_attr(feature = "pattern", feature(pattern))] - -macro_rules! regex_new { - ($re:expr) => {{ - use regex::internal::ExecBuilder; - ExecBuilder::new($re) - .bounded_backtracking() - .build() - .map(|e| e.into_regex()) - }}; -} - -macro_rules! regex { - ($re:expr) => { - regex_new!($re).unwrap() - }; -} - -macro_rules! regex_set_new { - ($re:expr) => {{ - use regex::internal::ExecBuilder; - ExecBuilder::new_many($re) - .bounded_backtracking() - .build() - .map(|e| e.into_regex_set()) - }}; -} - -macro_rules! 
regex_set { - ($res:expr) => { - regex_set_new!($res).unwrap() - }; -} - -// Must come before other module definitions. -include!("macros_str.rs"); -include!("macros.rs"); - -mod api; -mod api_str; -mod crazy; -mod flags; -mod fowler; -mod multiline; -mod noparse; -mod regression; -mod replace; -mod searcher; -mod set; -mod suffix_reverse; -#[cfg(feature = "unicode")] -mod unicode; -#[cfg(feature = "unicode-perl")] -mod word_boundary; -#[cfg(feature = "unicode-perl")] -mod word_boundary_unicode; diff --git a/vendor/regex/tests/test_backtrack_bytes.rs b/vendor/regex/tests/test_backtrack_bytes.rs deleted file mode 100644 index a59426c..0000000 --- a/vendor/regex/tests/test_backtrack_bytes.rs +++ /dev/null @@ -1,55 +0,0 @@ -macro_rules! regex_new { - ($re:expr) => {{ - use regex::internal::ExecBuilder; - ExecBuilder::new($re) - .bounded_backtracking() - .only_utf8(false) - .build() - .map(|e| e.into_byte_regex()) - }}; -} - -macro_rules! regex { - ($re:expr) => { - regex_new!($re).unwrap() - }; -} - -macro_rules! regex_set_new { - ($re:expr) => {{ - use regex::internal::ExecBuilder; - ExecBuilder::new_many($re) - .bounded_backtracking() - .only_utf8(false) - .build() - .map(|e| e.into_byte_regex_set()) - }}; -} - -macro_rules! regex_set { - ($res:expr) => { - regex_set_new!($res).unwrap() - }; -} - -// Must come before other module definitions. 
-include!("macros_bytes.rs"); -include!("macros.rs"); - -mod api; -mod bytes; -mod crazy; -mod flags; -mod fowler; -mod multiline; -mod noparse; -mod regression; -mod replace; -mod set; -mod suffix_reverse; -#[cfg(feature = "unicode")] -mod unicode; -#[cfg(feature = "unicode-perl")] -mod word_boundary; -#[cfg(feature = "unicode-perl")] -mod word_boundary_ascii; diff --git a/vendor/regex/tests/test_backtrack_utf8bytes.rs b/vendor/regex/tests/test_backtrack_utf8bytes.rs deleted file mode 100644 index 6d308e9..0000000 --- a/vendor/regex/tests/test_backtrack_utf8bytes.rs +++ /dev/null @@ -1,58 +0,0 @@ -#![cfg_attr(feature = "pattern", feature(pattern))] - -macro_rules! regex_new { - ($re:expr) => {{ - use regex::internal::ExecBuilder; - ExecBuilder::new($re) - .bounded_backtracking() - .bytes(true) - .build() - .map(|e| e.into_regex()) - }}; -} - -macro_rules! regex { - ($re:expr) => { - regex_new!($re).unwrap() - }; -} - -macro_rules! regex_set_new { - ($re:expr) => {{ - use regex::internal::ExecBuilder; - ExecBuilder::new_many($re) - .bounded_backtracking() - .bytes(true) - .build() - .map(|e| e.into_regex_set()) - }}; -} - -macro_rules! regex_set { - ($res:expr) => { - regex_set_new!($res).unwrap() - }; -} - -// Must come before other module definitions. 
-include!("macros_str.rs"); -include!("macros.rs"); - -mod api; -mod api_str; -mod crazy; -mod flags; -mod fowler; -mod multiline; -mod noparse; -mod regression; -mod replace; -mod searcher; -mod set; -mod suffix_reverse; -#[cfg(feature = "unicode")] -mod unicode; -#[cfg(feature = "unicode-perl")] -mod word_boundary; -#[cfg(feature = "unicode-perl")] -mod word_boundary_unicode; diff --git a/vendor/regex/tests/test_crates_regex.rs b/vendor/regex/tests/test_crates_regex.rs deleted file mode 100644 index a681604..0000000 --- a/vendor/regex/tests/test_crates_regex.rs +++ /dev/null @@ -1,54 +0,0 @@ -/* - * This test is a minimal version of <rofl_0> and <subdiff_0> - * - * Once this bug gets fixed, uncomment rofl_0 and subdiff_0 - * (in `tests/crates_regex.rs`). -#[test] -fn word_boundary_backtracking_default_mismatch() { - use regex::internal::ExecBuilder; - - let backtrack_re = ExecBuilder::new(r"\b") - .bounded_backtracking() - .build() - .map(|exec| exec.into_regex()) - .map_err(|err| format!("{}", err)) - .unwrap(); - - let default_re = ExecBuilder::new(r"\b") - .build() - .map(|exec| exec.into_regex()) - .map_err(|err| format!("{}", err)) - .unwrap(); - - let input = "䅅\\u{a0}"; - - let fi1 = backtrack_re.find_iter(input); - let fi2 = default_re.find_iter(input); - for (m1, m2) in fi1.zip(fi2) { - assert_eq!(m1, m2); - } -} -*/ - -mod consistent; - -mod crates_regex { - - macro_rules! 
consistent { - ($test_name:ident, $regex_src:expr) => { - #[test] - fn $test_name() { - use super::consistent::backends_are_consistent; - - if option_env!("RUST_REGEX_RANDOM_TEST").is_some() { - match backends_are_consistent($regex_src) { - Ok(_) => {} - Err(err) => panic!("{}", err), - } - } - } - }; - } - - include!("crates_regex.rs"); -} diff --git a/vendor/regex/tests/test_default.rs b/vendor/regex/tests/test_default.rs deleted file mode 100644 index 19a319a..0000000 --- a/vendor/regex/tests/test_default.rs +++ /dev/null @@ -1,232 +0,0 @@ -#![cfg_attr(feature = "pattern", feature(pattern))] - -use regex; - -// Due to macro scoping rules, this definition only applies for the modules -// defined below. Effectively, it allows us to use the same tests for both -// native and dynamic regexes. -// -// This is also used to test the various matching engines. This one exercises -// the normal code path which automatically chooses the engine based on the -// regex and the input. Other dynamic tests explicitly set the engine to use. -macro_rules! regex_new { - ($re:expr) => {{ - use regex::Regex; - Regex::new($re) - }}; -} - -macro_rules! regex { - ($re:expr) => { - regex_new!($re).unwrap() - }; -} - -macro_rules! regex_set_new { - ($re:expr) => {{ - use regex::RegexSet; - RegexSet::new($re) - }}; -} - -macro_rules! regex_set { - ($res:expr) => { - regex_set_new!($res).unwrap() - }; -} - -// Must come before other module definitions. 
-include!("macros_str.rs"); -include!("macros.rs"); - -mod api; -mod api_str; -mod crazy; -mod flags; -mod fowler; -mod misc; -mod multiline; -mod noparse; -mod regression; -mod regression_fuzz; -mod replace; -mod searcher; -mod set; -mod shortest_match; -mod suffix_reverse; -#[cfg(feature = "unicode")] -mod unicode; -#[cfg(feature = "unicode-perl")] -mod word_boundary; -#[cfg(feature = "unicode-perl")] -mod word_boundary_unicode; - -#[test] -fn disallow_non_utf8() { - assert!(regex::Regex::new(r"(?-u)\xFF").is_err()); - assert!(regex::Regex::new(r"(?-u).").is_err()); - assert!(regex::Regex::new(r"(?-u)[\xFF]").is_err()); - assert!(regex::Regex::new(r"(?-u)☃").is_err()); -} - -#[test] -fn disallow_octal() { - assert!(regex::Regex::new(r"\0").is_err()); -} - -#[test] -fn allow_octal() { - assert!(regex::RegexBuilder::new(r"\0").octal(true).build().is_ok()); -} - -#[test] -fn oibits() { - use regex::bytes; - use regex::{Regex, RegexBuilder, RegexSet, RegexSetBuilder}; - use std::panic::{RefUnwindSafe, UnwindSafe}; - - fn assert_send<T: Send>() {} - fn assert_sync<T: Sync>() {} - fn assert_unwind_safe<T: UnwindSafe>() {} - fn assert_ref_unwind_safe<T: RefUnwindSafe>() {} - - assert_send::<Regex>(); - assert_sync::<Regex>(); - assert_unwind_safe::<Regex>(); - assert_ref_unwind_safe::<Regex>(); - assert_send::<RegexBuilder>(); - assert_sync::<RegexBuilder>(); - assert_unwind_safe::<RegexBuilder>(); - assert_ref_unwind_safe::<RegexBuilder>(); - - assert_send::<bytes::Regex>(); - assert_sync::<bytes::Regex>(); - assert_unwind_safe::<bytes::Regex>(); - assert_ref_unwind_safe::<bytes::Regex>(); - assert_send::<bytes::RegexBuilder>(); - assert_sync::<bytes::RegexBuilder>(); - assert_unwind_safe::<bytes::RegexBuilder>(); - assert_ref_unwind_safe::<bytes::RegexBuilder>(); - - assert_send::<RegexSet>(); - assert_sync::<RegexSet>(); - assert_unwind_safe::<RegexSet>(); - assert_ref_unwind_safe::<RegexSet>(); - assert_send::<RegexSetBuilder>(); - assert_sync::<RegexSetBuilder>(); 
- assert_unwind_safe::<RegexSetBuilder>(); - assert_ref_unwind_safe::<RegexSetBuilder>(); - - assert_send::<bytes::RegexSet>(); - assert_sync::<bytes::RegexSet>(); - assert_unwind_safe::<bytes::RegexSet>(); - assert_ref_unwind_safe::<bytes::RegexSet>(); - assert_send::<bytes::RegexSetBuilder>(); - assert_sync::<bytes::RegexSetBuilder>(); - assert_unwind_safe::<bytes::RegexSetBuilder>(); - assert_ref_unwind_safe::<bytes::RegexSetBuilder>(); -} - -// See: https://github.com/rust-lang/regex/issues/568 -#[test] -fn oibits_regression() { - use regex::Regex; - use std::panic; - - let _ = panic::catch_unwind(|| Regex::new("a").unwrap()); -} - -// See: https://github.com/rust-lang/regex/issues/750 -#[test] -#[cfg(target_pointer_width = "64")] -fn regex_is_reasonably_small() { - use std::mem::size_of; - - use regex::bytes; - use regex::{Regex, RegexSet}; - - assert_eq!(16, size_of::<Regex>()); - assert_eq!(16, size_of::<RegexSet>()); - assert_eq!(16, size_of::<bytes::Regex>()); - assert_eq!(16, size_of::<bytes::RegexSet>()); -} - -// See: https://github.com/rust-lang/regex/security/advisories/GHSA-m5pq-gvj9-9vr8 -// See: CVE-2022-24713 -// -// We test that our regex compiler will correctly return a "too big" error when -// we try to use a very large repetition on an *empty* sub-expression. -// -// At the time this test was written, the regex compiler does not represent -// empty sub-expressions with any bytecode instructions. In effect, it's an -// "optimization" to leave them out, since they would otherwise correspond -// to an unconditional JUMP in the regex bytecode (i.e., an unconditional -// epsilon transition in the NFA graph). Therefore, an empty sub-expression -// represents an interesting case for the compiler's size limits. Since it -// doesn't actually contribute any additional memory to the compiled regex -// instructions, the size limit machinery never detects it. 
Instead, it just -// dumbly tries to compile the empty sub-expression N times, where N is the -// repetition size. -// -// When N is very large, this will cause the compiler to essentially spin and -// do nothing for a decently large amount of time. It causes the regex to take -// quite a bit of time to compile, despite the concrete syntax of the regex -// being quite small. -// -// The degree to which this is actually a problem is somewhat of a judgment -// call. Some regexes simply take a long time to compile. But in general, you -// should be able to reasonably control this by setting lower or higher size -// limits on the compiled object size. But this mitigation doesn't work at all -// for this case. -// -// This particular test is somewhat narrow. It merely checks that regex -// compilation will, at some point, return a "too big" error. Before the -// fix landed, this test would eventually fail because the regex would be -// successfully compiled (after enough time elapsed). So while this test -// doesn't check that we exit in a reasonable amount of time, it does at least -// check that we are properly returning an error at some point. -#[test] -fn big_empty_regex_fails() { - use regex::Regex; - - let result = Regex::new("(?:){4294967295}"); - assert!(result.is_err()); -} - -// Below is a "billion laughs" variant of the previous test case. -#[test] -fn big_empty_reps_chain_regex_fails() { - use regex::Regex; - - let result = Regex::new("(?:){64}{64}{64}{64}{64}{64}"); - assert!(result.is_err()); -} - -// Below is another situation where a zero-length sub-expression can be -// introduced. -#[test] -fn big_zero_reps_regex_fails() { - use regex::Regex; - - let result = Regex::new(r"x{0}{4294967295}"); - assert!(result.is_err()); -} - -// Testing another case for completeness. 
-#[test] -fn empty_alt_regex_fails() { - use regex::Regex; - - let result = Regex::new(r"(?:|){4294967295}"); - assert!(result.is_err()); -} - -// Regression test for: https://github.com/rust-lang/regex/issues/969 -#[test] -fn regression_i969() { - use regex::Regex; - - let re = Regex::new(r"c.*d\z").unwrap(); - assert_eq!(Some(6), re.shortest_match_at("ababcd", 4)); - assert_eq!(Some(6), re.find_at("ababcd", 4).map(|m| m.end())); -} diff --git a/vendor/regex/tests/test_default_bytes.rs b/vendor/regex/tests/test_default_bytes.rs deleted file mode 100644 index f200596..0000000 --- a/vendor/regex/tests/test_default_bytes.rs +++ /dev/null @@ -1,75 +0,0 @@ -macro_rules! regex_new { - ($re:expr) => {{ - use regex::bytes::Regex; - Regex::new($re) - }}; -} - -macro_rules! regex_set_new { - ($res:expr) => {{ - use regex::bytes::RegexSet; - RegexSet::new($res) - }}; -} - -macro_rules! regex { - ($re:expr) => { - regex_new!($re).unwrap() - }; -} - -macro_rules! regex_set { - ($res:expr) => { - regex_set_new!($res).unwrap() - }; -} - -// Must come before other module definitions. -include!("macros_bytes.rs"); -include!("macros.rs"); - -// A silly wrapper to make it possible to write and match raw bytes. -struct R<'a>(&'a [u8]); -impl<'a> R<'a> { - fn as_bytes(&self) -> &'a [u8] { - self.0 - } -} - -// See: https://github.com/rust-lang/regex/issues/321 -// -// These tests are here because they do not have the same behavior in every -// regex engine. 
-mat!(invalid_utf8_nfa1, r".", R(b"\xD4\xC2\x65\x2B\x0E\xFE"), Some((2, 3))); -mat!(invalid_utf8_nfa2, r"${2}ä", R(b"\xD4\xC2\x65\x2B\x0E\xFE"), None); -mat!( - invalid_utf8_nfa3, - r".", - R(b"\x0A\xDB\x82\x6E\x33\x01\xDD\x33\xCD"), - Some((1, 3)) -); -mat!( - invalid_utf8_nfa4, - r"${2}ä", - R(b"\x0A\xDB\x82\x6E\x33\x01\xDD\x33\xCD"), - None -); - -mod api; -mod bytes; -mod crazy; -mod flags; -mod fowler; -mod multiline; -mod noparse; -mod regression; -mod replace; -mod set; -mod shortest_match; -mod suffix_reverse; -#[cfg(feature = "unicode")] -mod unicode; -#[cfg(feature = "unicode-perl")] -mod word_boundary; -#[cfg(feature = "unicode-perl")] -mod word_boundary_unicode; diff --git a/vendor/regex/tests/test_nfa.rs b/vendor/regex/tests/test_nfa.rs deleted file mode 100644 index e5a67d1..0000000 --- a/vendor/regex/tests/test_nfa.rs +++ /dev/null @@ -1,50 +0,0 @@ -#![cfg_attr(feature = "pattern", feature(pattern))] - -macro_rules! regex_new { - ($re:expr) => {{ - use regex::internal::ExecBuilder; - ExecBuilder::new($re).nfa().build().map(|e| e.into_regex()) - }}; -} - -macro_rules! regex { - ($re:expr) => { - regex_new!($re).unwrap() - }; -} - -macro_rules! regex_set_new { - ($re:expr) => {{ - use regex::internal::ExecBuilder; - ExecBuilder::new_many($re).nfa().build().map(|e| e.into_regex_set()) - }}; -} - -macro_rules! regex_set { - ($res:expr) => { - regex_set_new!($res).unwrap() - }; -} - -// Must come before other module definitions. 
-include!("macros_str.rs"); -include!("macros.rs"); - -mod api; -mod api_str; -mod crazy; -mod flags; -mod fowler; -mod multiline; -mod noparse; -mod regression; -mod replace; -mod searcher; -mod set; -mod suffix_reverse; -#[cfg(feature = "unicode")] -mod unicode; -#[cfg(feature = "unicode-perl")] -mod word_boundary; -#[cfg(feature = "unicode-perl")] -mod word_boundary_unicode; diff --git a/vendor/regex/tests/test_nfa_bytes.rs b/vendor/regex/tests/test_nfa_bytes.rs deleted file mode 100644 index 0a10e03..0000000 --- a/vendor/regex/tests/test_nfa_bytes.rs +++ /dev/null @@ -1,55 +0,0 @@ -macro_rules! regex_new { - ($re:expr) => {{ - use regex::internal::ExecBuilder; - ExecBuilder::new($re) - .nfa() - .only_utf8(false) - .build() - .map(|e| e.into_byte_regex()) - }}; -} - -macro_rules! regex { - ($re:expr) => { - regex_new!($re).unwrap() - }; -} - -macro_rules! regex_set_new { - ($re:expr) => {{ - use regex::internal::ExecBuilder; - ExecBuilder::new_many($re) - .nfa() - .only_utf8(false) - .build() - .map(|e| e.into_byte_regex_set()) - }}; -} - -macro_rules! regex_set { - ($res:expr) => { - regex_set_new!($res).unwrap() - }; -} - -// Must come before other module definitions. -include!("macros_bytes.rs"); -include!("macros.rs"); - -mod api; -mod bytes; -mod crazy; -mod flags; -mod fowler; -mod multiline; -mod noparse; -mod regression; -mod replace; -mod set; -mod suffix_reverse; -#[cfg(feature = "unicode")] -mod unicode; -#[cfg(feature = "unicode-perl")] -mod word_boundary; -#[cfg(feature = "unicode-perl")] -mod word_boundary_unicode; diff --git a/vendor/regex/tests/test_nfa_utf8bytes.rs b/vendor/regex/tests/test_nfa_utf8bytes.rs deleted file mode 100644 index 36a572b..0000000 --- a/vendor/regex/tests/test_nfa_utf8bytes.rs +++ /dev/null @@ -1,54 +0,0 @@ -#![cfg_attr(feature = "pattern", feature(pattern))] - -macro_rules! 
regex_new { - ($re:expr) => {{ - use regex::internal::ExecBuilder; - ExecBuilder::new($re).nfa().bytes(true).build().map(|e| e.into_regex()) - }}; -} - -macro_rules! regex { - ($re:expr) => { - regex_new!($re).unwrap() - }; -} - -macro_rules! regex_set_new { - ($re:expr) => {{ - use regex::internal::ExecBuilder; - ExecBuilder::new_many($re) - .nfa() - .bytes(true) - .build() - .map(|e| e.into_regex_set()) - }}; -} - -macro_rules! regex_set { - ($res:expr) => { - regex_set_new!($res).unwrap() - }; -} - -// Must come before other module definitions. -include!("macros_str.rs"); -include!("macros.rs"); - -mod api; -mod api_str; -mod crazy; -mod flags; -mod fowler; -mod multiline; -mod noparse; -mod regression; -mod replace; -mod searcher; -mod set; -mod suffix_reverse; -#[cfg(feature = "unicode")] -mod unicode; -#[cfg(feature = "unicode-perl")] -mod word_boundary; -#[cfg(feature = "unicode-perl")] -mod word_boundary_unicode; diff --git a/vendor/regex/tests/unicode.rs b/vendor/regex/tests/unicode.rs deleted file mode 100644 index d7dbdd3..0000000 --- a/vendor/regex/tests/unicode.rs +++ /dev/null @@ -1,254 +0,0 @@ -mat!(uni_literal, r"☃", "☃", Some((0, 3))); -mat!(uni_literal_plus, r"☃+", "☃", Some((0, 3))); -mat!(uni_literal_casei_plus, r"(?i)☃+", "☃", Some((0, 3))); -mat!(uni_class_plus, r"[☃Ⅰ]+", "☃", Some((0, 3))); -mat!(uni_one, r"\pN", "Ⅰ", Some((0, 3))); -mat!(uni_mixed, r"\pN+", "Ⅰ1Ⅱ2", Some((0, 8))); -mat!(uni_not, r"\PN+", "abⅠ", Some((0, 2))); -mat!(uni_not_class, r"[\PN]+", "abⅠ", Some((0, 2))); -mat!(uni_not_class_neg, r"[^\PN]+", "abⅠ", Some((2, 5))); -mat!(uni_case, r"(?i)Δ", "δ", Some((0, 2))); -mat!(uni_case_upper, r"\p{Lu}+", "ΛΘΓΔα", Some((0, 8))); -mat!(uni_case_upper_nocase_flag, r"(?i)\p{Lu}+", "ΛΘΓΔα", Some((0, 10))); -mat!(uni_case_upper_nocase, r"\p{L}+", "ΛΘΓΔα", Some((0, 10))); -mat!(uni_case_lower, r"\p{Ll}+", "ΛΘΓΔα", Some((8, 10))); - -// Test the Unicode friendliness of Perl character classes. 
-mat!(uni_perl_w, r"\w+", "dδd", Some((0, 4))); -mat!(uni_perl_w_not, r"\w+", "⥡", None); -mat!(uni_perl_w_neg, r"\W+", "⥡", Some((0, 3))); -mat!(uni_perl_d, r"\d+", "1२३9", Some((0, 8))); -mat!(uni_perl_d_not, r"\d+", "Ⅱ", None); -mat!(uni_perl_d_neg, r"\D+", "Ⅱ", Some((0, 3))); -mat!(uni_perl_s, r"\s+", " ", Some((0, 3))); -mat!(uni_perl_s_not, r"\s+", "☃", None); -mat!(uni_perl_s_neg, r"\S+", "☃", Some((0, 3))); - -// And do the same for word boundaries. -mat!(uni_boundary_none, r"\d\b", "6δ", None); -mat!(uni_boundary_ogham, r"\d\b", "6 ", Some((0, 1))); -mat!(uni_not_boundary_none, r"\d\B", "6δ", Some((0, 1))); -mat!(uni_not_boundary_ogham, r"\d\B", "6 ", None); - -// Test general categories. -// -// We should test more, but there's a lot. Write a script to generate more of -// these tests. -mat!(uni_class_gencat_cased_letter, r"\p{Cased_Letter}", "A", Some((0, 3))); -mat!(uni_class_gencat_cased_letter2, r"\p{gc=LC}", "A", Some((0, 3))); -mat!(uni_class_gencat_cased_letter3, r"\p{LC}", "A", Some((0, 3))); -mat!( - uni_class_gencat_close_punctuation, - r"\p{Close_Punctuation}", - "❯", - Some((0, 3)) -); -mat!( - uni_class_gencat_connector_punctuation, - r"\p{Connector_Punctuation}", - "⁀", - Some((0, 3)) -); -mat!(uni_class_gencat_control, r"\p{Control}", "\u{9f}", Some((0, 2))); -mat!( - uni_class_gencat_currency_symbol, - r"\p{Currency_Symbol}", - "£", - Some((0, 3)) -); -mat!( - uni_class_gencat_dash_punctuation, - r"\p{Dash_Punctuation}", - "〰", - Some((0, 3)) -); -mat!(uni_class_gencat_decimal_numer, r"\p{Decimal_Number}", "𑓙", Some((0, 4))); -mat!( - uni_class_gencat_enclosing_mark, - r"\p{Enclosing_Mark}", - "\u{A672}", - Some((0, 3)) -); -mat!( - uni_class_gencat_final_punctuation, - r"\p{Final_Punctuation}", - "⸡", - Some((0, 3)) -); -mat!(uni_class_gencat_format, r"\p{Format}", "\u{E007F}", Some((0, 4))); -// See: https://github.com/rust-lang/regex/issues/719 -mat!(uni_class_gencat_format_abbrev1, r"\p{cf}", "\u{E007F}", Some((0, 4))); 
-mat!(uni_class_gencat_format_abbrev2, r"\p{gc=cf}", "\u{E007F}", Some((0, 4))); -mat!(uni_class_gencat_format_abbrev3, r"\p{Sc}", "$", Some((0, 1))); -mat!( - uni_class_gencat_initial_punctuation, - r"\p{Initial_Punctuation}", - "⸜", - Some((0, 3)) -); -mat!(uni_class_gencat_letter, r"\p{Letter}", "Έ", Some((0, 2))); -mat!(uni_class_gencat_letter_number, r"\p{Letter_Number}", "ↂ", Some((0, 3))); -mat!( - uni_class_gencat_line_separator, - r"\p{Line_Separator}", - "\u{2028}", - Some((0, 3)) -); -mat!( - uni_class_gencat_lowercase_letter, - r"\p{Lowercase_Letter}", - "ϛ", - Some((0, 2)) -); -mat!(uni_class_gencat_mark, r"\p{Mark}", "\u{E01EF}", Some((0, 4))); -mat!(uni_class_gencat_math, r"\p{Math}", "⋿", Some((0, 3))); -mat!( - uni_class_gencat_modifier_letter, - r"\p{Modifier_Letter}", - "𖭃", - Some((0, 4)) -); -mat!( - uni_class_gencat_modifier_symbol, - r"\p{Modifier_Symbol}", - "🏿", - Some((0, 4)) -); -mat!( - uni_class_gencat_nonspacing_mark, - r"\p{Nonspacing_Mark}", - "\u{1E94A}", - Some((0, 4)) -); -mat!(uni_class_gencat_number, r"\p{Number}", "⓿", Some((0, 3))); -mat!( - uni_class_gencat_open_punctuation, - r"\p{Open_Punctuation}", - "⦅", - Some((0, 3)) -); -mat!(uni_class_gencat_other, r"\p{Other}", "\u{bc9}", Some((0, 3))); -mat!(uni_class_gencat_other_letter, r"\p{Other_Letter}", "ꓷ", Some((0, 3))); -mat!(uni_class_gencat_other_number, r"\p{Other_Number}", "㉏", Some((0, 3))); -mat!( - uni_class_gencat_other_punctuation, - r"\p{Other_Punctuation}", - "𞥞", - Some((0, 4)) -); -mat!(uni_class_gencat_other_symbol, r"\p{Other_Symbol}", "⅌", Some((0, 3))); -mat!( - uni_class_gencat_paragraph_separator, - r"\p{Paragraph_Separator}", - "\u{2029}", - Some((0, 3)) -); -mat!( - uni_class_gencat_private_use, - r"\p{Private_Use}", - "\u{10FFFD}", - Some((0, 4)) -); -mat!(uni_class_gencat_punctuation, r"\p{Punctuation}", "𑁍", Some((0, 4))); -mat!(uni_class_gencat_separator, r"\p{Separator}", "\u{3000}", Some((0, 3))); -mat!( - uni_class_gencat_space_separator, - 
r"\p{Space_Separator}", - "\u{205F}", - Some((0, 3)) -); -mat!( - uni_class_gencat_spacing_mark, - r"\p{Spacing_Mark}", - "\u{16F7E}", - Some((0, 4)) -); -mat!(uni_class_gencat_symbol, r"\p{Symbol}", "⯈", Some((0, 3))); -mat!( - uni_class_gencat_titlecase_letter, - r"\p{Titlecase_Letter}", - "ῼ", - Some((0, 3)) -); -mat!( - uni_class_gencat_unassigned, - r"\p{Unassigned}", - "\u{10FFFF}", - Some((0, 4)) -); -mat!( - uni_class_gencat_uppercase_letter, - r"\p{Uppercase_Letter}", - "Ꝋ", - Some((0, 3)) -); - -// Test a smattering of properties. -mat!(uni_class_prop_emoji1, r"\p{Emoji}", "\u{23E9}", Some((0, 3))); -mat!(uni_class_prop_emoji2, r"\p{emoji}", "\u{1F21A}", Some((0, 4))); -mat!( - uni_class_prop_picto1, - r"\p{extendedpictographic}", - "\u{1FA6E}", - Some((0, 4)) -); -mat!( - uni_class_prop_picto2, - r"\p{extendedpictographic}", - "\u{1FFFD}", - Some((0, 4)) -); - -// grapheme_cluster_break -mat!( - uni_class_gcb_prepend, - r"\p{grapheme_cluster_break=prepend}", - "\u{11D46}", - Some((0, 4)) -); -mat!( - uni_class_gcb_ri1, - r"\p{gcb=regional_indicator}", - "\u{1F1E6}", - Some((0, 4)) -); -mat!(uni_class_gcb_ri2, r"\p{gcb=ri}", "\u{1F1E7}", Some((0, 4))); -mat!( - uni_class_gcb_ri3, - r"\p{gcb=regionalindicator}", - "\u{1F1FF}", - Some((0, 4)) -); -mat!(uni_class_gcb_lvt, r"\p{gcb=lvt}", "\u{C989}", Some((0, 3))); -mat!(uni_class_gcb_zwj, r"\p{gcb=zwj}", "\u{200D}", Some((0, 3))); - -// word_break -mat!(uni_class_wb1, r"\p{word_break=Hebrew_Letter}", "\u{FB46}", Some((0, 3))); -mat!(uni_class_wb2, r"\p{wb=hebrewletter}", "\u{FB46}", Some((0, 3))); -mat!(uni_class_wb3, r"\p{wb=ExtendNumLet}", "\u{FF3F}", Some((0, 3))); -mat!(uni_class_wb4, r"\p{wb=WSegSpace}", "\u{3000}", Some((0, 3))); -mat!(uni_class_wb5, r"\p{wb=numeric}", "\u{1E950}", Some((0, 4))); - -// sentence_break -mat!(uni_class_sb1, r"\p{sentence_break=Lower}", "\u{0469}", Some((0, 2))); -mat!(uni_class_sb2, r"\p{sb=lower}", "\u{0469}", Some((0, 2))); -mat!(uni_class_sb3, r"\p{sb=Close}", 
"\u{FF60}", Some((0, 3))); -mat!(uni_class_sb4, r"\p{sb=Close}", "\u{1F677}", Some((0, 4))); -mat!(uni_class_sb5, r"\p{sb=SContinue}", "\u{FF64}", Some((0, 3))); - -// Test 'Vithkuqi' support, which was added in Unicode 14. -// See: https://github.com/rust-lang/regex/issues/877 -mat!( - uni_vithkuqi_literal_upper, - r"(?i)^\u{10570}$", - "\u{10570}", - Some((0, 4)) -); -mat!( - uni_vithkuqi_literal_lower, - r"(?i)^\u{10570}$", - "\u{10597}", - Some((0, 4)) -); -mat!(uni_vithkuqi_word_upper, r"^\w$", "\u{10570}", Some((0, 4))); -mat!(uni_vithkuqi_word_lower, r"^\w$", "\u{10597}", Some((0, 4))); diff --git a/vendor/regex/tests/word_boundary.rs b/vendor/regex/tests/word_boundary.rs deleted file mode 100644 index 7fe97a2..0000000 --- a/vendor/regex/tests/word_boundary.rs +++ /dev/null @@ -1,89 +0,0 @@ -// Many of these are cribbed from RE2's test suite. - -matiter!(wb1, r"\b", ""); -matiter!(wb2, r"\b", "a", (0, 0), (1, 1)); -matiter!(wb3, r"\b", "ab", (0, 0), (2, 2)); -matiter!(wb4, r"^\b", "ab", (0, 0)); -matiter!(wb5, r"\b$", "ab", (2, 2)); -matiter!(wb6, r"^\b$", "ab"); -matiter!(wb7, r"\bbar\b", "nobar bar foo bar", (6, 9), (14, 17)); -matiter!(wb8, r"a\b", "faoa x", (3, 4)); -matiter!(wb9, r"\bbar", "bar x", (0, 3)); -matiter!(wb10, r"\bbar", "foo\nbar x", (4, 7)); -matiter!(wb11, r"bar\b", "foobar", (3, 6)); -matiter!(wb12, r"bar\b", "foobar\nxxx", (3, 6)); -matiter!(wb13, r"(foo|bar|[A-Z])\b", "foo", (0, 3)); -matiter!(wb14, r"(foo|bar|[A-Z])\b", "foo\n", (0, 3)); -matiter!(wb15, r"\b(foo|bar|[A-Z])", "foo", (0, 3)); -matiter!(wb16, r"\b(foo|bar|[A-Z])\b", "X", (0, 1)); -matiter!(wb17, r"\b(foo|bar|[A-Z])\b", "XY"); -matiter!(wb18, r"\b(foo|bar|[A-Z])\b", "bar", (0, 3)); -matiter!(wb19, r"\b(foo|bar|[A-Z])\b", "foo", (0, 3)); -matiter!(wb20, r"\b(foo|bar|[A-Z])\b", "foo\n", (0, 3)); -matiter!(wb21, r"\b(foo|bar|[A-Z])\b", "ffoo bbar N x", (10, 11)); -matiter!(wb22, r"\b(fo|foo)\b", "fo", (0, 2)); -matiter!(wb23, r"\b(fo|foo)\b", "foo", (0, 3)); -matiter!(wb24, 
r"\b\b", ""); -matiter!(wb25, r"\b\b", "a", (0, 0), (1, 1)); -matiter!(wb26, r"\b$", ""); -matiter!(wb27, r"\b$", "x", (1, 1)); -matiter!(wb28, r"\b$", "y x", (3, 3)); -matiter!(wb29, r"\b.$", "x", (0, 1)); -matiter!(wb30, r"^\b(fo|foo)\b", "fo", (0, 2)); -matiter!(wb31, r"^\b(fo|foo)\b", "foo", (0, 3)); -matiter!(wb32, r"^\b$", ""); -matiter!(wb33, r"^\b$", "x"); -matiter!(wb34, r"^\b.$", "x", (0, 1)); -matiter!(wb35, r"^\b.\b$", "x", (0, 1)); -matiter!(wb36, r"^^^^^\b$$$$$", ""); -matiter!(wb37, r"^^^^^\b.$$$$$", "x", (0, 1)); -matiter!(wb38, r"^^^^^\b$$$$$", "x"); -matiter!(wb39, r"^^^^^\b\b\b.\b\b\b$$$$$", "x", (0, 1)); -matiter!(wb40, r"\b.+\b", "$$abc$$", (2, 5)); -matiter!(wb41, r"\b", "a b c", (0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)); - -matiter!(nb1, r"\Bfoo\B", "n foo xfoox that", (7, 10)); -matiter!(nb2, r"a\B", "faoa x", (1, 2)); -matiter!(nb3, r"\Bbar", "bar x"); -matiter!(nb4, r"\Bbar", "foo\nbar x"); -matiter!(nb5, r"bar\B", "foobar"); -matiter!(nb6, r"bar\B", "foobar\nxxx"); -matiter!(nb7, r"(foo|bar|[A-Z])\B", "foox", (0, 3)); -matiter!(nb8, r"(foo|bar|[A-Z])\B", "foo\n"); -matiter!(nb9, r"\B", "", (0, 0)); -matiter!(nb10, r"\B", "x"); -matiter!(nb11, r"\B(foo|bar|[A-Z])", "foo"); -matiter!(nb12, r"\B(foo|bar|[A-Z])\B", "xXy", (1, 2)); -matiter!(nb13, r"\B(foo|bar|[A-Z])\B", "XY"); -matiter!(nb14, r"\B(foo|bar|[A-Z])\B", "XYZ", (1, 2)); -matiter!(nb15, r"\B(foo|bar|[A-Z])\B", "abara", (1, 4)); -matiter!(nb16, r"\B(foo|bar|[A-Z])\B", "xfoo_", (1, 4)); -matiter!(nb17, r"\B(foo|bar|[A-Z])\B", "xfoo\n"); -matiter!(nb18, r"\B(foo|bar|[A-Z])\B", "foo bar vNX", (9, 10)); -matiter!(nb19, r"\B(fo|foo)\B", "xfoo", (1, 3)); -matiter!(nb20, r"\B(foo|fo)\B", "xfooo", (1, 4)); -matiter!(nb21, r"\B\B", "", (0, 0)); -matiter!(nb22, r"\B\B", "x"); -matiter!(nb23, r"\B$", "", (0, 0)); -matiter!(nb24, r"\B$", "x"); -matiter!(nb25, r"\B$", "y x"); -matiter!(nb26, r"\B.$", "x"); -matiter!(nb27, r"^\B(fo|foo)\B", "fo"); -matiter!(nb28, r"^\B(fo|foo)\B", "foo"); 
-matiter!(nb29, r"^\B", "", (0, 0)); -matiter!(nb30, r"^\B", "x"); -matiter!(nb31, r"^\B\B", "", (0, 0)); -matiter!(nb32, r"^\B\B", "x"); -matiter!(nb33, r"^\B$", "", (0, 0)); -matiter!(nb34, r"^\B$", "x"); -matiter!(nb35, r"^\B.$", "x"); -matiter!(nb36, r"^\B.\B$", "x"); -matiter!(nb37, r"^^^^^\B$$$$$", "", (0, 0)); -matiter!(nb38, r"^^^^^\B.$$$$$", "x"); -matiter!(nb39, r"^^^^^\B$$$$$", "x"); - -// These work for both Unicode and ASCII because all matches are reported as -// byte offsets, and « and » do not correspond to word boundaries at either -// the character or byte level. -matiter!(unicode1, r"\bx\b", "«x", (2, 3)); -matiter!(unicode2, r"\bx\b", "x»", (0, 1)); diff --git a/vendor/regex/tests/word_boundary_ascii.rs b/vendor/regex/tests/word_boundary_ascii.rs deleted file mode 100644 index 5a3cf11..0000000 --- a/vendor/regex/tests/word_boundary_ascii.rs +++ /dev/null @@ -1,9 +0,0 @@ -// ASCII word boundaries are completely oblivious to Unicode characters. -// For Unicode word boundaries, the tests are precisely inverted. -matiter!(ascii1, r"(?-u:\b)x(?-u:\b)", "áxβ", (2, 3)); -matiter!(ascii2, r"(?-u:\B)x(?-u:\B)", "áxβ"); -matiter!(ascii3, r"(?-u:\B)", "0\u{7EF5E}", (2, 2), (3, 3), (4, 4), (5, 5)); - -// We still get Unicode word boundaries by default in byte regexes. -matiter!(unicode1, r"\bx\b", "áxβ"); -matiter!(unicode2, r"\Bx\B", "áxβ", (2, 3)); diff --git a/vendor/regex/tests/word_boundary_unicode.rs b/vendor/regex/tests/word_boundary_unicode.rs deleted file mode 100644 index c41355f..0000000 --- a/vendor/regex/tests/word_boundary_unicode.rs +++ /dev/null @@ -1,6 +0,0 @@ -// Unicode word boundaries know about Unicode characters. -// For ASCII word boundaries, the tests are precisely inverted. 
-matiter!(unicode1, r"\bx\b", "áxβ"); -matiter!(unicode2, r"\Bx\B", "áxβ", (2, 3)); - -matiter!(ascii1, r"(?-u:\b)x(?-u:\b)", "áxβ", (2, 3)); diff --git a/vendor/ryu/.cargo-checksum.json b/vendor/ryu/.cargo-checksum.json index 509b8aa..f924e7b 100644 --- a/vendor/ryu/.cargo-checksum.json +++ b/vendor/ryu/.cargo-checksum.json @@ -1 +1 @@ -{"files":{"Cargo.lock":"fc4e22fdfa86a5c14efe88ee7dc00f69b7d791bd95c6d3fe5282ffcb681637fc","Cargo.toml":"00141bba1bf52c7297e9e55feccca374ae2f8f11f3242cf0ef88e016da553fd6","LICENSE-APACHE":"62c7a1e35f56406896d7aa7ca52d0cc0d272ac022b5d2796e7d6905db8a3636a","LICENSE-BOOST":"c9bff75738922193e67fa726fa225535870d2aa1059f91452c411736284ad566","README.md":"df6a7a024b604ad98dd7603ad261150ef73a94a9de691bd5d2510e12a200021a","benches/bench.rs":"703521c8cb9c6959ee305776a9971d24754b6fff5c1737741be04f956a3692e8","examples/upstream_benchmark.rs":"f702d3598a8fac59134a8058ebf74ba90163b1f23ebbd6c5978a7bd8a888d357","src/buffer/mod.rs":"c5adf9aa037271916e78c61c9fd98e3230a0fed1fca15694d4d57166fa697125","src/common.rs":"cae347e97fc30c50a964f80425e8c3e69ece2b8ab81f9b81b9baa7fcec64a001","src/d2s.rs":"83f821f17fd8d2cf72bcc47cc8c603ab24f2377db6cd0f08638031716f8dc17c","src/d2s_full_table.rs":"9b0186acbc6d65dc55c17e16125be707a2bfb920d22b35d33234b4cc38566a36","src/d2s_intrinsics.rs":"658d00a64ce2aca7f0780a1acc5939167e4a66d836b51c46de1047820992fec1","src/d2s_small_table.rs":"7b25cfbf0793d0662d83f5d92a9f880295652db9979b5acf702b313359996508","src/digit_table.rs":"02351ca54cb8cb3679f635115dd094f32fd91750e9f66103c1ee9ec3db507072","src/f2s.rs":"55320c2301680d8be3a908620cccd9d103b0cd3ad7a7d3378589e274ffc2587b","src/f2s_intrinsics.rs":"97bab98093838e30c60f5135f54f5ccb039ff7d9f35553ac8e74437743ca47e2","src/lib.rs":"e30bfef861273c9699638e4402509590c278be796f2b0891834c94e715410c5b","src/parse.rs":"7f8aa7e007caf5dcb03abdc4238157724bb742d0823a3b8a01646fa1f1129154","src/pretty/exponent.rs":"6c9aa1c707c567ae338647056e37557a94e5120781ee9f6f64e9c7071ffb50d0","src/pretty/mantissa.
rs":"5e8d0a6bfdfd04e599a9fc8aefd638e3288651279e870e7cd44820717c3b6438","src/pretty/mod.rs":"731798246d414ca54df739c212f1cb8e05991a0472a7a1c28771e24d7a1cf09b","src/s2d.rs":"2f572603eedaa9efbe864105999a1ceac8aa4ff4e1d2fbd96127692460194d16","src/s2f.rs":"6ae7430fba61f59aa6010d446f5c1043974b6fadb8e4c75ce2ad56f73ee48f4a","tests/common_test.rs":"275184cf366f80c11e5f33c2d53065a073e20d81bf71ca70478c89e47fb8da36","tests/d2s_table_test.rs":"54b3a7d40aa9bec03e9dc555d15fb4512ee16a16398b3098a97819fab50c81f3","tests/d2s_test.rs":"39014777edd6e3231095186174c4ef341fd9c12ecc5510765761713b6cac3bb4","tests/exhaustive.rs":"f475ed9008a2cd86ce95abb577a4b01e9fed23fc16f7e217ccffb3b834005fa0","tests/f2s_test.rs":"10940f005e73a42bb106ff498e7a6cc4665d04d82829fef8dc7d0eb36f574e6f","tests/macros/mod.rs":"8e90a674b3960f9516cb38f4eea0e0981ff902c3b33572ebdb6c5528d3ffa72c","tests/s2d_test.rs":"75c3a1044881718db65e05f25c9f6e1d005392dddb2e8dafb799668bb6a9a5c3","tests/s2f_test.rs":"1ec06646cb65229bfe866ec913901a0d8d736668f30b812fc4b00136a43f5142"},"package":"f91339c0467de62360649f8d3e185ca8de4224ff281f66000de5eb2a77a79041"} \ No newline at end of file 
+{"files":{"Cargo.lock":"3b77e9629ab0087ab9592d75fb5dd6f80c43f9b519c89522999bac2bb6d1fc9a","Cargo.toml":"ec13c54a057c6910f5e1d41bdce15f363fe67e45e0022dbd4f96c7660ff86f57","LICENSE-APACHE":"62c7a1e35f56406896d7aa7ca52d0cc0d272ac022b5d2796e7d6905db8a3636a","LICENSE-BOOST":"c9bff75738922193e67fa726fa225535870d2aa1059f91452c411736284ad566","README.md":"86f0a92cf076f4983f99926607ea272c9650a5996fa3921fc5ca5abceb0f18db","benches/bench.rs":"703521c8cb9c6959ee305776a9971d24754b6fff5c1737741be04f956a3692e8","examples/upstream_benchmark.rs":"f702d3598a8fac59134a8058ebf74ba90163b1f23ebbd6c5978a7bd8a888d357","src/buffer/mod.rs":"e32f3fa7e994ff704796e58e115c5258e94a79a184d1608864772f2f2f5274fc","src/common.rs":"cae347e97fc30c50a964f80425e8c3e69ece2b8ab81f9b81b9baa7fcec64a001","src/d2s.rs":"f2612785ebe510c935b979dc5f66f6b8c818ca8a4cf0364ce1fe1d41fea39592","src/d2s_full_table.rs":"9b0186acbc6d65dc55c17e16125be707a2bfb920d22b35d33234b4cc38566a36","src/d2s_intrinsics.rs":"bbf15472f4299942312e80a992cbc2f47f85f17ed193f24084534434dbfb26e7","src/d2s_small_table.rs":"db3bbe4002d816785b0ee233c330f19fa7002f31dab47dc6f67b266996fe3ae4","src/digit_table.rs":"02351ca54cb8cb3679f635115dd094f32fd91750e9f66103c1ee9ec3db507072","src/f2s.rs":"cb96f61d8c6c6c941803a7b629f2bf835e1a20ad9d3e5d3454a30ed3391c3515","src/f2s_intrinsics.rs":"97bab98093838e30c60f5135f54f5ccb039ff7d9f35553ac8e74437743ca47e2","src/lib.rs":"ebc4c922a1c421ce54beb485fd01726e68f5a27213ea369ebdf2b536398c0737","src/parse.rs":"7f8aa7e007caf5dcb03abdc4238157724bb742d0823a3b8a01646fa1f1129154","src/pretty/exponent.rs":"fa914ec63b3f86cbdaf7933d7c44e1bc1f93c1239a29a5f86934680a7e957570","src/pretty/mantissa.rs":"40cb00efe1c3fab559ab58389bd519d556548aa18fb261a90dd48138911d039b","src/pretty/mod.rs":"eb0a8c78019f55a1767943821340e8b1278455e0d88bb4f63f4bd3dde340e387","src/s2d.rs":"c804518a771654e3786bde2b776c56e94e198ce6d3fe1e4e5e2f2a9cb9e607e3","src/s2f.rs":"11d528931ce1a01a93f39efb3fe99fdc3041b41fefafb2efd6a338d2a12b628c","tests/common_test.rs
":"599781a637d9b9756858aabfe5c38a0734a550debd3d94774f33792b7b3c8240","tests/d2s_intrinsics_test.rs":"15d11b70810bf04f33f8b185bf7f010a436a4edb47fa4648b1a036568c2c5d15","tests/d2s_table_test.rs":"819c39cc94e3462138d3be337d06e7334de126642d34bf1394e03d2df9c0c90c","tests/d2s_test.rs":"d72aaf37c76a4042ecc12b7d6faf844696016bb72bb20d142ecab3bd6c87e29f","tests/exhaustive.rs":"f475ed9008a2cd86ce95abb577a4b01e9fed23fc16f7e217ccffb3b834005fa0","tests/f2s_test.rs":"ad9e6fe46e712c488b876428c144c79bdff0349b41c57eee5506fc3c9c156624","tests/macros/mod.rs":"8e90a674b3960f9516cb38f4eea0e0981ff902c3b33572ebdb6c5528d3ffa72c","tests/s2d_test.rs":"75c3a1044881718db65e05f25c9f6e1d005392dddb2e8dafb799668bb6a9a5c3","tests/s2f_test.rs":"1ec06646cb65229bfe866ec913901a0d8d736668f30b812fc4b00136a43f5142"},"package":"e86697c916019a8588c99b5fac3cead74ec0b4b819707a682fd4d23fa0ce1ba1"} \ No newline at end of file diff --git a/vendor/ryu/Cargo.toml b/vendor/ryu/Cargo.toml index 96a1627..ef4316d 100644 --- a/vendor/ryu/Cargo.toml +++ b/vendor/ryu/Cargo.toml @@ -13,7 +13,7 @@ edition = "2018" rust-version = "1.36" name = "ryu" -version = "1.0.13" +version = "1.0.17" authors = ["David Tolnay <dtolnay@gmail.com>"] exclude = [ "performance.png", @@ -26,11 +26,13 @@ keywords = ["float"] categories = [ "value-formatting", "no-std", + "no-std::no-alloc", ] license = "Apache-2.0 OR BSL-1.0" repository = "https://github.com/dtolnay/ryu" [package.metadata.docs.rs] +rustdoc-args = ["--generate-link-to-definition"] targets = ["x86_64-unknown-linux-gnu"] [lib] diff --git a/vendor/ryu/README.md b/vendor/ryu/README.md index 0abd71f..998ea3e 100644 --- a/vendor/ryu/README.md +++ b/vendor/ryu/README.md @@ -19,7 +19,7 @@ C, [https://github.com/ulfjack/ryu][upstream]. 
uses nothing from the Rust standard library so is usable from no_std crates.* [paper]: https://dl.acm.org/citation.cfm?id=3192369 -[upstream]: https://github.com/ulfjack/ryu/tree/abf76d252bc97300354857e64e80d4a2bf664291 +[upstream]: https://github.com/ulfjack/ryu/tree/77e767f5e056bab96e895072fc21618ecff2f44b ```toml [dependencies] diff --git a/vendor/ryu/src/buffer/mod.rs b/vendor/ryu/src/buffer/mod.rs index 2ccd9b0..905ee2f 100644 --- a/vendor/ryu/src/buffer/mod.rs +++ b/vendor/ryu/src/buffer/mod.rs @@ -83,6 +83,7 @@ impl Copy for Buffer {} impl Clone for Buffer { #[inline] + #[allow(clippy::non_canonical_clone_impl)] // false positive https://github.com/rust-lang/rust-clippy/issues/11072 fn clone(&self) -> Self { Buffer::new() } diff --git a/vendor/ryu/src/d2s.rs b/vendor/ryu/src/d2s.rs index 392577a..76a8164 100644 --- a/vendor/ryu/src/d2s.rs +++ b/vendor/ryu/src/d2s.rs @@ -18,12 +18,14 @@ // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. 
-use crate::common::*; +use crate::common::{log10_pow2, log10_pow5, pow5bits}; #[cfg(not(feature = "small"))] -pub use crate::d2s_full_table::*; -use crate::d2s_intrinsics::*; +pub use crate::d2s_full_table::{DOUBLE_POW5_INV_SPLIT, DOUBLE_POW5_SPLIT}; +use crate::d2s_intrinsics::{ + div10, div100, div5, mul_shift_all_64, multiple_of_power_of_2, multiple_of_power_of_5, +}; #[cfg(feature = "small")] -pub use crate::d2s_small_table::*; +pub use crate::d2s_small_table::{compute_inv_pow5, compute_pow5}; use core::mem::MaybeUninit; pub const DOUBLE_MANTISSA_BITS: u32 = 52; diff --git a/vendor/ryu/src/d2s_intrinsics.rs b/vendor/ryu/src/d2s_intrinsics.rs index f244a4d..a4e1fb1 100644 --- a/vendor/ryu/src/d2s_intrinsics.rs +++ b/vendor/ryu/src/d2s_intrinsics.rs @@ -36,16 +36,16 @@ pub fn div100(x: u64) -> u64 { } #[cfg_attr(feature = "no-panic", inline)] -fn pow5_factor(mut value: u64) -> u32 { +pub(crate) fn pow5_factor(mut value: u64) -> u32 { + const M_INV_5: u64 = 14757395258967641293; // 5 * m_inv_5 = 1 (mod 2^64) + const N_DIV_5: u64 = 3689348814741910323; // #{ n | n = 0 (mod 2^64) } = 2^64 / 5 let mut count = 0u32; loop { debug_assert!(value != 0); - let q = div5(value); - let r = (value as u32).wrapping_sub(5u32.wrapping_mul(q as u32)); - if r != 0 { + value = value.wrapping_mul(M_INV_5); + if value > N_DIV_5 { break; } - value = q; count += 1; } count diff --git a/vendor/ryu/src/d2s_small_table.rs b/vendor/ryu/src/d2s_small_table.rs index 262fc04..b6e3223 100644 --- a/vendor/ryu/src/d2s_small_table.rs +++ b/vendor/ryu/src/d2s_small_table.rs @@ -18,7 +18,7 @@ // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. 
-use crate::common::*; +use crate::common::pow5bits; pub static DOUBLE_POW5_INV_SPLIT2: [(u64, u64); 15] = [ (1, 2305843009213693952), diff --git a/vendor/ryu/src/f2s.rs b/vendor/ryu/src/f2s.rs index eeb457a..987fefb 100644 --- a/vendor/ryu/src/f2s.rs +++ b/vendor/ryu/src/f2s.rs @@ -18,8 +18,10 @@ // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. -use crate::common::*; -use crate::f2s_intrinsics::*; +use crate::common::{log10_pow2, log10_pow5, pow5bits}; +use crate::f2s_intrinsics::{ + mul_pow5_div_pow2, mul_pow5_inv_div_pow2, multiple_of_power_of_2_32, multiple_of_power_of_5_32, +}; pub const FLOAT_MANTISSA_BITS: u32 = 23; pub const FLOAT_EXPONENT_BITS: u32 = 8; diff --git a/vendor/ryu/src/lib.rs b/vendor/ryu/src/lib.rs index 0f4c89a..0ec6038 100644 --- a/vendor/ryu/src/lib.rs +++ b/vendor/ryu/src/lib.rs @@ -81,7 +81,7 @@ //! notation. #![no_std] -#![doc(html_root_url = "https://docs.rs/ryu/1.0.13")] +#![doc(html_root_url = "https://docs.rs/ryu/1.0.17")] #![allow( clippy::cast_lossless, clippy::cast_possible_truncation, @@ -95,6 +95,7 @@ clippy::missing_panics_doc, clippy::module_name_repetitions, clippy::must_use_candidate, + clippy::needless_doctest_main, clippy::similar_names, clippy::too_many_lines, clippy::unreadable_literal, diff --git a/vendor/ryu/src/pretty/exponent.rs b/vendor/ryu/src/pretty/exponent.rs index b72add5..529d92b 100644 --- a/vendor/ryu/src/pretty/exponent.rs +++ b/vendor/ryu/src/pretty/exponent.rs @@ -1,4 +1,4 @@ -use crate::digit_table::*; +use crate::digit_table::DIGIT_TABLE; use core::ptr; #[cfg_attr(feature = "no-panic", inline)] diff --git a/vendor/ryu/src/pretty/mantissa.rs b/vendor/ryu/src/pretty/mantissa.rs index 0149f5c..552dfe3 100644 --- a/vendor/ryu/src/pretty/mantissa.rs +++ b/vendor/ryu/src/pretty/mantissa.rs @@ -1,4 +1,4 @@ -use crate::digit_table::*; +use crate::digit_table::DIGIT_TABLE; use core::ptr; #[cfg_attr(feature = "no-panic", inline)] diff --git 
a/vendor/ryu/src/pretty/mod.rs b/vendor/ryu/src/pretty/mod.rs index da49e86..f901c5f 100644 --- a/vendor/ryu/src/pretty/mod.rs +++ b/vendor/ryu/src/pretty/mod.rs @@ -1,11 +1,11 @@ mod exponent; mod mantissa; -use self::exponent::*; -use self::mantissa::*; +use self::exponent::{write_exponent2, write_exponent3}; +use self::mantissa::{write_mantissa, write_mantissa_long}; use crate::common; -use crate::d2s::{self, *}; -use crate::f2s::*; +use crate::d2s::{self, d2d, DOUBLE_EXPONENT_BITS, DOUBLE_MANTISSA_BITS}; +use crate::f2s::{f2d, FLOAT_EXPONENT_BITS, FLOAT_MANTISSA_BITS}; use core::ptr; #[cfg(feature = "no-panic")] use no_panic::no_panic; diff --git a/vendor/ryu/src/s2d.rs b/vendor/ryu/src/s2d.rs index 152ca97..c6b4fa4 100644 --- a/vendor/ryu/src/s2d.rs +++ b/vendor/ryu/src/s2d.rs @@ -1,6 +1,6 @@ -use crate::common::*; +use crate::common::{ceil_log2_pow5, log2_pow5}; use crate::d2s; -use crate::d2s_intrinsics::*; +use crate::d2s_intrinsics::{mul_shift_64, multiple_of_power_of_2, multiple_of_power_of_5}; use crate::parse::Error; #[cfg(feature = "no-panic")] use no_panic::no_panic; diff --git a/vendor/ryu/src/s2f.rs b/vendor/ryu/src/s2f.rs index 9593528..52a3235 100644 --- a/vendor/ryu/src/s2f.rs +++ b/vendor/ryu/src/s2f.rs @@ -1,6 +1,8 @@ -use crate::common::*; +use crate::common::{ceil_log2_pow5, log2_pow5}; use crate::f2s; -use crate::f2s_intrinsics::*; +use crate::f2s_intrinsics::{ + mul_pow5_div_pow2, mul_pow5_inv_div_pow2, multiple_of_power_of_2_32, multiple_of_power_of_5_32, +}; use crate::parse::Error; #[cfg(feature = "no-panic")] use no_panic::no_panic; diff --git a/vendor/ryu/tests/common_test.rs b/vendor/ryu/tests/common_test.rs index 2f05b33..e2bc4e1 100644 --- a/vendor/ryu/tests/common_test.rs +++ b/vendor/ryu/tests/common_test.rs @@ -31,7 +31,7 @@ #[path = "../src/common.rs"] mod common; -use common::*; +use common::{ceil_log2_pow5, decimal_length9, log10_pow2, log10_pow5}; #[test] fn test_decimal_length9() { diff --git 
a/vendor/ryu/tests/d2s_intrinsics_test.rs b/vendor/ryu/tests/d2s_intrinsics_test.rs new file mode 100644 index 0000000..0ac80c9 --- /dev/null +++ b/vendor/ryu/tests/d2s_intrinsics_test.rs @@ -0,0 +1,72 @@ +// Translated from C to Rust. The original C code can be found at +// https://github.com/ulfjack/ryu and carries the following license: +// +// Copyright 2018 Ulf Adams +// +// The contents of this file may be used under the terms of the Apache License, +// Version 2.0. +// +// (See accompanying file LICENSE-Apache or copy at +// http://www.apache.org/licenses/LICENSE-2.0) +// +// Alternatively, the contents of this file may be used under the terms of +// the Boost Software License, Version 1.0. +// (See accompanying file LICENSE-Boost or copy at +// https://www.boost.org/LICENSE_1_0.txt) +// +// Unless required by applicable law or agreed to in writing, this software +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. + +#![allow(dead_code)] +#![allow( + clippy::cast_lossless, + clippy::cast_possible_truncation, + clippy::unreadable_literal +)] + +#[path = "../src/d2s_intrinsics.rs"] +mod d2s_intrinsics; + +use d2s_intrinsics::pow5_factor; + +#[test] +fn test_pow5_factor() { + assert_eq!(0, pow5_factor(1)); + assert_eq!(0, pow5_factor(2)); + assert_eq!(0, pow5_factor(3)); + assert_eq!(0, pow5_factor(4)); + assert_eq!(1, pow5_factor(5)); + assert_eq!(0, pow5_factor(6)); + assert_eq!(0, pow5_factor(7)); + assert_eq!(0, pow5_factor(8)); + assert_eq!(0, pow5_factor(9)); + assert_eq!(1, pow5_factor(10)); + + assert_eq!(0, pow5_factor(12)); + assert_eq!(0, pow5_factor(14)); + assert_eq!(0, pow5_factor(16)); + assert_eq!(0, pow5_factor(18)); + assert_eq!(1, pow5_factor(20)); + + assert_eq!(2, pow5_factor(5 * 5)); + assert_eq!(3, pow5_factor(5 * 5 * 5)); + assert_eq!(4, pow5_factor(5 * 5 * 5 * 5)); + assert_eq!(5, pow5_factor(5 * 5 * 5 * 5 * 5)); + assert_eq!(6, pow5_factor(5 * 5 * 5 * 5 * 5 * 5)); + 
assert_eq!(7, pow5_factor(5 * 5 * 5 * 5 * 5 * 5 * 5)); + assert_eq!(8, pow5_factor(5 * 5 * 5 * 5 * 5 * 5 * 5 * 5)); + assert_eq!(9, pow5_factor(5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5)); + assert_eq!(10, pow5_factor(5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5)); + + assert_eq!(0, pow5_factor(42)); + assert_eq!(1, pow5_factor(42 * 5)); + assert_eq!(2, pow5_factor(42 * 5 * 5)); + assert_eq!(3, pow5_factor(42 * 5 * 5 * 5)); + assert_eq!(4, pow5_factor(42 * 5 * 5 * 5 * 5)); + assert_eq!(5, pow5_factor(42 * 5 * 5 * 5 * 5 * 5)); + + assert_eq!(27, pow5_factor(7450580596923828125)); // 5^27, largest power of 5 < 2^64. + assert_eq!(1, pow5_factor(18446744073709551615)); // 2^64 - 1, largest multiple of 5 < 2^64. + assert_eq!(0, pow5_factor(18446744073709551614)); // 2^64 - 2, largest non-multiple of 5 < 2^64. +} diff --git a/vendor/ryu/tests/d2s_table_test.rs b/vendor/ryu/tests/d2s_table_test.rs index dce1be3..13c4216 100644 --- a/vendor/ryu/tests/d2s_table_test.rs +++ b/vendor/ryu/tests/d2s_table_test.rs @@ -41,8 +41,8 @@ mod d2s_intrinsics; #[path = "../src/d2s_small_table.rs"] mod d2s_small_table; -use d2s_full_table::*; -use d2s_small_table::*; +use d2s_full_table::{DOUBLE_POW5_INV_SPLIT, DOUBLE_POW5_SPLIT}; +use d2s_small_table::{compute_inv_pow5, compute_pow5}; #[test] fn test_compute_pow5() { diff --git a/vendor/ryu/tests/d2s_test.rs b/vendor/ryu/tests/d2s_test.rs index 368cab6..7e8eba6 100644 --- a/vendor/ryu/tests/d2s_test.rs +++ b/vendor/ryu/tests/d2s_test.rs @@ -82,7 +82,8 @@ fn test_basic() { check!(-0.0); check!(1.0); check!(-1.0); - assert_eq!(pretty(f64::NAN), "NaN"); + assert_eq!(pretty(f64::NAN.copysign(1.0)), "NaN"); + assert_eq!(pretty(f64::NAN.copysign(-1.0)), "NaN"); assert_eq!(pretty(f64::INFINITY), "inf"); assert_eq!(pretty(f64::NEG_INFINITY), "-inf"); } diff --git a/vendor/ryu/tests/f2s_test.rs b/vendor/ryu/tests/f2s_test.rs index 927fa7e..d6249a3 100644 --- a/vendor/ryu/tests/f2s_test.rs +++ b/vendor/ryu/tests/f2s_test.rs @@ -75,7 +75,8 @@ fn test_basic() { 
check!(-0.0); check!(1.0); check!(-1.0); - assert_eq!(pretty(f32::NAN), "NaN"); + assert_eq!(pretty(f32::NAN.copysign(1.0)), "NaN"); + assert_eq!(pretty(f32::NAN.copysign(-1.0)), "NaN"); assert_eq!(pretty(f32::INFINITY), "inf"); assert_eq!(pretty(f32::NEG_INFINITY), "-inf"); } diff --git a/vendor/serde/.cargo-checksum.json b/vendor/serde/.cargo-checksum.json index 7bef9d0..d55068b 100644 --- a/vendor/serde/.cargo-checksum.json +++ b/vendor/serde/.cargo-checksum.json @@ -1 +1 @@ -{"files":{"Cargo.toml":"08f34b0ade602046b909ac209f438ca4271b49084fca87c30cd4f0c39dcf333d","LICENSE-APACHE":"62c7a1e35f56406896d7aa7ca52d0cc0d272ac022b5d2796e7d6905db8a3636a","LICENSE-MIT":"23f18e03dc49df91622fe2a76176497404e46ced8a715d9d2b67a7446571cca3","README.md":"13c66875efb67f64fdec817725f34ceb07913e1ebea4adc240868d2ed581d3da","build.rs":"976e089a5f74fc03e23465744bcc02b3b600a59f1c098da60c29d3979c5b35df","crates-io.md":"ee22254ee64c3189eef3e707c8d75dc66a8df2a7ee9e518d95238950780ec387","src/de/format.rs":"84f902fd4c3be66e81ac01d5b21cd876113c16f9890ff8bab5faa0d085386294","src/de/ignored_any.rs":"967184c86707c99b77a1cfb218dfc823f560fae227b6635aee6af19ee82962f5","src/de/impls.rs":"60c481b12dc3bb41810302f6979d7d9a8fa47f4467617d511e283a6a889a7428","src/de/mod.rs":"71198e80e4c64aa686b5ceb6e8bce10db20845a87a30fa14227ecbe365a046d5","src/de/seed.rs":"e8cf0233afe0af5b8fb9e4c94f301c92729c5ba417280af9e2201b732e374a72","src/de/utf8.rs":"f17524ee0af98ec3abcfd7d0b812fbd1033263bd8e2ce2f57c1e1999ce153558","src/de/value.rs":"aa5055923e2c3fd1c1f1abdfb380a1d63d07cf4d602ef62d2df2b7da33dd8c81","src/integer128.rs":"ca49591abde2d8c4f582174533fee28f0fa9139e5d71bf22b25a6b175f8abccc","src/lib.rs":"13a3d8b17734fc99288eb9aacf13e9f03c9d2926f80fc0cc626790a6c040e0e2","src/macros.rs":"3d695a51f0a07f9f719dcb5620012c21a1b084c06a6283349cabf574ceba8123","src/private/de.rs":"a85efe9af4f5629ac7d946af56e20fbc184df6ac40a6cfe47bf3997a95b2ea20","src/private/doc.rs":"e9801a43c3088fccd5f1fac76416698f948e65b647024aa9da17d673e1e8c217"
,"src/private/mod.rs":"37b204775e572396515477b393ce793b2579de48e5971e6f596ba3723c489fd6","src/private/ser.rs":"57fbff98429e870da86edcf61c0831caaa3b708c0c32e3038c4b2179e8dff73e","src/private/size_hint.rs":"605521227e9ba3100fbb9d5ea7fd5853385097c35015ce6908bd5f1ea20d59ad","src/ser/fmt.rs":"7827ed07fd8897e6324f75625ba0c926a4c4e7ec2914cd067391ce54d942ac7b","src/ser/impls.rs":"46229722b7f0d8c4f01c43567c765608bf2c1974a5f24ce2525815c5bfd42ff5","src/ser/impossible.rs":"db17913522c1c27389c5a085113911353b9813c1b116518681362e7c8b692c3a","src/ser/mod.rs":"e1e6c764837c70b6410dcf1949a0dae1b4b4ffce65b87607d3d173b612e9bccf","src/std_error.rs":"3aac687856c035517fae44ed2906dd4a1e3184bae4bf613adcdeb73f74126c57"},"package":"bb2f3770c8bce3bcda7e149193a069a0f4365bda1fa5cd88e03bca26afc1216c"} \ No newline at end of file +{"files":{"Cargo.toml":"f03b626efae73a6dd42f07d722dad2da3a4add51f4e653e30a6d696853bab209","LICENSE-APACHE":"62c7a1e35f56406896d7aa7ca52d0cc0d272ac022b5d2796e7d6905db8a3636a","LICENSE-MIT":"23f18e03dc49df91622fe2a76176497404e46ced8a715d9d2b67a7446571cca3","README.md":"731c044fc5f98b37a89e9049c9214267db98763309cb63146b45c029640f82a3","build.rs":"f9ba30324b9ce085c903595fb55a5293f8c2348ff36bfe870521b935ae6d105c","crates-io.md":"56e988ac4944c45f5bf5051e3827892ed8fb817853d99d9df1fff6621108e270","src/de/format.rs":"c85071b016df643b161859682d21ce34fa0ebf2a3bdbeeea69859da48f5d934f","src/de/ignored_any.rs":"6480f2b2a83dc4764d01b2eec7309729eef2492eede2e5ee98d23a60b05198eb","src/de/impls.rs":"2857d734176a0b78a41c9358354b0b0b83c6b2d948590be072d98606a8cae9d6","src/de/mod.rs":"fc34da49f692803d2c2f131322d9b48ad8e4f39ed10b2b304d6193ab09d621fb","src/de/seed.rs":"045d890712a04eb33ffc5a021e5d948a63c89402b8ffeea749df2171b7484f8f","src/de/size_hint.rs":"fff83dc39d30e75e8e611991f9c5399188a1aad23a6462dbca2c8b62655cfedb","src/de/value.rs":"0fd511a288c20a1b768718f4baadf9c7d4146d276af6a71ba1d0f7679b28644a","src/integer128.rs":"29ef30b7d94507b34807090e68173767cdc7aff62edccd38affe69e75338dddc","src/l
ib.rs":"638b231a280519f1861ea5f1bfbe97e2394b2f7662a9701b8e57ed95093dd298","src/macros.rs":"e3486ef4a9a4ed1b27234aa1817ccb25ec0eb026ffc95e2c71c7b917f1f45629","src/private/de.rs":"6557a124fdaf61f9c7cd80163e40f4a453354e45b63a4eb55dafdfe0159f6881","src/private/doc.rs":"9ad740e9ea2eedf861b77116eda9a6fb74bc8553541cd17d1bc5791a3ef3271a","src/private/mod.rs":"b8f0c348621d91dd9da3db83d8877e70bc61ad0a2dc2d6fb57c6fc2c2cbafa26","src/private/ser.rs":"656613691bd8d40cb70a52d4ebe3ee96a993c8a1292d50822d9ca5bdad84426b","src/ser/fmt.rs":"77a5583e5c227ea1982b097ed6378af5c899d43761d71e33440262fd35944695","src/ser/impls.rs":"850619164b399c37cd373d24f5a2c83453f40b34bb978c5722d2c1ae226775b5","src/ser/impossible.rs":"e11b37689ec1395378d546fce74221ca9046d0761744301f12029102fd07e30e","src/ser/mod.rs":"a7fd082203d63cbe4f0fe86d9be16bf4f3b2444653dac6bb61d82e0f4f6b4214","src/std_error.rs":"25a07149e2e468747ffa5a58051c7f93d7b3c0fa0372f012a96c97ec8ab03b97"},"package":"3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2"} \ No newline at end of file diff --git a/vendor/serde/Cargo.toml b/vendor/serde/Cargo.toml index 3b273a1..6ba6868 100644 --- a/vendor/serde/Cargo.toml +++ b/vendor/serde/Cargo.toml @@ -10,22 +10,15 @@ # See Cargo.toml.orig for the original contents. 
[package] -rust-version = "1.19" +edition = "2018" +rust-version = "1.31" name = "serde" -version = "1.0.160" +version = "1.0.197" authors = [ "Erick Tryzelaar <erick.tryzelaar@gmail.com>", "David Tolnay <dtolnay@gmail.com>", ] build = "build.rs" -include = [ - "build.rs", - "src/**/*.rs", - "crates-io.md", - "README.md", - "LICENSE-APACHE", - "LICENSE-MIT", -] description = "A generic serialization/deserialization framework" homepage = "https://serde.rs" documentation = "https://docs.rs/serde" @@ -38,12 +31,22 @@ keywords = [ categories = [ "encoding", "no-std", + "no-std::no-alloc", ] license = "MIT OR Apache-2.0" repository = "https://github.com/serde-rs/serde" [package.metadata.docs.rs] -features = ["derive"] +features = [ + "derive", + "rc", + "unstable", +] +rustdoc-args = [ + "--cfg", + "doc_cfg", + "--generate-link-to-definition", +] targets = ["x86_64-unknown-linux-gnu"] [package.metadata.playground] @@ -56,11 +59,11 @@ features = [ doc-scrape-examples = false [dependencies.serde_derive] -version = "=1.0.160" +version = "1" optional = true [dev-dependencies.serde_derive] -version = "1.0" +version = "1" [features] alloc = [] @@ -69,3 +72,6 @@ derive = ["serde_derive"] rc = [] std = [] unstable = [] + +[target."cfg(any())".dependencies.serde_derive] +version = "=1.0.197" diff --git a/vendor/serde/README.md b/vendor/serde/README.md index d53e572..3129294 100644 --- a/vendor/serde/README.md +++ b/vendor/serde/README.md @@ -1,12 +1,12 @@ -# Serde   [![Build Status]][actions] [![Latest Version]][crates.io] [![serde: rustc 1.19+]][Rust 1.19] [![serde_derive: rustc 1.56+]][Rust 1.56] +# Serde   [![Build Status]][actions] [![Latest Version]][crates.io] [![serde msrv]][Rust 1.31] [![serde_derive msrv]][Rust 1.56] [Build Status]: https://img.shields.io/github/actions/workflow/status/serde-rs/serde/ci.yml?branch=master [actions]: https://github.com/serde-rs/serde/actions?query=branch%3Amaster [Latest Version]: https://img.shields.io/crates/v/serde.svg [crates.io]: 
https://crates.io/crates/serde -[serde: rustc 1.19+]: https://img.shields.io/badge/serde-rustc_1.19+-lightgray.svg -[serde_derive: rustc 1.56+]: https://img.shields.io/badge/serde_derive-rustc_1.56+-lightgray.svg -[Rust 1.19]: https://blog.rust-lang.org/2017/07/20/Rust-1.19.html +[serde msrv]: https://img.shields.io/crates/msrv/serde.svg?label=serde%20msrv&color=lightgray +[serde_derive msrv]: https://img.shields.io/crates/msrv/serde_derive.svg?label=serde_derive%20msrv&color=lightgray +[Rust 1.31]: https://blog.rust-lang.org/2018/12/06/Rust-1.31-and-rust-2018.html [Rust 1.56]: https://blog.rust-lang.org/2021/10/21/Rust-1.56.0.html **Serde is a framework for *ser*ializing and *de*serializing Rust data structures efficiently and generically.** @@ -48,7 +48,7 @@ serde_json = "1.0" <p></p> ```rust -use serde::{Serialize, Deserialize}; +use serde::{Deserialize, Serialize}; #[derive(Serialize, Deserialize, Debug)] struct Point { diff --git a/vendor/serde/build.rs b/vendor/serde/build.rs index 929d8e1..fe5486a 100644 --- a/vendor/serde/build.rs +++ b/vendor/serde/build.rs @@ -16,68 +16,6 @@ fn main() { let target = env::var("TARGET").unwrap(); let emscripten = target == "asmjs-unknown-emscripten" || target == "wasm32-unknown-emscripten"; - // std::collections::Bound was stabilized in Rust 1.17 - // but it was moved to core::ops later in Rust 1.26: - // https://doc.rust-lang.org/core/ops/enum.Bound.html - if minor < 26 { - println!("cargo:rustc-cfg=no_ops_bound"); - if minor < 17 { - println!("cargo:rustc-cfg=no_collections_bound"); - } - } - - // core::cmp::Reverse stabilized in Rust 1.19: - // https://doc.rust-lang.org/stable/core/cmp/struct.Reverse.html - if minor < 19 { - println!("cargo:rustc-cfg=no_core_reverse"); - } - - // CString::into_boxed_c_str and PathBuf::into_boxed_path stabilized in Rust 1.20: - // https://doc.rust-lang.org/std/ffi/struct.CString.html#method.into_boxed_c_str - // https://doc.rust-lang.org/std/path/struct.PathBuf.html#method.into_boxed_path 
- if minor < 20 { - println!("cargo:rustc-cfg=no_de_boxed_c_str"); - println!("cargo:rustc-cfg=no_de_boxed_path"); - } - - // From<Box<T>> for Rc<T> / Arc<T> stabilized in Rust 1.21: - // https://doc.rust-lang.org/std/rc/struct.Rc.html#impl-From<Box<T>> - // https://doc.rust-lang.org/std/sync/struct.Arc.html#impl-From<Box<T>> - if minor < 21 { - println!("cargo:rustc-cfg=no_de_rc_dst"); - } - - // Duration available in core since Rust 1.25: - // https://blog.rust-lang.org/2018/03/29/Rust-1.25.html#library-stabilizations - if minor < 25 { - println!("cargo:rustc-cfg=no_core_duration"); - } - - // 128-bit integers stabilized in Rust 1.26: - // https://blog.rust-lang.org/2018/05/10/Rust-1.26.html - // - // Disabled on Emscripten targets before Rust 1.40 since - // Emscripten did not support 128-bit integers until Rust 1.40 - // (https://github.com/rust-lang/rust/pull/65251) - if minor < 26 || emscripten && minor < 40 { - println!("cargo:rustc-cfg=no_integer128"); - } - - // Inclusive ranges methods stabilized in Rust 1.27: - // https://github.com/rust-lang/rust/pull/50758 - // Also Iterator::try_for_each: - // https://blog.rust-lang.org/2018/06/21/Rust-1.27.html#library-stabilizations - if minor < 27 { - println!("cargo:rustc-cfg=no_range_inclusive"); - println!("cargo:rustc-cfg=no_iterator_try_fold"); - } - - // Non-zero integers stabilized in Rust 1.28: - // https://blog.rust-lang.org/2018/08/02/Rust-1.28.html#library-stabilizations - if minor < 28 { - println!("cargo:rustc-cfg=no_num_nonzero"); - } - // TryFrom, Atomic types, non-zero signed integers, and SystemTime::checked_add // stabilized in Rust 1.34: // https://blog.rust-lang.org/2019/04/11/Rust-1.34.0.html#tryfrom-and-tryinto @@ -89,6 +27,12 @@ fn main() { println!("cargo:rustc-cfg=no_relaxed_trait_bounds"); } + // f32::copysign and f64::copysign stabilized in Rust 1.35. 
+ // https://blog.rust-lang.org/2019/05/23/Rust-1.35.0.html#copy-the-sign-of-a-floating-point-number-onto-another + if minor < 35 { + println!("cargo:rustc-cfg=no_float_copysign"); + } + // Current minimum supported version of serde_derive crate is Rust 1.56. if minor < 56 { println!("cargo:rustc-cfg=no_serde_derive"); diff --git a/vendor/serde/crates-io.md b/vendor/serde/crates-io.md index 6e0ec28..1871003 100644 --- a/vendor/serde/crates-io.md +++ b/vendor/serde/crates-io.md @@ -16,7 +16,7 @@ You may be looking for: ## Serde in action ```rust -use serde::{Serialize, Deserialize}; +use serde::{Deserialize, Serialize}; #[derive(Serialize, Deserialize, Debug)] struct Point { diff --git a/vendor/serde/src/de/format.rs b/vendor/serde/src/de/format.rs index f14580b..9053cc0 100644 --- a/vendor/serde/src/de/format.rs +++ b/vendor/serde/src/de/format.rs @@ -1,5 +1,5 @@ -use lib::fmt::{self, Write}; -use lib::str; +use crate::lib::fmt::{self, Write}; +use crate::lib::str; pub(super) struct Buf<'a> { bytes: &'a mut [u8], diff --git a/vendor/serde/src/de/ignored_any.rs b/vendor/serde/src/de/ignored_any.rs index 9ed438e..2360a17 100644 --- a/vendor/serde/src/de/ignored_any.rs +++ b/vendor/serde/src/de/ignored_any.rs @@ -1,6 +1,6 @@ -use lib::*; +use crate::lib::*; -use de::{ +use crate::de::{ Deserialize, Deserializer, EnumAccess, Error, MapAccess, SeqAccess, VariantAccess, Visitor, }; @@ -10,13 +10,12 @@ use de::{ /// any type, except that it does not store any information about the data that /// gets deserialized. /// -/// ```edition2018 -/// use std::fmt; -/// use std::marker::PhantomData; -/// +/// ```edition2021 /// use serde::de::{ /// self, Deserialize, DeserializeSeed, Deserializer, IgnoredAny, SeqAccess, Visitor, /// }; +/// use std::fmt; +/// use std::marker::PhantomData; /// /// /// A seed that can be used to deserialize only the `n`th element of a sequence /// /// while efficiently discarding elements of any type before or after index `n`. 
@@ -108,7 +107,7 @@ use de::{ /// # Ok(()) /// # } /// ``` -#[derive(Copy, Clone, Debug, Default)] +#[derive(Copy, Clone, Debug, Default, PartialEq)] pub struct IgnoredAny; impl<'de> Visitor<'de> for IgnoredAny { @@ -130,12 +129,10 @@ impl<'de> Visitor<'de> for IgnoredAny { Ok(IgnoredAny) } - serde_if_integer128! { - #[inline] - fn visit_i128<E>(self, x: i128) -> Result<Self::Value, E> { - let _ = x; - Ok(IgnoredAny) - } + #[inline] + fn visit_i128<E>(self, x: i128) -> Result<Self::Value, E> { + let _ = x; + Ok(IgnoredAny) } #[inline] @@ -144,12 +141,10 @@ impl<'de> Visitor<'de> for IgnoredAny { Ok(IgnoredAny) } - serde_if_integer128! { - #[inline] - fn visit_u128<E>(self, x: u128) -> Result<Self::Value, E> { - let _ = x; - Ok(IgnoredAny) - } + #[inline] + fn visit_u128<E>(self, x: u128) -> Result<Self::Value, E> { + let _ = x; + Ok(IgnoredAny) } #[inline] @@ -198,7 +193,7 @@ impl<'de> Visitor<'de> for IgnoredAny { where A: SeqAccess<'de>, { - while let Some(IgnoredAny) = try!(seq.next_element()) { + while let Some(IgnoredAny) = tri!(seq.next_element()) { // Gobble } Ok(IgnoredAny) @@ -209,7 +204,7 @@ impl<'de> Visitor<'de> for IgnoredAny { where A: MapAccess<'de>, { - while let Some((IgnoredAny, IgnoredAny)) = try!(map.next_entry()) { + while let Some((IgnoredAny, IgnoredAny)) = tri!(map.next_entry()) { // Gobble } Ok(IgnoredAny) @@ -228,7 +223,7 @@ impl<'de> Visitor<'de> for IgnoredAny { where A: EnumAccess<'de>, { - try!(data.variant::<IgnoredAny>()).1.newtype_variant() + tri!(data.variant::<IgnoredAny>()).1.newtype_variant() } } diff --git a/vendor/serde/src/de/impls.rs b/vendor/serde/src/de/impls.rs index a2e2c48..413c997 100644 --- a/vendor/serde/src/de/impls.rs +++ b/vendor/serde/src/de/impls.rs @@ -1,16 +1,14 @@ -use lib::*; +use crate::lib::*; -use de::{ - Deserialize, Deserializer, EnumAccess, Error, SeqAccess, Unexpected, VariantAccess, Visitor, +use crate::de::{ + Deserialize, Deserializer, EnumAccess, Error, MapAccess, SeqAccess, Unexpected, 
VariantAccess, + Visitor, }; -#[cfg(any(feature = "std", feature = "alloc", not(no_core_duration)))] -use de::MapAccess; - -use seed::InPlaceSeed; +use crate::seed::InPlaceSeed; #[cfg(any(feature = "std", feature = "alloc"))] -use __private::size_hint; +use crate::de::size_hint; //////////////////////////////////////////////////////////////////////////////// @@ -41,6 +39,7 @@ impl<'de> Deserialize<'de> for () { } #[cfg(feature = "unstable")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "unstable")))] impl<'de> Deserialize<'de> for ! { fn deserialize<D>(_deserializer: D) -> Result<Self, D::Error> where @@ -84,7 +83,7 @@ macro_rules! impl_deserialize_num { ($primitive:ident, $nonzero:ident $(cfg($($cfg:tt)*))*, $deserialize:ident $($method:ident!($($val:ident : $visit:ident)*);)*) => { impl_deserialize_num!($primitive, $deserialize $($method!($($val : $visit)*);)*); - #[cfg(all(not(no_num_nonzero), $($($cfg)*)*))] + $(#[cfg($($cfg)*)])* impl<'de> Deserialize<'de> for num::$nonzero { fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> where @@ -182,6 +181,28 @@ macro_rules! num_as_self { }; } +macro_rules! num_as_copysign_self { + ($ty:ident : $visit:ident) => { + #[inline] + fn $visit<E>(self, v: $ty) -> Result<Self::Value, E> + where + E: Error, + { + #[cfg(any(no_float_copysign, not(feature = "std")))] + { + Ok(v as Self::Value) + } + + #[cfg(all(not(no_float_copysign), feature = "std"))] + { + // Preserve sign of NaN. The `as` produces a nondeterministic sign. + let sign = if v.is_sign_positive() { 1.0 } else { -1.0 }; + Ok((v as Self::Value).copysign(sign)) + } + } + }; +} + macro_rules! int_to_int { ($ty:ident : $visit:ident) => { #[inline] @@ -353,7 +374,7 @@ impl_deserialize_num! { impl_deserialize_num! 
{ f32, deserialize_f32 num_self!(f32:visit_f32); - num_as_self!(f64:visit_f64); + num_as_copysign_self!(f64:visit_f64); num_as_self!(i8:visit_i8 i16:visit_i16 i32:visit_i32 i64:visit_i64); num_as_self!(u8:visit_u8 u16:visit_u16 u32:visit_u32 u64:visit_u64); } @@ -361,69 +382,67 @@ impl_deserialize_num! { impl_deserialize_num! { f64, deserialize_f64 num_self!(f64:visit_f64); - num_as_self!(f32:visit_f32); + num_as_copysign_self!(f32:visit_f32); num_as_self!(i8:visit_i8 i16:visit_i16 i32:visit_i32 i64:visit_i64); num_as_self!(u8:visit_u8 u16:visit_u16 u32:visit_u32 u64:visit_u64); } -serde_if_integer128! { - macro_rules! num_128 { - ($ty:ident : $visit:ident) => { - fn $visit<E>(self, v: $ty) -> Result<Self::Value, E> - where - E: Error, +macro_rules! num_128 { + ($ty:ident : $visit:ident) => { + fn $visit<E>(self, v: $ty) -> Result<Self::Value, E> + where + E: Error, + { + if v as i128 >= Self::Value::min_value() as i128 + && v as u128 <= Self::Value::max_value() as u128 { - if v as i128 >= Self::Value::min_value() as i128 - && v as u128 <= Self::Value::max_value() as u128 - { - Ok(v as Self::Value) - } else { - Err(Error::invalid_value( - Unexpected::Other(stringify!($ty)), - &self, - )) - } + Ok(v as Self::Value) + } else { + Err(Error::invalid_value( + Unexpected::Other(stringify!($ty)), + &self, + )) } - }; + } + }; - (nonzero $primitive:ident $ty:ident : $visit:ident) => { - fn $visit<E>(self, v: $ty) -> Result<Self::Value, E> - where - E: Error, + (nonzero $primitive:ident $ty:ident : $visit:ident) => { + fn $visit<E>(self, v: $ty) -> Result<Self::Value, E> + where + E: Error, + { + if v as i128 >= $primitive::min_value() as i128 + && v as u128 <= $primitive::max_value() as u128 { - if v as i128 >= $primitive::min_value() as i128 - && v as u128 <= $primitive::max_value() as u128 - { - if let Some(nonzero) = Self::Value::new(v as $primitive) { - Ok(nonzero) - } else { - Err(Error::invalid_value(Unexpected::Unsigned(0), &self)) - } + if let Some(nonzero) = 
Self::Value::new(v as $primitive) { + Ok(nonzero) } else { - Err(Error::invalid_value( - Unexpected::Other(stringify!($ty)), - &self, - )) + Err(Error::invalid_value(Unexpected::Unsigned(0), &self)) } + } else { + Err(Error::invalid_value( + Unexpected::Other(stringify!($ty)), + &self, + )) } - }; - } + } + }; +} - impl_deserialize_num! { - i128, NonZeroI128 cfg(not(no_num_nonzero_signed)), deserialize_i128 - num_self!(i128:visit_i128); - num_as_self!(i8:visit_i8 i16:visit_i16 i32:visit_i32 i64:visit_i64); - num_as_self!(u8:visit_u8 u16:visit_u16 u32:visit_u32 u64:visit_u64); - num_128!(u128:visit_u128); - } +impl_deserialize_num! { + i128, NonZeroI128 cfg(not(no_num_nonzero_signed)), deserialize_i128 + num_self!(i128:visit_i128); + num_as_self!(i8:visit_i8 i16:visit_i16 i32:visit_i32 i64:visit_i64); + num_as_self!(u8:visit_u8 u16:visit_u16 u32:visit_u32 u64:visit_u64); + num_128!(u128:visit_u128); +} - impl_deserialize_num! { - u128, NonZeroU128, deserialize_u128 - num_self!(u128:visit_u128); - num_as_self!(u8:visit_u8 u16:visit_u16 u32:visit_u32 u64:visit_u64); - int_to_uint!(i8:visit_i8 i16:visit_i16 i32:visit_i32 i64:visit_i64); - num_128!(i128:visit_i128); - } +impl_deserialize_num! 
{ + u128, NonZeroU128, deserialize_u128 + num_self!(u128:visit_u128); + num_as_self!(u8:visit_u8 u16:visit_u16 u32:visit_u32 u64:visit_u64); + int_to_uint!(i8:visit_i8 i16:visit_i16 i32:visit_i32 i64:visit_i64); + num_128!(i128:visit_i128); } //////////////////////////////////////////////////////////////////////////////// @@ -578,6 +597,7 @@ impl<'a, 'de> Visitor<'de> for StringInPlaceVisitor<'a> { } #[cfg(any(feature = "std", feature = "alloc"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] impl<'de> Deserialize<'de> for String { fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> where @@ -681,10 +701,10 @@ impl<'de> Visitor<'de> for CStringVisitor { where A: SeqAccess<'de>, { - let len = size_hint::cautious(seq.size_hint()); - let mut values = Vec::with_capacity(len); + let capacity = size_hint::cautious::<u8>(seq.size_hint()); + let mut values = Vec::<u8>::with_capacity(capacity); - while let Some(value) = try!(seq.next_element()) { + while let Some(value) = tri!(seq.next_element()) { values.push(value); } @@ -721,6 +741,7 @@ impl<'de> Visitor<'de> for CStringVisitor { } #[cfg(any(feature = "std", all(not(no_core_cstr), feature = "alloc")))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] impl<'de> Deserialize<'de> for CString { fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> where @@ -732,10 +753,10 @@ impl<'de> Deserialize<'de> for CString { macro_rules! forwarded_impl { ( - $(#[doc = $doc:tt])* + $(#[$attr:meta])* ($($id:ident),*), $ty:ty, $func:expr ) => { - $(#[doc = $doc])* + $(#[$attr])* impl<'de $(, $id : Deserialize<'de>,)*> Deserialize<'de> for $ty { fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> where @@ -747,14 +768,15 @@ macro_rules! forwarded_impl { } } -#[cfg(all( - any(feature = "std", all(not(no_core_cstr), feature = "alloc")), - not(no_de_boxed_c_str) -))] -forwarded_impl!((), Box<CStr>, CString::into_boxed_c_str); +forwarded_impl! 
{ + #[cfg(any(feature = "std", all(not(no_core_cstr), feature = "alloc")))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] + (), Box<CStr>, CString::into_boxed_c_str +} -#[cfg(not(no_core_reverse))] -forwarded_impl!((T), Reverse<T>, Reverse); +forwarded_impl! { + (T), Reverse<T>, Reverse +} //////////////////////////////////////////////////////////////////////////////// @@ -860,9 +882,9 @@ impl<'de, T: ?Sized> Deserialize<'de> for PhantomData<T> { //////////////////////////////////////////////////////////////////////////////// -#[cfg(any(feature = "std", feature = "alloc"))] macro_rules! seq_impl { ( + $(#[$attr:meta])* $ty:ident <T $(: $tbound1:ident $(+ $tbound2:ident)*)* $(, $typaram:ident : $bound1:ident $(+ $bound2:ident)*)*>, $access:ident, $clear:expr, @@ -870,6 +892,7 @@ macro_rules! seq_impl { $reserve:expr, $insert:expr ) => { + $(#[$attr])* impl<'de, T $(, $typaram)*> Deserialize<'de> for $ty<T $(, $typaram)*> where T: Deserialize<'de> $(+ $tbound1 $(+ $tbound2)*)*, @@ -901,7 +924,7 @@ macro_rules! seq_impl { { let mut values = $with_capacity; - while let Some(value) = try!($access.next_element()) { + while let Some(value) = tri!($access.next_element()) { $insert(&mut values, value); } @@ -936,10 +959,10 @@ macro_rules! seq_impl { A: SeqAccess<'de>, { $clear(&mut self.0); - $reserve(&mut self.0, size_hint::cautious($access.size_hint())); + $reserve(&mut self.0, size_hint::cautious::<T>($access.size_hint())); // FIXME: try to overwrite old values here? (Vec, VecDeque, LinkedList) - while let Some(value) = try!($access.next_element()) { + while let Some(value) = tri!($access.next_element()) { $insert(&mut self.0, value); } @@ -957,18 +980,20 @@ macro_rules! 
seq_impl { #[cfg(any(feature = "std", feature = "alloc"))] fn nop_reserve<T>(_seq: T, _n: usize) {} -#[cfg(any(feature = "std", feature = "alloc"))] seq_impl!( + #[cfg(any(feature = "std", feature = "alloc"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] BinaryHeap<T: Ord>, seq, BinaryHeap::clear, - BinaryHeap::with_capacity(size_hint::cautious(seq.size_hint())), + BinaryHeap::with_capacity(size_hint::cautious::<T>(seq.size_hint())), BinaryHeap::reserve, BinaryHeap::push ); -#[cfg(any(feature = "std", feature = "alloc"))] seq_impl!( + #[cfg(any(feature = "std", feature = "alloc"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] BTreeSet<T: Eq + Ord>, seq, BTreeSet::clear, @@ -977,8 +1002,9 @@ seq_impl!( BTreeSet::insert ); -#[cfg(any(feature = "std", feature = "alloc"))] seq_impl!( + #[cfg(any(feature = "std", feature = "alloc"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] LinkedList<T>, seq, LinkedList::clear, @@ -987,21 +1013,24 @@ seq_impl!( LinkedList::push_back ); -#[cfg(feature = "std")] seq_impl!( + #[cfg(feature = "std")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] HashSet<T: Eq + Hash, S: BuildHasher + Default>, seq, HashSet::clear, - HashSet::with_capacity_and_hasher(size_hint::cautious(seq.size_hint()), S::default()), + HashSet::with_capacity_and_hasher(size_hint::cautious::<T>(seq.size_hint()), S::default()), HashSet::reserve, - HashSet::insert); + HashSet::insert +); -#[cfg(any(feature = "std", feature = "alloc"))] seq_impl!( + #[cfg(any(feature = "std", feature = "alloc"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] VecDeque<T>, seq, VecDeque::clear, - VecDeque::with_capacity(size_hint::cautious(seq.size_hint())), + VecDeque::with_capacity(size_hint::cautious::<T>(seq.size_hint())), VecDeque::reserve, VecDeque::push_back ); @@ -1009,6 +1038,7 @@ seq_impl!( 
//////////////////////////////////////////////////////////////////////////////// #[cfg(any(feature = "std", feature = "alloc"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] impl<'de, T> Deserialize<'de> for Vec<T> where T: Deserialize<'de>, @@ -1035,9 +1065,10 @@ where where A: SeqAccess<'de>, { - let mut values = Vec::with_capacity(size_hint::cautious(seq.size_hint())); + let capacity = size_hint::cautious::<T>(seq.size_hint()); + let mut values = Vec::<T>::with_capacity(capacity); - while let Some(value) = try!(seq.next_element()) { + while let Some(value) = tri!(seq.next_element()) { values.push(value); } @@ -1071,7 +1102,7 @@ where where A: SeqAccess<'de>, { - let hint = size_hint::cautious(seq.size_hint()); + let hint = size_hint::cautious::<T>(seq.size_hint()); if let Some(additional) = hint.checked_sub(self.0.len()) { self.0.reserve(additional); } @@ -1079,7 +1110,7 @@ where for i in 0..self.0.len() { let next = { let next_place = InPlaceSeed(&mut self.0[i]); - try!(seq.next_element_seed(next_place)) + tri!(seq.next_element_seed(next_place)) }; if next.is_none() { self.0.truncate(i); @@ -1087,7 +1118,7 @@ where } } - while let Some(value) = try!(seq.next_element()) { + while let Some(value) = tri!(seq.next_element()) { self.0.push(value); } @@ -1159,7 +1190,7 @@ macro_rules! array_impls { A: SeqAccess<'de>, { Ok([$( - match try!(seq.next_element()) { + match tri!(seq.next_element()) { Some(val) => val, None => return Err(Error::invalid_length($n, &self)), } @@ -1184,7 +1215,7 @@ macro_rules! array_impls { { let mut fail_idx = None; for (idx, dest) in self.0[..].iter_mut().enumerate() { - if try!(seq.next_element_seed(InPlaceSeed(dest))).is_none() { + if tri!(seq.next_element_seed(InPlaceSeed(dest))).is_none() { fail_idx = Some(idx); break; } @@ -1282,7 +1313,7 @@ macro_rules! 
tuple_impls { A: SeqAccess<'de>, { $( - let $name = match try!(seq.next_element()) { + let $name = match tri!(seq.next_element()) { Some(value) => value, None => return Err(Error::invalid_length($n, &self)), }; @@ -1316,7 +1347,7 @@ macro_rules! tuple_impls { A: SeqAccess<'de>, { $( - if try!(seq.next_element_seed(InPlaceSeed(&mut (self.0).$n))).is_none() { + if tri!(seq.next_element_seed(InPlaceSeed(&mut (self.0).$n))).is_none() { return Err(Error::invalid_length($n, &self)); } )+ @@ -1353,13 +1384,14 @@ tuple_impls! { //////////////////////////////////////////////////////////////////////////////// -#[cfg(any(feature = "std", feature = "alloc"))] macro_rules! map_impl { ( + $(#[$attr:meta])* $ty:ident <K $(: $kbound1:ident $(+ $kbound2:ident)*)*, V $(, $typaram:ident : $bound1:ident $(+ $bound2:ident)*)*>, $access:ident, - $with_capacity:expr + $with_capacity:expr, ) => { + $(#[$attr])* impl<'de, K, V $(, $typaram)*> Deserialize<'de> for $ty<K, V $(, $typaram)*> where K: Deserialize<'de> $(+ $kbound1 $(+ $kbound2)*)*, @@ -1393,7 +1425,7 @@ macro_rules! map_impl { { let mut values = $with_capacity; - while let Some((key, value)) = try!($access.next_entry()) { + while let Some((key, value)) = tri!($access.next_entry()) { values.insert(key, value); } @@ -1408,23 +1440,30 @@ macro_rules! map_impl { } } -#[cfg(any(feature = "std", feature = "alloc"))] -map_impl!( +map_impl! { + #[cfg(any(feature = "std", feature = "alloc"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] BTreeMap<K: Ord, V>, map, - BTreeMap::new()); + BTreeMap::new(), +} -#[cfg(feature = "std")] -map_impl!( +map_impl! 
{ + #[cfg(feature = "std")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] HashMap<K: Eq + Hash, V, S: BuildHasher + Default>, map, - HashMap::with_capacity_and_hasher(size_hint::cautious(map.size_hint()), S::default())); + HashMap::with_capacity_and_hasher(size_hint::cautious::<(K, V)>(map.size_hint()), S::default()), +} //////////////////////////////////////////////////////////////////////////////// -#[cfg(feature = "std")] macro_rules! parse_ip_impl { - ($expecting:tt $ty:ty; $size:tt) => { + ( + $(#[$attr:meta])* + $ty:ty, $expecting:expr, $size:tt + ) => { + $(#[$attr])* impl<'de> Deserialize<'de> for $ty { fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> where @@ -1451,7 +1490,7 @@ macro_rules! variant_identifier { $($variant),* } - static $variants_name: &'static [&'static str] = &[$(stringify!($variant)),*]; + static $variants_name: &[&str] = &[$(stringify!($variant)),*]; impl<'de> Deserialize<'de> for $name_kind { fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> @@ -1541,7 +1580,7 @@ macro_rules! deserialize_enum { where A: EnumAccess<'de>, { - match try!(data.variant()) { + match tri!(data.variant()) { $( ($name_kind :: $variant, v) => v.newtype_variant().map($name :: $variant), )* @@ -1553,6 +1592,7 @@ macro_rules! deserialize_enum { } #[cfg(feature = "std")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] impl<'de> Deserialize<'de> for net::IpAddr { fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> where @@ -1561,7 +1601,7 @@ impl<'de> Deserialize<'de> for net::IpAddr { if deserializer.is_human_readable() { deserializer.deserialize_str(FromStrVisitor::new("IP address")) } else { - use lib::net::IpAddr; + use crate::lib::net::IpAddr; deserialize_enum! { IpAddr IpAddrKind (V4; b"V4"; 0, V6; b"V6"; 1) "`V4` or `V6`", @@ -1571,15 +1611,25 @@ impl<'de> Deserialize<'de> for net::IpAddr { } } -#[cfg(feature = "std")] -parse_ip_impl!("IPv4 address" net::Ipv4Addr; 4); +parse_ip_impl! 
{ + #[cfg(feature = "std")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] + net::Ipv4Addr, "IPv4 address", 4 +} -#[cfg(feature = "std")] -parse_ip_impl!("IPv6 address" net::Ipv6Addr; 16); +parse_ip_impl! { + #[cfg(feature = "std")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] + net::Ipv6Addr, "IPv6 address", 16 +} -#[cfg(feature = "std")] macro_rules! parse_socket_impl { - ($expecting:tt $ty:ty, $new:expr) => { + ( + $(#[$attr:meta])* + $ty:ty, $expecting:tt, + $new:expr, + ) => { + $(#[$attr])* impl<'de> Deserialize<'de> for $ty { fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> where @@ -1588,7 +1638,7 @@ macro_rules! parse_socket_impl { if deserializer.is_human_readable() { deserializer.deserialize_str(FromStrVisitor::new($expecting)) } else { - <(_, u16)>::deserialize(deserializer).map(|(ip, port)| $new(ip, port)) + <(_, u16)>::deserialize(deserializer).map($new) } } } @@ -1596,6 +1646,7 @@ macro_rules! parse_socket_impl { } #[cfg(feature = "std")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] impl<'de> Deserialize<'de> for net::SocketAddr { fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> where @@ -1604,7 +1655,7 @@ impl<'de> Deserialize<'de> for net::SocketAddr { if deserializer.is_human_readable() { deserializer.deserialize_str(FromStrVisitor::new("socket address")) } else { - use lib::net::SocketAddr; + use crate::lib::net::SocketAddr; deserialize_enum! { SocketAddr SocketAddrKind (V4; b"V4"; 0, V6; b"V6"; 1) "`V4` or `V6`", @@ -1614,13 +1665,19 @@ impl<'de> Deserialize<'de> for net::SocketAddr { } } -#[cfg(feature = "std")] -parse_socket_impl!("IPv4 socket address" net::SocketAddrV4, net::SocketAddrV4::new); +parse_socket_impl! 
{ + #[cfg(feature = "std")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] + net::SocketAddrV4, "IPv4 socket address", + |(ip, port)| net::SocketAddrV4::new(ip, port), +} -#[cfg(feature = "std")] -parse_socket_impl!("IPv6 socket address" net::SocketAddrV6, |ip, port| net::SocketAddrV6::new( - ip, port, 0, 0 -)); +parse_socket_impl! { + #[cfg(feature = "std")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] + net::SocketAddrV6, "IPv6 socket address", + |(ip, port)| net::SocketAddrV6::new(ip, port, 0, 0), +} //////////////////////////////////////////////////////////////////////////////// @@ -1653,6 +1710,7 @@ impl<'a> Visitor<'a> for PathVisitor { } #[cfg(feature = "std")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] impl<'de: 'a, 'a> Deserialize<'de> for &'a Path { fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> where @@ -1707,6 +1765,7 @@ impl<'de> Visitor<'de> for PathBufVisitor { } #[cfg(feature = "std")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] impl<'de> Deserialize<'de> for PathBuf { fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> where @@ -1716,8 +1775,11 @@ impl<'de> Deserialize<'de> for PathBuf { } } -#[cfg(all(feature = "std", not(no_de_boxed_path)))] -forwarded_impl!((), Box<Path>, PathBuf::into_boxed_path); +forwarded_impl! 
{ + #[cfg(feature = "std")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] + (), Box<Path>, PathBuf::into_boxed_path +} //////////////////////////////////////////////////////////////////////////////// @@ -1750,7 +1812,7 @@ impl<'de> Visitor<'de> for OsStringVisitor { { use std::os::unix::ffi::OsStringExt; - match try!(data.variant()) { + match tri!(data.variant()) { (OsStringKind::Unix, v) => v.newtype_variant().map(OsString::from_vec), (OsStringKind::Windows, _) => Err(Error::custom( "cannot deserialize Windows OS string on Unix", @@ -1765,7 +1827,7 @@ impl<'de> Visitor<'de> for OsStringVisitor { { use std::os::windows::ffi::OsStringExt; - match try!(data.variant()) { + match tri!(data.variant()) { (OsStringKind::Windows, v) => v .newtype_variant::<Vec<u16>>() .map(|vec| OsString::from_wide(&vec)), @@ -1777,6 +1839,7 @@ impl<'de> Visitor<'de> for OsStringVisitor { } #[cfg(all(feature = "std", any(unix, windows)))] +#[cfg_attr(doc_cfg, doc(cfg(all(feature = "std", any(unix, windows)))))] impl<'de> Deserialize<'de> for OsString { fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> where @@ -1788,40 +1851,32 @@ impl<'de> Deserialize<'de> for OsString { //////////////////////////////////////////////////////////////////////////////// -#[cfg(any(feature = "std", feature = "alloc"))] -forwarded_impl!((T), Box<T>, Box::new); - -#[cfg(any(feature = "std", feature = "alloc"))] -forwarded_impl!((T), Box<[T]>, Vec::into_boxed_slice); +forwarded_impl! { + #[cfg(any(feature = "std", feature = "alloc"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] + (T), Box<T>, Box::new +} -#[cfg(any(feature = "std", feature = "alloc"))] -forwarded_impl!((), Box<str>, String::into_boxed_str); +forwarded_impl! 
{ + #[cfg(any(feature = "std", feature = "alloc"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] + (T), Box<[T]>, Vec::into_boxed_slice +} -#[cfg(all(no_de_rc_dst, feature = "rc", any(feature = "std", feature = "alloc")))] forwarded_impl! { - /// This impl requires the [`"rc"`] Cargo feature of Serde. - /// - /// Deserializing a data structure containing `Arc` will not attempt to - /// deduplicate `Arc` references to the same data. Every deserialized `Arc` - /// will end up with a strong count of 1. - /// - /// [`"rc"`]: https://serde.rs/feature-flags.html#-features-rc - (T), Arc<T>, Arc::new + #[cfg(any(feature = "std", feature = "alloc"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] + (), Box<str>, String::into_boxed_str } -#[cfg(all(no_de_rc_dst, feature = "rc", any(feature = "std", feature = "alloc")))] forwarded_impl! { - /// This impl requires the [`"rc"`] Cargo feature of Serde. - /// - /// Deserializing a data structure containing `Rc` will not attempt to - /// deduplicate `Rc` references to the same data. Every deserialized `Rc` - /// will end up with a strong count of 1. 
- /// - /// [`"rc"`]: https://serde.rs/feature-flags.html#-features-rc - (T), Rc<T>, Rc::new + #[cfg(all(feature = "std", any(unix, windows)))] + #[cfg_attr(doc_cfg, doc(cfg(all(feature = "std", any(unix, windows)))))] + (), Box<OsStr>, OsString::into_boxed_os_str } #[cfg(any(feature = "std", feature = "alloc"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] impl<'de, 'a, T: ?Sized> Deserialize<'de> for Cow<'a, T> where T: ToOwned, @@ -1843,6 +1898,10 @@ where /// /// [`"rc"`]: https://serde.rs/feature-flags.html#-features-rc #[cfg(all(feature = "rc", any(feature = "std", feature = "alloc")))] +#[cfg_attr( + doc_cfg, + doc(cfg(all(feature = "rc", any(feature = "std", feature = "alloc")))) +)] impl<'de, T: ?Sized> Deserialize<'de> for RcWeak<T> where T: Deserialize<'de>, @@ -1851,7 +1910,7 @@ where where D: Deserializer<'de>, { - try!(Option::<T>::deserialize(deserializer)); + tri!(Option::<T>::deserialize(deserializer)); Ok(RcWeak::new()) } } @@ -1861,6 +1920,10 @@ where /// /// [`"rc"`]: https://serde.rs/feature-flags.html#-features-rc #[cfg(all(feature = "rc", any(feature = "std", feature = "alloc")))] +#[cfg_attr( + doc_cfg, + doc(cfg(all(feature = "rc", any(feature = "std", feature = "alloc")))) +)] impl<'de, T: ?Sized> Deserialize<'de> for ArcWeak<T> where T: Deserialize<'de>, @@ -1869,24 +1932,19 @@ where where D: Deserializer<'de>, { - try!(Option::<T>::deserialize(deserializer)); + tri!(Option::<T>::deserialize(deserializer)); Ok(ArcWeak::new()) } } //////////////////////////////////////////////////////////////////////////////// -#[cfg(all( - not(no_de_rc_dst), - feature = "rc", - any(feature = "std", feature = "alloc") -))] macro_rules! box_forwarded_impl { ( - $(#[doc = $doc:tt])* + $(#[$attr:meta])* $t:ident ) => { - $(#[doc = $doc])* + $(#[$attr])* impl<'de, T: ?Sized> Deserialize<'de> for $t<T> where Box<T>: Deserialize<'de>, @@ -1901,11 +1959,6 @@ macro_rules! 
box_forwarded_impl { }; } -#[cfg(all( - not(no_de_rc_dst), - feature = "rc", - any(feature = "std", feature = "alloc") -))] box_forwarded_impl! { /// This impl requires the [`"rc"`] Cargo feature of Serde. /// @@ -1914,14 +1967,11 @@ box_forwarded_impl! { /// will end up with a strong count of 1. /// /// [`"rc"`]: https://serde.rs/feature-flags.html#-features-rc + #[cfg(all(feature = "rc", any(feature = "std", feature = "alloc")))] + #[cfg_attr(doc_cfg, doc(cfg(all(feature = "rc", any(feature = "std", feature = "alloc")))))] Rc } -#[cfg(all( - not(no_de_rc_dst), - feature = "rc", - any(feature = "std", feature = "alloc") -))] box_forwarded_impl! { /// This impl requires the [`"rc"`] Cargo feature of Serde. /// @@ -1930,6 +1980,8 @@ box_forwarded_impl! { /// will end up with a strong count of 1. /// /// [`"rc"`]: https://serde.rs/feature-flags.html#-features-rc + #[cfg(all(feature = "rc", any(feature = "std", feature = "alloc")))] + #[cfg_attr(doc_cfg, doc(cfg(all(feature = "rc", any(feature = "std", feature = "alloc")))))] Arc } @@ -1947,13 +1999,21 @@ where } } -forwarded_impl!((T), RefCell<T>, RefCell::new); +forwarded_impl! { + (T), RefCell<T>, RefCell::new +} -#[cfg(feature = "std")] -forwarded_impl!((T), Mutex<T>, Mutex::new); +forwarded_impl! { + #[cfg(feature = "std")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] + (T), Mutex<T>, Mutex::new +} -#[cfg(feature = "std")] -forwarded_impl!((T), RwLock<T>, RwLock::new); +forwarded_impl! 
{ + #[cfg(feature = "std")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] + (T), RwLock<T>, RwLock::new +} //////////////////////////////////////////////////////////////////////////////// @@ -1965,7 +2025,6 @@ forwarded_impl!((T), RwLock<T>, RwLock::new); // secs: u64, // nanos: u32, // } -#[cfg(any(feature = "std", not(no_core_duration)))] impl<'de> Deserialize<'de> for Duration { fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> where @@ -2013,7 +2072,7 @@ impl<'de> Deserialize<'de> for Duration { b"secs" => Ok(Field::Secs), b"nanos" => Ok(Field::Nanos), _ => { - let value = ::__private::from_utf8_lossy(value); + let value = crate::__private::from_utf8_lossy(value); Err(Error::unknown_field(&*value, FIELDS)) } } @@ -2048,19 +2107,19 @@ impl<'de> Deserialize<'de> for Duration { where A: SeqAccess<'de>, { - let secs: u64 = match try!(seq.next_element()) { + let secs: u64 = match tri!(seq.next_element()) { Some(value) => value, None => { return Err(Error::invalid_length(0, &self)); } }; - let nanos: u32 = match try!(seq.next_element()) { + let nanos: u32 = match tri!(seq.next_element()) { Some(value) => value, None => { return Err(Error::invalid_length(1, &self)); } }; - try!(check_overflow(secs, nanos)); + tri!(check_overflow(secs, nanos)); Ok(Duration::new(secs, nanos)) } @@ -2070,19 +2129,19 @@ impl<'de> Deserialize<'de> for Duration { { let mut secs: Option<u64> = None; let mut nanos: Option<u32> = None; - while let Some(key) = try!(map.next_key()) { + while let Some(key) = tri!(map.next_key()) { match key { Field::Secs => { if secs.is_some() { return Err(<A::Error as Error>::duplicate_field("secs")); } - secs = Some(try!(map.next_value())); + secs = Some(tri!(map.next_value())); } Field::Nanos => { if nanos.is_some() { return Err(<A::Error as Error>::duplicate_field("nanos")); } - nanos = Some(try!(map.next_value())); + nanos = Some(tri!(map.next_value())); } } } @@ -2094,12 +2153,12 @@ impl<'de> Deserialize<'de> for Duration { Some(nanos) => 
nanos, None => return Err(<A::Error as Error>::missing_field("nanos")), }; - try!(check_overflow(secs, nanos)); + tri!(check_overflow(secs, nanos)); Ok(Duration::new(secs, nanos)) } } - const FIELDS: &'static [&'static str] = &["secs", "nanos"]; + const FIELDS: &[&str] = &["secs", "nanos"]; deserializer.deserialize_struct("Duration", FIELDS, DurationVisitor) } } @@ -2107,6 +2166,7 @@ impl<'de> Deserialize<'de> for Duration { //////////////////////////////////////////////////////////////////////////////// #[cfg(feature = "std")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] impl<'de> Deserialize<'de> for SystemTime { fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> where @@ -2186,19 +2246,19 @@ impl<'de> Deserialize<'de> for SystemTime { where A: SeqAccess<'de>, { - let secs: u64 = match try!(seq.next_element()) { + let secs: u64 = match tri!(seq.next_element()) { Some(value) => value, None => { return Err(Error::invalid_length(0, &self)); } }; - let nanos: u32 = match try!(seq.next_element()) { + let nanos: u32 = match tri!(seq.next_element()) { Some(value) => value, None => { return Err(Error::invalid_length(1, &self)); } }; - try!(check_overflow(secs, nanos)); + tri!(check_overflow(secs, nanos)); Ok(Duration::new(secs, nanos)) } @@ -2208,7 +2268,7 @@ impl<'de> Deserialize<'de> for SystemTime { { let mut secs: Option<u64> = None; let mut nanos: Option<u32> = None; - while let Some(key) = try!(map.next_key()) { + while let Some(key) = tri!(map.next_key()) { match key { Field::Secs => { if secs.is_some() { @@ -2216,7 +2276,7 @@ impl<'de> Deserialize<'de> for SystemTime { "secs_since_epoch", )); } - secs = Some(try!(map.next_value())); + secs = Some(tri!(map.next_value())); } Field::Nanos => { if nanos.is_some() { @@ -2224,7 +2284,7 @@ impl<'de> Deserialize<'de> for SystemTime { "nanos_since_epoch", )); } - nanos = Some(try!(map.next_value())); + nanos = Some(tri!(map.next_value())); } } } @@ -2236,13 +2296,13 @@ impl<'de> Deserialize<'de> for 
SystemTime { Some(nanos) => nanos, None => return Err(<A::Error as Error>::missing_field("nanos_since_epoch")), }; - try!(check_overflow(secs, nanos)); + tri!(check_overflow(secs, nanos)); Ok(Duration::new(secs, nanos)) } } - const FIELDS: &'static [&'static str] = &["secs_since_epoch", "nanos_since_epoch"]; - let duration = try!(deserializer.deserialize_struct("SystemTime", FIELDS, DurationVisitor)); + const FIELDS: &[&str] = &["secs_since_epoch", "nanos_since_epoch"]; + let duration = tri!(deserializer.deserialize_struct("SystemTime", FIELDS, DurationVisitor)); #[cfg(not(no_systemtime_checked_add))] let ret = UNIX_EPOCH .checked_add(duration) @@ -2259,9 +2319,9 @@ impl<'de> Deserialize<'de> for SystemTime { // // #[derive(Deserialize)] // #[serde(deny_unknown_fields)] -// struct Range { -// start: u64, -// end: u32, +// struct Range<Idx> { +// start: Idx, +// end: Idx, // } impl<'de, Idx> Deserialize<'de> for Range<Idx> where @@ -2271,7 +2331,7 @@ where where D: Deserializer<'de>, { - let (start, end) = try!(deserializer.deserialize_struct( + let (start, end) = tri!(deserializer.deserialize_struct( "Range", range::FIELDS, range::RangeVisitor { @@ -2283,7 +2343,6 @@ where } } -#[cfg(not(no_range_inclusive))] impl<'de, Idx> Deserialize<'de> for RangeInclusive<Idx> where Idx: Deserialize<'de>, @@ -2292,7 +2351,7 @@ where where D: Deserializer<'de>, { - let (start, end) = try!(deserializer.deserialize_struct( + let (start, end) = tri!(deserializer.deserialize_struct( "RangeInclusive", range::FIELDS, range::RangeVisitor { @@ -2305,11 +2364,11 @@ where } mod range { - use lib::*; + use crate::lib::*; - use de::{Deserialize, Deserializer, Error, MapAccess, SeqAccess, Visitor}; + use crate::de::{Deserialize, Deserializer, Error, MapAccess, SeqAccess, Visitor}; - pub const FIELDS: &'static [&'static str] = &["start", "end"]; + pub const FIELDS: &[&str] = &["start", "end"]; // If this were outside of the serde crate, it would just use: // @@ -2353,7 +2412,7 @@ mod range { 
b"start" => Ok(Field::Start), b"end" => Ok(Field::End), _ => { - let value = ::__private::from_utf8_lossy(value); + let value = crate::__private::from_utf8_lossy(value); Err(Error::unknown_field(&*value, FIELDS)) } } @@ -2383,13 +2442,13 @@ mod range { where A: SeqAccess<'de>, { - let start: Idx = match try!(seq.next_element()) { + let start: Idx = match tri!(seq.next_element()) { Some(value) => value, None => { return Err(Error::invalid_length(0, &self)); } }; - let end: Idx = match try!(seq.next_element()) { + let end: Idx = match tri!(seq.next_element()) { Some(value) => value, None => { return Err(Error::invalid_length(1, &self)); @@ -2404,19 +2463,19 @@ mod range { { let mut start: Option<Idx> = None; let mut end: Option<Idx> = None; - while let Some(key) = try!(map.next_key()) { + while let Some(key) = tri!(map.next_key()) { match key { Field::Start => { if start.is_some() { return Err(<A::Error as Error>::duplicate_field("start")); } - start = Some(try!(map.next_value())); + start = Some(tri!(map.next_value())); } Field::End => { if end.is_some() { return Err(<A::Error as Error>::duplicate_field("end")); } - end = Some(try!(map.next_value())); + end = Some(tri!(map.next_value())); } } } @@ -2435,7 +2494,282 @@ mod range { //////////////////////////////////////////////////////////////////////////////// -#[cfg(any(not(no_ops_bound), all(feature = "std", not(no_collections_bound))))] +// Similar to: +// +// #[derive(Deserialize)] +// #[serde(deny_unknown_fields)] +// struct RangeFrom<Idx> { +// start: Idx, +// } +impl<'de, Idx> Deserialize<'de> for RangeFrom<Idx> +where + Idx: Deserialize<'de>, +{ + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: Deserializer<'de>, + { + let start = tri!(deserializer.deserialize_struct( + "RangeFrom", + range_from::FIELDS, + range_from::RangeFromVisitor { + expecting: "struct RangeFrom", + phantom: PhantomData, + }, + )); + Ok(start..) 
+ } +} + +mod range_from { + use crate::lib::*; + + use crate::de::{Deserialize, Deserializer, Error, MapAccess, SeqAccess, Visitor}; + + pub const FIELDS: &[&str] = &["start"]; + + // If this were outside of the serde crate, it would just use: + // + // #[derive(Deserialize)] + // #[serde(field_identifier, rename_all = "lowercase")] + enum Field { + Start, + } + + impl<'de> Deserialize<'de> for Field { + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: Deserializer<'de>, + { + struct FieldVisitor; + + impl<'de> Visitor<'de> for FieldVisitor { + type Value = Field; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("`start`") + } + + fn visit_str<E>(self, value: &str) -> Result<Self::Value, E> + where + E: Error, + { + match value { + "start" => Ok(Field::Start), + _ => Err(Error::unknown_field(value, FIELDS)), + } + } + + fn visit_bytes<E>(self, value: &[u8]) -> Result<Self::Value, E> + where + E: Error, + { + match value { + b"start" => Ok(Field::Start), + _ => { + let value = crate::__private::from_utf8_lossy(value); + Err(Error::unknown_field(&*value, FIELDS)) + } + } + } + } + + deserializer.deserialize_identifier(FieldVisitor) + } + } + + pub struct RangeFromVisitor<Idx> { + pub expecting: &'static str, + pub phantom: PhantomData<Idx>, + } + + impl<'de, Idx> Visitor<'de> for RangeFromVisitor<Idx> + where + Idx: Deserialize<'de>, + { + type Value = Idx; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str(self.expecting) + } + + fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error> + where + A: SeqAccess<'de>, + { + let start: Idx = match tri!(seq.next_element()) { + Some(value) => value, + None => { + return Err(Error::invalid_length(0, &self)); + } + }; + Ok(start) + } + + fn visit_map<A>(self, mut map: A) -> Result<Self::Value, A::Error> + where + A: MapAccess<'de>, + { + let mut start: Option<Idx> = None; + while let Some(key) = 
tri!(map.next_key()) { + match key { + Field::Start => { + if start.is_some() { + return Err(<A::Error as Error>::duplicate_field("start")); + } + start = Some(tri!(map.next_value())); + } + } + } + let start = match start { + Some(start) => start, + None => return Err(<A::Error as Error>::missing_field("start")), + }; + Ok(start) + } + } +} + +//////////////////////////////////////////////////////////////////////////////// + +// Similar to: +// +// #[derive(Deserialize)] +// #[serde(deny_unknown_fields)] +// struct RangeTo<Idx> { +// end: Idx, +// } +impl<'de, Idx> Deserialize<'de> for RangeTo<Idx> +where + Idx: Deserialize<'de>, +{ + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: Deserializer<'de>, + { + let end = tri!(deserializer.deserialize_struct( + "RangeTo", + range_to::FIELDS, + range_to::RangeToVisitor { + expecting: "struct RangeTo", + phantom: PhantomData, + }, + )); + Ok(..end) + } +} + +mod range_to { + use crate::lib::*; + + use crate::de::{Deserialize, Deserializer, Error, MapAccess, SeqAccess, Visitor}; + + pub const FIELDS: &[&str] = &["end"]; + + // If this were outside of the serde crate, it would just use: + // + // #[derive(Deserialize)] + // #[serde(field_identifier, rename_all = "lowercase")] + enum Field { + End, + } + + impl<'de> Deserialize<'de> for Field { + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: Deserializer<'de>, + { + struct FieldVisitor; + + impl<'de> Visitor<'de> for FieldVisitor { + type Value = Field; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("`end`") + } + + fn visit_str<E>(self, value: &str) -> Result<Self::Value, E> + where + E: Error, + { + match value { + "end" => Ok(Field::End), + _ => Err(Error::unknown_field(value, FIELDS)), + } + } + + fn visit_bytes<E>(self, value: &[u8]) -> Result<Self::Value, E> + where + E: Error, + { + match value { + b"end" => Ok(Field::End), + _ => { + let value = 
crate::__private::from_utf8_lossy(value); + Err(Error::unknown_field(&*value, FIELDS)) + } + } + } + } + + deserializer.deserialize_identifier(FieldVisitor) + } + } + + pub struct RangeToVisitor<Idx> { + pub expecting: &'static str, + pub phantom: PhantomData<Idx>, + } + + impl<'de, Idx> Visitor<'de> for RangeToVisitor<Idx> + where + Idx: Deserialize<'de>, + { + type Value = Idx; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str(self.expecting) + } + + fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error> + where + A: SeqAccess<'de>, + { + let end: Idx = match tri!(seq.next_element()) { + Some(value) => value, + None => { + return Err(Error::invalid_length(0, &self)); + } + }; + Ok(end) + } + + fn visit_map<A>(self, mut map: A) -> Result<Self::Value, A::Error> + where + A: MapAccess<'de>, + { + let mut end: Option<Idx> = None; + while let Some(key) = tri!(map.next_key()) { + match key { + Field::End => { + if end.is_some() { + return Err(<A::Error as Error>::duplicate_field("end")); + } + end = Some(tri!(map.next_value())); + } + } + } + let end = match end { + Some(end) => end, + None => return Err(<A::Error as Error>::missing_field("end")), + }; + Ok(end) + } + } +} + +//////////////////////////////////////////////////////////////////////////////// + impl<'de, T> Deserialize<'de> for Bound<T> where T: Deserialize<'de>, @@ -2527,7 +2861,7 @@ where where A: EnumAccess<'de>, { - match try!(data.variant()) { + match tri!(data.variant()) { (Field::Unbounded, v) => v.unit_variant().map(|()| Bound::Unbounded), (Field::Included, v) => v.newtype_variant().map(Bound::Included), (Field::Excluded, v) => v.newtype_variant().map(Bound::Excluded), @@ -2535,7 +2869,7 @@ where } } - const VARIANTS: &'static [&'static str] = &["Unbounded", "Included", "Excluded"]; + const VARIANTS: &[&str] = &["Unbounded", "Included", "Excluded"]; deserializer.deserialize_enum("Bound", VARIANTS, BoundVisitor(PhantomData)) } @@ -2636,14 
+2970,14 @@ where where A: EnumAccess<'de>, { - match try!(data.variant()) { + match tri!(data.variant()) { (Field::Ok, v) => v.newtype_variant().map(Ok), (Field::Err, v) => v.newtype_variant().map(Err), } } } - const VARIANTS: &'static [&'static str] = &["Ok", "Err"]; + const VARIANTS: &[&str] = &["Ok", "Err"]; deserializer.deserialize_enum("Result", VARIANTS, ResultVisitor(PhantomData)) } @@ -2668,6 +3002,7 @@ macro_rules! atomic_impl { ($($ty:ident $size:expr)*) => { $( #[cfg(any(no_target_has_atomic, target_has_atomic = $size))] + #[cfg_attr(doc_cfg, doc(cfg(all(feature = "std", target_has_atomic = $size))))] impl<'de> Deserialize<'de> for $ty { fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> where @@ -2709,7 +3044,7 @@ struct FromStrVisitor<T> { impl<T> FromStrVisitor<T> { fn new(expecting: &'static str) -> Self { FromStrVisitor { - expecting: expecting, + expecting, ty: PhantomData, } } diff --git a/vendor/serde/src/de/mod.rs b/vendor/serde/src/de/mod.rs index ca29ec6..1924fe3 100644 --- a/vendor/serde/src/de/mod.rs +++ b/vendor/serde/src/de/mod.rs @@ -64,8 +64,8 @@ //! - RefCell\<T\> //! - Mutex\<T\> //! - RwLock\<T\> -//! - Rc\<T\> *(if* features = ["rc"] *is enabled)* -//! - Arc\<T\> *(if* features = ["rc"] *is enabled)* +//! - Rc\<T\> *(if* features = \["rc"\] *is enabled)* +//! - Arc\<T\> *(if* features = \["rc"\] *is enabled)* //! - **Collection types**: //! - BTreeMap\<K, V\> //! - BTreeSet\<T\> @@ -112,26 +112,28 @@ //! [derive section of the manual]: https://serde.rs/derive.html //! 
[data formats]: https://serde.rs/#data-formats -use lib::*; +use crate::lib::*; //////////////////////////////////////////////////////////////////////////////// pub mod value; -#[cfg(not(no_integer128))] mod format; mod ignored_any; mod impls; -mod utf8; +pub(crate) mod size_hint; pub use self::ignored_any::IgnoredAny; +#[cfg(not(any(feature = "std", feature = "unstable")))] +#[doc(no_inline)] +pub use crate::std_error::Error as StdError; +#[cfg(all(feature = "unstable", not(feature = "std")))] +#[doc(no_inline)] +pub use core::error::Error as StdError; #[cfg(feature = "std")] #[doc(no_inline)] pub use std::error::Error as StdError; -#[cfg(not(feature = "std"))] -#[doc(no_inline)] -pub use std_error::Error as StdError; //////////////////////////////////////////////////////////////////////////////// @@ -162,7 +164,7 @@ macro_rules! declare_error_trait { /// /// The message should not be capitalized and should not end with a period. /// - /// ```edition2018 + /// ```edition2021 /// # use std::str::FromStr; /// # /// # struct IpAddr; @@ -307,7 +309,7 @@ declare_error_trait!(Error: Sized + Debug + Display); /// This is used as an argument to the `invalid_type`, `invalid_value`, and /// `invalid_length` methods of the `Error` trait to build error messages. 
/// -/// ```edition2018 +/// ```edition2021 /// # use std::fmt; /// # /// # use serde::de::{self, Unexpected, Visitor}; @@ -400,20 +402,20 @@ impl<'a> fmt::Display for Unexpected<'a> { Bool(b) => write!(formatter, "boolean `{}`", b), Unsigned(i) => write!(formatter, "integer `{}`", i), Signed(i) => write!(formatter, "integer `{}`", i), - Float(f) => write!(formatter, "floating point `{}`", f), + Float(f) => write!(formatter, "floating point `{}`", WithDecimalPoint(f)), Char(c) => write!(formatter, "character `{}`", c), Str(s) => write!(formatter, "string {:?}", s), - Bytes(_) => write!(formatter, "byte array"), - Unit => write!(formatter, "unit value"), - Option => write!(formatter, "Option value"), - NewtypeStruct => write!(formatter, "newtype struct"), - Seq => write!(formatter, "sequence"), - Map => write!(formatter, "map"), - Enum => write!(formatter, "enum"), - UnitVariant => write!(formatter, "unit variant"), - NewtypeVariant => write!(formatter, "newtype variant"), - TupleVariant => write!(formatter, "tuple variant"), - StructVariant => write!(formatter, "struct variant"), + Bytes(_) => formatter.write_str("byte array"), + Unit => formatter.write_str("unit value"), + Option => formatter.write_str("Option value"), + NewtypeStruct => formatter.write_str("newtype struct"), + Seq => formatter.write_str("sequence"), + Map => formatter.write_str("map"), + Enum => formatter.write_str("enum"), + UnitVariant => formatter.write_str("unit variant"), + NewtypeVariant => formatter.write_str("newtype variant"), + TupleVariant => formatter.write_str("tuple variant"), + StructVariant => formatter.write_str("struct variant"), Other(other) => formatter.write_str(other), } } @@ -432,10 +434,9 @@ impl<'a> fmt::Display for Unexpected<'a> { /// Within the context of a `Visitor` implementation, the `Visitor` itself /// (`&self`) is an implementation of this trait. 
/// -/// ```edition2018 -/// # use std::fmt; -/// # +/// ```edition2021 /// # use serde::de::{self, Unexpected, Visitor}; +/// # use std::fmt; /// # /// # struct Example; /// # @@ -457,7 +458,7 @@ impl<'a> fmt::Display for Unexpected<'a> { /// /// Outside of a `Visitor`, `&"..."` can be used. /// -/// ```edition2018 +/// ```edition2021 /// # use serde::de::{self, Unexpected}; /// # /// # fn example<E>() -> Result<(), E> @@ -465,7 +466,10 @@ impl<'a> fmt::Display for Unexpected<'a> { /// # E: de::Error, /// # { /// # let v = true; -/// return Err(de::Error::invalid_type(Unexpected::Bool(v), &"a negative integer")); +/// return Err(de::Error::invalid_type( +/// Unexpected::Bool(v), +/// &"a negative integer", +/// )); /// # } /// ``` pub trait Expected { @@ -564,7 +568,7 @@ pub trait Deserialize<'de>: Sized { D: Deserializer<'de>, { // Default implementation just delegates to `deserialize` impl. - *place = try!(Deserialize::deserialize(deserializer)); + *place = tri!(Deserialize::deserialize(deserializer)); Ok(()) } } @@ -577,7 +581,7 @@ pub trait Deserialize<'de>: Sized { /// from the input string, but a `from_reader` function may only deserialize /// owned data. /// -/// ```edition2018 +/// ```edition2021 /// # use serde::de::{Deserialize, DeserializeOwned}; /// # use std::io::{Read, Result}; /// # @@ -616,7 +620,7 @@ impl<T> DeserializeOwned for T where T: for<'de> Deserialize<'de> {} /// /// The canonical API for stateless deserialization looks like this: /// -/// ```edition2018 +/// ```edition2021 /// # use serde::Deserialize; /// # /// # enum Error {} @@ -630,7 +634,7 @@ impl<T> DeserializeOwned for T where T: for<'de> Deserialize<'de> {} /// Adjusting an API like this to support stateful deserialization is a matter /// of accepting a seed as input: /// -/// ```edition2018 +/// ```edition2021 /// # use serde::de::DeserializeSeed; /// # /// # enum Error {} @@ -663,12 +667,11 @@ impl<T> DeserializeOwned for T where T: for<'de> Deserialize<'de> {} /// into it. 
This requires stateful deserialization using the `DeserializeSeed` /// trait. /// -/// ```edition2018 +/// ```edition2021 +/// use serde::de::{Deserialize, DeserializeSeed, Deserializer, SeqAccess, Visitor}; /// use std::fmt; /// use std::marker::PhantomData; /// -/// use serde::de::{Deserialize, DeserializeSeed, Deserializer, SeqAccess, Visitor}; -/// /// // A DeserializeSeed implementation that uses stateful deserialization to /// // append array elements onto the end of an existing vector. The preexisting /// // state ("seed") in this case is the Vec<T>. The `deserialize` method of @@ -709,7 +712,7 @@ impl<T> DeserializeOwned for T where T: for<'de> Deserialize<'de> {} /// { /// // Decrease the number of reallocations if there are many elements /// if let Some(size_hint) = seq.size_hint() { -/// self.0.reserve(size_hint); +/// self.0.reserve(size_hint); /// } /// /// // Visit each element in the inner array and push it onto @@ -945,18 +948,15 @@ pub trait Deserializer<'de>: Sized { where V: Visitor<'de>; - serde_if_integer128! { - /// Hint that the `Deserialize` type is expecting an `i128` value. - /// - /// This method is available only on Rust compiler versions >=1.26. The - /// default behavior unconditionally returns an error. - fn deserialize_i128<V>(self, visitor: V) -> Result<V::Value, Self::Error> - where - V: Visitor<'de> - { - let _ = visitor; - Err(Error::custom("i128 is not supported")) - } + /// Hint that the `Deserialize` type is expecting an `i128` value. + /// + /// The default behavior unconditionally returns an error. + fn deserialize_i128<V>(self, visitor: V) -> Result<V::Value, Self::Error> + where + V: Visitor<'de>, + { + let _ = visitor; + Err(Error::custom("i128 is not supported")) } /// Hint that the `Deserialize` type is expecting a `u8` value. @@ -979,18 +979,15 @@ pub trait Deserializer<'de>: Sized { where V: Visitor<'de>; - serde_if_integer128! { - /// Hint that the `Deserialize` type is expecting an `u128` value. 
- /// - /// This method is available only on Rust compiler versions >=1.26. The - /// default behavior unconditionally returns an error. - fn deserialize_u128<V>(self, visitor: V) -> Result<V::Value, Self::Error> - where - V: Visitor<'de> - { - let _ = visitor; - Err(Error::custom("u128 is not supported")) - } + /// Hint that the `Deserialize` type is expecting an `u128` value. + /// + /// The default behavior unconditionally returns an error. + fn deserialize_u128<V>(self, visitor: V) -> Result<V::Value, Self::Error> + where + V: Visitor<'de>, + { + let _ = visitor; + Err(Error::custom("u128 is not supported")) } /// Hint that the `Deserialize` type is expecting a `f32` value. @@ -1158,7 +1155,7 @@ pub trait Deserializer<'de>: Sized { /// human-readable one and binary formats like Postcard will prefer the /// compact one. /// - /// ```edition2018 + /// ```edition2021 /// # use std::ops::Add; /// # use std::str::FromStr; /// # @@ -1225,11 +1222,11 @@ pub trait Deserializer<'de>: Sized { #[doc(hidden)] fn __deserialize_content<V>( self, - _: ::actually_private::T, + _: crate::actually_private::T, visitor: V, - ) -> Result<::private::de::Content<'de>, Self::Error> + ) -> Result<crate::__private::de::Content<'de>, Self::Error> where - V: Visitor<'de, Value = ::private::de::Content<'de>>, + V: Visitor<'de, Value = crate::__private::de::Content<'de>>, { self.deserialize_any(visitor) } @@ -1249,10 +1246,9 @@ pub trait Deserializer<'de>: Sized { /// /// # Example /// -/// ```edition2018 -/// # use std::fmt; -/// # +/// ```edition2021 /// # use serde::de::{self, Unexpected, Visitor}; +/// # use std::fmt; /// # /// /// A visitor that deserializes a long string - a string containing at least /// /// some minimum number of bytes. @@ -1290,7 +1286,7 @@ pub trait Visitor<'de>: Sized { /// "an integer between 0 and 64". The message should not be capitalized and /// should not end with a period. 
/// - /// ```edition2018 + /// ```edition2021 /// # use std::fmt; /// # /// # struct S { @@ -1363,20 +1359,20 @@ pub trait Visitor<'de>: Sized { Err(Error::invalid_type(Unexpected::Signed(v), &self)) } - serde_if_integer128! { - /// The input contains a `i128`. - /// - /// This method is available only on Rust compiler versions >=1.26. The - /// default implementation fails with a type error. - fn visit_i128<E>(self, v: i128) -> Result<Self::Value, E> - where - E: Error, - { - let mut buf = [0u8; 58]; - let mut writer = format::Buf::new(&mut buf); - fmt::Write::write_fmt(&mut writer, format_args!("integer `{}` as i128", v)).unwrap(); - Err(Error::invalid_type(Unexpected::Other(writer.as_str()), &self)) - } + /// The input contains a `i128`. + /// + /// The default implementation fails with a type error. + fn visit_i128<E>(self, v: i128) -> Result<Self::Value, E> + where + E: Error, + { + let mut buf = [0u8; 58]; + let mut writer = format::Buf::new(&mut buf); + fmt::Write::write_fmt(&mut writer, format_args!("integer `{}` as i128", v)).unwrap(); + Err(Error::invalid_type( + Unexpected::Other(writer.as_str()), + &self, + )) } /// The input contains a `u8`. @@ -1425,20 +1421,20 @@ pub trait Visitor<'de>: Sized { Err(Error::invalid_type(Unexpected::Unsigned(v), &self)) } - serde_if_integer128! { - /// The input contains a `u128`. - /// - /// This method is available only on Rust compiler versions >=1.26. The - /// default implementation fails with a type error. - fn visit_u128<E>(self, v: u128) -> Result<Self::Value, E> - where - E: Error, - { - let mut buf = [0u8; 57]; - let mut writer = format::Buf::new(&mut buf); - fmt::Write::write_fmt(&mut writer, format_args!("integer `{}` as u128", v)).unwrap(); - Err(Error::invalid_type(Unexpected::Other(writer.as_str()), &self)) - } + /// The input contains a `u128`. + /// + /// The default implementation fails with a type error. 
+ fn visit_u128<E>(self, v: u128) -> Result<Self::Value, E> + where + E: Error, + { + let mut buf = [0u8; 57]; + let mut writer = format::Buf::new(&mut buf); + fmt::Write::write_fmt(&mut writer, format_args!("integer `{}` as u128", v)).unwrap(); + Err(Error::invalid_type( + Unexpected::Other(writer.as_str()), + &self, + )) } /// The input contains an `f32`. @@ -1474,7 +1470,7 @@ pub trait Visitor<'de>: Sized { where E: Error, { - self.visit_str(utf8::encode(v).as_str()) + self.visit_str(v.encode_utf8(&mut [0u8; 4])) } /// The input contains a string. The lifetime of the string is ephemeral and @@ -1529,6 +1525,7 @@ pub trait Visitor<'de>: Sized { /// `String`. #[inline] #[cfg(any(feature = "std", feature = "alloc"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] fn visit_string<E>(self, v: String) -> Result<Self::Value, E> where E: Error, @@ -1551,7 +1548,6 @@ pub trait Visitor<'de>: Sized { where E: Error, { - let _ = v; Err(Error::invalid_type(Unexpected::Bytes(v), &self)) } @@ -1588,6 +1584,7 @@ pub trait Visitor<'de>: Sized { /// The default implementation forwards to `visit_bytes` and then drops the /// `Vec<u8>`. 
#[cfg(any(feature = "std", feature = "alloc"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] fn visit_byte_buf<E>(self, v: Vec<u8>) -> Result<Self::Value, E> where E: Error, @@ -1831,9 +1828,9 @@ pub trait MapAccess<'de> { K: DeserializeSeed<'de>, V: DeserializeSeed<'de>, { - match try!(self.next_key_seed(kseed)) { + match tri!(self.next_key_seed(kseed)) { Some(key) => { - let value = try!(self.next_value_seed(vseed)); + let value = tri!(self.next_value_seed(vseed)); Ok(Some((key, value))) } None => Ok(None), @@ -2035,7 +2032,7 @@ pub trait VariantAccess<'de>: Sized { /// If the data contains a different type of variant, the following /// `invalid_type` error should be constructed: /// - /// ```edition2018 + /// ```edition2021 /// # use serde::de::{self, value, DeserializeSeed, Visitor, VariantAccess, Unexpected}; /// # /// # struct X; @@ -2075,7 +2072,7 @@ pub trait VariantAccess<'de>: Sized { /// If the data contains a different type of variant, the following /// `invalid_type` error should be constructed: /// - /// ```edition2018 + /// ```edition2021 /// # use serde::de::{self, value, DeserializeSeed, Visitor, VariantAccess, Unexpected}; /// # /// # struct X; @@ -2131,7 +2128,7 @@ pub trait VariantAccess<'de>: Sized { /// If the data contains a different type of variant, the following /// `invalid_type` error should be constructed: /// - /// ```edition2018 + /// ```edition2021 /// # use serde::de::{self, value, DeserializeSeed, Visitor, VariantAccess, Unexpected}; /// # /// # struct X; @@ -2148,11 +2145,7 @@ pub trait VariantAccess<'de>: Sized { /// # T: DeserializeSeed<'de>, /// # { unimplemented!() } /// # - /// fn tuple_variant<V>( - /// self, - /// _len: usize, - /// _visitor: V, - /// ) -> Result<V::Value, Self::Error> + /// fn tuple_variant<V>(self, _len: usize, _visitor: V) -> Result<V::Value, Self::Error> /// where /// V: Visitor<'de>, /// { @@ -2178,7 +2171,7 @@ pub trait VariantAccess<'de>: Sized { /// If the data contains 
a different type of variant, the following /// `invalid_type` error should be constructed: /// - /// ```edition2018 + /// ```edition2021 /// # use serde::de::{self, value, DeserializeSeed, Visitor, VariantAccess, Unexpected}; /// # /// # struct X; @@ -2238,10 +2231,10 @@ pub trait VariantAccess<'de>: Sized { /// /// # Example /// -/// ```edition2018 +/// ```edition2021 +/// use serde::de::{value, Deserialize, IntoDeserializer}; +/// use serde_derive::Deserialize; /// use std::str::FromStr; -/// use serde::Deserialize; -/// use serde::de::{value, IntoDeserializer}; /// /// #[derive(Deserialize)] /// enum Setting { @@ -2285,15 +2278,48 @@ impl Display for OneOf { 1 => write!(formatter, "`{}`", self.names[0]), 2 => write!(formatter, "`{}` or `{}`", self.names[0], self.names[1]), _ => { - try!(write!(formatter, "one of ")); + tri!(formatter.write_str("one of ")); for (i, alt) in self.names.iter().enumerate() { if i > 0 { - try!(write!(formatter, ", ")); + tri!(formatter.write_str(", ")); } - try!(write!(formatter, "`{}`", alt)); + tri!(write!(formatter, "`{}`", alt)); } Ok(()) } } } } + +struct WithDecimalPoint(f64); + +impl Display for WithDecimalPoint { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + struct LookForDecimalPoint<'f, 'a> { + formatter: &'f mut fmt::Formatter<'a>, + has_decimal_point: bool, + } + + impl<'f, 'a> fmt::Write for LookForDecimalPoint<'f, 'a> { + fn write_str(&mut self, fragment: &str) -> fmt::Result { + self.has_decimal_point |= fragment.contains('.'); + self.formatter.write_str(fragment) + } + + fn write_char(&mut self, ch: char) -> fmt::Result { + self.has_decimal_point |= ch == '.'; + self.formatter.write_char(ch) + } + } + + let mut writer = LookForDecimalPoint { + formatter, + has_decimal_point: false, + }; + tri!(write!(writer, "{}", self.0)); + if !writer.has_decimal_point { + tri!(formatter.write_str(".0")); + } + Ok(()) + } +} diff --git a/vendor/serde/src/de/seed.rs b/vendor/serde/src/de/seed.rs index 
13b7ea4..52fb89d 100644 --- a/vendor/serde/src/de/seed.rs +++ b/vendor/serde/src/de/seed.rs @@ -1,4 +1,4 @@ -use de::{Deserialize, DeserializeSeed, Deserializer}; +use crate::de::{Deserialize, DeserializeSeed, Deserializer}; /// A DeserializeSeed helper for implementing deserialize_in_place Visitors. /// diff --git a/vendor/serde/src/private/size_hint.rs b/vendor/serde/src/de/size_hint.rs similarity index 50% rename from vendor/serde/src/private/size_hint.rs rename to vendor/serde/src/de/size_hint.rs index ca71e61..4a4fe25 100644 --- a/vendor/serde/src/private/size_hint.rs +++ b/vendor/serde/src/de/size_hint.rs @@ -1,4 +1,4 @@ -use lib::*; +use crate::lib::*; pub fn from_bounds<I>(iter: &I) -> Option<usize> where @@ -8,9 +8,17 @@ where } #[cfg(any(feature = "std", feature = "alloc"))] -#[inline] -pub fn cautious(hint: Option<usize>) -> usize { - cmp::min(hint.unwrap_or(0), 4096) +pub fn cautious<Element>(hint: Option<usize>) -> usize { + const MAX_PREALLOC_BYTES: usize = 1024 * 1024; + + if mem::size_of::<Element>() == 0 { + 0 + } else { + cmp::min( + hint.unwrap_or(0), + MAX_PREALLOC_BYTES / mem::size_of::<Element>(), + ) + } } fn helper(bounds: (usize, Option<usize>)) -> Option<usize> { diff --git a/vendor/serde/src/de/utf8.rs b/vendor/serde/src/de/utf8.rs deleted file mode 100644 index 576fd03..0000000 --- a/vendor/serde/src/de/utf8.rs +++ /dev/null @@ -1,46 +0,0 @@ -use lib::*; - -const TAG_CONT: u8 = 0b1000_0000; -const TAG_TWO_B: u8 = 0b1100_0000; -const TAG_THREE_B: u8 = 0b1110_0000; -const TAG_FOUR_B: u8 = 0b1111_0000; -const MAX_ONE_B: u32 = 0x80; -const MAX_TWO_B: u32 = 0x800; -const MAX_THREE_B: u32 = 0x10000; - -#[inline] -pub fn encode(c: char) -> Encode { - let code = c as u32; - let mut buf = [0; 4]; - let pos = if code < MAX_ONE_B { - buf[3] = code as u8; - 3 - } else if code < MAX_TWO_B { - buf[2] = (code >> 6 & 0x1F) as u8 | TAG_TWO_B; - buf[3] = (code & 0x3F) as u8 | TAG_CONT; - 2 - } else if code < MAX_THREE_B { - buf[1] = (code >> 12 & 0x0F) as 
u8 | TAG_THREE_B; - buf[2] = (code >> 6 & 0x3F) as u8 | TAG_CONT; - buf[3] = (code & 0x3F) as u8 | TAG_CONT; - 1 - } else { - buf[0] = (code >> 18 & 0x07) as u8 | TAG_FOUR_B; - buf[1] = (code >> 12 & 0x3F) as u8 | TAG_CONT; - buf[2] = (code >> 6 & 0x3F) as u8 | TAG_CONT; - buf[3] = (code & 0x3F) as u8 | TAG_CONT; - 0 - }; - Encode { buf: buf, pos: pos } -} - -pub struct Encode { - buf: [u8; 4], - pos: usize, -} - -impl Encode { - pub fn as_str(&self) -> &str { - str::from_utf8(&self.buf[self.pos..]).unwrap() - } -} diff --git a/vendor/serde/src/de/value.rs b/vendor/serde/src/de/value.rs index 5d88862..3bc0c71 100644 --- a/vendor/serde/src/de/value.rs +++ b/vendor/serde/src/de/value.rs @@ -1,10 +1,10 @@ //! Building blocks for deserializing basic values using the `IntoDeserializer` //! trait. //! -//! ```edition2018 +//! ```edition2021 +//! use serde::de::{value, Deserialize, IntoDeserializer}; +//! use serde_derive::Deserialize; //! use std::str::FromStr; -//! use serde::Deserialize; -//! use serde::de::{value, IntoDeserializer}; //! //! #[derive(Deserialize)] //! enum Setting { @@ -21,12 +21,11 @@ //! } //! ``` -use lib::*; +use crate::lib::*; use self::private::{First, Second}; -use __private::size_hint; -use de::{self, Deserializer, Expected, IntoDeserializer, SeqAccess, Visitor}; -use ser; +use crate::de::{self, size_hint, Deserializer, Expected, IntoDeserializer, SeqAccess, Visitor}; +use crate::ser; //////////////////////////////////////////////////////////////////////////////// @@ -113,6 +112,7 @@ impl Debug for Error { } #[cfg(feature = "std")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] impl error::Error for Error { fn description(&self) -> &str { &self.err @@ -185,12 +185,14 @@ impl<E> Debug for UnitDeserializer<E> { /// A deserializer that cannot be instantiated. 
#[cfg(feature = "unstable")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "unstable")))] pub struct NeverDeserializer<E> { never: !, marker: PhantomData<E>, } #[cfg(feature = "unstable")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "unstable")))] impl<'de, E> IntoDeserializer<'de, E> for ! where E: de::Error, @@ -251,7 +253,7 @@ macro_rules! primitive_deserializer { #[allow(missing_docs)] pub fn new(value: $ty) -> Self { $name { - value: value, + value, marker: PhantomData, } } @@ -293,20 +295,17 @@ primitive_deserializer!(i8, "an `i8`.", I8Deserializer, visit_i8); primitive_deserializer!(i16, "an `i16`.", I16Deserializer, visit_i16); primitive_deserializer!(i32, "an `i32`.", I32Deserializer, visit_i32); primitive_deserializer!(i64, "an `i64`.", I64Deserializer, visit_i64); +primitive_deserializer!(i128, "an `i128`.", I128Deserializer, visit_i128); primitive_deserializer!(isize, "an `isize`.", IsizeDeserializer, visit_i64 as i64); primitive_deserializer!(u8, "a `u8`.", U8Deserializer, visit_u8); primitive_deserializer!(u16, "a `u16`.", U16Deserializer, visit_u16); primitive_deserializer!(u64, "a `u64`.", U64Deserializer, visit_u64); +primitive_deserializer!(u128, "a `u128`.", U128Deserializer, visit_u128); primitive_deserializer!(usize, "a `usize`.", UsizeDeserializer, visit_u64 as u64); primitive_deserializer!(f32, "an `f32`.", F32Deserializer, visit_f32); primitive_deserializer!(f64, "an `f64`.", F64Deserializer, visit_f64); primitive_deserializer!(char, "a `char`.", CharDeserializer, visit_char); -serde_if_integer128! { - primitive_deserializer!(i128, "an `i128`.", I128Deserializer, visit_i128); - primitive_deserializer!(u128, "a `u128`.", U128Deserializer, visit_u128); -} - /// A deserializer holding a `u32`. 
pub struct U32Deserializer<E> { value: u32, @@ -330,7 +329,7 @@ impl<E> U32Deserializer<E> { #[allow(missing_docs)] pub fn new(value: u32) -> Self { U32Deserializer { - value: value, + value, marker: PhantomData, } } @@ -419,7 +418,7 @@ impl<'a, E> StrDeserializer<'a, E> { #[allow(missing_docs)] pub fn new(value: &'a str) -> Self { StrDeserializer { - value: value, + value, marker: PhantomData, } } @@ -498,7 +497,7 @@ impl<'de, E> BorrowedStrDeserializer<'de, E> { /// Create a new borrowed deserializer from the given string. pub fn new(value: &'de str) -> BorrowedStrDeserializer<'de, E> { BorrowedStrDeserializer { - value: value, + value, marker: PhantomData, } } @@ -566,6 +565,7 @@ impl<'de, E> Debug for BorrowedStrDeserializer<'de, E> { /// A deserializer holding a `String`. #[cfg(any(feature = "std", feature = "alloc"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] pub struct StringDeserializer<E> { value: String, marker: PhantomData<E>, @@ -582,6 +582,7 @@ impl<E> Clone for StringDeserializer<E> { } #[cfg(any(feature = "std", feature = "alloc"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] impl<'de, E> IntoDeserializer<'de, E> for String where E: de::Error, @@ -598,7 +599,7 @@ impl<E> StringDeserializer<E> { #[allow(missing_docs)] pub fn new(value: String) -> Self { StringDeserializer { - value: value, + value, marker: PhantomData, } } @@ -669,6 +670,7 @@ impl<E> Debug for StringDeserializer<E> { /// A deserializer holding a `Cow<str>`. 
#[cfg(any(feature = "std", feature = "alloc"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] pub struct CowStrDeserializer<'a, E> { value: Cow<'a, str>, marker: PhantomData<E>, @@ -685,6 +687,7 @@ impl<'a, E> Clone for CowStrDeserializer<'a, E> { } #[cfg(any(feature = "std", feature = "alloc"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] impl<'de, 'a, E> IntoDeserializer<'de, E> for Cow<'a, str> where E: de::Error, @@ -701,7 +704,7 @@ impl<'a, E> CowStrDeserializer<'a, E> { #[allow(missing_docs)] pub fn new(value: Cow<'a, str>) -> Self { CowStrDeserializer { - value: value, + value, marker: PhantomData, } } @@ -783,7 +786,7 @@ impl<'a, E> BytesDeserializer<'a, E> { /// Create a new deserializer from the given bytes. pub fn new(value: &'a [u8]) -> Self { BytesDeserializer { - value: value, + value, marker: PhantomData, } } @@ -842,7 +845,7 @@ impl<'de, E> BorrowedBytesDeserializer<'de, E> { /// Create a new borrowed deserializer from the given borrowed bytes. 
pub fn new(value: &'de [u8]) -> Self { BorrowedBytesDeserializer { - value: value, + value, marker: PhantomData, } } @@ -937,8 +940,8 @@ where where V: de::Visitor<'de>, { - let v = try!(visitor.visit_seq(&mut self)); - try!(self.end()); + let v = tri!(visitor.visit_seq(&mut self)); + tri!(self.end()); Ok(v) } @@ -980,7 +983,7 @@ struct ExpectedInSeq(usize); impl Expected for ExpectedInSeq { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { if self.0 == 1 { - write!(formatter, "1 element in sequence") + formatter.write_str("1 element in sequence") } else { write!(formatter, "{} elements in sequence", self.0) } @@ -1003,6 +1006,7 @@ where //////////////////////////////////////////////////////////////////////////////// #[cfg(any(feature = "std", feature = "alloc"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] impl<'de, T, E> IntoDeserializer<'de, E> for Vec<T> where T: IntoDeserializer<'de, E>, @@ -1016,6 +1020,7 @@ where } #[cfg(any(feature = "std", feature = "alloc"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] impl<'de, T, E> IntoDeserializer<'de, E> for BTreeSet<T> where T: IntoDeserializer<'de, E> + Eq + Ord, @@ -1029,6 +1034,7 @@ where } #[cfg(feature = "std")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] impl<'de, T, S, E> IntoDeserializer<'de, E> for HashSet<T, S> where T: IntoDeserializer<'de, E> + Eq + Hash, @@ -1053,7 +1059,7 @@ pub struct SeqAccessDeserializer<A> { impl<A> SeqAccessDeserializer<A> { /// Construct a new `SeqAccessDeserializer<A>`. 
pub fn new(seq: A) -> Self { - SeqAccessDeserializer { seq: seq } + SeqAccessDeserializer { seq } } } @@ -1162,8 +1168,8 @@ where where V: de::Visitor<'de>, { - let value = try!(visitor.visit_map(&mut self)); - try!(self.end()); + let value = tri!(visitor.visit_map(&mut self)); + tri!(self.end()); Ok(value) } @@ -1171,8 +1177,8 @@ where where V: de::Visitor<'de>, { - let value = try!(visitor.visit_seq(&mut self)); - try!(self.end()); + let value = tri!(visitor.visit_seq(&mut self)); + tri!(self.end()); Ok(value) } @@ -1236,8 +1242,8 @@ where { match self.next_pair() { Some((key, value)) => { - let key = try!(kseed.deserialize(key.into_deserializer())); - let value = try!(vseed.deserialize(value.into_deserializer())); + let key = tri!(kseed.deserialize(key.into_deserializer())); + let value = tri!(vseed.deserialize(value.into_deserializer())); Ok(Some((key, value))) } None => Ok(None), @@ -1341,7 +1347,7 @@ where V: de::Visitor<'de>, { let mut pair_visitor = PairVisitor(Some(self.0), Some(self.1), PhantomData); - let pair = try!(visitor.visit_seq(&mut pair_visitor)); + let pair = tri!(visitor.visit_seq(&mut pair_visitor)); if pair_visitor.1.is_none() { Ok(pair) } else { @@ -1405,7 +1411,7 @@ struct ExpectedInMap(usize); impl Expected for ExpectedInMap { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { if self.0 == 1 { - write!(formatter, "1 element in map") + formatter.write_str("1 element in map") } else { write!(formatter, "{} elements in map", self.0) } @@ -1415,6 +1421,7 @@ impl Expected for ExpectedInMap { //////////////////////////////////////////////////////////////////////////////// #[cfg(any(feature = "std", feature = "alloc"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] impl<'de, K, V, E> IntoDeserializer<'de, E> for BTreeMap<K, V> where K: IntoDeserializer<'de, E> + Eq + Ord, @@ -1429,6 +1436,7 @@ where } #[cfg(feature = "std")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] impl<'de, K, V, S, E> 
IntoDeserializer<'de, E> for HashMap<K, V, S> where K: IntoDeserializer<'de, E> + Eq + Hash, @@ -1454,7 +1462,7 @@ pub struct MapAccessDeserializer<A> { impl<A> MapAccessDeserializer<A> { /// Construct a new `MapAccessDeserializer<A>`. pub fn new(map: A) -> Self { - MapAccessDeserializer { map: map } + MapAccessDeserializer { map } } } @@ -1501,7 +1509,7 @@ where where T: de::DeserializeSeed<'de>, { - match try!(self.map.next_key_seed(seed)) { + match tri!(self.map.next_key_seed(seed)) { Some(key) => Ok((key, private::map_as_enum(self.map))), None => Err(de::Error::invalid_type(de::Unexpected::Map, &"enum")), } @@ -1519,7 +1527,7 @@ pub struct EnumAccessDeserializer<A> { impl<A> EnumAccessDeserializer<A> { /// Construct a new `EnumAccessDeserializer<A>`. pub fn new(access: A) -> Self { - EnumAccessDeserializer { access: access } + EnumAccessDeserializer { access } } } @@ -1546,9 +1554,11 @@ where //////////////////////////////////////////////////////////////////////////////// mod private { - use lib::*; + use crate::lib::*; - use de::{self, DeserializeSeed, Deserializer, MapAccess, Unexpected, VariantAccess, Visitor}; + use crate::de::{ + self, DeserializeSeed, Deserializer, MapAccess, Unexpected, VariantAccess, Visitor, + }; pub struct UnitOnly<E> { marker: PhantomData<E>, @@ -1613,7 +1623,7 @@ mod private { } pub fn map_as_enum<A>(map: A) -> MapAsEnum<A> { - MapAsEnum { map: map } + MapAsEnum { map } } impl<'de, A> VariantAccess<'de> for MapAsEnum<A> @@ -1637,10 +1647,7 @@ mod private { where V: Visitor<'de>, { - self.map.next_value_seed(SeedTupleVariant { - len: len, - visitor: visitor, - }) + self.map.next_value_seed(SeedTupleVariant { len, visitor }) } fn struct_variant<V>( @@ -1651,8 +1658,7 @@ mod private { where V: Visitor<'de>, { - self.map - .next_value_seed(SeedStructVariant { visitor: visitor }) + self.map.next_value_seed(SeedStructVariant { visitor }) } } diff --git a/vendor/serde/src/integer128.rs b/vendor/serde/src/integer128.rs index 
904c2a2..2f94a64 100644 --- a/vendor/serde/src/integer128.rs +++ b/vendor/serde/src/integer128.rs @@ -1,82 +1,9 @@ -/// Conditional compilation depending on whether Serde is built with support for -/// 128-bit integers. -/// -/// Data formats that wish to support Rust compiler versions older than 1.26 -/// (or targets that lack 128-bit integers) may place the i128 / u128 methods -/// of their Serializer and Deserializer behind this macro. -/// -/// Data formats that require a minimum Rust compiler version of at least 1.26, -/// or do not target platforms that lack 128-bit integers, do not need to -/// bother with this macro and may assume support for 128-bit integers. -/// -/// ```edition2018 -/// # use serde::__private::doc::Error; -/// # -/// # struct MySerializer; -/// # -/// use serde::{serde_if_integer128, Serializer}; -/// -/// impl Serializer for MySerializer { -/// type Ok = (); -/// type Error = Error; -/// -/// fn serialize_i64(self, v: i64) -> Result<Self::Ok, Self::Error> { -/// /* ... */ -/// # unimplemented!() -/// } -/// -/// /* ... */ -/// -/// serde_if_integer128! { -/// fn serialize_i128(self, v: i128) -> Result<Self::Ok, Self::Error> { -/// /* ... */ -/// # unimplemented!() -/// } -/// -/// fn serialize_u128(self, v: u128) -> Result<Self::Ok, Self::Error> { -/// /* ... */ -/// # unimplemented!() -/// } -/// } -/// # -/// # serde::__serialize_unimplemented! { -/// # bool i8 i16 i32 u8 u16 u32 u64 f32 f64 char str bytes none some -/// # unit unit_struct unit_variant newtype_struct newtype_variant seq -/// # tuple tuple_struct tuple_variant map struct struct_variant -/// # } -/// } -/// ``` -/// -/// When Serde is built with support for 128-bit integers, this macro expands -/// transparently into just the input tokens. -/// -/// ```edition2018 -/// macro_rules! serde_if_integer128 { -/// ($($tt:tt)*) => { -/// $($tt)* -/// }; -/// } -/// ``` -/// -/// When built without support for 128-bit integers, this macro expands to -/// nothing. 
-/// -/// ```edition2018 -/// macro_rules! serde_if_integer128 { -/// ($($tt:tt)*) => {}; -/// } -/// ``` -#[cfg(not(no_integer128))] +// No longer used. Old versions of serde used this macro for supporting targets +// that did not yet have 128-bit integer support. #[macro_export] +#[doc(hidden)] macro_rules! serde_if_integer128 { ($($tt:tt)*) => { $($tt)* }; } - -#[cfg(no_integer128)] -#[macro_export] -#[doc(hidden)] -macro_rules! serde_if_integer128 { - ($($tt:tt)*) => {}; -} diff --git a/vendor/serde/src/lib.rs b/vendor/serde/src/lib.rs index 8db200c..5cf44c1 100644 --- a/vendor/serde/src/lib.rs +++ b/vendor/serde/src/lib.rs @@ -63,6 +63,7 @@ //! and from DynamoDB. //! - [Hjson], a syntax extension to JSON designed around human reading and //! editing. *(deserialization only)* +//! - [CSV], Comma-separated values is a tabular text file format. //! //! [JSON]: https://github.com/serde-rs/json //! [Postcard]: https://github.com/jamesmunns/postcard @@ -89,66 +90,67 @@ //! [DynamoDB Items]: https://docs.rs/serde_dynamo //! [rusoto_dynamodb]: https://docs.rs/rusoto_dynamodb //! [Hjson]: https://github.com/Canop/deser-hjson +//! [CSV]: https://docs.rs/csv //////////////////////////////////////////////////////////////////////////////// // Serde types in rustdoc of other crates get linked to here. -#![doc(html_root_url = "https://docs.rs/serde/1.0.160")] +#![doc(html_root_url = "https://docs.rs/serde/1.0.197")] // Support using Serde without the standard library! #![cfg_attr(not(feature = "std"), no_std)] +// Show which crate feature enables conditionally compiled APIs in documentation. +#![cfg_attr(doc_cfg, feature(doc_cfg))] // Unstable functionality only if the user asks for it. 
For tracking and // discussion of these features please refer to this issue: // // https://github.com/serde-rs/serde/issues/812 #![cfg_attr(feature = "unstable", feature(error_in_core, never_type))] #![allow(unknown_lints, bare_trait_objects, deprecated)] -#![cfg_attr(feature = "cargo-clippy", allow(renamed_and_removed_lints))] // Ignored clippy and clippy_pedantic lints -#![cfg_attr( - feature = "cargo-clippy", - allow( - // clippy bug: https://github.com/rust-lang/rust-clippy/issues/5704 - unnested_or_patterns, - // clippy bug: https://github.com/rust-lang/rust-clippy/issues/7768 - semicolon_if_nothing_returned, - // not available in our oldest supported compiler - empty_enum, - type_repetition_in_bounds, // https://github.com/rust-lang/rust-clippy/issues/8772 - // integer and float ser/de requires these sorts of casts - cast_possible_truncation, - cast_possible_wrap, - cast_sign_loss, - // things are often more readable this way - cast_lossless, - module_name_repetitions, - option_if_let_else, - single_match_else, - type_complexity, - use_self, - zero_prefixed_literal, - // correctly used - derive_partial_eq_without_eq, - enum_glob_use, - explicit_auto_deref, - let_underscore_untyped, - map_err_ignore, - new_without_default, - result_unit_err, - wildcard_imports, - // not practical - needless_pass_by_value, - similar_names, - too_many_lines, - // preference - doc_markdown, - unseparated_literal_suffix, - // false positive - needless_doctest_main, - // noisy - missing_errors_doc, - must_use_candidate, - ) +#![allow( + // clippy bug: https://github.com/rust-lang/rust-clippy/issues/5704 + clippy::unnested_or_patterns, + // clippy bug: https://github.com/rust-lang/rust-clippy/issues/7768 + clippy::semicolon_if_nothing_returned, + // not available in our oldest supported compiler + clippy::empty_enum, + clippy::type_repetition_in_bounds, // https://github.com/rust-lang/rust-clippy/issues/8772 + // integer and float ser/de requires these sorts of casts + 
clippy::cast_possible_truncation, + clippy::cast_possible_wrap, + clippy::cast_sign_loss, + // things are often more readable this way + clippy::cast_lossless, + clippy::module_name_repetitions, + clippy::single_match_else, + clippy::type_complexity, + clippy::use_self, + clippy::zero_prefixed_literal, + // correctly used + clippy::derive_partial_eq_without_eq, + clippy::enum_glob_use, + clippy::explicit_auto_deref, + clippy::incompatible_msrv, + clippy::let_underscore_untyped, + clippy::map_err_ignore, + clippy::new_without_default, + clippy::result_unit_err, + clippy::wildcard_imports, + // not practical + clippy::needless_pass_by_value, + clippy::similar_names, + clippy::too_many_lines, + // preference + clippy::doc_markdown, + clippy::unseparated_literal_suffix, + // false positive + clippy::needless_doctest_main, + // noisy + clippy::missing_errors_doc, + clippy::must_use_candidate, )] +// Restrictions +#![deny(clippy::question_mark_used)] // Rustc lints. #![deny(missing_docs, unused_imports)] @@ -168,21 +170,26 @@ mod lib { pub use std::*; } - pub use self::core::{cmp, iter, mem, num, ptr, slice, str}; pub use self::core::{f32, f64}; pub use self::core::{i16, i32, i64, i8, isize}; + pub use self::core::{iter, num, ptr, str}; pub use self::core::{u16, u32, u64, u8, usize}; + #[cfg(any(feature = "std", feature = "alloc"))] + pub use self::core::{cmp, mem, slice}; + pub use self::core::cell::{Cell, RefCell}; - pub use self::core::clone::{self, Clone}; - pub use self::core::convert::{self, From, Into}; - pub use self::core::default::{self, Default}; - pub use self::core::fmt::{self, Debug, Display}; + pub use self::core::clone; + pub use self::core::cmp::Reverse; + pub use self::core::convert; + pub use self::core::default; + pub use self::core::fmt::{self, Debug, Display, Write as FmtWrite}; pub use self::core::marker::{self, PhantomData}; pub use self::core::num::Wrapping; - pub use self::core::ops::Range; - pub use self::core::option::{self, Option}; - pub use 
self::core::result::{self, Result}; + pub use self::core::ops::{Bound, Range, RangeFrom, RangeInclusive, RangeTo}; + pub use self::core::option; + pub use self::core::result; + pub use self::core::time::Duration; #[cfg(all(feature = "alloc", not(feature = "std")))] pub use alloc::borrow::{Cow, ToOwned}; @@ -220,7 +227,7 @@ mod lib { pub use std::collections::{BTreeMap, BTreeSet, BinaryHeap, LinkedList, VecDeque}; #[cfg(all(not(no_core_cstr), not(feature = "std")))] - pub use core::ffi::CStr; + pub use self::core::ffi::CStr; #[cfg(feature = "std")] pub use std::ffi::CStr; @@ -247,18 +254,6 @@ mod lib { #[cfg(feature = "std")] pub use std::time::{SystemTime, UNIX_EPOCH}; - #[cfg(all(feature = "std", not(no_collections_bound), no_ops_bound))] - pub use std::collections::Bound; - - #[cfg(not(no_core_reverse))] - pub use self::core::cmp::Reverse; - - #[cfg(not(no_ops_bound))] - pub use self::core::ops::Bound; - - #[cfg(not(no_range_inclusive))] - pub use self::core::ops::RangeInclusive; - #[cfg(all(feature = "std", no_target_has_atomic, not(no_std_atomic)))] pub use std::sync::atomic::{ AtomicBool, AtomicI16, AtomicI32, AtomicI8, AtomicIsize, AtomicU16, AtomicU32, AtomicU8, @@ -279,16 +274,13 @@ mod lib { pub use std::sync::atomic::{AtomicI64, AtomicU64}; #[cfg(all(feature = "std", not(no_target_has_atomic), target_has_atomic = "ptr"))] pub use std::sync::atomic::{AtomicIsize, AtomicUsize}; - - #[cfg(any(feature = "std", not(no_core_duration)))] - pub use self::core::time::Duration; } // None of this crate's error handling needs the `From::from` error conversion // performed implicitly by the `?` operator or the standard library's `try!` // macro. This simplified macro gives a 5.5% improvement in compile time // compared to standard `try!`, and 9% improvement compared to `?`. -macro_rules! try { +macro_rules! 
tri { ($expr:expr) => { match $expr { Ok(val) => val, @@ -309,20 +301,15 @@ pub mod de; pub mod ser; #[doc(inline)] -pub use de::{Deserialize, Deserializer}; +pub use crate::de::{Deserialize, Deserializer}; #[doc(inline)] -pub use ser::{Serialize, Serializer}; +pub use crate::ser::{Serialize, Serializer}; // Used by generated code and doc tests. Not public API. #[doc(hidden)] #[path = "private/mod.rs"] pub mod __private; -#[allow(unused_imports)] -use self::__private as export; -#[allow(unused_imports)] -use self::__private as private; - #[path = "de/seed.rs"] mod seed; @@ -335,12 +322,11 @@ mod std_error; // be annoying for crates that provide handwritten impls or data formats. They // would need to disable default features and then explicitly re-enable std. #[cfg(feature = "serde_derive")] -#[allow(unused_imports)] -#[macro_use] extern crate serde_derive; /// Derive macro available if serde is built with `features = ["derive"]`. #[cfg(feature = "serde_derive")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] pub use serde_derive::{Deserialize, Serialize}; #[cfg(all(not(no_serde_derive), any(feature = "std", feature = "alloc")))] diff --git a/vendor/serde/src/macros.rs b/vendor/serde/src/macros.rs index 6502a23..a8fd85a 100644 --- a/vendor/serde/src/macros.rs +++ b/vendor/serde/src/macros.rs @@ -11,7 +11,7 @@ /// input. This requires repetitive implementations of all the [`Deserializer`] /// trait methods. /// -/// ```edition2018 +/// ```edition2021 /// # use serde::forward_to_deserialize_any; /// # use serde::de::{value, Deserializer, Visitor}; /// # @@ -47,7 +47,7 @@ /// methods so that they forward directly to [`Deserializer::deserialize_any`]. /// You can choose which methods to forward. /// -/// ```edition2018 +/// ```edition2021 /// # use serde::forward_to_deserialize_any; /// # use serde::de::{value, Deserializer, Visitor}; /// # @@ -78,11 +78,10 @@ /// called `V`. 
A different type parameter and a different lifetime can be /// specified explicitly if necessary. /// -/// ```edition2018 -/// # use std::marker::PhantomData; -/// # +/// ```edition2021 /// # use serde::forward_to_deserialize_any; /// # use serde::de::{value, Deserializer, Visitor}; +/// # use std::marker::PhantomData; /// # /// # struct MyDeserializer<V>(PhantomData<V>); /// # @@ -155,9 +154,7 @@ macro_rules! forward_to_deserialize_any_helper { forward_to_deserialize_any_method!{deserialize_i64<$l, $v>()} }; (i128<$l:tt, $v:ident>) => { - serde_if_integer128! { - forward_to_deserialize_any_method!{deserialize_i128<$l, $v>()} - } + forward_to_deserialize_any_method!{deserialize_i128<$l, $v>()} }; (u8<$l:tt, $v:ident>) => { forward_to_deserialize_any_method!{deserialize_u8<$l, $v>()} @@ -172,9 +169,7 @@ macro_rules! forward_to_deserialize_any_helper { forward_to_deserialize_any_method!{deserialize_u64<$l, $v>()} }; (u128<$l:tt, $v:ident>) => { - serde_if_integer128! { - forward_to_deserialize_any_method!{deserialize_u128<$l, $v>()} - } + forward_to_deserialize_any_method!{deserialize_u128<$l, $v>()} }; (f32<$l:tt, $v:ident>) => { forward_to_deserialize_any_method!{deserialize_f32<$l, $v>()} diff --git a/vendor/serde/src/private/de.rs b/vendor/serde/src/private/de.rs index e9c693d..883e690 100644 --- a/vendor/serde/src/private/de.rs +++ b/vendor/serde/src/private/de.rs @@ -1,10 +1,13 @@ -use lib::*; +use crate::lib::*; -use de::value::{BorrowedBytesDeserializer, BytesDeserializer}; -use de::{Deserialize, Deserializer, Error, IntoDeserializer, Visitor}; +use crate::de::value::{BorrowedBytesDeserializer, BytesDeserializer}; +use crate::de::{ + Deserialize, DeserializeSeed, Deserializer, EnumAccess, Error, IntoDeserializer, VariantAccess, + Visitor, +}; #[cfg(any(feature = "std", feature = "alloc"))] -use de::{DeserializeSeed, MapAccess, Unexpected}; +use crate::de::{MapAccess, Unexpected}; #[cfg(any(feature = "std", feature = "alloc"))] pub use self::content::{ @@ 
-13,7 +16,7 @@ pub use self::content::{ TagOrContentField, TagOrContentFieldVisitor, TaggedContentVisitor, UntaggedUnitVisitor, }; -pub use seed::InPlaceSeed; +pub use crate::seed::InPlaceSeed; /// If the missing field is of type `Option<T>` then treat is as `None`, /// otherwise it is an error. @@ -203,13 +206,13 @@ mod content { // This issue is tracking making some of this stuff public: // https://github.com/serde-rs/serde/issues/741 - use lib::*; + use crate::lib::*; - use __private::size_hint; - use actually_private; - use de::{ - self, Deserialize, DeserializeSeed, Deserializer, EnumAccess, Expected, IgnoredAny, - MapAccess, SeqAccess, Unexpected, Visitor, + use crate::actually_private; + use crate::de::value::{MapDeserializer, SeqDeserializer}; + use crate::de::{ + self, size_hint, Deserialize, DeserializeSeed, Deserializer, EnumAccess, Expected, + IgnoredAny, MapAccess, SeqAccess, Unexpected, Visitor, }; /// Used from generated code to buffer the contents of the Deserializer when @@ -299,6 +302,17 @@ mod content { } } + impl<'de, E> de::IntoDeserializer<'de, E> for Content<'de> + where + E: de::Error, + { + type Deserializer = ContentDeserializer<'de, E>; + + fn into_deserializer(self) -> Self::Deserializer { + ContentDeserializer::new(self) + } + } + struct ContentVisitor<'de> { value: PhantomData<Content<'de>>, } @@ -474,8 +488,9 @@ mod content { where V: SeqAccess<'de>, { - let mut vec = Vec::with_capacity(size_hint::cautious(visitor.size_hint())); - while let Some(e) = try!(visitor.next_element()) { + let mut vec = + Vec::<Content>::with_capacity(size_hint::cautious::<Content>(visitor.size_hint())); + while let Some(e) = tri!(visitor.next_element()) { vec.push(e); } Ok(Content::Seq(vec)) @@ -485,8 +500,11 @@ mod content { where V: MapAccess<'de>, { - let mut vec = Vec::with_capacity(size_hint::cautious(visitor.size_hint())); - while let Some(kv) = try!(visitor.next_entry()) { + let mut vec = + Vec::<(Content, Content)>::with_capacity( + 
size_hint::cautious::<(Content, Content)>(visitor.size_hint()), + ); + while let Some(kv) = tri!(visitor.next_entry()) { vec.push(kv); } Ok(Content::Map(vec)) @@ -518,7 +536,7 @@ mod content { impl<'de> TagOrContentVisitor<'de> { fn new(name: &'static str) -> Self { TagOrContentVisitor { - name: name, + name, value: PhantomData, } } @@ -797,51 +815,29 @@ mod content { /// Used by generated code to deserialize an internally tagged enum. /// /// Not public API. - pub struct TaggedContent<'de, T> { - pub tag: T, - pub content: Content<'de>, - } - - /// Not public API. - pub struct TaggedContentVisitor<'de, T> { + pub struct TaggedContentVisitor<T> { tag_name: &'static str, expecting: &'static str, - value: PhantomData<TaggedContent<'de, T>>, + value: PhantomData<T>, } - impl<'de, T> TaggedContentVisitor<'de, T> { + impl<T> TaggedContentVisitor<T> { /// Visitor for the content of an internally tagged enum with the given /// tag name. pub fn new(name: &'static str, expecting: &'static str) -> Self { TaggedContentVisitor { tag_name: name, - expecting: expecting, + expecting, value: PhantomData, } } } - impl<'de, T> DeserializeSeed<'de> for TaggedContentVisitor<'de, T> + impl<'de, T> Visitor<'de> for TaggedContentVisitor<T> where T: Deserialize<'de>, { - type Value = TaggedContent<'de, T>; - - fn deserialize<D>(self, deserializer: D) -> Result<Self::Value, D::Error> - where - D: Deserializer<'de>, - { - // Internally tagged enums are only supported in self-describing - // formats. 
- deserializer.deserialize_any(self) - } - } - - impl<'de, T> Visitor<'de> for TaggedContentVisitor<'de, T> - where - T: Deserialize<'de>, - { - type Value = TaggedContent<'de, T>; + type Value = (T, Content<'de>); fn expecting(&self, fmt: &mut fmt::Formatter) -> fmt::Result { fmt.write_str(self.expecting) @@ -851,17 +847,14 @@ mod content { where S: SeqAccess<'de>, { - let tag = match try!(seq.next_element()) { + let tag = match tri!(seq.next_element()) { Some(tag) => tag, None => { return Err(de::Error::missing_field(self.tag_name)); } }; let rest = de::value::SeqAccessDeserializer::new(seq); - Ok(TaggedContent { - tag: tag, - content: try!(Content::deserialize(rest)), - }) + Ok((tag, tri!(Content::deserialize(rest)))) } fn visit_map<M>(self, mut map: M) -> Result<Self::Value, M::Error> @@ -869,27 +862,27 @@ mod content { M: MapAccess<'de>, { let mut tag = None; - let mut vec = Vec::with_capacity(size_hint::cautious(map.size_hint())); - while let Some(k) = try!(map.next_key_seed(TagOrContentVisitor::new(self.tag_name))) { + let mut vec = Vec::<(Content, Content)>::with_capacity(size_hint::cautious::<( + Content, + Content, + )>(map.size_hint())); + while let Some(k) = tri!(map.next_key_seed(TagOrContentVisitor::new(self.tag_name))) { match k { TagOrContent::Tag => { if tag.is_some() { return Err(de::Error::duplicate_field(self.tag_name)); } - tag = Some(try!(map.next_value())); + tag = Some(tri!(map.next_value())); } TagOrContent::Content(k) => { - let v = try!(map.next_value()); + let v = tri!(map.next_value()); vec.push((k, v)); } } } match tag { None => Err(de::Error::missing_field(self.tag_name)), - Some(tag) => Ok(TaggedContent { - tag: tag, - content: Content::Map(vec), - }), + Some(tag) => Ok((tag, Content::Map(vec))), } } } @@ -915,7 +908,7 @@ mod content { where D: Deserializer<'de>, { - deserializer.deserialize_str(self) + deserializer.deserialize_identifier(self) } } @@ -926,6 +919,20 @@ mod content { write!(formatter, "{:?} or {:?}", self.tag, 
self.content) } + fn visit_u64<E>(self, field_index: u64) -> Result<Self::Value, E> + where + E: de::Error, + { + match field_index { + 0 => Ok(TagOrContentField::Tag), + 1 => Ok(TagOrContentField::Content), + _ => Err(de::Error::invalid_value( + Unexpected::Unsigned(field_index), + &self, + )), + } + } + fn visit_str<E>(self, field: &str) -> Result<Self::Value, E> where E: de::Error, @@ -938,6 +945,19 @@ mod content { Err(de::Error::invalid_value(Unexpected::Str(field), &self)) } } + + fn visit_bytes<E>(self, field: &[u8]) -> Result<Self::Value, E> + where + E: de::Error, + { + if field == self.tag.as_bytes() { + Ok(TagOrContentField::Tag) + } else if field == self.content.as_bytes() { + Ok(TagOrContentField::Content) + } else { + Err(de::Error::invalid_value(Unexpected::Bytes(field), &self)) + } + } } /// Used by generated code to deserialize an adjacently tagged enum when @@ -963,7 +983,7 @@ mod content { where D: Deserializer<'de>, { - deserializer.deserialize_str(self) + deserializer.deserialize_identifier(self) } } @@ -978,13 +998,31 @@ mod content { ) } + fn visit_u64<E>(self, field_index: u64) -> Result<Self::Value, E> + where + E: de::Error, + { + match field_index { + 0 => Ok(TagContentOtherField::Tag), + 1 => Ok(TagContentOtherField::Content), + _ => Ok(TagContentOtherField::Other), + } + } + fn visit_str<E>(self, field: &str) -> Result<Self::Value, E> where E: de::Error, { - if field == self.tag { + self.visit_bytes(field.as_bytes()) + } + + fn visit_bytes<E>(self, field: &[u8]) -> Result<Self::Value, E> + where + E: de::Error, + { + if field == self.tag.as_bytes() { Ok(TagContentOtherField::Tag) - } else if field == self.content { + } else if field == self.content.as_bytes() { Ok(TagContentOtherField::Content) } else { Ok(TagContentOtherField::Other) @@ -1050,9 +1088,9 @@ mod content { E: de::Error, { let seq = content.into_iter().map(ContentDeserializer::new); - let mut seq_visitor = de::value::SeqDeserializer::new(seq); - let value = 
try!(visitor.visit_seq(&mut seq_visitor)); - try!(seq_visitor.end()); + let mut seq_visitor = SeqDeserializer::new(seq); + let value = tri!(visitor.visit_seq(&mut seq_visitor)); + tri!(seq_visitor.end()); Ok(value) } @@ -1067,9 +1105,9 @@ mod content { let map = content .into_iter() .map(|(k, v)| (ContentDeserializer::new(k), ContentDeserializer::new(v))); - let mut map_visitor = de::value::MapDeserializer::new(map); - let value = try!(visitor.visit_map(&mut map_visitor)); - try!(map_visitor.end()); + let mut map_visitor = MapDeserializer::new(map); + let value = tri!(visitor.visit_map(&mut map_visitor)); + tri!(map_visitor.end()); Ok(value) } @@ -1457,7 +1495,7 @@ mod content { /// private API, don't use pub fn new(content: Content<'de>) -> Self { ContentDeserializer { - content: content, + content, err: PhantomData, } } @@ -1478,8 +1516,8 @@ mod content { { pub fn new(variant: Content<'de>, value: Option<Content<'de>>) -> EnumDeserializer<'de, E> { EnumDeserializer { - variant: variant, - value: value, + variant, + value, err: PhantomData, } } @@ -1545,7 +1583,7 @@ mod content { { match self.value { Some(Content::Seq(v)) => { - de::Deserializer::deserialize_any(SeqDeserializer::new(v), visitor) + de::Deserializer::deserialize_any(SeqDeserializer::new(v.into_iter()), visitor) } Some(other) => Err(de::Error::invalid_type( other.unexpected(), @@ -1568,10 +1606,10 @@ mod content { { match self.value { Some(Content::Map(v)) => { - de::Deserializer::deserialize_any(MapDeserializer::new(v), visitor) + de::Deserializer::deserialize_any(MapDeserializer::new(v.into_iter()), visitor) } Some(Content::Seq(v)) => { - de::Deserializer::deserialize_any(SeqDeserializer::new(v), visitor) + de::Deserializer::deserialize_any(SeqDeserializer::new(v.into_iter()), visitor) } Some(other) => Err(de::Error::invalid_type( other.unexpected(), @@ -1585,156 +1623,6 @@ mod content { } } - struct SeqDeserializer<'de, E> - where - E: de::Error, - { - iter: <Vec<Content<'de>> as 
IntoIterator>::IntoIter, - err: PhantomData<E>, - } - - impl<'de, E> SeqDeserializer<'de, E> - where - E: de::Error, - { - fn new(vec: Vec<Content<'de>>) -> Self { - SeqDeserializer { - iter: vec.into_iter(), - err: PhantomData, - } - } - } - - impl<'de, E> de::Deserializer<'de> for SeqDeserializer<'de, E> - where - E: de::Error, - { - type Error = E; - - #[inline] - fn deserialize_any<V>(mut self, visitor: V) -> Result<V::Value, Self::Error> - where - V: de::Visitor<'de>, - { - let len = self.iter.len(); - if len == 0 { - visitor.visit_unit() - } else { - let ret = try!(visitor.visit_seq(&mut self)); - let remaining = self.iter.len(); - if remaining == 0 { - Ok(ret) - } else { - Err(de::Error::invalid_length(len, &"fewer elements in array")) - } - } - } - - forward_to_deserialize_any! { - bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string - bytes byte_buf option unit unit_struct newtype_struct seq tuple - tuple_struct map struct enum identifier ignored_any - } - } - - impl<'de, E> de::SeqAccess<'de> for SeqDeserializer<'de, E> - where - E: de::Error, - { - type Error = E; - - fn next_element_seed<T>(&mut self, seed: T) -> Result<Option<T::Value>, Self::Error> - where - T: de::DeserializeSeed<'de>, - { - match self.iter.next() { - Some(value) => seed.deserialize(ContentDeserializer::new(value)).map(Some), - None => Ok(None), - } - } - - fn size_hint(&self) -> Option<usize> { - size_hint::from_bounds(&self.iter) - } - } - - struct MapDeserializer<'de, E> - where - E: de::Error, - { - iter: <Vec<(Content<'de>, Content<'de>)> as IntoIterator>::IntoIter, - value: Option<Content<'de>>, - err: PhantomData<E>, - } - - impl<'de, E> MapDeserializer<'de, E> - where - E: de::Error, - { - fn new(map: Vec<(Content<'de>, Content<'de>)>) -> Self { - MapDeserializer { - iter: map.into_iter(), - value: None, - err: PhantomData, - } - } - } - - impl<'de, E> de::MapAccess<'de> for MapDeserializer<'de, E> - where - E: de::Error, - { - type Error = E; - - fn 
next_key_seed<T>(&mut self, seed: T) -> Result<Option<T::Value>, Self::Error> - where - T: de::DeserializeSeed<'de>, - { - match self.iter.next() { - Some((key, value)) => { - self.value = Some(value); - seed.deserialize(ContentDeserializer::new(key)).map(Some) - } - None => Ok(None), - } - } - - fn next_value_seed<T>(&mut self, seed: T) -> Result<T::Value, Self::Error> - where - T: de::DeserializeSeed<'de>, - { - match self.value.take() { - Some(value) => seed.deserialize(ContentDeserializer::new(value)), - None => Err(de::Error::custom("value is missing")), - } - } - - fn size_hint(&self) -> Option<usize> { - size_hint::from_bounds(&self.iter) - } - } - - impl<'de, E> de::Deserializer<'de> for MapDeserializer<'de, E> - where - E: de::Error, - { - type Error = E; - - #[inline] - fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error> - where - V: de::Visitor<'de>, - { - visitor.visit_map(self) - } - - forward_to_deserialize_any! { - bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string - bytes byte_buf option unit unit_struct newtype_struct seq tuple - tuple_struct map struct enum identifier ignored_any - } - } - /// Not public API. 
pub struct ContentRefDeserializer<'a, 'de: 'a, E> { content: &'a Content<'de>, @@ -1796,9 +1684,9 @@ mod content { E: de::Error, { let seq = content.iter().map(ContentRefDeserializer::new); - let mut seq_visitor = de::value::SeqDeserializer::new(seq); - let value = try!(visitor.visit_seq(&mut seq_visitor)); - try!(seq_visitor.end()); + let mut seq_visitor = SeqDeserializer::new(seq); + let value = tri!(visitor.visit_seq(&mut seq_visitor)); + tri!(seq_visitor.end()); Ok(value) } @@ -1816,9 +1704,9 @@ mod content { ContentRefDeserializer::new(v), ) }); - let mut map_visitor = de::value::MapDeserializer::new(map); - let value = try!(visitor.visit_map(&mut map_visitor)); - try!(map_visitor.end()); + let mut map_visitor = MapDeserializer::new(map); + let value = tri!(visitor.visit_map(&mut map_visitor)); + tri!(map_visitor.end()); Ok(value) } @@ -2135,8 +2023,8 @@ mod content { }; visitor.visit_enum(EnumRefDeserializer { - variant: variant, - value: value, + variant, + value, err: PhantomData, }) } @@ -2180,12 +2068,20 @@ mod content { /// private API, don't use pub fn new(content: &'a Content<'de>) -> Self { ContentRefDeserializer { - content: content, + content, err: PhantomData, } } } + impl<'a, 'de: 'a, E> Copy for ContentRefDeserializer<'a, 'de, E> {} + + impl<'a, 'de: 'a, E> Clone for ContentRefDeserializer<'a, 'de, E> { + fn clone(&self) -> Self { + *self + } + } + struct EnumRefDeserializer<'a, 'de: 'a, E> where E: de::Error, @@ -2330,7 +2226,7 @@ mod content { if len == 0 { visitor.visit_unit() } else { - let ret = try!(visitor.visit_seq(&mut self)); + let ret = tri!(visitor.visit_seq(&mut self)); let remaining = self.iter.len(); if remaining == 0 { Ok(ret) @@ -2481,8 +2377,8 @@ mod content { /// Not public API. 
pub fn new(type_name: &'a str, variant_name: &'a str) -> Self { InternallyTaggedUnitVisitor { - type_name: type_name, - variant_name: variant_name, + type_name, + variant_name, } } } @@ -2509,7 +2405,7 @@ mod content { where M: MapAccess<'de>, { - while try!(access.next_entry::<IgnoredAny, IgnoredAny>()).is_some() {} + while tri!(access.next_entry::<IgnoredAny, IgnoredAny>()).is_some() {} Ok(()) } } @@ -2526,8 +2422,8 @@ mod content { /// Not public API. pub fn new(type_name: &'a str, variant_name: &'a str) -> Self { UntaggedUnitVisitor { - type_name: type_name, - variant_name: variant_name, + type_name, + variant_name, } } } @@ -2731,11 +2627,7 @@ where where V: Visitor<'de>, { - visitor.visit_map(FlatInternallyTaggedAccess { - iter: self.0.iter_mut(), - pending: None, - _marker: PhantomData, - }) + self.deserialize_map(visitor) } fn deserialize_enum<V>( @@ -2747,17 +2639,8 @@ where where V: Visitor<'de>, { - for item in self.0.iter_mut() { - // items in the vector are nulled out when used. So we can only use - // an item if it's still filled in and if the field is one we care - // about. 
- let use_item = match *item { - None => false, - Some((ref c, _)) => c.as_str().map_or(false, |x| variants.contains(&x)), - }; - - if use_item { - let (key, value) = item.take().unwrap(); + for entry in self.0 { + if let Some((key, value)) = flat_map_take_entry(entry, variants) { return visitor.visit_enum(EnumDeserializer::new(key, Some(value))); } } @@ -2772,7 +2655,11 @@ where where V: Visitor<'de>, { - visitor.visit_map(FlatMapAccess::new(self.0.iter())) + visitor.visit_map(FlatMapAccess { + iter: self.0.iter(), + pending_content: None, + _marker: PhantomData, + }) } fn deserialize_struct<V>( @@ -2784,7 +2671,12 @@ where where V: Visitor<'de>, { - visitor.visit_map(FlatStructAccess::new(self.0.iter_mut(), fields)) + visitor.visit_map(FlatStructAccess { + iter: self.0.iter_mut(), + pending_content: None, + fields, + _marker: PhantomData, + }) } fn deserialize_newtype_struct<V>(self, _name: &str, visitor: V) -> Result<V::Value, Self::Error> @@ -2811,6 +2703,13 @@ where visitor.visit_unit() } + fn deserialize_ignored_any<V>(self, visitor: V) -> Result<V::Value, Self::Error> + where + V: Visitor<'de>, + { + visitor.visit_unit() + } + forward_to_deserialize_other! 
{ deserialize_bool() deserialize_i8() @@ -2833,30 +2732,16 @@ where deserialize_tuple(usize) deserialize_tuple_struct(&'static str, usize) deserialize_identifier() - deserialize_ignored_any() } } #[cfg(any(feature = "std", feature = "alloc"))] -pub struct FlatMapAccess<'a, 'de: 'a, E> { +struct FlatMapAccess<'a, 'de: 'a, E> { iter: slice::Iter<'a, Option<(Content<'de>, Content<'de>)>>, pending_content: Option<&'a Content<'de>>, _marker: PhantomData<E>, } -#[cfg(any(feature = "std", feature = "alloc"))] -impl<'a, 'de, E> FlatMapAccess<'a, 'de, E> { - fn new( - iter: slice::Iter<'a, Option<(Content<'de>, Content<'de>)>>, - ) -> FlatMapAccess<'a, 'de, E> { - FlatMapAccess { - iter: iter, - pending_content: None, - _marker: PhantomData, - } - } -} - #[cfg(any(feature = "std", feature = "alloc"))] impl<'a, 'de, E> MapAccess<'de> for FlatMapAccess<'a, 'de, E> where @@ -2871,6 +2756,10 @@ where for item in &mut self.iter { // Items in the vector are nulled out when used by a struct. if let Some((ref key, ref content)) = *item { + // Do not take(), instead borrow this entry. The internally tagged + // enum does its own buffering so we can't tell whether this entry + // is going to be consumed. Borrowing here leaves the entry + // available for later flattened fields. 
self.pending_content = Some(content); return seed.deserialize(ContentRefDeserializer::new(key)).map(Some); } @@ -2890,28 +2779,13 @@ where } #[cfg(any(feature = "std", feature = "alloc"))] -pub struct FlatStructAccess<'a, 'de: 'a, E> { +struct FlatStructAccess<'a, 'de: 'a, E> { iter: slice::IterMut<'a, Option<(Content<'de>, Content<'de>)>>, pending_content: Option<Content<'de>>, fields: &'static [&'static str], _marker: PhantomData<E>, } -#[cfg(any(feature = "std", feature = "alloc"))] -impl<'a, 'de, E> FlatStructAccess<'a, 'de, E> { - fn new( - iter: slice::IterMut<'a, Option<(Content<'de>, Content<'de>)>>, - fields: &'static [&'static str], - ) -> FlatStructAccess<'a, 'de, E> { - FlatStructAccess { - iter: iter, - pending_content: None, - fields: fields, - _marker: PhantomData, - } - } -} - #[cfg(any(feature = "std", feature = "alloc"))] impl<'a, 'de, E> MapAccess<'de> for FlatStructAccess<'a, 'de, E> where @@ -2923,17 +2797,8 @@ where where T: DeserializeSeed<'de>, { - while let Some(item) = self.iter.next() { - // items in the vector are nulled out when used. So we can only use - // an item if it's still filled in and if the field is one we care - // about. In case we do not know which fields we want, we take them all. - let use_item = match *item { - None => false, - Some((ref c, _)) => c.as_str().map_or(false, |key| self.fields.contains(&key)), - }; - - if use_item { - let (key, content) = item.take().unwrap(); + for entry in self.iter.by_ref() { + if let Some((key, content)) = flat_map_take_entry(entry, self.fields) { self.pending_content = Some(content); return seed.deserialize(ContentDeserializer::new(key)).map(Some); } @@ -2952,44 +2817,76 @@ where } } +/// Claims one key-value pair from a FlatMapDeserializer's field buffer if the +/// field name matches any of the recognized ones. 
#[cfg(any(feature = "std", feature = "alloc"))] -pub struct FlatInternallyTaggedAccess<'a, 'de: 'a, E> { - iter: slice::IterMut<'a, Option<(Content<'de>, Content<'de>)>>, - pending: Option<&'a Content<'de>>, - _marker: PhantomData<E>, +fn flat_map_take_entry<'de>( + entry: &mut Option<(Content<'de>, Content<'de>)>, + recognized: &[&str], +) -> Option<(Content<'de>, Content<'de>)> { + // Entries in the FlatMapDeserializer buffer are nulled out as they get + // claimed for deserialization. We only use an entry if it is still present + // and if the field is one recognized by the current data structure. + let is_recognized = match entry { + None => false, + Some((k, _v)) => k.as_str().map_or(false, |name| recognized.contains(&name)), + }; + + if is_recognized { + entry.take() + } else { + None + } } -#[cfg(any(feature = "std", feature = "alloc"))] -impl<'a, 'de, E> MapAccess<'de> for FlatInternallyTaggedAccess<'a, 'de, E> +pub struct AdjacentlyTaggedEnumVariantSeed<F> { + pub enum_name: &'static str, + pub variants: &'static [&'static str], + pub fields_enum: PhantomData<F>, +} + +pub struct AdjacentlyTaggedEnumVariantVisitor<F> { + enum_name: &'static str, + fields_enum: PhantomData<F>, +} + +impl<'de, F> Visitor<'de> for AdjacentlyTaggedEnumVariantVisitor<F> where - E: Error, + F: Deserialize<'de>, { - type Error = E; + type Value = F; - fn next_key_seed<T>(&mut self, seed: T) -> Result<Option<T::Value>, Self::Error> + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + write!(formatter, "variant of enum {}", self.enum_name) + } + + fn visit_enum<A>(self, data: A) -> Result<Self::Value, A::Error> where - T: DeserializeSeed<'de>, + A: EnumAccess<'de>, { - for item in &mut self.iter { - if let Some((ref key, ref content)) = *item { - // Do not take(), instead borrow this entry. The internally tagged - // enum does its own buffering so we can't tell whether this entry - // is going to be consumed. 
Borrowing here leaves the entry - // available for later flattened fields. - self.pending = Some(content); - return seed.deserialize(ContentRefDeserializer::new(key)).map(Some); - } - } - Ok(None) + let (variant, variant_access) = tri!(data.variant()); + tri!(variant_access.unit_variant()); + Ok(variant) } +} - fn next_value_seed<T>(&mut self, seed: T) -> Result<T::Value, Self::Error> +impl<'de, F> DeserializeSeed<'de> for AdjacentlyTaggedEnumVariantSeed<F> +where + F: Deserialize<'de>, +{ + type Value = F; + + fn deserialize<D>(self, deserializer: D) -> Result<Self::Value, D::Error> where - T: DeserializeSeed<'de>, + D: Deserializer<'de>, { - match self.pending.take() { - Some(value) => seed.deserialize(ContentRefDeserializer::new(value)), - None => panic!("value is missing"), - } + deserializer.deserialize_enum( + self.enum_name, + self.variants, + AdjacentlyTaggedEnumVariantVisitor { + enum_name: self.enum_name, + fields_enum: PhantomData, + }, + ) } } diff --git a/vendor/serde/src/private/doc.rs b/vendor/serde/src/private/doc.rs index f597af8..1b18fe6 100644 --- a/vendor/serde/src/private/doc.rs +++ b/vendor/serde/src/private/doc.rs @@ -1,8 +1,8 @@ // Used only by Serde doc tests. Not public API. -use lib::*; +use crate::lib::*; -use ser; +use crate::ser; #[doc(hidden)] #[derive(Debug)] diff --git a/vendor/serde/src/private/mod.rs b/vendor/serde/src/private/mod.rs index e896902..177f850 100644 --- a/vendor/serde/src/private/mod.rs +++ b/vendor/serde/src/private/mod.rs @@ -3,30 +3,28 @@ pub mod de; #[cfg(not(no_serde_derive))] pub mod ser; -pub mod size_hint; - // FIXME: #[cfg(doctest)] once https://github.com/rust-lang/rust/issues/67295 is fixed. 
pub mod doc; -pub use lib::clone::Clone; -pub use lib::convert::{From, Into}; -pub use lib::default::Default; -pub use lib::fmt::{self, Formatter}; -pub use lib::marker::PhantomData; -pub use lib::option::Option::{self, None, Some}; -pub use lib::ptr; -pub use lib::result::Result::{self, Err, Ok}; +pub use crate::lib::clone::Clone; +pub use crate::lib::convert::{From, Into}; +pub use crate::lib::default::Default; +pub use crate::lib::fmt::{self, Formatter}; +pub use crate::lib::marker::PhantomData; +pub use crate::lib::option::Option::{self, None, Some}; +pub use crate::lib::ptr; +pub use crate::lib::result::Result::{self, Err, Ok}; pub use self::string::from_utf8_lossy; #[cfg(any(feature = "alloc", feature = "std"))] -pub use lib::{ToString, Vec}; +pub use crate::lib::{ToString, Vec}; #[cfg(not(no_core_try_from))] -pub use lib::convert::TryFrom; +pub use crate::lib::convert::TryFrom; mod string { - use lib::*; + use crate::lib::*; #[cfg(any(feature = "std", feature = "alloc"))] pub fn from_utf8_lossy(bytes: &[u8]) -> Cow<str> { diff --git a/vendor/serde/src/private/ser.rs b/vendor/serde/src/private/ser.rs index 528e8c1..50bcb25 100644 --- a/vendor/serde/src/private/ser.rs +++ b/vendor/serde/src/private/ser.rs @@ -1,6 +1,6 @@ -use lib::*; +use crate::lib::*; -use ser::{self, Impossible, Serialize, SerializeMap, SerializeStruct, Serializer}; +use crate::ser::{self, Impossible, Serialize, SerializeMap, SerializeStruct, Serializer}; #[cfg(any(feature = "std", feature = "alloc"))] use self::content::{ @@ -27,10 +27,10 @@ where T: Serialize, { value.serialize(TaggedSerializer { - type_ident: type_ident, - variant_ident: variant_ident, - tag: tag, - variant_name: variant_name, + type_ident, + variant_ident, + tag, + variant_name, delegate: serializer, }) } @@ -182,14 +182,14 @@ where } fn serialize_unit(self) -> Result<Self::Ok, Self::Error> { - let mut map = try!(self.delegate.serialize_map(Some(1))); - try!(map.serialize_entry(self.tag, self.variant_name)); + let mut 
map = tri!(self.delegate.serialize_map(Some(1))); + tri!(map.serialize_entry(self.tag, self.variant_name)); map.end() } fn serialize_unit_struct(self, _: &'static str) -> Result<Self::Ok, Self::Error> { - let mut map = try!(self.delegate.serialize_map(Some(1))); - try!(map.serialize_entry(self.tag, self.variant_name)); + let mut map = tri!(self.delegate.serialize_map(Some(1))); + tri!(map.serialize_entry(self.tag, self.variant_name)); map.end() } @@ -199,9 +199,9 @@ where _: u32, inner_variant: &'static str, ) -> Result<Self::Ok, Self::Error> { - let mut map = try!(self.delegate.serialize_map(Some(2))); - try!(map.serialize_entry(self.tag, self.variant_name)); - try!(map.serialize_entry(inner_variant, &())); + let mut map = tri!(self.delegate.serialize_map(Some(2))); + tri!(map.serialize_entry(self.tag, self.variant_name)); + tri!(map.serialize_entry(inner_variant, &())); map.end() } @@ -226,9 +226,9 @@ where where T: Serialize, { - let mut map = try!(self.delegate.serialize_map(Some(2))); - try!(map.serialize_entry(self.tag, self.variant_name)); - try!(map.serialize_entry(inner_variant, inner_value)); + let mut map = tri!(self.delegate.serialize_map(Some(2))); + tri!(map.serialize_entry(self.tag, self.variant_name)); + tri!(map.serialize_entry(inner_variant, inner_value)); map.end() } @@ -269,9 +269,9 @@ where inner_variant: &'static str, len: usize, ) -> Result<Self::SerializeTupleVariant, Self::Error> { - let mut map = try!(self.delegate.serialize_map(Some(2))); - try!(map.serialize_entry(self.tag, self.variant_name)); - try!(map.serialize_key(inner_variant)); + let mut map = tri!(self.delegate.serialize_map(Some(2))); + tri!(map.serialize_entry(self.tag, self.variant_name)); + tri!(map.serialize_key(inner_variant)); Ok(SerializeTupleVariantAsMapValue::new( map, inner_variant, @@ -280,8 +280,8 @@ where } fn serialize_map(self, len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> { - let mut map = try!(self.delegate.serialize_map(len.map(|len| len + 
1))); - try!(map.serialize_entry(self.tag, self.variant_name)); + let mut map = tri!(self.delegate.serialize_map(len.map(|len| len + 1))); + tri!(map.serialize_entry(self.tag, self.variant_name)); Ok(map) } @@ -290,8 +290,8 @@ where name: &'static str, len: usize, ) -> Result<Self::SerializeStruct, Self::Error> { - let mut state = try!(self.delegate.serialize_struct(name, len + 1)); - try!(state.serialize_field(self.tag, self.variant_name)); + let mut state = tri!(self.delegate.serialize_struct(name, len + 1)); + tri!(state.serialize_field(self.tag, self.variant_name)); Ok(state) } @@ -316,9 +316,9 @@ where inner_variant: &'static str, len: usize, ) -> Result<Self::SerializeStructVariant, Self::Error> { - let mut map = try!(self.delegate.serialize_map(Some(2))); - try!(map.serialize_entry(self.tag, self.variant_name)); - try!(map.serialize_key(inner_variant)); + let mut map = tri!(self.delegate.serialize_map(Some(2))); + tri!(map.serialize_entry(self.tag, self.variant_name)); + tri!(map.serialize_key(inner_variant)); Ok(SerializeStructVariantAsMapValue::new( map, inner_variant, @@ -337,9 +337,9 @@ where #[cfg(any(feature = "std", feature = "alloc"))] mod content { - use lib::*; + use crate::lib::*; - use ser::{self, Serialize, Serializer}; + use crate::ser::{self, Serialize, Serializer}; pub struct SerializeTupleVariantAsMapValue<M> { map: M, @@ -350,8 +350,8 @@ mod content { impl<M> SerializeTupleVariantAsMapValue<M> { pub fn new(map: M, name: &'static str, len: usize) -> Self { SerializeTupleVariantAsMapValue { - map: map, - name: name, + map, + name, fields: Vec::with_capacity(len), } } @@ -368,13 +368,13 @@ mod content { where T: Serialize, { - let value = try!(value.serialize(ContentSerializer::<M::Error>::new())); + let value = tri!(value.serialize(ContentSerializer::<M::Error>::new())); self.fields.push(value); Ok(()) } fn end(mut self) -> Result<M::Ok, M::Error> { - try!(self + tri!(self .map .serialize_value(&Content::TupleStruct(self.name, self.fields))); 
self.map.end() @@ -390,8 +390,8 @@ mod content { impl<M> SerializeStructVariantAsMapValue<M> { pub fn new(map: M, name: &'static str, len: usize) -> Self { SerializeStructVariantAsMapValue { - map: map, - name: name, + map, + name, fields: Vec::with_capacity(len), } } @@ -412,13 +412,13 @@ mod content { where T: Serialize, { - let value = try!(value.serialize(ContentSerializer::<M::Error>::new())); + let value = tri!(value.serialize(ContentSerializer::<M::Error>::new())); self.fields.push((key, value)); Ok(()) } fn end(mut self) -> Result<M::Ok, M::Error> { - try!(self + tri!(self .map .serialize_value(&Content::Struct(self.name, self.fields))); self.map.end() @@ -499,50 +499,50 @@ mod content { } Content::Seq(ref elements) => elements.serialize(serializer), Content::Tuple(ref elements) => { - use ser::SerializeTuple; - let mut tuple = try!(serializer.serialize_tuple(elements.len())); + use crate::ser::SerializeTuple; + let mut tuple = tri!(serializer.serialize_tuple(elements.len())); for e in elements { - try!(tuple.serialize_element(e)); + tri!(tuple.serialize_element(e)); } tuple.end() } Content::TupleStruct(n, ref fields) => { - use ser::SerializeTupleStruct; - let mut ts = try!(serializer.serialize_tuple_struct(n, fields.len())); + use crate::ser::SerializeTupleStruct; + let mut ts = tri!(serializer.serialize_tuple_struct(n, fields.len())); for f in fields { - try!(ts.serialize_field(f)); + tri!(ts.serialize_field(f)); } ts.end() } Content::TupleVariant(n, i, v, ref fields) => { - use ser::SerializeTupleVariant; - let mut tv = try!(serializer.serialize_tuple_variant(n, i, v, fields.len())); + use crate::ser::SerializeTupleVariant; + let mut tv = tri!(serializer.serialize_tuple_variant(n, i, v, fields.len())); for f in fields { - try!(tv.serialize_field(f)); + tri!(tv.serialize_field(f)); } tv.end() } Content::Map(ref entries) => { - use ser::SerializeMap; - let mut map = try!(serializer.serialize_map(Some(entries.len()))); + use crate::ser::SerializeMap; + let 
mut map = tri!(serializer.serialize_map(Some(entries.len()))); for (k, v) in entries { - try!(map.serialize_entry(k, v)); + tri!(map.serialize_entry(k, v)); } map.end() } Content::Struct(n, ref fields) => { - use ser::SerializeStruct; - let mut s = try!(serializer.serialize_struct(n, fields.len())); + use crate::ser::SerializeStruct; + let mut s = tri!(serializer.serialize_struct(n, fields.len())); for &(k, ref v) in fields { - try!(s.serialize_field(k, v)); + tri!(s.serialize_field(k, v)); } s.end() } Content::StructVariant(n, i, v, ref fields) => { - use ser::SerializeStructVariant; - let mut sv = try!(serializer.serialize_struct_variant(n, i, v, fields.len())); + use crate::ser::SerializeStructVariant; + let mut sv = tri!(serializer.serialize_struct_variant(n, i, v, fields.len())); for &(k, ref v) in fields { - try!(sv.serialize_field(k, v)); + tri!(sv.serialize_field(k, v)); } sv.end() } @@ -639,7 +639,7 @@ mod content { where T: Serialize, { - Ok(Content::Some(Box::new(try!(value.serialize(self))))) + Ok(Content::Some(Box::new(tri!(value.serialize(self))))) } fn serialize_unit(self) -> Result<Content, E> { @@ -669,7 +669,7 @@ mod content { { Ok(Content::NewtypeStruct( name, - Box::new(try!(value.serialize(self))), + Box::new(tri!(value.serialize(self))), )) } @@ -687,7 +687,7 @@ mod content { name, variant_index, variant, - Box::new(try!(value.serialize(self))), + Box::new(tri!(value.serialize(self))), )) } @@ -711,7 +711,7 @@ mod content { len: usize, ) -> Result<Self::SerializeTupleStruct, E> { Ok(SerializeTupleStruct { - name: name, + name, fields: Vec::with_capacity(len), error: PhantomData, }) @@ -725,9 +725,9 @@ mod content { len: usize, ) -> Result<Self::SerializeTupleVariant, E> { Ok(SerializeTupleVariant { - name: name, - variant_index: variant_index, - variant: variant, + name, + variant_index, + variant, fields: Vec::with_capacity(len), error: PhantomData, }) @@ -747,7 +747,7 @@ mod content { len: usize, ) -> Result<Self::SerializeStruct, E> { 
Ok(SerializeStruct { - name: name, + name, fields: Vec::with_capacity(len), error: PhantomData, }) @@ -761,9 +761,9 @@ mod content { len: usize, ) -> Result<Self::SerializeStructVariant, E> { Ok(SerializeStructVariant { - name: name, - variant_index: variant_index, - variant: variant, + name, + variant_index, + variant, fields: Vec::with_capacity(len), error: PhantomData, }) @@ -786,7 +786,7 @@ mod content { where T: Serialize, { - let value = try!(value.serialize(ContentSerializer::<E>::new())); + let value = tri!(value.serialize(ContentSerializer::<E>::new())); self.elements.push(value); Ok(()) } @@ -812,7 +812,7 @@ mod content { where T: Serialize, { - let value = try!(value.serialize(ContentSerializer::<E>::new())); + let value = tri!(value.serialize(ContentSerializer::<E>::new())); self.elements.push(value); Ok(()) } @@ -839,7 +839,7 @@ mod content { where T: Serialize, { - let value = try!(value.serialize(ContentSerializer::<E>::new())); + let value = tri!(value.serialize(ContentSerializer::<E>::new())); self.fields.push(value); Ok(()) } @@ -868,7 +868,7 @@ mod content { where T: Serialize, { - let value = try!(value.serialize(ContentSerializer::<E>::new())); + let value = tri!(value.serialize(ContentSerializer::<E>::new())); self.fields.push(value); Ok(()) } @@ -900,7 +900,7 @@ mod content { where T: Serialize, { - let key = try!(key.serialize(ContentSerializer::<E>::new())); + let key = tri!(key.serialize(ContentSerializer::<E>::new())); self.key = Some(key); Ok(()) } @@ -913,7 +913,7 @@ mod content { .key .take() .expect("serialize_value called before serialize_key"); - let value = try!(value.serialize(ContentSerializer::<E>::new())); + let value = tri!(value.serialize(ContentSerializer::<E>::new())); self.entries.push((key, value)); Ok(()) } @@ -927,8 +927,8 @@ mod content { K: Serialize, V: Serialize, { - let key = try!(key.serialize(ContentSerializer::<E>::new())); - let value = try!(value.serialize(ContentSerializer::<E>::new())); + let key = 
tri!(key.serialize(ContentSerializer::<E>::new())); + let value = tri!(value.serialize(ContentSerializer::<E>::new())); self.entries.push((key, value)); Ok(()) } @@ -951,7 +951,7 @@ mod content { where T: Serialize, { - let value = try!(value.serialize(ContentSerializer::<E>::new())); + let value = tri!(value.serialize(ContentSerializer::<E>::new())); self.fields.push((key, value)); Ok(()) } @@ -980,7 +980,7 @@ mod content { where T: Serialize, { - let value = try!(value.serialize(ContentSerializer::<E>::new())); + let value = tri!(value.serialize(ContentSerializer::<E>::new())); self.fields.push((key, value)); Ok(()) } @@ -1025,7 +1025,7 @@ where type SerializeTupleStruct = Impossible<Self::Ok, M::Error>; type SerializeMap = FlatMapSerializeMap<'a, M>; type SerializeStruct = FlatMapSerializeStruct<'a, M>; - type SerializeTupleVariant = Impossible<Self::Ok, M::Error>; + type SerializeTupleVariant = FlatMapSerializeTupleVariantAsMapValue<'a, M>; type SerializeStructVariant = FlatMapSerializeStructVariantAsMapValue<'a, M>; fn serialize_bool(self, _: bool) -> Result<Self::Ok, Self::Error> { @@ -1133,7 +1133,7 @@ where where T: Serialize, { - try!(self.0.serialize_key(variant)); + tri!(self.0.serialize_key(variant)); self.0.serialize_value(value) } @@ -1157,10 +1157,11 @@ where self, _: &'static str, _: u32, - _: &'static str, + variant: &'static str, _: usize, ) -> Result<Self::SerializeTupleVariant, Self::Error> { - Err(Self::bad_type(Unsupported::Enum)) + tri!(self.0.serialize_key(variant)); + Ok(FlatMapSerializeTupleVariantAsMapValue::new(self.0)) } fn serialize_map(self, _: Option<usize>) -> Result<Self::SerializeMap, Self::Error> { @@ -1182,7 +1183,7 @@ where inner_variant: &'static str, _: usize, ) -> Result<Self::SerializeStructVariant, Self::Error> { - try!(self.0.serialize_key(inner_variant)); + tri!(self.0.serialize_key(inner_variant)); Ok(FlatMapSerializeStructVariantAsMapValue::new( self.0, inner_variant, @@ -1259,6 +1260,52 @@ where } } 
+//////////////////////////////////////////////////////////////////////////////////////////////////// + +#[cfg(any(feature = "std", feature = "alloc"))] +pub struct FlatMapSerializeTupleVariantAsMapValue<'a, M: 'a> { + map: &'a mut M, + fields: Vec<Content>, +} + +#[cfg(any(feature = "std", feature = "alloc"))] +impl<'a, M> FlatMapSerializeTupleVariantAsMapValue<'a, M> +where + M: SerializeMap + 'a, +{ + fn new(map: &'a mut M) -> Self { + FlatMapSerializeTupleVariantAsMapValue { + map, + fields: Vec::new(), + } + } +} + +#[cfg(any(feature = "std", feature = "alloc"))] +impl<'a, M> ser::SerializeTupleVariant for FlatMapSerializeTupleVariantAsMapValue<'a, M> +where + M: SerializeMap + 'a, +{ + type Ok = (); + type Error = M::Error; + + fn serialize_field<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error> + where + T: Serialize, + { + let value = tri!(value.serialize(ContentSerializer::<M::Error>::new())); + self.fields.push(value); + Ok(()) + } + + fn end(self) -> Result<(), Self::Error> { + tri!(self.map.serialize_value(&Content::Seq(self.fields))); + Ok(()) + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + #[cfg(any(feature = "std", feature = "alloc"))] pub struct FlatMapSerializeStructVariantAsMapValue<'a, M: 'a> { map: &'a mut M, @@ -1273,8 +1320,8 @@ where { fn new(map: &'a mut M, name: &'static str) -> FlatMapSerializeStructVariantAsMapValue<'a, M> { FlatMapSerializeStructVariantAsMapValue { - map: map, - name: name, + map, + name, fields: Vec::new(), } } @@ -1296,15 +1343,43 @@ where where T: Serialize, { - let value = try!(value.serialize(ContentSerializer::<M::Error>::new())); + let value = tri!(value.serialize(ContentSerializer::<M::Error>::new())); self.fields.push((key, value)); Ok(()) } fn end(self) -> Result<(), Self::Error> { - try!(self + tri!(self .map .serialize_value(&Content::Struct(self.name, self.fields))); Ok(()) } } + +pub struct AdjacentlyTaggedEnumVariant { + pub 
enum_name: &'static str, + pub variant_index: u32, + pub variant_name: &'static str, +} + +impl Serialize for AdjacentlyTaggedEnumVariant { + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + serializer.serialize_unit_variant(self.enum_name, self.variant_index, self.variant_name) + } +} + +// Error when Serialize for a non_exhaustive remote enum encounters a variant +// that is not recognized. +pub struct CannotSerializeVariant<T>(pub T); + +impl<T> Display for CannotSerializeVariant<T> +where + T: Debug, +{ + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + write!(formatter, "enum variant cannot be serialized: {:?}", self.0) + } +} diff --git a/vendor/serde/src/ser/fmt.rs b/vendor/serde/src/ser/fmt.rs index e7e09a1..0650ab6 100644 --- a/vendor/serde/src/ser/fmt.rs +++ b/vendor/serde/src/ser/fmt.rs @@ -1,5 +1,5 @@ -use lib::*; -use ser::{Error, Impossible, Serialize, Serializer}; +use crate::lib::*; +use crate::ser::{Error, Impossible, Serialize, Serializer}; impl Error for fmt::Error { fn custom<T: Display>(_msg: T) -> Self { @@ -17,8 +17,9 @@ macro_rules! fmt_primitives { }; } -/// ```edition2018 -/// use serde::Serialize; +/// ```edition2021 +/// use serde::ser::Serialize; +/// use serde_derive::Serialize; /// use std::fmt::{self, Display}; /// /// #[derive(Serialize)] @@ -51,10 +52,12 @@ impl<'a, 'b> Serializer for &'a mut fmt::Formatter<'b> { serialize_i16: i16, serialize_i32: i32, serialize_i64: i64, + serialize_i128: i128, serialize_u8: u8, serialize_u16: u16, serialize_u32: u32, serialize_u64: u64, + serialize_u128: u128, serialize_f32: f32, serialize_f64: f64, serialize_char: char, @@ -62,13 +65,6 @@ impl<'a, 'b> Serializer for &'a mut fmt::Formatter<'b> { serialize_unit_struct: &'static str, } - serde_if_integer128! { - fmt_primitives! 
{ - serialize_i128: i128, - serialize_u128: u128, - } - } - fn serialize_unit_variant( self, _name: &'static str, diff --git a/vendor/serde/src/ser/impls.rs b/vendor/serde/src/ser/impls.rs index a79326e..8c70634 100644 --- a/vendor/serde/src/ser/impls.rs +++ b/vendor/serde/src/ser/impls.rs @@ -1,6 +1,6 @@ -use lib::*; +use crate::lib::*; -use ser::{Error, Serialize, SerializeTuple, Serializer}; +use crate::ser::{Error, Serialize, SerializeTuple, Serializer}; //////////////////////////////////////////////////////////////////////////////// @@ -24,20 +24,17 @@ primitive_impl!(i8, serialize_i8); primitive_impl!(i16, serialize_i16); primitive_impl!(i32, serialize_i32); primitive_impl!(i64, serialize_i64); +primitive_impl!(i128, serialize_i128); primitive_impl!(usize, serialize_u64 as u64); primitive_impl!(u8, serialize_u8); primitive_impl!(u16, serialize_u16); primitive_impl!(u32, serialize_u32); primitive_impl!(u64, serialize_u64); +primitive_impl!(u128, serialize_u128); primitive_impl!(f32, serialize_f32); primitive_impl!(f64, serialize_f64); primitive_impl!(char, serialize_char); -serde_if_integer128! 
{ - primitive_impl!(i128, serialize_i128); - primitive_impl!(u128, serialize_u128); -} - //////////////////////////////////////////////////////////////////////////////// impl Serialize for str { @@ -51,6 +48,7 @@ impl Serialize for str { } #[cfg(any(feature = "std", feature = "alloc"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] impl Serialize for String { #[inline] fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> @@ -73,6 +71,7 @@ impl<'a> Serialize for fmt::Arguments<'a> { //////////////////////////////////////////////////////////////////////////////// #[cfg(any(feature = "std", not(no_core_cstr)))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] impl Serialize for CStr { #[inline] fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> @@ -84,6 +83,7 @@ impl Serialize for CStr { } #[cfg(any(feature = "std", all(not(no_core_cstr), feature = "alloc")))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] impl Serialize for CString { #[inline] fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> @@ -133,7 +133,7 @@ impl<T> Serialize for [T; 0] { where S: Serializer, { - try!(serializer.serialize_tuple(0)).end() + tri!(serializer.serialize_tuple(0)).end() } } @@ -149,9 +149,9 @@ macro_rules! array_impls { where S: Serializer, { - let mut seq = try!(serializer.serialize_tuple($len)); + let mut seq = tri!(serializer.serialize_tuple($len)); for e in self { - try!(seq.serialize_element(e)); + tri!(seq.serialize_element(e)); } seq.end() } @@ -182,9 +182,13 @@ where } } -#[cfg(all(any(feature = "std", feature = "alloc"), not(no_relaxed_trait_bounds)))] +#[cfg(not(no_relaxed_trait_bounds))] macro_rules! 
seq_impl { - ($ty:ident <T $(: $tbound1:ident $(+ $tbound2:ident)*)* $(, $typaram:ident : $bound:ident)*>) => { + ( + $(#[$attr:meta])* + $ty:ident <T $(: $tbound1:ident $(+ $tbound2:ident)*)* $(, $typaram:ident : $bound:ident)*> + ) => { + $(#[$attr])* impl<T $(, $typaram)*> Serialize for $ty<T $(, $typaram)*> where T: Serialize, @@ -200,9 +204,13 @@ macro_rules! seq_impl { } } -#[cfg(all(any(feature = "std", feature = "alloc"), no_relaxed_trait_bounds))] +#[cfg(no_relaxed_trait_bounds)] macro_rules! seq_impl { - ($ty:ident <T $(: $tbound1:ident $(+ $tbound2:ident)*)* $(, $typaram:ident : $bound:ident)*>) => { + ( + $(#[$attr:meta])* + $ty:ident <T $(: $tbound1:ident $(+ $tbound2:ident)*)* $(, $typaram:ident : $bound:ident)*> + ) => { + $(#[$attr])* impl<T $(, $typaram)*> Serialize for $ty<T $(, $typaram)*> where T: Serialize $(+ $tbound1 $(+ $tbound2)*)*, @@ -219,23 +227,41 @@ macro_rules! seq_impl { } } -#[cfg(any(feature = "std", feature = "alloc"))] -seq_impl!(BinaryHeap<T: Ord>); +seq_impl! { + #[cfg(any(feature = "std", feature = "alloc"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] + BinaryHeap<T: Ord> +} -#[cfg(any(feature = "std", feature = "alloc"))] -seq_impl!(BTreeSet<T: Ord>); +seq_impl! { + #[cfg(any(feature = "std", feature = "alloc"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] + BTreeSet<T: Ord> +} -#[cfg(feature = "std")] -seq_impl!(HashSet<T: Eq + Hash, H: BuildHasher>); +seq_impl! { + #[cfg(feature = "std")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] + HashSet<T: Eq + Hash, H: BuildHasher> +} -#[cfg(any(feature = "std", feature = "alloc"))] -seq_impl!(LinkedList<T>); +seq_impl! { + #[cfg(any(feature = "std", feature = "alloc"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] + LinkedList<T> +} -#[cfg(any(feature = "std", feature = "alloc"))] -seq_impl!(Vec<T>); +seq_impl! 
{ + #[cfg(any(feature = "std", feature = "alloc"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] + Vec<T> +} -#[cfg(any(feature = "std", feature = "alloc"))] -seq_impl!(VecDeque<T>); +seq_impl! { + #[cfg(any(feature = "std", feature = "alloc"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] + VecDeque<T> +} //////////////////////////////////////////////////////////////////////////////// @@ -248,16 +274,32 @@ where S: Serializer, { use super::SerializeStruct; - let mut state = try!(serializer.serialize_struct("Range", 2)); - try!(state.serialize_field("start", &self.start)); - try!(state.serialize_field("end", &self.end)); + let mut state = tri!(serializer.serialize_struct("Range", 2)); + tri!(state.serialize_field("start", &self.start)); + tri!(state.serialize_field("end", &self.end)); + state.end() + } +} + +//////////////////////////////////////////////////////////////////////////////// + +impl<Idx> Serialize for RangeFrom<Idx> +where + Idx: Serialize, +{ + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + use super::SerializeStruct; + let mut state = tri!(serializer.serialize_struct("RangeFrom", 1)); + tri!(state.serialize_field("start", &self.start)); state.end() } } //////////////////////////////////////////////////////////////////////////////// -#[cfg(not(no_range_inclusive))] impl<Idx> Serialize for RangeInclusive<Idx> where Idx: Serialize, @@ -267,16 +309,32 @@ where S: Serializer, { use super::SerializeStruct; - let mut state = try!(serializer.serialize_struct("RangeInclusive", 2)); - try!(state.serialize_field("start", &self.start())); - try!(state.serialize_field("end", &self.end())); + let mut state = tri!(serializer.serialize_struct("RangeInclusive", 2)); + tri!(state.serialize_field("start", &self.start())); + tri!(state.serialize_field("end", &self.end())); + state.end() + } +} + 
+//////////////////////////////////////////////////////////////////////////////// + +impl<Idx> Serialize for RangeTo<Idx> +where + Idx: Serialize, +{ + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + use super::SerializeStruct; + let mut state = tri!(serializer.serialize_struct("RangeTo", 1)); + tri!(state.serialize_field("end", &self.end)); state.end() } } //////////////////////////////////////////////////////////////////////////////// -#[cfg(any(not(no_ops_bound), all(feature = "std", not(no_collections_bound))))] impl<T> Serialize for Bound<T> where T: Serialize, @@ -310,6 +368,7 @@ impl Serialize for () { } #[cfg(feature = "unstable")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "unstable")))] impl Serialize for ! { fn serialize<S>(&self, _serializer: S) -> Result<S::Ok, S::Error> where @@ -333,9 +392,9 @@ macro_rules! tuple_impls { where S: Serializer, { - let mut tuple = try!(serializer.serialize_tuple($len)); + let mut tuple = tri!(serializer.serialize_tuple($len)); $( - try!(tuple.serialize_element(&self.$n)); + tri!(tuple.serialize_element(&self.$n)); )+ tuple.end() } @@ -365,9 +424,13 @@ tuple_impls! { //////////////////////////////////////////////////////////////////////////////// -#[cfg(all(any(feature = "std", feature = "alloc"), not(no_relaxed_trait_bounds)))] +#[cfg(not(no_relaxed_trait_bounds))] macro_rules! map_impl { - ($ty:ident <K $(: $kbound1:ident $(+ $kbound2:ident)*)*, V $(, $typaram:ident : $bound:ident)*>) => { + ( + $(#[$attr:meta])* + $ty:ident <K $(: $kbound1:ident $(+ $kbound2:ident)*)*, V $(, $typaram:ident : $bound:ident)*> + ) => { + $(#[$attr])* impl<K, V $(, $typaram)*> Serialize for $ty<K, V $(, $typaram)*> where K: Serialize, @@ -384,9 +447,13 @@ macro_rules! map_impl { } } -#[cfg(all(any(feature = "std", feature = "alloc"), no_relaxed_trait_bounds))] +#[cfg(no_relaxed_trait_bounds)] macro_rules! 
map_impl { - ($ty:ident <K $(: $kbound1:ident $(+ $kbound2:ident)*)*, V $(, $typaram:ident : $bound:ident)*>) => { + ( + $(#[$attr:meta])* + $ty:ident <K $(: $kbound1:ident $(+ $kbound2:ident)*)*, V $(, $typaram:ident : $bound:ident)*> + ) => { + $(#[$attr])* impl<K, V $(, $typaram)*> Serialize for $ty<K, V $(, $typaram)*> where K: Serialize $(+ $kbound1 $(+ $kbound2)*)*, @@ -404,20 +471,26 @@ macro_rules! map_impl { } } -#[cfg(any(feature = "std", feature = "alloc"))] -map_impl!(BTreeMap<K: Ord, V>); +map_impl! { + #[cfg(any(feature = "std", feature = "alloc"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] + BTreeMap<K: Ord, V> +} -#[cfg(feature = "std")] -map_impl!(HashMap<K: Eq + Hash, V, H: BuildHasher>); +map_impl! { + #[cfg(feature = "std")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] + HashMap<K: Eq + Hash, V, H: BuildHasher> +} //////////////////////////////////////////////////////////////////////////////// macro_rules! deref_impl { ( - $(#[doc = $doc:tt])* + $(#[$attr:meta])* <$($desc:tt)+ ) => { - $(#[doc = $doc])* + $(#[$attr])* impl <$($desc)+ { #[inline] fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> @@ -430,13 +503,20 @@ macro_rules! deref_impl { }; } -deref_impl!(<'a, T: ?Sized> Serialize for &'a T where T: Serialize); -deref_impl!(<'a, T: ?Sized> Serialize for &'a mut T where T: Serialize); +deref_impl! { + <'a, T: ?Sized> Serialize for &'a T where T: Serialize +} -#[cfg(any(feature = "std", feature = "alloc"))] -deref_impl!(<T: ?Sized> Serialize for Box<T> where T: Serialize); +deref_impl! { + <'a, T: ?Sized> Serialize for &'a mut T where T: Serialize +} + +deref_impl! { + #[cfg(any(feature = "std", feature = "alloc"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] + <T: ?Sized> Serialize for Box<T> where T: Serialize +} -#[cfg(all(feature = "rc", any(feature = "std", feature = "alloc")))] deref_impl! { /// This impl requires the [`"rc"`] Cargo feature of Serde. 
/// @@ -446,10 +526,11 @@ deref_impl! { /// repeated data. /// /// [`"rc"`]: https://serde.rs/feature-flags.html#-features-rc + #[cfg(all(feature = "rc", any(feature = "std", feature = "alloc")))] + #[cfg_attr(doc_cfg, doc(cfg(all(feature = "rc", any(feature = "std", feature = "alloc")))))] <T: ?Sized> Serialize for Rc<T> where T: Serialize } -#[cfg(all(feature = "rc", any(feature = "std", feature = "alloc")))] deref_impl! { /// This impl requires the [`"rc"`] Cargo feature of Serde. /// @@ -459,11 +540,16 @@ deref_impl! { /// repeated data. /// /// [`"rc"`]: https://serde.rs/feature-flags.html#-features-rc + #[cfg(all(feature = "rc", any(feature = "std", feature = "alloc")))] + #[cfg_attr(doc_cfg, doc(cfg(all(feature = "rc", any(feature = "std", feature = "alloc")))))] <T: ?Sized> Serialize for Arc<T> where T: Serialize } -#[cfg(any(feature = "std", feature = "alloc"))] -deref_impl!(<'a, T: ?Sized> Serialize for Cow<'a, T> where T: Serialize + ToOwned); +deref_impl! { + #[cfg(any(feature = "std", feature = "alloc"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "alloc"))))] + <'a, T: ?Sized> Serialize for Cow<'a, T> where T: Serialize + ToOwned +} //////////////////////////////////////////////////////////////////////////////// @@ -471,6 +557,10 @@ deref_impl!(<'a, T: ?Sized> Serialize for Cow<'a, T> where T: Serialize + ToOwne /// /// [`"rc"`]: https://serde.rs/feature-flags.html#-features-rc #[cfg(all(feature = "rc", any(feature = "std", feature = "alloc")))] +#[cfg_attr( + doc_cfg, + doc(cfg(all(feature = "rc", any(feature = "std", feature = "alloc")))) +)] impl<T: ?Sized> Serialize for RcWeak<T> where T: Serialize, @@ -487,6 +577,10 @@ where /// /// [`"rc"`]: https://serde.rs/feature-flags.html#-features-rc #[cfg(all(feature = "rc", any(feature = "std", feature = "alloc")))] +#[cfg_attr( + doc_cfg, + doc(cfg(all(feature = "rc", any(feature = "std", feature = "alloc")))) +)] impl<T: ?Sized> Serialize for ArcWeak<T> where T: Serialize, @@ -504,7 
+598,6 @@ where macro_rules! nonzero_integers { ($($T:ident,)+) => { $( - #[cfg(not(no_num_nonzero))] impl Serialize for num::$T { fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where @@ -522,6 +615,7 @@ nonzero_integers! { NonZeroU16, NonZeroU32, NonZeroU64, + NonZeroU128, NonZeroUsize, } @@ -531,22 +625,10 @@ nonzero_integers! { NonZeroI16, NonZeroI32, NonZeroI64, + NonZeroI128, NonZeroIsize, } -// Currently 128-bit integers do not work on Emscripten targets so we need an -// additional `#[cfg]` -serde_if_integer128! { - nonzero_integers! { - NonZeroU128, - } - - #[cfg(not(no_num_nonzero_signed))] - nonzero_integers! { - NonZeroI128, - } -} - impl<T> Serialize for Cell<T> where T: Serialize + Copy, @@ -575,6 +657,7 @@ where } #[cfg(feature = "std")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] impl<T: ?Sized> Serialize for Mutex<T> where T: Serialize, @@ -591,6 +674,7 @@ where } #[cfg(feature = "std")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] impl<T: ?Sized> Serialize for RwLock<T> where T: Serialize, @@ -628,16 +712,15 @@ where //////////////////////////////////////////////////////////////////////////////// -#[cfg(any(feature = "std", not(no_core_duration)))] impl Serialize for Duration { fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where S: Serializer, { use super::SerializeStruct; - let mut state = try!(serializer.serialize_struct("Duration", 2)); - try!(state.serialize_field("secs", &self.as_secs())); - try!(state.serialize_field("nanos", &self.subsec_nanos())); + let mut state = tri!(serializer.serialize_struct("Duration", 2)); + tri!(state.serialize_field("secs", &self.as_secs())); + tri!(state.serialize_field("nanos", &self.subsec_nanos())); state.end() } } @@ -645,6 +728,7 @@ impl Serialize for Duration { //////////////////////////////////////////////////////////////////////////////// #[cfg(feature = "std")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] impl Serialize for SystemTime { fn 
serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where @@ -655,9 +739,9 @@ impl Serialize for SystemTime { Ok(duration_since_epoch) => duration_since_epoch, Err(_) => return Err(S::Error::custom("SystemTime must be later than UNIX_EPOCH")), }; - let mut state = try!(serializer.serialize_struct("SystemTime", 2)); - try!(state.serialize_field("secs_since_epoch", &duration_since_epoch.as_secs())); - try!(state.serialize_field("nanos_since_epoch", &duration_since_epoch.subsec_nanos())); + let mut state = tri!(serializer.serialize_struct("SystemTime", 2)); + tri!(state.serialize_field("secs_since_epoch", &duration_since_epoch.as_secs())); + tri!(state.serialize_field("nanos_since_epoch", &duration_since_epoch.subsec_nanos())); state.end() } } @@ -689,6 +773,7 @@ macro_rules! serialize_display_bounded_length { } #[cfg(feature = "std")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] impl Serialize for net::IpAddr { fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where @@ -713,7 +798,7 @@ impl Serialize for net::IpAddr { } #[cfg(feature = "std")] -const DEC_DIGITS_LUT: &'static [u8] = b"\ +const DEC_DIGITS_LUT: &[u8] = b"\ 0001020304050607080910111213141516171819\ 2021222324252627282930313233343536373839\ 4041424344454647484950515253545556575859\ @@ -759,6 +844,7 @@ fn test_format_u8() { } #[cfg(feature = "std")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] impl Serialize for net::Ipv4Addr { fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where @@ -783,6 +869,7 @@ impl Serialize for net::Ipv4Addr { } #[cfg(feature = "std")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] impl Serialize for net::Ipv6Addr { fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where @@ -799,6 +886,7 @@ impl Serialize for net::Ipv6Addr { } #[cfg(feature = "std")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] impl Serialize for net::SocketAddr { fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where @@ -823,6 
+911,7 @@ impl Serialize for net::SocketAddr { } #[cfg(feature = "std")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] impl Serialize for net::SocketAddrV4 { fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where @@ -839,6 +928,7 @@ impl Serialize for net::SocketAddrV4 { } #[cfg(feature = "std")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] impl Serialize for net::SocketAddrV6 { fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where @@ -860,6 +950,7 @@ impl Serialize for net::SocketAddrV6 { //////////////////////////////////////////////////////////////////////////////// #[cfg(feature = "std")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] impl Serialize for Path { fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where @@ -873,6 +964,7 @@ impl Serialize for Path { } #[cfg(feature = "std")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] impl Serialize for PathBuf { fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where @@ -883,6 +975,7 @@ impl Serialize for PathBuf { } #[cfg(all(feature = "std", any(unix, windows)))] +#[cfg_attr(doc_cfg, doc(cfg(all(feature = "std", any(unix, windows)))))] impl Serialize for OsStr { #[cfg(unix)] fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> @@ -905,6 +998,7 @@ impl Serialize for OsStr { } #[cfg(all(feature = "std", any(unix, windows)))] +#[cfg_attr(doc_cfg, doc(cfg(all(feature = "std", any(unix, windows)))))] impl Serialize for OsString { fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where @@ -929,7 +1023,6 @@ where } } -#[cfg(not(no_core_reverse))] impl<T> Serialize for Reverse<T> where T: Serialize, @@ -950,6 +1043,7 @@ macro_rules! 
atomic_impl { ($($ty:ident $size:expr)*) => { $( #[cfg(any(no_target_has_atomic, target_has_atomic = $size))] + #[cfg_attr(doc_cfg, doc(cfg(all(feature = "std", target_has_atomic = $size))))] impl Serialize for $ty { fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where diff --git a/vendor/serde/src/ser/impossible.rs b/vendor/serde/src/ser/impossible.rs index e8df9ca..479be94 100644 --- a/vendor/serde/src/ser/impossible.rs +++ b/vendor/serde/src/ser/impossible.rs @@ -1,8 +1,8 @@ //! This module contains `Impossible` serializer and its implementations. -use lib::*; +use crate::lib::*; -use ser::{ +use crate::ser::{ self, Serialize, SerializeMap, SerializeSeq, SerializeStruct, SerializeStructVariant, SerializeTuple, SerializeTupleStruct, SerializeTupleVariant, }; @@ -15,7 +15,7 @@ use ser::{ /// [`SerializeTuple`], [`SerializeTupleStruct`], [`SerializeTupleVariant`], /// [`SerializeMap`], [`SerializeStruct`], and [`SerializeStructVariant`]. /// -/// ```edition2018 +/// ```edition2021 /// # use serde::ser::{Serializer, Impossible}; /// # use serde::__private::doc::Error; /// # diff --git a/vendor/serde/src/ser/mod.rs b/vendor/serde/src/ser/mod.rs index 5c45426..75c4514 100644 --- a/vendor/serde/src/ser/mod.rs +++ b/vendor/serde/src/ser/mod.rs @@ -61,8 +61,8 @@ //! - RefCell\<T\> //! - Mutex\<T\> //! - RwLock\<T\> -//! - Rc\<T\> *(if* features = ["rc"] *is enabled)* -//! - Arc\<T\> *(if* features = ["rc"] *is enabled)* +//! - Rc\<T\> *(if* features = \["rc"\] *is enabled)* +//! - Arc\<T\> *(if* features = \["rc"\] *is enabled)* //! - **Collection types**: //! - BTreeMap\<K, V\> //! - BTreeSet\<T\> @@ -107,7 +107,7 @@ //! [derive section of the manual]: https://serde.rs/derive.html //! 
[data formats]: https://serde.rs/#data-formats -use lib::*; +use crate::lib::*; mod fmt; mod impls; @@ -115,15 +115,15 @@ mod impossible; pub use self::impossible::Impossible; +#[cfg(not(any(feature = "std", feature = "unstable")))] +#[doc(no_inline)] +pub use crate::std_error::Error as StdError; #[cfg(all(feature = "unstable", not(feature = "std")))] -#[doc(inline)] +#[doc(no_inline)] pub use core::error::Error as StdError; #[cfg(feature = "std")] #[doc(no_inline)] pub use std::error::Error as StdError; -#[cfg(not(any(feature = "std", feature = "unstable")))] -#[doc(no_inline)] -pub use std_error::Error as StdError; //////////////////////////////////////////////////////////////////////////////// @@ -149,7 +149,7 @@ macro_rules! declare_error_trait { /// For example, a filesystem [`Path`] may refuse to serialize /// itself if it contains invalid UTF-8 data. /// - /// ```edition2018 + /// ```edition2021 /// # struct Path; /// # /// # impl Path { @@ -221,7 +221,7 @@ pub trait Serialize { /// See the [Implementing `Serialize`] section of the manual for more /// information about how to implement this method. /// - /// ```edition2018 + /// ```edition2021 /// use serde::ser::{Serialize, SerializeStruct, Serializer}; /// /// struct Person { @@ -388,7 +388,7 @@ pub trait Serializer: Sized { /// Serialize a `bool` value. /// - /// ```edition2018 + /// ```edition2021 /// # use serde::Serializer; /// # /// # serde::__private_serialize!(); @@ -410,7 +410,7 @@ pub trait Serializer: Sized { /// reasonable implementation would be to cast the value to `i64` and /// forward to `serialize_i64`. /// - /// ```edition2018 + /// ```edition2021 /// # use serde::Serializer; /// # /// # serde::__private_serialize!(); @@ -432,7 +432,7 @@ pub trait Serializer: Sized { /// reasonable implementation would be to cast the value to `i64` and /// forward to `serialize_i64`. 
/// - /// ```edition2018 + /// ```edition2021 /// # use serde::Serializer; /// # /// # serde::__private_serialize!(); @@ -454,7 +454,7 @@ pub trait Serializer: Sized { /// reasonable implementation would be to cast the value to `i64` and /// forward to `serialize_i64`. /// - /// ```edition2018 + /// ```edition2021 /// # use serde::Serializer; /// # /// # serde::__private_serialize!(); @@ -472,7 +472,7 @@ pub trait Serializer: Sized { /// Serialize an `i64` value. /// - /// ```edition2018 + /// ```edition2021 /// # use serde::Serializer; /// # /// # serde::__private_serialize!(); @@ -488,30 +488,27 @@ pub trait Serializer: Sized { /// ``` fn serialize_i64(self, v: i64) -> Result<Self::Ok, Self::Error>; - serde_if_integer128! { - /// Serialize an `i128` value. - /// - /// ```edition2018 - /// # use serde::Serializer; - /// # - /// # serde::__private_serialize!(); - /// # - /// impl Serialize for i128 { - /// fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> - /// where - /// S: Serializer, - /// { - /// serializer.serialize_i128(*self) - /// } - /// } - /// ``` - /// - /// This method is available only on Rust compiler versions >=1.26. The - /// default behavior unconditionally returns an error. - fn serialize_i128(self, v: i128) -> Result<Self::Ok, Self::Error> { - let _ = v; - Err(Error::custom("i128 is not supported")) - } + /// Serialize an `i128` value. + /// + /// ```edition2021 + /// # use serde::Serializer; + /// # + /// # serde::__private_serialize!(); + /// # + /// impl Serialize for i128 { + /// fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + /// where + /// S: Serializer, + /// { + /// serializer.serialize_i128(*self) + /// } + /// } + /// ``` + /// + /// The default behavior unconditionally returns an error. + fn serialize_i128(self, v: i128) -> Result<Self::Ok, Self::Error> { + let _ = v; + Err(Error::custom("i128 is not supported")) } /// Serialize a `u8` value. 
@@ -520,7 +517,7 @@ pub trait Serializer: Sized { /// reasonable implementation would be to cast the value to `u64` and /// forward to `serialize_u64`. /// - /// ```edition2018 + /// ```edition2021 /// # use serde::Serializer; /// # /// # serde::__private_serialize!(); @@ -542,7 +539,7 @@ pub trait Serializer: Sized { /// reasonable implementation would be to cast the value to `u64` and /// forward to `serialize_u64`. /// - /// ```edition2018 + /// ```edition2021 /// # use serde::Serializer; /// # /// # serde::__private_serialize!(); @@ -564,7 +561,7 @@ pub trait Serializer: Sized { /// reasonable implementation would be to cast the value to `u64` and /// forward to `serialize_u64`. /// - /// ```edition2018 + /// ```edition2021 /// # use serde::Serializer; /// # /// # serde::__private_serialize!(); @@ -582,7 +579,7 @@ pub trait Serializer: Sized { /// Serialize a `u64` value. /// - /// ```edition2018 + /// ```edition2021 /// # use serde::Serializer; /// # /// # serde::__private_serialize!(); @@ -598,30 +595,27 @@ pub trait Serializer: Sized { /// ``` fn serialize_u64(self, v: u64) -> Result<Self::Ok, Self::Error>; - serde_if_integer128! { - /// Serialize a `u128` value. - /// - /// ```edition2018 - /// # use serde::Serializer; - /// # - /// # serde::__private_serialize!(); - /// # - /// impl Serialize for u128 { - /// fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> - /// where - /// S: Serializer, - /// { - /// serializer.serialize_u128(*self) - /// } - /// } - /// ``` - /// - /// This method is available only on Rust compiler versions >=1.26. The - /// default behavior unconditionally returns an error. - fn serialize_u128(self, v: u128) -> Result<Self::Ok, Self::Error> { - let _ = v; - Err(Error::custom("u128 is not supported")) - } + /// Serialize a `u128` value. 
+ /// + /// ```edition2021 + /// # use serde::Serializer; + /// # + /// # serde::__private_serialize!(); + /// # + /// impl Serialize for u128 { + /// fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + /// where + /// S: Serializer, + /// { + /// serializer.serialize_u128(*self) + /// } + /// } + /// ``` + /// + /// The default behavior unconditionally returns an error. + fn serialize_u128(self, v: u128) -> Result<Self::Ok, Self::Error> { + let _ = v; + Err(Error::custom("u128 is not supported")) } /// Serialize an `f32` value. @@ -630,7 +624,7 @@ pub trait Serializer: Sized { /// reasonable implementation would be to cast the value to `f64` and /// forward to `serialize_f64`. /// - /// ```edition2018 + /// ```edition2021 /// # use serde::Serializer; /// # /// # serde::__private_serialize!(); @@ -648,7 +642,7 @@ pub trait Serializer: Sized { /// Serialize an `f64` value. /// - /// ```edition2018 + /// ```edition2021 /// # use serde::Serializer; /// # /// # serde::__private_serialize!(); @@ -669,7 +663,7 @@ pub trait Serializer: Sized { /// If the format does not support characters, it is reasonable to serialize /// it as a single element `str` or a `u32`. /// - /// ```edition2018 + /// ```edition2021 /// # use serde::Serializer; /// # /// # serde::__private_serialize!(); @@ -687,7 +681,7 @@ pub trait Serializer: Sized { /// Serialize a `&str`. /// - /// ```edition2018 + /// ```edition2021 /// # use serde::Serializer; /// # /// # serde::__private_serialize!(); @@ -711,7 +705,7 @@ pub trait Serializer: Sized { /// `serialize_seq`. If forwarded, the implementation looks usually just /// like this: /// - /// ```edition2018 + /// ```edition2021 /// # use serde::ser::{Serializer, SerializeSeq}; /// # use serde::__private::doc::Error; /// # @@ -740,7 +734,7 @@ pub trait Serializer: Sized { /// Serialize a [`None`] value. 
/// - /// ```edition2018 + /// ```edition2021 /// # use serde::{Serialize, Serializer}; /// # /// # enum Option<T> { @@ -773,7 +767,7 @@ pub trait Serializer: Sized { /// Serialize a [`Some(T)`] value. /// - /// ```edition2018 + /// ```edition2021 /// # use serde::{Serialize, Serializer}; /// # /// # enum Option<T> { @@ -808,7 +802,7 @@ pub trait Serializer: Sized { /// Serialize a `()` value. /// - /// ```edition2018 + /// ```edition2021 /// # use serde::Serializer; /// # /// # serde::__private_serialize!(); @@ -828,7 +822,7 @@ pub trait Serializer: Sized { /// /// A reasonable implementation would be to forward to `serialize_unit`. /// - /// ```edition2018 + /// ```edition2021 /// use serde::{Serialize, Serializer}; /// /// struct Nothing; @@ -850,7 +844,7 @@ pub trait Serializer: Sized { /// this variant within the enum, and the `variant` is the name of the /// variant. /// - /// ```edition2018 + /// ```edition2021 /// use serde::{Serialize, Serializer}; /// /// enum E { @@ -883,7 +877,7 @@ pub trait Serializer: Sized { /// wrappers around the data they contain. A reasonable implementation would /// be to forward to `value.serialize(self)`. /// - /// ```edition2018 + /// ```edition2021 /// use serde::{Serialize, Serializer}; /// /// struct Millimeters(u8); @@ -911,7 +905,7 @@ pub trait Serializer: Sized { /// this variant within the enum, and the `variant` is the name of the /// variant. The `value` is the data contained within this newtype variant. /// - /// ```edition2018 + /// ```edition2021 /// use serde::{Serialize, Serializer}; /// /// enum E { @@ -949,7 +943,7 @@ pub trait Serializer: Sized { /// not be computable before the sequence is iterated. Some serializers only /// support sequences whose length is known up front. 
/// - /// ```edition2018 + /// ```edition2021 /// # use std::marker::PhantomData; /// # /// # struct Vec<T>(PhantomData<T>); @@ -962,14 +956,14 @@ pub trait Serializer: Sized { /// # /// # impl<'a, T> IntoIterator for &'a Vec<T> { /// # type Item = &'a T; - /// # type IntoIter = Box<Iterator<Item = &'a T>>; + /// # type IntoIter = Box<dyn Iterator<Item = &'a T>>; /// # /// # fn into_iter(self) -> Self::IntoIter { /// # unimplemented!() /// # } /// # } /// # - /// use serde::ser::{Serialize, Serializer, SerializeSeq}; + /// use serde::ser::{Serialize, SerializeSeq, Serializer}; /// /// impl<T> Serialize for Vec<T> /// where @@ -994,8 +988,8 @@ pub trait Serializer: Sized { /// This call must be followed by zero or more calls to `serialize_element`, /// then a call to `end`. /// - /// ```edition2018 - /// use serde::ser::{Serialize, Serializer, SerializeTuple}; + /// ```edition2021 + /// use serde::ser::{Serialize, SerializeTuple, Serializer}; /// /// # mod fool { /// # trait Serialize {} @@ -1024,7 +1018,7 @@ pub trait Serializer: Sized { /// } /// ``` /// - /// ```edition2018 + /// ```edition2021 /// use serde::ser::{Serialize, SerializeTuple, Serializer}; /// /// const VRAM_SIZE: usize = 386; @@ -1052,7 +1046,7 @@ pub trait Serializer: Sized { /// The `name` is the name of the tuple struct and the `len` is the number /// of data fields that will be serialized. /// - /// ```edition2018 + /// ```edition2021 /// use serde::ser::{Serialize, SerializeTupleStruct, Serializer}; /// /// struct Rgb(u8, u8, u8); @@ -1084,7 +1078,7 @@ pub trait Serializer: Sized { /// this variant within the enum, the `variant` is the name of the variant, /// and the `len` is the number of data fields that will be serialized. /// - /// ```edition2018 + /// ```edition2021 /// use serde::ser::{Serialize, SerializeTupleVariant, Serializer}; /// /// enum E { @@ -1130,7 +1124,7 @@ pub trait Serializer: Sized { /// be computable before the map is iterated. 
Some serializers only support /// maps whose length is known up front. /// - /// ```edition2018 + /// ```edition2021 /// # use std::marker::PhantomData; /// # /// # struct HashMap<K, V>(PhantomData<K>, PhantomData<V>); @@ -1143,14 +1137,14 @@ pub trait Serializer: Sized { /// # /// # impl<'a, K, V> IntoIterator for &'a HashMap<K, V> { /// # type Item = (&'a K, &'a V); - /// # type IntoIter = Box<Iterator<Item = (&'a K, &'a V)>>; + /// # type IntoIter = Box<dyn Iterator<Item = (&'a K, &'a V)>>; /// # /// # fn into_iter(self) -> Self::IntoIter { /// # unimplemented!() /// # } /// # } /// # - /// use serde::ser::{Serialize, Serializer, SerializeMap}; + /// use serde::ser::{Serialize, SerializeMap, Serializer}; /// /// impl<K, V> Serialize for HashMap<K, V> /// where @@ -1178,7 +1172,7 @@ pub trait Serializer: Sized { /// The `name` is the name of the struct and the `len` is the number of /// data fields that will be serialized. /// - /// ```edition2018 + /// ```edition2021 /// use serde::ser::{Serialize, SerializeStruct, Serializer}; /// /// struct Rgb { @@ -1214,7 +1208,7 @@ pub trait Serializer: Sized { /// this variant within the enum, the `variant` is the name of the variant, /// and the `len` is the number of data fields that will be serialized. /// - /// ```edition2018 + /// ```edition2021 /// use serde::ser::{Serialize, SerializeStructVariant, Serializer}; /// /// enum E { @@ -1256,7 +1250,7 @@ pub trait Serializer: Sized { /// using [`serialize_seq`]. Implementors should not need to override this /// method. 
/// - /// ```edition2018 + /// ```edition2021 /// use serde::{Serialize, Serializer}; /// /// struct SecretlyOneHigher { @@ -1279,22 +1273,9 @@ pub trait Serializer: Sized { I: IntoIterator, <I as IntoIterator>::Item: Serialize, { - let iter = iter.into_iter(); - let mut serializer = try!(self.serialize_seq(iterator_len_hint(&iter))); - - #[cfg(not(no_iterator_try_fold))] - { - let mut iter = iter; - try!(iter.try_for_each(|item| serializer.serialize_element(&item))); - } - - #[cfg(no_iterator_try_fold)] - { - for item in iter { - try!(serializer.serialize_element(&item)); - } - } - + let mut iter = iter.into_iter(); + let mut serializer = tri!(self.serialize_seq(iterator_len_hint(&iter))); + tri!(iter.try_for_each(|item| serializer.serialize_element(&item))); serializer.end() } @@ -1304,7 +1285,7 @@ pub trait Serializer: Sized { /// using [`serialize_map`]. Implementors should not need to override this /// method. /// - /// ```edition2018 + /// ```edition2021 /// use serde::{Serialize, Serializer}; /// use std::collections::BTreeSet; /// @@ -1330,22 +1311,9 @@ pub trait Serializer: Sized { V: Serialize, I: IntoIterator<Item = (K, V)>, { - let iter = iter.into_iter(); - let mut serializer = try!(self.serialize_map(iterator_len_hint(&iter))); - - #[cfg(not(no_iterator_try_fold))] - { - let mut iter = iter; - try!(iter.try_for_each(|(key, value)| serializer.serialize_entry(&key, &value))); - } - - #[cfg(no_iterator_try_fold)] - { - for (key, value) in iter { - try!(serializer.serialize_entry(&key, &value)); - } - } - + let mut iter = iter.into_iter(); + let mut serializer = tri!(self.serialize_map(iterator_len_hint(&iter))); + tri!(iter.try_for_each(|(key, value)| serializer.serialize_entry(&key, &value))); serializer.end() } @@ -1355,7 +1323,7 @@ pub trait Serializer: Sized { /// delegates to [`serialize_str`]. Serializers are encouraged to provide a /// more efficient implementation if possible. 
/// - /// ```edition2018 + /// ```edition2021 /// # struct DateTime; /// # /// # impl DateTime { @@ -1370,9 +1338,7 @@ pub trait Serializer: Sized { /// where /// S: Serializer, /// { - /// serializer.collect_str(&format_args!("{:?}{:?}", - /// self.naive_local(), - /// self.offset())) + /// serializer.collect_str(&format_args!("{:?}{:?}", self.naive_local(), self.offset())) /// } /// } /// ``` @@ -1393,7 +1359,7 @@ pub trait Serializer: Sized { /// of this method. If no more sensible behavior is possible, the /// implementation is expected to return an error. /// - /// ```edition2018 + /// ```edition2021 /// # struct DateTime; /// # /// # impl DateTime { @@ -1408,9 +1374,7 @@ pub trait Serializer: Sized { /// where /// S: Serializer, /// { - /// serializer.collect_str(&format_args!("{:?}{:?}", - /// self.naive_local(), - /// self.offset())) + /// serializer.collect_str(&format_args!("{:?}{:?}", self.naive_local(), self.offset())) /// } /// } /// ``` @@ -1428,7 +1392,7 @@ pub trait Serializer: Sized { /// human-readable one and binary formats like Postcard will prefer the /// compact one. 
/// - /// ```edition2018 + /// ```edition2021 /// # use std::fmt::{self, Display}; /// # /// # struct Timestamp; @@ -1477,7 +1441,7 @@ pub trait Serializer: Sized { /// /// # Example use /// -/// ```edition2018 +/// ```edition2021 /// # use std::marker::PhantomData; /// # /// # struct Vec<T>(PhantomData<T>); @@ -1490,13 +1454,13 @@ pub trait Serializer: Sized { /// # /// # impl<'a, T> IntoIterator for &'a Vec<T> { /// # type Item = &'a T; -/// # type IntoIter = Box<Iterator<Item = &'a T>>; +/// # type IntoIter = Box<dyn Iterator<Item = &'a T>>; /// # fn into_iter(self) -> Self::IntoIter { /// # unimplemented!() /// # } /// # } /// # -/// use serde::ser::{Serialize, Serializer, SerializeSeq}; +/// use serde::ser::{Serialize, SerializeSeq, Serializer}; /// /// impl<T> Serialize for Vec<T> /// where @@ -1541,8 +1505,8 @@ pub trait SerializeSeq { /// /// # Example use /// -/// ```edition2018 -/// use serde::ser::{Serialize, Serializer, SerializeTuple}; +/// ```edition2021 +/// use serde::ser::{Serialize, SerializeTuple, Serializer}; /// /// # mod fool { /// # trait Serialize {} @@ -1571,7 +1535,7 @@ pub trait SerializeSeq { /// } /// ``` /// -/// ```edition2018 +/// ```edition2021 /// # use std::marker::PhantomData; /// # /// # struct Array<T>(PhantomData<T>); @@ -1584,13 +1548,13 @@ pub trait SerializeSeq { /// # /// # impl<'a, T> IntoIterator for &'a Array<T> { /// # type Item = &'a T; -/// # type IntoIter = Box<Iterator<Item = &'a T>>; +/// # type IntoIter = Box<dyn Iterator<Item = &'a T>>; /// # fn into_iter(self) -> Self::IntoIter { /// # unimplemented!() /// # } /// # } /// # -/// use serde::ser::{Serialize, Serializer, SerializeTuple}; +/// use serde::ser::{Serialize, SerializeTuple, Serializer}; /// /// # mod fool { /// # trait Serialize {} @@ -1641,7 +1605,7 @@ pub trait SerializeTuple { /// /// # Example use /// -/// ```edition2018 +/// ```edition2021 /// use serde::ser::{Serialize, SerializeTupleStruct, Serializer}; /// /// struct Rgb(u8, u8, u8); @@ -1686,7 
+1650,7 @@ pub trait SerializeTupleStruct { /// /// # Example use /// -/// ```edition2018 +/// ```edition2021 /// use serde::ser::{Serialize, SerializeTupleVariant, Serializer}; /// /// enum E { @@ -1744,7 +1708,7 @@ pub trait SerializeTupleVariant { /// /// # Example use /// -/// ```edition2018 +/// ```edition2021 /// # use std::marker::PhantomData; /// # /// # struct HashMap<K, V>(PhantomData<K>, PhantomData<V>); @@ -1757,14 +1721,14 @@ pub trait SerializeTupleVariant { /// # /// # impl<'a, K, V> IntoIterator for &'a HashMap<K, V> { /// # type Item = (&'a K, &'a V); -/// # type IntoIter = Box<Iterator<Item = (&'a K, &'a V)>>; +/// # type IntoIter = Box<dyn Iterator<Item = (&'a K, &'a V)>>; /// # /// # fn into_iter(self) -> Self::IntoIter { /// # unimplemented!() /// # } /// # } /// # -/// use serde::ser::{Serialize, Serializer, SerializeMap}; +/// use serde::ser::{Serialize, SerializeMap, Serializer}; /// /// impl<K, V> Serialize for HashMap<K, V> /// where @@ -1843,7 +1807,7 @@ pub trait SerializeMap { K: Serialize, V: Serialize, { - try!(self.serialize_key(key)); + tri!(self.serialize_key(key)); self.serialize_value(value) } @@ -1855,7 +1819,7 @@ pub trait SerializeMap { /// /// # Example use /// -/// ```edition2018 +/// ```edition2021 /// use serde::ser::{Serialize, SerializeStruct, Serializer}; /// /// struct Rgb { @@ -1915,7 +1879,7 @@ pub trait SerializeStruct { /// /// # Example use /// -/// ```edition2018 +/// ```edition2021 /// use serde::ser::{Serialize, SerializeStructVariant, Serializer}; /// /// enum E { diff --git a/vendor/serde/src/std_error.rs b/vendor/serde/src/std_error.rs index 1055e0f..f15a4d7 100644 --- a/vendor/serde/src/std_error.rs +++ b/vendor/serde/src/std_error.rs @@ -1,4 +1,4 @@ -use lib::{Debug, Display}; +use crate::lib::{Debug, Display}; /// Either a re-export of std::error::Error or a new identical trait, depending /// on whether Serde's "std" feature is enabled. 
@@ -9,7 +9,7 @@ use lib::{Debug, Display}; /// generally provide their error types with a `std::error::Error` impl /// directly: /// -/// ```edition2018 +/// ```edition2021 /// #[derive(Debug)] /// struct MySerError {...} /// @@ -29,7 +29,7 @@ use lib::{Debug, Display}; /// std = ["serde/std"] /// ``` /// -/// ```edition2018 +/// ```edition2021 /// #[cfg(feature = "std")] /// impl std::error::Error for MySerError {} /// ``` @@ -37,7 +37,7 @@ use lib::{Debug, Display}; /// ... or else provide the std Error impl unconditionally via Serde's /// re-export: /// -/// ```edition2018 +/// ```edition2021 /// impl serde::ser::StdError for MySerError {} /// ``` pub trait Error: Debug + Display { diff --git a/vendor/serde_derive/.cargo-checksum.json b/vendor/serde_derive/.cargo-checksum.json index 8887143..331652a 100644 --- a/vendor/serde_derive/.cargo-checksum.json +++ b/vendor/serde_derive/.cargo-checksum.json @@ -1 +1 @@ -{"files":{"Cargo.toml":"d4c79a251c1bd0ed40ec6f4d8d06940542e54876e2327233a3e4737ab31d0860","LICENSE-APACHE":"62c7a1e35f56406896d7aa7ca52d0cc0d272ac022b5d2796e7d6905db8a3636a","LICENSE-MIT":"23f18e03dc49df91622fe2a76176497404e46ced8a715d9d2b67a7446571cca3","README.md":"13c66875efb67f64fdec817725f34ceb07913e1ebea4adc240868d2ed581d3da","build.rs":"d9a0df0c4dd552ff7fd0c3b3828cb1fff4fc4ab15bd98539881929b76b98003b","crates-io.md":"ee22254ee64c3189eef3e707c8d75dc66a8df2a7ee9e518d95238950780ec387","src/bound.rs":"9211d852730380be8e0af9ed5daa52e61563e598eef458739025551ba76aa7c6","src/de.rs":"4dde42292a4476f71b3790156fb2fbfbdba88ebad28c129065890d0f1470ff5d","src/dummy.rs":"31e02c3313d12c88b3b26baa49a9cb143440804b557b5255aad9a9b80b8ea1c6","src/fragment.rs":"5548ba65a53d90a296f60c1328a7a7fb040db467f59c2f5210b2fb320457145d","src/internals/ast.rs":"b019865eef92c1ddbb9029423ac22179f132dc655a51c09fb2a42f4aaef172fd","src/internals/attr.rs":"7407c6e85afd197bdbf715bd681bd171db37b2264f617e148ca340817f56e684","src/internals/case.rs":"9492f0c5142d7b7e8cd39c86d13a855e5ce4489425ad
b2b96aed89e1b7851ac0","src/internals/check.rs":"0449cc7653fc9e596f65028835bbb7d1545c10002c79c7608547f45a722c0040","src/internals/ctxt.rs":"6fa544ae52914498a62a395818ebdc1b36ac2fb5903c60afb741a864ad559f1c","src/internals/mod.rs":"f32138ff19d57eb00f88ba11f6b015efab2102657804f71ebbf386a3698dad91","src/internals/receiver.rs":"6b016351b8294539039095863d8c99e81dd4530d7f769003d12d4ca73cca172c","src/internals/respan.rs":"899753859c58ce5f532a3ec4584796a52f13ed5a0533191e48c953ba5c1b52ff","src/internals/symbol.rs":"2bf0287da64d28da7e8673af60f66aaf6b29efe33131e56b24d6fa55edb533ad","src/lib.rs":"0ed50d02eb095af7a0a57f571734f2c1ac0a704b498b88be3710f14335bd0f92","src/pretend.rs":"4aa53bf6c1350fbcfc8c4997f720cde61a8eb3aab73bb8c101b0f0a74901892b","src/ser.rs":"803f522d313e52eece8a91babfad9d35734d0bf04b042cf24d781294674ba8b3","src/this.rs":"a2c128955324c2994ed7cdc3fe4eeceb7ad8a0f9d071665a8378c85c8df64ce2","src/try.rs":"b171b0088c23ebf4bfa07ba457881b41ac5e547d55dd16f737ea988d34badf61"},"package":"291a097c63d8497e00160b166a967a4a79c64f3facdd01cbd7502231688d77df"} \ No newline at end of file 
+{"files":{"Cargo.toml":"daf3d8f8efdf30d3575c7d1e1372ff7287891fb95625223e4a8a2f792c4474e1","LICENSE-APACHE":"62c7a1e35f56406896d7aa7ca52d0cc0d272ac022b5d2796e7d6905db8a3636a","LICENSE-MIT":"23f18e03dc49df91622fe2a76176497404e46ced8a715d9d2b67a7446571cca3","README.md":"731c044fc5f98b37a89e9049c9214267db98763309cb63146b45c029640f82a3","crates-io.md":"56e988ac4944c45f5bf5051e3827892ed8fb817853d99d9df1fff6621108e270","src/bound.rs":"6c5c20785ac95af9480f8d0de35a7e844cc36a16012f6468db148acd03cb15c2","src/de.rs":"c221ab2b94a5d80dccff74a37f3448b3d695656552b452595dc289c73b12fb2b","src/dummy.rs":"9533dfee23f20d92ea75734c739022820c2787ded0d54f459feacdeb770ec912","src/fragment.rs":"6757cb4c3131d4300f093572efc273c4ab5a20e3e1efb54a311dcfa52d0bd6eb","src/internals/ast.rs":"7dc997e4090033bbd1d0bdd870e8bb87b096b7f66cfd02047f6b85ebdd569b12","src/internals/attr.rs":"6584c0a02de0d17993877303f3cc2c1bccf235257632220421f98082d82d387a","src/internals/case.rs":"10c8dda2b32d8c6c6b63cf09cdc63d02375af7e95ecefe8fecb34f93b65191bb","src/internals/check.rs":"d842eb9912fd29311060b67f3bc62c438eb7b5d86093355acb4de7eee02a0ef8","src/internals/ctxt.rs":"83a4e6fbe0e439d578478883594407e03f2f340541be479bdf0b04a202633a37","src/internals/mod.rs":"ed021ca635c18132a0e5c3d90f21b7f65def0a61e946421a30200b5b9ab6ad43","src/internals/receiver.rs":"fe8a480669511b5edcfe71f5dd290cf72ccec54c9016ec85f2ac59dce538077f","src/internals/respan.rs":"899753859c58ce5f532a3ec4584796a52f13ed5a0533191e48c953ba5c1b52ff","src/internals/symbol.rs":"d619e88caa3c7a09b03014257f2b349ee922290062d9b97b4dd19d0e64532690","src/lib.rs":"7a6c2796244658f62d398ebc6819c4f3064dac4a1ad7c52b40359f9411f1c266","src/pretend.rs":"7facc10a5b805564dd95735ae11118ec17ca6adcc49a59764e7c920e27b9fc4a","src/ser.rs":"e3341471cea9d7e2fb4043e5d1746862beb9a4e25196170879eeac529d460920","src/this.rs":"87818dc80cbb521b51938a653d09daf10aafc220bb10425948de82ad670fcb85"},"package":"7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b"} \ No newline at end of 
file diff --git a/vendor/serde_derive/Cargo.toml b/vendor/serde_derive/Cargo.toml index a607f04..33642ef 100644 --- a/vendor/serde_derive/Cargo.toml +++ b/vendor/serde_derive/Cargo.toml @@ -12,19 +12,11 @@ [package] rust-version = "1.56" name = "serde_derive" -version = "1.0.160" +version = "1.0.197" authors = [ "Erick Tryzelaar <erick.tryzelaar@gmail.com>", "David Tolnay <dtolnay@gmail.com>", ] -include = [ - "build.rs", - "src/**/*.rs", - "crates-io.md", - "README.md", - "LICENSE-APACHE", - "LICENSE-MIT", -] description = "Macros 1.1 implementation of #[derive(Serialize, Deserialize)]" homepage = "https://serde.rs" documentation = "https://serde.rs/derive.html" @@ -35,11 +27,15 @@ keywords = [ "no_std", "derive", ] -categories = ["no-std"] +categories = [ + "no-std", + "no-std::no-alloc", +] license = "MIT OR Apache-2.0" repository = "https://github.com/serde-rs/serde" [package.metadata.docs.rs] +rustdoc-args = ["--generate-link-to-definition"] targets = ["x86_64-unknown-linux-gnu"] [lib] @@ -47,16 +43,28 @@ name = "serde_derive" proc-macro = true [dependencies.proc-macro2] -version = "1.0" +version = "1.0.74" +features = ["proc-macro"] +default-features = false [dependencies.quote] -version = "1.0" +version = "1.0.35" +features = ["proc-macro"] +default-features = false [dependencies.syn] -version = "2.0.3" +version = "2.0.46" +features = [ + "clone-impls", + "derive", + "parsing", + "printing", + "proc-macro", +] +default-features = false [dev-dependencies.serde] -version = "1.0" +version = "1" [features] default = [] diff --git a/vendor/serde_derive/README.md b/vendor/serde_derive/README.md index d53e572..3129294 100644 --- a/vendor/serde_derive/README.md +++ b/vendor/serde_derive/README.md @@ -1,12 +1,12 @@ -# Serde   [![Build Status]][actions] [![Latest Version]][crates.io] [![serde: rustc 1.19+]][Rust 1.19] [![serde_derive: rustc 1.56+]][Rust 1.56] +# Serde   [![Build Status]][actions] [![Latest Version]][crates.io] [![serde msrv]][Rust 1.31] 
[![serde_derive msrv]][Rust 1.56] [Build Status]: https://img.shields.io/github/actions/workflow/status/serde-rs/serde/ci.yml?branch=master [actions]: https://github.com/serde-rs/serde/actions?query=branch%3Amaster [Latest Version]: https://img.shields.io/crates/v/serde.svg [crates.io]: https://crates.io/crates/serde -[serde: rustc 1.19+]: https://img.shields.io/badge/serde-rustc_1.19+-lightgray.svg -[serde_derive: rustc 1.56+]: https://img.shields.io/badge/serde_derive-rustc_1.56+-lightgray.svg -[Rust 1.19]: https://blog.rust-lang.org/2017/07/20/Rust-1.19.html +[serde msrv]: https://img.shields.io/crates/msrv/serde.svg?label=serde%20msrv&color=lightgray +[serde_derive msrv]: https://img.shields.io/crates/msrv/serde_derive.svg?label=serde_derive%20msrv&color=lightgray +[Rust 1.31]: https://blog.rust-lang.org/2018/12/06/Rust-1.31-and-rust-2018.html [Rust 1.56]: https://blog.rust-lang.org/2021/10/21/Rust-1.56.0.html **Serde is a framework for *ser*ializing and *de*serializing Rust data structures efficiently and generically.** @@ -48,7 +48,7 @@ serde_json = "1.0" <p></p> ```rust -use serde::{Serialize, Deserialize}; +use serde::{Deserialize, Serialize}; #[derive(Serialize, Deserialize, Debug)] struct Point { diff --git a/vendor/serde_derive/build.rs b/vendor/serde_derive/build.rs deleted file mode 100644 index 1249dab..0000000 --- a/vendor/serde_derive/build.rs +++ /dev/null @@ -1,38 +0,0 @@ -use std::env; -use std::process::Command; -use std::str; - -// The rustc-cfg strings below are *not* public API. Please let us know by -// opening a GitHub issue if your build environment requires some way to enable -// these cfgs other than by executing our build script. 
-fn main() { - println!("cargo:rerun-if-changed=build.rs"); - - let minor = match rustc_minor_version() { - Some(minor) => minor, - None => return, - }; - - // Underscore const names stabilized in Rust 1.37: - // https://blog.rust-lang.org/2019/08/15/Rust-1.37.0.html#using-unnamed-const-items-for-macros - if minor < 37 { - println!("cargo:rustc-cfg=no_underscore_consts"); - } - - // The ptr::addr_of! macro stabilized in Rust 1.51: - // https://blog.rust-lang.org/2021/03/25/Rust-1.51.0.html#stabilized-apis - if minor < 51 { - println!("cargo:rustc-cfg=no_ptr_addr_of"); - } -} - -fn rustc_minor_version() -> Option<u32> { - let rustc = env::var_os("RUSTC")?; - let output = Command::new(rustc).arg("--version").output().ok()?; - let version = str::from_utf8(&output.stdout).ok()?; - let mut pieces = version.split('.'); - if pieces.next() != Some("rustc 1") { - return None; - } - pieces.next()?.parse().ok() -} diff --git a/vendor/serde_derive/crates-io.md b/vendor/serde_derive/crates-io.md index 6e0ec28..1871003 100644 --- a/vendor/serde_derive/crates-io.md +++ b/vendor/serde_derive/crates-io.md @@ -16,7 +16,7 @@ You may be looking for: ## Serde in action ```rust -use serde::{Serialize, Deserialize}; +use serde::{Deserialize, Serialize}; #[derive(Serialize, Deserialize, Debug)] struct Point { diff --git a/vendor/serde_derive/src/bound.rs b/vendor/serde_derive/src/bound.rs index 7bdb046..fe8ccff 100644 --- a/vendor/serde_derive/src/bound.rs +++ b/vendor/serde_derive/src/bound.rs @@ -1,12 +1,9 @@ +use crate::internals::ast::{Container, Data}; +use crate::internals::{attr, ungroup}; +use proc_macro2::Span; use std::collections::HashSet; - -use syn; use syn::punctuated::{Pair, Punctuated}; - -use internals::ast::{Container, Data}; -use internals::{attr, ungroup}; - -use proc_macro2::Span; +use syn::Token; // Remove the default from every type parameter because in the generated impls // they look like associated types: "error: associated type bindings are not @@ -147,6 +144,7 
@@ pub fn with_bound( fn visit_type(&mut self, ty: &'ast syn::Type) { match ty { + #![cfg_attr(all(test, exhaustive), deny(non_exhaustive_omitted_patterns))] syn::Type::Array(ty) => self.visit_type(&ty.elem), syn::Type::BareFn(ty) => { for arg in &ty.inputs { @@ -184,7 +182,6 @@ pub fn with_bound( syn::Type::Infer(_) | syn::Type::Never(_) | syn::Type::Verbatim(_) => {} - #[cfg_attr(all(test, exhaustive), deny(non_exhaustive_omitted_patterns))] _ => {} } } @@ -199,16 +196,13 @@ pub fn with_bound( syn::PathArguments::AngleBracketed(arguments) => { for arg in &arguments.args { match arg { + #![cfg_attr(all(test, exhaustive), deny(non_exhaustive_omitted_patterns))] syn::GenericArgument::Type(arg) => self.visit_type(arg), syn::GenericArgument::AssocType(arg) => self.visit_type(&arg.ty), syn::GenericArgument::Lifetime(_) | syn::GenericArgument::Const(_) | syn::GenericArgument::AssocConst(_) | syn::GenericArgument::Constraint(_) => {} - #[cfg_attr( - all(test, exhaustive), - deny(non_exhaustive_omitted_patterns) - )] _ => {} } } @@ -231,9 +225,9 @@ pub fn with_bound( fn visit_type_param_bound(&mut self, bound: &'ast syn::TypeParamBound) { match bound { + #![cfg_attr(all(test, exhaustive), deny(non_exhaustive_omitted_patterns))] syn::TypeParamBound::Trait(bound) => self.visit_path(&bound.path), syn::TypeParamBound::Lifetime(_) | syn::TypeParamBound::Verbatim(_) => {} - #[cfg_attr(all(test, exhaustive), deny(non_exhaustive_omitted_patterns))] _ => {} } } @@ -259,7 +253,7 @@ pub fn with_bound( }; match &cont.data { Data::Enum(variants) => { - for variant in variants.iter() { + for variant in variants { let relevant_fields = variant .fields .iter() diff --git a/vendor/serde_derive/src/de.rs b/vendor/serde_derive/src/de.rs index 4d60d5a..e3b737c 100644 --- a/vendor/serde_derive/src/de.rs +++ b/vendor/serde_derive/src/de.rs @@ -1,23 +1,16 @@ +use crate::fragment::{Expr, Fragment, Match, Stmts}; +use crate::internals::ast::{Container, Data, Field, Style, Variant}; +use 
crate::internals::{attr, replace_receiver, ungroup, Ctxt, Derive}; +use crate::{bound, dummy, pretend, this}; use proc_macro2::{Literal, Span, TokenStream}; -use quote::ToTokens; -use syn::punctuated::Punctuated; -use syn::spanned::Spanned; -use syn::{self, Ident, Index, Member}; - -use bound; -use dummy; -use fragment::{Expr, Fragment, Match, Stmts}; -use internals::ast::{Container, Data, Field, Style, Variant}; -use internals::{attr, replace_receiver, ungroup, Ctxt, Derive}; -use pretend; -use this; - +use quote::{quote, quote_spanned, ToTokens}; use std::collections::BTreeSet; use std::ptr; +use syn::punctuated::Punctuated; +use syn::spanned::Spanned; +use syn::{parse_quote, Ident, Index, Member}; -pub fn expand_derive_deserialize( - input: &mut syn::DeriveInput, -) -> Result<TokenStream, Vec<syn::Error>> { +pub fn expand_derive_deserialize(input: &mut syn::DeriveInput) -> syn::Result<TokenStream> { replace_receiver(input); let ctxt = Ctxt::new(); @@ -69,8 +62,6 @@ pub fn expand_derive_deserialize( Ok(dummy::wrap_in_const( cont.attrs.custom_serde_path(), - "DESERIALIZE", - ident, impl_block, )) } @@ -291,10 +282,10 @@ fn deserialize_body(cont: &Container, params: &Parameters) -> Fragment { match &cont.data { Data::Enum(variants) => deserialize_enum(params, variants, &cont.attrs), Data::Struct(Style::Struct, fields) => { - deserialize_struct(None, params, fields, &cont.attrs, None, &Untagged::No) + deserialize_struct(params, fields, &cont.attrs, StructForm::Struct) } Data::Struct(Style::Tuple, fields) | Data::Struct(Style::Newtype, fields) => { - deserialize_tuple(None, params, fields, &cont.attrs, None) + deserialize_tuple(params, fields, &cont.attrs, TupleForm::Tuple) } Data::Struct(Style::Unit, _) => deserialize_unit_struct(params, &cont.attrs), } @@ -326,10 +317,10 @@ fn deserialize_in_place_body(cont: &Container, params: &Parameters) -> Option<St let code = match &cont.data { Data::Struct(Style::Struct, fields) => { - deserialize_struct_in_place(None, 
params, fields, &cont.attrs, None)? + deserialize_struct_in_place(params, fields, &cont.attrs)? } Data::Struct(Style::Tuple, fields) | Data::Struct(Style::Newtype, fields) => { - deserialize_tuple_in_place(None, params, fields, &cont.attrs, None) + deserialize_tuple_in_place(params, fields, &cont.attrs) } Data::Enum(_) | Data::Struct(Style::Unit, _) => { return None; @@ -414,16 +405,22 @@ fn deserialize_unit_struct(params: &Parameters, cattrs: &attr::Container) -> Fra let this_type = ¶ms.this_type; let this_value = ¶ms.this_value; let type_name = cattrs.name().deserialize_name(); + let (de_impl_generics, de_ty_generics, ty_generics, where_clause) = + split_with_de_lifetime(params); + let delife = params.borrowed.de_lifetime(); let expecting = format!("unit struct {}", params.type_name()); let expecting = cattrs.expecting().unwrap_or(&expecting); quote_block! { #[doc(hidden)] - struct __Visitor; + struct __Visitor #de_impl_generics #where_clause { + marker: _serde::__private::PhantomData<#this_type #ty_generics>, + lifetime: _serde::__private::PhantomData<&#delife ()>, + } - impl<'de> _serde::de::Visitor<'de> for __Visitor { - type Value = #this_type; + impl #de_impl_generics _serde::de::Visitor<#delife> for __Visitor #de_ty_generics #where_clause { + type Value = #this_type #ty_generics; fn expecting(&self, __formatter: &mut _serde::__private::Formatter) -> _serde::__private::fmt::Result { _serde::__private::Formatter::write_str(__formatter, #expecting) @@ -438,25 +435,45 @@ fn deserialize_unit_struct(params: &Parameters, cattrs: &attr::Container) -> Fra } } - _serde::Deserializer::deserialize_unit_struct(__deserializer, #type_name, __Visitor) + _serde::Deserializer::deserialize_unit_struct( + __deserializer, + #type_name, + __Visitor { + marker: _serde::__private::PhantomData::<#this_type #ty_generics>, + lifetime: _serde::__private::PhantomData, + }, + ) } } +enum TupleForm<'a> { + Tuple, + /// Contains a variant name + ExternallyTagged(&'a syn::Ident), + /// 
Contains a variant name and an intermediate deserializer from which actual + /// deserialization will be performed + Untagged(&'a syn::Ident, TokenStream), +} + fn deserialize_tuple( - variant_ident: Option<&syn::Ident>, params: &Parameters, fields: &[Field], cattrs: &attr::Container, - deserializer: Option<TokenStream>, + form: TupleForm, ) -> Fragment { + assert!(!cattrs.has_flatten()); + + let field_count = fields + .iter() + .filter(|field| !field.attrs.skip_deserializing()) + .count(); + let this_type = ¶ms.this_type; let this_value = ¶ms.this_value; let (de_impl_generics, de_ty_generics, ty_generics, where_clause) = split_with_de_lifetime(params); let delife = params.borrowed.de_lifetime(); - assert!(!cattrs.has_flatten()); - // If there are getters (implying private fields), construct the local type // and use an `Into` conversion to get the remote type. If there are no // getters then construct the target type directly. @@ -467,23 +484,27 @@ fn deserialize_tuple( quote!(#this_value) }; - let is_enum = variant_ident.is_some(); - let type_path = match variant_ident { - Some(variant_ident) => quote!(#construct::#variant_ident), - None => construct, + let type_path = match form { + TupleForm::Tuple => construct, + TupleForm::ExternallyTagged(variant_ident) | TupleForm::Untagged(variant_ident, _) => { + quote!(#construct::#variant_ident) + } }; - let expecting = match variant_ident { - Some(variant_ident) => format!("tuple variant {}::{}", params.type_name(), variant_ident), - None => format!("tuple struct {}", params.type_name()), + let expecting = match form { + TupleForm::Tuple => format!("tuple struct {}", params.type_name()), + TupleForm::ExternallyTagged(variant_ident) | TupleForm::Untagged(variant_ident, _) => { + format!("tuple variant {}::{}", params.type_name(), variant_ident) + } }; let expecting = cattrs.expecting().unwrap_or(&expecting); let nfields = fields.len(); - let visit_newtype_struct = if !is_enum && nfields == 1 { - 
Some(deserialize_newtype_struct(&type_path, params, &fields[0])) - } else { - None + let visit_newtype_struct = match form { + TupleForm::Tuple if nfields == 1 => { + Some(deserialize_newtype_struct(&type_path, params, &fields[0])) + } + _ => None, }; let visit_seq = Stmts(deserialize_seq( @@ -496,20 +517,28 @@ fn deserialize_tuple( lifetime: _serde::__private::PhantomData, } }; - let dispatch = if let Some(deserializer) = deserializer { - quote!(_serde::Deserializer::deserialize_tuple(#deserializer, #nfields, #visitor_expr)) - } else if is_enum { - quote!(_serde::de::VariantAccess::tuple_variant(__variant, #nfields, #visitor_expr)) - } else if nfields == 1 { - let type_name = cattrs.name().deserialize_name(); - quote!(_serde::Deserializer::deserialize_newtype_struct(__deserializer, #type_name, #visitor_expr)) - } else { - let type_name = cattrs.name().deserialize_name(); - quote!(_serde::Deserializer::deserialize_tuple_struct(__deserializer, #type_name, #nfields, #visitor_expr)) + let dispatch = match form { + TupleForm::Tuple if nfields == 1 => { + let type_name = cattrs.name().deserialize_name(); + quote! { + _serde::Deserializer::deserialize_newtype_struct(__deserializer, #type_name, #visitor_expr) + } + } + TupleForm::Tuple => { + let type_name = cattrs.name().deserialize_name(); + quote! { + _serde::Deserializer::deserialize_tuple_struct(__deserializer, #type_name, #field_count, #visitor_expr) + } + } + TupleForm::ExternallyTagged(_) => quote! { + _serde::de::VariantAccess::tuple_variant(__variant, #field_count, #visitor_expr) + }, + TupleForm::Untagged(_, deserializer) => quote! 
{ + _serde::Deserializer::deserialize_tuple(#deserializer, #field_count, #visitor_expr) + }, }; - let all_skipped = fields.iter().all(|field| field.attrs.skip_deserializing()); - let visitor_var = if all_skipped { + let visitor_var = if field_count == 0 { quote!(_) } else { quote!(mut __seq) @@ -546,30 +575,41 @@ fn deserialize_tuple( #[cfg(feature = "deserialize_in_place")] fn deserialize_tuple_in_place( - variant_ident: Option<syn::Ident>, params: &Parameters, fields: &[Field], cattrs: &attr::Container, - deserializer: Option<TokenStream>, ) -> Fragment { + assert!(!cattrs.has_flatten()); + + let field_count = fields + .iter() + .filter(|field| !field.attrs.skip_deserializing()) + .count(); + let this_type = ¶ms.this_type; let (de_impl_generics, de_ty_generics, ty_generics, where_clause) = split_with_de_lifetime(params); let delife = params.borrowed.de_lifetime(); - assert!(!cattrs.has_flatten()); - - let is_enum = variant_ident.is_some(); - let expecting = match variant_ident { - Some(variant_ident) => format!("tuple variant {}::{}", params.type_name(), variant_ident), - None => format!("tuple struct {}", params.type_name()), - }; + let expecting = format!("tuple struct {}", params.type_name()); let expecting = cattrs.expecting().unwrap_or(&expecting); let nfields = fields.len(); - let visit_newtype_struct = if !is_enum && nfields == 1 { - Some(deserialize_newtype_struct_in_place(params, &fields[0])) + let visit_newtype_struct = if nfields == 1 { + // We do not generate deserialize_in_place if every field has a + // deserialize_with. + assert!(fields[0].attrs.deserialize_with().is_none()); + + Some(quote! 
{ + #[inline] + fn visit_newtype_struct<__E>(self, __e: __E) -> _serde::__private::Result<Self::Value, __E::Error> + where + __E: _serde::Deserializer<#delife>, + { + _serde::Deserialize::deserialize_in_place(__e, &mut self.place.0) + } + }) } else { None }; @@ -583,20 +623,14 @@ fn deserialize_tuple_in_place( } }; - let dispatch = if let Some(deserializer) = deserializer { - quote!(_serde::Deserializer::deserialize_tuple(#deserializer, #nfields, #visitor_expr)) - } else if is_enum { - quote!(_serde::de::VariantAccess::tuple_variant(__variant, #nfields, #visitor_expr)) - } else if nfields == 1 { - let type_name = cattrs.name().deserialize_name(); + let type_name = cattrs.name().deserialize_name(); + let dispatch = if nfields == 1 { quote!(_serde::Deserializer::deserialize_newtype_struct(__deserializer, #type_name, #visitor_expr)) } else { - let type_name = cattrs.name().deserialize_name(); - quote!(_serde::Deserializer::deserialize_tuple_struct(__deserializer, #type_name, #nfields, #visitor_expr)) + quote!(_serde::Deserializer::deserialize_tuple_struct(__deserializer, #type_name, #field_count, #visitor_expr)) }; - let all_skipped = fields.iter().all(|field| field.attrs.skip_deserializing()); - let visitor_var = if all_skipped { + let visitor_var = if field_count == 0 { quote!(_) } else { quote!(mut __seq) @@ -670,31 +704,23 @@ fn deserialize_seq( let span = field.original.span(); let func = quote_spanned!(span=> _serde::de::SeqAccess::next_element::<#field_ty>); - quote!(try!(#func(&mut __seq))) + quote!(#func(&mut __seq)?) 
} Some(path) => { let (wrapper, wrapper_ty) = wrap_deserialize_field_with(params, field.ty, path); quote!({ #wrapper _serde::__private::Option::map( - try!(_serde::de::SeqAccess::next_element::<#wrapper_ty>(&mut __seq)), + _serde::de::SeqAccess::next_element::<#wrapper_ty>(&mut __seq)?, |__wrap| __wrap.value) }) } }; - let value_if_none = match field.attrs.default() { - attr::Default::Default => quote!(_serde::__private::Default::default()), - attr::Default::Path(path) => quote!(#path()), - attr::Default::None => quote!( - return _serde::__private::Err(_serde::de::Error::invalid_length(#index_in_seq, &#expecting)); - ), - }; + let value_if_none = expr_is_missing_seq(None, index_in_seq, field, cattrs, expecting); let assign = quote! { let #var = match #visit { _serde::__private::Some(__value) => __value, - _serde::__private::None => { - #value_if_none - } + _serde::__private::None => #value_if_none, }; }; index_in_seq += 1; @@ -770,24 +796,14 @@ fn deserialize_seq_in_place( self.place.#member = #default; } } else { - let value_if_none = match field.attrs.default() { - attr::Default::Default => quote!( - self.place.#member = _serde::__private::Default::default(); - ), - attr::Default::Path(path) => quote!( - self.place.#member = #path(); - ), - attr::Default::None => quote!( - return _serde::__private::Err(_serde::de::Error::invalid_length(#index_in_seq, &#expecting)); - ), - }; + let value_if_none = expr_is_missing_seq(Some(quote!(self.place.#member = )), index_in_seq, field, cattrs, expecting); let write = match field.attrs.deserialize_with() { None => { quote! { - if let _serde::__private::None = try!(_serde::de::SeqAccess::next_element_seed(&mut __seq, - _serde::__private::de::InPlaceSeed(&mut self.place.#member))) + if let _serde::__private::None = _serde::de::SeqAccess::next_element_seed(&mut __seq, + _serde::__private::de::InPlaceSeed(&mut self.place.#member))? 
{ - #value_if_none + #value_if_none; } } } @@ -795,12 +811,12 @@ fn deserialize_seq_in_place( let (wrapper, wrapper_ty) = wrap_deserialize_field_with(params, field.ty, path); quote!({ #wrapper - match try!(_serde::de::SeqAccess::next_element::<#wrapper_ty>(&mut __seq)) { + match _serde::de::SeqAccess::next_element::<#wrapper_ty>(&mut __seq)? { _serde::__private::Some(__wrap) => { self.place.#member = __wrap.value; } _serde::__private::None => { - #value_if_none + #value_if_none; } } }) @@ -847,12 +863,12 @@ fn deserialize_newtype_struct( let span = field.original.span(); let func = quote_spanned!(span=> <#field_ty as _serde::Deserialize>::deserialize); quote! { - try!(#func(__e)) + #func(__e)? } } Some(path) => { quote! { - try!(#path(__e)) + #path(__e)? } } }; @@ -878,40 +894,24 @@ fn deserialize_newtype_struct( } } -#[cfg(feature = "deserialize_in_place")] -fn deserialize_newtype_struct_in_place(params: &Parameters, field: &Field) -> TokenStream { - // We do not generate deserialize_in_place if every field has a - // deserialize_with. - assert!(field.attrs.deserialize_with().is_none()); - - let delife = params.borrowed.de_lifetime(); - - quote! 
{ - #[inline] - fn visit_newtype_struct<__E>(self, __e: __E) -> _serde::__private::Result<Self::Value, __E::Error> - where - __E: _serde::Deserializer<#delife>, - { - _serde::Deserialize::deserialize_in_place(__e, &mut self.place.0) - } - } -} - -enum Untagged { - Yes, - No, +enum StructForm<'a> { + Struct, + /// Contains a variant name + ExternallyTagged(&'a syn::Ident), + /// Contains a variant name and an intermediate deserializer from which actual + /// deserialization will be performed + InternallyTagged(&'a syn::Ident, TokenStream), + /// Contains a variant name and an intermediate deserializer from which actual + /// deserialization will be performed + Untagged(&'a syn::Ident, TokenStream), } fn deserialize_struct( - variant_ident: Option<&syn::Ident>, params: &Parameters, fields: &[Field], cattrs: &attr::Container, - deserializer: Option<TokenStream>, - untagged: &Untagged, + form: StructForm, ) -> Fragment { - let is_enum = variant_ident.is_some(); - let this_type = ¶ms.this_type; let this_value = ¶ms.this_value; let (de_impl_generics, de_ty_generics, ty_generics, where_clause) = @@ -928,83 +928,69 @@ fn deserialize_struct( quote!(#this_value) }; - let type_path = match variant_ident { - Some(variant_ident) => quote!(#construct::#variant_ident), - None => construct, + let type_path = match form { + StructForm::Struct => construct, + StructForm::ExternallyTagged(variant_ident) + | StructForm::InternallyTagged(variant_ident, _) + | StructForm::Untagged(variant_ident, _) => quote!(#construct::#variant_ident), }; - let expecting = match variant_ident { - Some(variant_ident) => format!("struct variant {}::{}", params.type_name(), variant_ident), - None => format!("struct {}", params.type_name()), - }; - let expecting = cattrs.expecting().unwrap_or(&expecting); - - let visit_seq = Stmts(deserialize_seq( - &type_path, params, fields, true, cattrs, expecting, - )); - - let (field_visitor, fields_stmt, visit_map) = if cattrs.has_flatten() { - 
deserialize_struct_as_map_visitor(&type_path, params, fields, cattrs) - } else { - deserialize_struct_as_struct_visitor(&type_path, params, fields, cattrs) - }; - let field_visitor = Stmts(field_visitor); - let fields_stmt = fields_stmt.map(Stmts); - let visit_map = Stmts(visit_map); - - let visitor_expr = quote! { - __Visitor { - marker: _serde::__private::PhantomData::<#this_type #ty_generics>, - lifetime: _serde::__private::PhantomData, - } - }; - let need_seed = deserializer.is_none(); - let dispatch = if let Some(deserializer) = deserializer { - quote! { - _serde::Deserializer::deserialize_any(#deserializer, #visitor_expr) - } - } else if is_enum && cattrs.has_flatten() { - quote! { - _serde::de::VariantAccess::newtype_variant_seed(__variant, #visitor_expr) - } - } else if is_enum { - quote! { - _serde::de::VariantAccess::struct_variant(__variant, FIELDS, #visitor_expr) - } - } else if cattrs.has_flatten() { - quote! { - _serde::Deserializer::deserialize_map(__deserializer, #visitor_expr) - } - } else { - let type_name = cattrs.name().deserialize_name(); - quote! 
{ - _serde::Deserializer::deserialize_struct(__deserializer, #type_name, FIELDS, #visitor_expr) + let expecting = match form { + StructForm::Struct => format!("struct {}", params.type_name()), + StructForm::ExternallyTagged(variant_ident) + | StructForm::InternallyTagged(variant_ident, _) + | StructForm::Untagged(variant_ident, _) => { + format!("struct variant {}::{}", params.type_name(), variant_ident) } }; + let expecting = cattrs.expecting().unwrap_or(&expecting); - let all_skipped = fields.iter().all(|field| field.attrs.skip_deserializing()); - let visitor_var = if all_skipped { - quote!(_) - } else { - quote!(mut __seq) - }; + let field_names_idents: Vec<_> = fields + .iter() + .enumerate() + // Skip fields that shouldn't be deserialized or that were flattened, + // so they don't appear in the storage in their literal form + .filter(|&(_, field)| !field.attrs.skip_deserializing() && !field.attrs.flatten()) + .map(|(i, field)| { + ( + field.attrs.name().deserialize_name(), + field_i(i), + field.attrs.aliases(), + ) + }) + .collect(); + let field_visitor = deserialize_field_identifier(&field_names_idents, cattrs); // untagged struct variants do not get a visit_seq method. The same applies to // structs that only have a map representation. - let visit_seq = match *untagged { - Untagged::No if !cattrs.has_flatten() => Some(quote! { - #[inline] - fn visit_seq<__A>(self, #visitor_var: __A) -> _serde::__private::Result<Self::Value, __A::Error> - where - __A: _serde::de::SeqAccess<#delife>, - { - #visit_seq - } - }), - _ => None, + let visit_seq = match form { + StructForm::Untagged(..) => None, + _ if cattrs.has_flatten() => None, + _ => { + let mut_seq = if field_names_idents.is_empty() { + quote!(_) + } else { + quote!(mut __seq) + }; + + let visit_seq = Stmts(deserialize_seq( + &type_path, params, fields, true, cattrs, expecting, + )); + + Some(quote! 
{ + #[inline] + fn visit_seq<__A>(self, #mut_seq: __A) -> _serde::__private::Result<Self::Value, __A::Error> + where + __A: _serde::de::SeqAccess<#delife>, + { + #visit_seq + } + }) + } }; + let visit_map = Stmts(deserialize_map(&type_path, params, fields, cattrs)); - let visitor_seed = if need_seed && is_enum && cattrs.has_flatten() { - Some(quote! { + let visitor_seed = match form { + StructForm::ExternallyTagged(..) if cattrs.has_flatten() => Some(quote! { impl #de_impl_generics _serde::de::DeserializeSeed<#delife> for __Visitor #de_ty_generics #where_clause { type Value = #this_type #ty_generics; @@ -1015,9 +1001,51 @@ fn deserialize_struct( _serde::Deserializer::deserialize_map(__deserializer, self) } } - }) - } else { + }), + _ => None, + }; + + let fields_stmt = if cattrs.has_flatten() { None + } else { + let field_names = field_names_idents + .iter() + .flat_map(|&(_, _, aliases)| aliases); + + Some(quote! { + #[doc(hidden)] + const FIELDS: &'static [&'static str] = &[ #(#field_names),* ]; + }) + }; + + let visitor_expr = quote! { + __Visitor { + marker: _serde::__private::PhantomData::<#this_type #ty_generics>, + lifetime: _serde::__private::PhantomData, + } + }; + let dispatch = match form { + StructForm::Struct if cattrs.has_flatten() => quote! { + _serde::Deserializer::deserialize_map(__deserializer, #visitor_expr) + }, + StructForm::Struct => { + let type_name = cattrs.name().deserialize_name(); + quote! { + _serde::Deserializer::deserialize_struct(__deserializer, #type_name, FIELDS, #visitor_expr) + } + } + StructForm::ExternallyTagged(_) if cattrs.has_flatten() => quote! { + _serde::de::VariantAccess::newtype_variant_seed(__variant, #visitor_expr) + }, + StructForm::ExternallyTagged(_) => quote! { + _serde::de::VariantAccess::struct_variant(__variant, FIELDS, #visitor_expr) + }, + StructForm::InternallyTagged(_, deserializer) => quote! 
{ + _serde::Deserializer::deserialize_any(#deserializer, #visitor_expr) + }, + StructForm::Untagged(_, deserializer) => quote! { + _serde::Deserializer::deserialize_any(#deserializer, #visitor_expr) + }, }; quote_block! { @@ -1057,14 +1085,10 @@ fn deserialize_struct( #[cfg(feature = "deserialize_in_place")] fn deserialize_struct_in_place( - variant_ident: Option<syn::Ident>, params: &Parameters, fields: &[Field], cattrs: &attr::Container, - deserializer: Option<TokenStream>, ) -> Option<Fragment> { - let is_enum = variant_ident.is_some(); - // for now we do not support in_place deserialization for structs that // are represented as map. if cattrs.has_flatten() { @@ -1076,58 +1100,35 @@ fn deserialize_struct_in_place( split_with_de_lifetime(params); let delife = params.borrowed.de_lifetime(); - let expecting = match variant_ident { - Some(variant_ident) => format!("struct variant {}::{}", params.type_name(), variant_ident), - None => format!("struct {}", params.type_name()), - }; + let expecting = format!("struct {}", params.type_name()); let expecting = cattrs.expecting().unwrap_or(&expecting); - let visit_seq = Stmts(deserialize_seq_in_place(params, fields, cattrs, expecting)); - - let (field_visitor, fields_stmt, visit_map) = - deserialize_struct_as_struct_in_place_visitor(params, fields, cattrs); - - let field_visitor = Stmts(field_visitor); - let fields_stmt = Stmts(fields_stmt); - let visit_map = Stmts(visit_map); + let field_names_idents: Vec<_> = fields + .iter() + .enumerate() + .filter(|&(_, field)| !field.attrs.skip_deserializing()) + .map(|(i, field)| { + ( + field.attrs.name().deserialize_name(), + field_i(i), + field.attrs.aliases(), + ) + }) + .collect(); - let visitor_expr = quote! { - __Visitor { - place: __place, - lifetime: _serde::__private::PhantomData, - } - }; - let dispatch = if let Some(deserializer) = deserializer { - quote! { - _serde::Deserializer::deserialize_any(#deserializer, #visitor_expr) - } - } else if is_enum { - quote! 
{ - _serde::de::VariantAccess::struct_variant(__variant, FIELDS, #visitor_expr) - } - } else { - let type_name = cattrs.name().deserialize_name(); - quote! { - _serde::Deserializer::deserialize_struct(__deserializer, #type_name, FIELDS, #visitor_expr) - } - }; + let field_visitor = deserialize_field_identifier(&field_names_idents, cattrs); - let all_skipped = fields.iter().all(|field| field.attrs.skip_deserializing()); - let visitor_var = if all_skipped { + let mut_seq = if field_names_idents.is_empty() { quote!(_) } else { quote!(mut __seq) }; - - let visit_seq = quote! { - #[inline] - fn visit_seq<__A>(self, #visitor_var: __A) -> _serde::__private::Result<Self::Value, __A::Error> - where - __A: _serde::de::SeqAccess<#delife>, - { - #visit_seq - } - }; + let visit_seq = Stmts(deserialize_seq_in_place(params, fields, cattrs, expecting)); + let visit_map = Stmts(deserialize_map_in_place(params, fields, cattrs)); + let field_names = field_names_idents + .iter() + .flat_map(|&(_, _, aliases)| aliases); + let type_name = cattrs.name().deserialize_name(); let in_place_impl_generics = de_impl_generics.in_place(); let in_place_ty_generics = de_ty_generics.in_place(); @@ -1149,7 +1150,13 @@ fn deserialize_struct_in_place( _serde::__private::Formatter::write_str(__formatter, #expecting) } - #visit_seq + #[inline] + fn visit_seq<__A>(self, #mut_seq: __A) -> _serde::__private::Result<Self::Value, __A::Error> + where + __A: _serde::de::SeqAccess<#delife>, + { + #visit_seq + } #[inline] fn visit_map<__A>(self, mut __map: __A) -> _serde::__private::Result<Self::Value, __A::Error> @@ -1160,9 +1167,13 @@ fn deserialize_struct_in_place( } } - #fields_stmt + #[doc(hidden)] + const FIELDS: &'static [&'static str] = &[ #(#field_names),* ]; - #dispatch + _serde::Deserializer::deserialize_struct(__deserializer, #type_name, FIELDS, __Visitor { + place: __place, + lifetime: _serde::__private::PhantomData, + }) }) } @@ -1170,6 +1181,22 @@ fn deserialize_enum( params: &Parameters, variants: 
&[Variant], cattrs: &attr::Container, +) -> Fragment { + // The variants have already been checked (in ast.rs) that all untagged variants appear at the end + match variants.iter().position(|var| var.attrs.untagged()) { + Some(variant_idx) => { + let (tagged, untagged) = variants.split_at(variant_idx); + let tagged_frag = Expr(deserialize_homogeneous_enum(params, tagged, cattrs)); + deserialize_untagged_enum_after(params, untagged, cattrs, Some(tagged_frag)) + } + None => deserialize_homogeneous_enum(params, variants, cattrs), + } +} + +fn deserialize_homogeneous_enum( + params: &Parameters, + variants: &[Variant], + cattrs: &attr::Container, ) -> Fragment { match cattrs.tag() { attr::TagType::External => deserialize_externally_tagged_enum(params, variants, cattrs), @@ -1203,7 +1230,12 @@ fn prepare_enum_variant_enum( }) .collect(); - let other_idx = deserialized_variants.position(|(_, variant)| variant.attrs.other()); + let fallthrough = deserialized_variants + .position(|(_, variant)| variant.attrs.other()) + .map(|other_idx| { + let ignore_variant = variant_names_idents[other_idx].1.clone(); + quote!(_serde::__private::Ok(__Field::#ignore_variant)) + }); let variants_stmt = { let variant_names = variant_names_idents.iter().map(|(name, _, _)| name); @@ -1217,7 +1249,8 @@ fn prepare_enum_variant_enum( &variant_names_idents, cattrs, true, - other_idx, + None, + fallthrough, )); (variants_stmt, variant_visitor) @@ -1272,7 +1305,7 @@ fn deserialize_externally_tagged_enum( } } else { quote! { - match try!(_serde::de::EnumAccess::variant(__data)) { + match _serde::de::EnumAccess::variant(__data)? { #(#variant_arms)* } } @@ -1336,9 +1369,7 @@ fn deserialize_internally_tagged_enum( params, variant, cattrs, - quote! { - _serde::__private::de::ContentDeserializer::<__D::Error>::new(__tagged.content) - }, + quote!(__deserializer), )); quote! 
{ @@ -1354,11 +1385,12 @@ fn deserialize_internally_tagged_enum( #variants_stmt - let __tagged = try!(_serde::Deserializer::deserialize_any( + let (__tag, __content) = _serde::Deserializer::deserialize_any( __deserializer, - _serde::__private::de::TaggedContentVisitor::<__Field>::new(#tag, #expecting))); + _serde::__private::de::TaggedContentVisitor::<__Field>::new(#tag, #expecting))?; + let __deserializer = _serde::__private::de::ContentDeserializer::<__D::Error>::new(__content); - match __tagged.tag { + match __tag { #(#variant_arms)* } } @@ -1399,7 +1431,8 @@ fn deserialize_adjacently_tagged_enum( }) .collect(); - let expecting = format!("adjacently tagged enum {}", params.type_name()); + let rust_name = params.type_name(); + let expecting = format!("adjacently tagged enum {}", rust_name); let expecting = cattrs.expecting().unwrap_or(&expecting); let type_name = cattrs.name().deserialize_name(); let deny_unknown_fields = cattrs.deny_unknown_fields(); @@ -1419,6 +1452,14 @@ fn deserialize_adjacently_tagged_enum( } }; + let variant_seed = quote! { + _serde::__private::de::AdjacentlyTaggedEnumVariantSeed::<__Field> { + enum_name: #rust_name, + variants: VARIANTS, + fields_enum: _serde::__private::PhantomData + } + }; + let mut missing_content = quote! { _serde::__private::Err(<__A::Error as _serde::de::Error>::missing_field(#content)) }; @@ -1463,7 +1504,11 @@ fn deserialize_adjacently_tagged_enum( // Advance the map by one key, returning early in case of error. let next_key = quote! { - try!(_serde::de::MapAccess::next_key_seed(&mut __map, #tag_or_content)) + _serde::de::MapAccess::next_key_seed(&mut __map, #tag_or_content)? + }; + + let variant_from_map = quote! { + _serde::de::MapAccess::next_value_seed(&mut __map, #variant_seed)? 
}; // When allowing unknown fields, we want to transparently step through keys @@ -1476,7 +1521,7 @@ fn deserialize_adjacently_tagged_enum( while let _serde::__private::Some(__k) = #next_key { match __k { _serde::__private::de::TagContentOtherField::Other => { - let _ = try!(_serde::de::MapAccess::next_value::<_serde::de::IgnoredAny>(&mut __map)); + let _ = _serde::de::MapAccess::next_value::<_serde::de::IgnoredAny>(&mut __map)?; continue; }, _serde::__private::de::TagContentOtherField::Tag => { @@ -1511,14 +1556,14 @@ fn deserialize_adjacently_tagged_enum( let finish_content_then_tag = if variant_arms.is_empty() { quote! { - match try!(_serde::de::MapAccess::next_value::<__Field>(&mut __map)) {} + match #variant_from_map {} } } else { quote! { - let __ret = try!(match try!(_serde::de::MapAccess::next_value(&mut __map)) { + let __ret = match #variant_from_map { // Deserialize the buffered content now that we know the variant. #(#variant_arms)* - }); + }?; // Visit remaining keys, looking for duplicates. #visit_remaining_keys } @@ -1571,7 +1616,7 @@ fn deserialize_adjacently_tagged_enum( // First key is the tag. _serde::__private::Some(_serde::__private::de::TagOrContentField::Tag) => { // Parse the tag. - let __field = try!(_serde::de::MapAccess::next_value(&mut __map)); + let __field = #variant_from_map; // Visit the second key. match #next_relevant_key { // Second key is a duplicate of the tag. @@ -1580,12 +1625,12 @@ fn deserialize_adjacently_tagged_enum( } // Second key is the content. _serde::__private::Some(_serde::__private::de::TagOrContentField::Content) => { - let __ret = try!(_serde::de::MapAccess::next_value_seed(&mut __map, + let __ret = _serde::de::MapAccess::next_value_seed(&mut __map, __Seed { field: __field, marker: _serde::__private::PhantomData, lifetime: _serde::__private::PhantomData, - })); + })?; // Visit remaining keys, looking for duplicates. 
#visit_remaining_keys } @@ -1596,7 +1641,7 @@ fn deserialize_adjacently_tagged_enum( // First key is the content. _serde::__private::Some(_serde::__private::de::TagOrContentField::Content) => { // Buffer up the content. - let __content = try!(_serde::de::MapAccess::next_value::<_serde::__private::de::Content>(&mut __map)); + let __content = _serde::de::MapAccess::next_value::<_serde::__private::de::Content>(&mut __map)?; // Visit the second key. match #next_relevant_key { // Second key is the tag. @@ -1626,17 +1671,17 @@ fn deserialize_adjacently_tagged_enum( __A: _serde::de::SeqAccess<#delife>, { // Visit the first element - the tag. - match try!(_serde::de::SeqAccess::next_element(&mut __seq)) { + match _serde::de::SeqAccess::next_element(&mut __seq)? { _serde::__private::Some(__field) => { // Visit the second element - the content. - match try!(_serde::de::SeqAccess::next_element_seed( + match _serde::de::SeqAccess::next_element_seed( &mut __seq, __Seed { field: __field, marker: _serde::__private::PhantomData, lifetime: _serde::__private::PhantomData, }, - )) { + )? { _serde::__private::Some(__ret) => _serde::__private::Ok(__ret), // There is no second element. _serde::__private::None => { @@ -1670,6 +1715,16 @@ fn deserialize_untagged_enum( params: &Parameters, variants: &[Variant], cattrs: &attr::Container, +) -> Fragment { + let first_attempt = None; + deserialize_untagged_enum_after(params, variants, cattrs, first_attempt) +} + +fn deserialize_untagged_enum_after( + params: &Parameters, + variants: &[Variant], + cattrs: &attr::Container, + first_attempt: Option<Expr>, ) -> Fragment { let attempts = variants .iter() @@ -1679,12 +1734,9 @@ fn deserialize_untagged_enum( params, variant, cattrs, - quote!( - _serde::__private::de::ContentRefDeserializer::<__D::Error>::new(&__content) - ), + quote!(__deserializer), )) }); - // TODO this message could be better by saving the errors from the failed // attempts. 
The heuristic used by TOML was to count the number of fields // processed before an error, and use the error that happened after the @@ -1697,8 +1749,22 @@ fn deserialize_untagged_enum( ); let fallthrough_msg = cattrs.expecting().unwrap_or(&fallthrough_msg); + // Ignore any error associated with non-untagged deserialization so that we + // can fall through to the untagged variants. This may be infallible so we + // need to provide the error type. + let first_attempt = first_attempt.map(|expr| { + quote! { + if let _serde::__private::Result::<_, __D::Error>::Ok(__ok) = (|| #expr)() { + return _serde::__private::Ok(__ok); + } + } + }); + quote_block! { - let __content = try!(<_serde::__private::de::Content as _serde::Deserialize>::deserialize(__deserializer)); + let __content = <_serde::__private::de::Content as _serde::Deserialize>::deserialize(__deserializer)?; + let __deserializer = _serde::__private::de::ContentRefDeserializer::<__D::Error>::new(&__content); + + #first_attempt #( if let _serde::__private::Ok(__ok) = #attempts { @@ -1730,7 +1796,7 @@ fn deserialize_externally_tagged_variant( Style::Unit => { let this_value = ¶ms.this_value; quote_block! 
{ - try!(_serde::de::VariantAccess::unit_variant(__variant)); + _serde::de::VariantAccess::unit_variant(__variant)?; _serde::__private::Ok(#this_value::#variant_ident) } } @@ -1740,16 +1806,17 @@ fn deserialize_externally_tagged_variant( &variant.fields[0], cattrs, ), - Style::Tuple => { - deserialize_tuple(Some(variant_ident), params, &variant.fields, cattrs, None) - } + Style::Tuple => deserialize_tuple( + params, + &variant.fields, + cattrs, + TupleForm::ExternallyTagged(variant_ident), + ), Style::Struct => deserialize_struct( - Some(variant_ident), params, &variant.fields, cattrs, - None, - &Untagged::No, + StructForm::ExternallyTagged(variant_ident), ), } } @@ -1773,12 +1840,12 @@ fn deserialize_internally_tagged_variant( let this_value = ¶ms.this_value; let type_name = params.type_name(); let variant_name = variant.ident.to_string(); - let default = variant.fields.get(0).map(|field| { + let default = variant.fields.first().map(|field| { let default = Expr(expr_is_missing(field, cattrs)); quote!((#default)) }); quote_block! 
{ - try!(_serde::Deserializer::deserialize_any(#deserializer, _serde::__private::de::InternallyTaggedUnitVisitor::new(#type_name, #variant_name))); + _serde::Deserializer::deserialize_any(#deserializer, _serde::__private::de::InternallyTaggedUnitVisitor::new(#type_name, #variant_name))?; _serde::__private::Ok(#this_value::#variant_ident #default) } } @@ -1789,12 +1856,10 @@ fn deserialize_internally_tagged_variant( &deserializer, ), Style::Struct => deserialize_struct( - Some(variant_ident), params, &variant.fields, cattrs, - Some(deserializer), - &Untagged::No, + StructForm::InternallyTagged(variant_ident, deserializer), ), Style::Tuple => unreachable!("checked in serde_derive_internals"), } @@ -1820,7 +1885,7 @@ fn deserialize_untagged_variant( let this_value = ¶ms.this_value; let type_name = params.type_name(); let variant_name = variant.ident.to_string(); - let default = variant.fields.get(0).map(|field| { + let default = variant.fields.first().map(|field| { let default = Expr(expr_is_missing(field, cattrs)); quote!((#default)) }); @@ -1841,19 +1906,16 @@ fn deserialize_untagged_variant( &deserializer, ), Style::Tuple => deserialize_tuple( - Some(variant_ident), params, &variant.fields, cattrs, - Some(deserializer), + TupleForm::Untagged(variant_ident, deserializer), ), Style::Struct => deserialize_struct( - Some(variant_ident), params, &variant.fields, cattrs, - Some(deserializer), - &Untagged::Yes, + StructForm::Untagged(variant_ident, deserializer), ), } } @@ -1869,7 +1931,7 @@ fn deserialize_externally_tagged_newtype_variant( if field.attrs.skip_deserializing() { let default = Expr(expr_is_missing(field, cattrs)); return quote_block! 
{ - try!(_serde::de::VariantAccess::unit_variant(__variant)); + _serde::de::VariantAccess::unit_variant(__variant)?; _serde::__private::Ok(#this_value::#variant_ident(#default)) }; } @@ -1922,30 +1984,15 @@ fn deserialize_untagged_newtype_variant( } fn deserialize_generated_identifier( - fields: &[(String, Ident, Vec<String>)], + fields: &[(&str, Ident, &BTreeSet<String>)], cattrs: &attr::Container, is_variant: bool, - other_idx: Option<usize>, + ignore_variant: Option<TokenStream>, + fallthrough: Option<TokenStream>, ) -> Fragment { let this_value = quote!(__Field); let field_idents: &Vec<_> = &fields.iter().map(|(_, ident, _)| ident).collect(); - let (ignore_variant, fallthrough) = if !is_variant && cattrs.has_flatten() { - let ignore_variant = quote!(__other(_serde::__private::de::Content<'de>),); - let fallthrough = quote!(_serde::__private::Ok(__Field::__other(__value))); - (Some(ignore_variant), Some(fallthrough)) - } else if let Some(other_idx) = other_idx { - let ignore_variant = fields[other_idx].1.clone(); - let fallthrough = quote!(_serde::__private::Ok(__Field::#ignore_variant)); - (None, Some(fallthrough)) - } else if is_variant || cattrs.deny_unknown_fields() { - (None, None) - } else { - let ignore_variant = quote!(__ignore,); - let fallthrough = quote!(_serde::__private::Ok(__Field::__ignore)); - (Some(ignore_variant), Some(fallthrough)) - }; - let visitor_impl = Stmts(deserialize_identifier( &this_value, fields, @@ -1991,6 +2038,33 @@ fn deserialize_generated_identifier( } } +/// Generates enum and its `Deserialize` implementation that represents each +/// non-skipped field of the struct +fn deserialize_field_identifier( + fields: &[(&str, Ident, &BTreeSet<String>)], + cattrs: &attr::Container, +) -> Stmts { + let (ignore_variant, fallthrough) = if cattrs.has_flatten() { + let ignore_variant = quote!(__other(_serde::__private::de::Content<'de>),); + let fallthrough = quote!(_serde::__private::Ok(__Field::__other(__value))); + (Some(ignore_variant), 
Some(fallthrough)) + } else if cattrs.deny_unknown_fields() { + (None, None) + } else { + let ignore_variant = quote!(__ignore,); + let fallthrough = quote!(_serde::__private::Ok(__Field::__ignore)); + (Some(ignore_variant), Some(fallthrough)) + }; + + Stmts(deserialize_generated_identifier( + fields, + cattrs, + false, + ignore_variant, + fallthrough, + )) +} + // Generates `Deserialize::deserialize` body for an enum with // `serde(field_identifier)` or `serde(variant_identifier)` attribute. fn deserialize_custom_identifier( @@ -2052,7 +2126,7 @@ fn deserialize_custom_identifier( }) .collect(); - let names = names_idents.iter().map(|(name, _, _)| name); + let names = names_idents.iter().flat_map(|&(_, _, aliases)| aliases); let names_const = if fallthrough.is_some() { None @@ -2108,32 +2182,24 @@ fn deserialize_custom_identifier( fn deserialize_identifier( this_value: &TokenStream, - fields: &[(String, Ident, Vec<String>)], + fields: &[(&str, Ident, &BTreeSet<String>)], is_variant: bool, fallthrough: Option<TokenStream>, fallthrough_borrowed: Option<TokenStream>, collect_other_fields: bool, expecting: Option<&str>, ) -> Fragment { - let mut flat_fields = Vec::new(); - for (_, ident, aliases) in fields { - flat_fields.extend(aliases.iter().map(|alias| (alias, ident))); - } - - let field_strs: &Vec<_> = &flat_fields.iter().map(|(name, _)| name).collect(); - let field_bytes: &Vec<_> = &flat_fields - .iter() - .map(|(name, _)| Literal::byte_string(name.as_bytes())) - .collect(); - - let constructors: &Vec<_> = &flat_fields - .iter() - .map(|(_, ident)| quote!(#this_value::#ident)) - .collect(); - let main_constructors: &Vec<_> = &fields - .iter() - .map(|(_, ident, _)| quote!(#this_value::#ident)) - .collect(); + let str_mapping = fields.iter().map(|(_, ident, aliases)| { + // `aliases` also contains a main name + quote!(#(#aliases)|* => _serde::__private::Ok(#this_value::#ident)) + }); + let bytes_mapping = fields.iter().map(|(_, ident, aliases)| { + // `aliases` 
also contains a main name + let aliases = aliases + .iter() + .map(|alias| Literal::byte_string(alias.as_bytes())); + quote!(#(#aliases)|* => _serde::__private::Ok(#this_value::#ident)) + }); let expecting = expecting.unwrap_or(if is_variant { "variant identifier" @@ -2141,8 +2207,6 @@ fn deserialize_identifier( "field identifier" }); - let index_expecting = if is_variant { "variant" } else { "field" }; - let bytes_to_str = if fallthrough.is_some() || collect_other_fields { None } else { @@ -2190,21 +2254,6 @@ fn deserialize_identifier( &fallthrough_arm_tokens }; - let u64_fallthrough_arm_tokens; - let u64_fallthrough_arm = if let Some(fallthrough) = &fallthrough { - fallthrough - } else { - let fallthrough_msg = format!("{} index 0 <= i < {}", index_expecting, fields.len()); - u64_fallthrough_arm_tokens = quote! { - _serde::__private::Err(_serde::de::Error::invalid_value( - _serde::de::Unexpected::Unsigned(__value), - &#fallthrough_msg, - )) - }; - &u64_fallthrough_arm_tokens - }; - - let variant_indices = 0_u64..; let visit_other = if collect_other_fields { quote! { fn visit_bool<__E>(self, __value: bool) -> _serde::__private::Result<Self::Value, __E> @@ -2299,15 +2348,33 @@ fn deserialize_identifier( } } } else { + let u64_mapping = fields.iter().enumerate().map(|(i, (_, ident, _))| { + let i = i as u64; + quote!(#i => _serde::__private::Ok(#this_value::#ident)) + }); + + let u64_fallthrough_arm_tokens; + let u64_fallthrough_arm = if let Some(fallthrough) = &fallthrough { + fallthrough + } else { + let index_expecting = if is_variant { "variant" } else { "field" }; + let fallthrough_msg = format!("{} index 0 <= i < {}", index_expecting, fields.len()); + u64_fallthrough_arm_tokens = quote! { + _serde::__private::Err(_serde::de::Error::invalid_value( + _serde::de::Unexpected::Unsigned(__value), + &#fallthrough_msg, + )) + }; + &u64_fallthrough_arm_tokens + }; + quote! 
{ fn visit_u64<__E>(self, __value: u64) -> _serde::__private::Result<Self::Value, __E> where __E: _serde::de::Error, { match __value { - #( - #variant_indices => _serde::__private::Ok(#main_constructors), - )* + #(#u64_mapping,)* _ => #u64_fallthrough_arm, } } @@ -2315,6 +2382,8 @@ fn deserialize_identifier( }; let visit_borrowed = if fallthrough_borrowed.is_some() || collect_other_fields { + let str_mapping = str_mapping.clone(); + let bytes_mapping = bytes_mapping.clone(); let fallthrough_borrowed_arm = fallthrough_borrowed.as_ref().unwrap_or(fallthrough_arm); Some(quote! { fn visit_borrowed_str<__E>(self, __value: &'de str) -> _serde::__private::Result<Self::Value, __E> @@ -2322,9 +2391,7 @@ fn deserialize_identifier( __E: _serde::de::Error, { match __value { - #( - #field_strs => _serde::__private::Ok(#constructors), - )* + #(#str_mapping,)* _ => { #value_as_borrowed_str_content #fallthrough_borrowed_arm @@ -2337,9 +2404,7 @@ fn deserialize_identifier( __E: _serde::de::Error, { match __value { - #( - #field_bytes => _serde::__private::Ok(#constructors), - )* + #(#bytes_mapping,)* _ => { #bytes_to_str #value_as_borrowed_bytes_content @@ -2364,9 +2429,7 @@ fn deserialize_identifier( __E: _serde::de::Error, { match __value { - #( - #field_strs => _serde::__private::Ok(#constructors), - )* + #(#str_mapping,)* _ => { #value_as_str_content #fallthrough_arm @@ -2379,9 +2442,7 @@ fn deserialize_identifier( __E: _serde::de::Error, { match __value { - #( - #field_bytes => _serde::__private::Ok(#constructors), - )* + #(#bytes_mapping,)* _ => { #bytes_to_str #value_as_bytes_content @@ -2394,71 +2455,6 @@ fn deserialize_identifier( } } -fn deserialize_struct_as_struct_visitor( - struct_path: &TokenStream, - params: &Parameters, - fields: &[Field], - cattrs: &attr::Container, -) -> (Fragment, Option<Fragment>, Fragment) { - assert!(!cattrs.has_flatten()); - - let field_names_idents: Vec<_> = fields - .iter() - .enumerate() - .filter(|&(_, field)| 
!field.attrs.skip_deserializing()) - .map(|(i, field)| { - ( - field.attrs.name().deserialize_name(), - field_i(i), - field.attrs.aliases(), - ) - }) - .collect(); - - let fields_stmt = { - let field_names = field_names_idents - .iter() - .flat_map(|(_, _, aliases)| aliases); - - quote_block! { - #[doc(hidden)] - const FIELDS: &'static [&'static str] = &[ #(#field_names),* ]; - } - }; - - let field_visitor = deserialize_generated_identifier(&field_names_idents, cattrs, false, None); - - let visit_map = deserialize_map(struct_path, params, fields, cattrs); - - (field_visitor, Some(fields_stmt), visit_map) -} - -fn deserialize_struct_as_map_visitor( - struct_path: &TokenStream, - params: &Parameters, - fields: &[Field], - cattrs: &attr::Container, -) -> (Fragment, Option<Fragment>, Fragment) { - let field_names_idents: Vec<_> = fields - .iter() - .enumerate() - .filter(|&(_, field)| !field.attrs.skip_deserializing() && !field.attrs.flatten()) - .map(|(i, field)| { - ( - field.attrs.name().deserialize_name(), - field_i(i), - field.attrs.aliases(), - ) - }) - .collect(); - - let field_visitor = deserialize_generated_identifier(&field_names_idents, cattrs, false, None); - - let visit_map = deserialize_map(struct_path, params, fields, cattrs); - - (field_visitor, None, visit_map) -} - fn deserialize_map( struct_path: &TokenStream, params: &Parameters, @@ -2509,7 +2505,7 @@ fn deserialize_map( let func = quote_spanned!(span=> _serde::de::MapAccess::next_value::<#field_ty>); quote! { - try!(#func(&mut __map)) + #func(&mut __map)? } } Some(path) => { @@ -2541,14 +2537,14 @@ fn deserialize_map( __Field::__other(__name) => { __collect.push(_serde::__private::Some(( __name, - try!(_serde::de::MapAccess::next_value(&mut __map))))); + _serde::de::MapAccess::next_value(&mut __map)?))); } }) } else if cattrs.deny_unknown_fields() { None } else { Some(quote! 
{ - _ => { let _ = try!(_serde::de::MapAccess::next_value::<_serde::de::IgnoredAny>(&mut __map)); } + _ => { let _ = _serde::de::MapAccess::next_value::<_serde::de::IgnoredAny>(&mut __map)?; } }) }; @@ -2556,14 +2552,14 @@ fn deserialize_map( let match_keys = if cattrs.deny_unknown_fields() && all_skipped { quote! { // FIXME: Once feature(exhaustive_patterns) is stable: - // let _serde::__private::None::<__Field> = try!(_serde::de::MapAccess::next_key(&mut __map)); + // let _serde::__private::None::<__Field> = _serde::de::MapAccess::next_key(&mut __map)?; _serde::__private::Option::map( - try!(_serde::de::MapAccess::next_key::<__Field>(&mut __map)), + _serde::de::MapAccess::next_key::<__Field>(&mut __map)?, |__impossible| match __impossible {}); } } else { quote! { - while let _serde::__private::Some(__key) = try!(_serde::de::MapAccess::next_key::<__Field>(&mut __map)) { + while let _serde::__private::Some(__key) = _serde::de::MapAccess::next_key::<__Field>(&mut __map)? { match __key { #(#value_arms)* #ignored_arm @@ -2599,10 +2595,10 @@ fn deserialize_map( Some(path) => quote!(#path), }; quote! { - let #name: #field_ty = try!(#func( + let #name: #field_ty = #func( _serde::__private::de::FlatMapDeserializer( &mut __collect, - _serde::__private::PhantomData))); + _serde::__private::PhantomData))?; } }); @@ -2676,42 +2672,6 @@ fn deserialize_map( } } -#[cfg(feature = "deserialize_in_place")] -fn deserialize_struct_as_struct_in_place_visitor( - params: &Parameters, - fields: &[Field], - cattrs: &attr::Container, -) -> (Fragment, Fragment, Fragment) { - assert!(!cattrs.has_flatten()); - - let field_names_idents: Vec<_> = fields - .iter() - .enumerate() - .filter(|&(_, field)| !field.attrs.skip_deserializing()) - .map(|(i, field)| { - ( - field.attrs.name().deserialize_name(), - field_i(i), - field.attrs.aliases(), - ) - }) - .collect(); - - let fields_stmt = { - let field_names = field_names_idents.iter().map(|(name, _, _)| name); - quote_block! 
{ - #[doc(hidden)] - const FIELDS: &'static [&'static str] = &[ #(#field_names),* ]; - } - }; - - let field_visitor = deserialize_generated_identifier(&field_names_idents, cattrs, false, None); - - let visit_map = deserialize_map_in_place(params, fields, cattrs); - - (field_visitor, fields_stmt, visit_map) -} - #[cfg(feature = "deserialize_in_place")] fn deserialize_map_in_place( params: &Parameters, @@ -2749,7 +2709,7 @@ fn deserialize_map_in_place( let visit = match field.attrs.deserialize_with() { None => { quote! { - try!(_serde::de::MapAccess::next_value_seed(&mut __map, _serde::__private::de::InPlaceSeed(&mut self.place.#member))) + _serde::de::MapAccess::next_value_seed(&mut __map, _serde::__private::de::InPlaceSeed(&mut self.place.#member))? } } Some(path) => { @@ -2781,7 +2741,7 @@ fn deserialize_map_in_place( None } else { Some(quote! { - _ => { let _ = try!(_serde::de::MapAccess::next_value::<_serde::de::IgnoredAny>(&mut __map)); } + _ => { let _ = _serde::de::MapAccess::next_value::<_serde::de::IgnoredAny>(&mut __map)?; } }) }; @@ -2790,14 +2750,14 @@ fn deserialize_map_in_place( let match_keys = if cattrs.deny_unknown_fields() && all_skipped { quote! { // FIXME: Once feature(exhaustive_patterns) is stable: - // let _serde::__private::None::<__Field> = try!(_serde::de::MapAccess::next_key(&mut __map)); + // let _serde::__private::None::<__Field> = _serde::de::MapAccess::next_key(&mut __map)?; _serde::__private::Option::map( - try!(_serde::de::MapAccess::next_key::<__Field>(&mut __map)), + _serde::de::MapAccess::next_key::<__Field>(&mut __map)?, |__impossible| match __impossible {}); } } else { quote! { - while let _serde::__private::Some(__key) = try!(_serde::de::MapAccess::next_key::<__Field>(&mut __map)) { + while let _serde::__private::Some(__key) = _serde::de::MapAccess::next_key::<__Field>(&mut __map)? 
{ match __key { #(#value_arms_from)* #ignored_arm @@ -2894,7 +2854,7 @@ fn wrap_deserialize_with( __D: _serde::Deserializer<#delife>, { _serde::__private::Ok(__DeserializeWith { - value: try!(#deserialize_with(__deserializer)), + value: #deserialize_with(__deserializer)?, phantom: _serde::__private::PhantomData, lifetime: _serde::__private::PhantomData, }) @@ -3004,7 +2964,7 @@ fn expr_is_missing(field: &Field, cattrs: &attr::Container) -> Fragment { let span = field.original.span(); let func = quote_spanned!(span=> _serde::__private::de::missing_field); quote_expr! { - try!(#func(#name)) + #func(#name)? } } Some(_) => { @@ -3015,6 +2975,35 @@ fn expr_is_missing(field: &Field, cattrs: &attr::Container) -> Fragment { } } +fn expr_is_missing_seq( + assign_to: Option<TokenStream>, + index: usize, + field: &Field, + cattrs: &attr::Container, + expecting: &str, +) -> TokenStream { + match field.attrs.default() { + attr::Default::Default => { + let span = field.original.span(); + return quote_spanned!(span=> #assign_to _serde::__private::Default::default()); + } + attr::Default::Path(path) => { + return quote_spanned!(path.span()=> #assign_to #path()); + } + attr::Default::None => { /* below */ } + } + + match *cattrs.default() { + attr::Default::Default | attr::Default::Path(_) => { + let member = &field.member; + quote!(#assign_to __default.#member) + } + attr::Default::None => quote!( + return _serde::__private::Err(_serde::de::Error::invalid_length(#index, &#expecting)) + ), + } +} + fn effective_style(variant: &Variant) -> Style { match variant.style { Style::Newtype if variant.fields[0].attrs.skip_deserializing() => Style::Unit, @@ -3086,23 +3075,31 @@ struct DeTypeGenerics<'a>(&'a Parameters); #[cfg(feature = "deserialize_in_place")] struct InPlaceTypeGenerics<'a>(&'a Parameters); +fn de_type_generics_to_tokens( + mut generics: syn::Generics, + borrowed: &BorrowedLifetimes, + tokens: &mut TokenStream, +) { + if borrowed.de_lifetime_param().is_some() { + let def = 
syn::LifetimeParam { + attrs: Vec::new(), + lifetime: syn::Lifetime::new("'de", Span::call_site()), + colon_token: None, + bounds: Punctuated::new(), + }; + // Prepend 'de lifetime to list of generics + generics.params = Some(syn::GenericParam::Lifetime(def)) + .into_iter() + .chain(generics.params) + .collect(); + } + let (_, ty_generics, _) = generics.split_for_impl(); + ty_generics.to_tokens(tokens); +} + impl<'a> ToTokens for DeTypeGenerics<'a> { fn to_tokens(&self, tokens: &mut TokenStream) { - let mut generics = self.0.generics.clone(); - if self.0.borrowed.de_lifetime_param().is_some() { - let def = syn::LifetimeParam { - attrs: Vec::new(), - lifetime: syn::Lifetime::new("'de", Span::call_site()), - colon_token: None, - bounds: Punctuated::new(), - }; - generics.params = Some(syn::GenericParam::Lifetime(def)) - .into_iter() - .chain(generics.params) - .collect(); - } - let (_, ty_generics, _) = generics.split_for_impl(); - ty_generics.to_tokens(tokens); + de_type_generics_to_tokens(self.0.generics.clone(), &self.0.borrowed, tokens); } } @@ -3115,20 +3112,7 @@ impl<'a> ToTokens for InPlaceTypeGenerics<'a> { .chain(generics.params) .collect(); - if self.0.borrowed.de_lifetime_param().is_some() { - let def = syn::LifetimeParam { - attrs: Vec::new(), - lifetime: syn::Lifetime::new("'de", Span::call_site()), - colon_token: None, - bounds: Punctuated::new(), - }; - generics.params = Some(syn::GenericParam::Lifetime(def)) - .into_iter() - .chain(generics.params) - .collect(); - } - let (_, ty_generics, _) = generics.split_for_impl(); - ty_generics.to_tokens(tokens); + de_type_generics_to_tokens(generics, &self.0.borrowed, tokens); } } diff --git a/vendor/serde_derive/src/dummy.rs b/vendor/serde_derive/src/dummy.rs index 2be5027..095f950 100644 --- a/vendor/serde_derive/src/dummy.rs +++ b/vendor/serde_derive/src/dummy.rs @@ -1,23 +1,7 @@ -use proc_macro2::{Ident, TokenStream}; -use quote::format_ident; - -use syn; -use try; - -pub fn wrap_in_const( - serde_path: 
Option<&syn::Path>, - trait_: &str, - ty: &Ident, - code: TokenStream, -) -> TokenStream { - let try_replacement = try::replacement(); - - let dummy_const = if cfg!(no_underscore_consts) { - format_ident!("_IMPL_{}_FOR_{}", trait_, unraw(ty)) - } else { - format_ident!("_") - }; +use proc_macro2::TokenStream; +use quote::quote; +pub fn wrap_in_const(serde_path: Option<&syn::Path>, code: TokenStream) -> TokenStream { let use_serde = match serde_path { Some(path) => quote! { use #path as _serde; @@ -31,14 +15,9 @@ pub fn wrap_in_const( quote! { #[doc(hidden)] #[allow(non_upper_case_globals, unused_attributes, unused_qualifications)] - const #dummy_const: () = { + const _: () = { #use_serde - #try_replacement #code }; } } - -fn unraw(ident: &Ident) -> String { - ident.to_string().trim_start_matches("r#").to_owned() -} diff --git a/vendor/serde_derive/src/fragment.rs b/vendor/serde_derive/src/fragment.rs index 324504a..6627c26 100644 --- a/vendor/serde_derive/src/fragment.rs +++ b/vendor/serde_derive/src/fragment.rs @@ -1,6 +1,6 @@ use proc_macro2::TokenStream; use quote::ToTokens; -use syn::token; +use syn::{token, Token}; pub enum Fragment { /// Tokens that can be used as an expression. diff --git a/vendor/serde_derive/src/internals/ast.rs b/vendor/serde_derive/src/internals/ast.rs index 2a6950b..a28d3ae 100644 --- a/vendor/serde_derive/src/internals/ast.rs +++ b/vendor/serde_derive/src/internals/ast.rs @@ -1,10 +1,8 @@ //! A Serde ast, parsed from the Syn ast and ready to generate Rust code. -use internals::attr; -use internals::check; -use internals::{Ctxt, Derive}; -use syn; +use crate::internals::{attr, check, Ctxt, Derive}; use syn::punctuated::Punctuated; +use syn::Token; /// A source data structure annotated with `#[derive(Serialize)]` and/or `#[derive(Deserialize)]`, /// parsed into an internal representation. 
@@ -88,9 +86,12 @@ impl<'a> Container<'a> { if field.attrs.flatten() { has_flatten = true; } - field - .attrs - .rename_by_rules(variant.attrs.rename_all_rules()); + field.attrs.rename_by_rules( + variant + .attrs + .rename_all_rules() + .or(attrs.rename_all_fields_rules()), + ); } } } @@ -121,7 +122,7 @@ impl<'a> Container<'a> { } impl<'a> Data<'a> { - pub fn all_fields(&'a self) -> Box<Iterator<Item = &'a Field<'a>> + 'a> { + pub fn all_fields(&'a self) -> Box<dyn Iterator<Item = &'a Field<'a>> + 'a> { match self { Data::Enum(variants) => { Box::new(variants.iter().flat_map(|variant| variant.fields.iter())) @@ -140,7 +141,7 @@ fn enum_from_ast<'a>( variants: &'a Punctuated<syn::Variant, Token![,]>, container_default: &attr::Default, ) -> Vec<Variant<'a>> { - variants + let variants: Vec<Variant> = variants .iter() .map(|variant| { let attrs = attr::Variant::from_ast(cx, variant); @@ -154,7 +155,20 @@ fn enum_from_ast<'a>( original: variant, } }) - .collect() + .collect(); + + let index_of_last_tagged_variant = variants + .iter() + .rposition(|variant| !variant.attrs.untagged()); + if let Some(index_of_last_tagged_variant) = index_of_last_tagged_variant { + for variant in &variants[..index_of_last_tagged_variant] { + if variant.attrs.untagged() { + cx.error_spanned_by(&variant.ident, "all variants with the #[serde(untagged)] attribute must be placed at the end of the enum"); + } + } + } + + variants } fn struct_from_ast<'a>( diff --git a/vendor/serde_derive/src/internals/attr.rs b/vendor/serde_derive/src/internals/attr.rs index b0a7d08..bb9de32 100644 --- a/vendor/serde_derive/src/internals/attr.rs +++ b/vendor/serde_derive/src/internals/attr.rs @@ -1,15 +1,14 @@ -use internals::symbol::*; -use internals::{ungroup, Ctxt}; +use crate::internals::symbol::*; +use crate::internals::{ungroup, Ctxt}; use proc_macro2::{Spacing, Span, TokenStream, TokenTree}; use quote::ToTokens; use std::borrow::Cow; use std::collections::BTreeSet; use std::iter::FromIterator; -use syn; 
use syn::meta::ParseNestedMeta; use syn::parse::ParseStream; use syn::punctuated::Punctuated; -use syn::{token, Ident, Lifetime}; +use syn::{parse_quote, token, Ident, Lifetime, Token}; // This module handles parsing of `#[serde(...)]` attributes. The entrypoints // are `attr::Container::from_ast`, `attr::Variant::from_ast`, and @@ -19,7 +18,7 @@ use syn::{token, Ident, Lifetime}; // user will see errors simultaneously for all bad attributes in the crate // rather than just the first. -pub use internals::case::RenameRule; +pub use crate::internals::case::RenameRule; struct Attr<'c, T> { cx: &'c Ctxt, @@ -135,7 +134,7 @@ pub struct Name { serialize_renamed: bool, deserialize: String, deserialize_renamed: bool, - deserialize_aliases: Vec<String>, + deserialize_aliases: BTreeSet<String>, } fn unraw(ident: &Ident) -> String { @@ -149,16 +148,12 @@ impl Name { de_name: Attr<String>, de_aliases: Option<VecAttr<String>>, ) -> Name { - let deserialize_aliases = match de_aliases { - Some(de_aliases) => { - let mut alias_list = BTreeSet::new(); - for alias_name in de_aliases.get() { - alias_list.insert(alias_name); - } - alias_list.into_iter().collect() + let mut alias_set = BTreeSet::new(); + if let Some(de_aliases) = de_aliases { + for alias_name in de_aliases.get() { + alias_set.insert(alias_name); } - None => Vec::new(), - }; + } let ser_name = ser_name.get(); let ser_renamed = ser_name.is_some(); @@ -169,35 +164,42 @@ impl Name { serialize_renamed: ser_renamed, deserialize: de_name.unwrap_or(source_name), deserialize_renamed: de_renamed, - deserialize_aliases, + deserialize_aliases: alias_set, } } /// Return the container name for the container when serializing. - pub fn serialize_name(&self) -> String { - self.serialize.clone() + pub fn serialize_name(&self) -> &str { + &self.serialize } /// Return the container name for the container when deserializing. 
- pub fn deserialize_name(&self) -> String { - self.deserialize.clone() + pub fn deserialize_name(&self) -> &str { + &self.deserialize } - fn deserialize_aliases(&self) -> Vec<String> { - let mut aliases = self.deserialize_aliases.clone(); - let main_name = self.deserialize_name(); - if !aliases.contains(&main_name) { - aliases.push(main_name); - } - aliases + fn deserialize_aliases(&self) -> &BTreeSet<String> { + &self.deserialize_aliases } } +#[derive(Copy, Clone)] pub struct RenameAllRules { serialize: RenameRule, deserialize: RenameRule, } +impl RenameAllRules { + /// Returns a new `RenameAllRules` with the individual rules of `self` and + /// `other_rules` joined by `RenameRules::or`. + pub fn or(self, other_rules: Self) -> Self { + Self { + serialize: self.serialize.or(other_rules.serialize), + deserialize: self.deserialize.or(other_rules.deserialize), + } + } +} + /// Represents struct or enum attribute information. pub struct Container { name: Name, @@ -205,6 +207,7 @@ pub struct Container { deny_unknown_fields: bool, default: Default, rename_all_rules: RenameAllRules, + rename_all_fields_rules: RenameAllRules, ser_bound: Option<Vec<syn::WherePredicate>>, de_bound: Option<Vec<syn::WherePredicate>>, tag: TagType, @@ -218,6 +221,7 @@ pub struct Container { is_packed: bool, /// Error message generated when type can't be deserialized expecting: Option<String>, + non_exhaustive: bool, } /// Styles of representing an enum. 
@@ -288,6 +292,8 @@ impl Container { let mut default = Attr::none(cx, DEFAULT); let mut rename_all_ser_rule = Attr::none(cx, RENAME_ALL); let mut rename_all_de_rule = Attr::none(cx, RENAME_ALL); + let mut rename_all_fields_ser_rule = Attr::none(cx, RENAME_ALL_FIELDS); + let mut rename_all_fields_de_rule = Attr::none(cx, RENAME_ALL_FIELDS); let mut ser_bound = Attr::none(cx, BOUND); let mut de_bound = Attr::none(cx, BOUND); let mut untagged = BoolAttr::none(cx, UNTAGGED); @@ -301,9 +307,12 @@ impl Container { let mut variant_identifier = BoolAttr::none(cx, VARIANT_IDENTIFIER); let mut serde_path = Attr::none(cx, CRATE); let mut expecting = Attr::none(cx, EXPECTING); + let mut non_exhaustive = false; for attr in &item.attrs { if attr.path() != SERDE { + non_exhaustive |= + matches!(&attr.meta, syn::Meta::Path(path) if path == NON_EXHAUSTIVE); continue; } @@ -341,6 +350,44 @@ impl Container { } } } + } else if meta.path == RENAME_ALL_FIELDS { + // #[serde(rename_all_fields = "foo")] + // #[serde(rename_all_fields(serialize = "foo", deserialize = "bar"))] + let one_name = meta.input.peek(Token![=]); + let (ser, de) = get_renames(cx, RENAME_ALL_FIELDS, &meta)?; + + match item.data { + syn::Data::Enum(_) => { + if let Some(ser) = ser { + match RenameRule::from_str(&ser.value()) { + Ok(rename_rule) => { + rename_all_fields_ser_rule.set(&meta.path, rename_rule); + } + Err(err) => cx.error_spanned_by(ser, err), + } + } + if let Some(de) = de { + match RenameRule::from_str(&de.value()) { + Ok(rename_rule) => { + rename_all_fields_de_rule.set(&meta.path, rename_rule); + } + Err(err) => { + if !one_name { + cx.error_spanned_by(de, err); + } + } + } + } + } + syn::Data::Struct(_) => { + let msg = "#[serde(rename_all_fields)] can only be used on enums"; + cx.syn_error(meta.error(msg)); + } + syn::Data::Union(_) => { + let msg = "#[serde(rename_all_fields)] can only be used on enums"; + cx.syn_error(meta.error(msg)); + } + } } else if meta.path == TRANSPARENT { // 
#[serde(transparent)] transparent.set_true(meta.path); @@ -353,21 +400,21 @@ impl Container { if let Some(path) = parse_lit_into_expr_path(cx, DEFAULT, &meta)? { match &item.data { syn::Data::Struct(syn::DataStruct { fields, .. }) => match fields { - syn::Fields::Named(_) => { + syn::Fields::Named(_) | syn::Fields::Unnamed(_) => { default.set(&meta.path, Default::Path(path)); } - syn::Fields::Unnamed(_) | syn::Fields::Unit => { - let msg = "#[serde(default = \"...\")] can only be used on structs with named fields"; - cx.error_spanned_by(fields, msg); + syn::Fields::Unit => { + let msg = "#[serde(default = \"...\")] can only be used on structs that have fields"; + cx.syn_error(meta.error(msg)); } }, - syn::Data::Enum(syn::DataEnum { enum_token, .. }) => { - let msg = "#[serde(default = \"...\")] can only be used on structs with named fields"; - cx.error_spanned_by(enum_token, msg); + syn::Data::Enum(_) => { + let msg = "#[serde(default = \"...\")] can only be used on structs"; + cx.syn_error(meta.error(msg)); } - syn::Data::Union(syn::DataUnion { union_token, .. }) => { - let msg = "#[serde(default = \"...\")] can only be used on structs with named fields"; - cx.error_spanned_by(union_token, msg); + syn::Data::Union(_) => { + let msg = "#[serde(default = \"...\")] can only be used on structs"; + cx.syn_error(meta.error(msg)); } } } @@ -375,21 +422,21 @@ impl Container { // #[serde(default)] match &item.data { syn::Data::Struct(syn::DataStruct { fields, .. }) => match fields { - syn::Fields::Named(_) => { + syn::Fields::Named(_) | syn::Fields::Unnamed(_) => { default.set(meta.path, Default::Default); } - syn::Fields::Unnamed(_) | syn::Fields::Unit => { - let msg = "#[serde(default)] can only be used on structs with named fields"; + syn::Fields::Unit => { + let msg = "#[serde(default)] can only be used on structs that have fields"; cx.error_spanned_by(fields, msg); } }, - syn::Data::Enum(syn::DataEnum { enum_token, .. 
}) => { - let msg = "#[serde(default)] can only be used on structs with named fields"; - cx.error_spanned_by(enum_token, msg); + syn::Data::Enum(_) => { + let msg = "#[serde(default)] can only be used on structs"; + cx.syn_error(meta.error(msg)); } - syn::Data::Union(syn::DataUnion { union_token, .. }) => { - let msg = "#[serde(default)] can only be used on structs with named fields"; - cx.error_spanned_by(union_token, msg); + syn::Data::Union(_) => { + let msg = "#[serde(default)] can only be used on structs"; + cx.syn_error(meta.error(msg)); } } } @@ -405,13 +452,13 @@ impl Container { syn::Data::Enum(_) => { untagged.set_true(&meta.path); } - syn::Data::Struct(syn::DataStruct { struct_token, .. }) => { + syn::Data::Struct(_) => { let msg = "#[serde(untagged)] can only be used on enums"; - cx.error_spanned_by(struct_token, msg); + cx.syn_error(meta.error(msg)); } - syn::Data::Union(syn::DataUnion { union_token, .. }) => { + syn::Data::Union(_) => { let msg = "#[serde(untagged)] can only be used on enums"; - cx.error_spanned_by(union_token, msg); + cx.syn_error(meta.error(msg)); } } } else if meta.path == TAG { @@ -427,12 +474,12 @@ impl Container { } syn::Fields::Unnamed(_) | syn::Fields::Unit => { let msg = "#[serde(tag = \"...\")] can only be used on enums and structs with named fields"; - cx.error_spanned_by(fields, msg); + cx.syn_error(meta.error(msg)); } }, - syn::Data::Union(syn::DataUnion { union_token, .. }) => { + syn::Data::Union(_) => { let msg = "#[serde(tag = \"...\")] can only be used on enums and structs with named fields"; - cx.error_spanned_by(union_token, msg); + cx.syn_error(meta.error(msg)); } } } @@ -443,13 +490,13 @@ impl Container { syn::Data::Enum(_) => { content.set(&meta.path, s.value()); } - syn::Data::Struct(syn::DataStruct { struct_token, .. 
}) => { + syn::Data::Struct(_) => { let msg = "#[serde(content = \"...\")] can only be used on enums"; - cx.error_spanned_by(struct_token, msg); + cx.syn_error(meta.error(msg)); } - syn::Data::Union(syn::DataUnion { union_token, .. }) => { + syn::Data::Union(_) => { let msg = "#[serde(content = \"...\")] can only be used on enums"; - cx.error_spanned_by(union_token, msg); + cx.syn_error(meta.error(msg)); } } } @@ -528,6 +575,10 @@ impl Container { serialize: rename_all_ser_rule.get().unwrap_or(RenameRule::None), deserialize: rename_all_de_rule.get().unwrap_or(RenameRule::None), }, + rename_all_fields_rules: RenameAllRules { + serialize: rename_all_fields_ser_rule.get().unwrap_or(RenameRule::None), + deserialize: rename_all_fields_de_rule.get().unwrap_or(RenameRule::None), + }, ser_bound: ser_bound.get(), de_bound: de_bound.get(), tag: decide_tag(cx, item, untagged, internal_tag, content), @@ -540,6 +591,7 @@ impl Container { serde_path: serde_path.get(), is_packed, expecting: expecting.get(), + non_exhaustive, } } @@ -547,8 +599,12 @@ impl Container { &self.name } - pub fn rename_all_rules(&self) -> &RenameAllRules { - &self.rename_all_rules + pub fn rename_all_rules(&self) -> RenameAllRules { + self.rename_all_rules + } + + pub fn rename_all_fields_rules(&self) -> RenameAllRules { + self.rename_all_fields_rules } pub fn transparent(&self) -> bool { @@ -621,6 +677,10 @@ impl Container { pub fn expecting(&self) -> Option<&str> { self.expecting.as_ref().map(String::as_ref) } + + pub fn non_exhaustive(&self) -> bool { + self.non_exhaustive + } } fn decide_tag( @@ -656,7 +716,7 @@ fn decide_tag( } TagType::Internal { tag } } - (Some((untagged_tokens, _)), Some((tag_tokens, _)), None) => { + (Some((untagged_tokens, ())), Some((tag_tokens, _)), None) => { let msg = "enum cannot be both untagged and internally tagged"; cx.error_spanned_by(untagged_tokens, msg); cx.error_spanned_by(tag_tokens, msg); @@ -667,14 +727,14 @@ fn decide_tag( cx.error_spanned_by(content_tokens, 
msg); TagType::External } - (Some((untagged_tokens, _)), None, Some((content_tokens, _))) => { + (Some((untagged_tokens, ())), None, Some((content_tokens, _))) => { let msg = "untagged enum cannot have #[serde(content = \"...\")]"; cx.error_spanned_by(untagged_tokens, msg); cx.error_spanned_by(content_tokens, msg); TagType::External } (None, Some((_, tag)), Some((_, content))) => TagType::Adjacent { tag, content }, - (Some((untagged_tokens, _)), Some((tag_tokens, _)), Some((content_tokens, _))) => { + (Some((untagged_tokens, ())), Some((tag_tokens, _)), Some((content_tokens, _))) => { let msg = "untagged enum cannot have #[serde(tag = \"...\", content = \"...\")]"; cx.error_spanned_by(untagged_tokens, msg); cx.error_spanned_by(tag_tokens, msg); @@ -696,7 +756,7 @@ fn decide_identifier( variant_identifier.0.get_with_tokens(), ) { (_, None, None) => Identifier::No, - (_, Some((field_identifier_tokens, _)), Some((variant_identifier_tokens, _))) => { + (_, Some((field_identifier_tokens, ())), Some((variant_identifier_tokens, ()))) => { let msg = "#[serde(field_identifier)] and #[serde(variant_identifier)] cannot both be set"; cx.error_spanned_by(field_identifier_tokens, msg); @@ -740,6 +800,7 @@ pub struct Variant { serialize_with: Option<syn::ExprPath>, deserialize_with: Option<syn::ExprPath>, borrow: Option<BorrowAttribute>, + untagged: bool, } struct BorrowAttribute { @@ -762,6 +823,7 @@ impl Variant { let mut serialize_with = Attr::none(cx, SERIALIZE_WITH); let mut deserialize_with = Attr::none(cx, DESERIALIZE_WITH); let mut borrow = Attr::none(cx, BORROW); + let mut untagged = BoolAttr::none(cx, UNTAGGED); for attr in &variant.attrs { if attr.path() != SERDE { @@ -879,6 +941,8 @@ impl Variant { cx.error_spanned_by(variant, msg); } } + } else if meta.path == UNTAGGED { + untagged.set_true(&meta.path); } else { let path = meta.path.to_token_stream().to_string().replace(' ', ""); return Err( @@ -905,6 +969,7 @@ impl Variant { serialize_with: serialize_with.get(), 
deserialize_with: deserialize_with.get(), borrow: borrow.get(), + untagged: untagged.get(), } } @@ -912,21 +977,24 @@ impl Variant { &self.name } - pub fn aliases(&self) -> Vec<String> { + pub fn aliases(&self) -> &BTreeSet<String> { self.name.deserialize_aliases() } - pub fn rename_by_rules(&mut self, rules: &RenameAllRules) { + pub fn rename_by_rules(&mut self, rules: RenameAllRules) { if !self.name.serialize_renamed { self.name.serialize = rules.serialize.apply_to_variant(&self.name.serialize); } if !self.name.deserialize_renamed { self.name.deserialize = rules.deserialize.apply_to_variant(&self.name.deserialize); } + self.name + .deserialize_aliases + .insert(self.name.deserialize.clone()); } - pub fn rename_all_rules(&self) -> &RenameAllRules { - &self.rename_all_rules + pub fn rename_all_rules(&self) -> RenameAllRules { + self.rename_all_rules } pub fn ser_bound(&self) -> Option<&[syn::WherePredicate]> { @@ -956,6 +1024,10 @@ impl Variant { pub fn deserialize_with(&self) -> Option<&syn::ExprPath> { self.deserialize_with.as_ref() } + + pub fn untagged(&self) -> bool { + self.untagged + } } /// Represents field attribute information @@ -1247,17 +1319,20 @@ impl Field { &self.name } - pub fn aliases(&self) -> Vec<String> { + pub fn aliases(&self) -> &BTreeSet<String> { self.name.deserialize_aliases() } - pub fn rename_by_rules(&mut self, rules: &RenameAllRules) { + pub fn rename_by_rules(&mut self, rules: RenameAllRules) { if !self.name.serialize_renamed { self.name.serialize = rules.serialize.apply_to_field(&self.name.serialize); } if !self.name.deserialize_renamed { self.name.deserialize = rules.deserialize.apply_to_field(&self.name.deserialize); } + self.name + .deserialize_aliases + .insert(self.name.deserialize.clone()); } pub fn skip_serializing(&self) -> bool { @@ -1409,6 +1484,13 @@ fn get_lit_str2( .. 
}) = value { + let suffix = lit.suffix(); + if !suffix.is_empty() { + cx.error_spanned_by( + lit, + format!("unexpected suffix `{}` on string literal", suffix), + ); + } Ok(Some(lit.clone())) } else { cx.error_spanned_by( @@ -1712,6 +1794,7 @@ fn borrowable_lifetimes( fn collect_lifetimes(ty: &syn::Type, out: &mut BTreeSet<syn::Lifetime>) { match ty { + #![cfg_attr(all(test, exhaustive), deny(non_exhaustive_omitted_patterns))] syn::Type::Slice(ty) => { collect_lifetimes(&ty.elem, out); } @@ -1747,7 +1830,10 @@ fn collect_lifetimes(ty: &syn::Type, out: &mut BTreeSet<syn::Lifetime>) { syn::GenericArgument::AssocType(binding) => { collect_lifetimes(&binding.ty, out); } - _ => {} + syn::GenericArgument::Const(_) + | syn::GenericArgument::AssocConst(_) + | syn::GenericArgument::Constraint(_) + | _ => {} } } } @@ -1769,7 +1855,6 @@ fn collect_lifetimes(ty: &syn::Type, out: &mut BTreeSet<syn::Lifetime>) { | syn::Type::Infer(_) | syn::Type::Verbatim(_) => {} - #[cfg_attr(all(test, exhaustive), deny(non_exhaustive_omitted_patterns))] _ => {} } } diff --git a/vendor/serde_derive/src/internals/case.rs b/vendor/serde_derive/src/internals/case.rs index 5545051..8c8c02e 100644 --- a/vendor/serde_derive/src/internals/case.rs +++ b/vendor/serde_derive/src/internals/case.rs @@ -1,13 +1,8 @@ //! Code to convert the Rust-styled field/variant (e.g. `my_field`, `MyType`) to the //! case of the source (e.g. `my-field`, `MY_FIELD`). -// See https://users.rust-lang.org/t/psa-dealing-with-warning-unused-import-std-ascii-asciiext-in-today-s-nightly/13726 -#[allow(deprecated, unused_imports)] -use std::ascii::AsciiExt; - -use std::fmt::{self, Debug, Display}; - use self::RenameRule::*; +use std::fmt::{self, Debug, Display}; /// The different possible ways to change case of fields in a struct, or variants in an enum. #[derive(Copy, Clone, PartialEq)] @@ -59,8 +54,8 @@ impl RenameRule { } /// Apply a renaming rule to an enum variant, returning the version expected in the source. 
- pub fn apply_to_variant(&self, variant: &str) -> String { - match *self { + pub fn apply_to_variant(self, variant: &str) -> String { + match self { None | PascalCase => variant.to_owned(), LowerCase => variant.to_ascii_lowercase(), UpperCase => variant.to_ascii_uppercase(), @@ -84,8 +79,8 @@ impl RenameRule { } /// Apply a renaming rule to a struct field, returning the version expected in the source. - pub fn apply_to_field(&self, field: &str) -> String { - match *self { + pub fn apply_to_field(self, field: &str) -> String { + match self { None | LowerCase | SnakeCase => field.to_owned(), UpperCase => field.to_ascii_uppercase(), PascalCase => { @@ -112,6 +107,14 @@ impl RenameRule { ScreamingKebabCase => ScreamingSnakeCase.apply_to_field(field).replace('_', "-"), } } + + /// Returns the `RenameRule` if it is not `None`, `rule_b` otherwise. + pub fn or(self, rule_b: Self) -> Self { + match self { + None => rule_b, + _ => self, + } + } } pub struct ParseError<'a> { diff --git a/vendor/serde_derive/src/internals/check.rs b/vendor/serde_derive/src/internals/check.rs index 05b4b8f..52b0f37 100644 --- a/vendor/serde_derive/src/internals/check.rs +++ b/vendor/serde_derive/src/internals/check.rs @@ -1,11 +1,12 @@ -use internals::ast::{Container, Data, Field, Style}; -use internals::attr::{Identifier, TagType}; -use internals::{ungroup, Ctxt, Derive}; +use crate::internals::ast::{Container, Data, Field, Style}; +use crate::internals::attr::{Default, Identifier, TagType}; +use crate::internals::{ungroup, Ctxt, Derive}; use syn::{Member, Type}; // Cross-cutting checks that require looking at more than a single attrs object. // Simpler checks should happen when parsing and building the attrs. 
pub fn check(cx: &Ctxt, cont: &mut Container, derive: Derive) { + check_default_on_tuple(cx, cont); check_remote_generic(cx, cont); check_getter(cx, cont); check_flatten(cx, cont); @@ -17,6 +18,39 @@ pub fn check(cx: &Ctxt, cont: &mut Container, derive: Derive) { check_from_and_try_from(cx, cont); } +// If some field of a tuple struct is marked #[serde(default)] then all fields +// after it must also be marked with that attribute, or the struct must have a +// container-level serde(default) attribute. A field's default value is only +// used for tuple fields if the sequence is exhausted at that point; that means +// all subsequent fields will fail to deserialize if they don't have their own +// default. +fn check_default_on_tuple(cx: &Ctxt, cont: &Container) { + if let Default::None = cont.attrs.default() { + if let Data::Struct(Style::Tuple, fields) = &cont.data { + let mut first_default_index = None; + for (i, field) in fields.iter().enumerate() { + // Skipped fields automatically get the #[serde(default)] + // attribute. We are interested only on non-skipped fields here. 
+ if field.attrs.skip_deserializing() { + continue; + } + if let Default::None = field.attrs.default() { + if let Some(first) = first_default_index { + cx.error_spanned_by( + field.ty, + format!("field must have #[serde(default)] because previous field {} has #[serde(default)]", first), + ); + } + continue; + } + if first_default_index.is_none() { + first_default_index = Some(i); + } + } + } + } +} + // Remote derive definition type must have either all of the generics of the // remote type: // @@ -110,9 +144,7 @@ fn check_flatten_field(cx: &Ctxt, style: Style, field: &Field) { fn check_identifier(cx: &Ctxt, cont: &Container) { let variants = match &cont.data { Data::Enum(variants) => variants, - Data::Struct(_, _) => { - return; - } + Data::Struct(_, _) => return, }; for (i, variant) in variants.iter().enumerate() { @@ -194,12 +226,10 @@ fn check_identifier(cx: &Ctxt, cont: &Container) { fn check_variant_skip_attrs(cx: &Ctxt, cont: &Container) { let variants = match &cont.data { Data::Enum(variants) => variants, - Data::Struct(_, _) => { - return; - } + Data::Struct(_, _) => return, }; - for variant in variants.iter() { + for variant in variants { if variant.attrs.serialize_with().is_some() { if variant.attrs.skip_serializing() { cx.error_spanned_by( @@ -288,9 +318,14 @@ fn check_internal_tag_field_name_conflict(cx: &Ctxt, cont: &Container) { for variant in variants { match variant.style { Style::Struct => { + if variant.attrs.untagged() { + continue; + } for field in &variant.fields { - let check_ser = !field.attrs.skip_serializing(); - let check_de = !field.attrs.skip_deserializing(); + let check_ser = + !(field.attrs.skip_serializing() || variant.attrs.skip_serializing()); + let check_de = + !(field.attrs.skip_deserializing() || variant.attrs.skip_deserializing()); let name = field.attrs.name(); let ser_name = name.serialize_name(); diff --git a/vendor/serde_derive/src/internals/ctxt.rs b/vendor/serde_derive/src/internals/ctxt.rs index d692c2a..a47bfa4 100644 
--- a/vendor/serde_derive/src/internals/ctxt.rs +++ b/vendor/serde_derive/src/internals/ctxt.rs @@ -2,7 +2,6 @@ use quote::ToTokens; use std::cell::RefCell; use std::fmt::Display; use std::thread; -use syn; /// A type to collect errors together and format them. /// @@ -44,12 +43,19 @@ impl Ctxt { } /// Consume this object, producing a formatted error string if there are errors. - pub fn check(self) -> Result<(), Vec<syn::Error>> { - let errors = self.errors.borrow_mut().take().unwrap(); - match errors.len() { - 0 => Ok(()), - _ => Err(errors), + pub fn check(self) -> syn::Result<()> { + let mut errors = self.errors.borrow_mut().take().unwrap().into_iter(); + + let mut combined = match errors.next() { + Some(first) => first, + None => return Ok(()), + }; + + for rest in errors { + combined.combine(rest); } + + Err(combined) } } diff --git a/vendor/serde_derive/src/internals/mod.rs b/vendor/serde_derive/src/internals/mod.rs index 5e9f416..f98ef08 100644 --- a/vendor/serde_derive/src/internals/mod.rs +++ b/vendor/serde_derive/src/internals/mod.rs @@ -1,19 +1,18 @@ pub mod ast; pub mod attr; -mod ctxt; -pub use self::ctxt::Ctxt; - -mod receiver; -pub use self::receiver::replace_receiver; - mod case; mod check; +mod ctxt; +mod receiver; mod respan; mod symbol; use syn::Type; +pub use self::ctxt::Ctxt; +pub use self::receiver::replace_receiver; + #[derive(Copy, Clone)] pub enum Derive { Serialize, diff --git a/vendor/serde_derive/src/internals/receiver.rs b/vendor/serde_derive/src/internals/receiver.rs index 5dc01db..fa2a77d 100644 --- a/vendor/serde_derive/src/internals/receiver.rs +++ b/vendor/serde_derive/src/internals/receiver.rs @@ -1,11 +1,11 @@ -use internals::respan::respan; +use crate::internals::respan::respan; use proc_macro2::Span; use quote::ToTokens; use std::mem; use syn::punctuated::Punctuated; use syn::{ parse_quote, Data, DeriveInput, Expr, ExprPath, GenericArgument, GenericParam, Generics, Macro, - Path, PathArguments, QSelf, ReturnType, Type, 
TypeParamBound, TypePath, WherePredicate, + Path, PathArguments, QSelf, ReturnType, Token, Type, TypeParamBound, TypePath, WherePredicate, }; pub fn replace_receiver(input: &mut DeriveInput) { @@ -107,6 +107,7 @@ impl ReplaceReceiver<'_> { fn visit_type_mut_impl(&mut self, ty: &mut Type) { match ty { + #![cfg_attr(all(test, exhaustive), deny(non_exhaustive_omitted_patterns))] Type::Array(ty) => { self.visit_type_mut(&mut ty.elem); self.visit_expr_mut(&mut ty.len); @@ -147,7 +148,6 @@ impl ReplaceReceiver<'_> { Type::Infer(_) | Type::Never(_) | Type::Verbatim(_) => {} - #[cfg_attr(all(test, exhaustive), deny(non_exhaustive_omitted_patterns))] _ => {} } } @@ -178,13 +178,13 @@ impl ReplaceReceiver<'_> { PathArguments::AngleBracketed(arguments) => { for arg in &mut arguments.args { match arg { + #![cfg_attr(all(test, exhaustive), deny(non_exhaustive_omitted_patterns))] GenericArgument::Type(arg) => self.visit_type_mut(arg), GenericArgument::AssocType(arg) => self.visit_type_mut(&mut arg.ty), GenericArgument::Lifetime(_) | GenericArgument::Const(_) | GenericArgument::AssocConst(_) | GenericArgument::Constraint(_) => {} - #[cfg_attr(all(test, exhaustive), deny(non_exhaustive_omitted_patterns))] _ => {} } } @@ -207,9 +207,9 @@ impl ReplaceReceiver<'_> { fn visit_type_param_bound_mut(&mut self, bound: &mut TypeParamBound) { match bound { + #![cfg_attr(all(test, exhaustive), deny(non_exhaustive_omitted_patterns))] TypeParamBound::Trait(bound) => self.visit_path_mut(&mut bound.path), TypeParamBound::Lifetime(_) | TypeParamBound::Verbatim(_) => {} - #[cfg_attr(all(test, exhaustive), deny(non_exhaustive_omitted_patterns))] _ => {} } } @@ -228,6 +228,7 @@ impl ReplaceReceiver<'_> { if let Some(where_clause) = &mut generics.where_clause { for predicate in &mut where_clause.predicates { match predicate { + #![cfg_attr(all(test, exhaustive), deny(non_exhaustive_omitted_patterns))] WherePredicate::Type(predicate) => { self.visit_type_mut(&mut predicate.bounded_ty); for bound in 
&mut predicate.bounds { @@ -235,7 +236,6 @@ impl ReplaceReceiver<'_> { } } WherePredicate::Lifetime(_) => {} - #[cfg_attr(all(test, exhaustive), deny(non_exhaustive_omitted_patterns))] _ => {} } } diff --git a/vendor/serde_derive/src/internals/symbol.rs b/vendor/serde_derive/src/internals/symbol.rs index 9606edb..572391a 100644 --- a/vendor/serde_derive/src/internals/symbol.rs +++ b/vendor/serde_derive/src/internals/symbol.rs @@ -19,10 +19,12 @@ pub const FLATTEN: Symbol = Symbol("flatten"); pub const FROM: Symbol = Symbol("from"); pub const GETTER: Symbol = Symbol("getter"); pub const INTO: Symbol = Symbol("into"); +pub const NON_EXHAUSTIVE: Symbol = Symbol("non_exhaustive"); pub const OTHER: Symbol = Symbol("other"); pub const REMOTE: Symbol = Symbol("remote"); pub const RENAME: Symbol = Symbol("rename"); pub const RENAME_ALL: Symbol = Symbol("rename_all"); +pub const RENAME_ALL_FIELDS: Symbol = Symbol("rename_all_fields"); pub const REPR: Symbol = Symbol("repr"); pub const SERDE: Symbol = Symbol("serde"); pub const SERIALIZE: Symbol = Symbol("serialize"); diff --git a/vendor/serde_derive/src/lib.rs b/vendor/serde_derive/src/lib.rs index 947033b..b91f17b 100644 --- a/vendor/serde_derive/src/lib.rs +++ b/vendor/serde_derive/src/lib.rs @@ -1,7 +1,7 @@ //! This crate provides Serde's two derive macros. //! -//! ```edition2018 -//! # use serde_derive::{Serialize, Deserialize}; +//! ```edition2021 +//! # use serde_derive::{Deserialize, Serialize}; //! # //! #[derive(Serialize, Deserialize)] //! # struct S; @@ -13,8 +13,7 @@ //! //! 
[https://serde.rs/derive.html]: https://serde.rs/derive.html -#![doc(html_root_url = "https://docs.rs/serde_derive/1.0.160")] -#![allow(unknown_lints, bare_trait_objects)] +#![doc(html_root_url = "https://docs.rs/serde_derive/1.0.197")] // Ignored clippy lints #![allow( // clippy false positive: https://github.com/rust-lang/rust-clippy/issues/7054 @@ -51,7 +50,6 @@ clippy::match_wildcard_for_single_variants, clippy::module_name_repetitions, clippy::must_use_candidate, - clippy::option_if_let_else, clippy::similar_names, clippy::single_match_else, clippy::struct_excessive_bools, @@ -63,17 +61,16 @@ )] #![cfg_attr(all(test, exhaustive), feature(non_exhaustive_omitted_patterns_lint))] -#[macro_use] +extern crate proc_macro2; extern crate quote; -#[macro_use] extern crate syn; extern crate proc_macro; -extern crate proc_macro2; mod internals; use proc_macro::TokenStream; +use syn::parse_macro_input; use syn::DeriveInput; #[macro_use] @@ -86,13 +83,12 @@ mod dummy; mod pretend; mod ser; mod this; -mod try; #[proc_macro_derive(Serialize, attributes(serde))] pub fn derive_serialize(input: TokenStream) -> TokenStream { let mut input = parse_macro_input!(input as DeriveInput); ser::expand_derive_serialize(&mut input) - .unwrap_or_else(to_compile_errors) + .unwrap_or_else(syn::Error::into_compile_error) .into() } @@ -100,11 +96,6 @@ pub fn derive_serialize(input: TokenStream) -> TokenStream { pub fn derive_deserialize(input: TokenStream) -> TokenStream { let mut input = parse_macro_input!(input as DeriveInput); de::expand_derive_deserialize(&mut input) - .unwrap_or_else(to_compile_errors) + .unwrap_or_else(syn::Error::into_compile_error) .into() } - -fn to_compile_errors(errors: Vec<syn::Error>) -> proc_macro2::TokenStream { - let compile_errors = errors.iter().map(syn::Error::to_compile_error); - quote!(#(#compile_errors)*) -} diff --git a/vendor/serde_derive/src/pretend.rs b/vendor/serde_derive/src/pretend.rs index d7b953d..2c9e779 100644 --- 
a/vendor/serde_derive/src/pretend.rs +++ b/vendor/serde_derive/src/pretend.rs @@ -1,7 +1,6 @@ +use crate::internals::ast::{Container, Data, Field, Style, Variant}; use proc_macro2::TokenStream; -use quote::format_ident; - -use internals::ast::{Container, Data, Field, Style, Variant}; +use quote::{format_ident, quote}; // Suppress dead_code warnings that would otherwise appear when using a remote // derive. Other than this pretend code, a struct annotated with remote derive @@ -65,14 +64,14 @@ pub fn pretend_used(cont: &Container, is_packed: bool) -> TokenStream { fn pretend_fields_used(cont: &Container, is_packed: bool) -> TokenStream { match &cont.data { Data::Enum(variants) => pretend_fields_used_enum(cont, variants), - Data::Struct(Style::Struct, fields) => { + Data::Struct(Style::Struct | Style::Tuple | Style::Newtype, fields) => { if is_packed { pretend_fields_used_struct_packed(cont, fields) } else { pretend_fields_used_struct(cont, fields) } } - Data::Struct(_, _) => quote!(), + Data::Struct(Style::Unit, _) => quote!(), } } @@ -97,29 +96,14 @@ fn pretend_fields_used_struct_packed(cont: &Container, fields: &[Field]) -> Toke let members = fields.iter().map(|field| &field.member).collect::<Vec<_>>(); - #[cfg(not(no_ptr_addr_of))] - { - quote! { - match _serde::__private::None::<&#type_ident #ty_generics> { - _serde::__private::Some(__v @ #type_ident { #(#members: _),* }) => { - #( - let _ = _serde::__private::ptr::addr_of!(__v.#members); - )* - } - _ => {} - } - } - } - - #[cfg(no_ptr_addr_of)] - { - let placeholders = (0usize..).map(|i| format_ident!("__v{}", i)); - - quote! { - match _serde::__private::None::<#type_ident #ty_generics> { - _serde::__private::Some(#type_ident { #(#members: #placeholders),* }) => {} - _ => {} + quote! 
{ + match _serde::__private::None::<&#type_ident #ty_generics> { + _serde::__private::Some(__v @ #type_ident { #(#members: _),* }) => { + #( + let _ = _serde::__private::ptr::addr_of!(__v.#members); + )* } + _ => {} } } } @@ -131,13 +115,13 @@ fn pretend_fields_used_enum(cont: &Container, variants: &[Variant]) -> TokenStre let patterns = variants .iter() .filter_map(|variant| match variant.style { - Style::Struct => { + Style::Struct | Style::Tuple | Style::Newtype => { let variant_ident = &variant.ident; let members = variant.fields.iter().map(|field| &field.member); let placeholders = (0usize..).map(|i| format_ident!("__v{}", i)); Some(quote!(#type_ident::#variant_ident { #(#members: #placeholders),* })) } - _ => None, + Style::Unit => None, }) .collect::<Vec<_>>(); diff --git a/vendor/serde_derive/src/ser.rs b/vendor/serde_derive/src/ser.rs index f223f71..3be51ee 100644 --- a/vendor/serde_derive/src/ser.rs +++ b/vendor/serde_derive/src/ser.rs @@ -1,18 +1,13 @@ +use crate::fragment::{Fragment, Match, Stmts}; +use crate::internals::ast::{Container, Data, Field, Style, Variant}; +use crate::internals::{attr, replace_receiver, Ctxt, Derive}; +use crate::{bound, dummy, pretend, this}; use proc_macro2::{Span, TokenStream}; +use quote::{quote, quote_spanned}; use syn::spanned::Spanned; -use syn::{self, Ident, Index, Member}; - -use bound; -use dummy; -use fragment::{Fragment, Match, Stmts}; -use internals::ast::{Container, Data, Field, Style, Variant}; -use internals::{attr, replace_receiver, Ctxt, Derive}; -use pretend; -use this; - -pub fn expand_derive_serialize( - input: &mut syn::DeriveInput, -) -> Result<TokenStream, Vec<syn::Error>> { +use syn::{parse_quote, Ident, Index, Member}; + +pub fn expand_derive_serialize(input: &mut syn::DeriveInput) -> syn::Result<TokenStream> { replace_receiver(input); let ctxt = Ctxt::new(); @@ -59,8 +54,6 @@ pub fn expand_derive_serialize( Ok(dummy::wrap_in_const( cont.attrs.custom_serde_path(), - "SERIALIZE", - ident, impl_block, 
)) } @@ -289,7 +282,7 @@ fn serialize_tuple_struct( .fold(quote!(0), |sum, expr| quote!(#sum + #expr)); quote_block! { - let #let_mut __serde_state = try!(_serde::Serializer::serialize_tuple_struct(__serializer, #type_name, #len)); + let #let_mut __serde_state = _serde::Serializer::serialize_tuple_struct(__serializer, #type_name, #len)?; #(#serialize_stmts)* _serde::ser::SerializeTupleStruct::end(__serde_state) } @@ -311,7 +304,7 @@ fn serialize_struct_tag_field(cattrs: &attr::Container, struct_trait: &StructTra let type_name = cattrs.name().serialize_name(); let func = struct_trait.serialize_field(Span::call_site()); quote! { - try!(#func(&mut __serde_state, #tag, #type_name)); + #func(&mut __serde_state, #tag, #type_name)?; } } _ => quote! {}, @@ -352,7 +345,7 @@ fn serialize_struct_as_struct( ); quote_block! { - let #let_mut __serde_state = try!(_serde::Serializer::serialize_struct(__serializer, #type_name, #len)); + let #let_mut __serde_state = _serde::Serializer::serialize_struct(__serializer, #type_name, #len)?; #tag_field #(#serialize_fields)* _serde::ser::SerializeStruct::end(__serde_state) @@ -396,7 +389,7 @@ fn serialize_struct_as_map( }; quote_block! { - let #let_mut __serde_state = try!(_serde::Serializer::serialize_map(__serializer, #len)); + let #let_mut __serde_state = _serde::Serializer::serialize_map(__serializer, #len)?; #tag_field #(#serialize_fields)* _serde::ser::SerializeMap::end(__serde_state) @@ -408,7 +401,7 @@ fn serialize_enum(params: &Parameters, variants: &[Variant], cattrs: &attr::Cont let self_var = ¶ms.self_var; - let arms: Vec<_> = variants + let mut arms: Vec<_> = variants .iter() .enumerate() .map(|(variant_index, variant)| { @@ -416,6 +409,12 @@ fn serialize_enum(params: &Parameters, variants: &[Variant], cattrs: &attr::Cont }) .collect(); + if cattrs.remote().is_some() && cattrs.non_exhaustive() { + arms.push(quote! 
{ + ref unrecognized => _serde::__private::Err(_serde::ser::Error::custom(_serde::__private::ser::CannotSerializeVariant(unrecognized))), + }); + } + quote_expr! { match *#self_var { #(#arms)* @@ -477,17 +476,26 @@ fn serialize_variant( } }; - let body = Match(match cattrs.tag() { - attr::TagType::External => { + let body = Match(match (cattrs.tag(), variant.attrs.untagged()) { + (attr::TagType::External, false) => { serialize_externally_tagged_variant(params, variant, variant_index, cattrs) } - attr::TagType::Internal { tag } => { + (attr::TagType::Internal { tag }, false) => { serialize_internally_tagged_variant(params, variant, cattrs, tag) } - attr::TagType::Adjacent { tag, content } => { - serialize_adjacently_tagged_variant(params, variant, cattrs, tag, content) + (attr::TagType::Adjacent { tag, content }, false) => { + serialize_adjacently_tagged_variant( + params, + variant, + cattrs, + variant_index, + tag, + content, + ) + } + (attr::TagType::None, _) | (_, true) => { + serialize_untagged_variant(params, variant, cattrs) } - attr::TagType::None => serialize_untagged_variant(params, variant, cattrs), }); quote! { @@ -564,7 +572,7 @@ fn serialize_externally_tagged_variant( }, params, &variant.fields, - &type_name, + type_name, ), } } @@ -598,10 +606,10 @@ fn serialize_internally_tagged_variant( match effective_style(variant) { Style::Unit => { quote_block! 
{ - let mut __struct = try!(_serde::Serializer::serialize_struct( - __serializer, #type_name, 1)); - try!(_serde::ser::SerializeStruct::serialize_field( - &mut __struct, #tag, #variant_name)); + let mut __struct = _serde::Serializer::serialize_struct( + __serializer, #type_name, 1)?; + _serde::ser::SerializeStruct::serialize_field( + &mut __struct, #tag, #variant_name)?; _serde::ser::SerializeStruct::end(__struct) } } @@ -629,7 +637,7 @@ fn serialize_internally_tagged_variant( StructVariant::InternallyTagged { tag, variant_name }, params, &variant.fields, - &type_name, + type_name, ), Style::Tuple => unreachable!("checked in serde_derive_internals"), } @@ -639,12 +647,20 @@ fn serialize_adjacently_tagged_variant( params: &Parameters, variant: &Variant, cattrs: &attr::Container, + variant_index: u32, tag: &str, content: &str, ) -> Fragment { let this_type = ¶ms.this_type; let type_name = cattrs.name().serialize_name(); let variant_name = variant.attrs.name().serialize_name(); + let serialize_variant = quote! { + &_serde::__private::ser::AdjacentlyTaggedEnumVariant { + enum_name: #type_name, + variant_index: #variant_index, + variant_name: #variant_name, + } + }; let inner = Stmts(if let Some(path) = variant.attrs.serialize_with() { let ser = wrap_serialize_variant_with(params, path, variant); @@ -655,10 +671,10 @@ fn serialize_adjacently_tagged_variant( match effective_style(variant) { Style::Unit => { return quote_block! 
{ - let mut __struct = try!(_serde::Serializer::serialize_struct( - __serializer, #type_name, 1)); - try!(_serde::ser::SerializeStruct::serialize_field( - &mut __struct, #tag, #variant_name)); + let mut __struct = _serde::Serializer::serialize_struct( + __serializer, #type_name, 1)?; + _serde::ser::SerializeStruct::serialize_field( + &mut __struct, #tag, #serialize_variant)?; _serde::ser::SerializeStruct::end(__struct) }; } @@ -672,12 +688,12 @@ fn serialize_adjacently_tagged_variant( let span = field.original.span(); let func = quote_spanned!(span=> _serde::ser::SerializeStruct::serialize_field); return quote_block! { - let mut __struct = try!(_serde::Serializer::serialize_struct( - __serializer, #type_name, 2)); - try!(_serde::ser::SerializeStruct::serialize_field( - &mut __struct, #tag, #variant_name)); - try!(#func( - &mut __struct, #content, #field_expr)); + let mut __struct = _serde::Serializer::serialize_struct( + __serializer, #type_name, 2)?; + _serde::ser::SerializeStruct::serialize_field( + &mut __struct, #tag, #serialize_variant)?; + #func( + &mut __struct, #content, #field_expr)?; _serde::ser::SerializeStruct::end(__struct) }; } @@ -688,13 +704,13 @@ fn serialize_adjacently_tagged_variant( StructVariant::Untagged, params, &variant.fields, - &variant_name, + variant_name, ), } }); let fields_ty = variant.fields.iter().map(|f| &f.ty); - let fields_ident: &Vec<_> = &match variant.style { + let fields_ident: &[_] = &match variant.style { Style::Unit => { if variant.attrs.serialize_with().is_some() { vec![] @@ -737,15 +753,15 @@ fn serialize_adjacently_tagged_variant( } } - let mut __struct = try!(_serde::Serializer::serialize_struct( - __serializer, #type_name, 2)); - try!(_serde::ser::SerializeStruct::serialize_field( - &mut __struct, #tag, #variant_name)); - try!(_serde::ser::SerializeStruct::serialize_field( + let mut __struct = _serde::Serializer::serialize_struct( + __serializer, #type_name, 2)?; + _serde::ser::SerializeStruct::serialize_field( + &mut 
__struct, #tag, #serialize_variant)?; + _serde::ser::SerializeStruct::serialize_field( &mut __struct, #content, &__AdjacentlyTagged { data: (#(#fields_ident,)*), phantom: _serde::__private::PhantomData::<#this_type #ty_generics>, - })); + })?; _serde::ser::SerializeStruct::end(__struct) } } @@ -784,16 +800,16 @@ fn serialize_untagged_variant( Style::Tuple => serialize_tuple_variant(TupleVariant::Untagged, params, &variant.fields), Style::Struct => { let type_name = cattrs.name().serialize_name(); - serialize_struct_variant(StructVariant::Untagged, params, &variant.fields, &type_name) + serialize_struct_variant(StructVariant::Untagged, params, &variant.fields, type_name) } } } -enum TupleVariant { +enum TupleVariant<'a> { ExternallyTagged { - type_name: String, + type_name: &'a str, variant_index: u32, - variant_name: String, + variant_name: &'a str, }, Untagged, } @@ -835,21 +851,21 @@ fn serialize_tuple_variant( variant_name, } => { quote_block! { - let #let_mut __serde_state = try!(_serde::Serializer::serialize_tuple_variant( + let #let_mut __serde_state = _serde::Serializer::serialize_tuple_variant( __serializer, #type_name, #variant_index, #variant_name, - #len)); + #len)?; #(#serialize_stmts)* _serde::ser::SerializeTupleVariant::end(__serde_state) } } TupleVariant::Untagged => { quote_block! { - let #let_mut __serde_state = try!(_serde::Serializer::serialize_tuple( + let #let_mut __serde_state = _serde::Serializer::serialize_tuple( __serializer, - #len)); + #len)?; #(#serialize_stmts)* _serde::ser::SerializeTuple::end(__serde_state) } @@ -860,11 +876,11 @@ fn serialize_tuple_variant( enum StructVariant<'a> { ExternallyTagged { variant_index: u32, - variant_name: String, + variant_name: &'a str, }, InternallyTagged { tag: &'a str, - variant_name: String, + variant_name: &'a str, }, Untagged, } @@ -912,40 +928,40 @@ fn serialize_struct_variant( variant_name, } => { quote_block! 
{ - let #let_mut __serde_state = try!(_serde::Serializer::serialize_struct_variant( + let #let_mut __serde_state = _serde::Serializer::serialize_struct_variant( __serializer, #name, #variant_index, #variant_name, #len, - )); + )?; #(#serialize_fields)* _serde::ser::SerializeStructVariant::end(__serde_state) } } StructVariant::InternallyTagged { tag, variant_name } => { quote_block! { - let mut __serde_state = try!(_serde::Serializer::serialize_struct( + let mut __serde_state = _serde::Serializer::serialize_struct( __serializer, #name, #len + 1, - )); - try!(_serde::ser::SerializeStruct::serialize_field( + )?; + _serde::ser::SerializeStruct::serialize_field( &mut __serde_state, #tag, #variant_name, - )); + )?; #(#serialize_fields)* _serde::ser::SerializeStruct::end(__serde_state) } } StructVariant::Untagged => { quote_block! { - let #let_mut __serde_state = try!(_serde::Serializer::serialize_struct( + let #let_mut __serde_state = _serde::Serializer::serialize_struct( __serializer, #name, #len, - )); + )?; #(#serialize_fields)* _serde::ser::SerializeStruct::end(__serde_state) } @@ -995,9 +1011,9 @@ fn serialize_struct_variant_with_flatten( __S: _serde::Serializer, { let (#(#members,)*) = self.data; - let #let_mut __serde_state = try!(_serde::Serializer::serialize_map( + let #let_mut __serde_state = _serde::Serializer::serialize_map( __serializer, - _serde::__private::None)); + _serde::__private::None)?; #(#serialize_fields)* _serde::ser::SerializeMap::end(__serde_state) } @@ -1016,23 +1032,23 @@ fn serialize_struct_variant_with_flatten( } StructVariant::InternallyTagged { tag, variant_name } => { quote_block! 
{ - let #let_mut __serde_state = try!(_serde::Serializer::serialize_map( + let #let_mut __serde_state = _serde::Serializer::serialize_map( __serializer, - _serde::__private::None)); - try!(_serde::ser::SerializeMap::serialize_entry( + _serde::__private::None)?; + _serde::ser::SerializeMap::serialize_entry( &mut __serde_state, #tag, #variant_name, - )); + )?; #(#serialize_fields)* _serde::ser::SerializeMap::end(__serde_state) } } StructVariant::Untagged => { quote_block! { - let #let_mut __serde_state = try!(_serde::Serializer::serialize_map( + let #let_mut __serde_state = _serde::Serializer::serialize_map( __serializer, - _serde::__private::None)); + _serde::__private::None)?; #(#serialize_fields)* _serde::ser::SerializeMap::end(__serde_state) } @@ -1077,7 +1093,7 @@ fn serialize_tuple_struct_visitor( let span = field.original.span(); let func = tuple_trait.serialize_element(span); let ser = quote! { - try!(#func(&mut __serde_state, #field_expr)); + #func(&mut __serde_state, #field_expr)?; }; match skip { @@ -1121,12 +1137,12 @@ fn serialize_struct_visitor( let ser = if field.attrs.flatten() { let func = quote_spanned!(span=> _serde::Serialize::serialize); quote! { - try!(#func(&#field_expr, _serde::__private::ser::FlatMapSerializer(&mut __serde_state))); + #func(&#field_expr, _serde::__private::ser::FlatMapSerializer(&mut __serde_state))?; } } else { let func = struct_trait.serialize_field(span); quote! 
{ - try!(#func(&mut __serde_state, #key_expr, #field_expr)); + #func(&mut __serde_state, #key_expr, #field_expr)?; } }; @@ -1138,7 +1154,7 @@ fn serialize_struct_visitor( if !#skip { #ser } else { - try!(#skip_func(&mut __serde_state, #key_expr)); + #skip_func(&mut __serde_state, #key_expr)?; } } } else { @@ -1238,7 +1254,7 @@ fn wrap_serialize_with( // Serialization of an empty struct results in code like: // -// let mut __serde_state = try!(serializer.serialize_struct("S", 0)); +// let mut __serde_state = serializer.serialize_struct("S", 0)?; // _serde::ser::SerializeStruct::end(__serde_state) // // where we want to omit the `mut` to avoid a warning. diff --git a/vendor/serde_derive/src/this.rs b/vendor/serde_derive/src/this.rs index 32731d0..941cea4 100644 --- a/vendor/serde_derive/src/this.rs +++ b/vendor/serde_derive/src/this.rs @@ -1,4 +1,4 @@ -use internals::ast::Container; +use crate::internals::ast::Container; use syn::{Path, PathArguments, Token}; pub fn this_type(cont: &Container) -> Path { diff --git a/vendor/serde_derive/src/try.rs b/vendor/serde_derive/src/try.rs deleted file mode 100644 index 48cceeb..0000000 --- a/vendor/serde_derive/src/try.rs +++ /dev/null @@ -1,24 +0,0 @@ -use proc_macro2::{Punct, Spacing, TokenStream}; - -// None of our generated code requires the `From::from` error conversion -// performed by the standard library's `try!` macro. With this simplified macro -// we see a significant improvement in type checking and borrow checking time of -// the generated code and a slight improvement in binary size. -pub fn replacement() -> TokenStream { - // Cannot pass `$expr` to `quote!` prior to Rust 1.17.0 so interpolate it. - let dollar = Punct::new('$', Spacing::Alone); - - quote! { - #[allow(unused_macros)] - macro_rules! 
try { - (#dollar __expr:expr) => { - match #dollar __expr { - _serde::__private::Ok(__val) => __val, - _serde::__private::Err(__err) => { - return _serde::__private::Err(__err); - } - } - } - } - } -} diff --git a/vendor/serde_json/.cargo-checksum.json b/vendor/serde_json/.cargo-checksum.json index e913f1a..66cb2c7 100644 --- a/vendor/serde_json/.cargo-checksum.json +++ b/vendor/serde_json/.cargo-checksum.json @@ -1 +1 @@ -{"files":{"CONTRIBUTING.md":"f5270cafba66223a7b51ffc0d286075a17bb7cd88762fc80d333d3102629f4d8","Cargo.toml":"ca84cc19e030dea31745bd2de0171b64b1d05770ac09c2e0b772cbe9964b902c","LICENSE-APACHE":"62c7a1e35f56406896d7aa7ca52d0cc0d272ac022b5d2796e7d6905db8a3636a","LICENSE-MIT":"23f18e03dc49df91622fe2a76176497404e46ced8a715d9d2b67a7446571cca3","README.md":"1e5a86e6b5e79f92f9e7226d9a8ba16d4ec70567d153c9cacebcb582770378a1","build.rs":"0dd97b63a07aa2d4bbb4c7d0e73b521da130769da9f49f28a7b63332781eb3de","src/de.rs":"5dba95fc6a564917289bf1e404d59c74f95772f22ec92cb91d55053b65e93032","src/error.rs":"1aee95e7bd59d75112bd29319dd40e32b0e27f0e2af8d203757c9f1a73e5c0a0","src/features_check/error.rs":"d7359f864afbfe105a38abea9f563dc423036ebc4c956a5695a4beef144dc7ec","src/features_check/mod.rs":"2209f8d5c46b50c8a3b8dc22338dcaf0135d192e8b05d2f456cbe6a73104e958","src/io/core.rs":"9a4146802391fd202a36bebbf3b14b715ae09d8828cbe8e06a01214c470ebf5c","src/io/mod.rs":"fd1ed5080495cab21117f6f7d3c2c9e3687cad0c69a0cd087b08a145a9e672da","src/iter.rs":"f832c469cd7999d26ba9b76baa69b257a212a7edb3dfdf9b1d1bb35e8da85fa9","src/lexical/algorithm.rs":"4fbeb1994049348d1fc388dd1a29e481f8abb8fe1e28bfebf50f3bbce5fa5fbe","src/lexical/bhcomp.rs":"b7c68d74c0055eb67ec2c1bcf27bbc28bef8f1bbc43db8eb94ba69892230add6","src/lexical/bignum.rs":"4230cde10dc8eae456a713cf90ec4e48dff4b1d0c542621ce7f00f39ade2645e","src/lexical/cached.rs":"0e127398691f8042c19cde209e7f4b0161f0f3150342430145929f711e6fdac8","src/lexical/cached_float80.rs":"0f8f74a22cb7d871322a9893bffd0255ca10bf9dffd13afb2462dd3d7f51805f","src/lexi
cal/digit.rs":"a265b9072194a62a67dfc4df3c86d4213097cf3f82280d025e0012a5a262fd9d","src/lexical/errors.rs":"6bc993febceb7dd96ac1c8c5c53b5f5a30297016c0f813ed8ff8d7938d01534d","src/lexical/exponent.rs":"387e945b97dc7ba48a7091c50d228a0dde3a1c4145703d4ab9c31191a91693b0","src/lexical/float.rs":"fe356213c92a049f4bef2f58bc0e3a26866ca06b8c1d74d0f961c5b883852cad","src/lexical/large_powers.rs":"34537f5c701afce1ec2a1fd3c14950381b2e27c9ad74f002c91f3708e8da9ca5","src/lexical/large_powers32.rs":"d533037c6141e6671102aee490c9cdeaba81e667ddca781b2b99db2c455e4a1a","src/lexical/large_powers64.rs":"745dd7c0cbe499eec027ef586248881011d9df20c7efab7929c1807b59886ba1","src/lexical/math.rs":"27e22b724cdf990cdacd0ccfc3749e6e2eb7529d43ebf6e95b1999560b9e199a","src/lexical/mod.rs":"4b4c5228779c0f135a4cb018700e3bcd495da48b74421a86f6b8b304acdef924","src/lexical/num.rs":"cf705c62612e31d704f43d94a633ea1243c6befad7ef5792e2e881a7fd21e809","src/lexical/parse.rs":"c2bfac4c70a19938ced61e991f4ec606764887cf12bac1a0978b5b5318a56aac","src/lexical/rounding.rs":"697207248ba17b7f4965aedb11d276261ada5b06d9c6265d8fd6246664ff6e3c","src/lexical/shift.rs":"bc1ed053dd63d45ac9c35302f18de9f00d94027f28af4ab749c9248439de832a","src/lexical/small_powers.rs":"4608dd218b8002435db7e1ec79d2d0fef5f47ae257b93353326d52ecc80cccda","src/lib.rs":"7d97b4be9e309d10f259bb877c3dfa435ca5479318ca1ea826ff220a3e41cd4b","src/macros.rs":"c9f23156faec8d5216d72b6a97eebd768efb3f75870a6e2beed824308587b998","src/map.rs":"14fa16650b462ef138bebe1d18cb296b0e1ff404f12f2c212f72ed7c969b3a12","src/number.rs":"50633e05cfa4be84b69eb0321cf176ee0a34f7120046c2f927ebdbb625c60048","src/raw.rs":"6d46836486b8d1c58f2aff563285400b1b0ec163eee34e7be78e0fa7a99aa0e8","src/read.rs":"49b4b1d067b6485cbded28fb961666ab5df82c36661af722dbae756efb6b2891","src/ser.rs":"d5b7d883c31ddbd1c46ab71a3247484ea711b1cf1e4cfd80ee568cccf709ba3d","src/value/de.rs":"78f938d960e285f671f3b86ed173d598a815690a14512d6daa94dee43d3ce4cf","src/value/from.rs":"eceab6531422c820a443981c2a71c9ba01e032fbbd
38884d1ac6d03175d56244","src/value/index.rs":"8a99d8d50f5674181ced22f6e81dc529eaecb01e543e30346e51fe42cb4b8a5f","src/value/mod.rs":"81c62fcb50e92b2f424e361328df5b02621756781bf80b8e26fd3d13473b57cd","src/value/partial_eq.rs":"b64e1e1dffd50fefe3106ca74499aac01ea3d76b995d3cc1ef3a2b2768d0f1cf","src/value/ser.rs":"e06311be9ace2f3805ec7aacf9a1663ace5494ffae1b7dc16870f96b6d6561e1","tests/compiletest.rs":"022a8e400ef813d7ea1875b944549cee5125f6a995dc33e93b48cba3e1b57bd1","tests/debug.rs":"a8451217c1e127ad6e653ef11e0513525ee350e1e37dd575758a8ee9301b28fb","tests/lexical.rs":"b65eaef5edbc3294751c6ddc0a51eaf8de9165d65955f87c2e3b2679a49de3b0","tests/lexical/algorithm.rs":"da378df9ee24bfa033968d5c94e91b58e52c39bf6c825dec51c3eb7250cc5874","tests/lexical/exponent.rs":"26ea92abc654a6a88a8281552bca2f76ea1fa4c17d66a1dd6defe14f7d89b666","tests/lexical/float.rs":"0440f2d85c993bcccd925096d7f4136bf624ffd66b3c7ee565d158390685eb11","tests/lexical/math.rs":"4874be2103be5fbe8b8015354414df271ffa00fd815546fc077f15fb4d7a5a37","tests/lexical/num.rs":"6e650c40de85ed72ac06b6bf1487ba161f3824e26d827df6cfdf2bbdb8d05a05","tests/lexical/parse.rs":"17c73e0a59d462716d974b8dd23a291eb6efdc3a933248874e5eab7e7209d67f","tests/lexical/rounding.rs":"6c56e39ba534616c1b2146e8efa6eb57aed322e683bf23183cd32a61fae6447e","tests/macros/mod.rs":"93aa1d54af20bc2c55b6ae8db73c1414cda2626eb9fa7bd57b9d613a3c6e6a19","tests/map.rs":"dcc5212242e4e93703c4335d54f5603b0211b33d6fb5ab410bb630cda6d46b09","tests/regression.rs":"86731134bfb9bb693d9a4fc62393027de80a8bf031109ea6c7ea475b1ebdde8d","tests/regression/issue1004.rs":"38d7e3b6c515b881078ebd21ca8063d2ca105cd319695d29538f879e37f091b5","tests/regression/issue520.rs":"d146be3472db902b48127d65fe83aa9f698143aca9074c83cd1a9d5dd28e3ec3","tests/regression/issue795.rs":"582e2e7c68113f05a4b1d2cb556a2df7cc77f2ce8164a32c5cc58ae68abb60ec","tests/regression/issue845.rs":"8bd64588fc344e119d0e9e5e7604236e7c168c574b0692033f15278e216a6b9c","tests/regression/issue953.rs":"b2cddc761f5ca6639900c173765
a8a5868528a896924e5e925db2696469208f7","tests/stream.rs":"c7d91014538ecd8f495b196d40e999ab2745f2e69fa2ff9e52521605dc6ce856","tests/test.rs":"0fb9f0470b9ee1a8dd29c4ff5ded04da84a4d477db75a5e96705950c50e0ade2","tests/ui/missing_colon.rs":"d07e0c34d98eb43465f0a0310f2c0b5d5b0d26d243b352a1c6bbe6ad3b27eda9","tests/ui/missing_colon.stderr":"3732fd8f4e57b84efc07170cda5f9c5b2b17c707e23c1659222b5a46f652a8d8","tests/ui/missing_comma.rs":"b8a9662f99c3e6dd2b6417892c37640578ce91d3a8365bf10c1f686a3227aa87","tests/ui/missing_comma.stderr":"eae626cf93c97abd105066e624ca4e8cb096784413b9d2564cf9414a8492bc4d","tests/ui/missing_value.rs":"bca25d67127fb88e7c191c7b03af5a4ce8a9abb630f3d2e6a6c1e77e213dc9a4","tests/ui/missing_value.stderr":"b0df8add5cf74e5df30eedd3ca347e4862c04a01c54d802ff45392f2032065b1","tests/ui/not_found.rs":"d0a7adb309879ff65aee115b52cc33d36f4bad353cf97c4effc34a6128c2bee3","tests/ui/not_found.stderr":"359b751c0c21fab6d460daef4d5f73a265f7769c9b578f98ea3cb6cbf2387643","tests/ui/parse_expr.rs":"32e6d51f528db3d1ab0ed1e24765b865be393565c26f77413c5aa39d601ac563","tests/ui/parse_expr.stderr":"4fcd0a014fbce31c9266bab8527d6e6b6806a0e21d9e0275ce713137856073ce","tests/ui/parse_key.rs":"18829b2af320d5cf8a0a5cd3aaf84c7e92cc874651c30e45a3acafb76c2d8b93","tests/ui/parse_key.stderr":"fcb44e060b804a4762b7291e128c41d7010ffa8ab820b8828fd13fbe6d405ca6","tests/ui/unexpected_after_array_element.rs":"a343fc3104431720bdfcf330bcc3cfcd98c8dec3e951133b495242478b0b7eb3","tests/ui/unexpected_after_array_element.stderr":"8df615998fa3057bb9ed865981a35cdbb771625337048f0ad3fba7734e607adf","tests/ui/unexpected_after_map_entry.rs":"6e3bd2def435ca610e346bbc75cdbaf61963eb2ef1885bb5f76781ba1fac37ef","tests/ui/unexpected_after_map_entry.stderr":"b1985c89075ab48b2158bd1705ed766d37854b3d4620ab257cc8bc319d224f17","tests/ui/unexpected_colon.rs":"a313cff3fed4be4c33f1eda5d0c5c98147fb835a56d36470d9f367352c1d61ef","tests/ui/unexpected_colon.stderr":"b2288742fa6a4a7eb65d2ae899bcfed8795b57bd04958da227d60928a8df26c5","te
sts/ui/unexpected_comma.rs":"55a8b684bde1ce905837cce719fd457d8898b61cebc27e5b420d05cb6be97256","tests/ui/unexpected_comma.stderr":"4c103ca63ff15e2ca659242cc0eae0612bf050e7580da62f1cf50de8082aa7dc"},"package":"057d394a50403bcac12672b2b18fb387ab6d289d957dab67dd201875391e52f1"} \ No newline at end of file +{"files":{"CONTRIBUTING.md":"f5270cafba66223a7b51ffc0d286075a17bb7cd88762fc80d333d3102629f4d8","Cargo.toml":"2005ca22cf05f763439fe53e9ff7b8e757b8a27cf1e5455500614fac313f07a8","LICENSE-APACHE":"62c7a1e35f56406896d7aa7ca52d0cc0d272ac022b5d2796e7d6905db8a3636a","LICENSE-MIT":"23f18e03dc49df91622fe2a76176497404e46ced8a715d9d2b67a7446571cca3","README.md":"e49066294ebbeca4e71e5cb8f2e43b5f7a41cd0b47eeb1bad7b24d3c136bd8c0","build.rs":"9fa36e0b1bb7026b449a20986ae2f60b3046bcbcf3bf22b5c0e372eb892eaf57","src/de.rs":"02b550b12e941f9cedfc36b5c127ec27dbc4b54d3de2fae75b7adcc08333551f","src/error.rs":"a9b5de0a82f95608b51b8e8875c7c49f94fb60b9f976fc6277aec0213926dec9","src/features_check/error.rs":"d7359f864afbfe105a38abea9f563dc423036ebc4c956a5695a4beef144dc7ec","src/features_check/mod.rs":"2209f8d5c46b50c8a3b8dc22338dcaf0135d192e8b05d2f456cbe6a73104e958","src/io/core.rs":"60ba28f67a9acaecf8964b611efba416b13f9f2bae4befc329fdf0e037293802","src/io/mod.rs":"fd1ed5080495cab21117f6f7d3c2c9e3687cad0c69a0cd087b08a145a9e672da","src/iter.rs":"f832c469cd7999d26ba9b76baa69b257a212a7edb3dfdf9b1d1bb35e8da85fa9","src/lexical/algorithm.rs":"bd6106e5d8875c9ff1c1d57256b459a4f0992d14a0df1a5fffcd3d3cbdccee8c","src/lexical/bhcomp.rs":"b7c68d74c0055eb67ec2c1bcf27bbc28bef8f1bbc43db8eb94ba69892230add6","src/lexical/bignum.rs":"db688e8112389998d0f91906f6857e28f9b510a8b4065ad476c8e8be2f77becf","src/lexical/cached.rs":"0e127398691f8042c19cde209e7f4b0161f0f3150342430145929f711e6fdac8","src/lexical/cached_float80.rs":"0f8f74a22cb7d871322a9893bffd0255ca10bf9dffd13afb2462dd3d7f51805f","src/lexical/digit.rs":"9502805adbc3da059131d1fac0a802e17065b36cd7472606b3af24e3241d5cb8","src/lexical/errors.rs":"3d9f6de6245533bc
b101dfd718cfed61d59dc293f6768cedae28aa13ace164f5","src/lexical/exponent.rs":"387e945b97dc7ba48a7091c50d228a0dde3a1c4145703d4ab9c31191a91693b0","src/lexical/float.rs":"fe356213c92a049f4bef2f58bc0e3a26866ca06b8c1d74d0f961c5b883852cad","src/lexical/large_powers.rs":"34537f5c701afce1ec2a1fd3c14950381b2e27c9ad74f002c91f3708e8da9ca5","src/lexical/large_powers32.rs":"d533037c6141e6671102aee490c9cdeaba81e667ddca781b2b99db2c455e4a1a","src/lexical/large_powers64.rs":"745dd7c0cbe499eec027ef586248881011d9df20c7efab7929c1807b59886ba1","src/lexical/math.rs":"240804aa030849495fa03a83a0ee8539d5a5c8639b825f2d69d27b7567b06fb3","src/lexical/mod.rs":"4b4c5228779c0f135a4cb018700e3bcd495da48b74421a86f6b8b304acdef924","src/lexical/num.rs":"dd9f4357b8d5bead413844d4015176d484e907e1a393cf7f583de5c5ccafb2e3","src/lexical/parse.rs":"c2bfac4c70a19938ced61e991f4ec606764887cf12bac1a0978b5b5318a56aac","src/lexical/rounding.rs":"697207248ba17b7f4965aedb11d276261ada5b06d9c6265d8fd6246664ff6e3c","src/lexical/shift.rs":"bc1ed053dd63d45ac9c35302f18de9f00d94027f28af4ab749c9248439de832a","src/lexical/small_powers.rs":"4608dd218b8002435db7e1ec79d2d0fef5f47ae257b93353326d52ecc80cccda","src/lib.rs":"27ce6be2efdc996bbb254087cb569f1eccb61d3331156eb9e31fac1b5427ec90","src/macros.rs":"516f69976f433bcc5e48c32b3e29c2e0ab7b549810827d7a9c59171cdf11c1e2","src/map.rs":"aeb07107ba949330e72f4d669d46b215990f83aa4055e06d1da69f2a32ddf642","src/number.rs":"464e576953bc3d3df228d88890dd2fe4da123479f5756e03e06c7f7633959156","src/raw.rs":"4183bd3e8b7c97605ec5bab4e7fbdb2a09e00bdc53984fbe2066674510ea74ae","src/read.rs":"55f9ee8a12558ac461fc51488b151b9db30cb47d33afa98023989ace585b1101","src/ser.rs":"ee5b6cb04c2ff7058b52cb5d0383218f484505ce0896e5c7fd5076bd9d90952e","src/value/de.rs":"69d88d0ba8a8fea15964e0c95968e53ab308b143176537bfa6368432a682a65c","src/value/from.rs":"add9687e35db3962729d986c8aa54785cadeb07b8b7121cd4075e3a37ecc73a1","src/value/index.rs":"1a0d59629ae16d6553686de8d7152abd470153f67f9a1b69741e480ba491cd67","src/value
/mod.rs":"c1b08509df5ae1ffb5c5e148ac29791923c92eb9957935683739a95bef0007e9","src/value/partial_eq.rs":"655fd0bf3ab1d6669444a55ab849f43bb333032de8ca8f1ee95e1068da43ee22","src/value/ser.rs":"6b065423acf88d875d30608d61210a372507c7242d6525e5ef66077296ba32f1","tests/compiletest.rs":"022a8e400ef813d7ea1875b944549cee5125f6a995dc33e93b48cba3e1b57bd1","tests/debug.rs":"a8451217c1e127ad6e653ef11e0513525ee350e1e37dd575758a8ee9301b28fb","tests/lexical.rs":"8ee6e617ef62a090de49ac2a930130a6913ab5316100781543c7788f89ef99c2","tests/lexical/algorithm.rs":"da378df9ee24bfa033968d5c94e91b58e52c39bf6c825dec51c3eb7250cc5874","tests/lexical/exponent.rs":"26ea92abc654a6a88a8281552bca2f76ea1fa4c17d66a1dd6defe14f7d89b666","tests/lexical/float.rs":"0440f2d85c993bcccd925096d7f4136bf624ffd66b3c7ee565d158390685eb11","tests/lexical/math.rs":"4874be2103be5fbe8b8015354414df271ffa00fd815546fc077f15fb4d7a5a37","tests/lexical/num.rs":"6e650c40de85ed72ac06b6bf1487ba161f3824e26d827df6cfdf2bbdb8d05a05","tests/lexical/parse.rs":"a11f09bb003a3a024548008cf78bf76526ed71b00077d1989f45eb8cebc93b9c","tests/lexical/rounding.rs":"6c56e39ba534616c1b2146e8efa6eb57aed322e683bf23183cd32a61fae6447e","tests/macros/mod.rs":"93aa1d54af20bc2c55b6ae8db73c1414cda2626eb9fa7bd57b9d613a3c6e6a19","tests/map.rs":"89f604c5788bcb8dc82c82e252dc0da47257986e353c09d14e4ef3e58c455f2d","tests/regression.rs":"86731134bfb9bb693d9a4fc62393027de80a8bf031109ea6c7ea475b1ebdde8d","tests/regression/issue1004.rs":"38d7e3b6c515b881078ebd21ca8063d2ca105cd319695d29538f879e37f091b5","tests/regression/issue520.rs":"d146be3472db902b48127d65fe83aa9f698143aca9074c83cd1a9d5dd28e3ec3","tests/regression/issue795.rs":"582e2e7c68113f05a4b1d2cb556a2df7cc77f2ce8164a32c5cc58ae68abb60ec","tests/regression/issue845.rs":"66eb0eeabb744adaad42fd8e2638de22b458a04ec33863e2683b60eb3d500297","tests/regression/issue953.rs":"b2cddc761f5ca6639900c173765a8a5868528a896924e5e925db2696469208f7","tests/stream.rs":"c7d91014538ecd8f495b196d40e999ab2745f2e69fa2ff9e52521605dc6ce856
","tests/test.rs":"fc9175aeadd7dcf4fc756c41de21e9e23be24cf6d36149fa6ca401917ded0b54","tests/ui/missing_colon.rs":"d07e0c34d98eb43465f0a0310f2c0b5d5b0d26d243b352a1c6bbe6ad3b27eda9","tests/ui/missing_colon.stderr":"3732fd8f4e57b84efc07170cda5f9c5b2b17c707e23c1659222b5a46f652a8d8","tests/ui/missing_comma.rs":"b8a9662f99c3e6dd2b6417892c37640578ce91d3a8365bf10c1f686a3227aa87","tests/ui/missing_comma.stderr":"eae626cf93c97abd105066e624ca4e8cb096784413b9d2564cf9414a8492bc4d","tests/ui/missing_value.rs":"bca25d67127fb88e7c191c7b03af5a4ce8a9abb630f3d2e6a6c1e77e213dc9a4","tests/ui/missing_value.stderr":"b0df8add5cf74e5df30eedd3ca347e4862c04a01c54d802ff45392f2032065b1","tests/ui/not_found.rs":"d0a7adb309879ff65aee115b52cc33d36f4bad353cf97c4effc34a6128c2bee3","tests/ui/not_found.stderr":"359b751c0c21fab6d460daef4d5f73a265f7769c9b578f98ea3cb6cbf2387643","tests/ui/parse_expr.rs":"32e6d51f528db3d1ab0ed1e24765b865be393565c26f77413c5aa39d601ac563","tests/ui/parse_expr.stderr":"4fcd0a014fbce31c9266bab8527d6e6b6806a0e21d9e0275ce713137856073ce","tests/ui/parse_key.rs":"18829b2af320d5cf8a0a5cd3aaf84c7e92cc874651c30e45a3acafb76c2d8b93","tests/ui/parse_key.stderr":"20cf0d2898749f3c36780fc065f5049ee809e74cb6f0ef776f43f45e01596ee3","tests/ui/unexpected_after_array_element.rs":"a343fc3104431720bdfcf330bcc3cfcd98c8dec3e951133b495242478b0b7eb3","tests/ui/unexpected_after_array_element.stderr":"8df615998fa3057bb9ed865981a35cdbb771625337048f0ad3fba7734e607adf","tests/ui/unexpected_after_map_entry.rs":"6e3bd2def435ca610e346bbc75cdbaf61963eb2ef1885bb5f76781ba1fac37ef","tests/ui/unexpected_after_map_entry.stderr":"b1985c89075ab48b2158bd1705ed766d37854b3d4620ab257cc8bc319d224f17","tests/ui/unexpected_colon.rs":"a313cff3fed4be4c33f1eda5d0c5c98147fb835a56d36470d9f367352c1d61ef","tests/ui/unexpected_colon.stderr":"b2288742fa6a4a7eb65d2ae899bcfed8795b57bd04958da227d60928a8df26c5","tests/ui/unexpected_comma.rs":"55a8b684bde1ce905837cce719fd457d8898b61cebc27e5b420d05cb6be97256","tests/ui/unexpected_comma.
stderr":"4c103ca63ff15e2ca659242cc0eae0612bf050e7580da62f1cf50de8082aa7dc"},"package":"c5f09b1bd632ef549eaa9f60a1f8de742bdbc698e6cee2095fc84dde5f549ae0"} \ No newline at end of file diff --git a/vendor/serde_json/Cargo.toml b/vendor/serde_json/Cargo.toml index 272dd25..abf3cd9 100644 --- a/vendor/serde_json/Cargo.toml +++ b/vendor/serde_json/Cargo.toml @@ -10,10 +10,10 @@ # See Cargo.toml.orig for the original contents. [package] -edition = "2018" -rust-version = "1.36" +edition = "2021" +rust-version = "1.56" name = "serde_json" -version = "1.0.96" +version = "1.0.114" authors = [ "Erick Tryzelaar <erick.tryzelaar@gmail.com>", "David Tolnay <dtolnay@gmail.com>", @@ -36,12 +36,14 @@ repository = "https://github.com/serde-rs/json" [package.metadata.docs.rs] features = [ + "preserve_order", "raw_value", "unbounded_depth", ] rustdoc-args = [ "--cfg", "docsrs", + "--generate-link-to-definition", ] targets = ["x86_64-unknown-linux-gnu"] @@ -52,8 +54,7 @@ features = ["raw_value"] doc-scrape-examples = false [dependencies.indexmap] -version = "1.5.2" -features = ["std"] +version = "2.2.1" optional = true [dependencies.itoa] @@ -63,36 +64,36 @@ version = "1.0" version = "1.0" [dependencies.serde] -version = "1.0.100" +version = "1.0.194" default-features = false [dev-dependencies.automod] -version = "1.0" +version = "1.0.11" [dev-dependencies.indoc] -version = "2.0" +version = "2.0.2" [dev-dependencies.ref-cast] -version = "1.0" +version = "1.0.18" [dev-dependencies.rustversion] -version = "1.0" +version = "1.0.13" [dev-dependencies.serde] -version = "1.0.100" +version = "1.0.194" features = ["derive"] [dev-dependencies.serde_bytes] -version = "0.11" +version = "0.11.10" [dev-dependencies.serde_derive] -version = "1.0" +version = "1.0.166" [dev-dependencies.serde_stacker] -version = "0.1" +version = "0.1.8" [dev-dependencies.trybuild] -version = "1.0.49" +version = "1.0.81" features = ["diff"] [features] diff --git a/vendor/serde_json/README.md 
b/vendor/serde_json/README.md index d704979..a3ba288 100644 --- a/vendor/serde_json/README.md +++ b/vendor/serde_json/README.md @@ -76,7 +76,7 @@ enum Value { A string of JSON data can be parsed into a `serde_json::Value` by the [`serde_json::from_str`][from_str] function. There is also -[`from_slice`][from_slice] for parsing from a byte slice &[u8] and +[`from_slice`][from_slice] for parsing from a byte slice &\[u8\] and [`from_reader`][from_reader] for parsing from any `io::Read` like a File or a TCP stream. diff --git a/vendor/serde_json/build.rs b/vendor/serde_json/build.rs index 0e12602..dd09e62 100644 --- a/vendor/serde_json/build.rs +++ b/vendor/serde_json/build.rs @@ -1,6 +1,4 @@ use std::env; -use std::process::Command; -use std::str::{self, FromStr}; fn main() { println!("cargo:rerun-if-changed=build.rs"); @@ -9,46 +7,11 @@ fn main() { // src/lexical/math.rs for where this has an effect. let target_arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap(); match target_arch.as_str() { - "aarch64" | "mips64" | "powerpc64" | "x86_64" => { + "aarch64" | "mips64" | "powerpc64" | "x86_64" | "loongarch64" => { println!("cargo:rustc-cfg=limb_width_64"); } _ => { println!("cargo:rustc-cfg=limb_width_32"); } } - - let minor = match rustc_minor_version() { - Some(minor) => minor, - None => return, - }; - - // BTreeMap::get_key_value - // https://blog.rust-lang.org/2019/12/19/Rust-1.40.0.html#additions-to-the-standard-library - if minor < 40 { - println!("cargo:rustc-cfg=no_btreemap_get_key_value"); - } - - // BTreeMap::remove_entry - // https://blog.rust-lang.org/2020/07/16/Rust-1.45.0.html#library-changes - if minor < 45 { - println!("cargo:rustc-cfg=no_btreemap_remove_entry"); - } - - // BTreeMap::retain - // https://blog.rust-lang.org/2021/06/17/Rust-1.53.0.html#stabilized-apis - if minor < 53 { - println!("cargo:rustc-cfg=no_btreemap_retain"); - } -} - -fn rustc_minor_version() -> Option<u32> { - let rustc = env::var_os("RUSTC")?; - let output = 
Command::new(rustc).arg("--version").output().ok()?; - let version = str::from_utf8(&output.stdout).ok()?; - let mut pieces = version.split('.'); - if pieces.next() != Some("rustc 1") { - return None; - } - let next = pieces.next()?; - u32::from_str(next).ok() } diff --git a/vendor/serde_json/src/de.rs b/vendor/serde_json/src/de.rs index 88d0f26..7154f84 100644 --- a/vendor/serde_json/src/de.rs +++ b/vendor/serde_json/src/de.rs @@ -22,6 +22,7 @@ use crate::number::NumberDeserializer; pub use crate::read::{Read, SliceRead, StrRead}; #[cfg(feature = "std")] +#[cfg_attr(docsrs, doc(cfg(feature = "std")))] pub use crate::read::IoRead; ////////////////////////////////////////////////////////////////////////////// @@ -209,7 +210,7 @@ impl<'de, R: Read<'de>> Deserializer<R> { self.disable_recursion_limit = true; } - fn peek(&mut self) -> Result<Option<u8>> { + pub(crate) fn peek(&mut self) -> Result<Option<u8>> { self.read.peek() } @@ -248,7 +249,7 @@ impl<'de, R: Read<'de>> Deserializer<R> { fn parse_whitespace(&mut self) -> Result<Option<u8>> { loop { match tri!(self.peek()) { - Some(b' ') | Some(b'\n') | Some(b'\t') | Some(b'\r') => { + Some(b' ' | b'\n' | b'\t' | b'\r') => { self.eat_char(); } other => { @@ -309,9 +310,9 @@ impl<'de, R: Read<'de>> Deserializer<R> { self.fix_position(err) } - fn deserialize_number<V>(&mut self, visitor: V) -> Result<V::Value> + pub(crate) fn deserialize_number<'any, V>(&mut self, visitor: V) -> Result<V::Value> where - V: de::Visitor<'de>, + V: de::Visitor<'any>, { let peek = match tri!(self.parse_whitespace()) { Some(b) => b, @@ -335,6 +336,79 @@ impl<'de, R: Read<'de>> Deserializer<R> { } } + #[cfg(feature = "float_roundtrip")] + pub(crate) fn do_deserialize_f32<'any, V>(&mut self, visitor: V) -> Result<V::Value> + where + V: de::Visitor<'any>, + { + self.single_precision = true; + let val = self.deserialize_number(visitor); + self.single_precision = false; + val + } + + pub(crate) fn do_deserialize_i128<'any, V>(&mut self, visitor: 
V) -> Result<V::Value> + where + V: de::Visitor<'any>, + { + let mut buf = String::new(); + + match tri!(self.parse_whitespace()) { + Some(b'-') => { + self.eat_char(); + buf.push('-'); + } + Some(_) => {} + None => { + return Err(self.peek_error(ErrorCode::EofWhileParsingValue)); + } + }; + + tri!(self.scan_integer128(&mut buf)); + + let value = match buf.parse() { + Ok(int) => visitor.visit_i128(int), + Err(_) => { + return Err(self.error(ErrorCode::NumberOutOfRange)); + } + }; + + match value { + Ok(value) => Ok(value), + Err(err) => Err(self.fix_position(err)), + } + } + + pub(crate) fn do_deserialize_u128<'any, V>(&mut self, visitor: V) -> Result<V::Value> + where + V: de::Visitor<'any>, + { + match tri!(self.parse_whitespace()) { + Some(b'-') => { + return Err(self.peek_error(ErrorCode::NumberOutOfRange)); + } + Some(_) => {} + None => { + return Err(self.peek_error(ErrorCode::EofWhileParsingValue)); + } + } + + let mut buf = String::new(); + tri!(self.scan_integer128(&mut buf)); + + let value = match buf.parse() { + Ok(int) => visitor.visit_u128(int), + Err(_) => { + return Err(self.error(ErrorCode::NumberOutOfRange)); + } + }; + + match value { + Ok(value) => Ok(value), + Err(err) => Err(self.fix_position(err)), + } + } + fn scan_integer128(&mut self, buf: &mut String) -> Result<()> { match tri!(self.next_char_or_null()) { b'0' => { @@ -860,7 +934,7 @@ impl<'de, R: Read<'de>> Deserializer<R> { if !positive { buf.push('-'); } - self.scan_integer(&mut buf)?; + tri!(self.scan_integer(&mut buf)); if positive { if let Ok(unsigned) = buf.parse() { return Ok(ParserNumber::U64(unsigned)); @@ -913,7 +987,7 @@ impl<'de, R: Read<'de>> Deserializer<R> { fn scan_number(&mut self, buf: &mut String) -> Result<()> { match tri!(self.peek_or_null()) { b'.' 
=> self.scan_decimal(buf), - e @ b'e' | e @ b'E' => self.scan_exponent(e as char, buf), + e @ (b'e' | b'E') => self.scan_exponent(e as char, buf), _ => Ok(()), } } @@ -938,7 +1012,7 @@ impl<'de, R: Read<'de>> Deserializer<R> { } match tri!(self.peek_or_null()) { - e @ b'e' | e @ b'E' => self.scan_exponent(e as char, buf), + e @ (b'e' | b'E') => self.scan_exponent(e as char, buf), _ => Ok(()), } } @@ -1059,7 +1133,7 @@ impl<'de, R: Read<'de>> Deserializer<R> { tri!(self.read.ignore_str()); None } - frame @ b'[' | frame @ b'{' => { + frame @ (b'[' | b'{') => { self.scratch.extend(enclosing.take()); self.eat_char(); Some(frame) @@ -1204,9 +1278,9 @@ impl<'de, R: Read<'de>> Deserializer<R> { where V: de::Visitor<'de>, { - self.parse_whitespace()?; + tri!(self.parse_whitespace()); self.read.begin_raw_buffering(); - self.ignore_value()?; + tri!(self.ignore_value()); self.read.end_raw_buffering(visitor) } } @@ -1258,11 +1332,15 @@ static POW10: [f64; 309] = [ macro_rules! deserialize_number { ($method:ident) => { + deserialize_number!($method, deserialize_number); + }; + + ($method:ident, $using:ident) => { fn $method<V>(self, visitor: V) -> Result<V::Value> where V: de::Visitor<'de>, { - self.deserialize_number(visitor) + self.$using(visitor) } }; } @@ -1424,77 +1502,9 @@ impl<'de, 'a, R: Read<'de>> de::Deserializer<'de> for &'a mut Deserializer<R> { deserialize_number!(deserialize_f64); #[cfg(feature = "float_roundtrip")] - fn deserialize_f32<V>(self, visitor: V) -> Result<V::Value> - where - V: de::Visitor<'de>, - { - self.single_precision = true; - let val = self.deserialize_number(visitor); - self.single_precision = false; - val - } - - fn deserialize_i128<V>(self, visitor: V) -> Result<V::Value> - where - V: de::Visitor<'de>, - { - let mut buf = String::new(); - - match tri!(self.parse_whitespace()) { - Some(b'-') => { - self.eat_char(); - buf.push('-'); - } - Some(_) => {} - None => { - return Err(self.peek_error(ErrorCode::EofWhileParsingValue)); - } - }; - - 
tri!(self.scan_integer128(&mut buf)); - - let value = match buf.parse() { - Ok(int) => visitor.visit_i128(int), - Err(_) => { - return Err(self.error(ErrorCode::NumberOutOfRange)); - } - }; - - match value { - Ok(value) => Ok(value), - Err(err) => Err(self.fix_position(err)), - } - } - - fn deserialize_u128<V>(self, visitor: V) -> Result<V::Value> - where - V: de::Visitor<'de>, - { - match tri!(self.parse_whitespace()) { - Some(b'-') => { - return Err(self.peek_error(ErrorCode::NumberOutOfRange)); - } - Some(_) => {} - None => { - return Err(self.peek_error(ErrorCode::EofWhileParsingValue)); - } - } - - let mut buf = String::new(); - tri!(self.scan_integer128(&mut buf)); - - let value = match buf.parse() { - Ok(int) => visitor.visit_u128(int), - Err(_) => { - return Err(self.error(ErrorCode::NumberOutOfRange)); - } - }; - - match value { - Ok(value) => Ok(value), - Err(err) => Err(self.fix_position(err)), - } - } + deserialize_number!(deserialize_f32, do_deserialize_f32); + deserialize_number!(deserialize_i128, do_deserialize_i128); + deserialize_number!(deserialize_u128, do_deserialize_u128); fn deserialize_char<V>(self, visitor: V) -> Result<V::Value> where @@ -2118,24 +2128,47 @@ struct MapKey<'a, R: 'a> { de: &'a mut Deserializer<R>, } -macro_rules! deserialize_integer_key { - ($method:ident => $visit:ident) => { +macro_rules! 
deserialize_numeric_key { + ($method:ident) => { + fn $method<V>(self, visitor: V) -> Result<V::Value> + where + V: de::Visitor<'de>, + { + self.deserialize_number(visitor) + } + }; + + ($method:ident, $delegate:ident) => { fn $method<V>(self, visitor: V) -> Result<V::Value> where V: de::Visitor<'de>, { self.de.eat_char(); - self.de.scratch.clear(); - let string = tri!(self.de.read.parse_str(&mut self.de.scratch)); - match (string.parse(), string) { - (Ok(integer), _) => visitor.$visit(integer), - (Err(_), Reference::Borrowed(s)) => visitor.visit_borrowed_str(s), - (Err(_), Reference::Copied(s)) => visitor.visit_str(s), + + match tri!(self.de.peek()) { + Some(b'0'..=b'9' | b'-') => {} + _ => return Err(self.de.error(ErrorCode::ExpectedNumericKey)), + } + + let value = tri!(self.de.$delegate(visitor)); + + match tri!(self.de.peek()) { + Some(b'"') => self.de.eat_char(), + _ => return Err(self.de.peek_error(ErrorCode::ExpectedDoubleQuote)), } + + Ok(value) } }; } +impl<'de, 'a, R> MapKey<'a, R> +where + R: Read<'de>, +{ + deserialize_numeric_key!(deserialize_number, deserialize_number); +} + impl<'de, 'a, R> de::Deserializer<'de> for MapKey<'a, R> where R: Read<'de>, @@ -2155,16 +2188,56 @@ where } } - deserialize_integer_key!(deserialize_i8 => visit_i8); - deserialize_integer_key!(deserialize_i16 => visit_i16); - deserialize_integer_key!(deserialize_i32 => visit_i32); - deserialize_integer_key!(deserialize_i64 => visit_i64); - deserialize_integer_key!(deserialize_i128 => visit_i128); - deserialize_integer_key!(deserialize_u8 => visit_u8); - deserialize_integer_key!(deserialize_u16 => visit_u16); - deserialize_integer_key!(deserialize_u32 => visit_u32); - deserialize_integer_key!(deserialize_u64 => visit_u64); - deserialize_integer_key!(deserialize_u128 => visit_u128); + deserialize_numeric_key!(deserialize_i8); + deserialize_numeric_key!(deserialize_i16); + deserialize_numeric_key!(deserialize_i32); + deserialize_numeric_key!(deserialize_i64); + 
deserialize_numeric_key!(deserialize_i128, deserialize_i128); + deserialize_numeric_key!(deserialize_u8); + deserialize_numeric_key!(deserialize_u16); + deserialize_numeric_key!(deserialize_u32); + deserialize_numeric_key!(deserialize_u64); + deserialize_numeric_key!(deserialize_u128, deserialize_u128); + #[cfg(not(feature = "float_roundtrip"))] + deserialize_numeric_key!(deserialize_f32); + #[cfg(feature = "float_roundtrip")] + deserialize_numeric_key!(deserialize_f32, deserialize_f32); + deserialize_numeric_key!(deserialize_f64); + + fn deserialize_bool<V>(self, visitor: V) -> Result<V::Value> + where + V: de::Visitor<'de>, + { + self.de.eat_char(); + + let peek = match tri!(self.de.next_char()) { + Some(b) => b, + None => { + return Err(self.de.peek_error(ErrorCode::EofWhileParsingValue)); + } + }; + + let value = match peek { + b't' => { + tri!(self.de.parse_ident(b"rue\"")); + visitor.visit_bool(true) + } + b'f' => { + tri!(self.de.parse_ident(b"alse\"")); + visitor.visit_bool(false) + } + _ => { + self.de.scratch.clear(); + let s = tri!(self.de.read.parse_str(&mut self.de.scratch)); + Err(de::Error::invalid_type(Unexpected::Str(&s), &visitor)) + } + }; + + match value { + Ok(value) => Ok(value), + Err(err) => Err(self.de.fix_position(err)), + } + } #[inline] fn deserialize_option<V>(self, visitor: V) -> Result<V::Value> @@ -2221,8 +2294,8 @@ where } forward_to_deserialize_any! 
{ - bool f32 f64 char str string unit unit_struct seq tuple tuple_struct map - struct identifier ignored_any + char str string unit unit_struct seq tuple tuple_struct map struct + identifier ignored_any } } @@ -2318,8 +2391,8 @@ where fn peek_end_of_value(&mut self) -> Result<()> { match tri!(self.de.peek()) { - Some(b' ') | Some(b'\n') | Some(b'\t') | Some(b'\r') | Some(b'"') | Some(b'[') - | Some(b']') | Some(b'{') | Some(b'}') | Some(b',') | Some(b':') | None => Ok(()), + Some(b' ' | b'\n' | b'\t' | b'\r' | b'"' | b'[' | b']' | b'{' | b'}' | b',' | b':') + | None => Ok(()), Some(_) => { let position = self.de.read.peek_position(); Err(Error::syntax( @@ -2369,7 +2442,7 @@ where if self_delineated_value { Ok(value) } else { - self.peek_end_of_value().map(|_| value) + self.peek_end_of_value().map(|()| value) } } Err(e) => { @@ -2408,9 +2481,9 @@ where Ok(value) } -/// Deserialize an instance of type `T` from an IO stream of JSON. +/// Deserialize an instance of type `T` from an I/O stream of JSON. /// -/// The content of the IO stream is deserialized directly from the stream +/// The content of the I/O stream is deserialized directly from the stream /// without being buffered in memory by serde_json. /// /// When reading from a source against which short reads are not efficient, such diff --git a/vendor/serde_json/src/error.rs b/vendor/serde_json/src/error.rs index 0898baf..fbf9eb1 100644 --- a/vendor/serde_json/src/error.rs +++ b/vendor/serde_json/src/error.rs @@ -9,6 +9,8 @@ use core::str::FromStr; use serde::{de, ser}; #[cfg(feature = "std")] use std::error; +#[cfg(feature = "std")] +use std::io::ErrorKind; /// This type represents all possible errors that can occur when serializing or /// deserializing JSON data. @@ -36,15 +38,16 @@ impl Error { /// The first character in the input and any characters immediately /// following a newline character are in column 1. 
/// - /// Note that errors may occur in column 0, for example if a read from an IO - /// stream fails immediately following a previously read newline character. + /// Note that errors may occur in column 0, for example if a read from an + /// I/O stream fails immediately following a previously read newline + /// character. pub fn column(&self) -> usize { self.err.column } /// Categorizes the cause of this error. /// - /// - `Category::Io` - failure to read or write bytes on an IO stream + /// - `Category::Io` - failure to read or write bytes on an I/O stream /// - `Category::Syntax` - input that is not syntactically valid JSON /// - `Category::Data` - input data that is semantically incorrect /// - `Category::Eof` - unexpected end of the input data @@ -61,12 +64,15 @@ impl Error { | ErrorCode::ExpectedObjectCommaOrEnd | ErrorCode::ExpectedSomeIdent | ErrorCode::ExpectedSomeValue + | ErrorCode::ExpectedDoubleQuote | ErrorCode::InvalidEscape | ErrorCode::InvalidNumber | ErrorCode::NumberOutOfRange | ErrorCode::InvalidUnicodeCodePoint | ErrorCode::ControlCharacterWhileParsingString | ErrorCode::KeyMustBeAString + | ErrorCode::ExpectedNumericKey + | ErrorCode::FloatKeyMustBeFinite | ErrorCode::LoneLeadingSurrogateInHexEscape | ErrorCode::TrailingComma | ErrorCode::TrailingCharacters @@ -76,7 +82,7 @@ impl Error { } /// Returns true if this error was caused by a failure to read or write - /// bytes on an IO stream. + /// bytes on an I/O stream. pub fn is_io(&self) -> bool { self.classify() == Category::Io } @@ -104,12 +110,61 @@ impl Error { pub fn is_eof(&self) -> bool { self.classify() == Category::Eof } + + /// The kind reported by the underlying standard library I/O error, if this + /// error was caused by a failure to read or write bytes on an I/O stream. 
+ /// + /// # Example + /// + /// ``` + /// use serde_json::Value; + /// use std::io::{self, ErrorKind, Read}; + /// use std::process; + /// + /// struct ReaderThatWillTimeOut<'a>(&'a [u8]); + /// + /// impl<'a> Read for ReaderThatWillTimeOut<'a> { + /// fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { + /// if self.0.is_empty() { + /// Err(io::Error::new(ErrorKind::TimedOut, "timed out")) + /// } else { + /// self.0.read(buf) + /// } + /// } + /// } + /// + /// fn main() { + /// let reader = ReaderThatWillTimeOut(br#" {"k": "#); + /// + /// let _: Value = match serde_json::from_reader(reader) { + /// Ok(value) => value, + /// Err(error) => { + /// if error.io_error_kind() == Some(ErrorKind::TimedOut) { + /// // Maybe this application needs to retry certain kinds of errors. + /// + /// # return; + /// } else { + /// eprintln!("error: {}", error); + /// process::exit(1); + /// } + /// } + /// }; + /// } + /// ``` + #[cfg(feature = "std")] + pub fn io_error_kind(&self) -> Option<ErrorKind> { + if let ErrorCode::Io(io_error) = &self.err.code { + Some(io_error.kind()) + } else { + None + } + } } /// Categorizes the cause of a `serde_json::Error`. #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum Category { - /// The error was caused by a failure to read or write bytes on an IO + /// The error was caused by a failure to read or write bytes on an I/O /// stream. Io, @@ -134,8 +189,8 @@ pub enum Category { impl From<Error> for io::Error { /// Convert a `serde_json::Error` into an `io::Error`. /// - /// JSON syntax and data errors are turned into `InvalidData` IO errors. - /// EOF errors are turned into `UnexpectedEof` IO errors. + /// JSON syntax and data errors are turned into `InvalidData` I/O errors. + /// EOF errors are turned into `UnexpectedEof` I/O errors. 
/// /// ``` /// use std::io; @@ -165,8 +220,8 @@ impl From<Error> for io::Error { } else { match j.classify() { Category::Io => unreachable!(), - Category::Syntax | Category::Data => io::Error::new(io::ErrorKind::InvalidData, j), - Category::Eof => io::Error::new(io::ErrorKind::UnexpectedEof, j), + Category::Syntax | Category::Data => io::Error::new(ErrorKind::InvalidData, j), + Category::Eof => io::Error::new(ErrorKind::UnexpectedEof, j), } } } @@ -182,7 +237,7 @@ pub(crate) enum ErrorCode { /// Catchall for syntax error messages Message(Box<str>), - /// Some IO error occurred while serializing or deserializing. + /// Some I/O error occurred while serializing or deserializing. Io(io::Error), /// EOF while parsing a list. @@ -212,6 +267,9 @@ pub(crate) enum ErrorCode { /// Expected this character to start a JSON value. ExpectedSomeValue, + /// Expected this character to be a `"`. + ExpectedDoubleQuote, + /// Invalid hex escape code. InvalidEscape, @@ -230,6 +288,12 @@ pub(crate) enum ErrorCode { /// Object key is not a string. KeyMustBeAString, + /// Contents of key were supposed to be a number. + ExpectedNumericKey, + + /// Object key is a non-finite float value. + FloatKeyMustBeFinite, + /// Lone leading surrogate in hex escape. 
LoneLeadingSurrogateInHexEscape, @@ -296,6 +360,7 @@ impl Display for ErrorCode { ErrorCode::ExpectedObjectCommaOrEnd => f.write_str("expected `,` or `}`"), ErrorCode::ExpectedSomeIdent => f.write_str("expected ident"), ErrorCode::ExpectedSomeValue => f.write_str("expected value"), + ErrorCode::ExpectedDoubleQuote => f.write_str("expected `\"`"), ErrorCode::InvalidEscape => f.write_str("invalid escape"), ErrorCode::InvalidNumber => f.write_str("invalid number"), ErrorCode::NumberOutOfRange => f.write_str("number out of range"), @@ -304,6 +369,12 @@ impl Display for ErrorCode { f.write_str("control character (\\u0000-\\u001F) found while parsing a string") } ErrorCode::KeyMustBeAString => f.write_str("key must be a string"), + ErrorCode::ExpectedNumericKey => { + f.write_str("invalid value: expected key to be a number in quotes") + } + ErrorCode::FloatKeyMustBeFinite => { + f.write_str("float key must be finite (got NaN or +/-inf)") + } ErrorCode::LoneLeadingSurrogateInHexEscape => { f.write_str("lone leading surrogate in hex escape") } @@ -367,11 +438,20 @@ impl de::Error for Error { #[cold] fn invalid_type(unexp: de::Unexpected, exp: &dyn de::Expected) -> Self { - if let de::Unexpected::Unit = unexp { - Error::custom(format_args!("invalid type: null, expected {}", exp)) - } else { - Error::custom(format_args!("invalid type: {}, expected {}", unexp, exp)) - } + Error::custom(format_args!( + "invalid type: {}, expected {}", + JsonUnexpected(unexp), + exp, + )) + } + + #[cold] + fn invalid_value(unexp: de::Unexpected, exp: &dyn de::Expected) -> Self { + Error::custom(format_args!( + "invalid value: {}, expected {}", + JsonUnexpected(unexp), + exp, + )) } } @@ -382,6 +462,22 @@ impl ser::Error for Error { } } +struct JsonUnexpected<'a>(de::Unexpected<'a>); + +impl<'a> Display for JsonUnexpected<'a> { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + match self.0 { + de::Unexpected::Unit => formatter.write_str("null"), + de::Unexpected::Float(value) => 
write!( + formatter, + "floating point `{}`", + ryu::Buffer::new().format(value), + ), + unexp => Display::fmt(&unexp, formatter), + } + } +} + // Parse our own error message that looks like "{} at line {} column {}" to work // around erased-serde round-tripping the error through de::Error::custom. fn make_error(mut msg: String) -> Error { diff --git a/vendor/serde_json/src/io/core.rs b/vendor/serde_json/src/io/core.rs index 465ab8b..54c8ddf 100644 --- a/vendor/serde_json/src/io/core.rs +++ b/vendor/serde_json/src/io/core.rs @@ -9,7 +9,7 @@ pub enum ErrorKind { Other, } -// IO errors can never occur in no-std mode. All our no-std IO implementations +// I/O errors can never occur in no-std mode. All our no-std I/O implementations // are infallible. pub struct Error; diff --git a/vendor/serde_json/src/lexical/algorithm.rs b/vendor/serde_json/src/lexical/algorithm.rs index a2cbf18..eaa5e7e 100644 --- a/vendor/serde_json/src/lexical/algorithm.rs +++ b/vendor/serde_json/src/lexical/algorithm.rs @@ -51,7 +51,10 @@ where // Compute the product of the power, if it overflows, // prematurely return early, otherwise, if we didn't overshoot, // we can get an exact value. - let value = mantissa.checked_mul(power)?; + let value = match mantissa.checked_mul(power) { + None => return None, + Some(value) => value, + }; if value >> mantissa_size != 0 { None } else { diff --git a/vendor/serde_json/src/lexical/bignum.rs b/vendor/serde_json/src/lexical/bignum.rs index f9551f5..4fa7eed 100644 --- a/vendor/serde_json/src/lexical/bignum.rs +++ b/vendor/serde_json/src/lexical/bignum.rs @@ -3,6 +3,7 @@ //! Big integer type definition. use super::math::*; +#[allow(unused_imports)] use alloc::vec::Vec; /// Storage for a big integer type. 
diff --git a/vendor/serde_json/src/lexical/digit.rs b/vendor/serde_json/src/lexical/digit.rs index 882aa9e..3d150a1 100644 --- a/vendor/serde_json/src/lexical/digit.rs +++ b/vendor/serde_json/src/lexical/digit.rs @@ -11,5 +11,8 @@ pub(crate) fn to_digit(c: u8) -> Option<u32> { // Add digit to mantissa. #[inline] pub(crate) fn add_digit(value: u64, digit: u32) -> Option<u64> { - value.checked_mul(10)?.checked_add(digit as u64) + match value.checked_mul(10) { + None => None, + Some(n) => n.checked_add(digit as u64), + } } diff --git a/vendor/serde_json/src/lexical/errors.rs b/vendor/serde_json/src/lexical/errors.rs index cad4bd3..f4f41cd 100644 --- a/vendor/serde_json/src/lexical/errors.rs +++ b/vendor/serde_json/src/lexical/errors.rs @@ -5,8 +5,7 @@ //! This estimates the error in a floating-point representation. //! //! This implementation is loosely based off the Golang implementation, -//! found here: -//! https://golang.org/src/strconv/atof.go +//! found here: <https://golang.org/src/strconv/atof.go> use super::float::*; use super::num::*; diff --git a/vendor/serde_json/src/lexical/math.rs b/vendor/serde_json/src/lexical/math.rs index 37cc1d2..d7122bf 100644 --- a/vendor/serde_json/src/lexical/math.rs +++ b/vendor/serde_json/src/lexical/math.rs @@ -336,7 +336,7 @@ mod small { pub fn imul(x: &mut Vec<Limb>, y: Limb) { // Multiply iteratively over all elements, adding the carry each time. 
let mut carry: Limb = 0; - for xi in x.iter_mut() { + for xi in &mut *x { carry = scalar::imul(xi, y, carry); } @@ -482,7 +482,7 @@ mod small { let rshift = bits - n; let lshift = n; let mut prev: Limb = 0; - for xi in x.iter_mut() { + for xi in &mut *x { let tmp = *xi; *xi <<= lshift; *xi |= prev >> rshift; diff --git a/vendor/serde_json/src/lexical/num.rs b/vendor/serde_json/src/lexical/num.rs index e47e003..75eee01 100644 --- a/vendor/serde_json/src/lexical/num.rs +++ b/vendor/serde_json/src/lexical/num.rs @@ -223,7 +223,7 @@ pub trait Float: Number { const NEGATIVE_INFINITY_BITS: Self::Unsigned; /// Size of the significand (mantissa) without hidden bit. const MANTISSA_SIZE: i32; - /// Bias of the exponet + /// Bias of the exponent const EXPONENT_BIAS: i32; /// Exponent portion of a denormal float. const DENORMAL_EXPONENT: i32; @@ -248,7 +248,6 @@ pub trait Float: Number { fn from_bits(u: Self::Unsigned) -> Self; fn to_bits(self) -> Self::Unsigned; fn is_sign_positive(self) -> bool; - fn is_sign_negative(self) -> bool; /// Returns true if the float is a denormal. #[inline] @@ -368,11 +367,6 @@ impl Float for f32 { fn is_sign_positive(self) -> bool { f32::is_sign_positive(self) } - - #[inline] - fn is_sign_negative(self) -> bool { - f32::is_sign_negative(self) - } } impl Float for f64 { @@ -432,9 +426,4 @@ impl Float for f64 { fn is_sign_positive(self) -> bool { f64::is_sign_positive(self) } - - #[inline] - fn is_sign_negative(self) -> bool { - f64::is_sign_negative(self) - } } diff --git a/vendor/serde_json/src/lib.rs b/vendor/serde_json/src/lib.rs index 637d1ce..f10bedb 100644 --- a/vendor/serde_json/src/lib.rs +++ b/vendor/serde_json/src/lib.rs @@ -55,10 +55,9 @@ //! ``` //! //! A string of JSON data can be parsed into a `serde_json::Value` by the -//! [`serde_json::from_str`][from_str] function. There is also -//! [`from_slice`][from_slice] for parsing from a byte slice &[u8] and -//! 
[`from_reader`][from_reader] for parsing from any `io::Read` like a File or -//! a TCP stream. +//! [`serde_json::from_str`][from_str] function. There is also [`from_slice`] +//! for parsing from a byte slice &\[u8\] and [`from_reader`] for parsing from +//! any `io::Read` like a File or a TCP stream. //! //! ``` //! use serde_json::{Result, Value}; @@ -300,7 +299,7 @@ //! [macro]: crate::json //! [`serde-json-core`]: https://github.com/rust-embedded-community/serde-json-core -#![doc(html_root_url = "https://docs.rs/serde_json/1.0.96")] +#![doc(html_root_url = "https://docs.rs/serde_json/1.0.114")] // Ignored clippy lints #![allow( clippy::collapsible_else_if, @@ -315,18 +314,13 @@ clippy::match_single_binding, clippy::needless_doctest_main, clippy::needless_late_init, - // clippy bug: https://github.com/rust-lang/rust-clippy/issues/8366 - clippy::ptr_arg, clippy::return_self_not_must_use, clippy::transmute_ptr_to_ptr, - clippy::unnecessary_wraps, - // clippy bug: https://github.com/rust-lang/rust-clippy/issues/5704 - clippy::unnested_or_patterns, + clippy::unconditional_recursion, // https://github.com/rust-lang/rust-clippy/issues/12133 + clippy::unnecessary_wraps )] // Ignored clippy_pedantic lints #![allow( - // buggy - clippy::iter_not_returning_iterator, // https://github.com/rust-lang/rust-clippy/issues/8285 // Deserializer::from_str, into_iter clippy::should_implement_trait, // integer and float ser/de requires these sorts of casts @@ -362,14 +356,20 @@ clippy::missing_errors_doc, clippy::must_use_candidate, )] +// Restrictions +#![deny(clippy::question_mark_used)] #![allow(non_upper_case_globals)] #![deny(missing_docs)] -#![cfg_attr(not(feature = "std"), no_std)] +#![no_std] #![cfg_attr(docsrs, feature(doc_cfg))] extern crate alloc; #[cfg(feature = "std")] +extern crate std; + +#[cfg(feature = "std")] +#[cfg_attr(docsrs, doc(cfg(feature = "std")))] #[doc(inline)] pub use crate::de::from_reader; #[doc(inline)] @@ -379,6 +379,7 @@ pub use 
crate::error::{Error, Result}; #[doc(inline)] pub use crate::ser::{to_string, to_string_pretty, to_vec, to_vec_pretty}; #[cfg(feature = "std")] +#[cfg_attr(docsrs, doc(cfg(feature = "std")))] #[doc(inline)] pub use crate::ser::{to_writer, to_writer_pretty, Serializer}; #[doc(inline)] diff --git a/vendor/serde_json/src/macros.rs b/vendor/serde_json/src/macros.rs index 5287998..e8c6cd2 100644 --- a/vendor/serde_json/src/macros.rs +++ b/vendor/serde_json/src/macros.rs @@ -10,7 +10,8 @@ /// "features": [ /// "serde", /// "json" -/// ] +/// ], +/// "homepage": null /// } /// }); /// ``` diff --git a/vendor/serde_json/src/map.rs b/vendor/serde_json/src/map.rs index 3e8a381..520cd6c 100644 --- a/vendor/serde_json/src/map.rs +++ b/vendor/serde_json/src/map.rs @@ -11,7 +11,7 @@ use alloc::string::String; use core::borrow::Borrow; use core::fmt::{self, Debug}; use core::hash::Hash; -use core::iter::{FromIterator, FusedIterator}; +use core::iter::FusedIterator; #[cfg(feature = "preserve_order")] use core::mem; use core::ops; @@ -20,7 +20,7 @@ use serde::de; #[cfg(not(feature = "preserve_order"))] use alloc::collections::{btree_map, BTreeMap}; #[cfg(feature = "preserve_order")] -use indexmap::{self, IndexMap}; +use indexmap::IndexMap; /// Represents a JSON key/value type. pub struct Map<K, V> { @@ -106,7 +106,6 @@ impl Map<String, Value> { /// The key may be any borrowed form of the map's key type, but the ordering /// on the borrowed form *must* match the ordering on the key type. #[inline] - #[cfg(any(feature = "preserve_order", not(no_btreemap_get_key_value)))] pub fn get_key_value<Q>(&self, key: &Q) -> Option<(&String, &Value)> where String: Borrow<Q>, @@ -131,6 +130,12 @@ impl Map<String, Value> { /// /// The key may be any borrowed form of the map's key type, but the ordering /// on the borrowed form *must* match the ordering on the key type. 
+ /// + /// If serde_json's "preserve_order" is enabled, `.remove(key)` is + /// equivalent to [`.swap_remove(key)`][Self::swap_remove], replacing this + /// entry's position with the last element. If you need to preserve the + /// relative order of the keys in the map, use + /// [`.shift_remove(key)`][Self::shift_remove] instead. #[inline] pub fn remove<Q>(&mut self, key: &Q) -> Option<Value> where @@ -138,7 +143,7 @@ impl Map<String, Value> { Q: ?Sized + Ord + Eq + Hash, { #[cfg(feature = "preserve_order")] - return self.map.swap_remove(key); + return self.swap_remove(key); #[cfg(not(feature = "preserve_order"))] return self.map.remove(key); } @@ -148,49 +153,94 @@ impl Map<String, Value> { /// /// The key may be any borrowed form of the map's key type, but the ordering /// on the borrowed form *must* match the ordering on the key type. + /// + /// If serde_json's "preserve_order" is enabled, `.remove_entry(key)` is + /// equivalent to [`.swap_remove_entry(key)`][Self::swap_remove_entry], + /// replacing this entry's position with the last element. If you need to + /// preserve the relative order of the keys in the map, use + /// [`.shift_remove_entry(key)`][Self::shift_remove_entry] instead. 
+ #[inline] pub fn remove_entry<Q>(&mut self, key: &Q) -> Option<(String, Value)> where String: Borrow<Q>, Q: ?Sized + Ord + Eq + Hash, { - #[cfg(any(feature = "preserve_order", not(no_btreemap_remove_entry)))] + #[cfg(feature = "preserve_order")] + return self.swap_remove_entry(key); + #[cfg(not(feature = "preserve_order"))] return self.map.remove_entry(key); - #[cfg(all( - not(feature = "preserve_order"), - no_btreemap_remove_entry, - not(no_btreemap_get_key_value), - ))] - { - let (key, _value) = self.map.get_key_value(key)?; - let key = key.clone(); - let value = self.map.remove::<String>(&key)?; - Some((key, value)) - } - #[cfg(all( - not(feature = "preserve_order"), - no_btreemap_remove_entry, - no_btreemap_get_key_value, - ))] - { - use core::ops::{Bound, RangeBounds}; - - struct Key<'a, Q: ?Sized>(&'a Q); - - impl<'a, Q: ?Sized> RangeBounds<Q> for Key<'a, Q> { - fn start_bound(&self) -> Bound<&Q> { - Bound::Included(self.0) - } - fn end_bound(&self) -> Bound<&Q> { - Bound::Included(self.0) - } - } + } - let mut range = self.map.range(Key(key)); - let (key, _value) = range.next()?; - let key = key.clone(); - let value = self.map.remove::<String>(&key)?; - Some((key, value)) - } + /// Removes and returns the value corresponding to the key from the map. + /// + /// Like [`Vec::swap_remove`], the entry is removed by swapping it with the + /// last element of the map and popping it off. This perturbs the position + /// of what used to be the last element! + /// + /// [`Vec::swap_remove`]: std::vec::Vec::swap_remove + #[cfg(feature = "preserve_order")] + #[cfg_attr(docsrs, doc(cfg(feature = "preserve_order")))] + #[inline] + pub fn swap_remove<Q>(&mut self, key: &Q) -> Option<Value> + where + String: Borrow<Q>, + Q: ?Sized + Ord + Eq + Hash, + { + self.map.swap_remove(key) + } + + /// Remove and return the key-value pair. + /// + /// Like [`Vec::swap_remove`], the entry is removed by swapping it with the + /// last element of the map and popping it off. 
This perturbs the position + /// of what used to be the last element! + /// + /// [`Vec::swap_remove`]: std::vec::Vec::swap_remove + #[cfg(feature = "preserve_order")] + #[cfg_attr(docsrs, doc(cfg(feature = "preserve_order")))] + #[inline] + pub fn swap_remove_entry<Q>(&mut self, key: &Q) -> Option<(String, Value)> + where + String: Borrow<Q>, + Q: ?Sized + Ord + Eq + Hash, + { + self.map.swap_remove_entry(key) + } + + /// Removes and returns the value corresponding to the key from the map. + /// + /// Like [`Vec::remove`], the entry is removed by shifting all of the + /// elements that follow it, preserving their relative order. This perturbs + /// the index of all of those elements! + /// + /// [`Vec::remove`]: std::vec::Vec::remove + #[cfg(feature = "preserve_order")] + #[cfg_attr(docsrs, doc(cfg(feature = "preserve_order")))] + #[inline] + pub fn shift_remove<Q>(&mut self, key: &Q) -> Option<Value> + where + String: Borrow<Q>, + Q: ?Sized + Ord + Eq + Hash, + { + self.map.shift_remove(key) + } + + /// Remove and return the key-value pair. + /// + /// Like [`Vec::remove`], the entry is removed by shifting all of the + /// elements that follow it, preserving their relative order. This perturbs + /// the index of all of those elements! + /// + /// [`Vec::remove`]: std::vec::Vec::remove + #[cfg(feature = "preserve_order")] + #[cfg_attr(docsrs, doc(cfg(feature = "preserve_order")))] + #[inline] + pub fn shift_remove_entry<Q>(&mut self, key: &Q) -> Option<(String, Value)> + where + String: Borrow<Q>, + Q: ?Sized + Ord + Eq + Hash, + { + self.map.shift_remove_entry(key) } /// Moves all elements from other into self, leaving other empty. @@ -276,7 +326,6 @@ impl Map<String, Value> { /// /// In other words, remove all pairs `(k, v)` such that `f(&k, &mut v)` /// returns `false`. 
- #[cfg(not(no_btreemap_retain))] #[inline] pub fn retain<F>(&mut self, f: F) where diff --git a/vendor/serde_json/src/number.rs b/vendor/serde_json/src/number.rs index 5ecbde8..b0231a8 100644 --- a/vendor/serde_json/src/number.rs +++ b/vendor/serde_json/src/number.rs @@ -279,6 +279,33 @@ impl Number { } } + /// Returns the exact original JSON representation that this Number was + /// parsed from. + /// + /// For numbers constructed not via parsing, such as by `From<i32>`, returns + /// the JSON representation that serde\_json would serialize for this + /// number. + /// + /// ``` + /// # use serde_json::Number; + /// for value in [ + /// "7", + /// "12.34", + /// "34e-56789", + /// "0.0123456789000000012345678900000001234567890000123456789", + /// "343412345678910111213141516171819202122232425262728293034", + /// "-343412345678910111213141516171819202122232425262728293031", + /// ] { + /// let number: Number = serde_json::from_str(value).unwrap(); + /// assert_eq!(number.as_str(), value); + /// } + /// ``` + #[cfg(feature = "arbitrary_precision")] + #[cfg_attr(docsrs, doc(cfg(feature = "arbitrary_precision")))] + pub fn as_str(&self) -> &str { + &self.n + } + pub(crate) fn as_f32(&self) -> Option<f32> { #[cfg(not(feature = "arbitrary_precision"))] match self.n { @@ -361,8 +388,8 @@ impl Serialize for Number { { use serde::ser::SerializeStruct; - let mut s = serializer.serialize_struct(TOKEN, 1)?; - s.serialize_field(TOKEN, &self.n)?; + let mut s = tri!(serializer.serialize_struct(TOKEN, 1)); + tri!(s.serialize_field(TOKEN, &self.n)); s.end() } } @@ -406,11 +433,11 @@ impl<'de> Deserialize<'de> for Number { where V: de::MapAccess<'de>, { - let value = visitor.next_key::<NumberKey>()?; + let value = tri!(visitor.next_key::<NumberKey>()); if value.is_none() { return Err(de::Error::invalid_type(Unexpected::Map, &self)); } - let v: NumberFromString = visitor.next_value()?; + let v: NumberFromString = tri!(visitor.next_value()); Ok(v.value) } } @@ -449,7 +476,7 @@ 
impl<'de> de::Deserialize<'de> for NumberKey { } } - deserializer.deserialize_identifier(FieldVisitor)?; + tri!(deserializer.deserialize_identifier(FieldVisitor)); Ok(NumberKey) } } @@ -552,7 +579,7 @@ macro_rules! deserialize_number { where V: de::Visitor<'de>, { - visitor.$visit(self.n.parse().map_err(|_| invalid_number())?) + visitor.$visit(tri!(self.n.parse().map_err(|_| invalid_number()))) } }; } diff --git a/vendor/serde_json/src/raw.rs b/vendor/serde_json/src/raw.rs index 6aa4ffc..a2bf0ec 100644 --- a/vendor/serde_json/src/raw.rs +++ b/vendor/serde_json/src/raw.rs @@ -177,11 +177,9 @@ impl RawValue { /// - the input has no leading or trailing whitespace, and /// - the input has capacity equal to its length. pub fn from_string(json: String) -> Result<Box<Self>, Error> { - { - let borrowed = crate::from_str::<&Self>(&json)?; - if borrowed.json.len() < json.len() { - return Ok(borrowed.to_owned()); - } + let borrowed = tri!(crate::from_str::<&Self>(&json)); + if borrowed.json.len() < json.len() { + return Ok(borrowed.to_owned()); } Ok(Self::from_owned(json.into_boxed_str())) } @@ -287,7 +285,7 @@ pub fn to_raw_value<T>(value: &T) -> Result<Box<RawValue>, Error> where T: ?Sized + Serialize, { - let json_string = crate::to_string(value)?; + let json_string = tri!(crate::to_string(value)); Ok(RawValue::from_owned(json_string.into_boxed_str())) } @@ -298,8 +296,8 @@ impl Serialize for RawValue { where S: Serializer, { - let mut s = serializer.serialize_struct(TOKEN, 1)?; - s.serialize_field(TOKEN, &self.json)?; + let mut s = tri!(serializer.serialize_struct(TOKEN, 1)); + tri!(s.serialize_field(TOKEN, &self.json)); s.end() } } @@ -322,7 +320,7 @@ impl<'de: 'a, 'a> Deserialize<'de> for &'a RawValue { where V: MapAccess<'de>, { - let value = visitor.next_key::<RawKey>()?; + let value = tri!(visitor.next_key::<RawKey>()); if value.is_none() { return Err(de::Error::invalid_type(Unexpected::Map, &self)); } @@ -352,7 +350,7 @@ impl<'de> Deserialize<'de> for Box<RawValue> 
{ where V: MapAccess<'de>, { - let value = visitor.next_key::<RawKey>()?; + let value = tri!(visitor.next_key::<RawKey>()); if value.is_none() { return Err(de::Error::invalid_type(Unexpected::Map, &self)); } @@ -392,7 +390,7 @@ impl<'de> Deserialize<'de> for RawKey { } } - deserializer.deserialize_identifier(FieldVisitor)?; + tri!(deserializer.deserialize_identifier(FieldVisitor)); Ok(RawKey) } } @@ -529,3 +527,251 @@ impl<'de> MapAccess<'de> for BorrowedRawDeserializer<'de> { seed.deserialize(BorrowedStrDeserializer::new(self.raw_value.take().unwrap())) } } + +impl<'de> IntoDeserializer<'de, Error> for &'de RawValue { + type Deserializer = &'de RawValue; + + fn into_deserializer(self) -> Self::Deserializer { + self + } +} + +impl<'de> Deserializer<'de> for &'de RawValue { + type Error = Error; + + fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Error> + where + V: Visitor<'de>, + { + crate::Deserializer::from_str(&self.json).deserialize_any(visitor) + } + + fn deserialize_bool<V>(self, visitor: V) -> Result<V::Value, Error> + where + V: Visitor<'de>, + { + crate::Deserializer::from_str(&self.json).deserialize_bool(visitor) + } + + fn deserialize_i8<V>(self, visitor: V) -> Result<V::Value, Error> + where + V: Visitor<'de>, + { + crate::Deserializer::from_str(&self.json).deserialize_i8(visitor) + } + + fn deserialize_i16<V>(self, visitor: V) -> Result<V::Value, Error> + where + V: Visitor<'de>, + { + crate::Deserializer::from_str(&self.json).deserialize_i16(visitor) + } + + fn deserialize_i32<V>(self, visitor: V) -> Result<V::Value, Error> + where + V: Visitor<'de>, + { + crate::Deserializer::from_str(&self.json).deserialize_i32(visitor) + } + + fn deserialize_i64<V>(self, visitor: V) -> Result<V::Value, Error> + where + V: Visitor<'de>, + { + crate::Deserializer::from_str(&self.json).deserialize_i64(visitor) + } + + fn deserialize_i128<V>(self, visitor: V) -> Result<V::Value, Error> + where + V: Visitor<'de>, + { + 
crate::Deserializer::from_str(&self.json).deserialize_i128(visitor) + } + + fn deserialize_u8<V>(self, visitor: V) -> Result<V::Value, Error> + where + V: Visitor<'de>, + { + crate::Deserializer::from_str(&self.json).deserialize_u8(visitor) + } + + fn deserialize_u16<V>(self, visitor: V) -> Result<V::Value, Error> + where + V: Visitor<'de>, + { + crate::Deserializer::from_str(&self.json).deserialize_u16(visitor) + } + + fn deserialize_u32<V>(self, visitor: V) -> Result<V::Value, Error> + where + V: Visitor<'de>, + { + crate::Deserializer::from_str(&self.json).deserialize_u32(visitor) + } + + fn deserialize_u64<V>(self, visitor: V) -> Result<V::Value, Error> + where + V: Visitor<'de>, + { + crate::Deserializer::from_str(&self.json).deserialize_u64(visitor) + } + + fn deserialize_u128<V>(self, visitor: V) -> Result<V::Value, Error> + where + V: Visitor<'de>, + { + crate::Deserializer::from_str(&self.json).deserialize_u128(visitor) + } + + fn deserialize_f32<V>(self, visitor: V) -> Result<V::Value, Error> + where + V: Visitor<'de>, + { + crate::Deserializer::from_str(&self.json).deserialize_f32(visitor) + } + + fn deserialize_f64<V>(self, visitor: V) -> Result<V::Value, Error> + where + V: Visitor<'de>, + { + crate::Deserializer::from_str(&self.json).deserialize_f64(visitor) + } + + fn deserialize_char<V>(self, visitor: V) -> Result<V::Value, Error> + where + V: Visitor<'de>, + { + crate::Deserializer::from_str(&self.json).deserialize_char(visitor) + } + + fn deserialize_str<V>(self, visitor: V) -> Result<V::Value, Error> + where + V: Visitor<'de>, + { + crate::Deserializer::from_str(&self.json).deserialize_str(visitor) + } + + fn deserialize_string<V>(self, visitor: V) -> Result<V::Value, Error> + where + V: Visitor<'de>, + { + crate::Deserializer::from_str(&self.json).deserialize_string(visitor) + } + + fn deserialize_bytes<V>(self, visitor: V) -> Result<V::Value, Error> + where + V: Visitor<'de>, + { + 
crate::Deserializer::from_str(&self.json).deserialize_bytes(visitor) + } + + fn deserialize_byte_buf<V>(self, visitor: V) -> Result<V::Value, Error> + where + V: Visitor<'de>, + { + crate::Deserializer::from_str(&self.json).deserialize_byte_buf(visitor) + } + + fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, Error> + where + V: Visitor<'de>, + { + crate::Deserializer::from_str(&self.json).deserialize_option(visitor) + } + + fn deserialize_unit<V>(self, visitor: V) -> Result<V::Value, Error> + where + V: Visitor<'de>, + { + crate::Deserializer::from_str(&self.json).deserialize_unit(visitor) + } + + fn deserialize_unit_struct<V>(self, name: &'static str, visitor: V) -> Result<V::Value, Error> + where + V: Visitor<'de>, + { + crate::Deserializer::from_str(&self.json).deserialize_unit_struct(name, visitor) + } + + fn deserialize_newtype_struct<V>( + self, + name: &'static str, + visitor: V, + ) -> Result<V::Value, Error> + where + V: Visitor<'de>, + { + crate::Deserializer::from_str(&self.json).deserialize_newtype_struct(name, visitor) + } + + fn deserialize_seq<V>(self, visitor: V) -> Result<V::Value, Error> + where + V: Visitor<'de>, + { + crate::Deserializer::from_str(&self.json).deserialize_seq(visitor) + } + + fn deserialize_tuple<V>(self, len: usize, visitor: V) -> Result<V::Value, Error> + where + V: Visitor<'de>, + { + crate::Deserializer::from_str(&self.json).deserialize_tuple(len, visitor) + } + + fn deserialize_tuple_struct<V>( + self, + name: &'static str, + len: usize, + visitor: V, + ) -> Result<V::Value, Error> + where + V: Visitor<'de>, + { + crate::Deserializer::from_str(&self.json).deserialize_tuple_struct(name, len, visitor) + } + + fn deserialize_map<V>(self, visitor: V) -> Result<V::Value, Error> + where + V: Visitor<'de>, + { + crate::Deserializer::from_str(&self.json).deserialize_map(visitor) + } + + fn deserialize_struct<V>( + self, + name: &'static str, + fields: &'static [&'static str], + visitor: V, + ) -> Result<V::Value, 
Error> + where + V: Visitor<'de>, + { + crate::Deserializer::from_str(&self.json).deserialize_struct(name, fields, visitor) + } + + fn deserialize_enum<V>( + self, + name: &'static str, + variants: &'static [&'static str], + visitor: V, + ) -> Result<V::Value, Error> + where + V: Visitor<'de>, + { + crate::Deserializer::from_str(&self.json).deserialize_enum(name, variants, visitor) + } + + fn deserialize_identifier<V>(self, visitor: V) -> Result<V::Value, Error> + where + V: Visitor<'de>, + { + crate::Deserializer::from_str(&self.json).deserialize_identifier(visitor) + } + + fn deserialize_ignored_any<V>(self, visitor: V) -> Result<V::Value, Error> + where + V: Visitor<'de>, + { + crate::Deserializer::from_str(&self.json).deserialize_ignored_any(visitor) + } +} diff --git a/vendor/serde_json/src/read.rs b/vendor/serde_json/src/read.rs index fc3a3ca..06ac907 100644 --- a/vendor/serde_json/src/read.rs +++ b/vendor/serde_json/src/read.rs @@ -14,6 +14,8 @@ use crate::iter::LineColIterator; use crate::raw::BorrowedRawDeserializer; #[cfg(all(feature = "raw_value", feature = "std"))] use crate::raw::OwnedRawDeserializer; +#[cfg(all(feature = "raw_value", feature = "std"))] +use alloc::string::String; #[cfg(feature = "raw_value")] use serde::de::Visitor; @@ -81,7 +83,7 @@ pub trait Read<'de>: private::Sealed { #[doc(hidden)] fn ignore_str(&mut self) -> Result<()>; - /// Assumes the previous byte was a hex escape sequnce ('\u') in a string. + /// Assumes the previous byte was a hex escape sequence ('\u') in a string. /// Parses next hexadecimal sequence. 
#[doc(hidden)] fn decode_hex_escape(&mut self) -> Result<u16>; diff --git a/vendor/serde_json/src/ser.rs b/vendor/serde_json/src/ser.rs index b38f348..3742e0b 100644 --- a/vendor/serde_json/src/ser.rs +++ b/vendor/serde_json/src/ser.rs @@ -189,12 +189,9 @@ where #[inline] fn serialize_bytes(self, value: &[u8]) -> Result<()> { - use serde::ser::SerializeSeq; - let mut seq = tri!(self.serialize_seq(Some(value.len()))); - for byte in value { - tri!(seq.serialize_element(byte)); - } - seq.end() + self.formatter + .write_byte_array(&mut self.writer, value) + .map_err(Error::io) } #[inline] @@ -439,17 +436,15 @@ where .formatter .begin_string(&mut self.writer) .map_err(Error::io)); - { - let mut adapter = Adapter { - writer: &mut self.writer, - formatter: &mut self.formatter, - error: None, - }; - match write!(adapter, "{}", value) { - Ok(()) => debug_assert!(adapter.error.is_none()), - Err(fmt::Error) => { - return Err(Error::io(adapter.error.expect("there should be an error"))); - } + let mut adapter = Adapter { + writer: &mut self.writer, + formatter: &mut self.formatter, + error: None, + }; + match write!(adapter, "{}", value) { + Ok(()) => debug_assert!(adapter.error.is_none()), + Err(fmt::Error) => { + return Err(Error::io(adapter.error.expect("there should be an error"))); } } self.formatter @@ -789,6 +784,10 @@ fn key_must_be_a_string() -> Error { Error::syntax(ErrorCode::KeyMustBeAString, 0, 0) } +fn float_key_must_be_finite() -> Error { + Error::syntax(ErrorCode::FloatKeyMustBeFinite, 0, 0) +} + impl<'a, W, F> ser::Serializer for MapKeySerializer<'a, W, F> where W: io::Write, @@ -828,8 +827,21 @@ where type SerializeStruct = Impossible<(), Error>; type SerializeStructVariant = Impossible<(), Error>; - fn serialize_bool(self, _value: bool) -> Result<()> { - Err(key_must_be_a_string()) + fn serialize_bool(self, value: bool) -> Result<()> { + tri!(self + .ser + .formatter + .begin_string(&mut self.ser.writer) + .map_err(Error::io)); + tri!(self + .ser + .formatter 
+ .write_bool(&mut self.ser.writer, value) + .map_err(Error::io)); + self.ser + .formatter + .end_string(&mut self.ser.writer) + .map_err(Error::io) } fn serialize_i8(self, value: i8) -> Result<()> { @@ -1002,12 +1014,46 @@ where .map_err(Error::io) } - fn serialize_f32(self, _value: f32) -> Result<()> { - Err(key_must_be_a_string()) + fn serialize_f32(self, value: f32) -> Result<()> { + if !value.is_finite() { + return Err(float_key_must_be_finite()); + } + + tri!(self + .ser + .formatter + .begin_string(&mut self.ser.writer) + .map_err(Error::io)); + tri!(self + .ser + .formatter + .write_f32(&mut self.ser.writer, value) + .map_err(Error::io)); + self.ser + .formatter + .end_string(&mut self.ser.writer) + .map_err(Error::io) } - fn serialize_f64(self, _value: f64) -> Result<()> { - Err(key_must_be_a_string()) + fn serialize_f64(self, value: f64) -> Result<()> { + if !value.is_finite() { + return Err(float_key_must_be_finite()); + } + + tri!(self + .ser + .formatter + .begin_string(&mut self.ser.writer) + .map_err(Error::io)); + tri!(self + .ser + .formatter + .write_f64(&mut self.ser.writer, value) + .map_err(Error::io)); + self.ser + .formatter + .end_string(&mut self.ser.writer) + .map_err(Error::io) } fn serialize_char(self, value: char) -> Result<()> { @@ -1043,11 +1089,11 @@ where Err(key_must_be_a_string()) } - fn serialize_some<T>(self, _value: &T) -> Result<()> + fn serialize_some<T>(self, value: &T) -> Result<()> where T: ?Sized + Serialize, { - Err(key_must_be_a_string()) + value.serialize(self) } fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq> { @@ -1734,6 +1780,24 @@ pub trait Formatter { writer.write_all(s) } + /// Writes the representation of a byte array. Formatters can choose whether + /// to represent bytes as a JSON array of integers (the default), or some + /// JSON string encoding like hex or base64. 
+ fn write_byte_array<W>(&mut self, writer: &mut W, value: &[u8]) -> io::Result<()> + where + W: ?Sized + io::Write, + { + tri!(self.begin_array(writer)); + let mut first = true; + for byte in value { + tri!(self.begin_array_value(writer, first)); + tri!(self.write_u8(writer, *byte)); + tri!(self.end_array_value(writer)); + first = false; + } + self.end_array(writer) + } + /// Called before every array. Writes a `[` to the specified /// writer. #[inline] @@ -2062,7 +2126,7 @@ static ESCAPE: [u8; 256] = [ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // F ]; -/// Serialize the given data structure as JSON into the IO stream. +/// Serialize the given data structure as JSON into the I/O stream. /// /// Serialization guarantees it only feeds valid UTF-8 sequences to the writer. /// @@ -2081,7 +2145,7 @@ where value.serialize(&mut ser) } -/// Serialize the given data structure as pretty-printed JSON into the IO +/// Serialize the given data structure as pretty-printed JSON into the I/O /// stream. /// /// Serialization guarantees it only feeds valid UTF-8 sequences to the writer. diff --git a/vendor/serde_json/src/value/de.rs b/vendor/serde_json/src/value/de.rs index 9c266d0..1e8b5ac 100644 --- a/vendor/serde_json/src/value/de.rs +++ b/vendor/serde_json/src/value/de.rs @@ -1,4 +1,4 @@ -use crate::error::Error; +use crate::error::{Error, ErrorCode}; use crate::map::Map; use crate::number::Number; use crate::value::Value; @@ -106,15 +106,15 @@ impl<'de> Deserialize<'de> for Value { where V: MapAccess<'de>, { - match visitor.next_key_seed(KeyClassifier)? 
{ + match tri!(visitor.next_key_seed(KeyClassifier)) { #[cfg(feature = "arbitrary_precision")] Some(KeyClass::Number) => { - let number: NumberFromString = visitor.next_value()?; + let number: NumberFromString = tri!(visitor.next_value()); Ok(Value::Number(number.value)) } #[cfg(feature = "raw_value")] Some(KeyClass::RawValue) => { - let value = visitor.next_value_seed(crate::raw::BoxedFromString)?; + let value = tri!(visitor.next_value_seed(crate::raw::BoxedFromString)); crate::from_str(value.get()).map_err(de::Error::custom) } Some(KeyClass::Map(first_key)) => { @@ -482,6 +482,14 @@ impl<'de> IntoDeserializer<'de, Error> for Value { } } +impl<'de> IntoDeserializer<'de, Error> for &'de Value { + type Deserializer = Self; + + fn into_deserializer(self) -> Self::Deserializer { + self + } +} + struct VariantDeserializer { value: Option<Value>, } @@ -1120,18 +1128,30 @@ struct MapKeyDeserializer<'de> { key: Cow<'de, str>, } -macro_rules! deserialize_integer_key { - ($method:ident => $visit:ident) => { +macro_rules! 
deserialize_numeric_key { + ($method:ident) => { + deserialize_numeric_key!($method, deserialize_number); + }; + + ($method:ident, $using:ident) => { fn $method<V>(self, visitor: V) -> Result<V::Value, Error> where V: Visitor<'de>, { - match (self.key.parse(), self.key) { - (Ok(integer), _) => visitor.$visit(integer), - (Err(_), Cow::Borrowed(s)) => visitor.visit_borrowed_str(s), - #[cfg(any(feature = "std", feature = "alloc"))] - (Err(_), Cow::Owned(s)) => visitor.visit_string(s), + let mut de = crate::Deserializer::from_str(&self.key); + + match tri!(de.peek()) { + Some(b'0'..=b'9' | b'-') => {} + _ => return Err(Error::syntax(ErrorCode::ExpectedNumericKey, 0, 0)), + } + + let number = tri!(de.$using(visitor)); + + if tri!(de.peek()).is_some() { + return Err(Error::syntax(ErrorCode::ExpectedNumericKey, 0, 0)); } + + Ok(number) } }; } @@ -1146,16 +1166,38 @@ impl<'de> serde::Deserializer<'de> for MapKeyDeserializer<'de> { BorrowedCowStrDeserializer::new(self.key).deserialize_any(visitor) } - deserialize_integer_key!(deserialize_i8 => visit_i8); - deserialize_integer_key!(deserialize_i16 => visit_i16); - deserialize_integer_key!(deserialize_i32 => visit_i32); - deserialize_integer_key!(deserialize_i64 => visit_i64); - deserialize_integer_key!(deserialize_i128 => visit_i128); - deserialize_integer_key!(deserialize_u8 => visit_u8); - deserialize_integer_key!(deserialize_u16 => visit_u16); - deserialize_integer_key!(deserialize_u32 => visit_u32); - deserialize_integer_key!(deserialize_u64 => visit_u64); - deserialize_integer_key!(deserialize_u128 => visit_u128); + deserialize_numeric_key!(deserialize_i8); + deserialize_numeric_key!(deserialize_i16); + deserialize_numeric_key!(deserialize_i32); + deserialize_numeric_key!(deserialize_i64); + deserialize_numeric_key!(deserialize_u8); + deserialize_numeric_key!(deserialize_u16); + deserialize_numeric_key!(deserialize_u32); + deserialize_numeric_key!(deserialize_u64); + #[cfg(not(feature = "float_roundtrip"))] + 
deserialize_numeric_key!(deserialize_f32); + deserialize_numeric_key!(deserialize_f64); + + #[cfg(feature = "float_roundtrip")] + deserialize_numeric_key!(deserialize_f32, do_deserialize_f32); + deserialize_numeric_key!(deserialize_i128, do_deserialize_i128); + deserialize_numeric_key!(deserialize_u128, do_deserialize_u128); + + fn deserialize_bool<V>(self, visitor: V) -> Result<V::Value, Error> + where + V: Visitor<'de>, + { + if self.key == "true" { + visitor.visit_bool(true) + } else if self.key == "false" { + visitor.visit_bool(false) + } else { + Err(serde::de::Error::invalid_type( + Unexpected::Str(&self.key), + &visitor, + )) + } + } #[inline] fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, Error> @@ -1193,8 +1235,8 @@ impl<'de> serde::Deserializer<'de> for MapKeyDeserializer<'de> { } forward_to_deserialize_any! { - bool f32 f64 char str string bytes byte_buf unit unit_struct seq tuple - tuple_struct map struct identifier ignored_any + char str string bytes byte_buf unit unit_struct seq tuple tuple_struct + map struct identifier ignored_any } } @@ -1327,7 +1369,7 @@ impl<'de> de::EnumAccess<'de> for BorrowedCowStrDeserializer<'de> { where T: de::DeserializeSeed<'de>, { - let value = seed.deserialize(self)?; + let value = tri!(seed.deserialize(self)); Ok((value, UnitOnly)) } } diff --git a/vendor/serde_json/src/value/from.rs b/vendor/serde_json/src/value/from.rs index 462ad3f..ed1e333 100644 --- a/vendor/serde_json/src/value/from.rs +++ b/vendor/serde_json/src/value/from.rs @@ -4,7 +4,6 @@ use crate::number::Number; use alloc::borrow::Cow; use alloc::string::{String, ToString}; use alloc::vec::Vec; -use core::iter::FromIterator; macro_rules! from_integer { ($($ty:ident)*) => { @@ -29,7 +28,8 @@ from_integer! { } impl From<f32> for Value { - /// Convert 32-bit floating point number to `Value` + /// Convert 32-bit floating point number to `Value::Number`, or + /// `Value::Null` if infinite or NaN. 
/// /// # Examples /// @@ -45,7 +45,8 @@ impl From<f32> for Value { } impl From<f64> for Value { - /// Convert 64-bit floating point number to `Value` + /// Convert 64-bit floating point number to `Value::Number`, or + /// `Value::Null` if infinite or NaN. /// /// # Examples /// @@ -61,7 +62,7 @@ impl From<f64> for Value { } impl From<bool> for Value { - /// Convert boolean to `Value` + /// Convert boolean to `Value::Bool`. /// /// # Examples /// @@ -77,7 +78,7 @@ impl From<bool> for Value { } impl From<String> for Value { - /// Convert `String` to `Value` + /// Convert `String` to `Value::String`. /// /// # Examples /// @@ -92,8 +93,8 @@ impl From<String> for Value { } } -impl<'a> From<&'a str> for Value { - /// Convert string slice to `Value` +impl From<&str> for Value { + /// Convert string slice to `Value::String`. /// /// # Examples /// @@ -109,7 +110,7 @@ impl<'a> From<&'a str> for Value { } impl<'a> From<Cow<'a, str>> for Value { - /// Convert copy-on-write string to `Value` + /// Convert copy-on-write string to `Value::String`. /// /// # Examples /// @@ -134,7 +135,7 @@ impl<'a> From<Cow<'a, str>> for Value { } impl From<Number> for Value { - /// Convert `Number` to `Value` + /// Convert `Number` to `Value::Number`. /// /// # Examples /// @@ -150,7 +151,7 @@ impl From<Number> for Value { } impl From<Map<String, Value>> for Value { - /// Convert map (with string keys) to `Value` + /// Convert map (with string keys) to `Value::Object`. /// /// # Examples /// @@ -167,7 +168,7 @@ impl From<Map<String, Value>> for Value { } impl<T: Into<Value>> From<Vec<T>> for Value { - /// Convert a `Vec` to `Value` + /// Convert a `Vec` to `Value::Array`. /// /// # Examples /// @@ -182,8 +183,8 @@ impl<T: Into<Value>> From<Vec<T>> for Value { } } -impl<'a, T: Clone + Into<Value>> From<&'a [T]> for Value { - /// Convert a slice to `Value` +impl<T: Clone + Into<Value>> From<&[T]> for Value { + /// Convert a slice to `Value::Array`. 
/// /// # Examples /// @@ -193,13 +194,13 @@ impl<'a, T: Clone + Into<Value>> From<&'a [T]> for Value { /// let v: &[&str] = &["lorem", "ipsum", "dolor"]; /// let x: Value = v.into(); /// ``` - fn from(f: &'a [T]) -> Self { + fn from(f: &[T]) -> Self { Value::Array(f.iter().cloned().map(Into::into).collect()) } } impl<T: Into<Value>> FromIterator<T> for Value { - /// Convert an iteratable type to a `Value` + /// Create a `Value::Array` by collecting an iterator of array elements. /// /// # Examples /// @@ -229,7 +230,7 @@ impl<T: Into<Value>> FromIterator<T> for Value { } impl<K: Into<String>, V: Into<Value>> FromIterator<(K, V)> for Value { - /// Convert an iteratable type to a `Value` + /// Create a `Value::Object` by collecting an iterator of key-value pairs. /// /// # Examples /// @@ -249,7 +250,7 @@ impl<K: Into<String>, V: Into<Value>> FromIterator<(K, V)> for Value { } impl From<()> for Value { - /// Convert `()` to `Value` + /// Convert `()` to `Value::Null`. /// /// # Examples /// diff --git a/vendor/serde_json/src/value/index.rs b/vendor/serde_json/src/value/index.rs index c74042b..891ca8e 100644 --- a/vendor/serde_json/src/value/index.rs +++ b/vendor/serde_json/src/value/index.rs @@ -116,7 +116,7 @@ impl Index for String { } } -impl<'a, T> Index for &'a T +impl<T> Index for &T where T: ?Sized + Index, { diff --git a/vendor/serde_json/src/value/mod.rs b/vendor/serde_json/src/value/mod.rs index 470b6b2..b3f51ea 100644 --- a/vendor/serde_json/src/value/mod.rs +++ b/vendor/serde_json/src/value/mod.rs @@ -106,6 +106,7 @@ pub use crate::map::Map; pub use crate::number::Number; #[cfg(feature = "raw_value")] +#[cfg_attr(docsrs, doc(cfg(feature = "raw_value")))] pub use crate::raw::{to_raw_value, RawValue}; /// Represents any valid JSON value. 
@@ -182,11 +183,11 @@ impl Debug for Value { Value::Number(number) => Debug::fmt(number, formatter), Value::String(string) => write!(formatter, "String({:?})", string), Value::Array(vec) => { - formatter.write_str("Array ")?; + tri!(formatter.write_str("Array ")); Debug::fmt(vec, formatter) } Value::Object(map) => { - formatter.write_str("Object ")?; + tri!(formatter.write_str("Object ")); Debug::fmt(map, formatter) } } @@ -514,6 +515,28 @@ impl Value { } } + /// If the `Value` is a Number, returns the associated [`Number`]. Returns + /// None otherwise. + /// + /// ``` + /// # use serde_json::{json, Number}; + /// # + /// let v = json!({ "a": 1, "b": 2.2, "c": -3, "d": "4" }); + /// + /// assert_eq!(v["a"].as_number(), Some(&Number::from(1u64))); + /// assert_eq!(v["b"].as_number(), Some(&Number::from_f64(2.2).unwrap())); + /// assert_eq!(v["c"].as_number(), Some(&Number::from(-3i64))); + /// + /// // The string `"4"` is not a number. + /// assert_eq!(v["d"].as_number(), None); + /// ``` + pub fn as_number(&self) -> Option<&Number> { + match self { + Value::Number(number) => Some(number), + _ => None, + } + } + /// Returns true if the `Value` is an integer between `i64::MIN` and /// `i64::MAX`. 
/// @@ -889,7 +912,6 @@ mod ser; /// ``` /// use serde::Serialize; /// use serde_json::json; -/// /// use std::error::Error; /// /// #[derive(Serialize)] @@ -898,7 +920,7 @@ mod ser; /// location: String, /// } /// -/// fn compare_json_values() -> Result<(), Box<Error>> { +/// fn compare_json_values() -> Result<(), Box<dyn Error>> { /// let u = User { /// fingerprint: "0xF9BA143B95FF6D82".to_owned(), /// location: "Menlo Park, CA".to_owned(), diff --git a/vendor/serde_json/src/value/partial_eq.rs b/vendor/serde_json/src/value/partial_eq.rs index 6b2e350..46c1dbc 100644 --- a/vendor/serde_json/src/value/partial_eq.rs +++ b/vendor/serde_json/src/value/partial_eq.rs @@ -34,7 +34,7 @@ impl PartialEq<str> for Value { } } -impl<'a> PartialEq<&'a str> for Value { +impl PartialEq<&str> for Value { fn eq(&self, other: &&str) -> bool { eq_str(self, *other) } @@ -46,7 +46,7 @@ impl PartialEq<Value> for str { } } -impl<'a> PartialEq<Value> for &'a str { +impl PartialEq<Value> for &str { fn eq(&self, other: &Value) -> bool { eq_str(other, *self) } diff --git a/vendor/serde_json/src/value/ser.rs b/vendor/serde_json/src/value/ser.rs index 875d22e..835fa90 100644 --- a/vendor/serde_json/src/value/ser.rs +++ b/vendor/serde_json/src/value/ser.rs @@ -4,8 +4,6 @@ use crate::value::{to_value, Value}; use alloc::borrow::ToOwned; use alloc::string::{String, ToString}; use alloc::vec::Vec; -#[cfg(not(feature = "arbitrary_precision"))] -use core::convert::TryFrom; use core::fmt::Display; use core::result; use serde::ser::{Impossible, Serialize}; @@ -451,6 +449,10 @@ fn key_must_be_a_string() -> Error { Error::syntax(ErrorCode::KeyMustBeAString, 0, 0) } +fn float_key_must_be_finite() -> Error { + Error::syntax(ErrorCode::FloatKeyMustBeFinite, 0, 0) +} + impl serde::Serializer for MapKeySerializer { type Ok = String; type Error = Error; @@ -481,8 +483,8 @@ impl serde::Serializer for MapKeySerializer { value.serialize(self) } - fn serialize_bool(self, _value: bool) -> Result<String> { - 
Err(key_must_be_a_string()) + fn serialize_bool(self, value: bool) -> Result<String> { + Ok(value.to_string()) } fn serialize_i8(self, value: i8) -> Result<String> { @@ -517,12 +519,20 @@ impl serde::Serializer for MapKeySerializer { Ok(value.to_string()) } - fn serialize_f32(self, _value: f32) -> Result<String> { - Err(key_must_be_a_string()) + fn serialize_f32(self, value: f32) -> Result<String> { + if value.is_finite() { + Ok(ryu::Buffer::new().format_finite(value).to_owned()) + } else { + Err(float_key_must_be_finite()) + } } - fn serialize_f64(self, _value: f64) -> Result<String> { - Err(key_must_be_a_string()) + fn serialize_f64(self, value: f64) -> Result<String> { + if value.is_finite() { + Ok(ryu::Buffer::new().format_finite(value).to_owned()) + } else { + Err(float_key_must_be_finite()) + } } #[inline] @@ -640,7 +650,7 @@ impl serde::ser::SerializeStruct for SerializeMap { #[cfg(feature = "arbitrary_precision")] SerializeMap::Number { out_value } => { if key == crate::number::TOKEN { - *out_value = Some(value.serialize(NumberValueEmitter)?); + *out_value = Some(tri!(value.serialize(NumberValueEmitter))); Ok(()) } else { Err(invalid_number()) @@ -649,7 +659,7 @@ impl serde::ser::SerializeStruct for SerializeMap { #[cfg(feature = "raw_value")] SerializeMap::RawValue { out_value } => { if key == crate::raw::TOKEN { - *out_value = Some(value.serialize(RawValueEmitter)?); + *out_value = Some(tri!(value.serialize(RawValueEmitter))); Ok(()) } else { Err(invalid_raw_value()) diff --git a/vendor/serde_json/tests/lexical.rs b/vendor/serde_json/tests/lexical.rs index d3dfb85..368c844 100644 --- a/vendor/serde_json/tests/lexical.rs +++ b/vendor/serde_json/tests/lexical.rs @@ -26,11 +26,6 @@ extern crate alloc; #[path = "../src/lexical/mod.rs"] mod lexical; -mod lib { - pub use std::vec::Vec; - pub use std::{cmp, iter, mem, ops}; -} - #[path = "lexical/algorithm.rs"] mod algorithm; diff --git a/vendor/serde_json/tests/lexical/parse.rs 
b/vendor/serde_json/tests/lexical/parse.rs index 80ca25e..03ec1a9 100644 --- a/vendor/serde_json/tests/lexical/parse.rs +++ b/vendor/serde_json/tests/lexical/parse.rs @@ -1,7 +1,7 @@ // Adapted from https://github.com/Alexhuszagh/rust-lexical. use crate::lexical::num::Float; -use crate::lexical::parse::{parse_concise_float, parse_truncated_float}; +use crate::lexical::{parse_concise_float, parse_truncated_float}; use core::f64; use core::fmt::Debug; diff --git a/vendor/serde_json/tests/map.rs b/vendor/serde_json/tests/map.rs index ae01969..538cd16 100644 --- a/vendor/serde_json/tests/map.rs +++ b/vendor/serde_json/tests/map.rs @@ -35,7 +35,6 @@ fn test_append() { assert!(val.is_empty()); } -#[cfg(not(no_btreemap_retain))] #[test] fn test_retain() { let mut v: Value = from_str(r#"{"b":null,"a":null,"c":null}"#).unwrap(); diff --git a/vendor/serde_json/tests/regression/issue845.rs b/vendor/serde_json/tests/regression/issue845.rs index 56037ae..e8b0c0f 100644 --- a/vendor/serde_json/tests/regression/issue845.rs +++ b/vendor/serde_json/tests/regression/issue845.rs @@ -1,7 +1,6 @@ #![allow(clippy::trait_duplication_in_bounds)] // https://github.com/rust-lang/rust-clippy/issues/8757 use serde::{Deserialize, Deserializer}; -use std::convert::TryFrom; use std::fmt::{self, Display}; use std::marker::PhantomData; use std::str::FromStr; diff --git a/vendor/serde_json/tests/test.rs b/vendor/serde_json/tests/test.rs index 6c08cc8..543dbd0 100644 --- a/vendor/serde_json/tests/test.rs +++ b/vendor/serde_json/tests/test.rs @@ -5,6 +5,7 @@ clippy::derive_partial_eq_without_eq, clippy::excessive_precision, clippy::float_cmp, + clippy::incompatible_msrv, // https://github.com/rust-lang/rust-clippy/issues/12257 clippy::items_after_statements, clippy::let_underscore_untyped, clippy::shadow_unrelated, @@ -14,9 +15,6 @@ clippy::vec_init_then_push, clippy::zero_sized_map_values )] -#![cfg_attr(feature = "trace-macros", feature(trace_macros))] -#[cfg(feature = "trace-macros")] 
-trace_macros!(true); #[macro_use] mod macros; @@ -33,18 +31,18 @@ use serde_json::{ from_reader, from_slice, from_str, from_value, json, to_string, to_string_pretty, to_value, to_vec, Deserializer, Number, Value, }; -use std::collections::hash_map::DefaultHasher; use std::collections::BTreeMap; #[cfg(feature = "raw_value")] use std::collections::HashMap; use std::fmt::{self, Debug}; +use std::hash::BuildHasher; +#[cfg(feature = "raw_value")] use std::hash::{Hash, Hasher}; use std::io; use std::iter; use std::marker::PhantomData; use std::mem; use std::str::FromStr; -use std::string::ToString; use std::{f32, f64}; use std::{i16, i32, i64, i8}; use std::{u16, u32, u64, u8}; @@ -53,7 +51,7 @@ macro_rules! treemap { () => { BTreeMap::new() }; - ($($k:expr => $v:expr),+) => { + ($($k:expr => $v:expr),+ $(,)?) => { { let mut m = BTreeMap::new(); $( @@ -160,17 +158,29 @@ fn test_write_f64() { #[test] fn test_encode_nonfinite_float_yields_null() { - let v = to_value(::std::f64::NAN).unwrap(); + let v = to_value(::std::f64::NAN.copysign(1.0)).unwrap(); + assert!(v.is_null()); + + let v = to_value(::std::f64::NAN.copysign(-1.0)).unwrap(); assert!(v.is_null()); let v = to_value(::std::f64::INFINITY).unwrap(); assert!(v.is_null()); - let v = to_value(::std::f32::NAN).unwrap(); + let v = to_value(-::std::f64::INFINITY).unwrap(); + assert!(v.is_null()); + + let v = to_value(::std::f32::NAN.copysign(1.0)).unwrap(); + assert!(v.is_null()); + + let v = to_value(::std::f32::NAN.copysign(-1.0)).unwrap(); assert!(v.is_null()); let v = to_value(::std::f32::INFINITY).unwrap(); assert!(v.is_null()); + + let v = to_value(-::std::f32::INFINITY).unwrap(); + assert!(v.is_null()); } #[test] @@ -264,7 +274,7 @@ fn test_write_object() { ( treemap!( "a".to_string() => true, - "b".to_string() => false + "b".to_string() => false, ), "{\"a\":true,\"b\":false}", ), @@ -275,7 +285,7 @@ fn test_write_object() { treemap![ "a".to_string() => treemap![], "b".to_string() => treemap![], - "c".to_string() 
=> treemap![] + "c".to_string() => treemap![], ], "{\"a\":{},\"b\":{},\"c\":{}}", ), @@ -284,10 +294,10 @@ fn test_write_object() { "a".to_string() => treemap![ "a".to_string() => treemap!["a" => vec![1,2,3]], "b".to_string() => treemap![], - "c".to_string() => treemap![] + "c".to_string() => treemap![], ], "b".to_string() => treemap![], - "c".to_string() => treemap![] + "c".to_string() => treemap![], ], "{\"a\":{\"a\":{\"a\":[1,2,3]},\"b\":{},\"c\":{}},\"b\":{},\"c\":{}}", ), @@ -297,9 +307,9 @@ fn test_write_object() { "b".to_string() => treemap![ "a".to_string() => treemap!["a" => vec![1,2,3]], "b".to_string() => treemap![], - "c".to_string() => treemap![] + "c".to_string() => treemap![], ], - "c".to_string() => treemap![] + "c".to_string() => treemap![], ], "{\"a\":{},\"b\":{\"a\":{\"a\":[1,2,3]},\"b\":{},\"c\":{}},\"c\":{}}", ), @@ -310,8 +320,8 @@ fn test_write_object() { "c".to_string() => treemap![ "a".to_string() => treemap!["a" => vec![1,2,3]], "b".to_string() => treemap![], - "c".to_string() => treemap![] - ] + "c".to_string() => treemap![], + ], ], "{\"a\":{},\"b\":{},\"c\":{\"a\":{\"a\":[1,2,3]},\"b\":{},\"c\":{}}}", ), @@ -324,7 +334,7 @@ fn test_write_object() { treemap![ "a".to_string() => treemap![], "b".to_string() => treemap![], - "c".to_string() => treemap![] + "c".to_string() => treemap![], ], pretty_str!({ "a": {}, @@ -337,10 +347,10 @@ fn test_write_object() { "a".to_string() => treemap![ "a".to_string() => treemap!["a" => vec![1,2,3]], "b".to_string() => treemap![], - "c".to_string() => treemap![] + "c".to_string() => treemap![], ], "b".to_string() => treemap![], - "c".to_string() => treemap![] + "c".to_string() => treemap![], ], pretty_str!({ "a": { @@ -364,9 +374,9 @@ fn test_write_object() { "b".to_string() => treemap![ "a".to_string() => treemap!["a" => vec![1,2,3]], "b".to_string() => treemap![], - "c".to_string() => treemap![] + "c".to_string() => treemap![], ], - "c".to_string() => treemap![] + "c".to_string() => treemap![], ], 
pretty_str!({ "a": {}, @@ -391,8 +401,8 @@ fn test_write_object() { "c".to_string() => treemap![ "a".to_string() => treemap!["a" => vec![1,2,3]], "b".to_string() => treemap![], - "c".to_string() => treemap![] - ] + "c".to_string() => treemap![], + ], ], pretty_str!({ "a": {}, @@ -423,7 +433,7 @@ fn test_write_object() { ( treemap!( "a".to_string() => true, - "b".to_string() => false + "b".to_string() => false, ), pretty_str!( { "a": true, @@ -1192,8 +1202,8 @@ fn test_parse_object() { treemap!( "a".to_string() => treemap!( "b".to_string() => 3u64, - "c".to_string() => 4 - ) + "c".to_string() => 4, + ), ), )]); @@ -1369,7 +1379,7 @@ fn test_parse_enum() { ), treemap!( "a".to_string() => Animal::Dog, - "b".to_string() => Animal::Frog("Henry".to_string(), vec![]) + "b".to_string() => Animal::Frog("Henry".to_string(), vec![]), ), )]); } @@ -1452,7 +1462,6 @@ fn test_serialize_seq_with_no_len() { where T: ser::Serialize, { - #[inline] fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where S: ser::Serializer, @@ -1479,7 +1488,6 @@ fn test_serialize_seq_with_no_len() { formatter.write_str("array") } - #[inline] fn visit_unit<E>(self) -> Result<MyVec<T>, E> where E: de::Error, @@ -1487,7 +1495,6 @@ fn test_serialize_seq_with_no_len() { Ok(MyVec(Vec::new())) } - #[inline] fn visit_seq<V>(self, mut visitor: V) -> Result<MyVec<T>, V::Error> where V: de::SeqAccess<'de>, @@ -1538,7 +1545,6 @@ fn test_serialize_map_with_no_len() { K: ser::Serialize + Ord, V: ser::Serialize, { - #[inline] fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where S: ser::Serializer, @@ -1566,7 +1572,6 @@ fn test_serialize_map_with_no_len() { formatter.write_str("map") } - #[inline] fn visit_unit<E>(self) -> Result<MyMap<K, V>, E> where E: de::Error, @@ -1574,7 +1579,6 @@ fn test_serialize_map_with_no_len() { Ok(MyMap(BTreeMap::new())) } - #[inline] fn visit_map<Visitor>(self, mut visitor: Visitor) -> Result<MyMap<K, V>, Visitor::Error> where Visitor: 
de::MapAccess<'de>, @@ -1660,23 +1664,12 @@ fn test_deserialize_from_stream() { assert_eq!(request, response); } -#[test] -fn test_serialize_rejects_bool_keys() { - let map = treemap!( - true => 2, - false => 4 - ); - - let err = to_vec(&map).unwrap_err(); - assert_eq!(err.to_string(), "key must be a string"); -} - #[test] fn test_serialize_rejects_adt_keys() { let map = treemap!( Some("a") => 2, Some("b") => 4, - None => 6 + None => 6, ); let err = to_vec(&map).unwrap_err(); @@ -1890,23 +1883,41 @@ fn test_integer_key() { // map with integer keys let map = treemap!( 1 => 2, - -1 => 6 + -1 => 6, ); let j = r#"{"-1":6,"1":2}"#; test_encode_ok(&[(&map, j)]); test_parse_ok(vec![(j, map)]); - let j = r#"{"x":null}"#; - test_parse_err::<BTreeMap<i32, ()>>(&[( - j, - "invalid type: string \"x\", expected i32 at line 1 column 4", - )]); + test_parse_err::<BTreeMap<i32, ()>>(&[ + ( + r#"{"x":null}"#, + "invalid value: expected key to be a number in quotes at line 1 column 2", + ), + ( + r#"{" 123":null}"#, + "invalid value: expected key to be a number in quotes at line 1 column 2", + ), + (r#"{"123 ":null}"#, "expected `\"` at line 1 column 6"), + ]); + + let err = from_value::<BTreeMap<i32, ()>>(json!({" 123":null})).unwrap_err(); + assert_eq!( + err.to_string(), + "invalid value: expected key to be a number in quotes", + ); + + let err = from_value::<BTreeMap<i32, ()>>(json!({"123 ":null})).unwrap_err(); + assert_eq!( + err.to_string(), + "invalid value: expected key to be a number in quotes", + ); } #[test] fn test_integer128_key() { let map = treemap! 
{ - 100000000000000000000000000000000000000u128 => () + 100000000000000000000000000000000000000u128 => (), }; let j = r#"{"100000000000000000000000000000000000000":null}"#; assert_eq!(to_string(&map).unwrap(), j); @@ -1914,23 +1925,106 @@ fn test_integer128_key() { } #[test] -fn test_deny_float_key() { - #[derive(Eq, PartialEq, Ord, PartialOrd)] +fn test_float_key() { + #[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone)] struct Float; impl Serialize for Float { fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where S: Serializer, { - serializer.serialize_f32(1.0) + serializer.serialize_f32(1.23) + } + } + impl<'de> Deserialize<'de> for Float { + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: de::Deserializer<'de>, + { + f32::deserialize(deserializer).map(|_| Float) } } // map with float key - let map = treemap!(Float => "x"); + let map = treemap!(Float => "x".to_owned()); + let j = r#"{"1.23":"x"}"#; + + test_encode_ok(&[(&map, j)]); + test_parse_ok(vec![(j, map)]); + + let j = r#"{"x": null}"#; + test_parse_err::<BTreeMap<Float, ()>>(&[( + j, + "invalid value: expected key to be a number in quotes at line 1 column 2", + )]); +} + +#[test] +fn test_deny_non_finite_f32_key() { + // We store float bits so that we can derive Ord, and other traits. In a + // real context the code might involve a crate like ordered-float. 
+ + #[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone)] + struct F32Bits(u32); + impl Serialize for F32Bits { + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + serializer.serialize_f32(f32::from_bits(self.0)) + } + } + + let map = treemap!(F32Bits(f32::INFINITY.to_bits()) => "x".to_owned()); + assert!(serde_json::to_string(&map).is_err()); + assert!(serde_json::to_value(map).is_err()); + + let map = treemap!(F32Bits(f32::NEG_INFINITY.to_bits()) => "x".to_owned()); + assert!(serde_json::to_string(&map).is_err()); + assert!(serde_json::to_value(map).is_err()); + + let map = treemap!(F32Bits(f32::NAN.to_bits()) => "x".to_owned()); + assert!(serde_json::to_string(&map).is_err()); assert!(serde_json::to_value(map).is_err()); } +#[test] +fn test_deny_non_finite_f64_key() { + // We store float bits so that we can derive Ord, and other traits. In a + // real context the code might involve a crate like ordered-float. + + #[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone)] + struct F64Bits(u64); + impl Serialize for F64Bits { + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + serializer.serialize_f64(f64::from_bits(self.0)) + } + } + + let map = treemap!(F64Bits(f64::INFINITY.to_bits()) => "x".to_owned()); + assert!(serde_json::to_string(&map).is_err()); + assert!(serde_json::to_value(map).is_err()); + + let map = treemap!(F64Bits(f64::NEG_INFINITY.to_bits()) => "x".to_owned()); + assert!(serde_json::to_string(&map).is_err()); + assert!(serde_json::to_value(map).is_err()); + + let map = treemap!(F64Bits(f64::NAN.to_bits()) => "x".to_owned()); + assert!(serde_json::to_string(&map).is_err()); + assert!(serde_json::to_value(map).is_err()); +} + +#[test] +fn test_boolean_key() { + let map = treemap!(false => 0, true => 1); + let j = r#"{"false":0,"true":1}"#; + test_encode_ok(&[(&map, j)]); + test_parse_ok(vec![(j, map)]); +} + #[test] fn test_borrowed_key() { let map: 
BTreeMap<&str, ()> = from_str("{\"borrowed\":null}").unwrap(); @@ -1954,7 +2048,7 @@ fn test_effectively_string_keys() { } let map = treemap! { Enum::One => 1, - Enum::Two => 2 + Enum::Two => 2, }; let expected = r#"{"One":1,"Two":2}"#; test_encode_ok(&[(&map, expected)]); @@ -1964,7 +2058,7 @@ fn test_effectively_string_keys() { struct Wrapper(String); let map = treemap! { Wrapper("zero".to_owned()) => 0, - Wrapper("one".to_owned()) => 1 + Wrapper("one".to_owned()) => 1, }; let expected = r#"{"one":1,"zero":0}"#; test_encode_ok(&[(&map, expected)]); @@ -2385,25 +2479,27 @@ fn test_value_into_deserializer() { let mut map = BTreeMap::new(); map.insert("inner", json!({ "string": "Hello World" })); + let outer = Outer::deserialize(serde::de::value::MapDeserializer::new( + map.iter().map(|(k, v)| (*k, v)), + )) + .unwrap(); + assert_eq!(outer.inner.string, "Hello World"); + let outer = Outer::deserialize(map.into_deserializer()).unwrap(); assert_eq!(outer.inner.string, "Hello World"); } #[test] fn hash_positive_and_negative_zero() { - fn hash(obj: impl Hash) -> u64 { - let mut hasher = DefaultHasher::new(); - obj.hash(&mut hasher); - hasher.finish() - } + let rand = std::hash::RandomState::new(); let k1 = serde_json::from_str::<Number>("0.0").unwrap(); let k2 = serde_json::from_str::<Number>("-0.0").unwrap(); if cfg!(feature = "arbitrary_precision") { assert_ne!(k1, k2); - assert_ne!(hash(k1), hash(k2)); + assert_ne!(rand.hash_one(k1), rand.hash_one(k2)); } else { assert_eq!(k1, k2); - assert_eq!(hash(k1), hash(k2)); + assert_eq!(rand.hash_one(k1), rand.hash_one(k2)); } } diff --git a/vendor/serde_json/tests/ui/parse_key.stderr b/vendor/serde_json/tests/ui/parse_key.stderr index f10c218..15662dc 100644 --- a/vendor/serde_json/tests/ui/parse_key.stderr +++ b/vendor/serde_json/tests/ui/parse_key.stderr @@ -2,4 +2,4 @@ error[E0609]: no field `s` on type `&'static str` --> tests/ui/parse_key.rs:4:16 | 4 | json!({ "".s : true }); - | ^ + | ^ unknown field diff --git 
a/vendor/syn/.cargo-checksum.json b/vendor/syn/.cargo-checksum.json index 8b32498..e1d6c63 100644 --- a/vendor/syn/.cargo-checksum.json +++ b/vendor/syn/.cargo-checksum.json @@ -1 +1 @@ -{"files":{"Cargo.toml":"07a5542d87a02be22d8d80182280f622a65263ea468350ba6137539f7bca1a8f","LICENSE-APACHE":"62c7a1e35f56406896d7aa7ca52d0cc0d272ac022b5d2796e7d6905db8a3636a","LICENSE-MIT":"23f18e03dc49df91622fe2a76176497404e46ced8a715d9d2b67a7446571cca3","README.md":"33fd74d909172770aaf840c519f7c59ef185a4a8d21c1e5e4dcd6a398e7e1e61","benches/file.rs":"0a0527c78d849148cbb6118b4d36f72da7d4add865ba1a410e0a1be9e8dbfe0e","benches/rust.rs":"cc2f7ce7b547b746b02215c8eabeb82697bff9d54fabec70156b54f6dc6492cd","src/attr.rs":"bd5ffae18a363162f7d9c12a1b6c1d023070cbf1b060c98ebc38ef79f1de9c67","src/bigint.rs":"0299829b2f7a1a798fe2f7bc1680e4a10f9b6f4a852d09af4da2deab466c4242","src/buffer.rs":"634fed0b398163581d27b4693a481ffcef891c8e274d9b4574482a644ef9fce9","src/custom_keyword.rs":"b82199b98f67ed5c0025f5e8791b8c9a755522e54aa5ab8fbab2b01b36fdb400","src/custom_punctuation.rs":"39b38bc18553aa902a5ce842f503390c30e259b4404d5fb63d2401af7c73b527","src/data.rs":"7d217b0252a0d14b2db308ac00f48ba24a831e01a49b893f5b3ee6b580dab4cb","src/derive.rs":"3132e7f064725c7ca43f26daee93ec78037d46a935c6b0758af905cff450c15c","src/discouraged.rs":"482970b03bdee3cbc30c034f644e3293b25387db46300da5d8d8efd97dad8507","src/drops.rs":"013385f1dd95663f1afab41abc1e2eea04181998644828935ca564c74d6462ae","src/error.rs":"c75089eeb5c0a231e747cbe479e84a379841a6e8d61fd072347cfae09c8781ec","src/export.rs":"6785b6329d7bc0a5b0efbf9d28af7cdbdfe279ae9a0e21ef177b144ed6188b66","src/expr.rs":"5b90b619ec48704627adefeb437793d41148f62e94d4b1e80ad81b8ee5085a14","src/ext.rs":"3cf2d869812e5be894aa1c48bf074da262143fb2df1c9ac1b5ee965bf2a96a1c","src/file.rs":"a4d510dd0e2756bd54983dfa747601918c801e987cbf92deab44cdca6a201aeb","src/gen/clone.rs":"46540509dc99bb849014948a0c5b02ea372d5feceae5ea391c29f226f06516eb","src/gen/debug.rs":"32b2076b755f021428a0fb268a940
57e1bcb1cd400feb895946703d7919b843a","src/gen/eq.rs":"aa5455b2cc0d9846d119ce001e821872df911f65133b993e3801a42e8f635f2a","src/gen/fold.rs":"45ac5b6915d5214fa1e9af84621584443f599f838ed936fa8bda3b68a9cc4b6a","src/gen/hash.rs":"4ca8239c681ea5fd7b16bb61bff9034bff09680c088f5a16e90e99013e55742f","src/gen/visit.rs":"0a10ef3a2c5cae7aed83e8ffb5da9f9c85e0fdbae82025cc411f6328bf7fda9e","src/gen/visit_mut.rs":"1f6cfa463da0f970063e70831e3ff6b07d725c77c6e20ece17c0731d90d5b4a4","src/gen_helper.rs":"750caab67ba0ba11a95ea28cd38026485227bb4aa114cdb497472386f60fdb35","src/generics.rs":"d080112c1d3084e9d701ab628cfa77881ed9398c638ba40c7e4135d9b3f1e784","src/group.rs":"fb7f24019ab612ba85f091c4edda3b2f0154f39caa18c9a139ee600afffbeefa","src/ident.rs":"711647537aee87d7249bbcdeb2cc90d146937998dd435395c85c6b18a10b5e07","src/item.rs":"6f9c8c8bd6f1a30d39e9df5e8be978c3d2d727df64c5e64fb34199f770df6a2f","src/lib.rs":"a2c3d09def47c5788759a5e2762b81521b2a90d9ecf8816d0a4165ca0729d98e","src/lifetime.rs":"531ef74507eaf942a3aedfac83bbdbc17463102a6c806f675a83a0d6dc612c52","src/lit.rs":"72214440bdfa844aa86853aec42cd6900dff47a3cab4bc8d83ad205a115c09ce","src/lookahead.rs":"376092f91a1c32e1b277db0a6790fdda151c9ec51bd971fe6a6545b5b9e73b5d","src/mac.rs":"b1cf73f34a27a8f1429125e726623a524fb5dce875eb68ead3beaffa976442c3","src/macros.rs":"4e464104c590200213635624706d83e4a0ddd5aedd826ab4aabb390000f35ae0","src/meta.rs":"43c9d06f222f5323087bb668d8b5c1cd4fdef772db1b433c9b991ea026649699","src/op.rs":"fe5db7c3373b956234ea8a1a7d129a06e5aef5db77c44c1c2fedb4aaa667ac56","src/parse.rs":"07dafec0038234eba0c15845bd85f3250f41dce6d013f49e2364666bb9732bae","src/parse_macro_input.rs":"4a753b2a6dbfefd6dc93852d66b4f6d73ebd6b8b9be74019fc476f429b9a892d","src/parse_quote.rs":"60eff4d03bf4f5977be86f49faad16d6713121f69bedd868f951bbcabf443d66","src/pat.rs":"cae5d096a31f7dfe96213f6d83a6c717ef5e2ef4a10793f4d28e2099e6ee404b","src/path.rs":"8dcedaab7ca9e9bc901fb74079e35bfca6ff9e45bc5ca75af1008c087a2c24c8","src/print.rs":"22910bf0521ab868ebd7c6
2601c55912d12cfb400c65723e08e5cfa3a2d111c0","src/punctuated.rs":"6c072f20c5ff0eda8916e94c415c8fd62e113faf87316be4b6e5ca64042b6b01","src/restriction.rs":"62efbc127d7e7316dd1070c0e976872de6238b2602bba1fb35df18511b4e7199","src/sealed.rs":"6ece3b3dcb30f6bb98b93d83759ca7712ee8592bef9c0511141039c38765db0e","src/span.rs":"4c13579eaf94803bcdb98696e4c3e26fd5cfb7ad46e5a727ed087e5935530a59","src/spanned.rs":"311f4ca8ab9d436df8861a8ea3411d8eff0920354457e124ac85d0579c074981","src/stmt.rs":"acd8ad6406a8e0c11de789f4907d127bdbe8fdf2be68de957298905492ec195c","src/thread.rs":"32f1d8a9890a15920bb939e51647a6630c0661c3fae282834394e4437b8aa5df","src/token.rs":"8b0b4535972fb7b3640e27cb54f80d0e61f27334f2c4c2226c6bae7958299527","src/tt.rs":"32490509abcc4a5a3c7eb5628337172b3b49d30697d2f7b7df4d8045255c13da","src/ty.rs":"6b0185102966685329c1797c6e6bbac47ffe91cb8d68218f454443ba5d252206","src/verbatim.rs":"8d2a42a0aad2a5e69d9b32ba7fb3564fce003fe0862dbc01e106f15d951f3060","src/whitespace.rs":"718a80c12cdd145358e2690f0f68ff7779a91ec17ce9fde9bb755f635fce69ad","tests/common/eq.rs":"d130722a0fe5379c34f163cd7c46678ad902e39be7abaa552155609d22128713","tests/common/mod.rs":"432ad35577f836a20b517d8c26ed994ac25fe73ef2f461c67688b61b99762015","tests/common/parse.rs":"246ddf1d303a9dbbc380e8d0689bd851cef3c3146d09d2627175deb9203b003d","tests/debug/gen.rs":"0b689be01a4f4a0d168617b0f867f248a9e3d211e259926e6ec6c10a59776d81","tests/debug/mod.rs":"dd87563bbd359401790a9c4185178539929ff9fa35a6998657af82a85731fe4c","tests/macros/mod.rs":"aff805b35cfd55aef6a1359ff747e4023afcb08d69d86aff4c19465d29dda088","tests/regression.rs":"e9565ea0efecb4136f099164ffcfa26e1996b0a27fb9c6659e90ad9bdd42e7b6","tests/regression/issue1108.rs":"f32db35244a674e22ff824ca9e5bbec2184e287b59f022db68c418b5878a2edc","tests/regression/issue1235.rs":"a2266b10c3f7c7af5734817ab0a3e8b309b51e7d177b63f26e67e6b744d280b0","tests/repo/mod.rs":"c624f94ac3238a4231dd884daf330979ccd600b2169cc76ddd2306aeebfae8d9","tests/repo/progress.rs":"c08d0314a7f3ecf760d471f2
7da3cd2a500aeb9f1c8331bffb2aa648f9fabf3f","tests/test_asyncness.rs":"3868181f25f7470476077f80a442a7804b6b9b371ad5917f4fd18b1002714c64","tests/test_attribute.rs":"b35550a43bbd187bb330997ba36f90c65d8fc489135b1d32ef4547f145cb7612","tests/test_derive_input.rs":"c215245c4d09052661ac5b65b34e950ea47622847bdffe648d380470f12db8f2","tests/test_expr.rs":"1d8688c51d4e8dd5a288722ec8c074320081756fcc83812f23109dffe0caddbf","tests/test_generics.rs":"b77741aa38e6ac7e1a9082faf168e7b7b92fbabf9f3fd07306676339a67394df","tests/test_grouping.rs":"ecbe3324878b2e2be42640a3dec198620cff18731fcb95ee7e94eacd11d2fec1","tests/test_ident.rs":"9eb53d1e21edf23e7c9e14dc74dcc2b2538e9221e19dbcc0a44e3acc2e90f3f6","tests/test_item.rs":"7f0255b61d0a6921313c09aaba470beefc55f1d4e66d1e24cfac7a3f63b035d8","tests/test_iterators.rs":"f4dacb5f3a8e0473dfb0d27f05270d41e79eddb4759b1fad3e88e379b4731e17","tests/test_lit.rs":"7297fed48ca248689f112f67b6f024f2f2784e29c6cd33185ac659c350834b01","tests/test_meta.rs":"3e1bb60b4bd56adb1e04b0e2d867404f0d81f7bf69caf7d8a70fc7090e079e84","tests/test_parse_buffer.rs":"3ed83ea2e50f84b80c0b543aac4bfbd379610d0911c0baa1eb94bb925bda7341","tests/test_parse_stream.rs":"a7e186272c89a239cae03053b5a039cdc073cdb46fad64b178fe76fde98405d5","tests/test_pat.rs":"fe94e084ee478d41cccea4eeb3e975386a70d36ff7cbb902ba0c767d536aab6e","tests/test_path.rs":"0033e1082b576bb3217ebd4546423d6f86fde7ee7ba3aba8c57bf137d2b42f47","tests/test_precedence.rs":"1395b213a1aa953a3b2eacc922853f8d0e3afba552325440bfbe4df6b62102a1","tests/test_receiver.rs":"af64117acd66fbf42edc476f731ecd20c88009d9cb641dbd7a1d6384ae99ae73","tests/test_round_trip.rs":"b9f133540847a04e80f6f5264290633ebdd00d058a7b1a626929966786ffbe98","tests/test_shebang.rs":"06d3acabed004767d8b3a0389bde7485a6719cad6a0d0b4ac2c7439b03586651","tests/test_should_parse.rs":"1d3535698a446e2755bfc360676bdb161841a1f454cdef6e7556c6d06a95c89d","tests/test_size.rs":"a4db2760e9438ef1a26d70238c2b5044de02878403fd424b4c9913b5ee45e004","tests/test_stmt.rs":"42a3707056da0ce
3a01f8fb13e8b7631f9be6066627ac376e1874742419ad2cc","tests/test_token_trees.rs":"d012da9c3c861073711b006bf6ffdc073821fb9fb0a08733628cdae57124d1f5","tests/test_ty.rs":"f7f21f76e9e798669f09a95c380e26ae5959ee8ac5f3b765b1a799cc9505d075","tests/test_visibility.rs":"cf4c93997cd88821ef7f8e2dd5d1586175cce4614407cd3bdf371ecc4d8abc44","tests/zzz_stable.rs":"2a862e59cb446235ed99aec0e6ada8e16d3ecc30229b29d825b7c0bbc2602989"},"package":"a34fcf3e8b60f57e6a14301a2e916d323af98b0ea63c599441eec8558660c822"} \ No newline at end of file +{"files":{"Cargo.toml":"096d9ca68883d01d86e64db90df5591fad182fa4a2e48fab75747164910bc451","LICENSE-APACHE":"62c7a1e35f56406896d7aa7ca52d0cc0d272ac022b5d2796e7d6905db8a3636a","LICENSE-MIT":"23f18e03dc49df91622fe2a76176497404e46ced8a715d9d2b67a7446571cca3","README.md":"f6904878f9082d7d267b6d0d737ef211ff165cfd039a4d45ad88e9861f3e217f","benches/file.rs":"0a0527c78d849148cbb6118b4d36f72da7d4add865ba1a410e0a1be9e8dbfe0e","benches/rust.rs":"02585b90de2404241ed167f34d1d21da950ef48f1e3d7f718f4e01a5e3795e14","src/attr.rs":"7dc328025f9def00f634b1de958a93ac2b2f3fdfb11411600861c556bc821262","src/bigint.rs":"0299829b2f7a1a798fe2f7bc1680e4a10f9b6f4a852d09af4da2deab466c4242","src/buffer.rs":"30a9fffaf1b7a4fd8ca7eed5a9cde7a13d75d0b7b92569975bee1e7083d2555e","src/custom_keyword.rs":"b2e25cbe73d237b459140a38083ee21e4836ea444859b033f400652414630290","src/custom_punctuation.rs":"c2c344c9cdbebc66a5833eb0f9ded817ce9807dbb75b616283b2b60960d1ffb0","src/data.rs":"921eecc6a9909004019922c2f8fe217f45f75556c8d7e59d731c43b825330539","src/derive.rs":"8bb147e2e4490ccd891d71c5889904fb2718a2204d4597e337b3a8926f0fd52c","src/discouraged.rs":"c9b99a0447514d293a18cd5f61bdb257700c467cb74a60dd172fda9991bc8582","src/drops.rs":"013385f1dd95663f1afab41abc1e2eea04181998644828935ca564c74d6462ae","src/error.rs":"f0d80891d42459ae9d490f2c5a749b501f3d06acc6ae2429c3558fd5dea38d0d","src/export.rs":"b260cc49da1da3489e7755832bc8015cfad79e84f6c74e237f65ae25a2385e56","src/expr.rs":"ce965ff999c27eb1b779d3ee17
e19fe1b3648ce8c3ed112ef9b8b87fb74396c9","src/ext.rs":"ed143b029af286e62ceb4310286a4ce894792dd588465face042b4199b39d329","src/file.rs":"b608c101bab989ede6dcf1fc5d4b5b9a710c2c324f0e27a47dab1d5aff598e1d","src/gen/clone.rs":"0f452c91a02d94a8b093226f438b39116025e49ed318d9e7c4ef20649039839d","src/gen/debug.rs":"e76f1b4ac8d81160f88e2b052dc30ecb0be2c0fe910751fe9aa43664dcec9670","src/gen/eq.rs":"1cf713c2f0cf3995b3fad5a1bc82d25370aeeb2409699bbf6a20b9773fcc8b81","src/gen/fold.rs":"c7b2265eb17dc3a1ba48d8245077fa0155e66dc51d368355511af0391e655034","src/gen/hash.rs":"e9e9533c6862373826835229521343cdf81ae4107f40b35ca47138a31d5b5e0a","src/gen/visit.rs":"d0407584b46d39f3c914b5732ff9643727a18c4b397d7df7dd378985be3ab333","src/gen/visit_mut.rs":"cb298bbd475f18f71cdf1156a43d173a1da642c39035c8b8d813e3a4b2246d04","src/gen_helper.rs":"750caab67ba0ba11a95ea28cd38026485227bb4aa114cdb497472386f60fdb35","src/generics.rs":"628f5d7157106db018b6f901e6c85b5c40a9c4b3cdf2bb4787691d5f570f6bc0","src/group.rs":"f5911e9cf2dc2dffab546590167c48de30409cb7708aa3307d22be143df720e4","src/ident.rs":"f93f74d04776e749607e944e30ce4396ec146dce41c64710f4cd7f323b75dca4","src/item.rs":"f5ca9a2de9a4d1dbe638f671870a4feca6590dce279b34457069383dce8d4c3a","src/lib.rs":"9322106e691792b51e601bb8f1a6a4bda17e4988a1259d64bf1d83d62ce66967","src/lifetime.rs":"f77ac2f4ee9205c71db9c6b63e1b90eac4b5ecfa330c553b7bbbf5400a65150b","src/lit.rs":"cb30dfbeda8ee8d9859c46759ad7837a14ded79d522be23137111522a4e5e39f","src/lookahead.rs":"376092f91a1c32e1b277db0a6790fdda151c9ec51bd971fe6a6545b5b9e73b5d","src/mac.rs":"deacf75bb99af8ea0d6a5ca54504b75b8cf9d814ad7d4148bb5d533c7f1e1aed","src/macros.rs":"e68e246752d67307f9afb5ada6c1d689ddc0b0d9a51d40ba6191427d1e1b8bdc","src/meta.rs":"969d8ccbdbc6ea2e4928a21831b791c57447b231e1373149e4c63b46f3951801","src/op.rs":"410a5cbc596afa81cfdf66c65ba2e21be7105b819a2a705dc0c0e6363ff1a715","src/parse.rs":"3da4e04afb84f9c955d90cfd47e987b11add203bee53dfb53c5af593e6fe4bf6","src/parse_macro_input.rs":"4a753b2a6dbfefd6d
c93852d66b4f6d73ebd6b8b9be74019fc476f429b9a892d","src/parse_quote.rs":"c9065c39465543f91828d207d73354372a5aeda1d8d76c6ce47ddb7c33677c66","src/pat.rs":"d1fb776758e2fd74b3ef14d205d28c62e4bfd4279b37b7ca30af2683b51f6045","src/path.rs":"c5a496bb7911685cbf14c8a4526f1821139409779b8d3145f6c7c67ebb067ba2","src/print.rs":"22910bf0521ab868ebd7c62601c55912d12cfb400c65723e08e5cfa3a2d111c0","src/punctuated.rs":"e7e148c5d52713a6bf3f2057c251836dabd78feeee1f533c0baf7a1a83ee5806","src/restriction.rs":"e3bfe3c43083640224557f2130a8ea543ad929df66cb511d5ad661f534e3ab6b","src/sealed.rs":"6ece3b3dcb30f6bb98b93d83759ca7712ee8592bef9c0511141039c38765db0e","src/span.rs":"0a48e375e5c9768f6f64174a91ba6a255f4b021e2fb3548d8494e617f142601b","src/spanned.rs":"4b9bd65f60ab81922adfd0be8f03b6d50e98da3a5f525f242f9639aec4beac79","src/stmt.rs":"a4e7e9bea8b0d972499b92c2d19fa3f34e2fe4c32ed77198b2b88a30d1e445e0","src/thread.rs":"1f1deb1272525ab2af9a36aac4bce8f65b0e315adb1656641fd7075662f49222","src/token.rs":"e6b2373fb0465158d267bd25eac16266bd4b492d7dbe8ae4b5405a841a1ac57d","src/tt.rs":"32490509abcc4a5a3c7eb5628337172b3b49d30697d2f7b7df4d8045255c13da","src/ty.rs":"e5f754d037bfc32a99dc5bddbba05c6b405e3b2655d95fdcab05d6d8cf17612d","src/verbatim.rs":"87cbe82a90f48efb57ffd09141042698b3e011a21d0d5412154d80324b0a5ef0","src/whitespace.rs":"718a80c12cdd145358e2690f0f68ff7779a91ec17ce9fde9bb755f635fce69ad","tests/common/eq.rs":"396f03496a2c648bb2700a390cb32b11c74615235b449f5a2a6185fff04e2927","tests/common/mod.rs":"432ad35577f836a20b517d8c26ed994ac25fe73ef2f461c67688b61b99762015","tests/common/parse.rs":"a69236cfb2f73bb83bddd4c20a4130a73d1d8b74679eda3272bc5098a5b4d534","tests/debug/gen.rs":"daa5be1f3dc2379d5067137e0106874e0f3ed5eba7f2a8436fdada0bf33d7186","tests/debug/mod.rs":"b56136586267ae1812a937b69215dd053ada2c21717771d89dcd3ce52bcb27f5","tests/macros/mod.rs":"64b0da858096e7cf0f772e66bc1787a867e45897d7677de580c0a1f35c0f6852","tests/regression.rs":"e9565ea0efecb4136f099164ffcfa26e1996b0a27fb9c6659e90ad9bdd42e7b6"
,"tests/regression/issue1108.rs":"f32db35244a674e22ff824ca9e5bbec2184e287b59f022db68c418b5878a2edc","tests/regression/issue1235.rs":"a2266b10c3f7c7af5734817ab0a3e8b309b51e7d177b63f26e67e6b744d280b0","tests/repo/mod.rs":"d92f0f9afa0613e95ab8ed1e7ec9b4d863b6eff5b790371ed8eac86f44902bf1","tests/repo/progress.rs":"c08d0314a7f3ecf760d471f27da3cd2a500aeb9f1c8331bffb2aa648f9fabf3f","tests/test_asyncness.rs":"8982f6bc4e36510f924e288247473403e72697389ce9dda4e4b5ab0a8e49259f","tests/test_attribute.rs":"b35550a43bbd187bb330997ba36f90c65d8fc489135b1d32ef4547f145cb7612","tests/test_derive_input.rs":"99c4e6e45e3322ea9e269b309059c8a00fda1dcc03aed41f6e7d8c7e0a72fa2b","tests/test_expr.rs":"f7726efc959b860aa8aca6c51bc10c466cb1957774c1dfac24c3907cd8ea99a6","tests/test_generics.rs":"2fcc8575d695b568f3724b3b33d853b8fa6d9864eb816b5e3ca82420682e6155","tests/test_grouping.rs":"ecbe3324878b2e2be42640a3dec198620cff18731fcb95ee7e94eacd11d2fec1","tests/test_ident.rs":"9eb53d1e21edf23e7c9e14dc74dcc2b2538e9221e19dbcc0a44e3acc2e90f3f6","tests/test_item.rs":"9398997f2be33c89de52eb40f8c2fce86cf4ce5810fe709d2f20916ed6e2bb47","tests/test_iterators.rs":"f4dacb5f3a8e0473dfb0d27f05270d41e79eddb4759b1fad3e88e379b4731e17","tests/test_lit.rs":"8e30c2d7837673a742d77aef01212788bbd099182dd5c1d10ee474cfeb786c39","tests/test_meta.rs":"70fd75b42d1d913f05825c9e8280a4802e81de0b2343ad876850d2b7c588f0bf","tests/test_parse_buffer.rs":"92f5e898c1a6625011497f8cc8684eac0311850566ae9ab1848444305c9bdddf","tests/test_parse_quote.rs":"5bb7ec6773c3b878b3abedf17952948e707d8990b7e131605ee03d31f9ecae5b","tests/test_parse_stream.rs":"91a7ec997ea67d3c9d3028495345d89f2f67eb01bf11af3f99a1cef42a41aa05","tests/test_pat.rs":"8467fbef7cba36e6ce105cbc1a038b13ec154505bd34c863a18cfdfeac02c0b1","tests/test_path.rs":"b202244f034e58bf17e4c39bef696b0567e0ed42a63427ed4866acd14aaa90df","tests/test_precedence.rs":"f8b57c4a10ad249d3c6984cbe5db56a0c1e1381900dbc837fdc6f0e7f19221a7","tests/test_receiver.rs":"af64117acd66fbf42edc476f731ecd20c88009d9c
b641dbd7a1d6384ae99ae73","tests/test_round_trip.rs":"23081225e68451398402d261c715cae897c84bcf0ca251f685a0603ba740a0da","tests/test_shebang.rs":"98e8a6690c04e0aad2893b747593620b51836fe704f50f5c6fe352609837138a","tests/test_should_parse.rs":"1d3535698a446e2755bfc360676bdb161841a1f454cdef6e7556c6d06a95c89d","tests/test_size.rs":"b446868d55ae820196aad03dba3aa3529b727ac0465113c6fc66706423a73350","tests/test_stmt.rs":"761946f7d020f37dcc9f3a6c4b17c8d26c30d609193ac13c0672a2833b80f6dc","tests/test_token_trees.rs":"d012da9c3c861073711b006bf6ffdc073821fb9fb0a08733628cdae57124d1f5","tests/test_ty.rs":"e0262eb0c65c06a5bd8781cf0e256d7f093182202e8160623f00de98594845d2","tests/test_visibility.rs":"cf4c93997cd88821ef7f8e2dd5d1586175cce4614407cd3bdf371ecc4d8abc44","tests/zzz_stable.rs":"2a862e59cb446235ed99aec0e6ada8e16d3ecc30229b29d825b7c0bbc2602989"},"package":"6ab617d94515e94ae53b8406c628598680aa0c9587474ecbe58188f7b345d66c"} \ No newline at end of file diff --git a/vendor/syn/Cargo.toml b/vendor/syn/Cargo.toml index 6674872..8ff7618 100644 --- a/vendor/syn/Cargo.toml +++ b/vendor/syn/Cargo.toml @@ -13,7 +13,7 @@ edition = "2021" rust-version = "1.56" name = "syn" -version = "2.0.15" +version = "2.0.51" authors = ["David Tolnay <dtolnay@gmail.com>"] include = [ "/benches/**", @@ -43,6 +43,7 @@ all-features = true rustdoc-args = [ "--cfg", "doc_cfg", + "--generate-link-to-definition", ] targets = ["x86_64-unknown-linux-gnu"] @@ -74,11 +75,11 @@ required-features = [ ] [dependencies.proc-macro2] -version = "1.0.55" +version = "1.0.75" default-features = false [dependencies.quote] -version = "1.0.25" +version = "1.0.35" optional = true default-features = false @@ -103,9 +104,6 @@ version = "1" [dev-dependencies.ref-cast] version = "1" -[dev-dependencies.regex] -version = "1" - [dev-dependencies.reqwest] version = "0.11" features = ["blocking"] diff --git a/vendor/syn/README.md b/vendor/syn/README.md index 24aea17..e8d99ab 100644 --- a/vendor/syn/README.md +++ b/vendor/syn/README.md 
@@ -39,12 +39,12 @@ contains some APIs that may be useful more generally. procedural macros enable only what they need, and do not pay in compile time for all the rest. -[`syn::File`]: https://docs.rs/syn/1.0/syn/struct.File.html -[`syn::Item`]: https://docs.rs/syn/1.0/syn/enum.Item.html -[`syn::Expr`]: https://docs.rs/syn/1.0/syn/enum.Expr.html -[`syn::Type`]: https://docs.rs/syn/1.0/syn/enum.Type.html -[`syn::DeriveInput`]: https://docs.rs/syn/1.0/syn/struct.DeriveInput.html -[parser functions]: https://docs.rs/syn/1.0/syn/parse/index.html +[`syn::File`]: https://docs.rs/syn/2.0/syn/struct.File.html +[`syn::Item`]: https://docs.rs/syn/2.0/syn/enum.Item.html +[`syn::Expr`]: https://docs.rs/syn/2.0/syn/enum.Expr.html +[`syn::Type`]: https://docs.rs/syn/2.0/syn/enum.Type.html +[`syn::DeriveInput`]: https://docs.rs/syn/2.0/syn/struct.DeriveInput.html +[parser functions]: https://docs.rs/syn/2.0/syn/parse/index.html *Version requirement: Syn supports rustc 1.56 and up.* diff --git a/vendor/syn/benches/rust.rs b/vendor/syn/benches/rust.rs index ce6cfde..2f33708 100644 --- a/vendor/syn/benches/rust.rs +++ b/vendor/syn/benches/rust.rs @@ -6,6 +6,7 @@ #![cfg_attr(not(syn_only), feature(rustc_private))] #![recursion_limit = "1024"] #![allow( + clippy::arc_with_non_send_sync, clippy::cast_lossless, clippy::let_underscore_untyped, clippy::manual_let_else, @@ -53,7 +54,7 @@ mod librustc_parse { use rustc_data_structures::sync::Lrc; use rustc_error_messages::FluentBundle; - use rustc_errors::{emitter::Emitter, translation::Translate, Diagnostic, Handler}; + use rustc_errors::{emitter::Emitter, translation::Translate, DiagCtxt, Diagnostic}; use rustc_session::parse::ParseSess; use rustc_span::source_map::{FilePathMapping, SourceMap}; use rustc_span::{edition::Edition, FileName}; @@ -62,7 +63,7 @@ mod librustc_parse { struct SilentEmitter; impl Emitter for SilentEmitter { - fn emit_diagnostic(&mut self, _diag: &Diagnostic) {} + fn emit_diagnostic(&mut self, _diag: Diagnostic) {} 
fn source_map(&self) -> Option<&Lrc<SourceMap>> { None } @@ -78,10 +79,10 @@ mod librustc_parse { } rustc_span::create_session_if_not_set_then(Edition::Edition2018, |_| { - let cm = Lrc::new(SourceMap::new(FilePathMapping::empty())); + let source_map = Lrc::new(SourceMap::new(FilePathMapping::empty())); let emitter = Box::new(SilentEmitter); - let handler = Handler::with_emitter(false, None, emitter); - let sess = ParseSess::with_span_handler(handler, cm); + let handler = DiagCtxt::with_emitter(emitter); + let sess = ParseSess::with_dcx(handler, source_map); if let Err(diagnostic) = rustc_parse::parse_crate_from_source_str( FileName::Custom("bench".to_owned()), content.to_owned(), diff --git a/vendor/syn/src/attr.rs b/vendor/syn/src/attr.rs index 34d5515..589e427 100644 --- a/vendor/syn/src/attr.rs +++ b/vendor/syn/src/attr.rs @@ -1,12 +1,20 @@ -use super::*; -use proc_macro2::TokenStream; -use std::iter; -use std::slice; - +#[cfg(feature = "parsing")] +use crate::error::Error; +#[cfg(feature = "parsing")] +use crate::error::Result; +use crate::expr::Expr; +use crate::mac::MacroDelimiter; #[cfg(feature = "parsing")] use crate::meta::{self, ParseNestedMeta}; #[cfg(feature = "parsing")] -use crate::parse::{Parse, ParseStream, Parser, Result}; +use crate::parse::{Parse, ParseStream, Parser}; +use crate::path::Path; +use crate::token; +use proc_macro2::TokenStream; +#[cfg(feature = "printing")] +use std::iter; +#[cfg(feature = "printing")] +use std::slice; ast_struct! { /// An attribute, like `#[repr(transparent)]`. @@ -77,9 +85,9 @@ ast_struct! { /// [`Attribute::parse_outer`] or [`Attribute::parse_inner`] depending on /// which you intend to parse. 
/// - /// [`Parse`]: parse::Parse - /// [`ParseStream::parse`]: parse::ParseBuffer::parse - /// [`ParseStream::call`]: parse::ParseBuffer::call + /// [`Parse`]: crate::parse::Parse + /// [`ParseStream::parse`]: crate::parse::ParseBuffer::parse + /// [`ParseStream::call`]: crate::parse::ParseBuffer::call /// /// ``` /// use syn::{Attribute, Ident, Result, Token}; @@ -460,7 +468,7 @@ ast_enum_of_structs! { /// /// This type is a [syntax tree enum]. /// - /// [syntax tree enum]: Expr#syntax-tree-enums + /// [syntax tree enum]: crate::expr::Expr#syntax-tree-enums #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] pub enum Meta { Path(Path), @@ -582,13 +590,16 @@ impl MetaList { } } +#[cfg(feature = "printing")] pub(crate) trait FilterAttrs<'a> { type Ret: Iterator<Item = &'a Attribute>; fn outer(self) -> Self::Ret; + #[cfg(feature = "full")] fn inner(self) -> Self::Ret; } +#[cfg(feature = "printing")] impl<'a> FilterAttrs<'a> for &'a [Attribute] { type Ret = iter::Filter<slice::Iter<'a, Attribute>, fn(&&Attribute) -> bool>; @@ -602,6 +613,7 @@ impl<'a> FilterAttrs<'a> for &'a [Attribute] { self.iter().filter(is_outer) } + #[cfg(feature = "full")] fn inner(self) -> Self::Ret { fn is_inner(attr: &&Attribute) -> bool { match attr.style { @@ -615,14 +627,19 @@ impl<'a> FilterAttrs<'a> for &'a [Attribute] { #[cfg(feature = "parsing")] pub(crate) mod parsing { - use super::*; - use crate::parse::discouraged::Speculative; - use crate::parse::{Parse, ParseStream, Result}; + use crate::attr::{AttrStyle, Attribute, Meta, MetaList, MetaNameValue}; + use crate::error::Result; + use crate::expr::{Expr, ExprLit}; + use crate::lit::Lit; + use crate::parse::discouraged::Speculative as _; + use crate::parse::{Parse, ParseStream}; + use crate::path::Path; + use crate::{mac, token}; use std::fmt::{self, Display}; pub(crate) fn parse_inner(input: ParseStream, attrs: &mut Vec<Attribute>) -> Result<()> { while input.peek(Token![#]) && input.peek2(Token![!]) { - 
attrs.push(input.call(parsing::single_parse_inner)?); + attrs.push(input.call(single_parse_inner)?); } Ok(()) } @@ -740,7 +757,7 @@ pub(crate) mod parsing { #[cfg(feature = "printing")] mod printing { - use super::*; + use crate::attr::{AttrStyle, Attribute, MetaList, MetaNameValue}; use proc_macro2::TokenStream; use quote::ToTokens; diff --git a/vendor/syn/src/buffer.rs b/vendor/syn/src/buffer.rs index e16f2ad..86dec46 100644 --- a/vendor/syn/src/buffer.rs +++ b/vendor/syn/src/buffer.rs @@ -5,11 +5,6 @@ // Syn, and caution should be used when editing it. The public-facing interface // is 100% safe but the implementation is fragile internally. -#[cfg(all( - not(all(target_arch = "wasm32", any(target_os = "unknown", target_os = "wasi"))), - feature = "proc-macro" -))] -use crate::proc_macro as pm; use crate::Lifetime; use proc_macro2::extra::DelimSpan; use proc_macro2::{Delimiter, Group, Ident, Literal, Punct, Spacing, Span, TokenStream, TokenTree}; @@ -60,12 +55,9 @@ impl TokenBuffer { /// Creates a `TokenBuffer` containing all the tokens from the input /// `proc_macro::TokenStream`. - #[cfg(all( - not(all(target_arch = "wasm32", any(target_os = "unknown", target_os = "wasi"))), - feature = "proc-macro" - ))] + #[cfg(feature = "proc-macro")] #[cfg_attr(doc_cfg, doc(cfg(feature = "proc-macro")))] - pub fn new(stream: pm::TokenStream) -> Self { + pub fn new(stream: proc_macro::TokenStream) -> Self { Self::new2(stream.into()) } @@ -136,11 +128,11 @@ impl<'a> Cursor<'a> { // past it, unless `ptr == scope`, which means that we're at the edge of // our cursor's scope. We should only have `ptr != scope` at the exit // from None-delimited groups entered with `ignore_none`. 
- while let Entry::End(_) = *ptr { + while let Entry::End(_) = unsafe { &*ptr } { if ptr == scope { break; } - ptr = ptr.add(1); + ptr = unsafe { ptr.add(1) }; } Cursor { @@ -162,7 +154,7 @@ impl<'a> Cursor<'a> { /// If the cursor is looking at an `Entry::Group`, the bumped cursor will /// point at the first token in the group (with the same scope end). unsafe fn bump_ignore_group(self) -> Cursor<'a> { - Cursor::create(self.ptr.offset(1), self.scope) + unsafe { Cursor::create(self.ptr.offset(1), self.scope) } } /// While the cursor is looking at a `None`-delimited group, move it to look @@ -397,7 +389,7 @@ impl<'a> PartialEq for Cursor<'a> { impl<'a> PartialOrd for Cursor<'a> { fn partial_cmp(&self, other: &Self) -> Option<Ordering> { if same_buffer(*self, *other) { - Some(self.ptr.cmp(&other.ptr)) + Some(cmp_assuming_same_buffer(*self, *other)) } else { None } @@ -421,7 +413,6 @@ fn start_of_buffer(cursor: Cursor) -> *const Entry { } } -#[cfg(any(feature = "full", feature = "derive"))] pub(crate) fn cmp_assuming_same_buffer(a: Cursor, b: Cursor) -> Ordering { a.ptr.cmp(&b.ptr) } diff --git a/vendor/syn/src/custom_keyword.rs b/vendor/syn/src/custom_keyword.rs index 379d159..6ce23db 100644 --- a/vendor/syn/src/custom_keyword.rs +++ b/vendor/syn/src/custom_keyword.rs @@ -128,7 +128,7 @@ macro_rules! custom_keyword { macro_rules! impl_parse_for_custom_keyword { ($ident:ident) => { // For peek. - impl $crate::token::CustomToken for $ident { + impl $crate::__private::CustomToken for $ident { fn peek(cursor: $crate::buffer::Cursor) -> $crate::__private::bool { if let $crate::__private::Some((ident, _rest)) = cursor.ident() { ident == $crate::__private::stringify!($ident) @@ -224,7 +224,7 @@ macro_rules! impl_clone_for_custom_keyword { macro_rules! 
impl_extra_traits_for_custom_keyword { ($ident:ident) => { impl $crate::__private::Debug for $ident { - fn fmt(&self, f: &mut $crate::__private::Formatter) -> $crate::__private::fmt::Result { + fn fmt(&self, f: &mut $crate::__private::Formatter) -> $crate::__private::FmtResult { $crate::__private::Formatter::write_str( f, $crate::__private::concat!( diff --git a/vendor/syn/src/custom_punctuation.rs b/vendor/syn/src/custom_punctuation.rs index e8cbcd2..1b2c768 100644 --- a/vendor/syn/src/custom_punctuation.rs +++ b/vendor/syn/src/custom_punctuation.rs @@ -113,8 +113,8 @@ macro_rules! custom_punctuation { #[macro_export] macro_rules! impl_parse_for_custom_punctuation { ($ident:ident, $($tt:tt)+) => { - impl $crate::token::CustomToken for $ident { - fn peek(cursor: $crate::buffer::Cursor) -> bool { + impl $crate::__private::CustomToken for $ident { + fn peek(cursor: $crate::buffer::Cursor) -> $crate::__private::bool { $crate::__private::peek_punct(cursor, $crate::stringify_punct!($($tt)+)) } @@ -195,7 +195,7 @@ macro_rules! impl_clone_for_custom_punctuation { macro_rules! impl_extra_traits_for_custom_punctuation { ($ident:ident, $($tt:tt)+) => { impl $crate::__private::Debug for $ident { - fn fmt(&self, f: &mut $crate::__private::Formatter) -> $crate::__private::fmt::Result { + fn fmt(&self, f: &mut $crate::__private::Formatter) -> $crate::__private::FmtResult { $crate::__private::Formatter::write_str(f, $crate::__private::stringify!($ident)) } } diff --git a/vendor/syn/src/data.rs b/vendor/syn/src/data.rs index 185f88b..9fb97e4 100644 --- a/vendor/syn/src/data.rs +++ b/vendor/syn/src/data.rs @@ -1,5 +1,10 @@ -use super::*; -use crate::punctuated::Punctuated; +use crate::attr::Attribute; +use crate::expr::Expr; +use crate::ident::Ident; +use crate::punctuated::{self, Punctuated}; +use crate::restriction::{FieldMutability, Visibility}; +use crate::token; +use crate::ty::Type; ast_struct! { /// An enum variant. @@ -25,7 +30,7 @@ ast_enum_of_structs! 
{ /// /// This type is a [syntax tree enum]. /// - /// [syntax tree enum]: Expr#syntax-tree-enums + /// [syntax tree enum]: crate::expr::Expr#syntax-tree-enums #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] pub enum Fields { /// Named fields of a struct or struct variant such as `Point { x: f64, @@ -155,9 +160,19 @@ ast_struct! { #[cfg(feature = "parsing")] pub(crate) mod parsing { - use super::*; - use crate::ext::IdentExt; - use crate::parse::{Parse, ParseStream, Result}; + use crate::attr::Attribute; + use crate::data::{Field, Fields, FieldsNamed, FieldsUnnamed, Variant}; + use crate::error::Result; + use crate::expr::Expr; + use crate::ext::IdentExt as _; + use crate::ident::Ident; + #[cfg(not(feature = "full"))] + use crate::parse::discouraged::Speculative as _; + use crate::parse::{Parse, ParseStream}; + use crate::restriction::{FieldMutability, Visibility}; + use crate::token; + use crate::ty::Type; + use crate::verbatim; #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] impl Parse for Variant { @@ -174,7 +189,20 @@ pub(crate) mod parsing { }; let discriminant = if input.peek(Token![=]) { let eq_token: Token![=] = input.parse()?; + #[cfg(feature = "full")] let discriminant: Expr = input.parse()?; + #[cfg(not(feature = "full"))] + let discriminant = { + let begin = input.fork(); + let ahead = input.fork(); + let mut discriminant: Result<Expr> = ahead.parse(); + if discriminant.is_ok() { + input.advance_to(&ahead); + } else if scan_lenient_discriminant(input).is_ok() { + discriminant = Ok(Expr::Verbatim(verbatim::between(&begin, input))); + } + discriminant? 
+ }; Some((eq_token, discriminant)) } else { None @@ -188,6 +216,85 @@ pub(crate) mod parsing { } } + #[cfg(not(feature = "full"))] + pub(crate) fn scan_lenient_discriminant(input: ParseStream) -> Result<()> { + use crate::expr::Member; + use crate::lifetime::Lifetime; + use crate::lit::Lit; + use crate::lit::LitFloat; + use crate::op::{BinOp, UnOp}; + use crate::path::{self, AngleBracketedGenericArguments}; + use proc_macro2::Delimiter::{self, Brace, Bracket, Parenthesis}; + + let consume = |delimiter: Delimiter| { + Result::unwrap(input.step(|cursor| match cursor.group(delimiter) { + Some((_inside, _span, rest)) => Ok((true, rest)), + None => Ok((false, *cursor)), + })) + }; + + macro_rules! consume { + [$token:tt] => { + input.parse::<Option<Token![$token]>>().unwrap().is_some() + }; + } + + let mut initial = true; + let mut depth = 0usize; + loop { + if initial { + if consume![&] { + input.parse::<Option<Token![mut]>>()?; + } else if consume![if] || consume![match] || consume![while] { + depth += 1; + } else if input.parse::<Option<Lit>>()?.is_some() + || (consume(Brace) || consume(Bracket) || consume(Parenthesis)) + || (consume![async] || consume![const] || consume![loop] || consume![unsafe]) + && (consume(Brace) || break) + { + initial = false; + } else if consume![let] { + while !consume![=] { + if !((consume![|] || consume![ref] || consume![mut] || consume![@]) + || (consume![!] || input.parse::<Option<Lit>>()?.is_some()) + || (consume![..=] || consume![..] || consume![&] || consume![_]) + || (consume(Brace) || consume(Bracket) || consume(Parenthesis))) + { + path::parsing::qpath(input, true)?; + } + } + } else if input.parse::<Option<Lifetime>>()?.is_some() && !consume![:] { + break; + } else if input.parse::<UnOp>().is_err() { + path::parsing::qpath(input, true)?; + initial = consume![!] 
|| depth == 0 && input.peek(token::Brace); + } + } else if input.is_empty() || input.peek(Token![,]) { + return Ok(()); + } else if depth > 0 && consume(Brace) { + if consume![else] && !consume(Brace) { + initial = consume![if] || break; + } else { + depth -= 1; + } + } else if input.parse::<BinOp>().is_ok() || (consume![..] | consume![=]) { + initial = true; + } else if consume![.] { + if input.parse::<Option<LitFloat>>()?.is_none() + && (input.parse::<Member>()?.is_named() && consume![::]) + { + AngleBracketedGenericArguments::do_parse(None, input)?; + } + } else if consume![as] { + input.parse::<Type>()?; + } else if !(consume(Brace) || consume(Bracket) || consume(Parenthesis)) { + break; + } + } + + Err(input.error("unsupported expression")) + } + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] impl Parse for FieldsNamed { fn parse(input: ParseStream) -> Result<Self> { @@ -214,17 +321,37 @@ pub(crate) mod parsing { /// Parses a named (braced struct) field. #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] pub fn parse_named(input: ParseStream) -> Result<Self> { + let attrs = input.call(Attribute::parse_outer)?; + let vis: Visibility = input.parse()?; + + let unnamed_field = cfg!(feature = "full") && input.peek(Token![_]); + let ident = if unnamed_field { + input.call(Ident::parse_any) + } else { + input.parse() + }?; + + let colon_token: Token![:] = input.parse()?; + + let ty: Type = if unnamed_field + && (input.peek(Token![struct]) + || input.peek(Token![union]) && input.peek2(token::Brace)) + { + let begin = input.fork(); + input.call(Ident::parse_any)?; + input.parse::<FieldsNamed>()?; + Type::Verbatim(verbatim::between(&begin, input)) + } else { + input.parse()? 
+ }; + Ok(Field { - attrs: input.call(Attribute::parse_outer)?, - vis: input.parse()?, + attrs, + vis, mutability: FieldMutability::None, - ident: Some(if input.peek(Token![_]) { - input.call(Ident::parse_any) - } else { - input.parse() - }?), - colon_token: Some(input.parse()?), - ty: input.parse()?, + ident: Some(ident), + colon_token: Some(colon_token), + ty, }) } @@ -245,7 +372,7 @@ pub(crate) mod parsing { #[cfg(feature = "printing")] mod printing { - use super::*; + use crate::data::{Field, FieldsNamed, FieldsUnnamed, Variant}; use crate::print::TokensOrDefault; use proc_macro2::TokenStream; use quote::{ToTokens, TokenStreamExt}; diff --git a/vendor/syn/src/derive.rs b/vendor/syn/src/derive.rs index 25fa4c9..dbb2cf6 100644 --- a/vendor/syn/src/derive.rs +++ b/vendor/syn/src/derive.rs @@ -1,5 +1,10 @@ -use super::*; +use crate::attr::Attribute; +use crate::data::{Fields, FieldsNamed, Variant}; +use crate::generics::Generics; +use crate::ident::Ident; use crate::punctuated::Punctuated; +use crate::restriction::Visibility; +use crate::token; ast_struct! { /// Data structure sent to a `proc_macro_derive` macro. @@ -20,7 +25,7 @@ ast_enum! { /// /// This type is a [syntax tree enum]. /// - /// [syntax tree enum]: Expr#syntax-tree-enums + /// [syntax tree enum]: crate::expr::Expr#syntax-tree-enums #[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] pub enum Data { Struct(DataStruct), @@ -60,8 +65,16 @@ ast_struct! 
{ #[cfg(feature = "parsing")] pub(crate) mod parsing { - use super::*; - use crate::parse::{Parse, ParseStream, Result}; + use crate::attr::Attribute; + use crate::data::{Fields, FieldsNamed, Variant}; + use crate::derive::{Data, DataEnum, DataStruct, DataUnion, DeriveInput}; + use crate::error::Result; + use crate::generics::{Generics, WhereClause}; + use crate::ident::Ident; + use crate::parse::{Parse, ParseStream}; + use crate::punctuated::Punctuated; + use crate::restriction::Visibility; + use crate::token; #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] impl Parse for DeriveInput { @@ -193,8 +206,9 @@ pub(crate) mod parsing { #[cfg(feature = "printing")] mod printing { - use super::*; use crate::attr::FilterAttrs; + use crate::data::Fields; + use crate::derive::{Data, DeriveInput}; use crate::print::TokensOrDefault; use proc_macro2::TokenStream; use quote::ToTokens; diff --git a/vendor/syn/src/discouraged.rs b/vendor/syn/src/discouraged.rs index fb98d63..7ed51c9 100644 --- a/vendor/syn/src/discouraged.rs +++ b/vendor/syn/src/discouraged.rs @@ -1,7 +1,13 @@ //! Extensions to the parsing API with niche applicability. -use super::*; +use crate::buffer::Cursor; +use crate::error::Result; +use crate::parse::{inner_unexpected, ParseBuffer, Unexpected}; use proc_macro2::extra::DelimSpan; +use proc_macro2::Delimiter; +use std::cell::Cell; +use std::mem; +use std::rc::Rc; /// Extensions to the `ParseStream` API to support speculative parsing. pub trait Speculative { diff --git a/vendor/syn/src/error.rs b/vendor/syn/src/error.rs index 93f20f4..71247cd 100644 --- a/vendor/syn/src/error.rs +++ b/vendor/syn/src/error.rs @@ -185,6 +185,7 @@ impl Error { /// When in doubt it's recommended to stick to `Error::new` (or /// `ParseStream::error`)! 
#[cfg(feature = "printing")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] pub fn new_spanned<T: ToTokens, U: Display>(tokens: T, message: U) -> Self { return new_spanned(tokens.into_token_stream(), message.to_string()); @@ -385,7 +386,7 @@ impl Clone for Error { impl Clone for ErrorMessage { fn clone(&self) -> Self { ErrorMessage { - span: self.span.clone(), + span: self.span, message: self.message.clone(), } } @@ -403,7 +404,7 @@ impl std::error::Error for Error {} impl From<LexError> for Error { fn from(err: LexError) -> Self { - Error::new(err.span(), "lex error") + Error::new(err.span(), err) } } diff --git a/vendor/syn/src/export.rs b/vendor/syn/src/export.rs index c1c16f9..b9ea5c7 100644 --- a/vendor/syn/src/export.rs +++ b/vendor/syn/src/export.rs @@ -1,50 +1,73 @@ +#[doc(hidden)] pub use std::clone::Clone; +#[doc(hidden)] pub use std::cmp::{Eq, PartialEq}; +#[doc(hidden)] pub use std::concat; +#[doc(hidden)] pub use std::default::Default; -pub use std::fmt::{self, Debug, Formatter}; +#[doc(hidden)] +pub use std::fmt::Debug; +#[doc(hidden)] pub use std::hash::{Hash, Hasher}; +#[doc(hidden)] pub use std::marker::Copy; +#[doc(hidden)] pub use std::option::Option::{None, Some}; +#[doc(hidden)] pub use std::result::Result::{Err, Ok}; +#[doc(hidden)] pub use std::stringify; +#[doc(hidden)] +pub type Formatter<'a> = std::fmt::Formatter<'a>; +#[doc(hidden)] +pub type FmtResult = std::fmt::Result; + +#[doc(hidden)] +pub type bool = std::primitive::bool; +#[doc(hidden)] +pub type str = std::primitive::str; + #[cfg(feature = "printing")] +#[doc(hidden)] pub use quote; -pub use proc_macro2::{Span, TokenStream as TokenStream2}; +#[doc(hidden)] +pub type Span = proc_macro2::Span; +#[doc(hidden)] +pub type TokenStream2 = proc_macro2::TokenStream; #[cfg(feature = "parsing")] +#[doc(hidden)] pub use crate::group::{parse_braces, parse_brackets, parse_parens}; +#[doc(hidden)] pub use crate::span::IntoSpans; #[cfg(all(feature = "parsing", feature = "printing"))] 
+#[doc(hidden)] pub use crate::parse_quote::parse as parse_quote; #[cfg(feature = "parsing")] +#[doc(hidden)] pub use crate::token::parsing::{peek_punct, punct as parse_punct}; #[cfg(feature = "printing")] +#[doc(hidden)] pub use crate::token::printing::punct as print_punct; -#[cfg(all( - not(all(target_arch = "wasm32", any(target_os = "unknown", target_os = "wasi"))), - feature = "proc-macro" -))] -pub use proc_macro::TokenStream; +#[cfg(feature = "parsing")] +#[doc(hidden)] +pub use crate::token::private::CustomToken; + +#[cfg(feature = "proc-macro")] +#[doc(hidden)] +pub type TokenStream = proc_macro::TokenStream; #[cfg(feature = "printing")] +#[doc(hidden)] pub use quote::{ToTokens, TokenStreamExt}; -#[allow(non_camel_case_types)] -pub type bool = help::Bool; -#[allow(non_camel_case_types)] -pub type str = help::Str; - -mod help { - pub type Bool = bool; - pub type Str = str; -} - +#[doc(hidden)] pub struct private(pub(crate) ()); diff --git a/vendor/syn/src/expr.rs b/vendor/syn/src/expr.rs index 8a98202..4229864 100644 --- a/vendor/syn/src/expr.rs +++ b/vendor/syn/src/expr.rs @@ -1,12 +1,29 @@ -use super::*; +use crate::attr::Attribute; +#[cfg(feature = "full")] +use crate::generics::BoundLifetimes; +use crate::ident::Ident; +#[cfg(feature = "full")] +use crate::lifetime::Lifetime; +use crate::lit::Lit; +use crate::mac::Macro; +use crate::op::{BinOp, UnOp}; +#[cfg(feature = "full")] +use crate::pat::Pat; +use crate::path::{AngleBracketedGenericArguments, Path, QSelf}; use crate::punctuated::Punctuated; +#[cfg(feature = "full")] +use crate::stmt::Block; +use crate::token; +#[cfg(feature = "full")] +use crate::ty::ReturnType; +use crate::ty::Type; use proc_macro2::{Span, TokenStream}; #[cfg(feature = "printing")] use quote::IdentFragment; #[cfg(feature = "printing")] use std::fmt::{self, Display}; use std::hash::{Hash, Hasher}; -#[cfg(feature = "parsing")] +#[cfg(all(feature = "parsing", feature = "full"))] use std::mem; ast_enum_of_structs! 
{ @@ -223,12 +240,13 @@ ast_enum_of_structs! { // For testing exhaustiveness in downstream code, use the following idiom: // // match expr { + // #![cfg_attr(test, deny(non_exhaustive_omitted_patterns))] + // // Expr::Array(expr) => {...} // Expr::Assign(expr) => {...} // ... // Expr::Yield(expr) => {...} // - // #[cfg_attr(test, deny(non_exhaustive_omitted_patterns))] // _ => { /* some sane fallback */ } // } // @@ -357,6 +375,7 @@ ast_struct! { ast_struct! { /// A const block: `const { ... }`. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] pub struct ExprConst #full { pub attrs: Vec<Attribute>, pub const_token: Token![const], @@ -443,6 +462,7 @@ ast_struct! { ast_struct! { /// The inferred value of a const generic argument, denoted `_`. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] pub struct ExprInfer #full { pub attrs: Vec<Attribute>, pub underscore_token: Token![_], @@ -483,6 +503,7 @@ ast_struct! { ast_struct! { /// A macro invocation expression: `format!("{}", q)`. + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] pub struct ExprMacro { pub attrs: Vec<Attribute>, pub mac: Macro, @@ -503,8 +524,8 @@ ast_struct! { ast_struct! { /// A method call expression: `x.foo::<T>(a, b)`. - #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] - pub struct ExprMethodCall #full { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct ExprMethodCall { pub attrs: Vec<Attribute>, pub receiver: Box<Expr>, pub dot_token: Token![.], @@ -551,8 +572,8 @@ ast_struct! { ast_struct! { /// A referencing operation: `&a` or `&mut a`. - #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] - pub struct ExprReference #full { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct ExprReference { pub attrs: Vec<Attribute>, pub and_token: Token![&], pub mutability: Option<Token![mut]>, @@ -587,8 +608,8 @@ ast_struct! 
{ /// /// The `rest` provides the value of the remaining fields as in `S { a: /// 1, b: 1, ..rest }`. - #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] - pub struct ExprStruct #full { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] + pub struct ExprStruct { pub attrs: Vec<Attribute>, pub qself: Option<QSelf>, pub path: Path, @@ -840,10 +861,9 @@ impl IdentFragment for Index { } } -#[cfg(feature = "full")] ast_struct! { /// A field-value pair in a struct literal. - #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] pub struct FieldValue { pub attrs: Vec<Attribute>, pub member: Member, @@ -955,26 +975,10 @@ pub(crate) fn requires_terminator(expr: &Expr) -> bool { } #[cfg(feature = "parsing")] -pub(crate) mod parsing { - use super::*; - use crate::parse::discouraged::Speculative; - #[cfg(feature = "full")] - use crate::parse::ParseBuffer; - use crate::parse::{Parse, ParseStream, Result}; - use crate::path; - use std::cmp::Ordering; - - crate::custom_keyword!(raw); +mod precedence { + use super::BinOp; - // When we're parsing expressions which occur before blocks, like in an if - // statement's condition, we cannot parse a struct literal. 
- // - // Struct literals are ambiguous in certain positions - // https://github.com/rust-lang/rfcs/pull/92 - #[cfg(feature = "full")] - pub(crate) struct AllowStruct(bool); - - enum Precedence { + pub(crate) enum Precedence { Any, Assign, Range, @@ -991,7 +995,7 @@ pub(crate) mod parsing { } impl Precedence { - fn of(op: &BinOp) -> Self { + pub(crate) fn of(op: &BinOp) -> Self { match op { BinOp::Add(_) | BinOp::Sub(_) => Precedence::Arithmetic, BinOp::Mul(_) | BinOp::Div(_) | BinOp::Rem(_) => Precedence::Term, @@ -1020,6 +1024,69 @@ pub(crate) mod parsing { } } } +} + +#[cfg(feature = "parsing")] +pub(crate) mod parsing { + #[cfg(feature = "full")] + use crate::attr; + use crate::attr::Attribute; + use crate::error::{Error, Result}; + use crate::expr::precedence::Precedence; + #[cfg(feature = "full")] + use crate::expr::{ + requires_terminator, Arm, ExprArray, ExprAssign, ExprAsync, ExprAwait, ExprBlock, + ExprBreak, ExprClosure, ExprConst, ExprContinue, ExprForLoop, ExprIf, ExprInfer, ExprLet, + ExprLoop, ExprMatch, ExprRange, ExprRepeat, ExprReturn, ExprTry, ExprTryBlock, ExprTuple, + ExprUnsafe, ExprWhile, ExprYield, Label, RangeLimits, + }; + use crate::expr::{ + Expr, ExprBinary, ExprCall, ExprCast, ExprField, ExprGroup, ExprIndex, ExprLit, ExprMacro, + ExprMethodCall, ExprParen, ExprPath, ExprReference, ExprStruct, ExprUnary, FieldValue, + Index, Member, + }; + #[cfg(feature = "full")] + use crate::ext::IdentExt as _; + #[cfg(feature = "full")] + use crate::generics::BoundLifetimes; + use crate::ident::Ident; + #[cfg(feature = "full")] + use crate::lifetime::Lifetime; + use crate::lit::{Lit, LitFloat, LitInt}; + use crate::mac::{self, Macro}; + use crate::op::BinOp; + use crate::parse::discouraged::Speculative as _; + #[cfg(feature = "full")] + use crate::parse::ParseBuffer; + use crate::parse::{Parse, ParseStream}; + #[cfg(feature = "full")] + use crate::pat::{Pat, PatType}; + use crate::path::{self, AngleBracketedGenericArguments, Path, QSelf}; + use 
crate::punctuated::Punctuated; + #[cfg(feature = "full")] + use crate::stmt::Block; + use crate::token; + use crate::ty; + #[cfg(feature = "full")] + use crate::ty::{ReturnType, Type}; + use crate::verbatim; + #[cfg(feature = "full")] + use proc_macro2::TokenStream; + use std::cmp::Ordering; + use std::mem; + + mod kw { + crate::custom_keyword!(builtin); + crate::custom_keyword!(raw); + } + + // When we're parsing expressions which occur before blocks, like in an if + // statement's condition, we cannot parse a struct literal. + // + // Struct literals are ambiguous in certain positions + // https://github.com/rust-lang/rfcs/pull/92 + #[cfg(feature = "full")] + pub(crate) struct AllowStruct(bool); #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] impl Parse for Expr { @@ -1152,6 +1219,25 @@ pub(crate) mod parsing { } } + #[cfg(feature = "full")] + fn can_begin_expr(input: ParseStream) -> bool { + input.peek(Ident::peek_any) // value name or keyword + || input.peek(token::Paren) // tuple + || input.peek(token::Bracket) // array + || input.peek(token::Brace) // block + || input.peek(Lit) // literal + || input.peek(Token![!]) && !input.peek(Token![!=]) // operator not + || input.peek(Token![-]) && !input.peek(Token![-=]) && !input.peek(Token![->]) // unary minus + || input.peek(Token![*]) && !input.peek(Token![*=]) // dereference + || input.peek(Token![|]) && !input.peek(Token![|=]) // closure + || input.peek(Token![&]) && !input.peek(Token![&=]) // reference + || input.peek(Token![..]) // range notation + || input.peek(Token![<]) && !input.peek(Token![<=]) && !input.peek(Token![<<=]) // associated path + || input.peek(Token![::]) // global path + || input.peek(Lifetime) // labeled loop + || input.peek(Token![#]) // expression attributes + } + #[cfg(feature = "full")] fn parse_expr( input: ParseStream, @@ -1330,23 +1416,8 @@ pub(crate) mod parsing { #[cfg(feature = "full")] fn expr_attrs(input: ParseStream) -> Result<Vec<Attribute>> { let mut attrs = Vec::new(); - 
loop { - if input.peek(token::Group) { - let ahead = input.fork(); - let group = crate::group::parse_group(&ahead)?; - if !group.content.peek(Token![#]) || group.content.peek2(Token![!]) { - break; - } - let attr = group.content.call(attr::parsing::single_parse_outer)?; - if !group.content.is_empty() { - break; - } - attrs.push(attr); - } else if input.peek(Token![#]) { - attrs.push(input.call(attr::parsing::single_parse_outer)?); - } else { - break; - } + while !input.peek(token::Group) && input.peek(Token![#]) { + attrs.push(input.call(attr::parsing::single_parse_outer)?); } Ok(attrs) } @@ -1359,21 +1430,26 @@ pub(crate) mod parsing { fn unary_expr(input: ParseStream, allow_struct: AllowStruct) -> Result<Expr> { let begin = input.fork(); let attrs = input.call(expr_attrs)?; + if input.peek(token::Group) { + return trailer_expr(begin, attrs, input, allow_struct); + } + if input.peek(Token![&]) { let and_token: Token![&] = input.parse()?; - let raw: Option<raw> = - if input.peek(raw) && (input.peek2(Token![mut]) || input.peek2(Token![const])) { - Some(input.parse()?) - } else { - None - }; + let raw: Option<kw::raw> = if input.peek(kw::raw) + && (input.peek2(Token![mut]) || input.peek2(Token![const])) + { + Some(input.parse()?) 
+ } else { + None + }; let mutability: Option<Token![mut]> = input.parse()?; if raw.is_some() && mutability.is_none() { input.parse::<Token![const]>()?; } let expr = Box::new(unary_expr(input, allow_struct)?); if raw.is_some() { - Ok(Expr::Verbatim(verbatim::between(begin, input))) + Ok(Expr::Verbatim(verbatim::between(&begin, input))) } else { Ok(Expr::Reference(ExprReference { attrs, @@ -1391,7 +1467,14 @@ pub(crate) mod parsing { #[cfg(not(feature = "full"))] fn unary_expr(input: ParseStream) -> Result<Expr> { - if input.peek(Token![*]) || input.peek(Token![!]) || input.peek(Token![-]) { + if input.peek(Token![&]) { + Ok(Expr::Reference(ExprReference { + attrs: Vec::new(), + and_token: input.parse()?, + mutability: input.parse()?, + expr: Box::new(unary_expr(input)?), + })) + } else if input.peek(Token![*]) || input.peek(Token![!]) || input.peek(Token![-]) { Ok(Expr::Unary(ExprUnary { attrs: Vec::new(), op: input.parse()?, @@ -1419,7 +1502,7 @@ pub(crate) mod parsing { let mut e = trailer_helper(input, atom)?; if let Expr::Verbatim(tokens) = &mut e { - *tokens = verbatim::between(begin, input); + *tokens = verbatim::between(&begin, input); } else { let inner_attrs = e.replace_attrs(Vec::new()); attrs.extend(inner_attrs); @@ -1535,17 +1618,45 @@ pub(crate) mod parsing { && !input.peek2(Token![await]) { let mut dot_token: Token![.] = input.parse()?; + let float_token: Option<LitFloat> = input.parse()?; if let Some(float_token) = float_token { if multi_index(&mut e, &mut dot_token, float_token)? 
{ continue; } } + + let member: Member = input.parse()?; + let turbofish = if member.is_named() && input.peek(Token![::]) { + let colon2_token: Token![::] = input.parse()?; + let turbofish = + AngleBracketedGenericArguments::do_parse(Some(colon2_token), input)?; + Some(turbofish) + } else { + None + }; + + if turbofish.is_some() || input.peek(token::Paren) { + if let Member::Named(method) = member { + let content; + e = Expr::MethodCall(ExprMethodCall { + attrs: Vec::new(), + receiver: Box::new(e), + dot_token, + method, + turbofish, + paren_token: parenthesized!(content in input), + args: content.parse_terminated(Expr::parse, Token![,])?, + }); + continue; + } + } + e = Expr::Field(ExprField { attrs: Vec::new(), base: Box::new(e), dot_token, - member: input.parse()?, + member, }); } else if input.peek(token::Bracket) { let content; @@ -1567,12 +1678,8 @@ pub(crate) mod parsing { // interactions, as they are fully contained. #[cfg(feature = "full")] fn atom_expr(input: ParseStream, allow_struct: AllowStruct) -> Result<Expr> { - if input.peek(token::Group) - && !input.peek2(Token![::]) - && !input.peek2(Token![!]) - && !input.peek2(token::Brace) - { - input.call(expr_group).map(Expr::Group) + if input.peek(token::Group) { + expr_group(input, allow_struct) } else if input.peek(Lit) { input.parse().map(Expr::Lit) } else if input.peek(Token![async]) @@ -1591,6 +1698,8 @@ pub(crate) mod parsing { || input.peek(Token![async]) && (input.peek2(Token![|]) || input.peek2(Token![move])) { expr_closure(input, allow_struct).map(Expr::Closure) + } else if input.peek(kw::builtin) && input.peek2(Token![#]) { + expr_builtin(input) } else if input.peek(Ident) || input.peek(Token![::]) || input.peek(Token![<]) @@ -1608,7 +1717,7 @@ pub(crate) mod parsing { } else if input.peek(Token![continue]) { input.parse().map(Expr::Continue) } else if input.peek(Token![return]) { - expr_ret(input, allow_struct).map(Expr::Return) + expr_return(input, allow_struct).map(Expr::Return) } else if 
input.peek(token::Bracket) { array_or_repeat(input) } else if input.peek(Token![let]) { @@ -1636,39 +1745,40 @@ pub(crate) mod parsing { } else if input.peek(Token![_]) { input.parse().map(Expr::Infer) } else if input.peek(Lifetime) { - let the_label: Label = input.parse()?; - let mut expr = if input.peek(Token![while]) { - Expr::While(input.parse()?) - } else if input.peek(Token![for]) { - Expr::ForLoop(input.parse()?) - } else if input.peek(Token![loop]) { - Expr::Loop(input.parse()?) - } else if input.peek(token::Brace) { - Expr::Block(input.parse()?) - } else { - return Err(input.error("expected loop or block expression")); - }; - match &mut expr { - Expr::While(ExprWhile { label, .. }) - | Expr::ForLoop(ExprForLoop { label, .. }) - | Expr::Loop(ExprLoop { label, .. }) - | Expr::Block(ExprBlock { label, .. }) => *label = Some(the_label), - _ => unreachable!(), - } - Ok(expr) + atom_labeled(input) } else { - Err(input.error("expected expression")) + Err(input.error("expected an expression")) } } + #[cfg(feature = "full")] + fn atom_labeled(input: ParseStream) -> Result<Expr> { + let the_label: Label = input.parse()?; + let mut expr = if input.peek(Token![while]) { + Expr::While(input.parse()?) + } else if input.peek(Token![for]) { + Expr::ForLoop(input.parse()?) + } else if input.peek(Token![loop]) { + Expr::Loop(input.parse()?) + } else if input.peek(token::Brace) { + Expr::Block(input.parse()?) + } else { + return Err(input.error("expected loop or block expression")); + }; + match &mut expr { + Expr::While(ExprWhile { label, .. }) + | Expr::ForLoop(ExprForLoop { label, .. }) + | Expr::Loop(ExprLoop { label, .. }) + | Expr::Block(ExprBlock { label, .. 
}) => *label = Some(the_label), + _ => unreachable!(), + } + Ok(expr) + } + #[cfg(not(feature = "full"))] fn atom_expr(input: ParseStream) -> Result<Expr> { - if input.peek(token::Group) - && !input.peek2(Token![::]) - && !input.peek2(Token![!]) - && !input.peek2(token::Brace) - { - input.call(expr_group).map(Expr::Group) + if input.peek(token::Group) { + expr_group(input) } else if input.peek(Lit) { input.parse().map(Expr::Lit) } else if input.peek(token::Paren) { @@ -1685,16 +1795,55 @@ pub(crate) mod parsing { } else if input.is_empty() { Err(input.error("expected an expression")) } else { + if input.peek(token::Brace) { + let scan = input.fork(); + let content; + braced!(content in scan); + if content.parse::<Expr>().is_ok() && content.is_empty() { + let expr_block = verbatim::between(input, &scan); + input.advance_to(&scan); + return Ok(Expr::Verbatim(expr_block)); + } + } Err(input.error("unsupported expression; enable syn's features=[\"full\"]")) } } + #[cfg(feature = "full")] + fn expr_builtin(input: ParseStream) -> Result<Expr> { + let begin = input.fork(); + + input.parse::<kw::builtin>()?; + input.parse::<Token![#]>()?; + input.parse::<Ident>()?; + + let args; + parenthesized!(args in input); + args.parse::<TokenStream>()?; + + Ok(Expr::Verbatim(verbatim::between(&begin, input))) + } + fn path_or_macro_or_struct( input: ParseStream, #[cfg(feature = "full")] allow_struct: AllowStruct, ) -> Result<Expr> { let (qself, path) = path::parsing::qpath(input, true)?; + rest_of_path_or_macro_or_struct( + qself, + path, + input, + #[cfg(feature = "full")] + allow_struct, + ) + } + fn rest_of_path_or_macro_or_struct( + qself: Option<QSelf>, + path: Path, + input: ParseStream, + #[cfg(feature = "full")] allow_struct: AllowStruct, + ) -> Result<Expr> { if qself.is_none() && input.peek(Token![!]) && !input.peek(Token![!=]) @@ -1713,7 +1862,8 @@ pub(crate) mod parsing { })); } - #[cfg(feature = "full")] + #[cfg(not(feature = "full"))] + let allow_struct = (true,); if 
allow_struct.0 && input.peek(token::Brace) { return expr_struct_helper(input, qself, path).map(Expr::Struct); } @@ -1863,7 +2013,15 @@ pub(crate) mod parsing { #[cfg(feature = "full")] pub(crate) fn expr_early(input: ParseStream) -> Result<Expr> { let mut attrs = input.call(expr_attrs)?; - let mut expr = if input.peek(Token![if]) { + let mut expr = if input.peek(token::Group) { + let allow_struct = AllowStruct(true); + let atom = expr_group(input, allow_struct)?; + if continue_parsing_early(&atom) { + trailer_helper(input, atom)? + } else { + atom + } + } else if input.peek(Token![if]) { Expr::If(input.parse()?) } else if input.peek(Token![while]) { Expr::While(input.parse()?) @@ -1883,15 +2041,20 @@ pub(crate) mod parsing { Expr::Const(input.parse()?) } else if input.peek(token::Brace) { Expr::Block(input.parse()?) + } else if input.peek(Lifetime) { + atom_labeled(input)? } else { let allow_struct = AllowStruct(true); - let mut expr = unary_expr(input, allow_struct)?; + unary_expr(input, allow_struct)? 
+ }; + if continue_parsing_early(&expr) { attrs.extend(expr.replace_attrs(Vec::new())); expr.replace_attrs(attrs); + let allow_struct = AllowStruct(true); return parse_expr(input, expr, allow_struct, Precedence::Any); - }; + } if input.peek(Token![.]) && !input.peek(Token![..]) || input.peek(Token![?]) { expr = trailer_helper(input, expr)?; @@ -1908,6 +2071,25 @@ pub(crate) mod parsing { Ok(expr) } + #[cfg(feature = "full")] + fn continue_parsing_early(mut expr: &Expr) -> bool { + while let Expr::Group(group) = expr { + expr = &group.expr; + } + match expr { + Expr::If(_) + | Expr::While(_) + | Expr::ForLoop(_) + | Expr::Loop(_) + | Expr::Match(_) + | Expr::TryBlock(_) + | Expr::Unsafe(_) + | Expr::Const(_) + | Expr::Block(_) => false, + _ => true, + } + } + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] impl Parse for ExprLit { fn parse(input: ParseStream) -> Result<Self> { @@ -1918,13 +2100,38 @@ pub(crate) mod parsing { } } - fn expr_group(input: ParseStream) -> Result<ExprGroup> { + fn expr_group( + input: ParseStream, + #[cfg(feature = "full")] allow_struct: AllowStruct, + ) -> Result<Expr> { let group = crate::group::parse_group(input)?; - Ok(ExprGroup { + let mut inner: Expr = group.content.parse()?; + + match inner { + Expr::Path(mut expr) if expr.attrs.is_empty() => { + let grouped_len = expr.path.segments.len(); + Path::parse_rest(input, &mut expr.path, true)?; + match rest_of_path_or_macro_or_struct( + expr.qself, + expr.path, + input, + #[cfg(feature = "full")] + allow_struct, + )? 
{ + Expr::Path(expr) if expr.path.segments.len() == grouped_len => { + inner = Expr::Path(expr); + } + extended => return Ok(extended), + } + } + _ => {} + } + + Ok(Expr::Group(ExprGroup { attrs: Vec::new(), group_token: group.token, - expr: group.content.parse()?, - }) + expr: Box::new(inner), + })) } #[cfg(feature = "full")] @@ -1988,9 +2195,9 @@ pub(crate) mod parsing { let else_token: Token![else] = input.parse()?; let lookahead = input.lookahead1(); - let else_branch = if input.peek(Token![if]) { + let else_branch = if lookahead.peek(Token![if]) { input.parse().map(Expr::If)? - } else if input.peek(token::Brace) { + } else if lookahead.peek(token::Brace) { Expr::Block(ExprBlock { attrs: Vec::new(), label: None, @@ -2192,7 +2399,7 @@ pub(crate) mod parsing { impl Parse for ExprReturn { fn parse(input: ParseStream) -> Result<Self> { let allow_struct = AllowStruct(true); - expr_ret(input, allow_struct) + expr_return(input, allow_struct) } } @@ -2216,7 +2423,7 @@ pub(crate) mod parsing { attrs: Vec::new(), yield_token: input.parse()?, expr: { - if !input.is_empty() && !input.peek(Token![,]) && !input.peek(Token![;]) { + if can_begin_expr(input) { Some(input.parse()?) } else { None @@ -2411,34 +2618,46 @@ pub(crate) mod parsing { #[cfg(feature = "full")] fn expr_break(input: ParseStream, allow_struct: AllowStruct) -> Result<ExprBreak> { + let break_token: Token![break] = input.parse()?; + + let ahead = input.fork(); + let label: Option<Lifetime> = ahead.parse()?; + if label.is_some() && ahead.peek(Token![:]) { + // Not allowed: `break 'label: loop {...}` + // Parentheses are required. 
`break ('label: loop {...})` + let _ = ambiguous_expr(input, allow_struct)?; + let start_span = label.unwrap().apostrophe; + let end_span = input.cursor().prev_span(); + return Err(crate::error::new2( + start_span, + end_span, + "parentheses required", + )); + } + + input.advance_to(&ahead); + let expr = if can_begin_expr(input) && (allow_struct.0 || !input.peek(token::Brace)) { + let expr = ambiguous_expr(input, allow_struct)?; + Some(Box::new(expr)) + } else { + None + }; + Ok(ExprBreak { attrs: Vec::new(), - break_token: input.parse()?, - label: input.parse()?, - expr: { - if input.is_empty() - || input.peek(Token![,]) - || input.peek(Token![;]) - || !allow_struct.0 && input.peek(token::Brace) - { - None - } else { - let expr = ambiguous_expr(input, allow_struct)?; - Some(Box::new(expr)) - } - }, + break_token, + label, + expr, }) } #[cfg(feature = "full")] - fn expr_ret(input: ParseStream, allow_struct: AllowStruct) -> Result<ExprReturn> { + fn expr_return(input: ParseStream, allow_struct: AllowStruct) -> Result<ExprReturn> { Ok(ExprReturn { attrs: Vec::new(), return_token: input.parse()?, expr: { - if input.is_empty() || input.peek(Token![,]) || input.peek(Token![;]) { - None - } else { + if can_begin_expr(input) { // NOTE: return is greedy and eats blocks after it even when in a // position where structs are not allowed, such as in if statement // conditions. 
For example: @@ -2446,12 +2665,13 @@ pub(crate) mod parsing { // if return { println!("A") } {} // Prints "A" let expr = ambiguous_expr(input, allow_struct)?; Some(Box::new(expr)) + } else { + None } }, }) } - #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] impl Parse for FieldValue { fn parse(input: ParseStream) -> Result<Self> { @@ -2481,7 +2701,6 @@ pub(crate) mod parsing { } } - #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] impl Parse for ExprStruct { fn parse(input: ParseStream) -> Result<Self> { @@ -2490,7 +2709,6 @@ pub(crate) mod parsing { } } - #[cfg(feature = "full")] fn expr_struct_helper( input: ParseStream, qself: Option<QSelf>, @@ -2749,9 +2967,8 @@ pub(crate) mod parsing { Ok(!trailing_dot) } - #[cfg(feature = "full")] impl Member { - fn is_named(&self) -> bool { + pub(crate) fn is_named(&self) -> bool { match self { Member::Named(_) => true, Member::Unnamed(_) => false, @@ -2784,10 +3001,25 @@ pub(crate) mod parsing { #[cfg(feature = "printing")] pub(crate) mod printing { - use super::*; + use crate::attr::Attribute; #[cfg(feature = "full")] use crate::attr::FilterAttrs; - use proc_macro2::{Literal, TokenStream}; + #[cfg(feature = "full")] + use crate::expr::{ + requires_terminator, Arm, Expr, ExprArray, ExprAssign, ExprAsync, ExprAwait, ExprBlock, + ExprBreak, ExprClosure, ExprConst, ExprContinue, ExprForLoop, ExprIf, ExprInfer, ExprLet, + ExprLoop, ExprMatch, ExprRange, ExprRepeat, ExprReturn, ExprTry, ExprTryBlock, ExprTuple, + ExprUnsafe, ExprWhile, ExprYield, Label, RangeLimits, + }; + use crate::expr::{ + ExprBinary, ExprCall, ExprCast, ExprField, ExprGroup, ExprIndex, ExprLit, ExprMacro, + ExprMethodCall, ExprParen, ExprPath, ExprReference, ExprStruct, ExprUnary, FieldValue, + Index, Member, + }; + use crate::path; + #[cfg(feature = "full")] + use crate::token; + use proc_macro2::{Literal, Span, TokenStream}; use quote::{ToTokens, TokenStreamExt}; // If the given expression is a bare 
`ExprStruct`, wraps it in parenthesis @@ -3097,7 +3329,6 @@ pub(crate) mod printing { } } - #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] impl ToTokens for ExprMethodCall { fn to_tokens(&self, tokens: &mut TokenStream) { @@ -3141,7 +3372,6 @@ pub(crate) mod printing { } } - #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] impl ToTokens for ExprReference { fn to_tokens(&self, tokens: &mut TokenStream) { @@ -3175,7 +3405,6 @@ pub(crate) mod printing { } } - #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] impl ToTokens for ExprStruct { fn to_tokens(&self, tokens: &mut TokenStream) { @@ -3292,7 +3521,6 @@ pub(crate) mod printing { } } - #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] impl ToTokens for FieldValue { fn to_tokens(&self, tokens: &mut TokenStream) { diff --git a/vendor/syn/src/ext.rs b/vendor/syn/src/ext.rs index 9ee5672..5cd79e8 100644 --- a/vendor/syn/src/ext.rs +++ b/vendor/syn/src/ext.rs @@ -1,8 +1,9 @@ //! Extension traits to provide parsing methods on foreign types. use crate::buffer::Cursor; +use crate::error::Result; +use crate::parse::ParseStream; use crate::parse::Peek; -use crate::parse::{ParseStream, Result}; use crate::sealed::lookahead; use crate::token::CustomToken; use proc_macro2::Ident; diff --git a/vendor/syn/src/file.rs b/vendor/syn/src/file.rs index 2d9f298..760e5a6 100644 --- a/vendor/syn/src/file.rs +++ b/vendor/syn/src/file.rs @@ -1,10 +1,13 @@ -use super::*; +use crate::attr::Attribute; +use crate::item::Item; ast_struct! { /// A complete file of Rust source code. /// /// Typically `File` objects are created with [`parse_file`]. /// + /// [`parse_file`]: crate::parse_file + /// /// # Example /// /// Parse a Rust source file into a `syn::File` and print out a debug @@ -87,8 +90,10 @@ ast_struct! 
{ #[cfg(feature = "parsing")] pub(crate) mod parsing { - use super::*; - use crate::parse::{Parse, ParseStream, Result}; + use crate::attr::Attribute; + use crate::error::Result; + use crate::file::File; + use crate::parse::{Parse, ParseStream}; #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] impl Parse for File { @@ -110,8 +115,8 @@ pub(crate) mod parsing { #[cfg(feature = "printing")] mod printing { - use super::*; use crate::attr::FilterAttrs; + use crate::file::File; use proc_macro2::TokenStream; use quote::{ToTokens, TokenStreamExt}; diff --git a/vendor/syn/src/gen/clone.rs b/vendor/syn/src/gen/clone.rs index d275f51..3313d4d 100644 --- a/vendor/syn/src/gen/clone.rs +++ b/vendor/syn/src/gen/clone.rs @@ -2,12 +2,11 @@ // It is not intended for manual editing. #![allow(clippy::clone_on_copy, clippy::expl_impl_clone_on_copy)] -use crate::*; #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for Abi { +impl Clone for crate::Abi { fn clone(&self) -> Self { - Abi { + crate::Abi { extern_token: self.extern_token.clone(), name: self.name.clone(), } @@ -15,9 +14,9 @@ impl Clone for Abi { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for AngleBracketedGenericArguments { +impl Clone for crate::AngleBracketedGenericArguments { fn clone(&self) -> Self { - AngleBracketedGenericArguments { + crate::AngleBracketedGenericArguments { colon2_token: self.colon2_token.clone(), lt_token: self.lt_token.clone(), args: self.args.clone(), @@ -27,9 +26,9 @@ impl Clone for AngleBracketedGenericArguments { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for Arm { +impl Clone for crate::Arm { fn clone(&self) -> Self { - Arm { + crate::Arm { attrs: self.attrs.clone(), pat: self.pat.clone(), guard: self.guard.clone(), @@ -41,9 +40,9 @@ impl Clone for Arm { } #[cfg(any(feature = "derive", feature = 
"full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for AssocConst { +impl Clone for crate::AssocConst { fn clone(&self) -> Self { - AssocConst { + crate::AssocConst { ident: self.ident.clone(), generics: self.generics.clone(), eq_token: self.eq_token.clone(), @@ -53,9 +52,9 @@ impl Clone for AssocConst { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for AssocType { +impl Clone for crate::AssocType { fn clone(&self) -> Self { - AssocType { + crate::AssocType { ident: self.ident.clone(), generics: self.generics.clone(), eq_token: self.eq_token.clone(), @@ -65,19 +64,19 @@ impl Clone for AssocType { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Copy for AttrStyle {} +impl Copy for crate::AttrStyle {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for AttrStyle { +impl Clone for crate::AttrStyle { fn clone(&self) -> Self { *self } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for Attribute { +impl Clone for crate::Attribute { fn clone(&self) -> Self { - Attribute { + crate::Attribute { pound_token: self.pound_token.clone(), style: self.style.clone(), bracket_token: self.bracket_token.clone(), @@ -87,9 +86,9 @@ impl Clone for Attribute { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for BareFnArg { +impl Clone for crate::BareFnArg { fn clone(&self) -> Self { - BareFnArg { + crate::BareFnArg { attrs: self.attrs.clone(), name: self.name.clone(), ty: self.ty.clone(), @@ -98,9 +97,9 @@ impl Clone for BareFnArg { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for BareVariadic { +impl Clone for crate::BareVariadic { fn 
clone(&self) -> Self { - BareVariadic { + crate::BareVariadic { attrs: self.attrs.clone(), name: self.name.clone(), dots: self.dots.clone(), @@ -110,19 +109,19 @@ impl Clone for BareVariadic { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Copy for BinOp {} +impl Copy for crate::BinOp {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for BinOp { +impl Clone for crate::BinOp { fn clone(&self) -> Self { *self } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for Block { +impl Clone for crate::Block { fn clone(&self) -> Self { - Block { + crate::Block { brace_token: self.brace_token.clone(), stmts: self.stmts.clone(), } @@ -130,9 +129,9 @@ impl Clone for Block { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for BoundLifetimes { +impl Clone for crate::BoundLifetimes { fn clone(&self) -> Self { - BoundLifetimes { + crate::BoundLifetimes { for_token: self.for_token.clone(), lt_token: self.lt_token.clone(), lifetimes: self.lifetimes.clone(), @@ -142,9 +141,9 @@ impl Clone for BoundLifetimes { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ConstParam { +impl Clone for crate::ConstParam { fn clone(&self) -> Self { - ConstParam { + crate::ConstParam { attrs: self.attrs.clone(), const_token: self.const_token.clone(), ident: self.ident.clone(), @@ -157,9 +156,9 @@ impl Clone for ConstParam { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for Constraint { +impl Clone for crate::Constraint { fn clone(&self) -> Self { - Constraint { + crate::Constraint { ident: self.ident.clone(), generics: self.generics.clone(), colon_token: self.colon_token.clone(), @@ -169,20 +168,20 @@ impl 
Clone for Constraint { } #[cfg(feature = "derive")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for Data { +impl Clone for crate::Data { fn clone(&self) -> Self { match self { - Data::Struct(v0) => Data::Struct(v0.clone()), - Data::Enum(v0) => Data::Enum(v0.clone()), - Data::Union(v0) => Data::Union(v0.clone()), + crate::Data::Struct(v0) => crate::Data::Struct(v0.clone()), + crate::Data::Enum(v0) => crate::Data::Enum(v0.clone()), + crate::Data::Union(v0) => crate::Data::Union(v0.clone()), } } } #[cfg(feature = "derive")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for DataEnum { +impl Clone for crate::DataEnum { fn clone(&self) -> Self { - DataEnum { + crate::DataEnum { enum_token: self.enum_token.clone(), brace_token: self.brace_token.clone(), variants: self.variants.clone(), @@ -191,9 +190,9 @@ impl Clone for DataEnum { } #[cfg(feature = "derive")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for DataStruct { +impl Clone for crate::DataStruct { fn clone(&self) -> Self { - DataStruct { + crate::DataStruct { struct_token: self.struct_token.clone(), fields: self.fields.clone(), semi_token: self.semi_token.clone(), @@ -202,9 +201,9 @@ impl Clone for DataStruct { } #[cfg(feature = "derive")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for DataUnion { +impl Clone for crate::DataUnion { fn clone(&self) -> Self { - DataUnion { + crate::DataUnion { union_token: self.union_token.clone(), fields: self.fields.clone(), } @@ -212,9 +211,9 @@ impl Clone for DataUnion { } #[cfg(feature = "derive")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for DeriveInput { +impl Clone for crate::DeriveInput { fn clone(&self) -> Self { - DeriveInput { + crate::DeriveInput { attrs: self.attrs.clone(), vis: self.vis.clone(), ident: self.ident.clone(), @@ -225,75 +224,72 @@ impl Clone for DeriveInput { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, 
doc(cfg(feature = "clone-impls")))] -impl Clone for Expr { +impl Clone for crate::Expr { fn clone(&self) -> Self { match self { #[cfg(feature = "full")] - Expr::Array(v0) => Expr::Array(v0.clone()), + crate::Expr::Array(v0) => crate::Expr::Array(v0.clone()), #[cfg(feature = "full")] - Expr::Assign(v0) => Expr::Assign(v0.clone()), + crate::Expr::Assign(v0) => crate::Expr::Assign(v0.clone()), #[cfg(feature = "full")] - Expr::Async(v0) => Expr::Async(v0.clone()), + crate::Expr::Async(v0) => crate::Expr::Async(v0.clone()), #[cfg(feature = "full")] - Expr::Await(v0) => Expr::Await(v0.clone()), - Expr::Binary(v0) => Expr::Binary(v0.clone()), + crate::Expr::Await(v0) => crate::Expr::Await(v0.clone()), + crate::Expr::Binary(v0) => crate::Expr::Binary(v0.clone()), #[cfg(feature = "full")] - Expr::Block(v0) => Expr::Block(v0.clone()), + crate::Expr::Block(v0) => crate::Expr::Block(v0.clone()), #[cfg(feature = "full")] - Expr::Break(v0) => Expr::Break(v0.clone()), - Expr::Call(v0) => Expr::Call(v0.clone()), - Expr::Cast(v0) => Expr::Cast(v0.clone()), + crate::Expr::Break(v0) => crate::Expr::Break(v0.clone()), + crate::Expr::Call(v0) => crate::Expr::Call(v0.clone()), + crate::Expr::Cast(v0) => crate::Expr::Cast(v0.clone()), #[cfg(feature = "full")] - Expr::Closure(v0) => Expr::Closure(v0.clone()), + crate::Expr::Closure(v0) => crate::Expr::Closure(v0.clone()), #[cfg(feature = "full")] - Expr::Const(v0) => Expr::Const(v0.clone()), + crate::Expr::Const(v0) => crate::Expr::Const(v0.clone()), #[cfg(feature = "full")] - Expr::Continue(v0) => Expr::Continue(v0.clone()), - Expr::Field(v0) => Expr::Field(v0.clone()), + crate::Expr::Continue(v0) => crate::Expr::Continue(v0.clone()), + crate::Expr::Field(v0) => crate::Expr::Field(v0.clone()), #[cfg(feature = "full")] - Expr::ForLoop(v0) => Expr::ForLoop(v0.clone()), - Expr::Group(v0) => Expr::Group(v0.clone()), + crate::Expr::ForLoop(v0) => crate::Expr::ForLoop(v0.clone()), + crate::Expr::Group(v0) => crate::Expr::Group(v0.clone()), 
#[cfg(feature = "full")] - Expr::If(v0) => Expr::If(v0.clone()), - Expr::Index(v0) => Expr::Index(v0.clone()), + crate::Expr::If(v0) => crate::Expr::If(v0.clone()), + crate::Expr::Index(v0) => crate::Expr::Index(v0.clone()), #[cfg(feature = "full")] - Expr::Infer(v0) => Expr::Infer(v0.clone()), + crate::Expr::Infer(v0) => crate::Expr::Infer(v0.clone()), #[cfg(feature = "full")] - Expr::Let(v0) => Expr::Let(v0.clone()), - Expr::Lit(v0) => Expr::Lit(v0.clone()), + crate::Expr::Let(v0) => crate::Expr::Let(v0.clone()), + crate::Expr::Lit(v0) => crate::Expr::Lit(v0.clone()), #[cfg(feature = "full")] - Expr::Loop(v0) => Expr::Loop(v0.clone()), - Expr::Macro(v0) => Expr::Macro(v0.clone()), + crate::Expr::Loop(v0) => crate::Expr::Loop(v0.clone()), + crate::Expr::Macro(v0) => crate::Expr::Macro(v0.clone()), #[cfg(feature = "full")] - Expr::Match(v0) => Expr::Match(v0.clone()), + crate::Expr::Match(v0) => crate::Expr::Match(v0.clone()), + crate::Expr::MethodCall(v0) => crate::Expr::MethodCall(v0.clone()), + crate::Expr::Paren(v0) => crate::Expr::Paren(v0.clone()), + crate::Expr::Path(v0) => crate::Expr::Path(v0.clone()), #[cfg(feature = "full")] - Expr::MethodCall(v0) => Expr::MethodCall(v0.clone()), - Expr::Paren(v0) => Expr::Paren(v0.clone()), - Expr::Path(v0) => Expr::Path(v0.clone()), + crate::Expr::Range(v0) => crate::Expr::Range(v0.clone()), + crate::Expr::Reference(v0) => crate::Expr::Reference(v0.clone()), #[cfg(feature = "full")] - Expr::Range(v0) => Expr::Range(v0.clone()), + crate::Expr::Repeat(v0) => crate::Expr::Repeat(v0.clone()), #[cfg(feature = "full")] - Expr::Reference(v0) => Expr::Reference(v0.clone()), + crate::Expr::Return(v0) => crate::Expr::Return(v0.clone()), + crate::Expr::Struct(v0) => crate::Expr::Struct(v0.clone()), #[cfg(feature = "full")] - Expr::Repeat(v0) => Expr::Repeat(v0.clone()), + crate::Expr::Try(v0) => crate::Expr::Try(v0.clone()), #[cfg(feature = "full")] - Expr::Return(v0) => Expr::Return(v0.clone()), + crate::Expr::TryBlock(v0) => 
crate::Expr::TryBlock(v0.clone()), #[cfg(feature = "full")] - Expr::Struct(v0) => Expr::Struct(v0.clone()), + crate::Expr::Tuple(v0) => crate::Expr::Tuple(v0.clone()), + crate::Expr::Unary(v0) => crate::Expr::Unary(v0.clone()), #[cfg(feature = "full")] - Expr::Try(v0) => Expr::Try(v0.clone()), + crate::Expr::Unsafe(v0) => crate::Expr::Unsafe(v0.clone()), + crate::Expr::Verbatim(v0) => crate::Expr::Verbatim(v0.clone()), #[cfg(feature = "full")] - Expr::TryBlock(v0) => Expr::TryBlock(v0.clone()), + crate::Expr::While(v0) => crate::Expr::While(v0.clone()), #[cfg(feature = "full")] - Expr::Tuple(v0) => Expr::Tuple(v0.clone()), - Expr::Unary(v0) => Expr::Unary(v0.clone()), - #[cfg(feature = "full")] - Expr::Unsafe(v0) => Expr::Unsafe(v0.clone()), - Expr::Verbatim(v0) => Expr::Verbatim(v0.clone()), - #[cfg(feature = "full")] - Expr::While(v0) => Expr::While(v0.clone()), - #[cfg(feature = "full")] - Expr::Yield(v0) => Expr::Yield(v0.clone()), + crate::Expr::Yield(v0) => crate::Expr::Yield(v0.clone()), #[cfg(not(feature = "full"))] _ => unreachable!(), } @@ -301,9 +297,9 @@ impl Clone for Expr { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ExprArray { +impl Clone for crate::ExprArray { fn clone(&self) -> Self { - ExprArray { + crate::ExprArray { attrs: self.attrs.clone(), bracket_token: self.bracket_token.clone(), elems: self.elems.clone(), @@ -312,9 +308,9 @@ impl Clone for ExprArray { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ExprAssign { +impl Clone for crate::ExprAssign { fn clone(&self) -> Self { - ExprAssign { + crate::ExprAssign { attrs: self.attrs.clone(), left: self.left.clone(), eq_token: self.eq_token.clone(), @@ -324,9 +320,9 @@ impl Clone for ExprAssign { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ExprAsync { +impl Clone for crate::ExprAsync { fn clone(&self) -> Self { - ExprAsync { + 
crate::ExprAsync { attrs: self.attrs.clone(), async_token: self.async_token.clone(), capture: self.capture.clone(), @@ -336,9 +332,9 @@ impl Clone for ExprAsync { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ExprAwait { +impl Clone for crate::ExprAwait { fn clone(&self) -> Self { - ExprAwait { + crate::ExprAwait { attrs: self.attrs.clone(), base: self.base.clone(), dot_token: self.dot_token.clone(), @@ -348,9 +344,9 @@ impl Clone for ExprAwait { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ExprBinary { +impl Clone for crate::ExprBinary { fn clone(&self) -> Self { - ExprBinary { + crate::ExprBinary { attrs: self.attrs.clone(), left: self.left.clone(), op: self.op.clone(), @@ -360,9 +356,9 @@ impl Clone for ExprBinary { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ExprBlock { +impl Clone for crate::ExprBlock { fn clone(&self) -> Self { - ExprBlock { + crate::ExprBlock { attrs: self.attrs.clone(), label: self.label.clone(), block: self.block.clone(), @@ -371,9 +367,9 @@ impl Clone for ExprBlock { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ExprBreak { +impl Clone for crate::ExprBreak { fn clone(&self) -> Self { - ExprBreak { + crate::ExprBreak { attrs: self.attrs.clone(), break_token: self.break_token.clone(), label: self.label.clone(), @@ -383,9 +379,9 @@ impl Clone for ExprBreak { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ExprCall { +impl Clone for crate::ExprCall { fn clone(&self) -> Self { - ExprCall { + crate::ExprCall { attrs: self.attrs.clone(), func: self.func.clone(), paren_token: self.paren_token.clone(), @@ -395,9 +391,9 @@ impl Clone for ExprCall { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = 
"clone-impls")))] -impl Clone for ExprCast { +impl Clone for crate::ExprCast { fn clone(&self) -> Self { - ExprCast { + crate::ExprCast { attrs: self.attrs.clone(), expr: self.expr.clone(), as_token: self.as_token.clone(), @@ -407,9 +403,9 @@ impl Clone for ExprCast { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ExprClosure { +impl Clone for crate::ExprClosure { fn clone(&self) -> Self { - ExprClosure { + crate::ExprClosure { attrs: self.attrs.clone(), lifetimes: self.lifetimes.clone(), constness: self.constness.clone(), @@ -426,9 +422,9 @@ impl Clone for ExprClosure { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ExprConst { +impl Clone for crate::ExprConst { fn clone(&self) -> Self { - ExprConst { + crate::ExprConst { attrs: self.attrs.clone(), const_token: self.const_token.clone(), block: self.block.clone(), @@ -437,9 +433,9 @@ impl Clone for ExprConst { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ExprContinue { +impl Clone for crate::ExprContinue { fn clone(&self) -> Self { - ExprContinue { + crate::ExprContinue { attrs: self.attrs.clone(), continue_token: self.continue_token.clone(), label: self.label.clone(), @@ -448,9 +444,9 @@ impl Clone for ExprContinue { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ExprField { +impl Clone for crate::ExprField { fn clone(&self) -> Self { - ExprField { + crate::ExprField { attrs: self.attrs.clone(), base: self.base.clone(), dot_token: self.dot_token.clone(), @@ -460,9 +456,9 @@ impl Clone for ExprField { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ExprForLoop { +impl Clone for crate::ExprForLoop { fn clone(&self) -> Self { - ExprForLoop { + crate::ExprForLoop { attrs: self.attrs.clone(), label: self.label.clone(), for_token: 
self.for_token.clone(), @@ -475,9 +471,9 @@ impl Clone for ExprForLoop { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ExprGroup { +impl Clone for crate::ExprGroup { fn clone(&self) -> Self { - ExprGroup { + crate::ExprGroup { attrs: self.attrs.clone(), group_token: self.group_token.clone(), expr: self.expr.clone(), @@ -486,9 +482,9 @@ impl Clone for ExprGroup { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ExprIf { +impl Clone for crate::ExprIf { fn clone(&self) -> Self { - ExprIf { + crate::ExprIf { attrs: self.attrs.clone(), if_token: self.if_token.clone(), cond: self.cond.clone(), @@ -499,9 +495,9 @@ impl Clone for ExprIf { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ExprIndex { +impl Clone for crate::ExprIndex { fn clone(&self) -> Self { - ExprIndex { + crate::ExprIndex { attrs: self.attrs.clone(), expr: self.expr.clone(), bracket_token: self.bracket_token.clone(), @@ -511,9 +507,9 @@ impl Clone for ExprIndex { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ExprInfer { +impl Clone for crate::ExprInfer { fn clone(&self) -> Self { - ExprInfer { + crate::ExprInfer { attrs: self.attrs.clone(), underscore_token: self.underscore_token.clone(), } @@ -521,9 +517,9 @@ impl Clone for ExprInfer { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ExprLet { +impl Clone for crate::ExprLet { fn clone(&self) -> Self { - ExprLet { + crate::ExprLet { attrs: self.attrs.clone(), let_token: self.let_token.clone(), pat: self.pat.clone(), @@ -534,9 +530,9 @@ impl Clone for ExprLet { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ExprLit { +impl Clone for crate::ExprLit { fn clone(&self) -> Self { - 
ExprLit { + crate::ExprLit { attrs: self.attrs.clone(), lit: self.lit.clone(), } @@ -544,9 +540,9 @@ impl Clone for ExprLit { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ExprLoop { +impl Clone for crate::ExprLoop { fn clone(&self) -> Self { - ExprLoop { + crate::ExprLoop { attrs: self.attrs.clone(), label: self.label.clone(), loop_token: self.loop_token.clone(), @@ -556,9 +552,9 @@ impl Clone for ExprLoop { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ExprMacro { +impl Clone for crate::ExprMacro { fn clone(&self) -> Self { - ExprMacro { + crate::ExprMacro { attrs: self.attrs.clone(), mac: self.mac.clone(), } @@ -566,9 +562,9 @@ impl Clone for ExprMacro { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ExprMatch { +impl Clone for crate::ExprMatch { fn clone(&self) -> Self { - ExprMatch { + crate::ExprMatch { attrs: self.attrs.clone(), match_token: self.match_token.clone(), expr: self.expr.clone(), @@ -577,11 +573,11 @@ impl Clone for ExprMatch { } } } -#[cfg(feature = "full")] +#[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ExprMethodCall { +impl Clone for crate::ExprMethodCall { fn clone(&self) -> Self { - ExprMethodCall { + crate::ExprMethodCall { attrs: self.attrs.clone(), receiver: self.receiver.clone(), dot_token: self.dot_token.clone(), @@ -594,9 +590,9 @@ impl Clone for ExprMethodCall { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ExprParen { +impl Clone for crate::ExprParen { fn clone(&self) -> Self { - ExprParen { + crate::ExprParen { attrs: self.attrs.clone(), paren_token: self.paren_token.clone(), expr: self.expr.clone(), @@ -605,9 +601,9 @@ impl Clone for ExprParen { } #[cfg(any(feature = "derive", feature = "full"))] 
#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ExprPath { +impl Clone for crate::ExprPath { fn clone(&self) -> Self { - ExprPath { + crate::ExprPath { attrs: self.attrs.clone(), qself: self.qself.clone(), path: self.path.clone(), @@ -616,9 +612,9 @@ impl Clone for ExprPath { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ExprRange { +impl Clone for crate::ExprRange { fn clone(&self) -> Self { - ExprRange { + crate::ExprRange { attrs: self.attrs.clone(), start: self.start.clone(), limits: self.limits.clone(), @@ -626,11 +622,11 @@ impl Clone for ExprRange { } } } -#[cfg(feature = "full")] +#[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ExprReference { +impl Clone for crate::ExprReference { fn clone(&self) -> Self { - ExprReference { + crate::ExprReference { attrs: self.attrs.clone(), and_token: self.and_token.clone(), mutability: self.mutability.clone(), @@ -640,9 +636,9 @@ impl Clone for ExprReference { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ExprRepeat { +impl Clone for crate::ExprRepeat { fn clone(&self) -> Self { - ExprRepeat { + crate::ExprRepeat { attrs: self.attrs.clone(), bracket_token: self.bracket_token.clone(), expr: self.expr.clone(), @@ -653,20 +649,20 @@ impl Clone for ExprRepeat { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ExprReturn { +impl Clone for crate::ExprReturn { fn clone(&self) -> Self { - ExprReturn { + crate::ExprReturn { attrs: self.attrs.clone(), return_token: self.return_token.clone(), expr: self.expr.clone(), } } } -#[cfg(feature = "full")] +#[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ExprStruct { +impl Clone for crate::ExprStruct { fn clone(&self) -> Self { - ExprStruct { + crate::ExprStruct { 
attrs: self.attrs.clone(), qself: self.qself.clone(), path: self.path.clone(), @@ -679,9 +675,9 @@ impl Clone for ExprStruct { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ExprTry { +impl Clone for crate::ExprTry { fn clone(&self) -> Self { - ExprTry { + crate::ExprTry { attrs: self.attrs.clone(), expr: self.expr.clone(), question_token: self.question_token.clone(), @@ -690,9 +686,9 @@ impl Clone for ExprTry { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ExprTryBlock { +impl Clone for crate::ExprTryBlock { fn clone(&self) -> Self { - ExprTryBlock { + crate::ExprTryBlock { attrs: self.attrs.clone(), try_token: self.try_token.clone(), block: self.block.clone(), @@ -701,9 +697,9 @@ impl Clone for ExprTryBlock { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ExprTuple { +impl Clone for crate::ExprTuple { fn clone(&self) -> Self { - ExprTuple { + crate::ExprTuple { attrs: self.attrs.clone(), paren_token: self.paren_token.clone(), elems: self.elems.clone(), @@ -712,9 +708,9 @@ impl Clone for ExprTuple { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ExprUnary { +impl Clone for crate::ExprUnary { fn clone(&self) -> Self { - ExprUnary { + crate::ExprUnary { attrs: self.attrs.clone(), op: self.op.clone(), expr: self.expr.clone(), @@ -723,9 +719,9 @@ impl Clone for ExprUnary { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ExprUnsafe { +impl Clone for crate::ExprUnsafe { fn clone(&self) -> Self { - ExprUnsafe { + crate::ExprUnsafe { attrs: self.attrs.clone(), unsafe_token: self.unsafe_token.clone(), block: self.block.clone(), @@ -734,9 +730,9 @@ impl Clone for ExprUnsafe { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ExprWhile { +impl 
Clone for crate::ExprWhile { fn clone(&self) -> Self { - ExprWhile { + crate::ExprWhile { attrs: self.attrs.clone(), label: self.label.clone(), while_token: self.while_token.clone(), @@ -747,9 +743,9 @@ impl Clone for ExprWhile { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ExprYield { +impl Clone for crate::ExprYield { fn clone(&self) -> Self { - ExprYield { + crate::ExprYield { attrs: self.attrs.clone(), yield_token: self.yield_token.clone(), expr: self.expr.clone(), @@ -758,9 +754,9 @@ impl Clone for ExprYield { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for Field { +impl Clone for crate::Field { fn clone(&self) -> Self { - Field { + crate::Field { attrs: self.attrs.clone(), vis: self.vis.clone(), mutability: self.mutability.clone(), @@ -772,18 +768,18 @@ impl Clone for Field { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for FieldMutability { +impl Clone for crate::FieldMutability { fn clone(&self) -> Self { match self { - FieldMutability::None => FieldMutability::None, + crate::FieldMutability::None => crate::FieldMutability::None, } } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for FieldPat { +impl Clone for crate::FieldPat { fn clone(&self) -> Self { - FieldPat { + crate::FieldPat { attrs: self.attrs.clone(), member: self.member.clone(), colon_token: self.colon_token.clone(), @@ -791,11 +787,11 @@ impl Clone for FieldPat { } } } -#[cfg(feature = "full")] +#[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for FieldValue { +impl Clone for crate::FieldValue { fn clone(&self) -> Self { - FieldValue { + crate::FieldValue { attrs: self.attrs.clone(), member: self.member.clone(), colon_token: self.colon_token.clone(), @@ -805,20 +801,20 @@ impl 
Clone for FieldValue { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for Fields { +impl Clone for crate::Fields { fn clone(&self) -> Self { match self { - Fields::Named(v0) => Fields::Named(v0.clone()), - Fields::Unnamed(v0) => Fields::Unnamed(v0.clone()), - Fields::Unit => Fields::Unit, + crate::Fields::Named(v0) => crate::Fields::Named(v0.clone()), + crate::Fields::Unnamed(v0) => crate::Fields::Unnamed(v0.clone()), + crate::Fields::Unit => crate::Fields::Unit, } } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for FieldsNamed { +impl Clone for crate::FieldsNamed { fn clone(&self) -> Self { - FieldsNamed { + crate::FieldsNamed { brace_token: self.brace_token.clone(), named: self.named.clone(), } @@ -826,9 +822,9 @@ impl Clone for FieldsNamed { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for FieldsUnnamed { +impl Clone for crate::FieldsUnnamed { fn clone(&self) -> Self { - FieldsUnnamed { + crate::FieldsUnnamed { paren_token: self.paren_token.clone(), unnamed: self.unnamed.clone(), } @@ -836,9 +832,9 @@ impl Clone for FieldsUnnamed { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for File { +impl Clone for crate::File { fn clone(&self) -> Self { - File { + crate::File { shebang: self.shebang.clone(), attrs: self.attrs.clone(), items: self.items.clone(), @@ -847,32 +843,32 @@ impl Clone for File { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for FnArg { +impl Clone for crate::FnArg { fn clone(&self) -> Self { match self { - FnArg::Receiver(v0) => FnArg::Receiver(v0.clone()), - FnArg::Typed(v0) => FnArg::Typed(v0.clone()), + crate::FnArg::Receiver(v0) => crate::FnArg::Receiver(v0.clone()), + crate::FnArg::Typed(v0) => crate::FnArg::Typed(v0.clone()), } 
} } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ForeignItem { +impl Clone for crate::ForeignItem { fn clone(&self) -> Self { match self { - ForeignItem::Fn(v0) => ForeignItem::Fn(v0.clone()), - ForeignItem::Static(v0) => ForeignItem::Static(v0.clone()), - ForeignItem::Type(v0) => ForeignItem::Type(v0.clone()), - ForeignItem::Macro(v0) => ForeignItem::Macro(v0.clone()), - ForeignItem::Verbatim(v0) => ForeignItem::Verbatim(v0.clone()), + crate::ForeignItem::Fn(v0) => crate::ForeignItem::Fn(v0.clone()), + crate::ForeignItem::Static(v0) => crate::ForeignItem::Static(v0.clone()), + crate::ForeignItem::Type(v0) => crate::ForeignItem::Type(v0.clone()), + crate::ForeignItem::Macro(v0) => crate::ForeignItem::Macro(v0.clone()), + crate::ForeignItem::Verbatim(v0) => crate::ForeignItem::Verbatim(v0.clone()), } } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ForeignItemFn { +impl Clone for crate::ForeignItemFn { fn clone(&self) -> Self { - ForeignItemFn { + crate::ForeignItemFn { attrs: self.attrs.clone(), vis: self.vis.clone(), sig: self.sig.clone(), @@ -882,9 +878,9 @@ impl Clone for ForeignItemFn { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ForeignItemMacro { +impl Clone for crate::ForeignItemMacro { fn clone(&self) -> Self { - ForeignItemMacro { + crate::ForeignItemMacro { attrs: self.attrs.clone(), mac: self.mac.clone(), semi_token: self.semi_token.clone(), @@ -893,9 +889,9 @@ impl Clone for ForeignItemMacro { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ForeignItemStatic { +impl Clone for crate::ForeignItemStatic { fn clone(&self) -> Self { - ForeignItemStatic { + crate::ForeignItemStatic { attrs: self.attrs.clone(), vis: self.vis.clone(), static_token: self.static_token.clone(), @@ -909,9 +905,9 @@ impl Clone for ForeignItemStatic { } #[cfg(feature = "full")] 
#[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ForeignItemType { +impl Clone for crate::ForeignItemType { fn clone(&self) -> Self { - ForeignItemType { + crate::ForeignItemType { attrs: self.attrs.clone(), vis: self.vis.clone(), type_token: self.type_token.clone(), @@ -923,34 +919,46 @@ impl Clone for ForeignItemType { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for GenericArgument { +impl Clone for crate::GenericArgument { fn clone(&self) -> Self { match self { - GenericArgument::Lifetime(v0) => GenericArgument::Lifetime(v0.clone()), - GenericArgument::Type(v0) => GenericArgument::Type(v0.clone()), - GenericArgument::Const(v0) => GenericArgument::Const(v0.clone()), - GenericArgument::AssocType(v0) => GenericArgument::AssocType(v0.clone()), - GenericArgument::AssocConst(v0) => GenericArgument::AssocConst(v0.clone()), - GenericArgument::Constraint(v0) => GenericArgument::Constraint(v0.clone()), + crate::GenericArgument::Lifetime(v0) => { + crate::GenericArgument::Lifetime(v0.clone()) + } + crate::GenericArgument::Type(v0) => crate::GenericArgument::Type(v0.clone()), + crate::GenericArgument::Const(v0) => { + crate::GenericArgument::Const(v0.clone()) + } + crate::GenericArgument::AssocType(v0) => { + crate::GenericArgument::AssocType(v0.clone()) + } + crate::GenericArgument::AssocConst(v0) => { + crate::GenericArgument::AssocConst(v0.clone()) + } + crate::GenericArgument::Constraint(v0) => { + crate::GenericArgument::Constraint(v0.clone()) + } } } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for GenericParam { +impl Clone for crate::GenericParam { fn clone(&self) -> Self { match self { - GenericParam::Lifetime(v0) => GenericParam::Lifetime(v0.clone()), - GenericParam::Type(v0) => GenericParam::Type(v0.clone()), - GenericParam::Const(v0) => GenericParam::Const(v0.clone()), + 
crate::GenericParam::Lifetime(v0) => { + crate::GenericParam::Lifetime(v0.clone()) + } + crate::GenericParam::Type(v0) => crate::GenericParam::Type(v0.clone()), + crate::GenericParam::Const(v0) => crate::GenericParam::Const(v0.clone()), } } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for Generics { +impl Clone for crate::Generics { fn clone(&self) -> Self { - Generics { + crate::Generics { lt_token: self.lt_token.clone(), params: self.params.clone(), gt_token: self.gt_token.clone(), @@ -960,22 +968,22 @@ impl Clone for Generics { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ImplItem { +impl Clone for crate::ImplItem { fn clone(&self) -> Self { match self { - ImplItem::Const(v0) => ImplItem::Const(v0.clone()), - ImplItem::Fn(v0) => ImplItem::Fn(v0.clone()), - ImplItem::Type(v0) => ImplItem::Type(v0.clone()), - ImplItem::Macro(v0) => ImplItem::Macro(v0.clone()), - ImplItem::Verbatim(v0) => ImplItem::Verbatim(v0.clone()), + crate::ImplItem::Const(v0) => crate::ImplItem::Const(v0.clone()), + crate::ImplItem::Fn(v0) => crate::ImplItem::Fn(v0.clone()), + crate::ImplItem::Type(v0) => crate::ImplItem::Type(v0.clone()), + crate::ImplItem::Macro(v0) => crate::ImplItem::Macro(v0.clone()), + crate::ImplItem::Verbatim(v0) => crate::ImplItem::Verbatim(v0.clone()), } } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ImplItemConst { +impl Clone for crate::ImplItemConst { fn clone(&self) -> Self { - ImplItemConst { + crate::ImplItemConst { attrs: self.attrs.clone(), vis: self.vis.clone(), defaultness: self.defaultness.clone(), @@ -992,9 +1000,9 @@ impl Clone for ImplItemConst { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ImplItemFn { +impl Clone for crate::ImplItemFn { fn clone(&self) -> Self { - ImplItemFn { + crate::ImplItemFn { attrs: 
self.attrs.clone(), vis: self.vis.clone(), defaultness: self.defaultness.clone(), @@ -1005,9 +1013,9 @@ impl Clone for ImplItemFn { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ImplItemMacro { +impl Clone for crate::ImplItemMacro { fn clone(&self) -> Self { - ImplItemMacro { + crate::ImplItemMacro { attrs: self.attrs.clone(), mac: self.mac.clone(), semi_token: self.semi_token.clone(), @@ -1016,9 +1024,9 @@ impl Clone for ImplItemMacro { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ImplItemType { +impl Clone for crate::ImplItemType { fn clone(&self) -> Self { - ImplItemType { + crate::ImplItemType { attrs: self.attrs.clone(), vis: self.vis.clone(), defaultness: self.defaultness.clone(), @@ -1033,16 +1041,16 @@ impl Clone for ImplItemType { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ImplRestriction { +impl Clone for crate::ImplRestriction { fn clone(&self) -> Self { match *self {} } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for Index { +impl Clone for crate::Index { fn clone(&self) -> Self { - Index { + crate::Index { index: self.index.clone(), span: self.span.clone(), } @@ -1050,33 +1058,33 @@ impl Clone for Index { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for Item { +impl Clone for crate::Item { fn clone(&self) -> Self { match self { - Item::Const(v0) => Item::Const(v0.clone()), - Item::Enum(v0) => Item::Enum(v0.clone()), - Item::ExternCrate(v0) => Item::ExternCrate(v0.clone()), - Item::Fn(v0) => Item::Fn(v0.clone()), - Item::ForeignMod(v0) => Item::ForeignMod(v0.clone()), - Item::Impl(v0) => Item::Impl(v0.clone()), - Item::Macro(v0) => Item::Macro(v0.clone()), - Item::Mod(v0) => Item::Mod(v0.clone()), - Item::Static(v0) => Item::Static(v0.clone()), - Item::Struct(v0) => 
Item::Struct(v0.clone()), - Item::Trait(v0) => Item::Trait(v0.clone()), - Item::TraitAlias(v0) => Item::TraitAlias(v0.clone()), - Item::Type(v0) => Item::Type(v0.clone()), - Item::Union(v0) => Item::Union(v0.clone()), - Item::Use(v0) => Item::Use(v0.clone()), - Item::Verbatim(v0) => Item::Verbatim(v0.clone()), + crate::Item::Const(v0) => crate::Item::Const(v0.clone()), + crate::Item::Enum(v0) => crate::Item::Enum(v0.clone()), + crate::Item::ExternCrate(v0) => crate::Item::ExternCrate(v0.clone()), + crate::Item::Fn(v0) => crate::Item::Fn(v0.clone()), + crate::Item::ForeignMod(v0) => crate::Item::ForeignMod(v0.clone()), + crate::Item::Impl(v0) => crate::Item::Impl(v0.clone()), + crate::Item::Macro(v0) => crate::Item::Macro(v0.clone()), + crate::Item::Mod(v0) => crate::Item::Mod(v0.clone()), + crate::Item::Static(v0) => crate::Item::Static(v0.clone()), + crate::Item::Struct(v0) => crate::Item::Struct(v0.clone()), + crate::Item::Trait(v0) => crate::Item::Trait(v0.clone()), + crate::Item::TraitAlias(v0) => crate::Item::TraitAlias(v0.clone()), + crate::Item::Type(v0) => crate::Item::Type(v0.clone()), + crate::Item::Union(v0) => crate::Item::Union(v0.clone()), + crate::Item::Use(v0) => crate::Item::Use(v0.clone()), + crate::Item::Verbatim(v0) => crate::Item::Verbatim(v0.clone()), } } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ItemConst { +impl Clone for crate::ItemConst { fn clone(&self) -> Self { - ItemConst { + crate::ItemConst { attrs: self.attrs.clone(), vis: self.vis.clone(), const_token: self.const_token.clone(), @@ -1092,9 +1100,9 @@ impl Clone for ItemConst { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ItemEnum { +impl Clone for crate::ItemEnum { fn clone(&self) -> Self { - ItemEnum { + crate::ItemEnum { attrs: self.attrs.clone(), vis: self.vis.clone(), enum_token: self.enum_token.clone(), @@ -1107,9 +1115,9 @@ impl Clone for ItemEnum { } #[cfg(feature = 
"full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ItemExternCrate { +impl Clone for crate::ItemExternCrate { fn clone(&self) -> Self { - ItemExternCrate { + crate::ItemExternCrate { attrs: self.attrs.clone(), vis: self.vis.clone(), extern_token: self.extern_token.clone(), @@ -1122,9 +1130,9 @@ impl Clone for ItemExternCrate { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ItemFn { +impl Clone for crate::ItemFn { fn clone(&self) -> Self { - ItemFn { + crate::ItemFn { attrs: self.attrs.clone(), vis: self.vis.clone(), sig: self.sig.clone(), @@ -1134,9 +1142,9 @@ impl Clone for ItemFn { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ItemForeignMod { +impl Clone for crate::ItemForeignMod { fn clone(&self) -> Self { - ItemForeignMod { + crate::ItemForeignMod { attrs: self.attrs.clone(), unsafety: self.unsafety.clone(), abi: self.abi.clone(), @@ -1147,9 +1155,9 @@ impl Clone for ItemForeignMod { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ItemImpl { +impl Clone for crate::ItemImpl { fn clone(&self) -> Self { - ItemImpl { + crate::ItemImpl { attrs: self.attrs.clone(), defaultness: self.defaultness.clone(), unsafety: self.unsafety.clone(), @@ -1164,9 +1172,9 @@ impl Clone for ItemImpl { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ItemMacro { +impl Clone for crate::ItemMacro { fn clone(&self) -> Self { - ItemMacro { + crate::ItemMacro { attrs: self.attrs.clone(), ident: self.ident.clone(), mac: self.mac.clone(), @@ -1176,9 +1184,9 @@ impl Clone for ItemMacro { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ItemMod { +impl Clone for crate::ItemMod { fn clone(&self) -> Self { - ItemMod { + crate::ItemMod { attrs: self.attrs.clone(), vis: self.vis.clone(), unsafety: 
self.unsafety.clone(), @@ -1191,9 +1199,9 @@ impl Clone for ItemMod { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ItemStatic { +impl Clone for crate::ItemStatic { fn clone(&self) -> Self { - ItemStatic { + crate::ItemStatic { attrs: self.attrs.clone(), vis: self.vis.clone(), static_token: self.static_token.clone(), @@ -1209,9 +1217,9 @@ impl Clone for ItemStatic { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ItemStruct { +impl Clone for crate::ItemStruct { fn clone(&self) -> Self { - ItemStruct { + crate::ItemStruct { attrs: self.attrs.clone(), vis: self.vis.clone(), struct_token: self.struct_token.clone(), @@ -1224,9 +1232,9 @@ impl Clone for ItemStruct { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ItemTrait { +impl Clone for crate::ItemTrait { fn clone(&self) -> Self { - ItemTrait { + crate::ItemTrait { attrs: self.attrs.clone(), vis: self.vis.clone(), unsafety: self.unsafety.clone(), @@ -1244,9 +1252,9 @@ impl Clone for ItemTrait { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ItemTraitAlias { +impl Clone for crate::ItemTraitAlias { fn clone(&self) -> Self { - ItemTraitAlias { + crate::ItemTraitAlias { attrs: self.attrs.clone(), vis: self.vis.clone(), trait_token: self.trait_token.clone(), @@ -1260,9 +1268,9 @@ impl Clone for ItemTraitAlias { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ItemType { +impl Clone for crate::ItemType { fn clone(&self) -> Self { - ItemType { + crate::ItemType { attrs: self.attrs.clone(), vis: self.vis.clone(), type_token: self.type_token.clone(), @@ -1276,9 +1284,9 @@ impl Clone for ItemType { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ItemUnion { +impl Clone for crate::ItemUnion { fn clone(&self) -> Self { - 
ItemUnion { + crate::ItemUnion { attrs: self.attrs.clone(), vis: self.vis.clone(), union_token: self.union_token.clone(), @@ -1290,9 +1298,9 @@ impl Clone for ItemUnion { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ItemUse { +impl Clone for crate::ItemUse { fn clone(&self) -> Self { - ItemUse { + crate::ItemUse { attrs: self.attrs.clone(), vis: self.vis.clone(), use_token: self.use_token.clone(), @@ -1304,9 +1312,9 @@ impl Clone for ItemUse { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for Label { +impl Clone for crate::Label { fn clone(&self) -> Self { - Label { + crate::Label { name: self.name.clone(), colon_token: self.colon_token.clone(), } @@ -1314,9 +1322,9 @@ impl Clone for Label { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for LifetimeParam { +impl Clone for crate::LifetimeParam { fn clone(&self) -> Self { - LifetimeParam { + crate::LifetimeParam { attrs: self.attrs.clone(), lifetime: self.lifetime.clone(), colon_token: self.colon_token.clone(), @@ -1325,24 +1333,24 @@ impl Clone for LifetimeParam { } } #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for Lit { +impl Clone for crate::Lit { fn clone(&self) -> Self { match self { - Lit::Str(v0) => Lit::Str(v0.clone()), - Lit::ByteStr(v0) => Lit::ByteStr(v0.clone()), - Lit::Byte(v0) => Lit::Byte(v0.clone()), - Lit::Char(v0) => Lit::Char(v0.clone()), - Lit::Int(v0) => Lit::Int(v0.clone()), - Lit::Float(v0) => Lit::Float(v0.clone()), - Lit::Bool(v0) => Lit::Bool(v0.clone()), - Lit::Verbatim(v0) => Lit::Verbatim(v0.clone()), + crate::Lit::Str(v0) => crate::Lit::Str(v0.clone()), + crate::Lit::ByteStr(v0) => crate::Lit::ByteStr(v0.clone()), + crate::Lit::Byte(v0) => crate::Lit::Byte(v0.clone()), + crate::Lit::Char(v0) => crate::Lit::Char(v0.clone()), + crate::Lit::Int(v0) => crate::Lit::Int(v0.clone()), + 
crate::Lit::Float(v0) => crate::Lit::Float(v0.clone()), + crate::Lit::Bool(v0) => crate::Lit::Bool(v0.clone()), + crate::Lit::Verbatim(v0) => crate::Lit::Verbatim(v0.clone()), } } } #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for LitBool { +impl Clone for crate::LitBool { fn clone(&self) -> Self { - LitBool { + crate::LitBool { value: self.value.clone(), span: self.span.clone(), } @@ -1350,9 +1358,9 @@ impl Clone for LitBool { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for Local { +impl Clone for crate::Local { fn clone(&self) -> Self { - Local { + crate::Local { attrs: self.attrs.clone(), let_token: self.let_token.clone(), pat: self.pat.clone(), @@ -1363,9 +1371,9 @@ impl Clone for Local { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for LocalInit { +impl Clone for crate::LocalInit { fn clone(&self) -> Self { - LocalInit { + crate::LocalInit { eq_token: self.eq_token.clone(), expr: self.expr.clone(), diverge: self.diverge.clone(), @@ -1374,9 +1382,9 @@ impl Clone for LocalInit { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for Macro { +impl Clone for crate::Macro { fn clone(&self) -> Self { - Macro { + crate::Macro { path: self.path.clone(), bang_token: self.bang_token.clone(), delimiter: self.delimiter.clone(), @@ -1386,41 +1394,43 @@ impl Clone for Macro { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for MacroDelimiter { +impl Clone for crate::MacroDelimiter { fn clone(&self) -> Self { match self { - MacroDelimiter::Paren(v0) => MacroDelimiter::Paren(v0.clone()), - MacroDelimiter::Brace(v0) => MacroDelimiter::Brace(v0.clone()), - MacroDelimiter::Bracket(v0) => MacroDelimiter::Bracket(v0.clone()), + crate::MacroDelimiter::Paren(v0) => crate::MacroDelimiter::Paren(v0.clone()), + 
crate::MacroDelimiter::Brace(v0) => crate::MacroDelimiter::Brace(v0.clone()), + crate::MacroDelimiter::Bracket(v0) => { + crate::MacroDelimiter::Bracket(v0.clone()) + } } } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for Member { +impl Clone for crate::Member { fn clone(&self) -> Self { match self { - Member::Named(v0) => Member::Named(v0.clone()), - Member::Unnamed(v0) => Member::Unnamed(v0.clone()), + crate::Member::Named(v0) => crate::Member::Named(v0.clone()), + crate::Member::Unnamed(v0) => crate::Member::Unnamed(v0.clone()), } } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for Meta { +impl Clone for crate::Meta { fn clone(&self) -> Self { match self { - Meta::Path(v0) => Meta::Path(v0.clone()), - Meta::List(v0) => Meta::List(v0.clone()), - Meta::NameValue(v0) => Meta::NameValue(v0.clone()), + crate::Meta::Path(v0) => crate::Meta::Path(v0.clone()), + crate::Meta::List(v0) => crate::Meta::List(v0.clone()), + crate::Meta::NameValue(v0) => crate::Meta::NameValue(v0.clone()), } } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for MetaList { +impl Clone for crate::MetaList { fn clone(&self) -> Self { - MetaList { + crate::MetaList { path: self.path.clone(), delimiter: self.delimiter.clone(), tokens: self.tokens.clone(), @@ -1429,9 +1439,9 @@ impl Clone for MetaList { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for MetaNameValue { +impl Clone for crate::MetaNameValue { fn clone(&self) -> Self { - MetaNameValue { + crate::MetaNameValue { path: self.path.clone(), eq_token: self.eq_token.clone(), value: self.value.clone(), @@ -1440,9 +1450,9 @@ impl Clone for MetaNameValue { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = 
"clone-impls")))] -impl Clone for ParenthesizedGenericArguments { +impl Clone for crate::ParenthesizedGenericArguments { fn clone(&self) -> Self { - ParenthesizedGenericArguments { + crate::ParenthesizedGenericArguments { paren_token: self.paren_token.clone(), inputs: self.inputs.clone(), output: self.output.clone(), @@ -1451,34 +1461,34 @@ impl Clone for ParenthesizedGenericArguments { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for Pat { +impl Clone for crate::Pat { fn clone(&self) -> Self { match self { - Pat::Const(v0) => Pat::Const(v0.clone()), - Pat::Ident(v0) => Pat::Ident(v0.clone()), - Pat::Lit(v0) => Pat::Lit(v0.clone()), - Pat::Macro(v0) => Pat::Macro(v0.clone()), - Pat::Or(v0) => Pat::Or(v0.clone()), - Pat::Paren(v0) => Pat::Paren(v0.clone()), - Pat::Path(v0) => Pat::Path(v0.clone()), - Pat::Range(v0) => Pat::Range(v0.clone()), - Pat::Reference(v0) => Pat::Reference(v0.clone()), - Pat::Rest(v0) => Pat::Rest(v0.clone()), - Pat::Slice(v0) => Pat::Slice(v0.clone()), - Pat::Struct(v0) => Pat::Struct(v0.clone()), - Pat::Tuple(v0) => Pat::Tuple(v0.clone()), - Pat::TupleStruct(v0) => Pat::TupleStruct(v0.clone()), - Pat::Type(v0) => Pat::Type(v0.clone()), - Pat::Verbatim(v0) => Pat::Verbatim(v0.clone()), - Pat::Wild(v0) => Pat::Wild(v0.clone()), + crate::Pat::Const(v0) => crate::Pat::Const(v0.clone()), + crate::Pat::Ident(v0) => crate::Pat::Ident(v0.clone()), + crate::Pat::Lit(v0) => crate::Pat::Lit(v0.clone()), + crate::Pat::Macro(v0) => crate::Pat::Macro(v0.clone()), + crate::Pat::Or(v0) => crate::Pat::Or(v0.clone()), + crate::Pat::Paren(v0) => crate::Pat::Paren(v0.clone()), + crate::Pat::Path(v0) => crate::Pat::Path(v0.clone()), + crate::Pat::Range(v0) => crate::Pat::Range(v0.clone()), + crate::Pat::Reference(v0) => crate::Pat::Reference(v0.clone()), + crate::Pat::Rest(v0) => crate::Pat::Rest(v0.clone()), + crate::Pat::Slice(v0) => crate::Pat::Slice(v0.clone()), + crate::Pat::Struct(v0) => 
crate::Pat::Struct(v0.clone()), + crate::Pat::Tuple(v0) => crate::Pat::Tuple(v0.clone()), + crate::Pat::TupleStruct(v0) => crate::Pat::TupleStruct(v0.clone()), + crate::Pat::Type(v0) => crate::Pat::Type(v0.clone()), + crate::Pat::Verbatim(v0) => crate::Pat::Verbatim(v0.clone()), + crate::Pat::Wild(v0) => crate::Pat::Wild(v0.clone()), } } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for PatIdent { +impl Clone for crate::PatIdent { fn clone(&self) -> Self { - PatIdent { + crate::PatIdent { attrs: self.attrs.clone(), by_ref: self.by_ref.clone(), mutability: self.mutability.clone(), @@ -1489,9 +1499,9 @@ impl Clone for PatIdent { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for PatOr { +impl Clone for crate::PatOr { fn clone(&self) -> Self { - PatOr { + crate::PatOr { attrs: self.attrs.clone(), leading_vert: self.leading_vert.clone(), cases: self.cases.clone(), @@ -1500,9 +1510,9 @@ impl Clone for PatOr { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for PatParen { +impl Clone for crate::PatParen { fn clone(&self) -> Self { - PatParen { + crate::PatParen { attrs: self.attrs.clone(), paren_token: self.paren_token.clone(), pat: self.pat.clone(), @@ -1511,9 +1521,9 @@ impl Clone for PatParen { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for PatReference { +impl Clone for crate::PatReference { fn clone(&self) -> Self { - PatReference { + crate::PatReference { attrs: self.attrs.clone(), and_token: self.and_token.clone(), mutability: self.mutability.clone(), @@ -1523,9 +1533,9 @@ impl Clone for PatReference { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for PatRest { +impl Clone for crate::PatRest { fn clone(&self) -> Self { - PatRest { + crate::PatRest { attrs: self.attrs.clone(), dot2_token: self.dot2_token.clone(), } @@ 
-1533,9 +1543,9 @@ impl Clone for PatRest { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for PatSlice { +impl Clone for crate::PatSlice { fn clone(&self) -> Self { - PatSlice { + crate::PatSlice { attrs: self.attrs.clone(), bracket_token: self.bracket_token.clone(), elems: self.elems.clone(), @@ -1544,9 +1554,9 @@ impl Clone for PatSlice { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for PatStruct { +impl Clone for crate::PatStruct { fn clone(&self) -> Self { - PatStruct { + crate::PatStruct { attrs: self.attrs.clone(), qself: self.qself.clone(), path: self.path.clone(), @@ -1558,9 +1568,9 @@ impl Clone for PatStruct { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for PatTuple { +impl Clone for crate::PatTuple { fn clone(&self) -> Self { - PatTuple { + crate::PatTuple { attrs: self.attrs.clone(), paren_token: self.paren_token.clone(), elems: self.elems.clone(), @@ -1569,9 +1579,9 @@ impl Clone for PatTuple { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for PatTupleStruct { +impl Clone for crate::PatTupleStruct { fn clone(&self) -> Self { - PatTupleStruct { + crate::PatTupleStruct { attrs: self.attrs.clone(), qself: self.qself.clone(), path: self.path.clone(), @@ -1582,9 +1592,9 @@ impl Clone for PatTupleStruct { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for PatType { +impl Clone for crate::PatType { fn clone(&self) -> Self { - PatType { + crate::PatType { attrs: self.attrs.clone(), pat: self.pat.clone(), colon_token: self.colon_token.clone(), @@ -1594,9 +1604,9 @@ impl Clone for PatType { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for PatWild { +impl Clone for crate::PatWild { fn clone(&self) -> Self { - PatWild { + crate::PatWild { attrs: self.attrs.clone(), 
underscore_token: self.underscore_token.clone(), } @@ -1604,9 +1614,9 @@ impl Clone for PatWild { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for Path { +impl Clone for crate::Path { fn clone(&self) -> Self { - Path { + crate::Path { leading_colon: self.leading_colon.clone(), segments: self.segments.clone(), } @@ -1614,22 +1624,24 @@ impl Clone for Path { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for PathArguments { +impl Clone for crate::PathArguments { fn clone(&self) -> Self { match self { - PathArguments::None => PathArguments::None, - PathArguments::AngleBracketed(v0) => { - PathArguments::AngleBracketed(v0.clone()) + crate::PathArguments::None => crate::PathArguments::None, + crate::PathArguments::AngleBracketed(v0) => { + crate::PathArguments::AngleBracketed(v0.clone()) + } + crate::PathArguments::Parenthesized(v0) => { + crate::PathArguments::Parenthesized(v0.clone()) } - PathArguments::Parenthesized(v0) => PathArguments::Parenthesized(v0.clone()), } } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for PathSegment { +impl Clone for crate::PathSegment { fn clone(&self) -> Self { - PathSegment { + crate::PathSegment { ident: self.ident.clone(), arguments: self.arguments.clone(), } @@ -1637,9 +1649,9 @@ impl Clone for PathSegment { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for PredicateLifetime { +impl Clone for crate::PredicateLifetime { fn clone(&self) -> Self { - PredicateLifetime { + crate::PredicateLifetime { lifetime: self.lifetime.clone(), colon_token: self.colon_token.clone(), bounds: self.bounds.clone(), @@ -1648,9 +1660,9 @@ impl Clone for PredicateLifetime { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = 
"clone-impls")))] -impl Clone for PredicateType { +impl Clone for crate::PredicateType { fn clone(&self) -> Self { - PredicateType { + crate::PredicateType { lifetimes: self.lifetimes.clone(), bounded_ty: self.bounded_ty.clone(), colon_token: self.colon_token.clone(), @@ -1660,9 +1672,9 @@ impl Clone for PredicateType { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for QSelf { +impl Clone for crate::QSelf { fn clone(&self) -> Self { - QSelf { + crate::QSelf { lt_token: self.lt_token.clone(), ty: self.ty.clone(), position: self.position.clone(), @@ -1673,19 +1685,19 @@ impl Clone for QSelf { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Copy for RangeLimits {} +impl Copy for crate::RangeLimits {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for RangeLimits { +impl Clone for crate::RangeLimits { fn clone(&self) -> Self { *self } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for Receiver { +impl Clone for crate::Receiver { fn clone(&self) -> Self { - Receiver { + crate::Receiver { attrs: self.attrs.clone(), reference: self.reference.clone(), mutability: self.mutability.clone(), @@ -1697,19 +1709,21 @@ impl Clone for Receiver { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for ReturnType { +impl Clone for crate::ReturnType { fn clone(&self) -> Self { match self { - ReturnType::Default => ReturnType::Default, - ReturnType::Type(v0, v1) => ReturnType::Type(v0.clone(), v1.clone()), + crate::ReturnType::Default => crate::ReturnType::Default, + crate::ReturnType::Type(v0, v1) => { + crate::ReturnType::Type(v0.clone(), v1.clone()) + } } } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for Signature { +impl Clone for crate::Signature { fn 
clone(&self) -> Self { - Signature { + crate::Signature { constness: self.constness.clone(), asyncness: self.asyncness.clone(), unsafety: self.unsafety.clone(), @@ -1726,31 +1740,31 @@ impl Clone for Signature { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for StaticMutability { +impl Clone for crate::StaticMutability { fn clone(&self) -> Self { match self { - StaticMutability::Mut(v0) => StaticMutability::Mut(v0.clone()), - StaticMutability::None => StaticMutability::None, + crate::StaticMutability::Mut(v0) => crate::StaticMutability::Mut(v0.clone()), + crate::StaticMutability::None => crate::StaticMutability::None, } } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for Stmt { +impl Clone for crate::Stmt { fn clone(&self) -> Self { match self { - Stmt::Local(v0) => Stmt::Local(v0.clone()), - Stmt::Item(v0) => Stmt::Item(v0.clone()), - Stmt::Expr(v0, v1) => Stmt::Expr(v0.clone(), v1.clone()), - Stmt::Macro(v0) => Stmt::Macro(v0.clone()), + crate::Stmt::Local(v0) => crate::Stmt::Local(v0.clone()), + crate::Stmt::Item(v0) => crate::Stmt::Item(v0.clone()), + crate::Stmt::Expr(v0, v1) => crate::Stmt::Expr(v0.clone(), v1.clone()), + crate::Stmt::Macro(v0) => crate::Stmt::Macro(v0.clone()), } } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for StmtMacro { +impl Clone for crate::StmtMacro { fn clone(&self) -> Self { - StmtMacro { + crate::StmtMacro { attrs: self.attrs.clone(), mac: self.mac.clone(), semi_token: self.semi_token.clone(), @@ -1759,9 +1773,9 @@ impl Clone for StmtMacro { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for TraitBound { +impl Clone for crate::TraitBound { fn clone(&self) -> Self { - TraitBound { + crate::TraitBound { paren_token: self.paren_token.clone(), modifier: self.modifier.clone(), lifetimes: self.lifetimes.clone(), @@ 
-1771,32 +1785,32 @@ impl Clone for TraitBound { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Copy for TraitBoundModifier {} +impl Copy for crate::TraitBoundModifier {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for TraitBoundModifier { +impl Clone for crate::TraitBoundModifier { fn clone(&self) -> Self { *self } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for TraitItem { +impl Clone for crate::TraitItem { fn clone(&self) -> Self { match self { - TraitItem::Const(v0) => TraitItem::Const(v0.clone()), - TraitItem::Fn(v0) => TraitItem::Fn(v0.clone()), - TraitItem::Type(v0) => TraitItem::Type(v0.clone()), - TraitItem::Macro(v0) => TraitItem::Macro(v0.clone()), - TraitItem::Verbatim(v0) => TraitItem::Verbatim(v0.clone()), + crate::TraitItem::Const(v0) => crate::TraitItem::Const(v0.clone()), + crate::TraitItem::Fn(v0) => crate::TraitItem::Fn(v0.clone()), + crate::TraitItem::Type(v0) => crate::TraitItem::Type(v0.clone()), + crate::TraitItem::Macro(v0) => crate::TraitItem::Macro(v0.clone()), + crate::TraitItem::Verbatim(v0) => crate::TraitItem::Verbatim(v0.clone()), } } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for TraitItemConst { +impl Clone for crate::TraitItemConst { fn clone(&self) -> Self { - TraitItemConst { + crate::TraitItemConst { attrs: self.attrs.clone(), const_token: self.const_token.clone(), ident: self.ident.clone(), @@ -1810,9 +1824,9 @@ impl Clone for TraitItemConst { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for TraitItemFn { +impl Clone for crate::TraitItemFn { fn clone(&self) -> Self { - TraitItemFn { + crate::TraitItemFn { attrs: self.attrs.clone(), sig: self.sig.clone(), default: self.default.clone(), @@ -1822,9 +1836,9 @@ impl Clone for TraitItemFn { } 
#[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for TraitItemMacro { +impl Clone for crate::TraitItemMacro { fn clone(&self) -> Self { - TraitItemMacro { + crate::TraitItemMacro { attrs: self.attrs.clone(), mac: self.mac.clone(), semi_token: self.semi_token.clone(), @@ -1833,9 +1847,9 @@ impl Clone for TraitItemMacro { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for TraitItemType { +impl Clone for crate::TraitItemType { fn clone(&self) -> Self { - TraitItemType { + crate::TraitItemType { attrs: self.attrs.clone(), type_token: self.type_token.clone(), ident: self.ident.clone(), @@ -1849,32 +1863,32 @@ impl Clone for TraitItemType { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for Type { +impl Clone for crate::Type { fn clone(&self) -> Self { match self { - Type::Array(v0) => Type::Array(v0.clone()), - Type::BareFn(v0) => Type::BareFn(v0.clone()), - Type::Group(v0) => Type::Group(v0.clone()), - Type::ImplTrait(v0) => Type::ImplTrait(v0.clone()), - Type::Infer(v0) => Type::Infer(v0.clone()), - Type::Macro(v0) => Type::Macro(v0.clone()), - Type::Never(v0) => Type::Never(v0.clone()), - Type::Paren(v0) => Type::Paren(v0.clone()), - Type::Path(v0) => Type::Path(v0.clone()), - Type::Ptr(v0) => Type::Ptr(v0.clone()), - Type::Reference(v0) => Type::Reference(v0.clone()), - Type::Slice(v0) => Type::Slice(v0.clone()), - Type::TraitObject(v0) => Type::TraitObject(v0.clone()), - Type::Tuple(v0) => Type::Tuple(v0.clone()), - Type::Verbatim(v0) => Type::Verbatim(v0.clone()), + crate::Type::Array(v0) => crate::Type::Array(v0.clone()), + crate::Type::BareFn(v0) => crate::Type::BareFn(v0.clone()), + crate::Type::Group(v0) => crate::Type::Group(v0.clone()), + crate::Type::ImplTrait(v0) => crate::Type::ImplTrait(v0.clone()), + crate::Type::Infer(v0) => crate::Type::Infer(v0.clone()), + crate::Type::Macro(v0) => 
crate::Type::Macro(v0.clone()), + crate::Type::Never(v0) => crate::Type::Never(v0.clone()), + crate::Type::Paren(v0) => crate::Type::Paren(v0.clone()), + crate::Type::Path(v0) => crate::Type::Path(v0.clone()), + crate::Type::Ptr(v0) => crate::Type::Ptr(v0.clone()), + crate::Type::Reference(v0) => crate::Type::Reference(v0.clone()), + crate::Type::Slice(v0) => crate::Type::Slice(v0.clone()), + crate::Type::TraitObject(v0) => crate::Type::TraitObject(v0.clone()), + crate::Type::Tuple(v0) => crate::Type::Tuple(v0.clone()), + crate::Type::Verbatim(v0) => crate::Type::Verbatim(v0.clone()), } } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for TypeArray { +impl Clone for crate::TypeArray { fn clone(&self) -> Self { - TypeArray { + crate::TypeArray { bracket_token: self.bracket_token.clone(), elem: self.elem.clone(), semi_token: self.semi_token.clone(), @@ -1884,9 +1898,9 @@ impl Clone for TypeArray { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for TypeBareFn { +impl Clone for crate::TypeBareFn { fn clone(&self) -> Self { - TypeBareFn { + crate::TypeBareFn { lifetimes: self.lifetimes.clone(), unsafety: self.unsafety.clone(), abi: self.abi.clone(), @@ -1900,9 +1914,9 @@ impl Clone for TypeBareFn { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for TypeGroup { +impl Clone for crate::TypeGroup { fn clone(&self) -> Self { - TypeGroup { + crate::TypeGroup { group_token: self.group_token.clone(), elem: self.elem.clone(), } @@ -1910,9 +1924,9 @@ impl Clone for TypeGroup { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for TypeImplTrait { +impl Clone for crate::TypeImplTrait { fn clone(&self) -> Self { - TypeImplTrait { + crate::TypeImplTrait { impl_token: self.impl_token.clone(), bounds: 
self.bounds.clone(), } @@ -1920,34 +1934,36 @@ impl Clone for TypeImplTrait { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for TypeInfer { +impl Clone for crate::TypeInfer { fn clone(&self) -> Self { - TypeInfer { + crate::TypeInfer { underscore_token: self.underscore_token.clone(), } } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for TypeMacro { +impl Clone for crate::TypeMacro { fn clone(&self) -> Self { - TypeMacro { mac: self.mac.clone() } + crate::TypeMacro { + mac: self.mac.clone(), + } } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for TypeNever { +impl Clone for crate::TypeNever { fn clone(&self) -> Self { - TypeNever { + crate::TypeNever { bang_token: self.bang_token.clone(), } } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for TypeParam { +impl Clone for crate::TypeParam { fn clone(&self) -> Self { - TypeParam { + crate::TypeParam { attrs: self.attrs.clone(), ident: self.ident.clone(), colon_token: self.colon_token.clone(), @@ -1959,20 +1975,24 @@ impl Clone for TypeParam { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for TypeParamBound { +impl Clone for crate::TypeParamBound { fn clone(&self) -> Self { match self { - TypeParamBound::Trait(v0) => TypeParamBound::Trait(v0.clone()), - TypeParamBound::Lifetime(v0) => TypeParamBound::Lifetime(v0.clone()), - TypeParamBound::Verbatim(v0) => TypeParamBound::Verbatim(v0.clone()), + crate::TypeParamBound::Trait(v0) => crate::TypeParamBound::Trait(v0.clone()), + crate::TypeParamBound::Lifetime(v0) => { + crate::TypeParamBound::Lifetime(v0.clone()) + } + crate::TypeParamBound::Verbatim(v0) => { + crate::TypeParamBound::Verbatim(v0.clone()) + } } } 
} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for TypeParen { +impl Clone for crate::TypeParen { fn clone(&self) -> Self { - TypeParen { + crate::TypeParen { paren_token: self.paren_token.clone(), elem: self.elem.clone(), } @@ -1980,9 +2000,9 @@ impl Clone for TypeParen { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for TypePath { +impl Clone for crate::TypePath { fn clone(&self) -> Self { - TypePath { + crate::TypePath { qself: self.qself.clone(), path: self.path.clone(), } @@ -1990,9 +2010,9 @@ impl Clone for TypePath { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for TypePtr { +impl Clone for crate::TypePtr { fn clone(&self) -> Self { - TypePtr { + crate::TypePtr { star_token: self.star_token.clone(), const_token: self.const_token.clone(), mutability: self.mutability.clone(), @@ -2002,9 +2022,9 @@ impl Clone for TypePtr { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for TypeReference { +impl Clone for crate::TypeReference { fn clone(&self) -> Self { - TypeReference { + crate::TypeReference { and_token: self.and_token.clone(), lifetime: self.lifetime.clone(), mutability: self.mutability.clone(), @@ -2014,9 +2034,9 @@ impl Clone for TypeReference { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for TypeSlice { +impl Clone for crate::TypeSlice { fn clone(&self) -> Self { - TypeSlice { + crate::TypeSlice { bracket_token: self.bracket_token.clone(), elem: self.elem.clone(), } @@ -2024,9 +2044,9 @@ impl Clone for TypeSlice { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for TypeTraitObject { +impl Clone for crate::TypeTraitObject 
{ fn clone(&self) -> Self { - TypeTraitObject { + crate::TypeTraitObject { dyn_token: self.dyn_token.clone(), bounds: self.bounds.clone(), } @@ -2034,9 +2054,9 @@ impl Clone for TypeTraitObject { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for TypeTuple { +impl Clone for crate::TypeTuple { fn clone(&self) -> Self { - TypeTuple { + crate::TypeTuple { paren_token: self.paren_token.clone(), elems: self.elems.clone(), } @@ -2044,28 +2064,28 @@ impl Clone for TypeTuple { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Copy for UnOp {} +impl Copy for crate::UnOp {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for UnOp { +impl Clone for crate::UnOp { fn clone(&self) -> Self { *self } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for UseGlob { +impl Clone for crate::UseGlob { fn clone(&self) -> Self { - UseGlob { + crate::UseGlob { star_token: self.star_token.clone(), } } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for UseGroup { +impl Clone for crate::UseGroup { fn clone(&self) -> Self { - UseGroup { + crate::UseGroup { brace_token: self.brace_token.clone(), items: self.items.clone(), } @@ -2073,18 +2093,18 @@ impl Clone for UseGroup { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for UseName { +impl Clone for crate::UseName { fn clone(&self) -> Self { - UseName { + crate::UseName { ident: self.ident.clone(), } } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for UsePath { +impl Clone for crate::UsePath { fn clone(&self) -> Self { - UsePath { + crate::UsePath { ident: self.ident.clone(), colon2_token: self.colon2_token.clone(), tree: self.tree.clone(), @@ -2093,9 
+2113,9 @@ impl Clone for UsePath { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for UseRename { +impl Clone for crate::UseRename { fn clone(&self) -> Self { - UseRename { + crate::UseRename { ident: self.ident.clone(), as_token: self.as_token.clone(), rename: self.rename.clone(), @@ -2104,22 +2124,22 @@ impl Clone for UseRename { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for UseTree { +impl Clone for crate::UseTree { fn clone(&self) -> Self { match self { - UseTree::Path(v0) => UseTree::Path(v0.clone()), - UseTree::Name(v0) => UseTree::Name(v0.clone()), - UseTree::Rename(v0) => UseTree::Rename(v0.clone()), - UseTree::Glob(v0) => UseTree::Glob(v0.clone()), - UseTree::Group(v0) => UseTree::Group(v0.clone()), + crate::UseTree::Path(v0) => crate::UseTree::Path(v0.clone()), + crate::UseTree::Name(v0) => crate::UseTree::Name(v0.clone()), + crate::UseTree::Rename(v0) => crate::UseTree::Rename(v0.clone()), + crate::UseTree::Glob(v0) => crate::UseTree::Glob(v0.clone()), + crate::UseTree::Group(v0) => crate::UseTree::Group(v0.clone()), } } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for Variadic { +impl Clone for crate::Variadic { fn clone(&self) -> Self { - Variadic { + crate::Variadic { attrs: self.attrs.clone(), pat: self.pat.clone(), dots: self.dots.clone(), @@ -2129,9 +2149,9 @@ impl Clone for Variadic { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for Variant { +impl Clone for crate::Variant { fn clone(&self) -> Self { - Variant { + crate::Variant { attrs: self.attrs.clone(), ident: self.ident.clone(), fields: self.fields.clone(), @@ -2141,9 +2161,9 @@ impl Clone for Variant { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for VisRestricted { +impl Clone for 
crate::VisRestricted { fn clone(&self) -> Self { - VisRestricted { + crate::VisRestricted { pub_token: self.pub_token.clone(), paren_token: self.paren_token.clone(), in_token: self.in_token.clone(), @@ -2153,20 +2173,22 @@ impl Clone for VisRestricted { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for Visibility { +impl Clone for crate::Visibility { fn clone(&self) -> Self { match self { - Visibility::Public(v0) => Visibility::Public(v0.clone()), - Visibility::Restricted(v0) => Visibility::Restricted(v0.clone()), - Visibility::Inherited => Visibility::Inherited, + crate::Visibility::Public(v0) => crate::Visibility::Public(v0.clone()), + crate::Visibility::Restricted(v0) => { + crate::Visibility::Restricted(v0.clone()) + } + crate::Visibility::Inherited => crate::Visibility::Inherited, } } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for WhereClause { +impl Clone for crate::WhereClause { fn clone(&self) -> Self { - WhereClause { + crate::WhereClause { where_token: self.where_token.clone(), predicates: self.predicates.clone(), } @@ -2174,11 +2196,13 @@ impl Clone for WhereClause { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "clone-impls")))] -impl Clone for WherePredicate { +impl Clone for crate::WherePredicate { fn clone(&self) -> Self { match self { - WherePredicate::Lifetime(v0) => WherePredicate::Lifetime(v0.clone()), - WherePredicate::Type(v0) => WherePredicate::Type(v0.clone()), + crate::WherePredicate::Lifetime(v0) => { + crate::WherePredicate::Lifetime(v0.clone()) + } + crate::WherePredicate::Type(v0) => crate::WherePredicate::Type(v0.clone()), } } } diff --git a/vendor/syn/src/gen/debug.rs b/vendor/syn/src/gen/debug.rs index 837fe99..2dc531e 100644 --- a/vendor/syn/src/gen/debug.rs +++ b/vendor/syn/src/gen/debug.rs @@ -1,11 +1,11 @@ // This file is @generated by 
syn-internal-codegen. // It is not intended for manual editing. -use crate::*; +#![allow(unknown_lints, non_local_definitions)] use std::fmt::{self, Debug}; #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for Abi { +impl Debug for crate::Abi { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { let mut formatter = formatter.debug_struct("Abi"); formatter.field("extern_token", &self.extern_token); @@ -15,24 +15,25 @@ impl Debug for Abi { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for AngleBracketedGenericArguments { +impl Debug for crate::AngleBracketedGenericArguments { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl AngleBracketedGenericArguments { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("colon2_token", &self.colon2_token); - formatter.field("lt_token", &self.lt_token); - formatter.field("args", &self.args); - formatter.field("gt_token", &self.gt_token); - formatter.finish() - } - } self.debug(formatter, "AngleBracketedGenericArguments") } } +#[cfg(any(feature = "derive", feature = "full"))] +impl crate::AngleBracketedGenericArguments { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("colon2_token", &self.colon2_token); + formatter.field("lt_token", &self.lt_token); + formatter.field("args", &self.args); + formatter.field("gt_token", &self.gt_token); + formatter.finish() + } +} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for Arm { +impl Debug for crate::Arm { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { let mut formatter = formatter.debug_struct("Arm"); formatter.field("attrs", &self.attrs); @@ -46,7 +47,7 @@ 
impl Debug for Arm { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for AssocConst { +impl Debug for crate::AssocConst { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { let mut formatter = formatter.debug_struct("AssocConst"); formatter.field("ident", &self.ident); @@ -58,7 +59,7 @@ impl Debug for AssocConst { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for AssocType { +impl Debug for crate::AssocType { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { let mut formatter = formatter.debug_struct("AssocType"); formatter.field("ident", &self.ident); @@ -70,12 +71,12 @@ impl Debug for AssocType { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for AttrStyle { +impl Debug for crate::AttrStyle { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter.write_str("AttrStyle::")?; match self { - AttrStyle::Outer => formatter.write_str("Outer"), - AttrStyle::Inner(v0) => { + crate::AttrStyle::Outer => formatter.write_str("Outer"), + crate::AttrStyle::Inner(v0) => { let mut formatter = formatter.debug_tuple("Inner"); formatter.field(v0); formatter.finish() @@ -85,7 +86,7 @@ impl Debug for AttrStyle { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for Attribute { +impl Debug for crate::Attribute { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { let mut formatter = formatter.debug_struct("Attribute"); formatter.field("pound_token", &self.pound_token); @@ -97,7 +98,7 @@ impl Debug for Attribute { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for BareFnArg { +impl Debug for crate::BareFnArg { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { 
let mut formatter = formatter.debug_struct("BareFnArg"); formatter.field("attrs", &self.attrs); @@ -108,7 +109,7 @@ impl Debug for BareFnArg { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for BareVariadic { +impl Debug for crate::BareVariadic { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { let mut formatter = formatter.debug_struct("BareVariadic"); formatter.field("attrs", &self.attrs); @@ -120,146 +121,146 @@ impl Debug for BareVariadic { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for BinOp { +impl Debug for crate::BinOp { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter.write_str("BinOp::")?; match self { - BinOp::Add(v0) => { + crate::BinOp::Add(v0) => { let mut formatter = formatter.debug_tuple("Add"); formatter.field(v0); formatter.finish() } - BinOp::Sub(v0) => { + crate::BinOp::Sub(v0) => { let mut formatter = formatter.debug_tuple("Sub"); formatter.field(v0); formatter.finish() } - BinOp::Mul(v0) => { + crate::BinOp::Mul(v0) => { let mut formatter = formatter.debug_tuple("Mul"); formatter.field(v0); formatter.finish() } - BinOp::Div(v0) => { + crate::BinOp::Div(v0) => { let mut formatter = formatter.debug_tuple("Div"); formatter.field(v0); formatter.finish() } - BinOp::Rem(v0) => { + crate::BinOp::Rem(v0) => { let mut formatter = formatter.debug_tuple("Rem"); formatter.field(v0); formatter.finish() } - BinOp::And(v0) => { + crate::BinOp::And(v0) => { let mut formatter = formatter.debug_tuple("And"); formatter.field(v0); formatter.finish() } - BinOp::Or(v0) => { + crate::BinOp::Or(v0) => { let mut formatter = formatter.debug_tuple("Or"); formatter.field(v0); formatter.finish() } - BinOp::BitXor(v0) => { + crate::BinOp::BitXor(v0) => { let mut formatter = formatter.debug_tuple("BitXor"); formatter.field(v0); formatter.finish() } - BinOp::BitAnd(v0) => { + 
crate::BinOp::BitAnd(v0) => { let mut formatter = formatter.debug_tuple("BitAnd"); formatter.field(v0); formatter.finish() } - BinOp::BitOr(v0) => { + crate::BinOp::BitOr(v0) => { let mut formatter = formatter.debug_tuple("BitOr"); formatter.field(v0); formatter.finish() } - BinOp::Shl(v0) => { + crate::BinOp::Shl(v0) => { let mut formatter = formatter.debug_tuple("Shl"); formatter.field(v0); formatter.finish() } - BinOp::Shr(v0) => { + crate::BinOp::Shr(v0) => { let mut formatter = formatter.debug_tuple("Shr"); formatter.field(v0); formatter.finish() } - BinOp::Eq(v0) => { + crate::BinOp::Eq(v0) => { let mut formatter = formatter.debug_tuple("Eq"); formatter.field(v0); formatter.finish() } - BinOp::Lt(v0) => { + crate::BinOp::Lt(v0) => { let mut formatter = formatter.debug_tuple("Lt"); formatter.field(v0); formatter.finish() } - BinOp::Le(v0) => { + crate::BinOp::Le(v0) => { let mut formatter = formatter.debug_tuple("Le"); formatter.field(v0); formatter.finish() } - BinOp::Ne(v0) => { + crate::BinOp::Ne(v0) => { let mut formatter = formatter.debug_tuple("Ne"); formatter.field(v0); formatter.finish() } - BinOp::Ge(v0) => { + crate::BinOp::Ge(v0) => { let mut formatter = formatter.debug_tuple("Ge"); formatter.field(v0); formatter.finish() } - BinOp::Gt(v0) => { + crate::BinOp::Gt(v0) => { let mut formatter = formatter.debug_tuple("Gt"); formatter.field(v0); formatter.finish() } - BinOp::AddAssign(v0) => { + crate::BinOp::AddAssign(v0) => { let mut formatter = formatter.debug_tuple("AddAssign"); formatter.field(v0); formatter.finish() } - BinOp::SubAssign(v0) => { + crate::BinOp::SubAssign(v0) => { let mut formatter = formatter.debug_tuple("SubAssign"); formatter.field(v0); formatter.finish() } - BinOp::MulAssign(v0) => { + crate::BinOp::MulAssign(v0) => { let mut formatter = formatter.debug_tuple("MulAssign"); formatter.field(v0); formatter.finish() } - BinOp::DivAssign(v0) => { + crate::BinOp::DivAssign(v0) => { let mut formatter = 
formatter.debug_tuple("DivAssign"); formatter.field(v0); formatter.finish() } - BinOp::RemAssign(v0) => { + crate::BinOp::RemAssign(v0) => { let mut formatter = formatter.debug_tuple("RemAssign"); formatter.field(v0); formatter.finish() } - BinOp::BitXorAssign(v0) => { + crate::BinOp::BitXorAssign(v0) => { let mut formatter = formatter.debug_tuple("BitXorAssign"); formatter.field(v0); formatter.finish() } - BinOp::BitAndAssign(v0) => { + crate::BinOp::BitAndAssign(v0) => { let mut formatter = formatter.debug_tuple("BitAndAssign"); formatter.field(v0); formatter.finish() } - BinOp::BitOrAssign(v0) => { + crate::BinOp::BitOrAssign(v0) => { let mut formatter = formatter.debug_tuple("BitOrAssign"); formatter.field(v0); formatter.finish() } - BinOp::ShlAssign(v0) => { + crate::BinOp::ShlAssign(v0) => { let mut formatter = formatter.debug_tuple("ShlAssign"); formatter.field(v0); formatter.finish() } - BinOp::ShrAssign(v0) => { + crate::BinOp::ShrAssign(v0) => { let mut formatter = formatter.debug_tuple("ShrAssign"); formatter.field(v0); formatter.finish() @@ -269,7 +270,7 @@ impl Debug for BinOp { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for Block { +impl Debug for crate::Block { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { let mut formatter = formatter.debug_struct("Block"); formatter.field("brace_token", &self.brace_token); @@ -279,7 +280,7 @@ impl Debug for Block { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for BoundLifetimes { +impl Debug for crate::BoundLifetimes { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { let mut formatter = formatter.debug_struct("BoundLifetimes"); formatter.field("for_token", &self.for_token); @@ -291,7 +292,7 @@ impl Debug for BoundLifetimes { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ConstParam 
{ +impl Debug for crate::ConstParam { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { let mut formatter = formatter.debug_struct("ConstParam"); formatter.field("attrs", &self.attrs); @@ -306,7 +307,7 @@ impl Debug for ConstParam { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for Constraint { +impl Debug for crate::Constraint { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { let mut formatter = formatter.debug_struct("Constraint"); formatter.field("ident", &self.ident); @@ -318,66 +319,69 @@ impl Debug for Constraint { } #[cfg(feature = "derive")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for Data { +impl Debug for crate::Data { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter.write_str("Data::")?; match self { - Data::Struct(v0) => v0.debug(formatter, "Struct"), - Data::Enum(v0) => v0.debug(formatter, "Enum"), - Data::Union(v0) => v0.debug(formatter, "Union"), + crate::Data::Struct(v0) => v0.debug(formatter, "Struct"), + crate::Data::Enum(v0) => v0.debug(formatter, "Enum"), + crate::Data::Union(v0) => v0.debug(formatter, "Union"), } } } #[cfg(feature = "derive")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for DataEnum { +impl Debug for crate::DataEnum { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl DataEnum { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("enum_token", &self.enum_token); - formatter.field("brace_token", &self.brace_token); - formatter.field("variants", &self.variants); - formatter.finish() - } - } self.debug(formatter, "DataEnum") } } #[cfg(feature = "derive")] +impl crate::DataEnum { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("enum_token", 
&self.enum_token); + formatter.field("brace_token", &self.brace_token); + formatter.field("variants", &self.variants); + formatter.finish() + } +} +#[cfg(feature = "derive")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for DataStruct { +impl Debug for crate::DataStruct { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl DataStruct { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("struct_token", &self.struct_token); - formatter.field("fields", &self.fields); - formatter.field("semi_token", &self.semi_token); - formatter.finish() - } - } self.debug(formatter, "DataStruct") } } #[cfg(feature = "derive")] +impl crate::DataStruct { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("struct_token", &self.struct_token); + formatter.field("fields", &self.fields); + formatter.field("semi_token", &self.semi_token); + formatter.finish() + } +} +#[cfg(feature = "derive")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for DataUnion { +impl Debug for crate::DataUnion { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl DataUnion { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("union_token", &self.union_token); - formatter.field("fields", &self.fields); - formatter.finish() - } - } self.debug(formatter, "DataUnion") } } #[cfg(feature = "derive")] +impl crate::DataUnion { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("union_token", &self.union_token); + formatter.field("fields", &self.fields); + formatter.finish() + } +} +#[cfg(feature = "derive")] #[cfg_attr(doc_cfg, doc(cfg(feature = 
"extra-traits")))] -impl Debug for DeriveInput { +impl Debug for crate::DeriveInput { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { let mut formatter = formatter.debug_struct("DeriveInput"); formatter.field("attrs", &self.attrs); @@ -390,80 +394,77 @@ impl Debug for DeriveInput { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for Expr { +impl Debug for crate::Expr { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter.write_str("Expr::")?; match self { #[cfg(feature = "full")] - Expr::Array(v0) => v0.debug(formatter, "Array"), - #[cfg(feature = "full")] - Expr::Assign(v0) => v0.debug(formatter, "Assign"), - #[cfg(feature = "full")] - Expr::Async(v0) => v0.debug(formatter, "Async"), + crate::Expr::Array(v0) => v0.debug(formatter, "Array"), #[cfg(feature = "full")] - Expr::Await(v0) => v0.debug(formatter, "Await"), - Expr::Binary(v0) => v0.debug(formatter, "Binary"), + crate::Expr::Assign(v0) => v0.debug(formatter, "Assign"), #[cfg(feature = "full")] - Expr::Block(v0) => v0.debug(formatter, "Block"), + crate::Expr::Async(v0) => v0.debug(formatter, "Async"), #[cfg(feature = "full")] - Expr::Break(v0) => v0.debug(formatter, "Break"), - Expr::Call(v0) => v0.debug(formatter, "Call"), - Expr::Cast(v0) => v0.debug(formatter, "Cast"), + crate::Expr::Await(v0) => v0.debug(formatter, "Await"), + crate::Expr::Binary(v0) => v0.debug(formatter, "Binary"), #[cfg(feature = "full")] - Expr::Closure(v0) => v0.debug(formatter, "Closure"), + crate::Expr::Block(v0) => v0.debug(formatter, "Block"), #[cfg(feature = "full")] - Expr::Const(v0) => v0.debug(formatter, "Const"), + crate::Expr::Break(v0) => v0.debug(formatter, "Break"), + crate::Expr::Call(v0) => v0.debug(formatter, "Call"), + crate::Expr::Cast(v0) => v0.debug(formatter, "Cast"), #[cfg(feature = "full")] - Expr::Continue(v0) => v0.debug(formatter, "Continue"), - Expr::Field(v0) => v0.debug(formatter, "Field"), + 
crate::Expr::Closure(v0) => v0.debug(formatter, "Closure"), #[cfg(feature = "full")] - Expr::ForLoop(v0) => v0.debug(formatter, "ForLoop"), - Expr::Group(v0) => v0.debug(formatter, "Group"), + crate::Expr::Const(v0) => v0.debug(formatter, "Const"), #[cfg(feature = "full")] - Expr::If(v0) => v0.debug(formatter, "If"), - Expr::Index(v0) => v0.debug(formatter, "Index"), + crate::Expr::Continue(v0) => v0.debug(formatter, "Continue"), + crate::Expr::Field(v0) => v0.debug(formatter, "Field"), #[cfg(feature = "full")] - Expr::Infer(v0) => v0.debug(formatter, "Infer"), + crate::Expr::ForLoop(v0) => v0.debug(formatter, "ForLoop"), + crate::Expr::Group(v0) => v0.debug(formatter, "Group"), #[cfg(feature = "full")] - Expr::Let(v0) => v0.debug(formatter, "Let"), - Expr::Lit(v0) => v0.debug(formatter, "Lit"), + crate::Expr::If(v0) => v0.debug(formatter, "If"), + crate::Expr::Index(v0) => v0.debug(formatter, "Index"), #[cfg(feature = "full")] - Expr::Loop(v0) => v0.debug(formatter, "Loop"), - Expr::Macro(v0) => v0.debug(formatter, "Macro"), + crate::Expr::Infer(v0) => v0.debug(formatter, "Infer"), #[cfg(feature = "full")] - Expr::Match(v0) => v0.debug(formatter, "Match"), + crate::Expr::Let(v0) => v0.debug(formatter, "Let"), + crate::Expr::Lit(v0) => v0.debug(formatter, "Lit"), #[cfg(feature = "full")] - Expr::MethodCall(v0) => v0.debug(formatter, "MethodCall"), - Expr::Paren(v0) => v0.debug(formatter, "Paren"), - Expr::Path(v0) => v0.debug(formatter, "Path"), + crate::Expr::Loop(v0) => v0.debug(formatter, "Loop"), + crate::Expr::Macro(v0) => v0.debug(formatter, "Macro"), #[cfg(feature = "full")] - Expr::Range(v0) => v0.debug(formatter, "Range"), + crate::Expr::Match(v0) => v0.debug(formatter, "Match"), + crate::Expr::MethodCall(v0) => v0.debug(formatter, "MethodCall"), + crate::Expr::Paren(v0) => v0.debug(formatter, "Paren"), + crate::Expr::Path(v0) => v0.debug(formatter, "Path"), #[cfg(feature = "full")] - Expr::Reference(v0) => v0.debug(formatter, "Reference"), + 
crate::Expr::Range(v0) => v0.debug(formatter, "Range"), + crate::Expr::Reference(v0) => v0.debug(formatter, "Reference"), #[cfg(feature = "full")] - Expr::Repeat(v0) => v0.debug(formatter, "Repeat"), + crate::Expr::Repeat(v0) => v0.debug(formatter, "Repeat"), #[cfg(feature = "full")] - Expr::Return(v0) => v0.debug(formatter, "Return"), + crate::Expr::Return(v0) => v0.debug(formatter, "Return"), + crate::Expr::Struct(v0) => v0.debug(formatter, "Struct"), #[cfg(feature = "full")] - Expr::Struct(v0) => v0.debug(formatter, "Struct"), + crate::Expr::Try(v0) => v0.debug(formatter, "Try"), #[cfg(feature = "full")] - Expr::Try(v0) => v0.debug(formatter, "Try"), + crate::Expr::TryBlock(v0) => v0.debug(formatter, "TryBlock"), #[cfg(feature = "full")] - Expr::TryBlock(v0) => v0.debug(formatter, "TryBlock"), + crate::Expr::Tuple(v0) => v0.debug(formatter, "Tuple"), + crate::Expr::Unary(v0) => v0.debug(formatter, "Unary"), #[cfg(feature = "full")] - Expr::Tuple(v0) => v0.debug(formatter, "Tuple"), - Expr::Unary(v0) => v0.debug(formatter, "Unary"), - #[cfg(feature = "full")] - Expr::Unsafe(v0) => v0.debug(formatter, "Unsafe"), - Expr::Verbatim(v0) => { + crate::Expr::Unsafe(v0) => v0.debug(formatter, "Unsafe"), + crate::Expr::Verbatim(v0) => { let mut formatter = formatter.debug_tuple("Verbatim"); formatter.field(v0); formatter.finish() } #[cfg(feature = "full")] - Expr::While(v0) => v0.debug(formatter, "While"), + crate::Expr::While(v0) => v0.debug(formatter, "While"), #[cfg(feature = "full")] - Expr::Yield(v0) => v0.debug(formatter, "Yield"), + crate::Expr::Yield(v0) => v0.debug(formatter, "Yield"), #[cfg(not(feature = "full"))] _ => unreachable!(), } @@ -471,654 +472,692 @@ impl Debug for Expr { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ExprArray { +impl Debug for crate::ExprArray { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ExprArray { - fn debug(&self, formatter: &mut fmt::Formatter, name: 
&str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("bracket_token", &self.bracket_token); - formatter.field("elems", &self.elems); - formatter.finish() - } - } self.debug(formatter, "ExprArray") } } #[cfg(feature = "full")] +impl crate::ExprArray { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("bracket_token", &self.bracket_token); + formatter.field("elems", &self.elems); + formatter.finish() + } +} +#[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ExprAssign { +impl Debug for crate::ExprAssign { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ExprAssign { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("left", &self.left); - formatter.field("eq_token", &self.eq_token); - formatter.field("right", &self.right); - formatter.finish() - } - } self.debug(formatter, "ExprAssign") } } #[cfg(feature = "full")] +impl crate::ExprAssign { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("left", &self.left); + formatter.field("eq_token", &self.eq_token); + formatter.field("right", &self.right); + formatter.finish() + } +} +#[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ExprAsync { +impl Debug for crate::ExprAsync { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ExprAsync { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", 
&self.attrs); - formatter.field("async_token", &self.async_token); - formatter.field("capture", &self.capture); - formatter.field("block", &self.block); - formatter.finish() - } - } self.debug(formatter, "ExprAsync") } } #[cfg(feature = "full")] +impl crate::ExprAsync { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("async_token", &self.async_token); + formatter.field("capture", &self.capture); + formatter.field("block", &self.block); + formatter.finish() + } +} +#[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ExprAwait { +impl Debug for crate::ExprAwait { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ExprAwait { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("base", &self.base); - formatter.field("dot_token", &self.dot_token); - formatter.field("await_token", &self.await_token); - formatter.finish() - } - } self.debug(formatter, "ExprAwait") } } +#[cfg(feature = "full")] +impl crate::ExprAwait { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("base", &self.base); + formatter.field("dot_token", &self.dot_token); + formatter.field("await_token", &self.await_token); + formatter.finish() + } +} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ExprBinary { +impl Debug for crate::ExprBinary { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ExprBinary { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - 
formatter.field("attrs", &self.attrs); - formatter.field("left", &self.left); - formatter.field("op", &self.op); - formatter.field("right", &self.right); - formatter.finish() - } - } self.debug(formatter, "ExprBinary") } } +#[cfg(any(feature = "derive", feature = "full"))] +impl crate::ExprBinary { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("left", &self.left); + formatter.field("op", &self.op); + formatter.field("right", &self.right); + formatter.finish() + } +} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ExprBlock { +impl Debug for crate::ExprBlock { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ExprBlock { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("label", &self.label); - formatter.field("block", &self.block); - formatter.finish() - } - } self.debug(formatter, "ExprBlock") } } #[cfg(feature = "full")] +impl crate::ExprBlock { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("label", &self.label); + formatter.field("block", &self.block); + formatter.finish() + } +} +#[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ExprBreak { +impl Debug for crate::ExprBreak { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ExprBreak { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("break_token", &self.break_token); - formatter.field("label", &self.label); - 
formatter.field("expr", &self.expr); - formatter.finish() - } - } self.debug(formatter, "ExprBreak") } } +#[cfg(feature = "full")] +impl crate::ExprBreak { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("break_token", &self.break_token); + formatter.field("label", &self.label); + formatter.field("expr", &self.expr); + formatter.finish() + } +} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ExprCall { +impl Debug for crate::ExprCall { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ExprCall { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("func", &self.func); - formatter.field("paren_token", &self.paren_token); - formatter.field("args", &self.args); - formatter.finish() - } - } self.debug(formatter, "ExprCall") } } #[cfg(any(feature = "derive", feature = "full"))] +impl crate::ExprCall { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("func", &self.func); + formatter.field("paren_token", &self.paren_token); + formatter.field("args", &self.args); + formatter.finish() + } +} +#[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ExprCast { +impl Debug for crate::ExprCast { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ExprCast { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("expr", &self.expr); - 
formatter.field("as_token", &self.as_token); - formatter.field("ty", &self.ty); - formatter.finish() - } - } self.debug(formatter, "ExprCast") } } +#[cfg(any(feature = "derive", feature = "full"))] +impl crate::ExprCast { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("expr", &self.expr); + formatter.field("as_token", &self.as_token); + formatter.field("ty", &self.ty); + formatter.finish() + } +} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ExprClosure { +impl Debug for crate::ExprClosure { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ExprClosure { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("lifetimes", &self.lifetimes); - formatter.field("constness", &self.constness); - formatter.field("movability", &self.movability); - formatter.field("asyncness", &self.asyncness); - formatter.field("capture", &self.capture); - formatter.field("or1_token", &self.or1_token); - formatter.field("inputs", &self.inputs); - formatter.field("or2_token", &self.or2_token); - formatter.field("output", &self.output); - formatter.field("body", &self.body); - formatter.finish() - } - } self.debug(formatter, "ExprClosure") } } #[cfg(feature = "full")] +impl crate::ExprClosure { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("lifetimes", &self.lifetimes); + formatter.field("constness", &self.constness); + formatter.field("movability", &self.movability); + formatter.field("asyncness", &self.asyncness); + formatter.field("capture", &self.capture); + formatter.field("or1_token", &self.or1_token); 
+ formatter.field("inputs", &self.inputs); + formatter.field("or2_token", &self.or2_token); + formatter.field("output", &self.output); + formatter.field("body", &self.body); + formatter.finish() + } +} +#[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ExprConst { +impl Debug for crate::ExprConst { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ExprConst { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("const_token", &self.const_token); - formatter.field("block", &self.block); - formatter.finish() - } - } self.debug(formatter, "ExprConst") } } #[cfg(feature = "full")] +impl crate::ExprConst { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("const_token", &self.const_token); + formatter.field("block", &self.block); + formatter.finish() + } +} +#[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ExprContinue { +impl Debug for crate::ExprContinue { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ExprContinue { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("continue_token", &self.continue_token); - formatter.field("label", &self.label); - formatter.finish() - } - } self.debug(formatter, "ExprContinue") } } +#[cfg(feature = "full")] +impl crate::ExprContinue { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("continue_token", &self.continue_token); + formatter.field("label", 
&self.label); + formatter.finish() + } +} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ExprField { +impl Debug for crate::ExprField { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ExprField { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("base", &self.base); - formatter.field("dot_token", &self.dot_token); - formatter.field("member", &self.member); - formatter.finish() - } - } self.debug(formatter, "ExprField") } } +#[cfg(any(feature = "derive", feature = "full"))] +impl crate::ExprField { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("base", &self.base); + formatter.field("dot_token", &self.dot_token); + formatter.field("member", &self.member); + formatter.finish() + } +} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ExprForLoop { +impl Debug for crate::ExprForLoop { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ExprForLoop { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("label", &self.label); - formatter.field("for_token", &self.for_token); - formatter.field("pat", &self.pat); - formatter.field("in_token", &self.in_token); - formatter.field("expr", &self.expr); - formatter.field("body", &self.body); - formatter.finish() - } - } self.debug(formatter, "ExprForLoop") } } +#[cfg(feature = "full")] +impl crate::ExprForLoop { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + 
formatter.field("attrs", &self.attrs); + formatter.field("label", &self.label); + formatter.field("for_token", &self.for_token); + formatter.field("pat", &self.pat); + formatter.field("in_token", &self.in_token); + formatter.field("expr", &self.expr); + formatter.field("body", &self.body); + formatter.finish() + } +} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ExprGroup { +impl Debug for crate::ExprGroup { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ExprGroup { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("group_token", &self.group_token); - formatter.field("expr", &self.expr); - formatter.finish() - } - } self.debug(formatter, "ExprGroup") } } +#[cfg(any(feature = "derive", feature = "full"))] +impl crate::ExprGroup { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("group_token", &self.group_token); + formatter.field("expr", &self.expr); + formatter.finish() + } +} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ExprIf { +impl Debug for crate::ExprIf { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ExprIf { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("if_token", &self.if_token); - formatter.field("cond", &self.cond); - formatter.field("then_branch", &self.then_branch); - formatter.field("else_branch", &self.else_branch); - formatter.finish() - } - } self.debug(formatter, "ExprIf") } } +#[cfg(feature = "full")] +impl crate::ExprIf { + fn debug(&self, formatter: &mut 
fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("if_token", &self.if_token); + formatter.field("cond", &self.cond); + formatter.field("then_branch", &self.then_branch); + formatter.field("else_branch", &self.else_branch); + formatter.finish() + } +} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ExprIndex { +impl Debug for crate::ExprIndex { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ExprIndex { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("expr", &self.expr); - formatter.field("bracket_token", &self.bracket_token); - formatter.field("index", &self.index); - formatter.finish() - } - } self.debug(formatter, "ExprIndex") } } +#[cfg(any(feature = "derive", feature = "full"))] +impl crate::ExprIndex { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("expr", &self.expr); + formatter.field("bracket_token", &self.bracket_token); + formatter.field("index", &self.index); + formatter.finish() + } +} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ExprInfer { +impl Debug for crate::ExprInfer { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ExprInfer { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("underscore_token", &self.underscore_token); - formatter.finish() - } - } self.debug(formatter, "ExprInfer") } } #[cfg(feature = "full")] +impl crate::ExprInfer { + fn debug(&self, 
formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("underscore_token", &self.underscore_token); + formatter.finish() + } +} +#[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ExprLet { +impl Debug for crate::ExprLet { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ExprLet { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("let_token", &self.let_token); - formatter.field("pat", &self.pat); - formatter.field("eq_token", &self.eq_token); - formatter.field("expr", &self.expr); - formatter.finish() - } - } self.debug(formatter, "ExprLet") } } +#[cfg(feature = "full")] +impl crate::ExprLet { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("let_token", &self.let_token); + formatter.field("pat", &self.pat); + formatter.field("eq_token", &self.eq_token); + formatter.field("expr", &self.expr); + formatter.finish() + } +} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ExprLit { +impl Debug for crate::ExprLit { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ExprLit { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("lit", &self.lit); - formatter.finish() - } - } self.debug(formatter, "ExprLit") } } +#[cfg(any(feature = "derive", feature = "full"))] +impl crate::ExprLit { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = 
formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("lit", &self.lit); + formatter.finish() + } +} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ExprLoop { +impl Debug for crate::ExprLoop { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ExprLoop { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("label", &self.label); - formatter.field("loop_token", &self.loop_token); - formatter.field("body", &self.body); - formatter.finish() - } - } self.debug(formatter, "ExprLoop") } } +#[cfg(feature = "full")] +impl crate::ExprLoop { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("label", &self.label); + formatter.field("loop_token", &self.loop_token); + formatter.field("body", &self.body); + formatter.finish() + } +} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ExprMacro { +impl Debug for crate::ExprMacro { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ExprMacro { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("mac", &self.mac); - formatter.finish() - } - } self.debug(formatter, "ExprMacro") } } +#[cfg(any(feature = "derive", feature = "full"))] +impl crate::ExprMacro { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("mac", &self.mac); + formatter.finish() + } +} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, 
doc(cfg(feature = "extra-traits")))] -impl Debug for ExprMatch { +impl Debug for crate::ExprMatch { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ExprMatch { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("match_token", &self.match_token); - formatter.field("expr", &self.expr); - formatter.field("brace_token", &self.brace_token); - formatter.field("arms", &self.arms); - formatter.finish() - } - } self.debug(formatter, "ExprMatch") } } #[cfg(feature = "full")] +impl crate::ExprMatch { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("match_token", &self.match_token); + formatter.field("expr", &self.expr); + formatter.field("brace_token", &self.brace_token); + formatter.field("arms", &self.arms); + formatter.finish() + } +} +#[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ExprMethodCall { +impl Debug for crate::ExprMethodCall { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ExprMethodCall { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("receiver", &self.receiver); - formatter.field("dot_token", &self.dot_token); - formatter.field("method", &self.method); - formatter.field("turbofish", &self.turbofish); - formatter.field("paren_token", &self.paren_token); - formatter.field("args", &self.args); - formatter.finish() - } - } self.debug(formatter, "ExprMethodCall") } } #[cfg(any(feature = "derive", feature = "full"))] -#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ExprParen { - fn fmt(&self, formatter: 
&mut fmt::Formatter) -> fmt::Result { - impl ExprParen { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("paren_token", &self.paren_token); - formatter.field("expr", &self.expr); - formatter.finish() - } - } +impl crate::ExprMethodCall { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("receiver", &self.receiver); + formatter.field("dot_token", &self.dot_token); + formatter.field("method", &self.method); + formatter.field("turbofish", &self.turbofish); + formatter.field("paren_token", &self.paren_token); + formatter.field("args", &self.args); + formatter.finish() + } +} +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Debug for crate::ExprParen { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { self.debug(formatter, "ExprParen") } } #[cfg(any(feature = "derive", feature = "full"))] +impl crate::ExprParen { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("paren_token", &self.paren_token); + formatter.field("expr", &self.expr); + formatter.finish() + } +} +#[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ExprPath { +impl Debug for crate::ExprPath { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ExprPath { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("qself", &self.qself); - formatter.field("path", &self.path); - formatter.finish() - 
} - } self.debug(formatter, "ExprPath") } } +#[cfg(any(feature = "derive", feature = "full"))] +impl crate::ExprPath { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("qself", &self.qself); + formatter.field("path", &self.path); + formatter.finish() + } +} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ExprRange { +impl Debug for crate::ExprRange { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ExprRange { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("start", &self.start); - formatter.field("limits", &self.limits); - formatter.field("end", &self.end); - formatter.finish() - } - } self.debug(formatter, "ExprRange") } } #[cfg(feature = "full")] +impl crate::ExprRange { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("start", &self.start); + formatter.field("limits", &self.limits); + formatter.field("end", &self.end); + formatter.finish() + } +} +#[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ExprReference { +impl Debug for crate::ExprReference { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ExprReference { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("and_token", &self.and_token); - formatter.field("mutability", &self.mutability); - formatter.field("expr", &self.expr); - formatter.finish() - } - } self.debug(formatter, 
"ExprReference") } } +#[cfg(any(feature = "derive", feature = "full"))] +impl crate::ExprReference { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("and_token", &self.and_token); + formatter.field("mutability", &self.mutability); + formatter.field("expr", &self.expr); + formatter.finish() + } +} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ExprRepeat { +impl Debug for crate::ExprRepeat { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ExprRepeat { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("bracket_token", &self.bracket_token); - formatter.field("expr", &self.expr); - formatter.field("semi_token", &self.semi_token); - formatter.field("len", &self.len); - formatter.finish() - } - } self.debug(formatter, "ExprRepeat") } } #[cfg(feature = "full")] +impl crate::ExprRepeat { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("bracket_token", &self.bracket_token); + formatter.field("expr", &self.expr); + formatter.field("semi_token", &self.semi_token); + formatter.field("len", &self.len); + formatter.finish() + } +} +#[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ExprReturn { +impl Debug for crate::ExprReturn { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ExprReturn { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("return_token", &self.return_token); - 
formatter.field("expr", &self.expr); - formatter.finish() - } - } self.debug(formatter, "ExprReturn") } } #[cfg(feature = "full")] +impl crate::ExprReturn { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("return_token", &self.return_token); + formatter.field("expr", &self.expr); + formatter.finish() + } +} +#[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ExprStruct { +impl Debug for crate::ExprStruct { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ExprStruct { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("qself", &self.qself); - formatter.field("path", &self.path); - formatter.field("brace_token", &self.brace_token); - formatter.field("fields", &self.fields); - formatter.field("dot2_token", &self.dot2_token); - formatter.field("rest", &self.rest); - formatter.finish() - } - } self.debug(formatter, "ExprStruct") } } +#[cfg(any(feature = "derive", feature = "full"))] +impl crate::ExprStruct { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("qself", &self.qself); + formatter.field("path", &self.path); + formatter.field("brace_token", &self.brace_token); + formatter.field("fields", &self.fields); + formatter.field("dot2_token", &self.dot2_token); + formatter.field("rest", &self.rest); + formatter.finish() + } +} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ExprTry { +impl Debug for crate::ExprTry { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ExprTry { - fn debug(&self, 
formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("expr", &self.expr); - formatter.field("question_token", &self.question_token); - formatter.finish() - } - } self.debug(formatter, "ExprTry") } } #[cfg(feature = "full")] +impl crate::ExprTry { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("expr", &self.expr); + formatter.field("question_token", &self.question_token); + formatter.finish() + } +} +#[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ExprTryBlock { +impl Debug for crate::ExprTryBlock { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ExprTryBlock { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("try_token", &self.try_token); - formatter.field("block", &self.block); - formatter.finish() - } - } self.debug(formatter, "ExprTryBlock") } } #[cfg(feature = "full")] +impl crate::ExprTryBlock { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("try_token", &self.try_token); + formatter.field("block", &self.block); + formatter.finish() + } +} +#[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ExprTuple { +impl Debug for crate::ExprTuple { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ExprTuple { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - 
formatter.field("paren_token", &self.paren_token); - formatter.field("elems", &self.elems); - formatter.finish() - } - } self.debug(formatter, "ExprTuple") } } +#[cfg(feature = "full")] +impl crate::ExprTuple { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("paren_token", &self.paren_token); + formatter.field("elems", &self.elems); + formatter.finish() + } +} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ExprUnary { +impl Debug for crate::ExprUnary { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ExprUnary { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("op", &self.op); - formatter.field("expr", &self.expr); - formatter.finish() - } - } self.debug(formatter, "ExprUnary") } } +#[cfg(any(feature = "derive", feature = "full"))] +impl crate::ExprUnary { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("op", &self.op); + formatter.field("expr", &self.expr); + formatter.finish() + } +} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ExprUnsafe { +impl Debug for crate::ExprUnsafe { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ExprUnsafe { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("unsafe_token", &self.unsafe_token); - formatter.field("block", &self.block); - formatter.finish() - } - } self.debug(formatter, "ExprUnsafe") } } 
#[cfg(feature = "full")] +impl crate::ExprUnsafe { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("unsafe_token", &self.unsafe_token); + formatter.field("block", &self.block); + formatter.finish() + } +} +#[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ExprWhile { +impl Debug for crate::ExprWhile { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ExprWhile { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("label", &self.label); - formatter.field("while_token", &self.while_token); - formatter.field("cond", &self.cond); - formatter.field("body", &self.body); - formatter.finish() - } - } self.debug(formatter, "ExprWhile") } } #[cfg(feature = "full")] +impl crate::ExprWhile { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("label", &self.label); + formatter.field("while_token", &self.while_token); + formatter.field("cond", &self.cond); + formatter.field("body", &self.body); + formatter.finish() + } +} +#[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ExprYield { +impl Debug for crate::ExprYield { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ExprYield { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("yield_token", &self.yield_token); - formatter.field("expr", &self.expr); - formatter.finish() - } - } self.debug(formatter, "ExprYield") } } +#[cfg(feature = "full")] 
+impl crate::ExprYield { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("yield_token", &self.yield_token); + formatter.field("expr", &self.expr); + formatter.finish() + } +} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for Field { +impl Debug for crate::Field { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { let mut formatter = formatter.debug_struct("Field"); formatter.field("attrs", &self.attrs); @@ -1132,17 +1171,17 @@ impl Debug for Field { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for FieldMutability { +impl Debug for crate::FieldMutability { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter.write_str("FieldMutability::")?; match self { - FieldMutability::None => formatter.write_str("None"), + crate::FieldMutability::None => formatter.write_str("None"), } } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for FieldPat { +impl Debug for crate::FieldPat { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { let mut formatter = formatter.debug_struct("FieldPat"); formatter.field("attrs", &self.attrs); @@ -1152,9 +1191,9 @@ impl Debug for FieldPat { formatter.finish() } } -#[cfg(feature = "full")] +#[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for FieldValue { +impl Debug for crate::FieldValue { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { let mut formatter = formatter.debug_struct("FieldValue"); formatter.field("attrs", &self.attrs); @@ -1166,49 +1205,51 @@ impl Debug for FieldValue { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = 
"extra-traits")))] -impl Debug for Fields { +impl Debug for crate::Fields { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter.write_str("Fields::")?; match self { - Fields::Named(v0) => v0.debug(formatter, "Named"), - Fields::Unnamed(v0) => v0.debug(formatter, "Unnamed"), - Fields::Unit => formatter.write_str("Unit"), + crate::Fields::Named(v0) => v0.debug(formatter, "Named"), + crate::Fields::Unnamed(v0) => v0.debug(formatter, "Unnamed"), + crate::Fields::Unit => formatter.write_str("Unit"), } } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for FieldsNamed { +impl Debug for crate::FieldsNamed { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl FieldsNamed { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("brace_token", &self.brace_token); - formatter.field("named", &self.named); - formatter.finish() - } - } self.debug(formatter, "FieldsNamed") } } #[cfg(any(feature = "derive", feature = "full"))] +impl crate::FieldsNamed { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("brace_token", &self.brace_token); + formatter.field("named", &self.named); + formatter.finish() + } +} +#[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for FieldsUnnamed { +impl Debug for crate::FieldsUnnamed { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl FieldsUnnamed { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("paren_token", &self.paren_token); - formatter.field("unnamed", &self.unnamed); - formatter.finish() - } - } self.debug(formatter, "FieldsUnnamed") } } +#[cfg(any(feature = 
"derive", feature = "full"))] +impl crate::FieldsUnnamed { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("paren_token", &self.paren_token); + formatter.field("unnamed", &self.unnamed); + formatter.finish() + } +} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for File { +impl Debug for crate::File { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { let mut formatter = formatter.debug_struct("File"); formatter.field("shebang", &self.shebang); @@ -1219,16 +1260,16 @@ impl Debug for File { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for FnArg { +impl Debug for crate::FnArg { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter.write_str("FnArg::")?; match self { - FnArg::Receiver(v0) => { + crate::FnArg::Receiver(v0) => { let mut formatter = formatter.debug_tuple("Receiver"); formatter.field(v0); formatter.finish() } - FnArg::Typed(v0) => { + crate::FnArg::Typed(v0) => { let mut formatter = formatter.debug_tuple("Typed"); formatter.field(v0); formatter.finish() @@ -1238,15 +1279,15 @@ impl Debug for FnArg { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ForeignItem { +impl Debug for crate::ForeignItem { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter.write_str("ForeignItem::")?; match self { - ForeignItem::Fn(v0) => v0.debug(formatter, "Fn"), - ForeignItem::Static(v0) => v0.debug(formatter, "Static"), - ForeignItem::Type(v0) => v0.debug(formatter, "Type"), - ForeignItem::Macro(v0) => v0.debug(formatter, "Macro"), - ForeignItem::Verbatim(v0) => { + crate::ForeignItem::Fn(v0) => v0.debug(formatter, "Fn"), + crate::ForeignItem::Static(v0) => v0.debug(formatter, "Static"), + crate::ForeignItem::Type(v0) => v0.debug(formatter, "Type"), + 
crate::ForeignItem::Macro(v0) => v0.debug(formatter, "Macro"), + crate::ForeignItem::Verbatim(v0) => { let mut formatter = formatter.debug_tuple("Verbatim"); formatter.field(v0); formatter.finish() @@ -1256,109 +1297,113 @@ impl Debug for ForeignItem { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ForeignItemFn { +impl Debug for crate::ForeignItemFn { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ForeignItemFn { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("vis", &self.vis); - formatter.field("sig", &self.sig); - formatter.field("semi_token", &self.semi_token); - formatter.finish() - } - } self.debug(formatter, "ForeignItemFn") } } #[cfg(feature = "full")] +impl crate::ForeignItemFn { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("vis", &self.vis); + formatter.field("sig", &self.sig); + formatter.field("semi_token", &self.semi_token); + formatter.finish() + } +} +#[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ForeignItemMacro { +impl Debug for crate::ForeignItemMacro { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ForeignItemMacro { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("mac", &self.mac); - formatter.field("semi_token", &self.semi_token); - formatter.finish() - } - } self.debug(formatter, "ForeignItemMacro") } } #[cfg(feature = "full")] +impl crate::ForeignItemMacro { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = 
formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("mac", &self.mac); + formatter.field("semi_token", &self.semi_token); + formatter.finish() + } +} +#[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ForeignItemStatic { +impl Debug for crate::ForeignItemStatic { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ForeignItemStatic { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("vis", &self.vis); - formatter.field("static_token", &self.static_token); - formatter.field("mutability", &self.mutability); - formatter.field("ident", &self.ident); - formatter.field("colon_token", &self.colon_token); - formatter.field("ty", &self.ty); - formatter.field("semi_token", &self.semi_token); - formatter.finish() - } - } self.debug(formatter, "ForeignItemStatic") } } #[cfg(feature = "full")] +impl crate::ForeignItemStatic { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("vis", &self.vis); + formatter.field("static_token", &self.static_token); + formatter.field("mutability", &self.mutability); + formatter.field("ident", &self.ident); + formatter.field("colon_token", &self.colon_token); + formatter.field("ty", &self.ty); + formatter.field("semi_token", &self.semi_token); + formatter.finish() + } +} +#[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ForeignItemType { +impl Debug for crate::ForeignItemType { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ForeignItemType { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", 
&self.attrs); - formatter.field("vis", &self.vis); - formatter.field("type_token", &self.type_token); - formatter.field("ident", &self.ident); - formatter.field("generics", &self.generics); - formatter.field("semi_token", &self.semi_token); - formatter.finish() - } - } self.debug(formatter, "ForeignItemType") } } +#[cfg(feature = "full")] +impl crate::ForeignItemType { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("vis", &self.vis); + formatter.field("type_token", &self.type_token); + formatter.field("ident", &self.ident); + formatter.field("generics", &self.generics); + formatter.field("semi_token", &self.semi_token); + formatter.finish() + } +} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for GenericArgument { +impl Debug for crate::GenericArgument { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter.write_str("GenericArgument::")?; match self { - GenericArgument::Lifetime(v0) => { + crate::GenericArgument::Lifetime(v0) => { let mut formatter = formatter.debug_tuple("Lifetime"); formatter.field(v0); formatter.finish() } - GenericArgument::Type(v0) => { + crate::GenericArgument::Type(v0) => { let mut formatter = formatter.debug_tuple("Type"); formatter.field(v0); formatter.finish() } - GenericArgument::Const(v0) => { + crate::GenericArgument::Const(v0) => { let mut formatter = formatter.debug_tuple("Const"); formatter.field(v0); formatter.finish() } - GenericArgument::AssocType(v0) => { + crate::GenericArgument::AssocType(v0) => { let mut formatter = formatter.debug_tuple("AssocType"); formatter.field(v0); formatter.finish() } - GenericArgument::AssocConst(v0) => { + crate::GenericArgument::AssocConst(v0) => { let mut formatter = formatter.debug_tuple("AssocConst"); formatter.field(v0); formatter.finish() } - 
GenericArgument::Constraint(v0) => { + crate::GenericArgument::Constraint(v0) => { let mut formatter = formatter.debug_tuple("Constraint"); formatter.field(v0); formatter.finish() @@ -1368,21 +1413,21 @@ impl Debug for GenericArgument { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for GenericParam { +impl Debug for crate::GenericParam { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter.write_str("GenericParam::")?; match self { - GenericParam::Lifetime(v0) => { + crate::GenericParam::Lifetime(v0) => { let mut formatter = formatter.debug_tuple("Lifetime"); formatter.field(v0); formatter.finish() } - GenericParam::Type(v0) => { + crate::GenericParam::Type(v0) => { let mut formatter = formatter.debug_tuple("Type"); formatter.field(v0); formatter.finish() } - GenericParam::Const(v0) => { + crate::GenericParam::Const(v0) => { let mut formatter = formatter.debug_tuple("Const"); formatter.field(v0); formatter.finish() @@ -1392,7 +1437,7 @@ impl Debug for GenericParam { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for Generics { +impl Debug for crate::Generics { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { let mut formatter = formatter.debug_struct("Generics"); formatter.field("lt_token", &self.lt_token); @@ -1404,15 +1449,15 @@ impl Debug for Generics { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ImplItem { +impl Debug for crate::ImplItem { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter.write_str("ImplItem::")?; match self { - ImplItem::Const(v0) => v0.debug(formatter, "Const"), - ImplItem::Fn(v0) => v0.debug(formatter, "Fn"), - ImplItem::Type(v0) => v0.debug(formatter, "Type"), - ImplItem::Macro(v0) => v0.debug(formatter, "Macro"), - ImplItem::Verbatim(v0) => { + crate::ImplItem::Const(v0) => 
v0.debug(formatter, "Const"), + crate::ImplItem::Fn(v0) => v0.debug(formatter, "Fn"), + crate::ImplItem::Type(v0) => v0.debug(formatter, "Type"), + crate::ImplItem::Macro(v0) => v0.debug(formatter, "Macro"), + crate::ImplItem::Verbatim(v0) => { let mut formatter = formatter.debug_tuple("Verbatim"); formatter.field(v0); formatter.finish() @@ -1422,94 +1467,98 @@ impl Debug for ImplItem { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ImplItemConst { +impl Debug for crate::ImplItemConst { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ImplItemConst { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("vis", &self.vis); - formatter.field("defaultness", &self.defaultness); - formatter.field("const_token", &self.const_token); - formatter.field("ident", &self.ident); - formatter.field("generics", &self.generics); - formatter.field("colon_token", &self.colon_token); - formatter.field("ty", &self.ty); - formatter.field("eq_token", &self.eq_token); - formatter.field("expr", &self.expr); - formatter.field("semi_token", &self.semi_token); - formatter.finish() - } - } self.debug(formatter, "ImplItemConst") } } #[cfg(feature = "full")] +impl crate::ImplItemConst { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("vis", &self.vis); + formatter.field("defaultness", &self.defaultness); + formatter.field("const_token", &self.const_token); + formatter.field("ident", &self.ident); + formatter.field("generics", &self.generics); + formatter.field("colon_token", &self.colon_token); + formatter.field("ty", &self.ty); + formatter.field("eq_token", &self.eq_token); + formatter.field("expr", &self.expr); + formatter.field("semi_token", 
&self.semi_token); + formatter.finish() + } +} +#[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ImplItemFn { +impl Debug for crate::ImplItemFn { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ImplItemFn { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("vis", &self.vis); - formatter.field("defaultness", &self.defaultness); - formatter.field("sig", &self.sig); - formatter.field("block", &self.block); - formatter.finish() - } - } self.debug(formatter, "ImplItemFn") } } #[cfg(feature = "full")] +impl crate::ImplItemFn { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("vis", &self.vis); + formatter.field("defaultness", &self.defaultness); + formatter.field("sig", &self.sig); + formatter.field("block", &self.block); + formatter.finish() + } +} +#[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ImplItemMacro { +impl Debug for crate::ImplItemMacro { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ImplItemMacro { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("mac", &self.mac); - formatter.field("semi_token", &self.semi_token); - formatter.finish() - } - } self.debug(formatter, "ImplItemMacro") } } #[cfg(feature = "full")] +impl crate::ImplItemMacro { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("mac", &self.mac); + formatter.field("semi_token", &self.semi_token); + 
formatter.finish() + } +} +#[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ImplItemType { +impl Debug for crate::ImplItemType { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ImplItemType { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("vis", &self.vis); - formatter.field("defaultness", &self.defaultness); - formatter.field("type_token", &self.type_token); - formatter.field("ident", &self.ident); - formatter.field("generics", &self.generics); - formatter.field("eq_token", &self.eq_token); - formatter.field("ty", &self.ty); - formatter.field("semi_token", &self.semi_token); - formatter.finish() - } - } self.debug(formatter, "ImplItemType") } } #[cfg(feature = "full")] +impl crate::ImplItemType { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("vis", &self.vis); + formatter.field("defaultness", &self.defaultness); + formatter.field("type_token", &self.type_token); + formatter.field("ident", &self.ident); + formatter.field("generics", &self.generics); + formatter.field("eq_token", &self.eq_token); + formatter.field("ty", &self.ty); + formatter.field("semi_token", &self.semi_token); + formatter.finish() + } +} +#[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ImplRestriction { +impl Debug for crate::ImplRestriction { fn fmt(&self, _formatter: &mut fmt::Formatter) -> fmt::Result { match *self {} } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for Index { +impl Debug for crate::Index { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { let mut formatter = 
formatter.debug_struct("Index"); formatter.field("index", &self.index); @@ -1519,26 +1568,26 @@ impl Debug for Index { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for Item { +impl Debug for crate::Item { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter.write_str("Item::")?; match self { - Item::Const(v0) => v0.debug(formatter, "Const"), - Item::Enum(v0) => v0.debug(formatter, "Enum"), - Item::ExternCrate(v0) => v0.debug(formatter, "ExternCrate"), - Item::Fn(v0) => v0.debug(formatter, "Fn"), - Item::ForeignMod(v0) => v0.debug(formatter, "ForeignMod"), - Item::Impl(v0) => v0.debug(formatter, "Impl"), - Item::Macro(v0) => v0.debug(formatter, "Macro"), - Item::Mod(v0) => v0.debug(formatter, "Mod"), - Item::Static(v0) => v0.debug(formatter, "Static"), - Item::Struct(v0) => v0.debug(formatter, "Struct"), - Item::Trait(v0) => v0.debug(formatter, "Trait"), - Item::TraitAlias(v0) => v0.debug(formatter, "TraitAlias"), - Item::Type(v0) => v0.debug(formatter, "Type"), - Item::Union(v0) => v0.debug(formatter, "Union"), - Item::Use(v0) => v0.debug(formatter, "Use"), - Item::Verbatim(v0) => { + crate::Item::Const(v0) => v0.debug(formatter, "Const"), + crate::Item::Enum(v0) => v0.debug(formatter, "Enum"), + crate::Item::ExternCrate(v0) => v0.debug(formatter, "ExternCrate"), + crate::Item::Fn(v0) => v0.debug(formatter, "Fn"), + crate::Item::ForeignMod(v0) => v0.debug(formatter, "ForeignMod"), + crate::Item::Impl(v0) => v0.debug(formatter, "Impl"), + crate::Item::Macro(v0) => v0.debug(formatter, "Macro"), + crate::Item::Mod(v0) => v0.debug(formatter, "Mod"), + crate::Item::Static(v0) => v0.debug(formatter, "Static"), + crate::Item::Struct(v0) => v0.debug(formatter, "Struct"), + crate::Item::Trait(v0) => v0.debug(formatter, "Trait"), + crate::Item::TraitAlias(v0) => v0.debug(formatter, "TraitAlias"), + crate::Item::Type(v0) => v0.debug(formatter, "Type"), + crate::Item::Union(v0) => v0.debug(formatter, 
"Union"), + crate::Item::Use(v0) => v0.debug(formatter, "Use"), + crate::Item::Verbatim(v0) => { let mut formatter = formatter.debug_tuple("Verbatim"); formatter.field(v0); formatter.finish() @@ -1548,312 +1597,327 @@ impl Debug for Item { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ItemConst { +impl Debug for crate::ItemConst { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ItemConst { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("vis", &self.vis); - formatter.field("const_token", &self.const_token); - formatter.field("ident", &self.ident); - formatter.field("generics", &self.generics); - formatter.field("colon_token", &self.colon_token); - formatter.field("ty", &self.ty); - formatter.field("eq_token", &self.eq_token); - formatter.field("expr", &self.expr); - formatter.field("semi_token", &self.semi_token); - formatter.finish() - } - } self.debug(formatter, "ItemConst") } } #[cfg(feature = "full")] +impl crate::ItemConst { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("vis", &self.vis); + formatter.field("const_token", &self.const_token); + formatter.field("ident", &self.ident); + formatter.field("generics", &self.generics); + formatter.field("colon_token", &self.colon_token); + formatter.field("ty", &self.ty); + formatter.field("eq_token", &self.eq_token); + formatter.field("expr", &self.expr); + formatter.field("semi_token", &self.semi_token); + formatter.finish() + } +} +#[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ItemEnum { +impl Debug for crate::ItemEnum { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ItemEnum { - fn 
debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("vis", &self.vis); - formatter.field("enum_token", &self.enum_token); - formatter.field("ident", &self.ident); - formatter.field("generics", &self.generics); - formatter.field("brace_token", &self.brace_token); - formatter.field("variants", &self.variants); - formatter.finish() - } - } self.debug(formatter, "ItemEnum") } } #[cfg(feature = "full")] +impl crate::ItemEnum { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("vis", &self.vis); + formatter.field("enum_token", &self.enum_token); + formatter.field("ident", &self.ident); + formatter.field("generics", &self.generics); + formatter.field("brace_token", &self.brace_token); + formatter.field("variants", &self.variants); + formatter.finish() + } +} +#[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ItemExternCrate { +impl Debug for crate::ItemExternCrate { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ItemExternCrate { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("vis", &self.vis); - formatter.field("extern_token", &self.extern_token); - formatter.field("crate_token", &self.crate_token); - formatter.field("ident", &self.ident); - formatter.field("rename", &self.rename); - formatter.field("semi_token", &self.semi_token); - formatter.finish() - } - } self.debug(formatter, "ItemExternCrate") } } #[cfg(feature = "full")] +impl crate::ItemExternCrate { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + 
formatter.field("attrs", &self.attrs); + formatter.field("vis", &self.vis); + formatter.field("extern_token", &self.extern_token); + formatter.field("crate_token", &self.crate_token); + formatter.field("ident", &self.ident); + formatter.field("rename", &self.rename); + formatter.field("semi_token", &self.semi_token); + formatter.finish() + } +} +#[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ItemFn { +impl Debug for crate::ItemFn { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ItemFn { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("vis", &self.vis); - formatter.field("sig", &self.sig); - formatter.field("block", &self.block); - formatter.finish() - } - } self.debug(formatter, "ItemFn") } } #[cfg(feature = "full")] +impl crate::ItemFn { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("vis", &self.vis); + formatter.field("sig", &self.sig); + formatter.field("block", &self.block); + formatter.finish() + } +} +#[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ItemForeignMod { +impl Debug for crate::ItemForeignMod { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ItemForeignMod { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("unsafety", &self.unsafety); - formatter.field("abi", &self.abi); - formatter.field("brace_token", &self.brace_token); - formatter.field("items", &self.items); - formatter.finish() - } - } self.debug(formatter, "ItemForeignMod") } } #[cfg(feature = "full")] +impl crate::ItemForeignMod 
{ + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("unsafety", &self.unsafety); + formatter.field("abi", &self.abi); + formatter.field("brace_token", &self.brace_token); + formatter.field("items", &self.items); + formatter.finish() + } +} +#[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ItemImpl { +impl Debug for crate::ItemImpl { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ItemImpl { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("defaultness", &self.defaultness); - formatter.field("unsafety", &self.unsafety); - formatter.field("impl_token", &self.impl_token); - formatter.field("generics", &self.generics); - formatter.field("trait_", &self.trait_); - formatter.field("self_ty", &self.self_ty); - formatter.field("brace_token", &self.brace_token); - formatter.field("items", &self.items); - formatter.finish() - } - } self.debug(formatter, "ItemImpl") } } #[cfg(feature = "full")] +impl crate::ItemImpl { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("defaultness", &self.defaultness); + formatter.field("unsafety", &self.unsafety); + formatter.field("impl_token", &self.impl_token); + formatter.field("generics", &self.generics); + formatter.field("trait_", &self.trait_); + formatter.field("self_ty", &self.self_ty); + formatter.field("brace_token", &self.brace_token); + formatter.field("items", &self.items); + formatter.finish() + } +} +#[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ItemMacro { +impl Debug for crate::ItemMacro { 
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ItemMacro { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("ident", &self.ident); - formatter.field("mac", &self.mac); - formatter.field("semi_token", &self.semi_token); - formatter.finish() - } - } self.debug(formatter, "ItemMacro") } } #[cfg(feature = "full")] +impl crate::ItemMacro { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("ident", &self.ident); + formatter.field("mac", &self.mac); + formatter.field("semi_token", &self.semi_token); + formatter.finish() + } +} +#[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ItemMod { +impl Debug for crate::ItemMod { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ItemMod { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("vis", &self.vis); - formatter.field("unsafety", &self.unsafety); - formatter.field("mod_token", &self.mod_token); - formatter.field("ident", &self.ident); - formatter.field("content", &self.content); - formatter.field("semi", &self.semi); - formatter.finish() - } - } self.debug(formatter, "ItemMod") } } #[cfg(feature = "full")] +impl crate::ItemMod { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("vis", &self.vis); + formatter.field("unsafety", &self.unsafety); + formatter.field("mod_token", &self.mod_token); + formatter.field("ident", &self.ident); + formatter.field("content", &self.content); + 
formatter.field("semi", &self.semi); + formatter.finish() + } +} +#[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ItemStatic { +impl Debug for crate::ItemStatic { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ItemStatic { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("vis", &self.vis); - formatter.field("static_token", &self.static_token); - formatter.field("mutability", &self.mutability); - formatter.field("ident", &self.ident); - formatter.field("colon_token", &self.colon_token); - formatter.field("ty", &self.ty); - formatter.field("eq_token", &self.eq_token); - formatter.field("expr", &self.expr); - formatter.field("semi_token", &self.semi_token); - formatter.finish() - } - } self.debug(formatter, "ItemStatic") } } #[cfg(feature = "full")] +impl crate::ItemStatic { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("vis", &self.vis); + formatter.field("static_token", &self.static_token); + formatter.field("mutability", &self.mutability); + formatter.field("ident", &self.ident); + formatter.field("colon_token", &self.colon_token); + formatter.field("ty", &self.ty); + formatter.field("eq_token", &self.eq_token); + formatter.field("expr", &self.expr); + formatter.field("semi_token", &self.semi_token); + formatter.finish() + } +} +#[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ItemStruct { +impl Debug for crate::ItemStruct { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ItemStruct { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", 
&self.attrs); - formatter.field("vis", &self.vis); - formatter.field("struct_token", &self.struct_token); - formatter.field("ident", &self.ident); - formatter.field("generics", &self.generics); - formatter.field("fields", &self.fields); - formatter.field("semi_token", &self.semi_token); - formatter.finish() - } - } self.debug(formatter, "ItemStruct") } } #[cfg(feature = "full")] +impl crate::ItemStruct { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("vis", &self.vis); + formatter.field("struct_token", &self.struct_token); + formatter.field("ident", &self.ident); + formatter.field("generics", &self.generics); + formatter.field("fields", &self.fields); + formatter.field("semi_token", &self.semi_token); + formatter.finish() + } +} +#[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ItemTrait { +impl Debug for crate::ItemTrait { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ItemTrait { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("vis", &self.vis); - formatter.field("unsafety", &self.unsafety); - formatter.field("auto_token", &self.auto_token); - formatter.field("restriction", &self.restriction); - formatter.field("trait_token", &self.trait_token); - formatter.field("ident", &self.ident); - formatter.field("generics", &self.generics); - formatter.field("colon_token", &self.colon_token); - formatter.field("supertraits", &self.supertraits); - formatter.field("brace_token", &self.brace_token); - formatter.field("items", &self.items); - formatter.finish() - } - } self.debug(formatter, "ItemTrait") } } #[cfg(feature = "full")] +impl crate::ItemTrait { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> 
fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("vis", &self.vis); + formatter.field("unsafety", &self.unsafety); + formatter.field("auto_token", &self.auto_token); + formatter.field("restriction", &self.restriction); + formatter.field("trait_token", &self.trait_token); + formatter.field("ident", &self.ident); + formatter.field("generics", &self.generics); + formatter.field("colon_token", &self.colon_token); + formatter.field("supertraits", &self.supertraits); + formatter.field("brace_token", &self.brace_token); + formatter.field("items", &self.items); + formatter.finish() + } +} +#[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ItemTraitAlias { +impl Debug for crate::ItemTraitAlias { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ItemTraitAlias { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("vis", &self.vis); - formatter.field("trait_token", &self.trait_token); - formatter.field("ident", &self.ident); - formatter.field("generics", &self.generics); - formatter.field("eq_token", &self.eq_token); - formatter.field("bounds", &self.bounds); - formatter.field("semi_token", &self.semi_token); - formatter.finish() - } - } self.debug(formatter, "ItemTraitAlias") } } #[cfg(feature = "full")] +impl crate::ItemTraitAlias { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("vis", &self.vis); + formatter.field("trait_token", &self.trait_token); + formatter.field("ident", &self.ident); + formatter.field("generics", &self.generics); + formatter.field("eq_token", &self.eq_token); + formatter.field("bounds", &self.bounds); + formatter.field("semi_token", 
&self.semi_token); + formatter.finish() + } +} +#[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ItemType { +impl Debug for crate::ItemType { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ItemType { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("vis", &self.vis); - formatter.field("type_token", &self.type_token); - formatter.field("ident", &self.ident); - formatter.field("generics", &self.generics); - formatter.field("eq_token", &self.eq_token); - formatter.field("ty", &self.ty); - formatter.field("semi_token", &self.semi_token); - formatter.finish() - } - } self.debug(formatter, "ItemType") } } #[cfg(feature = "full")] +impl crate::ItemType { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("vis", &self.vis); + formatter.field("type_token", &self.type_token); + formatter.field("ident", &self.ident); + formatter.field("generics", &self.generics); + formatter.field("eq_token", &self.eq_token); + formatter.field("ty", &self.ty); + formatter.field("semi_token", &self.semi_token); + formatter.finish() + } +} +#[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ItemUnion { +impl Debug for crate::ItemUnion { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ItemUnion { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("vis", &self.vis); - formatter.field("union_token", &self.union_token); - formatter.field("ident", &self.ident); - formatter.field("generics", &self.generics); - formatter.field("fields", &self.fields); 
- formatter.finish() - } - } self.debug(formatter, "ItemUnion") } } #[cfg(feature = "full")] +impl crate::ItemUnion { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("vis", &self.vis); + formatter.field("union_token", &self.union_token); + formatter.field("ident", &self.ident); + formatter.field("generics", &self.generics); + formatter.field("fields", &self.fields); + formatter.finish() + } +} +#[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ItemUse { +impl Debug for crate::ItemUse { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ItemUse { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("vis", &self.vis); - formatter.field("use_token", &self.use_token); - formatter.field("leading_colon", &self.leading_colon); - formatter.field("tree", &self.tree); - formatter.field("semi_token", &self.semi_token); - formatter.finish() - } - } self.debug(formatter, "ItemUse") } } #[cfg(feature = "full")] +impl crate::ItemUse { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("vis", &self.vis); + formatter.field("use_token", &self.use_token); + formatter.field("leading_colon", &self.leading_colon); + formatter.field("tree", &self.tree); + formatter.field("semi_token", &self.semi_token); + formatter.finish() + } +} +#[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for Label { +impl Debug for crate::Label { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { let mut formatter = formatter.debug_struct("Label"); formatter.field("name", 
&self.name); @@ -1862,22 +1926,22 @@ impl Debug for Label { } } #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for Lifetime { +impl Debug for crate::Lifetime { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl Lifetime { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("apostrophe", &self.apostrophe); - formatter.field("ident", &self.ident); - formatter.finish() - } - } self.debug(formatter, "Lifetime") } } +impl crate::Lifetime { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("apostrophe", &self.apostrophe); + formatter.field("ident", &self.ident); + formatter.finish() + } +} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for LifetimeParam { +impl Debug for crate::LifetimeParam { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { let mut formatter = formatter.debug_struct("LifetimeParam"); formatter.field("attrs", &self.attrs); @@ -1888,18 +1952,18 @@ impl Debug for LifetimeParam { } } #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for Lit { +impl Debug for crate::Lit { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter.write_str("Lit::")?; match self { - Lit::Str(v0) => v0.debug(formatter, "Str"), - Lit::ByteStr(v0) => v0.debug(formatter, "ByteStr"), - Lit::Byte(v0) => v0.debug(formatter, "Byte"), - Lit::Char(v0) => v0.debug(formatter, "Char"), - Lit::Int(v0) => v0.debug(formatter, "Int"), - Lit::Float(v0) => v0.debug(formatter, "Float"), - Lit::Bool(v0) => v0.debug(formatter, "Bool"), - Lit::Verbatim(v0) => { + crate::Lit::Str(v0) => v0.debug(formatter, "Str"), + crate::Lit::ByteStr(v0) => v0.debug(formatter, "ByteStr"), + crate::Lit::Byte(v0) => v0.debug(formatter, "Byte"), + crate::Lit::Char(v0) 
=> v0.debug(formatter, "Char"), + crate::Lit::Int(v0) => v0.debug(formatter, "Int"), + crate::Lit::Float(v0) => v0.debug(formatter, "Float"), + crate::Lit::Bool(v0) => v0.debug(formatter, "Bool"), + crate::Lit::Verbatim(v0) => { let mut formatter = formatter.debug_tuple("Verbatim"); formatter.field(v0); formatter.finish() @@ -1909,25 +1973,26 @@ impl Debug for Lit { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for Local { +impl Debug for crate::Local { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl Local { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("let_token", &self.let_token); - formatter.field("pat", &self.pat); - formatter.field("init", &self.init); - formatter.field("semi_token", &self.semi_token); - formatter.finish() - } - } self.debug(formatter, "Local") } } #[cfg(feature = "full")] +impl crate::Local { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("let_token", &self.let_token); + formatter.field("pat", &self.pat); + formatter.field("init", &self.init); + formatter.field("semi_token", &self.semi_token); + formatter.finish() + } +} +#[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for LocalInit { +impl Debug for crate::LocalInit { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { let mut formatter = formatter.debug_struct("LocalInit"); formatter.field("eq_token", &self.eq_token); @@ -1938,7 +2003,7 @@ impl Debug for LocalInit { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for Macro { +impl Debug for crate::Macro { fn fmt(&self, formatter: &mut fmt::Formatter) -> 
fmt::Result { let mut formatter = formatter.debug_struct("Macro"); formatter.field("path", &self.path); @@ -1950,21 +2015,21 @@ impl Debug for Macro { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for MacroDelimiter { +impl Debug for crate::MacroDelimiter { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter.write_str("MacroDelimiter::")?; match self { - MacroDelimiter::Paren(v0) => { + crate::MacroDelimiter::Paren(v0) => { let mut formatter = formatter.debug_tuple("Paren"); formatter.field(v0); formatter.finish() } - MacroDelimiter::Brace(v0) => { + crate::MacroDelimiter::Brace(v0) => { let mut formatter = formatter.debug_tuple("Brace"); formatter.field(v0); formatter.finish() } - MacroDelimiter::Bracket(v0) => { + crate::MacroDelimiter::Bracket(v0) => { let mut formatter = formatter.debug_tuple("Bracket"); formatter.field(v0); formatter.finish() @@ -1974,16 +2039,16 @@ impl Debug for MacroDelimiter { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for Member { +impl Debug for crate::Member { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter.write_str("Member::")?; match self { - Member::Named(v0) => { + crate::Member::Named(v0) => { let mut formatter = formatter.debug_tuple("Named"); formatter.field(v0); formatter.finish() } - Member::Unnamed(v0) => { + crate::Member::Unnamed(v0) => { let mut formatter = formatter.debug_tuple("Unnamed"); formatter.field(v0); formatter.finish() @@ -1993,307 +2058,326 @@ impl Debug for Member { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for Meta { +impl Debug for crate::Meta { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter.write_str("Meta::")?; match self { - Meta::Path(v0) => v0.debug(formatter, "Path"), - Meta::List(v0) => 
v0.debug(formatter, "List"), - Meta::NameValue(v0) => v0.debug(formatter, "NameValue"), + crate::Meta::Path(v0) => v0.debug(formatter, "Path"), + crate::Meta::List(v0) => v0.debug(formatter, "List"), + crate::Meta::NameValue(v0) => v0.debug(formatter, "NameValue"), } } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for MetaList { +impl Debug for crate::MetaList { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl MetaList { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("path", &self.path); - formatter.field("delimiter", &self.delimiter); - formatter.field("tokens", &self.tokens); - formatter.finish() - } - } self.debug(formatter, "MetaList") } } #[cfg(any(feature = "derive", feature = "full"))] +impl crate::MetaList { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("path", &self.path); + formatter.field("delimiter", &self.delimiter); + formatter.field("tokens", &self.tokens); + formatter.finish() + } +} +#[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for MetaNameValue { +impl Debug for crate::MetaNameValue { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl MetaNameValue { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("path", &self.path); - formatter.field("eq_token", &self.eq_token); - formatter.field("value", &self.value); - formatter.finish() - } - } self.debug(formatter, "MetaNameValue") } } #[cfg(any(feature = "derive", feature = "full"))] +impl crate::MetaNameValue { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = 
formatter.debug_struct(name); + formatter.field("path", &self.path); + formatter.field("eq_token", &self.eq_token); + formatter.field("value", &self.value); + formatter.finish() + } +} +#[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ParenthesizedGenericArguments { +impl Debug for crate::ParenthesizedGenericArguments { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl ParenthesizedGenericArguments { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("paren_token", &self.paren_token); - formatter.field("inputs", &self.inputs); - formatter.field("output", &self.output); - formatter.finish() - } - } self.debug(formatter, "ParenthesizedGenericArguments") } } +#[cfg(any(feature = "derive", feature = "full"))] +impl crate::ParenthesizedGenericArguments { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("paren_token", &self.paren_token); + formatter.field("inputs", &self.inputs); + formatter.field("output", &self.output); + formatter.finish() + } +} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for Pat { +impl Debug for crate::Pat { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter.write_str("Pat::")?; match self { - Pat::Const(v0) => v0.debug(formatter, "Const"), - Pat::Ident(v0) => v0.debug(formatter, "Ident"), - Pat::Lit(v0) => v0.debug(formatter, "Lit"), - Pat::Macro(v0) => v0.debug(formatter, "Macro"), - Pat::Or(v0) => v0.debug(formatter, "Or"), - Pat::Paren(v0) => v0.debug(formatter, "Paren"), - Pat::Path(v0) => v0.debug(formatter, "Path"), - Pat::Range(v0) => v0.debug(formatter, "Range"), - Pat::Reference(v0) => v0.debug(formatter, "Reference"), - Pat::Rest(v0) => v0.debug(formatter, "Rest"), - 
Pat::Slice(v0) => v0.debug(formatter, "Slice"), - Pat::Struct(v0) => v0.debug(formatter, "Struct"), - Pat::Tuple(v0) => v0.debug(formatter, "Tuple"), - Pat::TupleStruct(v0) => v0.debug(formatter, "TupleStruct"), - Pat::Type(v0) => v0.debug(formatter, "Type"), - Pat::Verbatim(v0) => { + crate::Pat::Const(v0) => v0.debug(formatter, "Const"), + crate::Pat::Ident(v0) => v0.debug(formatter, "Ident"), + crate::Pat::Lit(v0) => v0.debug(formatter, "Lit"), + crate::Pat::Macro(v0) => v0.debug(formatter, "Macro"), + crate::Pat::Or(v0) => v0.debug(formatter, "Or"), + crate::Pat::Paren(v0) => v0.debug(formatter, "Paren"), + crate::Pat::Path(v0) => v0.debug(formatter, "Path"), + crate::Pat::Range(v0) => v0.debug(formatter, "Range"), + crate::Pat::Reference(v0) => v0.debug(formatter, "Reference"), + crate::Pat::Rest(v0) => v0.debug(formatter, "Rest"), + crate::Pat::Slice(v0) => v0.debug(formatter, "Slice"), + crate::Pat::Struct(v0) => v0.debug(formatter, "Struct"), + crate::Pat::Tuple(v0) => v0.debug(formatter, "Tuple"), + crate::Pat::TupleStruct(v0) => v0.debug(formatter, "TupleStruct"), + crate::Pat::Type(v0) => v0.debug(formatter, "Type"), + crate::Pat::Verbatim(v0) => { let mut formatter = formatter.debug_tuple("Verbatim"); formatter.field(v0); formatter.finish() } - Pat::Wild(v0) => v0.debug(formatter, "Wild"), + crate::Pat::Wild(v0) => v0.debug(formatter, "Wild"), } } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for PatIdent { +impl Debug for crate::PatIdent { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl PatIdent { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("by_ref", &self.by_ref); - formatter.field("mutability", &self.mutability); - formatter.field("ident", &self.ident); - formatter.field("subpat", &self.subpat); - formatter.finish() - } - } 
self.debug(formatter, "PatIdent") } } #[cfg(feature = "full")] +impl crate::PatIdent { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("by_ref", &self.by_ref); + formatter.field("mutability", &self.mutability); + formatter.field("ident", &self.ident); + formatter.field("subpat", &self.subpat); + formatter.finish() + } +} +#[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for PatOr { +impl Debug for crate::PatOr { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl PatOr { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("leading_vert", &self.leading_vert); - formatter.field("cases", &self.cases); - formatter.finish() - } - } self.debug(formatter, "PatOr") } } #[cfg(feature = "full")] +impl crate::PatOr { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("leading_vert", &self.leading_vert); + formatter.field("cases", &self.cases); + formatter.finish() + } +} +#[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for PatParen { +impl Debug for crate::PatParen { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl PatParen { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("paren_token", &self.paren_token); - formatter.field("pat", &self.pat); - formatter.finish() - } - } self.debug(formatter, "PatParen") } } #[cfg(feature = "full")] +impl crate::PatParen { + fn debug(&self, formatter: &mut 
fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("paren_token", &self.paren_token); + formatter.field("pat", &self.pat); + formatter.finish() + } +} +#[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for PatReference { +impl Debug for crate::PatReference { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl PatReference { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("and_token", &self.and_token); - formatter.field("mutability", &self.mutability); - formatter.field("pat", &self.pat); - formatter.finish() - } - } self.debug(formatter, "PatReference") } } #[cfg(feature = "full")] +impl crate::PatReference { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("and_token", &self.and_token); + formatter.field("mutability", &self.mutability); + formatter.field("pat", &self.pat); + formatter.finish() + } +} +#[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for PatRest { +impl Debug for crate::PatRest { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl PatRest { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("dot2_token", &self.dot2_token); - formatter.finish() - } - } self.debug(formatter, "PatRest") } } #[cfg(feature = "full")] +impl crate::PatRest { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + 
formatter.field("dot2_token", &self.dot2_token); + formatter.finish() + } +} +#[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for PatSlice { +impl Debug for crate::PatSlice { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl PatSlice { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("bracket_token", &self.bracket_token); - formatter.field("elems", &self.elems); - formatter.finish() - } - } self.debug(formatter, "PatSlice") } } #[cfg(feature = "full")] +impl crate::PatSlice { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("bracket_token", &self.bracket_token); + formatter.field("elems", &self.elems); + formatter.finish() + } +} +#[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for PatStruct { +impl Debug for crate::PatStruct { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl PatStruct { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("qself", &self.qself); - formatter.field("path", &self.path); - formatter.field("brace_token", &self.brace_token); - formatter.field("fields", &self.fields); - formatter.field("rest", &self.rest); - formatter.finish() - } - } self.debug(formatter, "PatStruct") } } #[cfg(feature = "full")] +impl crate::PatStruct { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("qself", &self.qself); + formatter.field("path", &self.path); + 
formatter.field("brace_token", &self.brace_token); + formatter.field("fields", &self.fields); + formatter.field("rest", &self.rest); + formatter.finish() + } +} +#[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for PatTuple { +impl Debug for crate::PatTuple { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl PatTuple { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("paren_token", &self.paren_token); - formatter.field("elems", &self.elems); - formatter.finish() - } - } self.debug(formatter, "PatTuple") } } #[cfg(feature = "full")] +impl crate::PatTuple { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("paren_token", &self.paren_token); + formatter.field("elems", &self.elems); + formatter.finish() + } +} +#[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for PatTupleStruct { +impl Debug for crate::PatTupleStruct { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl PatTupleStruct { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("qself", &self.qself); - formatter.field("path", &self.path); - formatter.field("paren_token", &self.paren_token); - formatter.field("elems", &self.elems); - formatter.finish() - } - } self.debug(formatter, "PatTupleStruct") } } #[cfg(feature = "full")] +impl crate::PatTupleStruct { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("qself", &self.qself); + 
formatter.field("path", &self.path); + formatter.field("paren_token", &self.paren_token); + formatter.field("elems", &self.elems); + formatter.finish() + } +} +#[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for PatType { +impl Debug for crate::PatType { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl PatType { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("pat", &self.pat); - formatter.field("colon_token", &self.colon_token); - formatter.field("ty", &self.ty); - formatter.finish() - } - } self.debug(formatter, "PatType") } } #[cfg(feature = "full")] +impl crate::PatType { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("pat", &self.pat); + formatter.field("colon_token", &self.colon_token); + formatter.field("ty", &self.ty); + formatter.finish() + } +} +#[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for PatWild { +impl Debug for crate::PatWild { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl PatWild { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("underscore_token", &self.underscore_token); - formatter.finish() - } - } self.debug(formatter, "PatWild") } } +#[cfg(feature = "full")] +impl crate::PatWild { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("underscore_token", &self.underscore_token); + formatter.finish() + } +} #[cfg(any(feature = "derive", feature = 
"full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for Path { +impl Debug for crate::Path { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl Path { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("leading_colon", &self.leading_colon); - formatter.field("segments", &self.segments); - formatter.finish() - } - } self.debug(formatter, "Path") } } #[cfg(any(feature = "derive", feature = "full"))] +impl crate::Path { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("leading_colon", &self.leading_colon); + formatter.field("segments", &self.segments); + formatter.finish() + } +} +#[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for PathArguments { +impl Debug for crate::PathArguments { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter.write_str("PathArguments::")?; match self { - PathArguments::None => formatter.write_str("None"), - PathArguments::AngleBracketed(v0) => v0.debug(formatter, "AngleBracketed"), - PathArguments::Parenthesized(v0) => v0.debug(formatter, "Parenthesized"), + crate::PathArguments::None => formatter.write_str("None"), + crate::PathArguments::AngleBracketed(v0) => { + v0.debug(formatter, "AngleBracketed") + } + crate::PathArguments::Parenthesized(v0) => { + v0.debug(formatter, "Parenthesized") + } } } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for PathSegment { +impl Debug for crate::PathSegment { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { let mut formatter = formatter.debug_struct("PathSegment"); formatter.field("ident", &self.ident); @@ -2303,7 +2387,7 @@ impl Debug for PathSegment { } #[cfg(any(feature = "derive", 
feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for PredicateLifetime { +impl Debug for crate::PredicateLifetime { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { let mut formatter = formatter.debug_struct("PredicateLifetime"); formatter.field("lifetime", &self.lifetime); @@ -2314,7 +2398,7 @@ impl Debug for PredicateLifetime { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for PredicateType { +impl Debug for crate::PredicateType { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { let mut formatter = formatter.debug_struct("PredicateType"); formatter.field("lifetimes", &self.lifetimes); @@ -2326,7 +2410,7 @@ impl Debug for PredicateType { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for QSelf { +impl Debug for crate::QSelf { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { let mut formatter = formatter.debug_struct("QSelf"); formatter.field("lt_token", &self.lt_token); @@ -2339,16 +2423,16 @@ impl Debug for QSelf { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for RangeLimits { +impl Debug for crate::RangeLimits { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter.write_str("RangeLimits::")?; match self { - RangeLimits::HalfOpen(v0) => { + crate::RangeLimits::HalfOpen(v0) => { let mut formatter = formatter.debug_tuple("HalfOpen"); formatter.field(v0); formatter.finish() } - RangeLimits::Closed(v0) => { + crate::RangeLimits::Closed(v0) => { let mut formatter = formatter.debug_tuple("Closed"); formatter.field(v0); formatter.finish() @@ -2358,7 +2442,7 @@ impl Debug for RangeLimits { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for Receiver { +impl Debug for crate::Receiver { fn fmt(&self, formatter: &mut 
fmt::Formatter) -> fmt::Result { let mut formatter = formatter.debug_struct("Receiver"); formatter.field("attrs", &self.attrs); @@ -2372,12 +2456,12 @@ impl Debug for Receiver { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for ReturnType { +impl Debug for crate::ReturnType { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter.write_str("ReturnType::")?; match self { - ReturnType::Default => formatter.write_str("Default"), - ReturnType::Type(v0, v1) => { + crate::ReturnType::Default => formatter.write_str("Default"), + crate::ReturnType::Type(v0, v1) => { let mut formatter = formatter.debug_tuple("Type"); formatter.field(v0); formatter.field(v1); @@ -2388,7 +2472,7 @@ impl Debug for ReturnType { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for Signature { +impl Debug for crate::Signature { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { let mut formatter = formatter.debug_struct("Signature"); formatter.field("constness", &self.constness); @@ -2407,60 +2491,61 @@ impl Debug for Signature { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for StaticMutability { +impl Debug for crate::StaticMutability { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter.write_str("StaticMutability::")?; match self { - StaticMutability::Mut(v0) => { + crate::StaticMutability::Mut(v0) => { let mut formatter = formatter.debug_tuple("Mut"); formatter.field(v0); formatter.finish() } - StaticMutability::None => formatter.write_str("None"), + crate::StaticMutability::None => formatter.write_str("None"), } } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for Stmt { +impl Debug for crate::Stmt { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter.write_str("Stmt::")?; match self { - Stmt::Local(v0) 
=> v0.debug(formatter, "Local"), - Stmt::Item(v0) => { + crate::Stmt::Local(v0) => v0.debug(formatter, "Local"), + crate::Stmt::Item(v0) => { let mut formatter = formatter.debug_tuple("Item"); formatter.field(v0); formatter.finish() } - Stmt::Expr(v0, v1) => { + crate::Stmt::Expr(v0, v1) => { let mut formatter = formatter.debug_tuple("Expr"); formatter.field(v0); formatter.field(v1); formatter.finish() } - Stmt::Macro(v0) => v0.debug(formatter, "Macro"), + crate::Stmt::Macro(v0) => v0.debug(formatter, "Macro"), } } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for StmtMacro { +impl Debug for crate::StmtMacro { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl StmtMacro { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("mac", &self.mac); - formatter.field("semi_token", &self.semi_token); - formatter.finish() - } - } self.debug(formatter, "StmtMacro") } } +#[cfg(feature = "full")] +impl crate::StmtMacro { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("mac", &self.mac); + formatter.field("semi_token", &self.semi_token); + formatter.finish() + } +} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for TraitBound { +impl Debug for crate::TraitBound { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { let mut formatter = formatter.debug_struct("TraitBound"); formatter.field("paren_token", &self.paren_token); @@ -2472,12 +2557,12 @@ impl Debug for TraitBound { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for TraitBoundModifier { +impl Debug for 
crate::TraitBoundModifier { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter.write_str("TraitBoundModifier::")?; match self { - TraitBoundModifier::None => formatter.write_str("None"), - TraitBoundModifier::Maybe(v0) => { + crate::TraitBoundModifier::None => formatter.write_str("None"), + crate::TraitBoundModifier::Maybe(v0) => { let mut formatter = formatter.debug_tuple("Maybe"); formatter.field(v0); formatter.finish() @@ -2487,15 +2572,15 @@ impl Debug for TraitBoundModifier { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for TraitItem { +impl Debug for crate::TraitItem { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter.write_str("TraitItem::")?; match self { - TraitItem::Const(v0) => v0.debug(formatter, "Const"), - TraitItem::Fn(v0) => v0.debug(formatter, "Fn"), - TraitItem::Type(v0) => v0.debug(formatter, "Type"), - TraitItem::Macro(v0) => v0.debug(formatter, "Macro"), - TraitItem::Verbatim(v0) => { + crate::TraitItem::Const(v0) => v0.debug(formatter, "Const"), + crate::TraitItem::Fn(v0) => v0.debug(formatter, "Fn"), + crate::TraitItem::Type(v0) => v0.debug(formatter, "Type"), + crate::TraitItem::Macro(v0) => v0.debug(formatter, "Macro"), + crate::TraitItem::Verbatim(v0) => { let mut formatter = formatter.debug_tuple("Verbatim"); formatter.field(v0); formatter.finish() @@ -2505,100 +2590,104 @@ impl Debug for TraitItem { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for TraitItemConst { +impl Debug for crate::TraitItemConst { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl TraitItemConst { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("const_token", &self.const_token); - formatter.field("ident", &self.ident); - formatter.field("generics", 
&self.generics); - formatter.field("colon_token", &self.colon_token); - formatter.field("ty", &self.ty); - formatter.field("default", &self.default); - formatter.field("semi_token", &self.semi_token); - formatter.finish() - } - } self.debug(formatter, "TraitItemConst") } } #[cfg(feature = "full")] +impl crate::TraitItemConst { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("const_token", &self.const_token); + formatter.field("ident", &self.ident); + formatter.field("generics", &self.generics); + formatter.field("colon_token", &self.colon_token); + formatter.field("ty", &self.ty); + formatter.field("default", &self.default); + formatter.field("semi_token", &self.semi_token); + formatter.finish() + } +} +#[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for TraitItemFn { +impl Debug for crate::TraitItemFn { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl TraitItemFn { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("sig", &self.sig); - formatter.field("default", &self.default); - formatter.field("semi_token", &self.semi_token); - formatter.finish() - } - } self.debug(formatter, "TraitItemFn") } } #[cfg(feature = "full")] +impl crate::TraitItemFn { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("sig", &self.sig); + formatter.field("default", &self.default); + formatter.field("semi_token", &self.semi_token); + formatter.finish() + } +} +#[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for TraitItemMacro { +impl Debug for 
crate::TraitItemMacro { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl TraitItemMacro { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("mac", &self.mac); - formatter.field("semi_token", &self.semi_token); - formatter.finish() - } - } self.debug(formatter, "TraitItemMacro") } } #[cfg(feature = "full")] +impl crate::TraitItemMacro { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("mac", &self.mac); + formatter.field("semi_token", &self.semi_token); + formatter.finish() + } +} +#[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for TraitItemType { +impl Debug for crate::TraitItemType { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl TraitItemType { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("attrs", &self.attrs); - formatter.field("type_token", &self.type_token); - formatter.field("ident", &self.ident); - formatter.field("generics", &self.generics); - formatter.field("colon_token", &self.colon_token); - formatter.field("bounds", &self.bounds); - formatter.field("default", &self.default); - formatter.field("semi_token", &self.semi_token); - formatter.finish() - } - } self.debug(formatter, "TraitItemType") } } +#[cfg(feature = "full")] +impl crate::TraitItemType { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("attrs", &self.attrs); + formatter.field("type_token", &self.type_token); + formatter.field("ident", &self.ident); + formatter.field("generics", &self.generics); + 
formatter.field("colon_token", &self.colon_token); + formatter.field("bounds", &self.bounds); + formatter.field("default", &self.default); + formatter.field("semi_token", &self.semi_token); + formatter.finish() + } +} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for Type { +impl Debug for crate::Type { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter.write_str("Type::")?; match self { - Type::Array(v0) => v0.debug(formatter, "Array"), - Type::BareFn(v0) => v0.debug(formatter, "BareFn"), - Type::Group(v0) => v0.debug(formatter, "Group"), - Type::ImplTrait(v0) => v0.debug(formatter, "ImplTrait"), - Type::Infer(v0) => v0.debug(formatter, "Infer"), - Type::Macro(v0) => v0.debug(formatter, "Macro"), - Type::Never(v0) => v0.debug(formatter, "Never"), - Type::Paren(v0) => v0.debug(formatter, "Paren"), - Type::Path(v0) => v0.debug(formatter, "Path"), - Type::Ptr(v0) => v0.debug(formatter, "Ptr"), - Type::Reference(v0) => v0.debug(formatter, "Reference"), - Type::Slice(v0) => v0.debug(formatter, "Slice"), - Type::TraitObject(v0) => v0.debug(formatter, "TraitObject"), - Type::Tuple(v0) => v0.debug(formatter, "Tuple"), - Type::Verbatim(v0) => { + crate::Type::Array(v0) => v0.debug(formatter, "Array"), + crate::Type::BareFn(v0) => v0.debug(formatter, "BareFn"), + crate::Type::Group(v0) => v0.debug(formatter, "Group"), + crate::Type::ImplTrait(v0) => v0.debug(formatter, "ImplTrait"), + crate::Type::Infer(v0) => v0.debug(formatter, "Infer"), + crate::Type::Macro(v0) => v0.debug(formatter, "Macro"), + crate::Type::Never(v0) => v0.debug(formatter, "Never"), + crate::Type::Paren(v0) => v0.debug(formatter, "Paren"), + crate::Type::Path(v0) => v0.debug(formatter, "Path"), + crate::Type::Ptr(v0) => v0.debug(formatter, "Ptr"), + crate::Type::Reference(v0) => v0.debug(formatter, "Reference"), + crate::Type::Slice(v0) => v0.debug(formatter, "Slice"), + crate::Type::TraitObject(v0) => 
v0.debug(formatter, "TraitObject"), + crate::Type::Tuple(v0) => v0.debug(formatter, "Tuple"), + crate::Type::Verbatim(v0) => { let mut formatter = formatter.debug_tuple("Verbatim"); formatter.field(v0); formatter.finish() @@ -2608,117 +2697,124 @@ impl Debug for Type { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for TypeArray { +impl Debug for crate::TypeArray { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl TypeArray { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("bracket_token", &self.bracket_token); - formatter.field("elem", &self.elem); - formatter.field("semi_token", &self.semi_token); - formatter.field("len", &self.len); - formatter.finish() - } - } self.debug(formatter, "TypeArray") } } #[cfg(any(feature = "derive", feature = "full"))] +impl crate::TypeArray { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("bracket_token", &self.bracket_token); + formatter.field("elem", &self.elem); + formatter.field("semi_token", &self.semi_token); + formatter.field("len", &self.len); + formatter.finish() + } +} +#[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for TypeBareFn { +impl Debug for crate::TypeBareFn { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl TypeBareFn { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("lifetimes", &self.lifetimes); - formatter.field("unsafety", &self.unsafety); - formatter.field("abi", &self.abi); - formatter.field("fn_token", &self.fn_token); - formatter.field("paren_token", &self.paren_token); - formatter.field("inputs", &self.inputs); - 
formatter.field("variadic", &self.variadic); - formatter.field("output", &self.output); - formatter.finish() - } - } self.debug(formatter, "TypeBareFn") } } #[cfg(any(feature = "derive", feature = "full"))] +impl crate::TypeBareFn { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("lifetimes", &self.lifetimes); + formatter.field("unsafety", &self.unsafety); + formatter.field("abi", &self.abi); + formatter.field("fn_token", &self.fn_token); + formatter.field("paren_token", &self.paren_token); + formatter.field("inputs", &self.inputs); + formatter.field("variadic", &self.variadic); + formatter.field("output", &self.output); + formatter.finish() + } +} +#[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for TypeGroup { +impl Debug for crate::TypeGroup { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl TypeGroup { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("group_token", &self.group_token); - formatter.field("elem", &self.elem); - formatter.finish() - } - } self.debug(formatter, "TypeGroup") } } #[cfg(any(feature = "derive", feature = "full"))] +impl crate::TypeGroup { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("group_token", &self.group_token); + formatter.field("elem", &self.elem); + formatter.finish() + } +} +#[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for TypeImplTrait { +impl Debug for crate::TypeImplTrait { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl TypeImplTrait { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = 
formatter.debug_struct(name); - formatter.field("impl_token", &self.impl_token); - formatter.field("bounds", &self.bounds); - formatter.finish() - } - } self.debug(formatter, "TypeImplTrait") } } #[cfg(any(feature = "derive", feature = "full"))] +impl crate::TypeImplTrait { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("impl_token", &self.impl_token); + formatter.field("bounds", &self.bounds); + formatter.finish() + } +} +#[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for TypeInfer { +impl Debug for crate::TypeInfer { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl TypeInfer { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("underscore_token", &self.underscore_token); - formatter.finish() - } - } self.debug(formatter, "TypeInfer") } } #[cfg(any(feature = "derive", feature = "full"))] +impl crate::TypeInfer { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("underscore_token", &self.underscore_token); + formatter.finish() + } +} +#[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for TypeMacro { +impl Debug for crate::TypeMacro { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl TypeMacro { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("mac", &self.mac); - formatter.finish() - } - } self.debug(formatter, "TypeMacro") } } #[cfg(any(feature = "derive", feature = "full"))] +impl crate::TypeMacro { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut 
formatter = formatter.debug_struct(name); + formatter.field("mac", &self.mac); + formatter.finish() + } +} +#[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for TypeNever { +impl Debug for crate::TypeNever { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl TypeNever { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("bang_token", &self.bang_token); - formatter.finish() - } - } self.debug(formatter, "TypeNever") } } #[cfg(any(feature = "derive", feature = "full"))] +impl crate::TypeNever { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("bang_token", &self.bang_token); + formatter.finish() + } +} +#[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for TypeParam { +impl Debug for crate::TypeParam { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { let mut formatter = formatter.debug_struct("TypeParam"); formatter.field("attrs", &self.attrs); @@ -2732,17 +2828,17 @@ impl Debug for TypeParam { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for TypeParamBound { +impl Debug for crate::TypeParamBound { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter.write_str("TypeParamBound::")?; match self { - TypeParamBound::Trait(v0) => { + crate::TypeParamBound::Trait(v0) => { let mut formatter = formatter.debug_tuple("Trait"); formatter.field(v0); formatter.finish() } - TypeParamBound::Lifetime(v0) => v0.debug(formatter, "Lifetime"), - TypeParamBound::Verbatim(v0) => { + crate::TypeParamBound::Lifetime(v0) => v0.debug(formatter, "Lifetime"), + crate::TypeParamBound::Verbatim(v0) => { let mut formatter = 
formatter.debug_tuple("Verbatim"); formatter.field(v0); formatter.finish() @@ -2752,130 +2848,137 @@ impl Debug for TypeParamBound { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for TypeParen { +impl Debug for crate::TypeParen { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl TypeParen { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("paren_token", &self.paren_token); - formatter.field("elem", &self.elem); - formatter.finish() - } - } self.debug(formatter, "TypeParen") } } #[cfg(any(feature = "derive", feature = "full"))] +impl crate::TypeParen { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("paren_token", &self.paren_token); + formatter.field("elem", &self.elem); + formatter.finish() + } +} +#[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for TypePath { +impl Debug for crate::TypePath { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl TypePath { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("qself", &self.qself); - formatter.field("path", &self.path); - formatter.finish() - } - } self.debug(formatter, "TypePath") } } #[cfg(any(feature = "derive", feature = "full"))] +impl crate::TypePath { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("qself", &self.qself); + formatter.field("path", &self.path); + formatter.finish() + } +} +#[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for TypePtr { +impl Debug for 
crate::TypePtr { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl TypePtr { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("star_token", &self.star_token); - formatter.field("const_token", &self.const_token); - formatter.field("mutability", &self.mutability); - formatter.field("elem", &self.elem); - formatter.finish() - } - } self.debug(formatter, "TypePtr") } } #[cfg(any(feature = "derive", feature = "full"))] +impl crate::TypePtr { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("star_token", &self.star_token); + formatter.field("const_token", &self.const_token); + formatter.field("mutability", &self.mutability); + formatter.field("elem", &self.elem); + formatter.finish() + } +} +#[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for TypeReference { +impl Debug for crate::TypeReference { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl TypeReference { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("and_token", &self.and_token); - formatter.field("lifetime", &self.lifetime); - formatter.field("mutability", &self.mutability); - formatter.field("elem", &self.elem); - formatter.finish() - } - } self.debug(formatter, "TypeReference") } } #[cfg(any(feature = "derive", feature = "full"))] +impl crate::TypeReference { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("and_token", &self.and_token); + formatter.field("lifetime", &self.lifetime); + formatter.field("mutability", &self.mutability); + formatter.field("elem", &self.elem); + formatter.finish() + } +} 
+#[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for TypeSlice { +impl Debug for crate::TypeSlice { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl TypeSlice { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("bracket_token", &self.bracket_token); - formatter.field("elem", &self.elem); - formatter.finish() - } - } self.debug(formatter, "TypeSlice") } } #[cfg(any(feature = "derive", feature = "full"))] +impl crate::TypeSlice { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("bracket_token", &self.bracket_token); + formatter.field("elem", &self.elem); + formatter.finish() + } +} +#[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for TypeTraitObject { +impl Debug for crate::TypeTraitObject { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl TypeTraitObject { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("dyn_token", &self.dyn_token); - formatter.field("bounds", &self.bounds); - formatter.finish() - } - } self.debug(formatter, "TypeTraitObject") } } #[cfg(any(feature = "derive", feature = "full"))] +impl crate::TypeTraitObject { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("dyn_token", &self.dyn_token); + formatter.field("bounds", &self.bounds); + formatter.finish() + } +} +#[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for TypeTuple { +impl Debug for crate::TypeTuple { fn fmt(&self, formatter: &mut fmt::Formatter) -> 
fmt::Result { - impl TypeTuple { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("paren_token", &self.paren_token); - formatter.field("elems", &self.elems); - formatter.finish() - } - } self.debug(formatter, "TypeTuple") } } #[cfg(any(feature = "derive", feature = "full"))] +impl crate::TypeTuple { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("paren_token", &self.paren_token); + formatter.field("elems", &self.elems); + formatter.finish() + } +} +#[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for UnOp { +impl Debug for crate::UnOp { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter.write_str("UnOp::")?; match self { - UnOp::Deref(v0) => { + crate::UnOp::Deref(v0) => { let mut formatter = formatter.debug_tuple("Deref"); formatter.field(v0); formatter.finish() } - UnOp::Not(v0) => { + crate::UnOp::Not(v0) => { let mut formatter = formatter.debug_tuple("Not"); formatter.field(v0); formatter.finish() } - UnOp::Neg(v0) => { + crate::UnOp::Neg(v0) => { let mut formatter = formatter.debug_tuple("Neg"); formatter.field(v0); formatter.finish() @@ -2885,7 +2988,7 @@ impl Debug for UnOp { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for UseGlob { +impl Debug for crate::UseGlob { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { let mut formatter = formatter.debug_struct("UseGlob"); formatter.field("star_token", &self.star_token); @@ -2894,7 +2997,7 @@ impl Debug for UseGlob { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for UseGroup { +impl Debug for crate::UseGroup { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { let mut formatter = 
formatter.debug_struct("UseGroup"); formatter.field("brace_token", &self.brace_token); @@ -2904,7 +3007,7 @@ impl Debug for UseGroup { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for UseName { +impl Debug for crate::UseName { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { let mut formatter = formatter.debug_struct("UseName"); formatter.field("ident", &self.ident); @@ -2913,7 +3016,7 @@ impl Debug for UseName { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for UsePath { +impl Debug for crate::UsePath { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { let mut formatter = formatter.debug_struct("UsePath"); formatter.field("ident", &self.ident); @@ -2924,7 +3027,7 @@ impl Debug for UsePath { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for UseRename { +impl Debug for crate::UseRename { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { let mut formatter = formatter.debug_struct("UseRename"); formatter.field("ident", &self.ident); @@ -2935,31 +3038,31 @@ impl Debug for UseRename { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for UseTree { +impl Debug for crate::UseTree { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter.write_str("UseTree::")?; match self { - UseTree::Path(v0) => { + crate::UseTree::Path(v0) => { let mut formatter = formatter.debug_tuple("Path"); formatter.field(v0); formatter.finish() } - UseTree::Name(v0) => { + crate::UseTree::Name(v0) => { let mut formatter = formatter.debug_tuple("Name"); formatter.field(v0); formatter.finish() } - UseTree::Rename(v0) => { + crate::UseTree::Rename(v0) => { let mut formatter = formatter.debug_tuple("Rename"); formatter.field(v0); formatter.finish() } - UseTree::Glob(v0) => { + crate::UseTree::Glob(v0) => { let mut formatter = 
formatter.debug_tuple("Glob"); formatter.field(v0); formatter.finish() } - UseTree::Group(v0) => { + crate::UseTree::Group(v0) => { let mut formatter = formatter.debug_tuple("Group"); formatter.field(v0); formatter.finish() @@ -2969,7 +3072,7 @@ impl Debug for UseTree { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for Variadic { +impl Debug for crate::Variadic { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { let mut formatter = formatter.debug_struct("Variadic"); formatter.field("attrs", &self.attrs); @@ -2981,7 +3084,7 @@ impl Debug for Variadic { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for Variant { +impl Debug for crate::Variant { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { let mut formatter = formatter.debug_struct("Variant"); formatter.field("attrs", &self.attrs); @@ -2993,40 +3096,41 @@ impl Debug for Variant { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for VisRestricted { +impl Debug for crate::VisRestricted { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl VisRestricted { - fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { - let mut formatter = formatter.debug_struct(name); - formatter.field("pub_token", &self.pub_token); - formatter.field("paren_token", &self.paren_token); - formatter.field("in_token", &self.in_token); - formatter.field("path", &self.path); - formatter.finish() - } - } self.debug(formatter, "VisRestricted") } } #[cfg(any(feature = "derive", feature = "full"))] +impl crate::VisRestricted { + fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + let mut formatter = formatter.debug_struct(name); + formatter.field("pub_token", &self.pub_token); + formatter.field("paren_token", &self.paren_token); + formatter.field("in_token", 
&self.in_token); + formatter.field("path", &self.path); + formatter.finish() + } +} +#[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for Visibility { +impl Debug for crate::Visibility { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter.write_str("Visibility::")?; match self { - Visibility::Public(v0) => { + crate::Visibility::Public(v0) => { let mut formatter = formatter.debug_tuple("Public"); formatter.field(v0); formatter.finish() } - Visibility::Restricted(v0) => v0.debug(formatter, "Restricted"), - Visibility::Inherited => formatter.write_str("Inherited"), + crate::Visibility::Restricted(v0) => v0.debug(formatter, "Restricted"), + crate::Visibility::Inherited => formatter.write_str("Inherited"), } } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for WhereClause { +impl Debug for crate::WhereClause { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { let mut formatter = formatter.debug_struct("WhereClause"); formatter.field("where_token", &self.where_token); @@ -3036,16 +3140,16 @@ impl Debug for WhereClause { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Debug for WherePredicate { +impl Debug for crate::WherePredicate { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter.write_str("WherePredicate::")?; match self { - WherePredicate::Lifetime(v0) => { + crate::WherePredicate::Lifetime(v0) => { let mut formatter = formatter.debug_tuple("Lifetime"); formatter.field(v0); formatter.finish() } - WherePredicate::Type(v0) => { + crate::WherePredicate::Type(v0) => { let mut formatter = formatter.debug_tuple("Type"); formatter.field(v0); formatter.finish() diff --git a/vendor/syn/src/gen/eq.rs b/vendor/syn/src/gen/eq.rs index a7479c3..9bfce5f 100644 --- a/vendor/syn/src/gen/eq.rs +++ 
b/vendor/syn/src/gen/eq.rs @@ -3,33 +3,32 @@ #[cfg(any(feature = "derive", feature = "full"))] use crate::tt::TokenStreamHelper; -use crate::*; #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for Abi {} +impl Eq for crate::Abi {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for Abi { +impl PartialEq for crate::Abi { fn eq(&self, other: &Self) -> bool { self.name == other.name } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for AngleBracketedGenericArguments {} +impl Eq for crate::AngleBracketedGenericArguments {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for AngleBracketedGenericArguments { +impl PartialEq for crate::AngleBracketedGenericArguments { fn eq(&self, other: &Self) -> bool { self.colon2_token == other.colon2_token && self.args == other.args } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for Arm {} +impl Eq for crate::Arm {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for Arm { +impl PartialEq for crate::Arm { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.pat == other.pat && self.guard == other.guard && self.body == other.body && self.comma == other.comma @@ -37,10 +36,10 @@ impl PartialEq for Arm { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for AssocConst {} +impl Eq for crate::AssocConst {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for AssocConst { +impl PartialEq for crate::AssocConst { fn eq(&self, other: &Self) -> bool { self.ident == other.ident && self.generics == 
other.generics && self.value == other.value @@ -48,10 +47,10 @@ impl PartialEq for AssocConst { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for AssocType {} +impl Eq for crate::AssocType {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for AssocType { +impl PartialEq for crate::AssocType { fn eq(&self, other: &Self) -> bool { self.ident == other.ident && self.generics == other.generics && self.ty == other.ty @@ -59,114 +58,114 @@ impl PartialEq for AssocType { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for AttrStyle {} +impl Eq for crate::AttrStyle {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for AttrStyle { +impl PartialEq for crate::AttrStyle { fn eq(&self, other: &Self) -> bool { match (self, other) { - (AttrStyle::Outer, AttrStyle::Outer) => true, - (AttrStyle::Inner(_), AttrStyle::Inner(_)) => true, + (crate::AttrStyle::Outer, crate::AttrStyle::Outer) => true, + (crate::AttrStyle::Inner(_), crate::AttrStyle::Inner(_)) => true, _ => false, } } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for Attribute {} +impl Eq for crate::Attribute {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for Attribute { +impl PartialEq for crate::Attribute { fn eq(&self, other: &Self) -> bool { self.style == other.style && self.meta == other.meta } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for BareFnArg {} +impl Eq for crate::BareFnArg {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq 
for BareFnArg { +impl PartialEq for crate::BareFnArg { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.name == other.name && self.ty == other.ty } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for BareVariadic {} +impl Eq for crate::BareVariadic {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for BareVariadic { +impl PartialEq for crate::BareVariadic { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.name == other.name && self.comma == other.comma } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for BinOp {} +impl Eq for crate::BinOp {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for BinOp { +impl PartialEq for crate::BinOp { fn eq(&self, other: &Self) -> bool { match (self, other) { - (BinOp::Add(_), BinOp::Add(_)) => true, - (BinOp::Sub(_), BinOp::Sub(_)) => true, - (BinOp::Mul(_), BinOp::Mul(_)) => true, - (BinOp::Div(_), BinOp::Div(_)) => true, - (BinOp::Rem(_), BinOp::Rem(_)) => true, - (BinOp::And(_), BinOp::And(_)) => true, - (BinOp::Or(_), BinOp::Or(_)) => true, - (BinOp::BitXor(_), BinOp::BitXor(_)) => true, - (BinOp::BitAnd(_), BinOp::BitAnd(_)) => true, - (BinOp::BitOr(_), BinOp::BitOr(_)) => true, - (BinOp::Shl(_), BinOp::Shl(_)) => true, - (BinOp::Shr(_), BinOp::Shr(_)) => true, - (BinOp::Eq(_), BinOp::Eq(_)) => true, - (BinOp::Lt(_), BinOp::Lt(_)) => true, - (BinOp::Le(_), BinOp::Le(_)) => true, - (BinOp::Ne(_), BinOp::Ne(_)) => true, - (BinOp::Ge(_), BinOp::Ge(_)) => true, - (BinOp::Gt(_), BinOp::Gt(_)) => true, - (BinOp::AddAssign(_), BinOp::AddAssign(_)) => true, - (BinOp::SubAssign(_), BinOp::SubAssign(_)) => true, - (BinOp::MulAssign(_), BinOp::MulAssign(_)) => true, - (BinOp::DivAssign(_), 
BinOp::DivAssign(_)) => true, - (BinOp::RemAssign(_), BinOp::RemAssign(_)) => true, - (BinOp::BitXorAssign(_), BinOp::BitXorAssign(_)) => true, - (BinOp::BitAndAssign(_), BinOp::BitAndAssign(_)) => true, - (BinOp::BitOrAssign(_), BinOp::BitOrAssign(_)) => true, - (BinOp::ShlAssign(_), BinOp::ShlAssign(_)) => true, - (BinOp::ShrAssign(_), BinOp::ShrAssign(_)) => true, + (crate::BinOp::Add(_), crate::BinOp::Add(_)) => true, + (crate::BinOp::Sub(_), crate::BinOp::Sub(_)) => true, + (crate::BinOp::Mul(_), crate::BinOp::Mul(_)) => true, + (crate::BinOp::Div(_), crate::BinOp::Div(_)) => true, + (crate::BinOp::Rem(_), crate::BinOp::Rem(_)) => true, + (crate::BinOp::And(_), crate::BinOp::And(_)) => true, + (crate::BinOp::Or(_), crate::BinOp::Or(_)) => true, + (crate::BinOp::BitXor(_), crate::BinOp::BitXor(_)) => true, + (crate::BinOp::BitAnd(_), crate::BinOp::BitAnd(_)) => true, + (crate::BinOp::BitOr(_), crate::BinOp::BitOr(_)) => true, + (crate::BinOp::Shl(_), crate::BinOp::Shl(_)) => true, + (crate::BinOp::Shr(_), crate::BinOp::Shr(_)) => true, + (crate::BinOp::Eq(_), crate::BinOp::Eq(_)) => true, + (crate::BinOp::Lt(_), crate::BinOp::Lt(_)) => true, + (crate::BinOp::Le(_), crate::BinOp::Le(_)) => true, + (crate::BinOp::Ne(_), crate::BinOp::Ne(_)) => true, + (crate::BinOp::Ge(_), crate::BinOp::Ge(_)) => true, + (crate::BinOp::Gt(_), crate::BinOp::Gt(_)) => true, + (crate::BinOp::AddAssign(_), crate::BinOp::AddAssign(_)) => true, + (crate::BinOp::SubAssign(_), crate::BinOp::SubAssign(_)) => true, + (crate::BinOp::MulAssign(_), crate::BinOp::MulAssign(_)) => true, + (crate::BinOp::DivAssign(_), crate::BinOp::DivAssign(_)) => true, + (crate::BinOp::RemAssign(_), crate::BinOp::RemAssign(_)) => true, + (crate::BinOp::BitXorAssign(_), crate::BinOp::BitXorAssign(_)) => true, + (crate::BinOp::BitAndAssign(_), crate::BinOp::BitAndAssign(_)) => true, + (crate::BinOp::BitOrAssign(_), crate::BinOp::BitOrAssign(_)) => true, + (crate::BinOp::ShlAssign(_), crate::BinOp::ShlAssign(_)) 
=> true, + (crate::BinOp::ShrAssign(_), crate::BinOp::ShrAssign(_)) => true, _ => false, } } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for Block {} +impl Eq for crate::Block {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for Block { +impl PartialEq for crate::Block { fn eq(&self, other: &Self) -> bool { self.stmts == other.stmts } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for BoundLifetimes {} +impl Eq for crate::BoundLifetimes {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for BoundLifetimes { +impl PartialEq for crate::BoundLifetimes { fn eq(&self, other: &Self) -> bool { self.lifetimes == other.lifetimes } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ConstParam {} +impl Eq for crate::ConstParam {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ConstParam { +impl PartialEq for crate::ConstParam { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.ident == other.ident && self.ty == other.ty && self.eq_token == other.eq_token && self.default == other.default @@ -174,10 +173,10 @@ impl PartialEq for ConstParam { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for Constraint {} +impl Eq for crate::Constraint {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for Constraint { +impl PartialEq for crate::Constraint { fn eq(&self, other: &Self) -> bool { self.ident == other.ident && self.generics == other.generics && self.bounds == other.bounds @@ -185,55 +184,55 @@ impl PartialEq for Constraint { } 
#[cfg(feature = "derive")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for Data {} +impl Eq for crate::Data {} #[cfg(feature = "derive")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for Data { +impl PartialEq for crate::Data { fn eq(&self, other: &Self) -> bool { match (self, other) { - (Data::Struct(self0), Data::Struct(other0)) => self0 == other0, - (Data::Enum(self0), Data::Enum(other0)) => self0 == other0, - (Data::Union(self0), Data::Union(other0)) => self0 == other0, + (crate::Data::Struct(self0), crate::Data::Struct(other0)) => self0 == other0, + (crate::Data::Enum(self0), crate::Data::Enum(other0)) => self0 == other0, + (crate::Data::Union(self0), crate::Data::Union(other0)) => self0 == other0, _ => false, } } } #[cfg(feature = "derive")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for DataEnum {} +impl Eq for crate::DataEnum {} #[cfg(feature = "derive")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for DataEnum { +impl PartialEq for crate::DataEnum { fn eq(&self, other: &Self) -> bool { self.variants == other.variants } } #[cfg(feature = "derive")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for DataStruct {} +impl Eq for crate::DataStruct {} #[cfg(feature = "derive")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for DataStruct { +impl PartialEq for crate::DataStruct { fn eq(&self, other: &Self) -> bool { self.fields == other.fields && self.semi_token == other.semi_token } } #[cfg(feature = "derive")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for DataUnion {} +impl Eq for crate::DataUnion {} #[cfg(feature = "derive")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for DataUnion { +impl PartialEq for crate::DataUnion { fn eq(&self, other: &Self) -> bool { self.fields == other.fields } } #[cfg(feature = "derive")] #[cfg_attr(doc_cfg, doc(cfg(feature = 
"extra-traits")))] -impl Eq for DeriveInput {} +impl Eq for crate::DeriveInput {} #[cfg(feature = "derive")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for DeriveInput { +impl PartialEq for crate::DeriveInput { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.vis == other.vis && self.ident == other.ident && self.generics == other.generics && self.data == other.data @@ -241,110 +240,119 @@ impl PartialEq for DeriveInput { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for Expr {} +impl Eq for crate::Expr {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for Expr { +impl PartialEq for crate::Expr { fn eq(&self, other: &Self) -> bool { match (self, other) { #[cfg(feature = "full")] - (Expr::Array(self0), Expr::Array(other0)) => self0 == other0, + (crate::Expr::Array(self0), crate::Expr::Array(other0)) => self0 == other0, #[cfg(feature = "full")] - (Expr::Assign(self0), Expr::Assign(other0)) => self0 == other0, + (crate::Expr::Assign(self0), crate::Expr::Assign(other0)) => self0 == other0, #[cfg(feature = "full")] - (Expr::Async(self0), Expr::Async(other0)) => self0 == other0, + (crate::Expr::Async(self0), crate::Expr::Async(other0)) => self0 == other0, #[cfg(feature = "full")] - (Expr::Await(self0), Expr::Await(other0)) => self0 == other0, - (Expr::Binary(self0), Expr::Binary(other0)) => self0 == other0, + (crate::Expr::Await(self0), crate::Expr::Await(other0)) => self0 == other0, + (crate::Expr::Binary(self0), crate::Expr::Binary(other0)) => self0 == other0, #[cfg(feature = "full")] - (Expr::Block(self0), Expr::Block(other0)) => self0 == other0, + (crate::Expr::Block(self0), crate::Expr::Block(other0)) => self0 == other0, #[cfg(feature = "full")] - (Expr::Break(self0), Expr::Break(other0)) => self0 == other0, - (Expr::Call(self0), Expr::Call(other0)) => self0 == other0, - 
(Expr::Cast(self0), Expr::Cast(other0)) => self0 == other0, + (crate::Expr::Break(self0), crate::Expr::Break(other0)) => self0 == other0, + (crate::Expr::Call(self0), crate::Expr::Call(other0)) => self0 == other0, + (crate::Expr::Cast(self0), crate::Expr::Cast(other0)) => self0 == other0, #[cfg(feature = "full")] - (Expr::Closure(self0), Expr::Closure(other0)) => self0 == other0, - #[cfg(feature = "full")] - (Expr::Const(self0), Expr::Const(other0)) => self0 == other0, - #[cfg(feature = "full")] - (Expr::Continue(self0), Expr::Continue(other0)) => self0 == other0, - (Expr::Field(self0), Expr::Field(other0)) => self0 == other0, - #[cfg(feature = "full")] - (Expr::ForLoop(self0), Expr::ForLoop(other0)) => self0 == other0, - (Expr::Group(self0), Expr::Group(other0)) => self0 == other0, + (crate::Expr::Closure(self0), crate::Expr::Closure(other0)) => { + self0 == other0 + } #[cfg(feature = "full")] - (Expr::If(self0), Expr::If(other0)) => self0 == other0, - (Expr::Index(self0), Expr::Index(other0)) => self0 == other0, + (crate::Expr::Const(self0), crate::Expr::Const(other0)) => self0 == other0, #[cfg(feature = "full")] - (Expr::Infer(self0), Expr::Infer(other0)) => self0 == other0, + (crate::Expr::Continue(self0), crate::Expr::Continue(other0)) => { + self0 == other0 + } + (crate::Expr::Field(self0), crate::Expr::Field(other0)) => self0 == other0, #[cfg(feature = "full")] - (Expr::Let(self0), Expr::Let(other0)) => self0 == other0, - (Expr::Lit(self0), Expr::Lit(other0)) => self0 == other0, + (crate::Expr::ForLoop(self0), crate::Expr::ForLoop(other0)) => { + self0 == other0 + } + (crate::Expr::Group(self0), crate::Expr::Group(other0)) => self0 == other0, #[cfg(feature = "full")] - (Expr::Loop(self0), Expr::Loop(other0)) => self0 == other0, - (Expr::Macro(self0), Expr::Macro(other0)) => self0 == other0, + (crate::Expr::If(self0), crate::Expr::If(other0)) => self0 == other0, + (crate::Expr::Index(self0), crate::Expr::Index(other0)) => self0 == other0, #[cfg(feature = 
"full")] - (Expr::Match(self0), Expr::Match(other0)) => self0 == other0, + (crate::Expr::Infer(self0), crate::Expr::Infer(other0)) => self0 == other0, #[cfg(feature = "full")] - (Expr::MethodCall(self0), Expr::MethodCall(other0)) => self0 == other0, - (Expr::Paren(self0), Expr::Paren(other0)) => self0 == other0, - (Expr::Path(self0), Expr::Path(other0)) => self0 == other0, + (crate::Expr::Let(self0), crate::Expr::Let(other0)) => self0 == other0, + (crate::Expr::Lit(self0), crate::Expr::Lit(other0)) => self0 == other0, #[cfg(feature = "full")] - (Expr::Range(self0), Expr::Range(other0)) => self0 == other0, + (crate::Expr::Loop(self0), crate::Expr::Loop(other0)) => self0 == other0, + (crate::Expr::Macro(self0), crate::Expr::Macro(other0)) => self0 == other0, #[cfg(feature = "full")] - (Expr::Reference(self0), Expr::Reference(other0)) => self0 == other0, + (crate::Expr::Match(self0), crate::Expr::Match(other0)) => self0 == other0, + (crate::Expr::MethodCall(self0), crate::Expr::MethodCall(other0)) => { + self0 == other0 + } + (crate::Expr::Paren(self0), crate::Expr::Paren(other0)) => self0 == other0, + (crate::Expr::Path(self0), crate::Expr::Path(other0)) => self0 == other0, #[cfg(feature = "full")] - (Expr::Repeat(self0), Expr::Repeat(other0)) => self0 == other0, + (crate::Expr::Range(self0), crate::Expr::Range(other0)) => self0 == other0, + (crate::Expr::Reference(self0), crate::Expr::Reference(other0)) => { + self0 == other0 + } #[cfg(feature = "full")] - (Expr::Return(self0), Expr::Return(other0)) => self0 == other0, + (crate::Expr::Repeat(self0), crate::Expr::Repeat(other0)) => self0 == other0, #[cfg(feature = "full")] - (Expr::Struct(self0), Expr::Struct(other0)) => self0 == other0, + (crate::Expr::Return(self0), crate::Expr::Return(other0)) => self0 == other0, + (crate::Expr::Struct(self0), crate::Expr::Struct(other0)) => self0 == other0, #[cfg(feature = "full")] - (Expr::Try(self0), Expr::Try(other0)) => self0 == other0, + (crate::Expr::Try(self0), 
crate::Expr::Try(other0)) => self0 == other0, #[cfg(feature = "full")] - (Expr::TryBlock(self0), Expr::TryBlock(other0)) => self0 == other0, + (crate::Expr::TryBlock(self0), crate::Expr::TryBlock(other0)) => { + self0 == other0 + } #[cfg(feature = "full")] - (Expr::Tuple(self0), Expr::Tuple(other0)) => self0 == other0, - (Expr::Unary(self0), Expr::Unary(other0)) => self0 == other0, + (crate::Expr::Tuple(self0), crate::Expr::Tuple(other0)) => self0 == other0, + (crate::Expr::Unary(self0), crate::Expr::Unary(other0)) => self0 == other0, #[cfg(feature = "full")] - (Expr::Unsafe(self0), Expr::Unsafe(other0)) => self0 == other0, - (Expr::Verbatim(self0), Expr::Verbatim(other0)) => { + (crate::Expr::Unsafe(self0), crate::Expr::Unsafe(other0)) => self0 == other0, + (crate::Expr::Verbatim(self0), crate::Expr::Verbatim(other0)) => { TokenStreamHelper(self0) == TokenStreamHelper(other0) } #[cfg(feature = "full")] - (Expr::While(self0), Expr::While(other0)) => self0 == other0, + (crate::Expr::While(self0), crate::Expr::While(other0)) => self0 == other0, #[cfg(feature = "full")] - (Expr::Yield(self0), Expr::Yield(other0)) => self0 == other0, + (crate::Expr::Yield(self0), crate::Expr::Yield(other0)) => self0 == other0, _ => false, } } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ExprArray {} +impl Eq for crate::ExprArray {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ExprArray { +impl PartialEq for crate::ExprArray { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.elems == other.elems } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ExprAssign {} +impl Eq for crate::ExprAssign {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ExprAssign { +impl PartialEq for crate::ExprAssign { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && 
self.left == other.left && self.right == other.right } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ExprAsync {} +impl Eq for crate::ExprAsync {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ExprAsync { +impl PartialEq for crate::ExprAsync { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.capture == other.capture && self.block == other.block @@ -352,20 +360,20 @@ impl PartialEq for ExprAsync { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ExprAwait {} +impl Eq for crate::ExprAwait {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ExprAwait { +impl PartialEq for crate::ExprAwait { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.base == other.base } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ExprBinary {} +impl Eq for crate::ExprBinary {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ExprBinary { +impl PartialEq for crate::ExprBinary { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.left == other.left && self.op == other.op && self.right == other.right @@ -373,10 +381,10 @@ impl PartialEq for ExprBinary { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ExprBlock {} +impl Eq for crate::ExprBlock {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ExprBlock { +impl PartialEq for crate::ExprBlock { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.label == other.label && self.block == other.block @@ -384,40 +392,40 @@ impl PartialEq for ExprBlock { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = 
"extra-traits")))] -impl Eq for ExprBreak {} +impl Eq for crate::ExprBreak {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ExprBreak { +impl PartialEq for crate::ExprBreak { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.label == other.label && self.expr == other.expr } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ExprCall {} +impl Eq for crate::ExprCall {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ExprCall { +impl PartialEq for crate::ExprCall { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.func == other.func && self.args == other.args } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ExprCast {} +impl Eq for crate::ExprCast {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ExprCast { +impl PartialEq for crate::ExprCast { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.expr == other.expr && self.ty == other.ty } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ExprClosure {} +impl Eq for crate::ExprClosure {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ExprClosure { +impl PartialEq for crate::ExprClosure { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.lifetimes == other.lifetimes && self.constness == other.constness && self.movability == other.movability @@ -428,30 +436,30 @@ impl PartialEq for ExprClosure { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ExprConst {} +impl Eq for crate::ExprConst {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, 
doc(cfg(feature = "extra-traits")))] -impl PartialEq for ExprConst { +impl PartialEq for crate::ExprConst { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.block == other.block } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ExprContinue {} +impl Eq for crate::ExprContinue {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ExprContinue { +impl PartialEq for crate::ExprContinue { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.label == other.label } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ExprField {} +impl Eq for crate::ExprField {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ExprField { +impl PartialEq for crate::ExprField { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.base == other.base && self.member == other.member @@ -459,10 +467,10 @@ impl PartialEq for ExprField { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ExprForLoop {} +impl Eq for crate::ExprForLoop {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ExprForLoop { +impl PartialEq for crate::ExprForLoop { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.label == other.label && self.pat == other.pat && self.expr == other.expr && self.body == other.body @@ -470,20 +478,20 @@ impl PartialEq for ExprForLoop { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ExprGroup {} +impl Eq for crate::ExprGroup {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ExprGroup { +impl PartialEq for crate::ExprGroup { 
fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.expr == other.expr } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ExprIf {} +impl Eq for crate::ExprIf {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ExprIf { +impl PartialEq for crate::ExprIf { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.cond == other.cond && self.then_branch == other.then_branch @@ -492,80 +500,80 @@ impl PartialEq for ExprIf { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ExprIndex {} +impl Eq for crate::ExprIndex {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ExprIndex { +impl PartialEq for crate::ExprIndex { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.expr == other.expr && self.index == other.index } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ExprInfer {} +impl Eq for crate::ExprInfer {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ExprInfer { +impl PartialEq for crate::ExprInfer { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ExprLet {} +impl Eq for crate::ExprLet {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ExprLet { +impl PartialEq for crate::ExprLet { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.pat == other.pat && self.expr == other.expr } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ExprLit {} +impl Eq for crate::ExprLit {} #[cfg(any(feature = "derive", feature = 
"full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ExprLit { +impl PartialEq for crate::ExprLit { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.lit == other.lit } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ExprLoop {} +impl Eq for crate::ExprLoop {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ExprLoop { +impl PartialEq for crate::ExprLoop { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.label == other.label && self.body == other.body } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ExprMacro {} +impl Eq for crate::ExprMacro {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ExprMacro { +impl PartialEq for crate::ExprMacro { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.mac == other.mac } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ExprMatch {} +impl Eq for crate::ExprMatch {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ExprMatch { +impl PartialEq for crate::ExprMatch { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.expr == other.expr && self.arms == other.arms } } -#[cfg(feature = "full")] +#[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ExprMethodCall {} -#[cfg(feature = "full")] +impl Eq for crate::ExprMethodCall {} +#[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ExprMethodCall { +impl PartialEq for crate::ExprMethodCall { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.receiver == 
other.receiver && self.method == other.method && self.turbofish == other.turbofish @@ -574,41 +582,41 @@ impl PartialEq for ExprMethodCall { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ExprParen {} +impl Eq for crate::ExprParen {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ExprParen { +impl PartialEq for crate::ExprParen { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.expr == other.expr } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ExprPath {} +impl Eq for crate::ExprPath {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ExprPath { +impl PartialEq for crate::ExprPath { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.qself == other.qself && self.path == other.path } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ExprRange {} +impl Eq for crate::ExprRange {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ExprRange { +impl PartialEq for crate::ExprRange { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.start == other.start && self.limits == other.limits && self.end == other.end } } -#[cfg(feature = "full")] +#[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ExprReference {} -#[cfg(feature = "full")] +impl Eq for crate::ExprReference {} +#[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ExprReference { +impl PartialEq for crate::ExprReference { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.mutability == 
other.mutability && self.expr == other.expr @@ -616,30 +624,30 @@ impl PartialEq for ExprReference { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ExprRepeat {} +impl Eq for crate::ExprRepeat {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ExprRepeat { +impl PartialEq for crate::ExprRepeat { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.expr == other.expr && self.len == other.len } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ExprReturn {} +impl Eq for crate::ExprReturn {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ExprReturn { +impl PartialEq for crate::ExprReturn { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.expr == other.expr } } -#[cfg(feature = "full")] +#[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ExprStruct {} -#[cfg(feature = "full")] +impl Eq for crate::ExprStruct {} +#[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ExprStruct { +impl PartialEq for crate::ExprStruct { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.qself == other.qself && self.path == other.path && self.fields == other.fields && self.dot2_token == other.dot2_token @@ -648,60 +656,60 @@ impl PartialEq for ExprStruct { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ExprTry {} +impl Eq for crate::ExprTry {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ExprTry { +impl PartialEq for crate::ExprTry { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.expr == other.expr } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature 
= "extra-traits")))] -impl Eq for ExprTryBlock {} +impl Eq for crate::ExprTryBlock {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ExprTryBlock { +impl PartialEq for crate::ExprTryBlock { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.block == other.block } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ExprTuple {} +impl Eq for crate::ExprTuple {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ExprTuple { +impl PartialEq for crate::ExprTuple { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.elems == other.elems } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ExprUnary {} +impl Eq for crate::ExprUnary {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ExprUnary { +impl PartialEq for crate::ExprUnary { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.op == other.op && self.expr == other.expr } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ExprUnsafe {} +impl Eq for crate::ExprUnsafe {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ExprUnsafe { +impl PartialEq for crate::ExprUnsafe { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.block == other.block } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ExprWhile {} +impl Eq for crate::ExprWhile {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ExprWhile { +impl PartialEq for crate::ExprWhile { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.label == other.label && self.cond == other.cond 
&& self.body == other.body @@ -709,20 +717,20 @@ impl PartialEq for ExprWhile { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ExprYield {} +impl Eq for crate::ExprYield {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ExprYield { +impl PartialEq for crate::ExprYield { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.expr == other.expr } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for Field {} +impl Eq for crate::Field {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for Field { +impl PartialEq for crate::Field { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.vis == other.vis && self.mutability == other.mutability && self.ident == other.ident @@ -731,33 +739,33 @@ impl PartialEq for Field { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for FieldMutability {} +impl Eq for crate::FieldMutability {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for FieldMutability { +impl PartialEq for crate::FieldMutability { fn eq(&self, other: &Self) -> bool { match (self, other) { - (FieldMutability::None, FieldMutability::None) => true, + (crate::FieldMutability::None, crate::FieldMutability::None) => true, } } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for FieldPat {} +impl Eq for crate::FieldPat {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for FieldPat { +impl PartialEq for crate::FieldPat { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.member == other.member && self.colon_token == other.colon_token && 
self.pat == other.pat } } -#[cfg(feature = "full")] +#[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for FieldValue {} -#[cfg(feature = "full")] +impl Eq for crate::FieldValue {} +#[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for FieldValue { +impl PartialEq for crate::FieldValue { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.member == other.member && self.colon_token == other.colon_token && self.expr == other.expr @@ -765,45 +773,49 @@ impl PartialEq for FieldValue { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for Fields {} +impl Eq for crate::Fields {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for Fields { +impl PartialEq for crate::Fields { fn eq(&self, other: &Self) -> bool { match (self, other) { - (Fields::Named(self0), Fields::Named(other0)) => self0 == other0, - (Fields::Unnamed(self0), Fields::Unnamed(other0)) => self0 == other0, - (Fields::Unit, Fields::Unit) => true, + (crate::Fields::Named(self0), crate::Fields::Named(other0)) => { + self0 == other0 + } + (crate::Fields::Unnamed(self0), crate::Fields::Unnamed(other0)) => { + self0 == other0 + } + (crate::Fields::Unit, crate::Fields::Unit) => true, _ => false, } } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for FieldsNamed {} +impl Eq for crate::FieldsNamed {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for FieldsNamed { +impl PartialEq for crate::FieldsNamed { fn eq(&self, other: &Self) -> bool { self.named == other.named } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] 
-impl Eq for FieldsUnnamed {} +impl Eq for crate::FieldsUnnamed {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for FieldsUnnamed { +impl PartialEq for crate::FieldsUnnamed { fn eq(&self, other: &Self) -> bool { self.unnamed == other.unnamed } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for File {} +impl Eq for crate::File {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for File { +impl PartialEq for crate::File { fn eq(&self, other: &Self) -> bool { self.shebang == other.shebang && self.attrs == other.attrs && self.items == other.items @@ -811,53 +823,64 @@ impl PartialEq for File { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for FnArg {} +impl Eq for crate::FnArg {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for FnArg { +impl PartialEq for crate::FnArg { fn eq(&self, other: &Self) -> bool { match (self, other) { - (FnArg::Receiver(self0), FnArg::Receiver(other0)) => self0 == other0, - (FnArg::Typed(self0), FnArg::Typed(other0)) => self0 == other0, + (crate::FnArg::Receiver(self0), crate::FnArg::Receiver(other0)) => { + self0 == other0 + } + (crate::FnArg::Typed(self0), crate::FnArg::Typed(other0)) => self0 == other0, _ => false, } } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ForeignItem {} +impl Eq for crate::ForeignItem {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ForeignItem { +impl PartialEq for crate::ForeignItem { fn eq(&self, other: &Self) -> bool { match (self, other) { - (ForeignItem::Fn(self0), ForeignItem::Fn(other0)) => self0 == other0, - (ForeignItem::Static(self0), ForeignItem::Static(other0)) => self0 == other0, - (ForeignItem::Type(self0), 
ForeignItem::Type(other0)) => self0 == other0, - (ForeignItem::Macro(self0), ForeignItem::Macro(other0)) => self0 == other0, - (ForeignItem::Verbatim(self0), ForeignItem::Verbatim(other0)) => { - TokenStreamHelper(self0) == TokenStreamHelper(other0) + (crate::ForeignItem::Fn(self0), crate::ForeignItem::Fn(other0)) => { + self0 == other0 } + (crate::ForeignItem::Static(self0), crate::ForeignItem::Static(other0)) => { + self0 == other0 + } + (crate::ForeignItem::Type(self0), crate::ForeignItem::Type(other0)) => { + self0 == other0 + } + (crate::ForeignItem::Macro(self0), crate::ForeignItem::Macro(other0)) => { + self0 == other0 + } + ( + crate::ForeignItem::Verbatim(self0), + crate::ForeignItem::Verbatim(other0), + ) => TokenStreamHelper(self0) == TokenStreamHelper(other0), _ => false, } } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ForeignItemFn {} +impl Eq for crate::ForeignItemFn {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ForeignItemFn { +impl PartialEq for crate::ForeignItemFn { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.vis == other.vis && self.sig == other.sig } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ForeignItemMacro {} +impl Eq for crate::ForeignItemMacro {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ForeignItemMacro { +impl PartialEq for crate::ForeignItemMacro { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.mac == other.mac && self.semi_token == other.semi_token @@ -865,10 +888,10 @@ impl PartialEq for ForeignItemMacro { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ForeignItemStatic {} +impl Eq for crate::ForeignItemStatic {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for 
ForeignItemStatic { +impl PartialEq for crate::ForeignItemStatic { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.vis == other.vis && self.mutability == other.mutability && self.ident == other.ident @@ -877,10 +900,10 @@ impl PartialEq for ForeignItemStatic { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ForeignItemType {} +impl Eq for crate::ForeignItemType {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ForeignItemType { +impl PartialEq for crate::ForeignItemType { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.vis == other.vis && self.ident == other.ident && self.generics == other.generics @@ -888,57 +911,68 @@ impl PartialEq for ForeignItemType { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for GenericArgument {} +impl Eq for crate::GenericArgument {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for GenericArgument { +impl PartialEq for crate::GenericArgument { fn eq(&self, other: &Self) -> bool { match (self, other) { - (GenericArgument::Lifetime(self0), GenericArgument::Lifetime(other0)) => { - self0 == other0 - } - (GenericArgument::Type(self0), GenericArgument::Type(other0)) => { - self0 == other0 - } - (GenericArgument::Const(self0), GenericArgument::Const(other0)) => { - self0 == other0 - } - (GenericArgument::AssocType(self0), GenericArgument::AssocType(other0)) => { - self0 == other0 - } - (GenericArgument::AssocConst(self0), GenericArgument::AssocConst(other0)) => { - self0 == other0 - } - (GenericArgument::Constraint(self0), GenericArgument::Constraint(other0)) => { - self0 == other0 - } + ( + crate::GenericArgument::Lifetime(self0), + crate::GenericArgument::Lifetime(other0), + ) => self0 == other0, + ( + crate::GenericArgument::Type(self0), + 
crate::GenericArgument::Type(other0), + ) => self0 == other0, + ( + crate::GenericArgument::Const(self0), + crate::GenericArgument::Const(other0), + ) => self0 == other0, + ( + crate::GenericArgument::AssocType(self0), + crate::GenericArgument::AssocType(other0), + ) => self0 == other0, + ( + crate::GenericArgument::AssocConst(self0), + crate::GenericArgument::AssocConst(other0), + ) => self0 == other0, + ( + crate::GenericArgument::Constraint(self0), + crate::GenericArgument::Constraint(other0), + ) => self0 == other0, _ => false, } } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for GenericParam {} +impl Eq for crate::GenericParam {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for GenericParam { +impl PartialEq for crate::GenericParam { fn eq(&self, other: &Self) -> bool { match (self, other) { - (GenericParam::Lifetime(self0), GenericParam::Lifetime(other0)) => { + ( + crate::GenericParam::Lifetime(self0), + crate::GenericParam::Lifetime(other0), + ) => self0 == other0, + (crate::GenericParam::Type(self0), crate::GenericParam::Type(other0)) => { + self0 == other0 + } + (crate::GenericParam::Const(self0), crate::GenericParam::Const(other0)) => { self0 == other0 } - (GenericParam::Type(self0), GenericParam::Type(other0)) => self0 == other0, - (GenericParam::Const(self0), GenericParam::Const(other0)) => self0 == other0, _ => false, } } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for Generics {} +impl Eq for crate::Generics {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for Generics { +impl PartialEq for crate::Generics { fn eq(&self, other: &Self) -> bool { self.lt_token == other.lt_token && self.params == other.params && self.gt_token == other.gt_token && 
self.where_clause == other.where_clause @@ -946,17 +980,23 @@ impl PartialEq for Generics { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ImplItem {} +impl Eq for crate::ImplItem {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ImplItem { +impl PartialEq for crate::ImplItem { fn eq(&self, other: &Self) -> bool { match (self, other) { - (ImplItem::Const(self0), ImplItem::Const(other0)) => self0 == other0, - (ImplItem::Fn(self0), ImplItem::Fn(other0)) => self0 == other0, - (ImplItem::Type(self0), ImplItem::Type(other0)) => self0 == other0, - (ImplItem::Macro(self0), ImplItem::Macro(other0)) => self0 == other0, - (ImplItem::Verbatim(self0), ImplItem::Verbatim(other0)) => { + (crate::ImplItem::Const(self0), crate::ImplItem::Const(other0)) => { + self0 == other0 + } + (crate::ImplItem::Fn(self0), crate::ImplItem::Fn(other0)) => self0 == other0, + (crate::ImplItem::Type(self0), crate::ImplItem::Type(other0)) => { + self0 == other0 + } + (crate::ImplItem::Macro(self0), crate::ImplItem::Macro(other0)) => { + self0 == other0 + } + (crate::ImplItem::Verbatim(self0), crate::ImplItem::Verbatim(other0)) => { TokenStreamHelper(self0) == TokenStreamHelper(other0) } _ => false, @@ -965,10 +1005,10 @@ impl PartialEq for ImplItem { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ImplItemConst {} +impl Eq for crate::ImplItemConst {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ImplItemConst { +impl PartialEq for crate::ImplItemConst { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.vis == other.vis && self.defaultness == other.defaultness && self.ident == other.ident @@ -978,10 +1018,10 @@ impl PartialEq for ImplItemConst { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ImplItemFn {} +impl Eq for 
crate::ImplItemFn {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ImplItemFn { +impl PartialEq for crate::ImplItemFn { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.vis == other.vis && self.defaultness == other.defaultness && self.sig == other.sig @@ -990,10 +1030,10 @@ impl PartialEq for ImplItemFn { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ImplItemMacro {} +impl Eq for crate::ImplItemMacro {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ImplItemMacro { +impl PartialEq for crate::ImplItemMacro { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.mac == other.mac && self.semi_token == other.semi_token @@ -1001,10 +1041,10 @@ impl PartialEq for ImplItemMacro { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ImplItemType {} +impl Eq for crate::ImplItemType {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ImplItemType { +impl PartialEq for crate::ImplItemType { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.vis == other.vis && self.defaultness == other.defaultness && self.ident == other.ident @@ -1013,38 +1053,44 @@ impl PartialEq for ImplItemType { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ImplRestriction {} +impl Eq for crate::ImplRestriction {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ImplRestriction { +impl PartialEq for crate::ImplRestriction { fn eq(&self, _other: &Self) -> bool { match *self {} } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for Item {} +impl Eq for crate::Item {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] 
-impl PartialEq for Item { +impl PartialEq for crate::Item { fn eq(&self, other: &Self) -> bool { match (self, other) { - (Item::Const(self0), Item::Const(other0)) => self0 == other0, - (Item::Enum(self0), Item::Enum(other0)) => self0 == other0, - (Item::ExternCrate(self0), Item::ExternCrate(other0)) => self0 == other0, - (Item::Fn(self0), Item::Fn(other0)) => self0 == other0, - (Item::ForeignMod(self0), Item::ForeignMod(other0)) => self0 == other0, - (Item::Impl(self0), Item::Impl(other0)) => self0 == other0, - (Item::Macro(self0), Item::Macro(other0)) => self0 == other0, - (Item::Mod(self0), Item::Mod(other0)) => self0 == other0, - (Item::Static(self0), Item::Static(other0)) => self0 == other0, - (Item::Struct(self0), Item::Struct(other0)) => self0 == other0, - (Item::Trait(self0), Item::Trait(other0)) => self0 == other0, - (Item::TraitAlias(self0), Item::TraitAlias(other0)) => self0 == other0, - (Item::Type(self0), Item::Type(other0)) => self0 == other0, - (Item::Union(self0), Item::Union(other0)) => self0 == other0, - (Item::Use(self0), Item::Use(other0)) => self0 == other0, - (Item::Verbatim(self0), Item::Verbatim(other0)) => { + (crate::Item::Const(self0), crate::Item::Const(other0)) => self0 == other0, + (crate::Item::Enum(self0), crate::Item::Enum(other0)) => self0 == other0, + (crate::Item::ExternCrate(self0), crate::Item::ExternCrate(other0)) => { + self0 == other0 + } + (crate::Item::Fn(self0), crate::Item::Fn(other0)) => self0 == other0, + (crate::Item::ForeignMod(self0), crate::Item::ForeignMod(other0)) => { + self0 == other0 + } + (crate::Item::Impl(self0), crate::Item::Impl(other0)) => self0 == other0, + (crate::Item::Macro(self0), crate::Item::Macro(other0)) => self0 == other0, + (crate::Item::Mod(self0), crate::Item::Mod(other0)) => self0 == other0, + (crate::Item::Static(self0), crate::Item::Static(other0)) => self0 == other0, + (crate::Item::Struct(self0), crate::Item::Struct(other0)) => self0 == other0, + (crate::Item::Trait(self0), 
crate::Item::Trait(other0)) => self0 == other0, + (crate::Item::TraitAlias(self0), crate::Item::TraitAlias(other0)) => { + self0 == other0 + } + (crate::Item::Type(self0), crate::Item::Type(other0)) => self0 == other0, + (crate::Item::Union(self0), crate::Item::Union(other0)) => self0 == other0, + (crate::Item::Use(self0), crate::Item::Use(other0)) => self0 == other0, + (crate::Item::Verbatim(self0), crate::Item::Verbatim(other0)) => { TokenStreamHelper(self0) == TokenStreamHelper(other0) } _ => false, @@ -1053,10 +1099,10 @@ impl PartialEq for Item { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ItemConst {} +impl Eq for crate::ItemConst {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ItemConst { +impl PartialEq for crate::ItemConst { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.vis == other.vis && self.ident == other.ident && self.generics == other.generics && self.ty == other.ty @@ -1065,10 +1111,10 @@ impl PartialEq for ItemConst { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ItemEnum {} +impl Eq for crate::ItemEnum {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ItemEnum { +impl PartialEq for crate::ItemEnum { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.vis == other.vis && self.ident == other.ident && self.generics == other.generics && self.variants == other.variants @@ -1076,10 +1122,10 @@ impl PartialEq for ItemEnum { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ItemExternCrate {} +impl Eq for crate::ItemExternCrate {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ItemExternCrate { +impl PartialEq for crate::ItemExternCrate { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs 
&& self.vis == other.vis && self.ident == other.ident && self.rename == other.rename @@ -1087,10 +1133,10 @@ impl PartialEq for ItemExternCrate { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ItemFn {} +impl Eq for crate::ItemFn {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ItemFn { +impl PartialEq for crate::ItemFn { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.vis == other.vis && self.sig == other.sig && self.block == other.block @@ -1098,10 +1144,10 @@ impl PartialEq for ItemFn { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ItemForeignMod {} +impl Eq for crate::ItemForeignMod {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ItemForeignMod { +impl PartialEq for crate::ItemForeignMod { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.unsafety == other.unsafety && self.abi == other.abi && self.items == other.items @@ -1109,10 +1155,10 @@ impl PartialEq for ItemForeignMod { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ItemImpl {} +impl Eq for crate::ItemImpl {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ItemImpl { +impl PartialEq for crate::ItemImpl { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.defaultness == other.defaultness && self.unsafety == other.unsafety && self.generics == other.generics @@ -1122,10 +1168,10 @@ impl PartialEq for ItemImpl { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ItemMacro {} +impl Eq for crate::ItemMacro {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ItemMacro { +impl PartialEq for crate::ItemMacro { fn eq(&self, other: &Self) -> 
bool { self.attrs == other.attrs && self.ident == other.ident && self.mac == other.mac && self.semi_token == other.semi_token @@ -1133,10 +1179,10 @@ impl PartialEq for ItemMacro { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ItemMod {} +impl Eq for crate::ItemMod {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ItemMod { +impl PartialEq for crate::ItemMod { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.vis == other.vis && self.unsafety == other.unsafety && self.ident == other.ident @@ -1145,10 +1191,10 @@ impl PartialEq for ItemMod { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ItemStatic {} +impl Eq for crate::ItemStatic {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ItemStatic { +impl PartialEq for crate::ItemStatic { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.vis == other.vis && self.mutability == other.mutability && self.ident == other.ident @@ -1157,10 +1203,10 @@ impl PartialEq for ItemStatic { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ItemStruct {} +impl Eq for crate::ItemStruct {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ItemStruct { +impl PartialEq for crate::ItemStruct { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.vis == other.vis && self.ident == other.ident && self.generics == other.generics && self.fields == other.fields @@ -1169,10 +1215,10 @@ impl PartialEq for ItemStruct { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ItemTrait {} +impl Eq for crate::ItemTrait {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ItemTrait { +impl PartialEq 
for crate::ItemTrait { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.vis == other.vis && self.unsafety == other.unsafety && self.auto_token == other.auto_token @@ -1183,10 +1229,10 @@ impl PartialEq for ItemTrait { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ItemTraitAlias {} +impl Eq for crate::ItemTraitAlias {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ItemTraitAlias { +impl PartialEq for crate::ItemTraitAlias { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.vis == other.vis && self.ident == other.ident && self.generics == other.generics && self.bounds == other.bounds @@ -1194,10 +1240,10 @@ impl PartialEq for ItemTraitAlias { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ItemType {} +impl Eq for crate::ItemType {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ItemType { +impl PartialEq for crate::ItemType { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.vis == other.vis && self.ident == other.ident && self.generics == other.generics && self.ty == other.ty @@ -1205,10 +1251,10 @@ impl PartialEq for ItemType { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ItemUnion {} +impl Eq for crate::ItemUnion {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ItemUnion { +impl PartialEq for crate::ItemUnion { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.vis == other.vis && self.ident == other.ident && self.generics == other.generics && self.fields == other.fields @@ -1216,10 +1262,10 @@ impl PartialEq for ItemUnion { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ItemUse {} +impl Eq for crate::ItemUse {} 
#[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ItemUse { +impl PartialEq for crate::ItemUse { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.vis == other.vis && self.leading_colon == other.leading_colon && self.tree == other.tree @@ -1227,39 +1273,39 @@ impl PartialEq for ItemUse { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for Label {} +impl Eq for crate::Label {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for Label { +impl PartialEq for crate::Label { fn eq(&self, other: &Self) -> bool { self.name == other.name } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for LifetimeParam {} +impl Eq for crate::LifetimeParam {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for LifetimeParam { +impl PartialEq for crate::LifetimeParam { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.lifetime == other.lifetime && self.colon_token == other.colon_token && self.bounds == other.bounds } } #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for Lit {} +impl Eq for crate::Lit {} #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for Lit { +impl PartialEq for crate::Lit { fn eq(&self, other: &Self) -> bool { match (self, other) { - (Lit::Str(self0), Lit::Str(other0)) => self0 == other0, - (Lit::ByteStr(self0), Lit::ByteStr(other0)) => self0 == other0, - (Lit::Byte(self0), Lit::Byte(other0)) => self0 == other0, - (Lit::Char(self0), Lit::Char(other0)) => self0 == other0, - (Lit::Int(self0), Lit::Int(other0)) => self0 == other0, - (Lit::Float(self0), Lit::Float(other0)) => self0 == other0, - (Lit::Bool(self0), Lit::Bool(other0)) => self0 == other0, - (Lit::Verbatim(self0), Lit::Verbatim(other0)) => { + 
(crate::Lit::Str(self0), crate::Lit::Str(other0)) => self0 == other0, + (crate::Lit::ByteStr(self0), crate::Lit::ByteStr(other0)) => self0 == other0, + (crate::Lit::Byte(self0), crate::Lit::Byte(other0)) => self0 == other0, + (crate::Lit::Char(self0), crate::Lit::Char(other0)) => self0 == other0, + (crate::Lit::Int(self0), crate::Lit::Int(other0)) => self0 == other0, + (crate::Lit::Float(self0), crate::Lit::Float(other0)) => self0 == other0, + (crate::Lit::Bool(self0), crate::Lit::Bool(other0)) => self0 == other0, + (crate::Lit::Verbatim(self0), crate::Lit::Verbatim(other0)) => { self0.to_string() == other0.to_string() } _ => false, @@ -1267,51 +1313,51 @@ impl PartialEq for Lit { } } #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for LitBool {} +impl Eq for crate::LitBool {} #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for LitBool { +impl PartialEq for crate::LitBool { fn eq(&self, other: &Self) -> bool { self.value == other.value } } #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for LitByte {} +impl Eq for crate::LitByte {} #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for LitByteStr {} +impl Eq for crate::LitByteStr {} #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for LitChar {} +impl Eq for crate::LitChar {} #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for LitFloat {} +impl Eq for crate::LitFloat {} #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for LitInt {} +impl Eq for crate::LitInt {} #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for LitStr {} +impl Eq for crate::LitStr {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for Local {} +impl Eq for crate::Local {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for Local { +impl PartialEq for crate::Local { fn eq(&self, other: &Self) -> bool { self.attrs 
== other.attrs && self.pat == other.pat && self.init == other.init } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for LocalInit {} +impl Eq for crate::LocalInit {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for LocalInit { +impl PartialEq for crate::LocalInit { fn eq(&self, other: &Self) -> bool { self.expr == other.expr && self.diverge == other.diverge } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for Macro {} +impl Eq for crate::Macro {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for Macro { +impl PartialEq for crate::Macro { fn eq(&self, other: &Self) -> bool { self.path == other.path && self.delimiter == other.delimiter && TokenStreamHelper(&self.tokens) == TokenStreamHelper(&other.tokens) @@ -1319,40 +1365,44 @@ impl PartialEq for Macro { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for MacroDelimiter {} +impl Eq for crate::MacroDelimiter {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for MacroDelimiter { +impl PartialEq for crate::MacroDelimiter { fn eq(&self, other: &Self) -> bool { match (self, other) { - (MacroDelimiter::Paren(_), MacroDelimiter::Paren(_)) => true, - (MacroDelimiter::Brace(_), MacroDelimiter::Brace(_)) => true, - (MacroDelimiter::Bracket(_), MacroDelimiter::Bracket(_)) => true, + (crate::MacroDelimiter::Paren(_), crate::MacroDelimiter::Paren(_)) => true, + (crate::MacroDelimiter::Brace(_), crate::MacroDelimiter::Brace(_)) => true, + (crate::MacroDelimiter::Bracket(_), crate::MacroDelimiter::Bracket(_)) => { + true + } _ => false, } } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = 
"extra-traits")))] -impl Eq for Meta {} +impl Eq for crate::Meta {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for Meta { +impl PartialEq for crate::Meta { fn eq(&self, other: &Self) -> bool { match (self, other) { - (Meta::Path(self0), Meta::Path(other0)) => self0 == other0, - (Meta::List(self0), Meta::List(other0)) => self0 == other0, - (Meta::NameValue(self0), Meta::NameValue(other0)) => self0 == other0, + (crate::Meta::Path(self0), crate::Meta::Path(other0)) => self0 == other0, + (crate::Meta::List(self0), crate::Meta::List(other0)) => self0 == other0, + (crate::Meta::NameValue(self0), crate::Meta::NameValue(other0)) => { + self0 == other0 + } _ => false, } } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for MetaList {} +impl Eq for crate::MetaList {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for MetaList { +impl PartialEq for crate::MetaList { fn eq(&self, other: &Self) -> bool { self.path == other.path && self.delimiter == other.delimiter && TokenStreamHelper(&self.tokens) == TokenStreamHelper(&other.tokens) @@ -1360,61 +1410,65 @@ impl PartialEq for MetaList { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for MetaNameValue {} +impl Eq for crate::MetaNameValue {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for MetaNameValue { +impl PartialEq for crate::MetaNameValue { fn eq(&self, other: &Self) -> bool { self.path == other.path && self.value == other.value } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ParenthesizedGenericArguments {} +impl Eq for crate::ParenthesizedGenericArguments {} #[cfg(any(feature = 
"derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ParenthesizedGenericArguments { +impl PartialEq for crate::ParenthesizedGenericArguments { fn eq(&self, other: &Self) -> bool { self.inputs == other.inputs && self.output == other.output } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for Pat {} +impl Eq for crate::Pat {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for Pat { +impl PartialEq for crate::Pat { fn eq(&self, other: &Self) -> bool { match (self, other) { - (Pat::Const(self0), Pat::Const(other0)) => self0 == other0, - (Pat::Ident(self0), Pat::Ident(other0)) => self0 == other0, - (Pat::Lit(self0), Pat::Lit(other0)) => self0 == other0, - (Pat::Macro(self0), Pat::Macro(other0)) => self0 == other0, - (Pat::Or(self0), Pat::Or(other0)) => self0 == other0, - (Pat::Paren(self0), Pat::Paren(other0)) => self0 == other0, - (Pat::Path(self0), Pat::Path(other0)) => self0 == other0, - (Pat::Range(self0), Pat::Range(other0)) => self0 == other0, - (Pat::Reference(self0), Pat::Reference(other0)) => self0 == other0, - (Pat::Rest(self0), Pat::Rest(other0)) => self0 == other0, - (Pat::Slice(self0), Pat::Slice(other0)) => self0 == other0, - (Pat::Struct(self0), Pat::Struct(other0)) => self0 == other0, - (Pat::Tuple(self0), Pat::Tuple(other0)) => self0 == other0, - (Pat::TupleStruct(self0), Pat::TupleStruct(other0)) => self0 == other0, - (Pat::Type(self0), Pat::Type(other0)) => self0 == other0, - (Pat::Verbatim(self0), Pat::Verbatim(other0)) => { + (crate::Pat::Const(self0), crate::Pat::Const(other0)) => self0 == other0, + (crate::Pat::Ident(self0), crate::Pat::Ident(other0)) => self0 == other0, + (crate::Pat::Lit(self0), crate::Pat::Lit(other0)) => self0 == other0, + (crate::Pat::Macro(self0), crate::Pat::Macro(other0)) => self0 == other0, + (crate::Pat::Or(self0), crate::Pat::Or(other0)) => self0 == other0, + 
(crate::Pat::Paren(self0), crate::Pat::Paren(other0)) => self0 == other0, + (crate::Pat::Path(self0), crate::Pat::Path(other0)) => self0 == other0, + (crate::Pat::Range(self0), crate::Pat::Range(other0)) => self0 == other0, + (crate::Pat::Reference(self0), crate::Pat::Reference(other0)) => { + self0 == other0 + } + (crate::Pat::Rest(self0), crate::Pat::Rest(other0)) => self0 == other0, + (crate::Pat::Slice(self0), crate::Pat::Slice(other0)) => self0 == other0, + (crate::Pat::Struct(self0), crate::Pat::Struct(other0)) => self0 == other0, + (crate::Pat::Tuple(self0), crate::Pat::Tuple(other0)) => self0 == other0, + (crate::Pat::TupleStruct(self0), crate::Pat::TupleStruct(other0)) => { + self0 == other0 + } + (crate::Pat::Type(self0), crate::Pat::Type(other0)) => self0 == other0, + (crate::Pat::Verbatim(self0), crate::Pat::Verbatim(other0)) => { TokenStreamHelper(self0) == TokenStreamHelper(other0) } - (Pat::Wild(self0), Pat::Wild(other0)) => self0 == other0, + (crate::Pat::Wild(self0), crate::Pat::Wild(other0)) => self0 == other0, _ => false, } } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for PatIdent {} +impl Eq for crate::PatIdent {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for PatIdent { +impl PartialEq for crate::PatIdent { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.by_ref == other.by_ref && self.mutability == other.mutability && self.ident == other.ident @@ -1423,10 +1477,10 @@ impl PartialEq for PatIdent { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for PatOr {} +impl Eq for crate::PatOr {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for PatOr { +impl PartialEq for crate::PatOr { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.leading_vert == other.leading_vert && self.cases == other.cases @@ -1434,20 
+1488,20 @@ impl PartialEq for PatOr { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for PatParen {} +impl Eq for crate::PatParen {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for PatParen { +impl PartialEq for crate::PatParen { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.pat == other.pat } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for PatReference {} +impl Eq for crate::PatReference {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for PatReference { +impl PartialEq for crate::PatReference { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.mutability == other.mutability && self.pat == other.pat @@ -1455,30 +1509,30 @@ impl PartialEq for PatReference { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for PatRest {} +impl Eq for crate::PatRest {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for PatRest { +impl PartialEq for crate::PatRest { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for PatSlice {} +impl Eq for crate::PatSlice {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for PatSlice { +impl PartialEq for crate::PatSlice { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.elems == other.elems } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for PatStruct {} +impl Eq for crate::PatStruct {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for PatStruct { +impl PartialEq for crate::PatStruct { fn eq(&self, other: &Self) -> bool { self.attrs == 
other.attrs && self.qself == other.qself && self.path == other.path && self.fields == other.fields && self.rest == other.rest @@ -1486,20 +1540,20 @@ impl PartialEq for PatStruct { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for PatTuple {} +impl Eq for crate::PatTuple {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for PatTuple { +impl PartialEq for crate::PatTuple { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.elems == other.elems } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for PatTupleStruct {} +impl Eq for crate::PatTupleStruct {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for PatTupleStruct { +impl PartialEq for crate::PatTupleStruct { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.qself == other.qself && self.path == other.path && self.elems == other.elems @@ -1507,50 +1561,50 @@ impl PartialEq for PatTupleStruct { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for PatType {} +impl Eq for crate::PatType {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for PatType { +impl PartialEq for crate::PatType { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.pat == other.pat && self.ty == other.ty } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for PatWild {} +impl Eq for crate::PatWild {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for PatWild { +impl PartialEq for crate::PatWild { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for Path {} +impl Eq for crate::Path 
{} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for Path { +impl PartialEq for crate::Path { fn eq(&self, other: &Self) -> bool { self.leading_colon == other.leading_colon && self.segments == other.segments } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for PathArguments {} +impl Eq for crate::PathArguments {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for PathArguments { +impl PartialEq for crate::PathArguments { fn eq(&self, other: &Self) -> bool { match (self, other) { - (PathArguments::None, PathArguments::None) => true, + (crate::PathArguments::None, crate::PathArguments::None) => true, ( - PathArguments::AngleBracketed(self0), - PathArguments::AngleBracketed(other0), + crate::PathArguments::AngleBracketed(self0), + crate::PathArguments::AngleBracketed(other0), ) => self0 == other0, ( - PathArguments::Parenthesized(self0), - PathArguments::Parenthesized(other0), + crate::PathArguments::Parenthesized(self0), + crate::PathArguments::Parenthesized(other0), ) => self0 == other0, _ => false, } @@ -1558,30 +1612,30 @@ impl PartialEq for PathArguments { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for PathSegment {} +impl Eq for crate::PathSegment {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for PathSegment { +impl PartialEq for crate::PathSegment { fn eq(&self, other: &Self) -> bool { self.ident == other.ident && self.arguments == other.arguments } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for PredicateLifetime {} +impl Eq for crate::PredicateLifetime {} #[cfg(any(feature = "derive", feature = "full"))] 
#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for PredicateLifetime { +impl PartialEq for crate::PredicateLifetime { fn eq(&self, other: &Self) -> bool { self.lifetime == other.lifetime && self.bounds == other.bounds } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for PredicateType {} +impl Eq for crate::PredicateType {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for PredicateType { +impl PartialEq for crate::PredicateType { fn eq(&self, other: &Self) -> bool { self.lifetimes == other.lifetimes && self.bounded_ty == other.bounded_ty && self.bounds == other.bounds @@ -1589,10 +1643,10 @@ impl PartialEq for PredicateType { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for QSelf {} +impl Eq for crate::QSelf {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for QSelf { +impl PartialEq for crate::QSelf { fn eq(&self, other: &Self) -> bool { self.ty == other.ty && self.position == other.position && self.as_token == other.as_token @@ -1600,24 +1654,24 @@ impl PartialEq for QSelf { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for RangeLimits {} +impl Eq for crate::RangeLimits {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for RangeLimits { +impl PartialEq for crate::RangeLimits { fn eq(&self, other: &Self) -> bool { match (self, other) { - (RangeLimits::HalfOpen(_), RangeLimits::HalfOpen(_)) => true, - (RangeLimits::Closed(_), RangeLimits::Closed(_)) => true, + (crate::RangeLimits::HalfOpen(_), crate::RangeLimits::HalfOpen(_)) => true, + (crate::RangeLimits::Closed(_), crate::RangeLimits::Closed(_)) => true, _ => false, } } } #[cfg(feature = "full")] 
#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for Receiver {} +impl Eq for crate::Receiver {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for Receiver { +impl PartialEq for crate::Receiver { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.reference == other.reference && self.mutability == other.mutability @@ -1626,24 +1680,26 @@ impl PartialEq for Receiver { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for ReturnType {} +impl Eq for crate::ReturnType {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for ReturnType { +impl PartialEq for crate::ReturnType { fn eq(&self, other: &Self) -> bool { match (self, other) { - (ReturnType::Default, ReturnType::Default) => true, - (ReturnType::Type(_, self1), ReturnType::Type(_, other1)) => self1 == other1, + (crate::ReturnType::Default, crate::ReturnType::Default) => true, + (crate::ReturnType::Type(_, self1), crate::ReturnType::Type(_, other1)) => { + self1 == other1 + } _ => false, } } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for Signature {} +impl Eq for crate::Signature {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for Signature { +impl PartialEq for crate::Signature { fn eq(&self, other: &Self) -> bool { self.constness == other.constness && self.asyncness == other.asyncness && self.unsafety == other.unsafety && self.abi == other.abi @@ -1654,42 +1710,42 @@ impl PartialEq for Signature { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for StaticMutability {} +impl Eq for crate::StaticMutability {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for StaticMutability { +impl PartialEq 
for crate::StaticMutability { fn eq(&self, other: &Self) -> bool { match (self, other) { - (StaticMutability::Mut(_), StaticMutability::Mut(_)) => true, - (StaticMutability::None, StaticMutability::None) => true, + (crate::StaticMutability::Mut(_), crate::StaticMutability::Mut(_)) => true, + (crate::StaticMutability::None, crate::StaticMutability::None) => true, _ => false, } } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for Stmt {} +impl Eq for crate::Stmt {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for Stmt { +impl PartialEq for crate::Stmt { fn eq(&self, other: &Self) -> bool { match (self, other) { - (Stmt::Local(self0), Stmt::Local(other0)) => self0 == other0, - (Stmt::Item(self0), Stmt::Item(other0)) => self0 == other0, - (Stmt::Expr(self0, self1), Stmt::Expr(other0, other1)) => { + (crate::Stmt::Local(self0), crate::Stmt::Local(other0)) => self0 == other0, + (crate::Stmt::Item(self0), crate::Stmt::Item(other0)) => self0 == other0, + (crate::Stmt::Expr(self0, self1), crate::Stmt::Expr(other0, other1)) => { self0 == other0 && self1 == other1 } - (Stmt::Macro(self0), Stmt::Macro(other0)) => self0 == other0, + (crate::Stmt::Macro(self0), crate::Stmt::Macro(other0)) => self0 == other0, _ => false, } } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for StmtMacro {} +impl Eq for crate::StmtMacro {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for StmtMacro { +impl PartialEq for crate::StmtMacro { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.mac == other.mac && self.semi_token == other.semi_token @@ -1697,10 +1753,10 @@ impl PartialEq for StmtMacro { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for TraitBound {} +impl Eq for crate::TraitBound {} #[cfg(any(feature = 
"derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for TraitBound { +impl PartialEq for crate::TraitBound { fn eq(&self, other: &Self) -> bool { self.paren_token == other.paren_token && self.modifier == other.modifier && self.lifetimes == other.lifetimes && self.path == other.path @@ -1708,31 +1764,42 @@ impl PartialEq for TraitBound { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for TraitBoundModifier {} +impl Eq for crate::TraitBoundModifier {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for TraitBoundModifier { +impl PartialEq for crate::TraitBoundModifier { fn eq(&self, other: &Self) -> bool { match (self, other) { - (TraitBoundModifier::None, TraitBoundModifier::None) => true, - (TraitBoundModifier::Maybe(_), TraitBoundModifier::Maybe(_)) => true, + (crate::TraitBoundModifier::None, crate::TraitBoundModifier::None) => true, + ( + crate::TraitBoundModifier::Maybe(_), + crate::TraitBoundModifier::Maybe(_), + ) => true, _ => false, } } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for TraitItem {} +impl Eq for crate::TraitItem {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for TraitItem { +impl PartialEq for crate::TraitItem { fn eq(&self, other: &Self) -> bool { match (self, other) { - (TraitItem::Const(self0), TraitItem::Const(other0)) => self0 == other0, - (TraitItem::Fn(self0), TraitItem::Fn(other0)) => self0 == other0, - (TraitItem::Type(self0), TraitItem::Type(other0)) => self0 == other0, - (TraitItem::Macro(self0), TraitItem::Macro(other0)) => self0 == other0, - (TraitItem::Verbatim(self0), TraitItem::Verbatim(other0)) => { + (crate::TraitItem::Const(self0), crate::TraitItem::Const(other0)) => { + self0 == other0 + } + (crate::TraitItem::Fn(self0), 
crate::TraitItem::Fn(other0)) => { + self0 == other0 + } + (crate::TraitItem::Type(self0), crate::TraitItem::Type(other0)) => { + self0 == other0 + } + (crate::TraitItem::Macro(self0), crate::TraitItem::Macro(other0)) => { + self0 == other0 + } + (crate::TraitItem::Verbatim(self0), crate::TraitItem::Verbatim(other0)) => { TokenStreamHelper(self0) == TokenStreamHelper(other0) } _ => false, @@ -1741,10 +1808,10 @@ impl PartialEq for TraitItem { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for TraitItemConst {} +impl Eq for crate::TraitItemConst {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for TraitItemConst { +impl PartialEq for crate::TraitItemConst { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.ident == other.ident && self.generics == other.generics && self.ty == other.ty @@ -1753,10 +1820,10 @@ impl PartialEq for TraitItemConst { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for TraitItemFn {} +impl Eq for crate::TraitItemFn {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for TraitItemFn { +impl PartialEq for crate::TraitItemFn { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.sig == other.sig && self.default == other.default && self.semi_token == other.semi_token @@ -1764,10 +1831,10 @@ impl PartialEq for TraitItemFn { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for TraitItemMacro {} +impl Eq for crate::TraitItemMacro {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for TraitItemMacro { +impl PartialEq for crate::TraitItemMacro { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.mac == other.mac && self.semi_token == other.semi_token @@ -1775,10 +1842,10 @@ impl PartialEq for TraitItemMacro { } 
#[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for TraitItemType {} +impl Eq for crate::TraitItemType {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for TraitItemType { +impl PartialEq for crate::TraitItemType { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.ident == other.ident && self.generics == other.generics && self.colon_token == other.colon_token @@ -1787,27 +1854,33 @@ impl PartialEq for TraitItemType { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for Type {} +impl Eq for crate::Type {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for Type { +impl PartialEq for crate::Type { fn eq(&self, other: &Self) -> bool { match (self, other) { - (Type::Array(self0), Type::Array(other0)) => self0 == other0, - (Type::BareFn(self0), Type::BareFn(other0)) => self0 == other0, - (Type::Group(self0), Type::Group(other0)) => self0 == other0, - (Type::ImplTrait(self0), Type::ImplTrait(other0)) => self0 == other0, - (Type::Infer(self0), Type::Infer(other0)) => self0 == other0, - (Type::Macro(self0), Type::Macro(other0)) => self0 == other0, - (Type::Never(self0), Type::Never(other0)) => self0 == other0, - (Type::Paren(self0), Type::Paren(other0)) => self0 == other0, - (Type::Path(self0), Type::Path(other0)) => self0 == other0, - (Type::Ptr(self0), Type::Ptr(other0)) => self0 == other0, - (Type::Reference(self0), Type::Reference(other0)) => self0 == other0, - (Type::Slice(self0), Type::Slice(other0)) => self0 == other0, - (Type::TraitObject(self0), Type::TraitObject(other0)) => self0 == other0, - (Type::Tuple(self0), Type::Tuple(other0)) => self0 == other0, - (Type::Verbatim(self0), Type::Verbatim(other0)) => { + (crate::Type::Array(self0), crate::Type::Array(other0)) => self0 == other0, + 
(crate::Type::BareFn(self0), crate::Type::BareFn(other0)) => self0 == other0, + (crate::Type::Group(self0), crate::Type::Group(other0)) => self0 == other0, + (crate::Type::ImplTrait(self0), crate::Type::ImplTrait(other0)) => { + self0 == other0 + } + (crate::Type::Infer(self0), crate::Type::Infer(other0)) => self0 == other0, + (crate::Type::Macro(self0), crate::Type::Macro(other0)) => self0 == other0, + (crate::Type::Never(self0), crate::Type::Never(other0)) => self0 == other0, + (crate::Type::Paren(self0), crate::Type::Paren(other0)) => self0 == other0, + (crate::Type::Path(self0), crate::Type::Path(other0)) => self0 == other0, + (crate::Type::Ptr(self0), crate::Type::Ptr(other0)) => self0 == other0, + (crate::Type::Reference(self0), crate::Type::Reference(other0)) => { + self0 == other0 + } + (crate::Type::Slice(self0), crate::Type::Slice(other0)) => self0 == other0, + (crate::Type::TraitObject(self0), crate::Type::TraitObject(other0)) => { + self0 == other0 + } + (crate::Type::Tuple(self0), crate::Type::Tuple(other0)) => self0 == other0, + (crate::Type::Verbatim(self0), crate::Type::Verbatim(other0)) => { TokenStreamHelper(self0) == TokenStreamHelper(other0) } _ => false, @@ -1816,20 +1889,20 @@ impl PartialEq for Type { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for TypeArray {} +impl Eq for crate::TypeArray {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for TypeArray { +impl PartialEq for crate::TypeArray { fn eq(&self, other: &Self) -> bool { self.elem == other.elem && self.len == other.len } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for TypeBareFn {} +impl Eq for crate::TypeBareFn {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for TypeBareFn { +impl 
PartialEq for crate::TypeBareFn { fn eq(&self, other: &Self) -> bool { self.lifetimes == other.lifetimes && self.unsafety == other.unsafety && self.abi == other.abi && self.inputs == other.inputs @@ -1838,60 +1911,60 @@ impl PartialEq for TypeBareFn { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for TypeGroup {} +impl Eq for crate::TypeGroup {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for TypeGroup { +impl PartialEq for crate::TypeGroup { fn eq(&self, other: &Self) -> bool { self.elem == other.elem } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for TypeImplTrait {} +impl Eq for crate::TypeImplTrait {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for TypeImplTrait { +impl PartialEq for crate::TypeImplTrait { fn eq(&self, other: &Self) -> bool { self.bounds == other.bounds } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for TypeInfer {} +impl Eq for crate::TypeInfer {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for TypeInfer { +impl PartialEq for crate::TypeInfer { fn eq(&self, _other: &Self) -> bool { true } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for TypeMacro {} +impl Eq for crate::TypeMacro {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for TypeMacro { +impl PartialEq for crate::TypeMacro { fn eq(&self, other: &Self) -> bool { self.mac == other.mac } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] 
-impl Eq for TypeNever {} +impl Eq for crate::TypeNever {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for TypeNever { +impl PartialEq for crate::TypeNever { fn eq(&self, _other: &Self) -> bool { true } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for TypeParam {} +impl Eq for crate::TypeParam {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for TypeParam { +impl PartialEq for crate::TypeParam { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.ident == other.ident && self.colon_token == other.colon_token && self.bounds == other.bounds @@ -1900,51 +1973,54 @@ impl PartialEq for TypeParam { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for TypeParamBound {} +impl Eq for crate::TypeParamBound {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for TypeParamBound { +impl PartialEq for crate::TypeParamBound { fn eq(&self, other: &Self) -> bool { match (self, other) { - (TypeParamBound::Trait(self0), TypeParamBound::Trait(other0)) => { - self0 == other0 - } - (TypeParamBound::Lifetime(self0), TypeParamBound::Lifetime(other0)) => { - self0 == other0 - } - (TypeParamBound::Verbatim(self0), TypeParamBound::Verbatim(other0)) => { - TokenStreamHelper(self0) == TokenStreamHelper(other0) - } + ( + crate::TypeParamBound::Trait(self0), + crate::TypeParamBound::Trait(other0), + ) => self0 == other0, + ( + crate::TypeParamBound::Lifetime(self0), + crate::TypeParamBound::Lifetime(other0), + ) => self0 == other0, + ( + crate::TypeParamBound::Verbatim(self0), + crate::TypeParamBound::Verbatim(other0), + ) => TokenStreamHelper(self0) == TokenStreamHelper(other0), _ => false, } } } 
#[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for TypeParen {} +impl Eq for crate::TypeParen {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for TypeParen { +impl PartialEq for crate::TypeParen { fn eq(&self, other: &Self) -> bool { self.elem == other.elem } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for TypePath {} +impl Eq for crate::TypePath {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for TypePath { +impl PartialEq for crate::TypePath { fn eq(&self, other: &Self) -> bool { self.qself == other.qself && self.path == other.path } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for TypePtr {} +impl Eq for crate::TypePtr {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for TypePtr { +impl PartialEq for crate::TypePtr { fn eq(&self, other: &Self) -> bool { self.const_token == other.const_token && self.mutability == other.mutability && self.elem == other.elem @@ -1952,10 +2028,10 @@ impl PartialEq for TypePtr { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for TypeReference {} +impl Eq for crate::TypeReference {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for TypeReference { +impl PartialEq for crate::TypeReference { fn eq(&self, other: &Self) -> bool { self.lifetime == other.lifetime && self.mutability == other.mutability && self.elem == other.elem @@ -1963,132 +2039,142 @@ impl PartialEq for TypeReference { } #[cfg(any(feature = "derive", feature = "full"))] 
#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for TypeSlice {} +impl Eq for crate::TypeSlice {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for TypeSlice { +impl PartialEq for crate::TypeSlice { fn eq(&self, other: &Self) -> bool { self.elem == other.elem } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for TypeTraitObject {} +impl Eq for crate::TypeTraitObject {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for TypeTraitObject { +impl PartialEq for crate::TypeTraitObject { fn eq(&self, other: &Self) -> bool { self.dyn_token == other.dyn_token && self.bounds == other.bounds } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for TypeTuple {} +impl Eq for crate::TypeTuple {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for TypeTuple { +impl PartialEq for crate::TypeTuple { fn eq(&self, other: &Self) -> bool { self.elems == other.elems } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for UnOp {} +impl Eq for crate::UnOp {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for UnOp { +impl PartialEq for crate::UnOp { fn eq(&self, other: &Self) -> bool { match (self, other) { - (UnOp::Deref(_), UnOp::Deref(_)) => true, - (UnOp::Not(_), UnOp::Not(_)) => true, - (UnOp::Neg(_), UnOp::Neg(_)) => true, + (crate::UnOp::Deref(_), crate::UnOp::Deref(_)) => true, + (crate::UnOp::Not(_), crate::UnOp::Not(_)) => true, + (crate::UnOp::Neg(_), crate::UnOp::Neg(_)) => true, _ => false, } } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = 
"extra-traits")))] -impl Eq for UseGlob {} +impl Eq for crate::UseGlob {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for UseGlob { +impl PartialEq for crate::UseGlob { fn eq(&self, _other: &Self) -> bool { true } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for UseGroup {} +impl Eq for crate::UseGroup {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for UseGroup { +impl PartialEq for crate::UseGroup { fn eq(&self, other: &Self) -> bool { self.items == other.items } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for UseName {} +impl Eq for crate::UseName {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for UseName { +impl PartialEq for crate::UseName { fn eq(&self, other: &Self) -> bool { self.ident == other.ident } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for UsePath {} +impl Eq for crate::UsePath {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for UsePath { +impl PartialEq for crate::UsePath { fn eq(&self, other: &Self) -> bool { self.ident == other.ident && self.tree == other.tree } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for UseRename {} +impl Eq for crate::UseRename {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for UseRename { +impl PartialEq for crate::UseRename { fn eq(&self, other: &Self) -> bool { self.ident == other.ident && self.rename == other.rename } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for UseTree {} +impl Eq for crate::UseTree {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for UseTree 
{ +impl PartialEq for crate::UseTree { fn eq(&self, other: &Self) -> bool { match (self, other) { - (UseTree::Path(self0), UseTree::Path(other0)) => self0 == other0, - (UseTree::Name(self0), UseTree::Name(other0)) => self0 == other0, - (UseTree::Rename(self0), UseTree::Rename(other0)) => self0 == other0, - (UseTree::Glob(self0), UseTree::Glob(other0)) => self0 == other0, - (UseTree::Group(self0), UseTree::Group(other0)) => self0 == other0, + (crate::UseTree::Path(self0), crate::UseTree::Path(other0)) => { + self0 == other0 + } + (crate::UseTree::Name(self0), crate::UseTree::Name(other0)) => { + self0 == other0 + } + (crate::UseTree::Rename(self0), crate::UseTree::Rename(other0)) => { + self0 == other0 + } + (crate::UseTree::Glob(self0), crate::UseTree::Glob(other0)) => { + self0 == other0 + } + (crate::UseTree::Group(self0), crate::UseTree::Group(other0)) => { + self0 == other0 + } _ => false, } } } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for Variadic {} +impl Eq for crate::Variadic {} #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for Variadic { +impl PartialEq for crate::Variadic { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.pat == other.pat && self.comma == other.comma } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for Variant {} +impl Eq for crate::Variant {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for Variant { +impl PartialEq for crate::Variant { fn eq(&self, other: &Self) -> bool { self.attrs == other.attrs && self.ident == other.ident && self.fields == other.fields && self.discriminant == other.discriminant @@ -2096,53 +2182,55 @@ impl PartialEq for Variant { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq 
for VisRestricted {} +impl Eq for crate::VisRestricted {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for VisRestricted { +impl PartialEq for crate::VisRestricted { fn eq(&self, other: &Self) -> bool { self.in_token == other.in_token && self.path == other.path } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for Visibility {} +impl Eq for crate::Visibility {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for Visibility { +impl PartialEq for crate::Visibility { fn eq(&self, other: &Self) -> bool { match (self, other) { - (Visibility::Public(_), Visibility::Public(_)) => true, - (Visibility::Restricted(self0), Visibility::Restricted(other0)) => { - self0 == other0 - } - (Visibility::Inherited, Visibility::Inherited) => true, + (crate::Visibility::Public(_), crate::Visibility::Public(_)) => true, + ( + crate::Visibility::Restricted(self0), + crate::Visibility::Restricted(other0), + ) => self0 == other0, + (crate::Visibility::Inherited, crate::Visibility::Inherited) => true, _ => false, } } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for WhereClause {} +impl Eq for crate::WhereClause {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for WhereClause { +impl PartialEq for crate::WhereClause { fn eq(&self, other: &Self) -> bool { self.predicates == other.predicates } } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Eq for WherePredicate {} +impl Eq for crate::WherePredicate {} #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl PartialEq for WherePredicate { +impl PartialEq for 
crate::WherePredicate { fn eq(&self, other: &Self) -> bool { match (self, other) { - (WherePredicate::Lifetime(self0), WherePredicate::Lifetime(other0)) => { - self0 == other0 - } - (WherePredicate::Type(self0), WherePredicate::Type(other0)) => { + ( + crate::WherePredicate::Lifetime(self0), + crate::WherePredicate::Lifetime(other0), + ) => self0 == other0, + (crate::WherePredicate::Type(self0), crate::WherePredicate::Type(other0)) => { self0 == other0 } _ => false, diff --git a/vendor/syn/src/gen/fold.rs b/vendor/syn/src/gen/fold.rs index 624c15b..872ffe1 100644 --- a/vendor/syn/src/gen/fold.rs +++ b/vendor/syn/src/gen/fold.rs @@ -2,11 +2,13 @@ // It is not intended for manual editing. #![allow(unreachable_code, unused_variables)] -#![allow(clippy::match_wildcard_for_single_variants, clippy::needless_match)] +#![allow( + clippy::match_wildcard_for_single_variants, + clippy::needless_match, + clippy::needless_pass_by_ref_mut, +)] #[cfg(any(feature = "full", feature = "derive"))] -use crate::gen::helper::fold::*; -use crate::*; -use proc_macro2::Span; +use crate::gen::helper::fold::FoldHelper; #[cfg(feature = "full")] macro_rules! full { ($e:expr) => { @@ -26,751 +28,984 @@ macro_rules! 
full { /// [module documentation]: self pub trait Fold { #[cfg(any(feature = "derive", feature = "full"))] - fn fold_abi(&mut self, i: Abi) -> Abi { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_abi(&mut self, i: crate::Abi) -> crate::Abi { fold_abi(self, i) } #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] fn fold_angle_bracketed_generic_arguments( &mut self, - i: AngleBracketedGenericArguments, - ) -> AngleBracketedGenericArguments { + i: crate::AngleBracketedGenericArguments, + ) -> crate::AngleBracketedGenericArguments { fold_angle_bracketed_generic_arguments(self, i) } #[cfg(feature = "full")] - fn fold_arm(&mut self, i: Arm) -> Arm { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_arm(&mut self, i: crate::Arm) -> crate::Arm { fold_arm(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_assoc_const(&mut self, i: AssocConst) -> AssocConst { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_assoc_const(&mut self, i: crate::AssocConst) -> crate::AssocConst { fold_assoc_const(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_assoc_type(&mut self, i: AssocType) -> AssocType { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_assoc_type(&mut self, i: crate::AssocType) -> crate::AssocType { fold_assoc_type(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_attr_style(&mut self, i: AttrStyle) -> AttrStyle { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_attr_style(&mut self, i: crate::AttrStyle) -> crate::AttrStyle { fold_attr_style(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_attribute(&mut self, i: Attribute) -> Attribute { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_attribute(&mut self, i: 
crate::Attribute) -> crate::Attribute { fold_attribute(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_bare_fn_arg(&mut self, i: BareFnArg) -> BareFnArg { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_bare_fn_arg(&mut self, i: crate::BareFnArg) -> crate::BareFnArg { fold_bare_fn_arg(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_bare_variadic(&mut self, i: BareVariadic) -> BareVariadic { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_bare_variadic(&mut self, i: crate::BareVariadic) -> crate::BareVariadic { fold_bare_variadic(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_bin_op(&mut self, i: BinOp) -> BinOp { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_bin_op(&mut self, i: crate::BinOp) -> crate::BinOp { fold_bin_op(self, i) } #[cfg(feature = "full")] - fn fold_block(&mut self, i: Block) -> Block { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_block(&mut self, i: crate::Block) -> crate::Block { fold_block(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_bound_lifetimes(&mut self, i: BoundLifetimes) -> BoundLifetimes { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_bound_lifetimes( + &mut self, + i: crate::BoundLifetimes, + ) -> crate::BoundLifetimes { fold_bound_lifetimes(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_const_param(&mut self, i: ConstParam) -> ConstParam { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_const_param(&mut self, i: crate::ConstParam) -> crate::ConstParam { fold_const_param(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_constraint(&mut self, i: Constraint) -> Constraint { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_constraint(&mut self, i: 
crate::Constraint) -> crate::Constraint { fold_constraint(self, i) } #[cfg(feature = "derive")] - fn fold_data(&mut self, i: Data) -> Data { + #[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] + fn fold_data(&mut self, i: crate::Data) -> crate::Data { fold_data(self, i) } #[cfg(feature = "derive")] - fn fold_data_enum(&mut self, i: DataEnum) -> DataEnum { + #[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] + fn fold_data_enum(&mut self, i: crate::DataEnum) -> crate::DataEnum { fold_data_enum(self, i) } #[cfg(feature = "derive")] - fn fold_data_struct(&mut self, i: DataStruct) -> DataStruct { + #[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] + fn fold_data_struct(&mut self, i: crate::DataStruct) -> crate::DataStruct { fold_data_struct(self, i) } #[cfg(feature = "derive")] - fn fold_data_union(&mut self, i: DataUnion) -> DataUnion { + #[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] + fn fold_data_union(&mut self, i: crate::DataUnion) -> crate::DataUnion { fold_data_union(self, i) } #[cfg(feature = "derive")] - fn fold_derive_input(&mut self, i: DeriveInput) -> DeriveInput { + #[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] + fn fold_derive_input(&mut self, i: crate::DeriveInput) -> crate::DeriveInput { fold_derive_input(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_expr(&mut self, i: Expr) -> Expr { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_expr(&mut self, i: crate::Expr) -> crate::Expr { fold_expr(self, i) } #[cfg(feature = "full")] - fn fold_expr_array(&mut self, i: ExprArray) -> ExprArray { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_expr_array(&mut self, i: crate::ExprArray) -> crate::ExprArray { fold_expr_array(self, i) } #[cfg(feature = "full")] - fn fold_expr_assign(&mut self, i: ExprAssign) -> ExprAssign { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_expr_assign(&mut self, i: crate::ExprAssign) -> crate::ExprAssign { fold_expr_assign(self, i) } 
#[cfg(feature = "full")] - fn fold_expr_async(&mut self, i: ExprAsync) -> ExprAsync { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_expr_async(&mut self, i: crate::ExprAsync) -> crate::ExprAsync { fold_expr_async(self, i) } #[cfg(feature = "full")] - fn fold_expr_await(&mut self, i: ExprAwait) -> ExprAwait { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_expr_await(&mut self, i: crate::ExprAwait) -> crate::ExprAwait { fold_expr_await(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_expr_binary(&mut self, i: ExprBinary) -> ExprBinary { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_expr_binary(&mut self, i: crate::ExprBinary) -> crate::ExprBinary { fold_expr_binary(self, i) } #[cfg(feature = "full")] - fn fold_expr_block(&mut self, i: ExprBlock) -> ExprBlock { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_expr_block(&mut self, i: crate::ExprBlock) -> crate::ExprBlock { fold_expr_block(self, i) } #[cfg(feature = "full")] - fn fold_expr_break(&mut self, i: ExprBreak) -> ExprBreak { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_expr_break(&mut self, i: crate::ExprBreak) -> crate::ExprBreak { fold_expr_break(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_expr_call(&mut self, i: ExprCall) -> ExprCall { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_expr_call(&mut self, i: crate::ExprCall) -> crate::ExprCall { fold_expr_call(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_expr_cast(&mut self, i: ExprCast) -> ExprCast { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_expr_cast(&mut self, i: crate::ExprCast) -> crate::ExprCast { fold_expr_cast(self, i) } #[cfg(feature = "full")] - fn fold_expr_closure(&mut self, i: ExprClosure) -> ExprClosure { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_expr_closure(&mut self, i: 
crate::ExprClosure) -> crate::ExprClosure { fold_expr_closure(self, i) } #[cfg(feature = "full")] - fn fold_expr_const(&mut self, i: ExprConst) -> ExprConst { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_expr_const(&mut self, i: crate::ExprConst) -> crate::ExprConst { fold_expr_const(self, i) } #[cfg(feature = "full")] - fn fold_expr_continue(&mut self, i: ExprContinue) -> ExprContinue { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_expr_continue(&mut self, i: crate::ExprContinue) -> crate::ExprContinue { fold_expr_continue(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_expr_field(&mut self, i: ExprField) -> ExprField { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_expr_field(&mut self, i: crate::ExprField) -> crate::ExprField { fold_expr_field(self, i) } #[cfg(feature = "full")] - fn fold_expr_for_loop(&mut self, i: ExprForLoop) -> ExprForLoop { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_expr_for_loop(&mut self, i: crate::ExprForLoop) -> crate::ExprForLoop { fold_expr_for_loop(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_expr_group(&mut self, i: ExprGroup) -> ExprGroup { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_expr_group(&mut self, i: crate::ExprGroup) -> crate::ExprGroup { fold_expr_group(self, i) } #[cfg(feature = "full")] - fn fold_expr_if(&mut self, i: ExprIf) -> ExprIf { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_expr_if(&mut self, i: crate::ExprIf) -> crate::ExprIf { fold_expr_if(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_expr_index(&mut self, i: ExprIndex) -> ExprIndex { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_expr_index(&mut self, i: crate::ExprIndex) -> crate::ExprIndex { fold_expr_index(self, i) } #[cfg(feature = "full")] - fn fold_expr_infer(&mut self, i: ExprInfer) -> ExprInfer { 
+ #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_expr_infer(&mut self, i: crate::ExprInfer) -> crate::ExprInfer { fold_expr_infer(self, i) } #[cfg(feature = "full")] - fn fold_expr_let(&mut self, i: ExprLet) -> ExprLet { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_expr_let(&mut self, i: crate::ExprLet) -> crate::ExprLet { fold_expr_let(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_expr_lit(&mut self, i: ExprLit) -> ExprLit { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_expr_lit(&mut self, i: crate::ExprLit) -> crate::ExprLit { fold_expr_lit(self, i) } #[cfg(feature = "full")] - fn fold_expr_loop(&mut self, i: ExprLoop) -> ExprLoop { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_expr_loop(&mut self, i: crate::ExprLoop) -> crate::ExprLoop { fold_expr_loop(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_expr_macro(&mut self, i: ExprMacro) -> ExprMacro { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_expr_macro(&mut self, i: crate::ExprMacro) -> crate::ExprMacro { fold_expr_macro(self, i) } #[cfg(feature = "full")] - fn fold_expr_match(&mut self, i: ExprMatch) -> ExprMatch { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_expr_match(&mut self, i: crate::ExprMatch) -> crate::ExprMatch { fold_expr_match(self, i) } - #[cfg(feature = "full")] - fn fold_expr_method_call(&mut self, i: ExprMethodCall) -> ExprMethodCall { + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_expr_method_call( + &mut self, + i: crate::ExprMethodCall, + ) -> crate::ExprMethodCall { fold_expr_method_call(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_expr_paren(&mut self, i: ExprParen) -> ExprParen { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_expr_paren(&mut self, i: 
crate::ExprParen) -> crate::ExprParen { fold_expr_paren(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_expr_path(&mut self, i: ExprPath) -> ExprPath { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_expr_path(&mut self, i: crate::ExprPath) -> crate::ExprPath { fold_expr_path(self, i) } #[cfg(feature = "full")] - fn fold_expr_range(&mut self, i: ExprRange) -> ExprRange { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_expr_range(&mut self, i: crate::ExprRange) -> crate::ExprRange { fold_expr_range(self, i) } - #[cfg(feature = "full")] - fn fold_expr_reference(&mut self, i: ExprReference) -> ExprReference { + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_expr_reference(&mut self, i: crate::ExprReference) -> crate::ExprReference { fold_expr_reference(self, i) } #[cfg(feature = "full")] - fn fold_expr_repeat(&mut self, i: ExprRepeat) -> ExprRepeat { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_expr_repeat(&mut self, i: crate::ExprRepeat) -> crate::ExprRepeat { fold_expr_repeat(self, i) } #[cfg(feature = "full")] - fn fold_expr_return(&mut self, i: ExprReturn) -> ExprReturn { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_expr_return(&mut self, i: crate::ExprReturn) -> crate::ExprReturn { fold_expr_return(self, i) } - #[cfg(feature = "full")] - fn fold_expr_struct(&mut self, i: ExprStruct) -> ExprStruct { + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_expr_struct(&mut self, i: crate::ExprStruct) -> crate::ExprStruct { fold_expr_struct(self, i) } #[cfg(feature = "full")] - fn fold_expr_try(&mut self, i: ExprTry) -> ExprTry { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_expr_try(&mut self, i: crate::ExprTry) -> crate::ExprTry { fold_expr_try(self, i) } #[cfg(feature = 
"full")] - fn fold_expr_try_block(&mut self, i: ExprTryBlock) -> ExprTryBlock { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_expr_try_block(&mut self, i: crate::ExprTryBlock) -> crate::ExprTryBlock { fold_expr_try_block(self, i) } #[cfg(feature = "full")] - fn fold_expr_tuple(&mut self, i: ExprTuple) -> ExprTuple { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_expr_tuple(&mut self, i: crate::ExprTuple) -> crate::ExprTuple { fold_expr_tuple(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_expr_unary(&mut self, i: ExprUnary) -> ExprUnary { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_expr_unary(&mut self, i: crate::ExprUnary) -> crate::ExprUnary { fold_expr_unary(self, i) } #[cfg(feature = "full")] - fn fold_expr_unsafe(&mut self, i: ExprUnsafe) -> ExprUnsafe { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_expr_unsafe(&mut self, i: crate::ExprUnsafe) -> crate::ExprUnsafe { fold_expr_unsafe(self, i) } #[cfg(feature = "full")] - fn fold_expr_while(&mut self, i: ExprWhile) -> ExprWhile { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_expr_while(&mut self, i: crate::ExprWhile) -> crate::ExprWhile { fold_expr_while(self, i) } #[cfg(feature = "full")] - fn fold_expr_yield(&mut self, i: ExprYield) -> ExprYield { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_expr_yield(&mut self, i: crate::ExprYield) -> crate::ExprYield { fold_expr_yield(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_field(&mut self, i: Field) -> Field { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_field(&mut self, i: crate::Field) -> crate::Field { fold_field(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_field_mutability(&mut self, i: FieldMutability) -> FieldMutability { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_field_mutability( + &mut 
self, + i: crate::FieldMutability, + ) -> crate::FieldMutability { fold_field_mutability(self, i) } #[cfg(feature = "full")] - fn fold_field_pat(&mut self, i: FieldPat) -> FieldPat { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_field_pat(&mut self, i: crate::FieldPat) -> crate::FieldPat { fold_field_pat(self, i) } - #[cfg(feature = "full")] - fn fold_field_value(&mut self, i: FieldValue) -> FieldValue { + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_field_value(&mut self, i: crate::FieldValue) -> crate::FieldValue { fold_field_value(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_fields(&mut self, i: Fields) -> Fields { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_fields(&mut self, i: crate::Fields) -> crate::Fields { fold_fields(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_fields_named(&mut self, i: FieldsNamed) -> FieldsNamed { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_fields_named(&mut self, i: crate::FieldsNamed) -> crate::FieldsNamed { fold_fields_named(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_fields_unnamed(&mut self, i: FieldsUnnamed) -> FieldsUnnamed { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_fields_unnamed(&mut self, i: crate::FieldsUnnamed) -> crate::FieldsUnnamed { fold_fields_unnamed(self, i) } #[cfg(feature = "full")] - fn fold_file(&mut self, i: File) -> File { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_file(&mut self, i: crate::File) -> crate::File { fold_file(self, i) } #[cfg(feature = "full")] - fn fold_fn_arg(&mut self, i: FnArg) -> FnArg { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_fn_arg(&mut self, i: crate::FnArg) -> crate::FnArg { fold_fn_arg(self, i) } #[cfg(feature = "full")] - fn 
fold_foreign_item(&mut self, i: ForeignItem) -> ForeignItem { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_foreign_item(&mut self, i: crate::ForeignItem) -> crate::ForeignItem { fold_foreign_item(self, i) } #[cfg(feature = "full")] - fn fold_foreign_item_fn(&mut self, i: ForeignItemFn) -> ForeignItemFn { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_foreign_item_fn(&mut self, i: crate::ForeignItemFn) -> crate::ForeignItemFn { fold_foreign_item_fn(self, i) } #[cfg(feature = "full")] - fn fold_foreign_item_macro(&mut self, i: ForeignItemMacro) -> ForeignItemMacro { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_foreign_item_macro( + &mut self, + i: crate::ForeignItemMacro, + ) -> crate::ForeignItemMacro { fold_foreign_item_macro(self, i) } #[cfg(feature = "full")] - fn fold_foreign_item_static(&mut self, i: ForeignItemStatic) -> ForeignItemStatic { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_foreign_item_static( + &mut self, + i: crate::ForeignItemStatic, + ) -> crate::ForeignItemStatic { fold_foreign_item_static(self, i) } #[cfg(feature = "full")] - fn fold_foreign_item_type(&mut self, i: ForeignItemType) -> ForeignItemType { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_foreign_item_type( + &mut self, + i: crate::ForeignItemType, + ) -> crate::ForeignItemType { fold_foreign_item_type(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_generic_argument(&mut self, i: GenericArgument) -> GenericArgument { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_generic_argument( + &mut self, + i: crate::GenericArgument, + ) -> crate::GenericArgument { fold_generic_argument(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_generic_param(&mut self, i: GenericParam) -> GenericParam { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_generic_param(&mut self, i: crate::GenericParam) -> 
crate::GenericParam { fold_generic_param(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_generics(&mut self, i: Generics) -> Generics { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_generics(&mut self, i: crate::Generics) -> crate::Generics { fold_generics(self, i) } - fn fold_ident(&mut self, i: Ident) -> Ident { + fn fold_ident(&mut self, i: proc_macro2::Ident) -> proc_macro2::Ident { fold_ident(self, i) } #[cfg(feature = "full")] - fn fold_impl_item(&mut self, i: ImplItem) -> ImplItem { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_impl_item(&mut self, i: crate::ImplItem) -> crate::ImplItem { fold_impl_item(self, i) } #[cfg(feature = "full")] - fn fold_impl_item_const(&mut self, i: ImplItemConst) -> ImplItemConst { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_impl_item_const(&mut self, i: crate::ImplItemConst) -> crate::ImplItemConst { fold_impl_item_const(self, i) } #[cfg(feature = "full")] - fn fold_impl_item_fn(&mut self, i: ImplItemFn) -> ImplItemFn { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_impl_item_fn(&mut self, i: crate::ImplItemFn) -> crate::ImplItemFn { fold_impl_item_fn(self, i) } #[cfg(feature = "full")] - fn fold_impl_item_macro(&mut self, i: ImplItemMacro) -> ImplItemMacro { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_impl_item_macro(&mut self, i: crate::ImplItemMacro) -> crate::ImplItemMacro { fold_impl_item_macro(self, i) } #[cfg(feature = "full")] - fn fold_impl_item_type(&mut self, i: ImplItemType) -> ImplItemType { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_impl_item_type(&mut self, i: crate::ImplItemType) -> crate::ImplItemType { fold_impl_item_type(self, i) } #[cfg(feature = "full")] - fn fold_impl_restriction(&mut self, i: ImplRestriction) -> ImplRestriction { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_impl_restriction( + &mut self, + i: crate::ImplRestriction, + ) -> 
crate::ImplRestriction { fold_impl_restriction(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_index(&mut self, i: Index) -> Index { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_index(&mut self, i: crate::Index) -> crate::Index { fold_index(self, i) } #[cfg(feature = "full")] - fn fold_item(&mut self, i: Item) -> Item { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_item(&mut self, i: crate::Item) -> crate::Item { fold_item(self, i) } #[cfg(feature = "full")] - fn fold_item_const(&mut self, i: ItemConst) -> ItemConst { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_item_const(&mut self, i: crate::ItemConst) -> crate::ItemConst { fold_item_const(self, i) } #[cfg(feature = "full")] - fn fold_item_enum(&mut self, i: ItemEnum) -> ItemEnum { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_item_enum(&mut self, i: crate::ItemEnum) -> crate::ItemEnum { fold_item_enum(self, i) } #[cfg(feature = "full")] - fn fold_item_extern_crate(&mut self, i: ItemExternCrate) -> ItemExternCrate { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_item_extern_crate( + &mut self, + i: crate::ItemExternCrate, + ) -> crate::ItemExternCrate { fold_item_extern_crate(self, i) } #[cfg(feature = "full")] - fn fold_item_fn(&mut self, i: ItemFn) -> ItemFn { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_item_fn(&mut self, i: crate::ItemFn) -> crate::ItemFn { fold_item_fn(self, i) } #[cfg(feature = "full")] - fn fold_item_foreign_mod(&mut self, i: ItemForeignMod) -> ItemForeignMod { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_item_foreign_mod( + &mut self, + i: crate::ItemForeignMod, + ) -> crate::ItemForeignMod { fold_item_foreign_mod(self, i) } #[cfg(feature = "full")] - fn fold_item_impl(&mut self, i: ItemImpl) -> ItemImpl { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_item_impl(&mut self, i: crate::ItemImpl) -> crate::ItemImpl { 
fold_item_impl(self, i) } #[cfg(feature = "full")] - fn fold_item_macro(&mut self, i: ItemMacro) -> ItemMacro { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_item_macro(&mut self, i: crate::ItemMacro) -> crate::ItemMacro { fold_item_macro(self, i) } #[cfg(feature = "full")] - fn fold_item_mod(&mut self, i: ItemMod) -> ItemMod { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_item_mod(&mut self, i: crate::ItemMod) -> crate::ItemMod { fold_item_mod(self, i) } #[cfg(feature = "full")] - fn fold_item_static(&mut self, i: ItemStatic) -> ItemStatic { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_item_static(&mut self, i: crate::ItemStatic) -> crate::ItemStatic { fold_item_static(self, i) } #[cfg(feature = "full")] - fn fold_item_struct(&mut self, i: ItemStruct) -> ItemStruct { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_item_struct(&mut self, i: crate::ItemStruct) -> crate::ItemStruct { fold_item_struct(self, i) } #[cfg(feature = "full")] - fn fold_item_trait(&mut self, i: ItemTrait) -> ItemTrait { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_item_trait(&mut self, i: crate::ItemTrait) -> crate::ItemTrait { fold_item_trait(self, i) } #[cfg(feature = "full")] - fn fold_item_trait_alias(&mut self, i: ItemTraitAlias) -> ItemTraitAlias { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_item_trait_alias( + &mut self, + i: crate::ItemTraitAlias, + ) -> crate::ItemTraitAlias { fold_item_trait_alias(self, i) } #[cfg(feature = "full")] - fn fold_item_type(&mut self, i: ItemType) -> ItemType { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_item_type(&mut self, i: crate::ItemType) -> crate::ItemType { fold_item_type(self, i) } #[cfg(feature = "full")] - fn fold_item_union(&mut self, i: ItemUnion) -> ItemUnion { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_item_union(&mut self, i: crate::ItemUnion) -> crate::ItemUnion { fold_item_union(self, i) } #[cfg(feature = 
"full")] - fn fold_item_use(&mut self, i: ItemUse) -> ItemUse { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_item_use(&mut self, i: crate::ItemUse) -> crate::ItemUse { fold_item_use(self, i) } #[cfg(feature = "full")] - fn fold_label(&mut self, i: Label) -> Label { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_label(&mut self, i: crate::Label) -> crate::Label { fold_label(self, i) } - fn fold_lifetime(&mut self, i: Lifetime) -> Lifetime { + fn fold_lifetime(&mut self, i: crate::Lifetime) -> crate::Lifetime { fold_lifetime(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_lifetime_param(&mut self, i: LifetimeParam) -> LifetimeParam { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_lifetime_param(&mut self, i: crate::LifetimeParam) -> crate::LifetimeParam { fold_lifetime_param(self, i) } - fn fold_lit(&mut self, i: Lit) -> Lit { + fn fold_lit(&mut self, i: crate::Lit) -> crate::Lit { fold_lit(self, i) } - fn fold_lit_bool(&mut self, i: LitBool) -> LitBool { + fn fold_lit_bool(&mut self, i: crate::LitBool) -> crate::LitBool { fold_lit_bool(self, i) } - fn fold_lit_byte(&mut self, i: LitByte) -> LitByte { + fn fold_lit_byte(&mut self, i: crate::LitByte) -> crate::LitByte { fold_lit_byte(self, i) } - fn fold_lit_byte_str(&mut self, i: LitByteStr) -> LitByteStr { + fn fold_lit_byte_str(&mut self, i: crate::LitByteStr) -> crate::LitByteStr { fold_lit_byte_str(self, i) } - fn fold_lit_char(&mut self, i: LitChar) -> LitChar { + fn fold_lit_char(&mut self, i: crate::LitChar) -> crate::LitChar { fold_lit_char(self, i) } - fn fold_lit_float(&mut self, i: LitFloat) -> LitFloat { + fn fold_lit_float(&mut self, i: crate::LitFloat) -> crate::LitFloat { fold_lit_float(self, i) } - fn fold_lit_int(&mut self, i: LitInt) -> LitInt { + fn fold_lit_int(&mut self, i: crate::LitInt) -> crate::LitInt { fold_lit_int(self, i) } - fn fold_lit_str(&mut self, i: LitStr) -> LitStr { + fn fold_lit_str(&mut 
self, i: crate::LitStr) -> crate::LitStr { fold_lit_str(self, i) } #[cfg(feature = "full")] - fn fold_local(&mut self, i: Local) -> Local { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_local(&mut self, i: crate::Local) -> crate::Local { fold_local(self, i) } #[cfg(feature = "full")] - fn fold_local_init(&mut self, i: LocalInit) -> LocalInit { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_local_init(&mut self, i: crate::LocalInit) -> crate::LocalInit { fold_local_init(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_macro(&mut self, i: Macro) -> Macro { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_macro(&mut self, i: crate::Macro) -> crate::Macro { fold_macro(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_macro_delimiter(&mut self, i: MacroDelimiter) -> MacroDelimiter { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_macro_delimiter( + &mut self, + i: crate::MacroDelimiter, + ) -> crate::MacroDelimiter { fold_macro_delimiter(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_member(&mut self, i: Member) -> Member { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_member(&mut self, i: crate::Member) -> crate::Member { fold_member(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_meta(&mut self, i: Meta) -> Meta { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_meta(&mut self, i: crate::Meta) -> crate::Meta { fold_meta(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_meta_list(&mut self, i: MetaList) -> MetaList { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_meta_list(&mut self, i: crate::MetaList) -> crate::MetaList { fold_meta_list(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_meta_name_value(&mut self, i: 
MetaNameValue) -> MetaNameValue { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_meta_name_value(&mut self, i: crate::MetaNameValue) -> crate::MetaNameValue { fold_meta_name_value(self, i) } #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] fn fold_parenthesized_generic_arguments( &mut self, - i: ParenthesizedGenericArguments, - ) -> ParenthesizedGenericArguments { + i: crate::ParenthesizedGenericArguments, + ) -> crate::ParenthesizedGenericArguments { fold_parenthesized_generic_arguments(self, i) } #[cfg(feature = "full")] - fn fold_pat(&mut self, i: Pat) -> Pat { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_pat(&mut self, i: crate::Pat) -> crate::Pat { fold_pat(self, i) } #[cfg(feature = "full")] - fn fold_pat_ident(&mut self, i: PatIdent) -> PatIdent { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_pat_ident(&mut self, i: crate::PatIdent) -> crate::PatIdent { fold_pat_ident(self, i) } #[cfg(feature = "full")] - fn fold_pat_or(&mut self, i: PatOr) -> PatOr { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_pat_or(&mut self, i: crate::PatOr) -> crate::PatOr { fold_pat_or(self, i) } #[cfg(feature = "full")] - fn fold_pat_paren(&mut self, i: PatParen) -> PatParen { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_pat_paren(&mut self, i: crate::PatParen) -> crate::PatParen { fold_pat_paren(self, i) } #[cfg(feature = "full")] - fn fold_pat_reference(&mut self, i: PatReference) -> PatReference { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_pat_reference(&mut self, i: crate::PatReference) -> crate::PatReference { fold_pat_reference(self, i) } #[cfg(feature = "full")] - fn fold_pat_rest(&mut self, i: PatRest) -> PatRest { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_pat_rest(&mut self, i: crate::PatRest) -> crate::PatRest { fold_pat_rest(self, i) } #[cfg(feature = "full")] 
- fn fold_pat_slice(&mut self, i: PatSlice) -> PatSlice { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_pat_slice(&mut self, i: crate::PatSlice) -> crate::PatSlice { fold_pat_slice(self, i) } #[cfg(feature = "full")] - fn fold_pat_struct(&mut self, i: PatStruct) -> PatStruct { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_pat_struct(&mut self, i: crate::PatStruct) -> crate::PatStruct { fold_pat_struct(self, i) } #[cfg(feature = "full")] - fn fold_pat_tuple(&mut self, i: PatTuple) -> PatTuple { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_pat_tuple(&mut self, i: crate::PatTuple) -> crate::PatTuple { fold_pat_tuple(self, i) } #[cfg(feature = "full")] - fn fold_pat_tuple_struct(&mut self, i: PatTupleStruct) -> PatTupleStruct { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_pat_tuple_struct( + &mut self, + i: crate::PatTupleStruct, + ) -> crate::PatTupleStruct { fold_pat_tuple_struct(self, i) } #[cfg(feature = "full")] - fn fold_pat_type(&mut self, i: PatType) -> PatType { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_pat_type(&mut self, i: crate::PatType) -> crate::PatType { fold_pat_type(self, i) } #[cfg(feature = "full")] - fn fold_pat_wild(&mut self, i: PatWild) -> PatWild { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_pat_wild(&mut self, i: crate::PatWild) -> crate::PatWild { fold_pat_wild(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_path(&mut self, i: Path) -> Path { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_path(&mut self, i: crate::Path) -> crate::Path { fold_path(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_path_arguments(&mut self, i: PathArguments) -> PathArguments { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_path_arguments(&mut self, i: crate::PathArguments) -> crate::PathArguments { fold_path_arguments(self, i) } #[cfg(any(feature 
= "derive", feature = "full"))] - fn fold_path_segment(&mut self, i: PathSegment) -> PathSegment { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_path_segment(&mut self, i: crate::PathSegment) -> crate::PathSegment { fold_path_segment(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_predicate_lifetime(&mut self, i: PredicateLifetime) -> PredicateLifetime { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_predicate_lifetime( + &mut self, + i: crate::PredicateLifetime, + ) -> crate::PredicateLifetime { fold_predicate_lifetime(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_predicate_type(&mut self, i: PredicateType) -> PredicateType { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_predicate_type(&mut self, i: crate::PredicateType) -> crate::PredicateType { fold_predicate_type(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_qself(&mut self, i: QSelf) -> QSelf { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_qself(&mut self, i: crate::QSelf) -> crate::QSelf { fold_qself(self, i) } #[cfg(feature = "full")] - fn fold_range_limits(&mut self, i: RangeLimits) -> RangeLimits { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_range_limits(&mut self, i: crate::RangeLimits) -> crate::RangeLimits { fold_range_limits(self, i) } #[cfg(feature = "full")] - fn fold_receiver(&mut self, i: Receiver) -> Receiver { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_receiver(&mut self, i: crate::Receiver) -> crate::Receiver { fold_receiver(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_return_type(&mut self, i: ReturnType) -> ReturnType { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_return_type(&mut self, i: crate::ReturnType) -> crate::ReturnType { fold_return_type(self, i) } 
#[cfg(feature = "full")] - fn fold_signature(&mut self, i: Signature) -> Signature { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_signature(&mut self, i: crate::Signature) -> crate::Signature { fold_signature(self, i) } - fn fold_span(&mut self, i: Span) -> Span { + fn fold_span(&mut self, i: proc_macro2::Span) -> proc_macro2::Span { fold_span(self, i) } #[cfg(feature = "full")] - fn fold_static_mutability(&mut self, i: StaticMutability) -> StaticMutability { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_static_mutability( + &mut self, + i: crate::StaticMutability, + ) -> crate::StaticMutability { fold_static_mutability(self, i) } #[cfg(feature = "full")] - fn fold_stmt(&mut self, i: Stmt) -> Stmt { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_stmt(&mut self, i: crate::Stmt) -> crate::Stmt { fold_stmt(self, i) } #[cfg(feature = "full")] - fn fold_stmt_macro(&mut self, i: StmtMacro) -> StmtMacro { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_stmt_macro(&mut self, i: crate::StmtMacro) -> crate::StmtMacro { fold_stmt_macro(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_trait_bound(&mut self, i: TraitBound) -> TraitBound { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_trait_bound(&mut self, i: crate::TraitBound) -> crate::TraitBound { fold_trait_bound(self, i) } #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] fn fold_trait_bound_modifier( &mut self, - i: TraitBoundModifier, - ) -> TraitBoundModifier { + i: crate::TraitBoundModifier, + ) -> crate::TraitBoundModifier { fold_trait_bound_modifier(self, i) } #[cfg(feature = "full")] - fn fold_trait_item(&mut self, i: TraitItem) -> TraitItem { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_trait_item(&mut self, i: crate::TraitItem) -> crate::TraitItem { fold_trait_item(self, i) } #[cfg(feature = "full")] - fn 
fold_trait_item_const(&mut self, i: TraitItemConst) -> TraitItemConst { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_trait_item_const( + &mut self, + i: crate::TraitItemConst, + ) -> crate::TraitItemConst { fold_trait_item_const(self, i) } #[cfg(feature = "full")] - fn fold_trait_item_fn(&mut self, i: TraitItemFn) -> TraitItemFn { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_trait_item_fn(&mut self, i: crate::TraitItemFn) -> crate::TraitItemFn { fold_trait_item_fn(self, i) } #[cfg(feature = "full")] - fn fold_trait_item_macro(&mut self, i: TraitItemMacro) -> TraitItemMacro { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_trait_item_macro( + &mut self, + i: crate::TraitItemMacro, + ) -> crate::TraitItemMacro { fold_trait_item_macro(self, i) } #[cfg(feature = "full")] - fn fold_trait_item_type(&mut self, i: TraitItemType) -> TraitItemType { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_trait_item_type(&mut self, i: crate::TraitItemType) -> crate::TraitItemType { fold_trait_item_type(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_type(&mut self, i: Type) -> Type { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_type(&mut self, i: crate::Type) -> crate::Type { fold_type(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_type_array(&mut self, i: TypeArray) -> TypeArray { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_type_array(&mut self, i: crate::TypeArray) -> crate::TypeArray { fold_type_array(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_type_bare_fn(&mut self, i: TypeBareFn) -> TypeBareFn { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_type_bare_fn(&mut self, i: crate::TypeBareFn) -> crate::TypeBareFn { fold_type_bare_fn(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_type_group(&mut self, i: 
TypeGroup) -> TypeGroup { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_type_group(&mut self, i: crate::TypeGroup) -> crate::TypeGroup { fold_type_group(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_type_impl_trait(&mut self, i: TypeImplTrait) -> TypeImplTrait { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_type_impl_trait(&mut self, i: crate::TypeImplTrait) -> crate::TypeImplTrait { fold_type_impl_trait(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_type_infer(&mut self, i: TypeInfer) -> TypeInfer { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_type_infer(&mut self, i: crate::TypeInfer) -> crate::TypeInfer { fold_type_infer(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_type_macro(&mut self, i: TypeMacro) -> TypeMacro { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_type_macro(&mut self, i: crate::TypeMacro) -> crate::TypeMacro { fold_type_macro(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_type_never(&mut self, i: TypeNever) -> TypeNever { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_type_never(&mut self, i: crate::TypeNever) -> crate::TypeNever { fold_type_never(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_type_param(&mut self, i: TypeParam) -> TypeParam { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_type_param(&mut self, i: crate::TypeParam) -> crate::TypeParam { fold_type_param(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_type_param_bound(&mut self, i: TypeParamBound) -> TypeParamBound { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_type_param_bound( + &mut self, + i: crate::TypeParamBound, + ) -> crate::TypeParamBound { 
fold_type_param_bound(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_type_paren(&mut self, i: TypeParen) -> TypeParen { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_type_paren(&mut self, i: crate::TypeParen) -> crate::TypeParen { fold_type_paren(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_type_path(&mut self, i: TypePath) -> TypePath { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_type_path(&mut self, i: crate::TypePath) -> crate::TypePath { fold_type_path(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_type_ptr(&mut self, i: TypePtr) -> TypePtr { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_type_ptr(&mut self, i: crate::TypePtr) -> crate::TypePtr { fold_type_ptr(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_type_reference(&mut self, i: TypeReference) -> TypeReference { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_type_reference(&mut self, i: crate::TypeReference) -> crate::TypeReference { fold_type_reference(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_type_slice(&mut self, i: TypeSlice) -> TypeSlice { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_type_slice(&mut self, i: crate::TypeSlice) -> crate::TypeSlice { fold_type_slice(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_type_trait_object(&mut self, i: TypeTraitObject) -> TypeTraitObject { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_type_trait_object( + &mut self, + i: crate::TypeTraitObject, + ) -> crate::TypeTraitObject { fold_type_trait_object(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_type_tuple(&mut self, i: TypeTuple) -> TypeTuple { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", 
feature = "full"))))] + fn fold_type_tuple(&mut self, i: crate::TypeTuple) -> crate::TypeTuple { fold_type_tuple(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_un_op(&mut self, i: UnOp) -> UnOp { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_un_op(&mut self, i: crate::UnOp) -> crate::UnOp { fold_un_op(self, i) } #[cfg(feature = "full")] - fn fold_use_glob(&mut self, i: UseGlob) -> UseGlob { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_use_glob(&mut self, i: crate::UseGlob) -> crate::UseGlob { fold_use_glob(self, i) } #[cfg(feature = "full")] - fn fold_use_group(&mut self, i: UseGroup) -> UseGroup { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_use_group(&mut self, i: crate::UseGroup) -> crate::UseGroup { fold_use_group(self, i) } #[cfg(feature = "full")] - fn fold_use_name(&mut self, i: UseName) -> UseName { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_use_name(&mut self, i: crate::UseName) -> crate::UseName { fold_use_name(self, i) } #[cfg(feature = "full")] - fn fold_use_path(&mut self, i: UsePath) -> UsePath { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_use_path(&mut self, i: crate::UsePath) -> crate::UsePath { fold_use_path(self, i) } #[cfg(feature = "full")] - fn fold_use_rename(&mut self, i: UseRename) -> UseRename { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_use_rename(&mut self, i: crate::UseRename) -> crate::UseRename { fold_use_rename(self, i) } #[cfg(feature = "full")] - fn fold_use_tree(&mut self, i: UseTree) -> UseTree { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_use_tree(&mut self, i: crate::UseTree) -> crate::UseTree { fold_use_tree(self, i) } #[cfg(feature = "full")] - fn fold_variadic(&mut self, i: Variadic) -> Variadic { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn fold_variadic(&mut self, i: crate::Variadic) -> crate::Variadic { fold_variadic(self, i) } #[cfg(any(feature = 
"derive", feature = "full"))] - fn fold_variant(&mut self, i: Variant) -> Variant { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_variant(&mut self, i: crate::Variant) -> crate::Variant { fold_variant(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_vis_restricted(&mut self, i: VisRestricted) -> VisRestricted { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_vis_restricted(&mut self, i: crate::VisRestricted) -> crate::VisRestricted { fold_vis_restricted(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_visibility(&mut self, i: Visibility) -> Visibility { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_visibility(&mut self, i: crate::Visibility) -> crate::Visibility { fold_visibility(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_where_clause(&mut self, i: WhereClause) -> WhereClause { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_where_clause(&mut self, i: crate::WhereClause) -> crate::WhereClause { fold_where_clause(self, i) } #[cfg(any(feature = "derive", feature = "full"))] - fn fold_where_predicate(&mut self, i: WherePredicate) -> WherePredicate { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn fold_where_predicate( + &mut self, + i: crate::WherePredicate, + ) -> crate::WherePredicate { fold_where_predicate(self, i) } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_abi<F>(f: &mut F, node: Abi) -> Abi +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_abi<F>(f: &mut F, node: crate::Abi) -> crate::Abi where F: Fold + ?Sized, { - Abi { + crate::Abi { extern_token: node.extern_token, name: (node.name).map(|it| f.fold_lit_str(it)), } } #[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] pub 
fn fold_angle_bracketed_generic_arguments<F>( f: &mut F, - node: AngleBracketedGenericArguments, -) -> AngleBracketedGenericArguments + node: crate::AngleBracketedGenericArguments, +) -> crate::AngleBracketedGenericArguments where F: Fold + ?Sized, { - AngleBracketedGenericArguments { + crate::AngleBracketedGenericArguments { colon2_token: node.colon2_token, lt_token: node.lt_token, args: FoldHelper::lift(node.args, |it| f.fold_generic_argument(it)), @@ -778,11 +1013,12 @@ where } } #[cfg(feature = "full")] -pub fn fold_arm<F>(f: &mut F, node: Arm) -> Arm +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_arm<F>(f: &mut F, node: crate::Arm) -> crate::Arm where F: Fold + ?Sized, { - Arm { + crate::Arm { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), pat: f.fold_pat(node.pat), guard: (node.guard).map(|it| ((it).0, Box::new(f.fold_expr(*(it).1)))), @@ -792,11 +1028,12 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_assoc_const<F>(f: &mut F, node: AssocConst) -> AssocConst +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_assoc_const<F>(f: &mut F, node: crate::AssocConst) -> crate::AssocConst where F: Fold + ?Sized, { - AssocConst { + crate::AssocConst { ident: f.fold_ident(node.ident), generics: (node.generics).map(|it| f.fold_angle_bracketed_generic_arguments(it)), eq_token: node.eq_token, @@ -804,11 +1041,12 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_assoc_type<F>(f: &mut F, node: AssocType) -> AssocType +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_assoc_type<F>(f: &mut F, node: crate::AssocType) -> crate::AssocType where F: Fold + ?Sized, { - AssocType { + crate::AssocType { ident: f.fold_ident(node.ident), generics: (node.generics).map(|it| f.fold_angle_bracketed_generic_arguments(it)), eq_token: node.eq_token, @@ -816,21 +1054,23 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] 
-pub fn fold_attr_style<F>(f: &mut F, node: AttrStyle) -> AttrStyle +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_attr_style<F>(f: &mut F, node: crate::AttrStyle) -> crate::AttrStyle where F: Fold + ?Sized, { match node { - AttrStyle::Outer => AttrStyle::Outer, - AttrStyle::Inner(_binding_0) => AttrStyle::Inner(_binding_0), + crate::AttrStyle::Outer => crate::AttrStyle::Outer, + crate::AttrStyle::Inner(_binding_0) => crate::AttrStyle::Inner(_binding_0), } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_attribute<F>(f: &mut F, node: Attribute) -> Attribute +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_attribute<F>(f: &mut F, node: crate::Attribute) -> crate::Attribute where F: Fold + ?Sized, { - Attribute { + crate::Attribute { pound_token: node.pound_token, style: f.fold_attr_style(node.style), bracket_token: node.bracket_token, @@ -838,22 +1078,24 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_bare_fn_arg<F>(f: &mut F, node: BareFnArg) -> BareFnArg +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_bare_fn_arg<F>(f: &mut F, node: crate::BareFnArg) -> crate::BareFnArg where F: Fold + ?Sized, { - BareFnArg { + crate::BareFnArg { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), name: (node.name).map(|it| (f.fold_ident((it).0), (it).1)), ty: f.fold_type(node.ty), } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_bare_variadic<F>(f: &mut F, node: BareVariadic) -> BareVariadic +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_bare_variadic<F>(f: &mut F, node: crate::BareVariadic) -> crate::BareVariadic where F: Fold + ?Sized, { - BareVariadic { + crate::BareVariadic { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), name: (node.name).map(|it| (f.fold_ident((it).0), (it).1)), dots: node.dots, @@ -861,57 +1103,63 @@ where } 
} #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_bin_op<F>(f: &mut F, node: BinOp) -> BinOp +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_bin_op<F>(f: &mut F, node: crate::BinOp) -> crate::BinOp where F: Fold + ?Sized, { match node { - BinOp::Add(_binding_0) => BinOp::Add(_binding_0), - BinOp::Sub(_binding_0) => BinOp::Sub(_binding_0), - BinOp::Mul(_binding_0) => BinOp::Mul(_binding_0), - BinOp::Div(_binding_0) => BinOp::Div(_binding_0), - BinOp::Rem(_binding_0) => BinOp::Rem(_binding_0), - BinOp::And(_binding_0) => BinOp::And(_binding_0), - BinOp::Or(_binding_0) => BinOp::Or(_binding_0), - BinOp::BitXor(_binding_0) => BinOp::BitXor(_binding_0), - BinOp::BitAnd(_binding_0) => BinOp::BitAnd(_binding_0), - BinOp::BitOr(_binding_0) => BinOp::BitOr(_binding_0), - BinOp::Shl(_binding_0) => BinOp::Shl(_binding_0), - BinOp::Shr(_binding_0) => BinOp::Shr(_binding_0), - BinOp::Eq(_binding_0) => BinOp::Eq(_binding_0), - BinOp::Lt(_binding_0) => BinOp::Lt(_binding_0), - BinOp::Le(_binding_0) => BinOp::Le(_binding_0), - BinOp::Ne(_binding_0) => BinOp::Ne(_binding_0), - BinOp::Ge(_binding_0) => BinOp::Ge(_binding_0), - BinOp::Gt(_binding_0) => BinOp::Gt(_binding_0), - BinOp::AddAssign(_binding_0) => BinOp::AddAssign(_binding_0), - BinOp::SubAssign(_binding_0) => BinOp::SubAssign(_binding_0), - BinOp::MulAssign(_binding_0) => BinOp::MulAssign(_binding_0), - BinOp::DivAssign(_binding_0) => BinOp::DivAssign(_binding_0), - BinOp::RemAssign(_binding_0) => BinOp::RemAssign(_binding_0), - BinOp::BitXorAssign(_binding_0) => BinOp::BitXorAssign(_binding_0), - BinOp::BitAndAssign(_binding_0) => BinOp::BitAndAssign(_binding_0), - BinOp::BitOrAssign(_binding_0) => BinOp::BitOrAssign(_binding_0), - BinOp::ShlAssign(_binding_0) => BinOp::ShlAssign(_binding_0), - BinOp::ShrAssign(_binding_0) => BinOp::ShrAssign(_binding_0), - } -} -#[cfg(feature = "full")] -pub fn fold_block<F>(f: &mut F, node: Block) -> Block -where - F: Fold + 
?Sized, -{ - Block { + crate::BinOp::Add(_binding_0) => crate::BinOp::Add(_binding_0), + crate::BinOp::Sub(_binding_0) => crate::BinOp::Sub(_binding_0), + crate::BinOp::Mul(_binding_0) => crate::BinOp::Mul(_binding_0), + crate::BinOp::Div(_binding_0) => crate::BinOp::Div(_binding_0), + crate::BinOp::Rem(_binding_0) => crate::BinOp::Rem(_binding_0), + crate::BinOp::And(_binding_0) => crate::BinOp::And(_binding_0), + crate::BinOp::Or(_binding_0) => crate::BinOp::Or(_binding_0), + crate::BinOp::BitXor(_binding_0) => crate::BinOp::BitXor(_binding_0), + crate::BinOp::BitAnd(_binding_0) => crate::BinOp::BitAnd(_binding_0), + crate::BinOp::BitOr(_binding_0) => crate::BinOp::BitOr(_binding_0), + crate::BinOp::Shl(_binding_0) => crate::BinOp::Shl(_binding_0), + crate::BinOp::Shr(_binding_0) => crate::BinOp::Shr(_binding_0), + crate::BinOp::Eq(_binding_0) => crate::BinOp::Eq(_binding_0), + crate::BinOp::Lt(_binding_0) => crate::BinOp::Lt(_binding_0), + crate::BinOp::Le(_binding_0) => crate::BinOp::Le(_binding_0), + crate::BinOp::Ne(_binding_0) => crate::BinOp::Ne(_binding_0), + crate::BinOp::Ge(_binding_0) => crate::BinOp::Ge(_binding_0), + crate::BinOp::Gt(_binding_0) => crate::BinOp::Gt(_binding_0), + crate::BinOp::AddAssign(_binding_0) => crate::BinOp::AddAssign(_binding_0), + crate::BinOp::SubAssign(_binding_0) => crate::BinOp::SubAssign(_binding_0), + crate::BinOp::MulAssign(_binding_0) => crate::BinOp::MulAssign(_binding_0), + crate::BinOp::DivAssign(_binding_0) => crate::BinOp::DivAssign(_binding_0), + crate::BinOp::RemAssign(_binding_0) => crate::BinOp::RemAssign(_binding_0), + crate::BinOp::BitXorAssign(_binding_0) => crate::BinOp::BitXorAssign(_binding_0), + crate::BinOp::BitAndAssign(_binding_0) => crate::BinOp::BitAndAssign(_binding_0), + crate::BinOp::BitOrAssign(_binding_0) => crate::BinOp::BitOrAssign(_binding_0), + crate::BinOp::ShlAssign(_binding_0) => crate::BinOp::ShlAssign(_binding_0), + crate::BinOp::ShrAssign(_binding_0) => 
crate::BinOp::ShrAssign(_binding_0), + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_block<F>(f: &mut F, node: crate::Block) -> crate::Block +where + F: Fold + ?Sized, +{ + crate::Block { brace_token: node.brace_token, stmts: FoldHelper::lift(node.stmts, |it| f.fold_stmt(it)), } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_bound_lifetimes<F>(f: &mut F, node: BoundLifetimes) -> BoundLifetimes +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_bound_lifetimes<F>( + f: &mut F, + node: crate::BoundLifetimes, +) -> crate::BoundLifetimes where F: Fold + ?Sized, { - BoundLifetimes { + crate::BoundLifetimes { for_token: node.for_token, lt_token: node.lt_token, lifetimes: FoldHelper::lift(node.lifetimes, |it| f.fold_generic_param(it)), @@ -919,11 +1167,12 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_const_param<F>(f: &mut F, node: ConstParam) -> ConstParam +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_const_param<F>(f: &mut F, node: crate::ConstParam) -> crate::ConstParam where F: Fold + ?Sized, { - ConstParam { + crate::ConstParam { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), const_token: node.const_token, ident: f.fold_ident(node.ident), @@ -934,11 +1183,12 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_constraint<F>(f: &mut F, node: Constraint) -> Constraint +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_constraint<F>(f: &mut F, node: crate::Constraint) -> crate::Constraint where F: Fold + ?Sized, { - Constraint { + crate::Constraint { ident: f.fold_ident(node.ident), generics: (node.generics).map(|it| f.fold_angle_bracketed_generic_arguments(it)), colon_token: node.colon_token, @@ -946,54 +1196,63 @@ where } } #[cfg(feature = "derive")] -pub fn fold_data<F>(f: &mut F, node: Data) -> Data 
+#[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] +pub fn fold_data<F>(f: &mut F, node: crate::Data) -> crate::Data where F: Fold + ?Sized, { match node { - Data::Struct(_binding_0) => Data::Struct(f.fold_data_struct(_binding_0)), - Data::Enum(_binding_0) => Data::Enum(f.fold_data_enum(_binding_0)), - Data::Union(_binding_0) => Data::Union(f.fold_data_union(_binding_0)), + crate::Data::Struct(_binding_0) => { + crate::Data::Struct(f.fold_data_struct(_binding_0)) + } + crate::Data::Enum(_binding_0) => crate::Data::Enum(f.fold_data_enum(_binding_0)), + crate::Data::Union(_binding_0) => { + crate::Data::Union(f.fold_data_union(_binding_0)) + } } } #[cfg(feature = "derive")] -pub fn fold_data_enum<F>(f: &mut F, node: DataEnum) -> DataEnum +#[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] +pub fn fold_data_enum<F>(f: &mut F, node: crate::DataEnum) -> crate::DataEnum where F: Fold + ?Sized, { - DataEnum { + crate::DataEnum { enum_token: node.enum_token, brace_token: node.brace_token, variants: FoldHelper::lift(node.variants, |it| f.fold_variant(it)), } } #[cfg(feature = "derive")] -pub fn fold_data_struct<F>(f: &mut F, node: DataStruct) -> DataStruct +#[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] +pub fn fold_data_struct<F>(f: &mut F, node: crate::DataStruct) -> crate::DataStruct where F: Fold + ?Sized, { - DataStruct { + crate::DataStruct { struct_token: node.struct_token, fields: f.fold_fields(node.fields), semi_token: node.semi_token, } } #[cfg(feature = "derive")] -pub fn fold_data_union<F>(f: &mut F, node: DataUnion) -> DataUnion +#[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] +pub fn fold_data_union<F>(f: &mut F, node: crate::DataUnion) -> crate::DataUnion where F: Fold + ?Sized, { - DataUnion { + crate::DataUnion { union_token: node.union_token, fields: f.fold_fields_named(node.fields), } } #[cfg(feature = "derive")] -pub fn fold_derive_input<F>(f: &mut F, node: DeriveInput) -> DeriveInput +#[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] +pub fn 
fold_derive_input<F>(f: &mut F, node: crate::DeriveInput) -> crate::DeriveInput where F: Fold + ?Sized, { - DeriveInput { + crate::DeriveInput { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), vis: f.fold_visibility(node.vis), ident: f.fold_ident(node.ident), @@ -1002,81 +1261,138 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_expr<F>(f: &mut F, node: Expr) -> Expr +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_expr<F>(f: &mut F, node: crate::Expr) -> crate::Expr where F: Fold + ?Sized, { match node { - Expr::Array(_binding_0) => Expr::Array(full!(f.fold_expr_array(_binding_0))), - Expr::Assign(_binding_0) => Expr::Assign(full!(f.fold_expr_assign(_binding_0))), - Expr::Async(_binding_0) => Expr::Async(full!(f.fold_expr_async(_binding_0))), - Expr::Await(_binding_0) => Expr::Await(full!(f.fold_expr_await(_binding_0))), - Expr::Binary(_binding_0) => Expr::Binary(f.fold_expr_binary(_binding_0)), - Expr::Block(_binding_0) => Expr::Block(full!(f.fold_expr_block(_binding_0))), - Expr::Break(_binding_0) => Expr::Break(full!(f.fold_expr_break(_binding_0))), - Expr::Call(_binding_0) => Expr::Call(f.fold_expr_call(_binding_0)), - Expr::Cast(_binding_0) => Expr::Cast(f.fold_expr_cast(_binding_0)), - Expr::Closure(_binding_0) => { - Expr::Closure(full!(f.fold_expr_closure(_binding_0))) - } - Expr::Const(_binding_0) => Expr::Const(full!(f.fold_expr_const(_binding_0))), - Expr::Continue(_binding_0) => { - Expr::Continue(full!(f.fold_expr_continue(_binding_0))) - } - Expr::Field(_binding_0) => Expr::Field(f.fold_expr_field(_binding_0)), - Expr::ForLoop(_binding_0) => { - Expr::ForLoop(full!(f.fold_expr_for_loop(_binding_0))) - } - Expr::Group(_binding_0) => Expr::Group(f.fold_expr_group(_binding_0)), - Expr::If(_binding_0) => Expr::If(full!(f.fold_expr_if(_binding_0))), - Expr::Index(_binding_0) => Expr::Index(f.fold_expr_index(_binding_0)), - Expr::Infer(_binding_0) => 
Expr::Infer(full!(f.fold_expr_infer(_binding_0))), - Expr::Let(_binding_0) => Expr::Let(full!(f.fold_expr_let(_binding_0))), - Expr::Lit(_binding_0) => Expr::Lit(f.fold_expr_lit(_binding_0)), - Expr::Loop(_binding_0) => Expr::Loop(full!(f.fold_expr_loop(_binding_0))), - Expr::Macro(_binding_0) => Expr::Macro(f.fold_expr_macro(_binding_0)), - Expr::Match(_binding_0) => Expr::Match(full!(f.fold_expr_match(_binding_0))), - Expr::MethodCall(_binding_0) => { - Expr::MethodCall(full!(f.fold_expr_method_call(_binding_0))) - } - Expr::Paren(_binding_0) => Expr::Paren(f.fold_expr_paren(_binding_0)), - Expr::Path(_binding_0) => Expr::Path(f.fold_expr_path(_binding_0)), - Expr::Range(_binding_0) => Expr::Range(full!(f.fold_expr_range(_binding_0))), - Expr::Reference(_binding_0) => { - Expr::Reference(full!(f.fold_expr_reference(_binding_0))) - } - Expr::Repeat(_binding_0) => Expr::Repeat(full!(f.fold_expr_repeat(_binding_0))), - Expr::Return(_binding_0) => Expr::Return(full!(f.fold_expr_return(_binding_0))), - Expr::Struct(_binding_0) => Expr::Struct(full!(f.fold_expr_struct(_binding_0))), - Expr::Try(_binding_0) => Expr::Try(full!(f.fold_expr_try(_binding_0))), - Expr::TryBlock(_binding_0) => { - Expr::TryBlock(full!(f.fold_expr_try_block(_binding_0))) - } - Expr::Tuple(_binding_0) => Expr::Tuple(full!(f.fold_expr_tuple(_binding_0))), - Expr::Unary(_binding_0) => Expr::Unary(f.fold_expr_unary(_binding_0)), - Expr::Unsafe(_binding_0) => Expr::Unsafe(full!(f.fold_expr_unsafe(_binding_0))), - Expr::Verbatim(_binding_0) => Expr::Verbatim(_binding_0), - Expr::While(_binding_0) => Expr::While(full!(f.fold_expr_while(_binding_0))), - Expr::Yield(_binding_0) => Expr::Yield(full!(f.fold_expr_yield(_binding_0))), - } -} -#[cfg(feature = "full")] -pub fn fold_expr_array<F>(f: &mut F, node: ExprArray) -> ExprArray -where - F: Fold + ?Sized, -{ - ExprArray { + crate::Expr::Array(_binding_0) => { + crate::Expr::Array(full!(f.fold_expr_array(_binding_0))) + } + 
crate::Expr::Assign(_binding_0) => { + crate::Expr::Assign(full!(f.fold_expr_assign(_binding_0))) + } + crate::Expr::Async(_binding_0) => { + crate::Expr::Async(full!(f.fold_expr_async(_binding_0))) + } + crate::Expr::Await(_binding_0) => { + crate::Expr::Await(full!(f.fold_expr_await(_binding_0))) + } + crate::Expr::Binary(_binding_0) => { + crate::Expr::Binary(f.fold_expr_binary(_binding_0)) + } + crate::Expr::Block(_binding_0) => { + crate::Expr::Block(full!(f.fold_expr_block(_binding_0))) + } + crate::Expr::Break(_binding_0) => { + crate::Expr::Break(full!(f.fold_expr_break(_binding_0))) + } + crate::Expr::Call(_binding_0) => crate::Expr::Call(f.fold_expr_call(_binding_0)), + crate::Expr::Cast(_binding_0) => crate::Expr::Cast(f.fold_expr_cast(_binding_0)), + crate::Expr::Closure(_binding_0) => { + crate::Expr::Closure(full!(f.fold_expr_closure(_binding_0))) + } + crate::Expr::Const(_binding_0) => { + crate::Expr::Const(full!(f.fold_expr_const(_binding_0))) + } + crate::Expr::Continue(_binding_0) => { + crate::Expr::Continue(full!(f.fold_expr_continue(_binding_0))) + } + crate::Expr::Field(_binding_0) => { + crate::Expr::Field(f.fold_expr_field(_binding_0)) + } + crate::Expr::ForLoop(_binding_0) => { + crate::Expr::ForLoop(full!(f.fold_expr_for_loop(_binding_0))) + } + crate::Expr::Group(_binding_0) => { + crate::Expr::Group(f.fold_expr_group(_binding_0)) + } + crate::Expr::If(_binding_0) => crate::Expr::If(full!(f.fold_expr_if(_binding_0))), + crate::Expr::Index(_binding_0) => { + crate::Expr::Index(f.fold_expr_index(_binding_0)) + } + crate::Expr::Infer(_binding_0) => { + crate::Expr::Infer(full!(f.fold_expr_infer(_binding_0))) + } + crate::Expr::Let(_binding_0) => { + crate::Expr::Let(full!(f.fold_expr_let(_binding_0))) + } + crate::Expr::Lit(_binding_0) => crate::Expr::Lit(f.fold_expr_lit(_binding_0)), + crate::Expr::Loop(_binding_0) => { + crate::Expr::Loop(full!(f.fold_expr_loop(_binding_0))) + } + crate::Expr::Macro(_binding_0) => { + 
crate::Expr::Macro(f.fold_expr_macro(_binding_0)) + } + crate::Expr::Match(_binding_0) => { + crate::Expr::Match(full!(f.fold_expr_match(_binding_0))) + } + crate::Expr::MethodCall(_binding_0) => { + crate::Expr::MethodCall(f.fold_expr_method_call(_binding_0)) + } + crate::Expr::Paren(_binding_0) => { + crate::Expr::Paren(f.fold_expr_paren(_binding_0)) + } + crate::Expr::Path(_binding_0) => crate::Expr::Path(f.fold_expr_path(_binding_0)), + crate::Expr::Range(_binding_0) => { + crate::Expr::Range(full!(f.fold_expr_range(_binding_0))) + } + crate::Expr::Reference(_binding_0) => { + crate::Expr::Reference(f.fold_expr_reference(_binding_0)) + } + crate::Expr::Repeat(_binding_0) => { + crate::Expr::Repeat(full!(f.fold_expr_repeat(_binding_0))) + } + crate::Expr::Return(_binding_0) => { + crate::Expr::Return(full!(f.fold_expr_return(_binding_0))) + } + crate::Expr::Struct(_binding_0) => { + crate::Expr::Struct(f.fold_expr_struct(_binding_0)) + } + crate::Expr::Try(_binding_0) => { + crate::Expr::Try(full!(f.fold_expr_try(_binding_0))) + } + crate::Expr::TryBlock(_binding_0) => { + crate::Expr::TryBlock(full!(f.fold_expr_try_block(_binding_0))) + } + crate::Expr::Tuple(_binding_0) => { + crate::Expr::Tuple(full!(f.fold_expr_tuple(_binding_0))) + } + crate::Expr::Unary(_binding_0) => { + crate::Expr::Unary(f.fold_expr_unary(_binding_0)) + } + crate::Expr::Unsafe(_binding_0) => { + crate::Expr::Unsafe(full!(f.fold_expr_unsafe(_binding_0))) + } + crate::Expr::Verbatim(_binding_0) => crate::Expr::Verbatim(_binding_0), + crate::Expr::While(_binding_0) => { + crate::Expr::While(full!(f.fold_expr_while(_binding_0))) + } + crate::Expr::Yield(_binding_0) => { + crate::Expr::Yield(full!(f.fold_expr_yield(_binding_0))) + } + } +} +#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_expr_array<F>(f: &mut F, node: crate::ExprArray) -> crate::ExprArray +where + F: Fold + ?Sized, +{ + crate::ExprArray { attrs: FoldHelper::lift(node.attrs, |it| 
f.fold_attribute(it)), bracket_token: node.bracket_token, elems: FoldHelper::lift(node.elems, |it| f.fold_expr(it)), } } #[cfg(feature = "full")] -pub fn fold_expr_assign<F>(f: &mut F, node: ExprAssign) -> ExprAssign +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_expr_assign<F>(f: &mut F, node: crate::ExprAssign) -> crate::ExprAssign where F: Fold + ?Sized, { - ExprAssign { + crate::ExprAssign { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), left: Box::new(f.fold_expr(*node.left)), eq_token: node.eq_token, @@ -1084,11 +1400,12 @@ where } } #[cfg(feature = "full")] -pub fn fold_expr_async<F>(f: &mut F, node: ExprAsync) -> ExprAsync +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_expr_async<F>(f: &mut F, node: crate::ExprAsync) -> crate::ExprAsync where F: Fold + ?Sized, { - ExprAsync { + crate::ExprAsync { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), async_token: node.async_token, capture: node.capture, @@ -1096,11 +1413,12 @@ where } } #[cfg(feature = "full")] -pub fn fold_expr_await<F>(f: &mut F, node: ExprAwait) -> ExprAwait +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_expr_await<F>(f: &mut F, node: crate::ExprAwait) -> crate::ExprAwait where F: Fold + ?Sized, { - ExprAwait { + crate::ExprAwait { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), base: Box::new(f.fold_expr(*node.base)), dot_token: node.dot_token, @@ -1108,11 +1426,12 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_expr_binary<F>(f: &mut F, node: ExprBinary) -> ExprBinary +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_expr_binary<F>(f: &mut F, node: crate::ExprBinary) -> crate::ExprBinary where F: Fold + ?Sized, { - ExprBinary { + crate::ExprBinary { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), left: Box::new(f.fold_expr(*node.left)), op: f.fold_bin_op(node.op), @@ -1120,22 +1439,24 @@ where } } #[cfg(feature = 
"full")] -pub fn fold_expr_block<F>(f: &mut F, node: ExprBlock) -> ExprBlock +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_expr_block<F>(f: &mut F, node: crate::ExprBlock) -> crate::ExprBlock where F: Fold + ?Sized, { - ExprBlock { + crate::ExprBlock { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), label: (node.label).map(|it| f.fold_label(it)), block: f.fold_block(node.block), } } #[cfg(feature = "full")] -pub fn fold_expr_break<F>(f: &mut F, node: ExprBreak) -> ExprBreak +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_expr_break<F>(f: &mut F, node: crate::ExprBreak) -> crate::ExprBreak where F: Fold + ?Sized, { - ExprBreak { + crate::ExprBreak { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), break_token: node.break_token, label: (node.label).map(|it| f.fold_lifetime(it)), @@ -1143,11 +1464,12 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_expr_call<F>(f: &mut F, node: ExprCall) -> ExprCall +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_expr_call<F>(f: &mut F, node: crate::ExprCall) -> crate::ExprCall where F: Fold + ?Sized, { - ExprCall { + crate::ExprCall { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), func: Box::new(f.fold_expr(*node.func)), paren_token: node.paren_token, @@ -1155,11 +1477,12 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_expr_cast<F>(f: &mut F, node: ExprCast) -> ExprCast +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_expr_cast<F>(f: &mut F, node: crate::ExprCast) -> crate::ExprCast where F: Fold + ?Sized, { - ExprCast { + crate::ExprCast { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), expr: Box::new(f.fold_expr(*node.expr)), as_token: node.as_token, @@ -1167,11 +1490,12 @@ where } } #[cfg(feature = "full")] -pub fn fold_expr_closure<F>(f: &mut F, node: ExprClosure) -> ExprClosure +#[cfg_attr(doc_cfg, 
doc(cfg(feature = "full")))] +pub fn fold_expr_closure<F>(f: &mut F, node: crate::ExprClosure) -> crate::ExprClosure where F: Fold + ?Sized, { - ExprClosure { + crate::ExprClosure { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), lifetimes: (node.lifetimes).map(|it| f.fold_bound_lifetimes(it)), constness: node.constness, @@ -1186,33 +1510,36 @@ where } } #[cfg(feature = "full")] -pub fn fold_expr_const<F>(f: &mut F, node: ExprConst) -> ExprConst +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_expr_const<F>(f: &mut F, node: crate::ExprConst) -> crate::ExprConst where F: Fold + ?Sized, { - ExprConst { + crate::ExprConst { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), const_token: node.const_token, block: f.fold_block(node.block), } } #[cfg(feature = "full")] -pub fn fold_expr_continue<F>(f: &mut F, node: ExprContinue) -> ExprContinue +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_expr_continue<F>(f: &mut F, node: crate::ExprContinue) -> crate::ExprContinue where F: Fold + ?Sized, { - ExprContinue { + crate::ExprContinue { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), continue_token: node.continue_token, label: (node.label).map(|it| f.fold_lifetime(it)), } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_expr_field<F>(f: &mut F, node: ExprField) -> ExprField +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_expr_field<F>(f: &mut F, node: crate::ExprField) -> crate::ExprField where F: Fold + ?Sized, { - ExprField { + crate::ExprField { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), base: Box::new(f.fold_expr(*node.base)), dot_token: node.dot_token, @@ -1220,11 +1547,12 @@ where } } #[cfg(feature = "full")] -pub fn fold_expr_for_loop<F>(f: &mut F, node: ExprForLoop) -> ExprForLoop +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_expr_for_loop<F>(f: &mut F, node: crate::ExprForLoop) -> crate::ExprForLoop 
where F: Fold + ?Sized, { - ExprForLoop { + crate::ExprForLoop { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), label: (node.label).map(|it| f.fold_label(it)), for_token: node.for_token, @@ -1235,22 +1563,24 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_expr_group<F>(f: &mut F, node: ExprGroup) -> ExprGroup +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_expr_group<F>(f: &mut F, node: crate::ExprGroup) -> crate::ExprGroup where F: Fold + ?Sized, { - ExprGroup { + crate::ExprGroup { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), group_token: node.group_token, expr: Box::new(f.fold_expr(*node.expr)), } } #[cfg(feature = "full")] -pub fn fold_expr_if<F>(f: &mut F, node: ExprIf) -> ExprIf +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_expr_if<F>(f: &mut F, node: crate::ExprIf) -> crate::ExprIf where F: Fold + ?Sized, { - ExprIf { + crate::ExprIf { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), if_token: node.if_token, cond: Box::new(f.fold_expr(*node.cond)), @@ -1260,11 +1590,12 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_expr_index<F>(f: &mut F, node: ExprIndex) -> ExprIndex +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_expr_index<F>(f: &mut F, node: crate::ExprIndex) -> crate::ExprIndex where F: Fold + ?Sized, { - ExprIndex { + crate::ExprIndex { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), expr: Box::new(f.fold_expr(*node.expr)), bracket_token: node.bracket_token, @@ -1272,21 +1603,23 @@ where } } #[cfg(feature = "full")] -pub fn fold_expr_infer<F>(f: &mut F, node: ExprInfer) -> ExprInfer +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_expr_infer<F>(f: &mut F, node: crate::ExprInfer) -> crate::ExprInfer where F: Fold + ?Sized, { - ExprInfer { + crate::ExprInfer { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), 
underscore_token: node.underscore_token, } } #[cfg(feature = "full")] -pub fn fold_expr_let<F>(f: &mut F, node: ExprLet) -> ExprLet +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_expr_let<F>(f: &mut F, node: crate::ExprLet) -> crate::ExprLet where F: Fold + ?Sized, { - ExprLet { + crate::ExprLet { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), let_token: node.let_token, pat: Box::new(f.fold_pat(*node.pat)), @@ -1295,21 +1628,23 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_expr_lit<F>(f: &mut F, node: ExprLit) -> ExprLit +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_expr_lit<F>(f: &mut F, node: crate::ExprLit) -> crate::ExprLit where F: Fold + ?Sized, { - ExprLit { + crate::ExprLit { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), lit: f.fold_lit(node.lit), } } #[cfg(feature = "full")] -pub fn fold_expr_loop<F>(f: &mut F, node: ExprLoop) -> ExprLoop +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_expr_loop<F>(f: &mut F, node: crate::ExprLoop) -> crate::ExprLoop where F: Fold + ?Sized, { - ExprLoop { + crate::ExprLoop { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), label: (node.label).map(|it| f.fold_label(it)), loop_token: node.loop_token, @@ -1317,21 +1652,23 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_expr_macro<F>(f: &mut F, node: ExprMacro) -> ExprMacro +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_expr_macro<F>(f: &mut F, node: crate::ExprMacro) -> crate::ExprMacro where F: Fold + ?Sized, { - ExprMacro { + crate::ExprMacro { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), mac: f.fold_macro(node.mac), } } #[cfg(feature = "full")] -pub fn fold_expr_match<F>(f: &mut F, node: ExprMatch) -> ExprMatch +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_expr_match<F>(f: &mut F, node: crate::ExprMatch) -> 
crate::ExprMatch where F: Fold + ?Sized, { - ExprMatch { + crate::ExprMatch { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), match_token: node.match_token, expr: Box::new(f.fold_expr(*node.expr)), @@ -1339,12 +1676,16 @@ where arms: FoldHelper::lift(node.arms, |it| f.fold_arm(it)), } } -#[cfg(feature = "full")] -pub fn fold_expr_method_call<F>(f: &mut F, node: ExprMethodCall) -> ExprMethodCall +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_expr_method_call<F>( + f: &mut F, + node: crate::ExprMethodCall, +) -> crate::ExprMethodCall where F: Fold + ?Sized, { - ExprMethodCall { + crate::ExprMethodCall { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), receiver: Box::new(f.fold_expr(*node.receiver)), dot_token: node.dot_token, @@ -1356,45 +1697,52 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_expr_paren<F>(f: &mut F, node: ExprParen) -> ExprParen +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_expr_paren<F>(f: &mut F, node: crate::ExprParen) -> crate::ExprParen where F: Fold + ?Sized, { - ExprParen { + crate::ExprParen { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), paren_token: node.paren_token, expr: Box::new(f.fold_expr(*node.expr)), } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_expr_path<F>(f: &mut F, node: ExprPath) -> ExprPath +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_expr_path<F>(f: &mut F, node: crate::ExprPath) -> crate::ExprPath where F: Fold + ?Sized, { - ExprPath { + crate::ExprPath { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), qself: (node.qself).map(|it| f.fold_qself(it)), path: f.fold_path(node.path), } } #[cfg(feature = "full")] -pub fn fold_expr_range<F>(f: &mut F, node: ExprRange) -> ExprRange +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn 
fold_expr_range<F>(f: &mut F, node: crate::ExprRange) -> crate::ExprRange where F: Fold + ?Sized, { - ExprRange { + crate::ExprRange { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), start: (node.start).map(|it| Box::new(f.fold_expr(*it))), limits: f.fold_range_limits(node.limits), end: (node.end).map(|it| Box::new(f.fold_expr(*it))), } } -#[cfg(feature = "full")] -pub fn fold_expr_reference<F>(f: &mut F, node: ExprReference) -> ExprReference +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_expr_reference<F>( + f: &mut F, + node: crate::ExprReference, +) -> crate::ExprReference where F: Fold + ?Sized, { - ExprReference { + crate::ExprReference { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), and_token: node.and_token, mutability: node.mutability, @@ -1402,11 +1750,12 @@ where } } #[cfg(feature = "full")] -pub fn fold_expr_repeat<F>(f: &mut F, node: ExprRepeat) -> ExprRepeat +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_expr_repeat<F>(f: &mut F, node: crate::ExprRepeat) -> crate::ExprRepeat where F: Fold + ?Sized, { - ExprRepeat { + crate::ExprRepeat { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), bracket_token: node.bracket_token, expr: Box::new(f.fold_expr(*node.expr)), @@ -1415,22 +1764,24 @@ where } } #[cfg(feature = "full")] -pub fn fold_expr_return<F>(f: &mut F, node: ExprReturn) -> ExprReturn +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_expr_return<F>(f: &mut F, node: crate::ExprReturn) -> crate::ExprReturn where F: Fold + ?Sized, { - ExprReturn { + crate::ExprReturn { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), return_token: node.return_token, expr: (node.expr).map(|it| Box::new(f.fold_expr(*it))), } } -#[cfg(feature = "full")] -pub fn fold_expr_struct<F>(f: &mut F, node: ExprStruct) -> ExprStruct +#[cfg(any(feature = "derive", feature = "full"))] 
+#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_expr_struct<F>(f: &mut F, node: crate::ExprStruct) -> crate::ExprStruct where F: Fold + ?Sized, { - ExprStruct { + crate::ExprStruct { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), qself: (node.qself).map(|it| f.fold_qself(it)), path: f.fold_path(node.path), @@ -1441,66 +1792,75 @@ where } } #[cfg(feature = "full")] -pub fn fold_expr_try<F>(f: &mut F, node: ExprTry) -> ExprTry +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_expr_try<F>(f: &mut F, node: crate::ExprTry) -> crate::ExprTry where F: Fold + ?Sized, { - ExprTry { + crate::ExprTry { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), expr: Box::new(f.fold_expr(*node.expr)), question_token: node.question_token, } } #[cfg(feature = "full")] -pub fn fold_expr_try_block<F>(f: &mut F, node: ExprTryBlock) -> ExprTryBlock +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_expr_try_block<F>( + f: &mut F, + node: crate::ExprTryBlock, +) -> crate::ExprTryBlock where F: Fold + ?Sized, { - ExprTryBlock { + crate::ExprTryBlock { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), try_token: node.try_token, block: f.fold_block(node.block), } } #[cfg(feature = "full")] -pub fn fold_expr_tuple<F>(f: &mut F, node: ExprTuple) -> ExprTuple +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_expr_tuple<F>(f: &mut F, node: crate::ExprTuple) -> crate::ExprTuple where F: Fold + ?Sized, { - ExprTuple { + crate::ExprTuple { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), paren_token: node.paren_token, elems: FoldHelper::lift(node.elems, |it| f.fold_expr(it)), } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_expr_unary<F>(f: &mut F, node: ExprUnary) -> ExprUnary +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_expr_unary<F>(f: &mut F, node: crate::ExprUnary) -> crate::ExprUnary where F: Fold + 
?Sized, { - ExprUnary { + crate::ExprUnary { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), op: f.fold_un_op(node.op), expr: Box::new(f.fold_expr(*node.expr)), } } #[cfg(feature = "full")] -pub fn fold_expr_unsafe<F>(f: &mut F, node: ExprUnsafe) -> ExprUnsafe +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_expr_unsafe<F>(f: &mut F, node: crate::ExprUnsafe) -> crate::ExprUnsafe where F: Fold + ?Sized, { - ExprUnsafe { + crate::ExprUnsafe { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), unsafe_token: node.unsafe_token, block: f.fold_block(node.block), } } #[cfg(feature = "full")] -pub fn fold_expr_while<F>(f: &mut F, node: ExprWhile) -> ExprWhile +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_expr_while<F>(f: &mut F, node: crate::ExprWhile) -> crate::ExprWhile where F: Fold + ?Sized, { - ExprWhile { + crate::ExprWhile { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), label: (node.label).map(|it| f.fold_label(it)), while_token: node.while_token, @@ -1509,22 +1869,24 @@ where } } #[cfg(feature = "full")] -pub fn fold_expr_yield<F>(f: &mut F, node: ExprYield) -> ExprYield +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_expr_yield<F>(f: &mut F, node: crate::ExprYield) -> crate::ExprYield where F: Fold + ?Sized, { - ExprYield { + crate::ExprYield { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), yield_token: node.yield_token, expr: (node.expr).map(|it| Box::new(f.fold_expr(*it))), } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_field<F>(f: &mut F, node: Field) -> Field +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_field<F>(f: &mut F, node: crate::Field) -> crate::Field where F: Fold + ?Sized, { - Field { + crate::Field { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), vis: f.fold_visibility(node.vis), mutability: f.fold_field_mutability(node.mutability), @@ -1534,32 +1896,38 @@ where } 
} #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_field_mutability<F>(f: &mut F, node: FieldMutability) -> FieldMutability +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_field_mutability<F>( + f: &mut F, + node: crate::FieldMutability, +) -> crate::FieldMutability where F: Fold + ?Sized, { match node { - FieldMutability::None => FieldMutability::None, + crate::FieldMutability::None => crate::FieldMutability::None, } } #[cfg(feature = "full")] -pub fn fold_field_pat<F>(f: &mut F, node: FieldPat) -> FieldPat +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_field_pat<F>(f: &mut F, node: crate::FieldPat) -> crate::FieldPat where F: Fold + ?Sized, { - FieldPat { + crate::FieldPat { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), member: f.fold_member(node.member), colon_token: node.colon_token, pat: Box::new(f.fold_pat(*node.pat)), } } -#[cfg(feature = "full")] -pub fn fold_field_value<F>(f: &mut F, node: FieldValue) -> FieldValue +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_field_value<F>(f: &mut F, node: crate::FieldValue) -> crate::FieldValue where F: Fold + ?Sized, { - FieldValue { + crate::FieldValue { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), member: f.fold_member(node.member), colon_token: node.colon_token, @@ -1567,84 +1935,107 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_fields<F>(f: &mut F, node: Fields) -> Fields +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_fields<F>(f: &mut F, node: crate::Fields) -> crate::Fields where F: Fold + ?Sized, { match node { - Fields::Named(_binding_0) => Fields::Named(f.fold_fields_named(_binding_0)), - Fields::Unnamed(_binding_0) => Fields::Unnamed(f.fold_fields_unnamed(_binding_0)), - Fields::Unit => Fields::Unit, + crate::Fields::Named(_binding_0) => { + 
crate::Fields::Named(f.fold_fields_named(_binding_0)) + } + crate::Fields::Unnamed(_binding_0) => { + crate::Fields::Unnamed(f.fold_fields_unnamed(_binding_0)) + } + crate::Fields::Unit => crate::Fields::Unit, } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_fields_named<F>(f: &mut F, node: FieldsNamed) -> FieldsNamed +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_fields_named<F>(f: &mut F, node: crate::FieldsNamed) -> crate::FieldsNamed where F: Fold + ?Sized, { - FieldsNamed { + crate::FieldsNamed { brace_token: node.brace_token, named: FoldHelper::lift(node.named, |it| f.fold_field(it)), } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_fields_unnamed<F>(f: &mut F, node: FieldsUnnamed) -> FieldsUnnamed +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_fields_unnamed<F>( + f: &mut F, + node: crate::FieldsUnnamed, +) -> crate::FieldsUnnamed where F: Fold + ?Sized, { - FieldsUnnamed { + crate::FieldsUnnamed { paren_token: node.paren_token, unnamed: FoldHelper::lift(node.unnamed, |it| f.fold_field(it)), } } #[cfg(feature = "full")] -pub fn fold_file<F>(f: &mut F, node: File) -> File +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_file<F>(f: &mut F, node: crate::File) -> crate::File where F: Fold + ?Sized, { - File { + crate::File { shebang: node.shebang, attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), items: FoldHelper::lift(node.items, |it| f.fold_item(it)), } } #[cfg(feature = "full")] -pub fn fold_fn_arg<F>(f: &mut F, node: FnArg) -> FnArg +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_fn_arg<F>(f: &mut F, node: crate::FnArg) -> crate::FnArg where F: Fold + ?Sized, { match node { - FnArg::Receiver(_binding_0) => FnArg::Receiver(f.fold_receiver(_binding_0)), - FnArg::Typed(_binding_0) => FnArg::Typed(f.fold_pat_type(_binding_0)), + crate::FnArg::Receiver(_binding_0) => { + 
crate::FnArg::Receiver(f.fold_receiver(_binding_0)) + } + crate::FnArg::Typed(_binding_0) => { + crate::FnArg::Typed(f.fold_pat_type(_binding_0)) + } } } #[cfg(feature = "full")] -pub fn fold_foreign_item<F>(f: &mut F, node: ForeignItem) -> ForeignItem +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_foreign_item<F>(f: &mut F, node: crate::ForeignItem) -> crate::ForeignItem where F: Fold + ?Sized, { match node { - ForeignItem::Fn(_binding_0) => { - ForeignItem::Fn(f.fold_foreign_item_fn(_binding_0)) + crate::ForeignItem::Fn(_binding_0) => { + crate::ForeignItem::Fn(f.fold_foreign_item_fn(_binding_0)) + } + crate::ForeignItem::Static(_binding_0) => { + crate::ForeignItem::Static(f.fold_foreign_item_static(_binding_0)) } - ForeignItem::Static(_binding_0) => { - ForeignItem::Static(f.fold_foreign_item_static(_binding_0)) + crate::ForeignItem::Type(_binding_0) => { + crate::ForeignItem::Type(f.fold_foreign_item_type(_binding_0)) } - ForeignItem::Type(_binding_0) => { - ForeignItem::Type(f.fold_foreign_item_type(_binding_0)) + crate::ForeignItem::Macro(_binding_0) => { + crate::ForeignItem::Macro(f.fold_foreign_item_macro(_binding_0)) } - ForeignItem::Macro(_binding_0) => { - ForeignItem::Macro(f.fold_foreign_item_macro(_binding_0)) + crate::ForeignItem::Verbatim(_binding_0) => { + crate::ForeignItem::Verbatim(_binding_0) } - ForeignItem::Verbatim(_binding_0) => ForeignItem::Verbatim(_binding_0), } } #[cfg(feature = "full")] -pub fn fold_foreign_item_fn<F>(f: &mut F, node: ForeignItemFn) -> ForeignItemFn +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_foreign_item_fn<F>( + f: &mut F, + node: crate::ForeignItemFn, +) -> crate::ForeignItemFn where F: Fold + ?Sized, { - ForeignItemFn { + crate::ForeignItemFn { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), vis: f.fold_visibility(node.vis), sig: f.fold_signature(node.sig), @@ -1652,25 +2043,30 @@ where } } #[cfg(feature = "full")] -pub fn fold_foreign_item_macro<F>(f: &mut F, 
node: ForeignItemMacro) -> ForeignItemMacro +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_foreign_item_macro<F>( + f: &mut F, + node: crate::ForeignItemMacro, +) -> crate::ForeignItemMacro where F: Fold + ?Sized, { - ForeignItemMacro { + crate::ForeignItemMacro { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), mac: f.fold_macro(node.mac), semi_token: node.semi_token, } } #[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] pub fn fold_foreign_item_static<F>( f: &mut F, - node: ForeignItemStatic, -) -> ForeignItemStatic + node: crate::ForeignItemStatic, +) -> crate::ForeignItemStatic where F: Fold + ?Sized, { - ForeignItemStatic { + crate::ForeignItemStatic { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), vis: f.fold_visibility(node.vis), static_token: node.static_token, @@ -1682,11 +2078,15 @@ where } } #[cfg(feature = "full")] -pub fn fold_foreign_item_type<F>(f: &mut F, node: ForeignItemType) -> ForeignItemType +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_foreign_item_type<F>( + f: &mut F, + node: crate::ForeignItemType, +) -> crate::ForeignItemType where F: Fold + ?Sized, { - ForeignItemType { + crate::ForeignItemType { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), vis: f.fold_visibility(node.vis), type_token: node.type_token, @@ -1696,61 +2096,67 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_generic_argument<F>(f: &mut F, node: GenericArgument) -> GenericArgument +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_generic_argument<F>( + f: &mut F, + node: crate::GenericArgument, +) -> crate::GenericArgument where F: Fold + ?Sized, { match node { - GenericArgument::Lifetime(_binding_0) => { - GenericArgument::Lifetime(f.fold_lifetime(_binding_0)) + crate::GenericArgument::Lifetime(_binding_0) => { + crate::GenericArgument::Lifetime(f.fold_lifetime(_binding_0)) } - 
GenericArgument::Type(_binding_0) => { - GenericArgument::Type(f.fold_type(_binding_0)) + crate::GenericArgument::Type(_binding_0) => { + crate::GenericArgument::Type(f.fold_type(_binding_0)) } - GenericArgument::Const(_binding_0) => { - GenericArgument::Const(f.fold_expr(_binding_0)) + crate::GenericArgument::Const(_binding_0) => { + crate::GenericArgument::Const(f.fold_expr(_binding_0)) } - GenericArgument::AssocType(_binding_0) => { - GenericArgument::AssocType(f.fold_assoc_type(_binding_0)) + crate::GenericArgument::AssocType(_binding_0) => { + crate::GenericArgument::AssocType(f.fold_assoc_type(_binding_0)) } - GenericArgument::AssocConst(_binding_0) => { - GenericArgument::AssocConst(f.fold_assoc_const(_binding_0)) + crate::GenericArgument::AssocConst(_binding_0) => { + crate::GenericArgument::AssocConst(f.fold_assoc_const(_binding_0)) } - GenericArgument::Constraint(_binding_0) => { - GenericArgument::Constraint(f.fold_constraint(_binding_0)) + crate::GenericArgument::Constraint(_binding_0) => { + crate::GenericArgument::Constraint(f.fold_constraint(_binding_0)) } } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_generic_param<F>(f: &mut F, node: GenericParam) -> GenericParam +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_generic_param<F>(f: &mut F, node: crate::GenericParam) -> crate::GenericParam where F: Fold + ?Sized, { match node { - GenericParam::Lifetime(_binding_0) => { - GenericParam::Lifetime(f.fold_lifetime_param(_binding_0)) + crate::GenericParam::Lifetime(_binding_0) => { + crate::GenericParam::Lifetime(f.fold_lifetime_param(_binding_0)) } - GenericParam::Type(_binding_0) => { - GenericParam::Type(f.fold_type_param(_binding_0)) + crate::GenericParam::Type(_binding_0) => { + crate::GenericParam::Type(f.fold_type_param(_binding_0)) } - GenericParam::Const(_binding_0) => { - GenericParam::Const(f.fold_const_param(_binding_0)) + crate::GenericParam::Const(_binding_0) => { + 
crate::GenericParam::Const(f.fold_const_param(_binding_0)) } } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_generics<F>(f: &mut F, node: Generics) -> Generics +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_generics<F>(f: &mut F, node: crate::Generics) -> crate::Generics where F: Fold + ?Sized, { - Generics { + crate::Generics { lt_token: node.lt_token, params: FoldHelper::lift(node.params, |it| f.fold_generic_param(it)), gt_token: node.gt_token, where_clause: (node.where_clause).map(|it| f.fold_where_clause(it)), } } -pub fn fold_ident<F>(f: &mut F, node: Ident) -> Ident +pub fn fold_ident<F>(f: &mut F, node: proc_macro2::Ident) -> proc_macro2::Ident where F: Fold + ?Sized, { @@ -1760,28 +2166,37 @@ where node } #[cfg(feature = "full")] -pub fn fold_impl_item<F>(f: &mut F, node: ImplItem) -> ImplItem +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_impl_item<F>(f: &mut F, node: crate::ImplItem) -> crate::ImplItem where F: Fold + ?Sized, { match node { - ImplItem::Const(_binding_0) => { - ImplItem::Const(f.fold_impl_item_const(_binding_0)) + crate::ImplItem::Const(_binding_0) => { + crate::ImplItem::Const(f.fold_impl_item_const(_binding_0)) + } + crate::ImplItem::Fn(_binding_0) => { + crate::ImplItem::Fn(f.fold_impl_item_fn(_binding_0)) } - ImplItem::Fn(_binding_0) => ImplItem::Fn(f.fold_impl_item_fn(_binding_0)), - ImplItem::Type(_binding_0) => ImplItem::Type(f.fold_impl_item_type(_binding_0)), - ImplItem::Macro(_binding_0) => { - ImplItem::Macro(f.fold_impl_item_macro(_binding_0)) + crate::ImplItem::Type(_binding_0) => { + crate::ImplItem::Type(f.fold_impl_item_type(_binding_0)) } - ImplItem::Verbatim(_binding_0) => ImplItem::Verbatim(_binding_0), + crate::ImplItem::Macro(_binding_0) => { + crate::ImplItem::Macro(f.fold_impl_item_macro(_binding_0)) + } + crate::ImplItem::Verbatim(_binding_0) => crate::ImplItem::Verbatim(_binding_0), } } #[cfg(feature = "full")] -pub fn 
fold_impl_item_const<F>(f: &mut F, node: ImplItemConst) -> ImplItemConst +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_impl_item_const<F>( + f: &mut F, + node: crate::ImplItemConst, +) -> crate::ImplItemConst where F: Fold + ?Sized, { - ImplItemConst { + crate::ImplItemConst { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), vis: f.fold_visibility(node.vis), defaultness: node.defaultness, @@ -1796,11 +2211,12 @@ where } } #[cfg(feature = "full")] -pub fn fold_impl_item_fn<F>(f: &mut F, node: ImplItemFn) -> ImplItemFn +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_impl_item_fn<F>(f: &mut F, node: crate::ImplItemFn) -> crate::ImplItemFn where F: Fold + ?Sized, { - ImplItemFn { + crate::ImplItemFn { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), vis: f.fold_visibility(node.vis), defaultness: node.defaultness, @@ -1809,22 +2225,30 @@ where } } #[cfg(feature = "full")] -pub fn fold_impl_item_macro<F>(f: &mut F, node: ImplItemMacro) -> ImplItemMacro +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_impl_item_macro<F>( + f: &mut F, + node: crate::ImplItemMacro, +) -> crate::ImplItemMacro where F: Fold + ?Sized, { - ImplItemMacro { + crate::ImplItemMacro { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), mac: f.fold_macro(node.mac), semi_token: node.semi_token, } } #[cfg(feature = "full")] -pub fn fold_impl_item_type<F>(f: &mut F, node: ImplItemType) -> ImplItemType +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_impl_item_type<F>( + f: &mut F, + node: crate::ImplItemType, +) -> crate::ImplItemType where F: Fold + ?Sized, { - ImplItemType { + crate::ImplItemType { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), vis: f.fold_visibility(node.vis), defaultness: node.defaultness, @@ -1837,58 +2261,77 @@ where } } #[cfg(feature = "full")] -pub fn fold_impl_restriction<F>(f: &mut F, node: ImplRestriction) -> ImplRestriction +#[cfg_attr(doc_cfg, 
doc(cfg(feature = "full")))] +pub fn fold_impl_restriction<F>( + f: &mut F, + node: crate::ImplRestriction, +) -> crate::ImplRestriction where F: Fold + ?Sized, { match node {} } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_index<F>(f: &mut F, node: Index) -> Index +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_index<F>(f: &mut F, node: crate::Index) -> crate::Index where F: Fold + ?Sized, { - Index { + crate::Index { index: node.index, span: f.fold_span(node.span), } } #[cfg(feature = "full")] -pub fn fold_item<F>(f: &mut F, node: Item) -> Item +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_item<F>(f: &mut F, node: crate::Item) -> crate::Item where F: Fold + ?Sized, { match node { - Item::Const(_binding_0) => Item::Const(f.fold_item_const(_binding_0)), - Item::Enum(_binding_0) => Item::Enum(f.fold_item_enum(_binding_0)), - Item::ExternCrate(_binding_0) => { - Item::ExternCrate(f.fold_item_extern_crate(_binding_0)) + crate::Item::Const(_binding_0) => { + crate::Item::Const(f.fold_item_const(_binding_0)) + } + crate::Item::Enum(_binding_0) => crate::Item::Enum(f.fold_item_enum(_binding_0)), + crate::Item::ExternCrate(_binding_0) => { + crate::Item::ExternCrate(f.fold_item_extern_crate(_binding_0)) + } + crate::Item::Fn(_binding_0) => crate::Item::Fn(f.fold_item_fn(_binding_0)), + crate::Item::ForeignMod(_binding_0) => { + crate::Item::ForeignMod(f.fold_item_foreign_mod(_binding_0)) } - Item::Fn(_binding_0) => Item::Fn(f.fold_item_fn(_binding_0)), - Item::ForeignMod(_binding_0) => { - Item::ForeignMod(f.fold_item_foreign_mod(_binding_0)) + crate::Item::Impl(_binding_0) => crate::Item::Impl(f.fold_item_impl(_binding_0)), + crate::Item::Macro(_binding_0) => { + crate::Item::Macro(f.fold_item_macro(_binding_0)) } - Item::Impl(_binding_0) => Item::Impl(f.fold_item_impl(_binding_0)), - Item::Macro(_binding_0) => Item::Macro(f.fold_item_macro(_binding_0)), - Item::Mod(_binding_0) => 
Item::Mod(f.fold_item_mod(_binding_0)), - Item::Static(_binding_0) => Item::Static(f.fold_item_static(_binding_0)), - Item::Struct(_binding_0) => Item::Struct(f.fold_item_struct(_binding_0)), - Item::Trait(_binding_0) => Item::Trait(f.fold_item_trait(_binding_0)), - Item::TraitAlias(_binding_0) => { - Item::TraitAlias(f.fold_item_trait_alias(_binding_0)) + crate::Item::Mod(_binding_0) => crate::Item::Mod(f.fold_item_mod(_binding_0)), + crate::Item::Static(_binding_0) => { + crate::Item::Static(f.fold_item_static(_binding_0)) } - Item::Type(_binding_0) => Item::Type(f.fold_item_type(_binding_0)), - Item::Union(_binding_0) => Item::Union(f.fold_item_union(_binding_0)), - Item::Use(_binding_0) => Item::Use(f.fold_item_use(_binding_0)), - Item::Verbatim(_binding_0) => Item::Verbatim(_binding_0), + crate::Item::Struct(_binding_0) => { + crate::Item::Struct(f.fold_item_struct(_binding_0)) + } + crate::Item::Trait(_binding_0) => { + crate::Item::Trait(f.fold_item_trait(_binding_0)) + } + crate::Item::TraitAlias(_binding_0) => { + crate::Item::TraitAlias(f.fold_item_trait_alias(_binding_0)) + } + crate::Item::Type(_binding_0) => crate::Item::Type(f.fold_item_type(_binding_0)), + crate::Item::Union(_binding_0) => { + crate::Item::Union(f.fold_item_union(_binding_0)) + } + crate::Item::Use(_binding_0) => crate::Item::Use(f.fold_item_use(_binding_0)), + crate::Item::Verbatim(_binding_0) => crate::Item::Verbatim(_binding_0), } } #[cfg(feature = "full")] -pub fn fold_item_const<F>(f: &mut F, node: ItemConst) -> ItemConst +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_item_const<F>(f: &mut F, node: crate::ItemConst) -> crate::ItemConst where F: Fold + ?Sized, { - ItemConst { + crate::ItemConst { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), vis: f.fold_visibility(node.vis), const_token: node.const_token, @@ -1902,11 +2345,12 @@ where } } #[cfg(feature = "full")] -pub fn fold_item_enum<F>(f: &mut F, node: ItemEnum) -> ItemEnum 
+#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_item_enum<F>(f: &mut F, node: crate::ItemEnum) -> crate::ItemEnum where F: Fold + ?Sized, { - ItemEnum { + crate::ItemEnum { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), vis: f.fold_visibility(node.vis), enum_token: node.enum_token, @@ -1917,11 +2361,15 @@ where } } #[cfg(feature = "full")] -pub fn fold_item_extern_crate<F>(f: &mut F, node: ItemExternCrate) -> ItemExternCrate +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_item_extern_crate<F>( + f: &mut F, + node: crate::ItemExternCrate, +) -> crate::ItemExternCrate where F: Fold + ?Sized, { - ItemExternCrate { + crate::ItemExternCrate { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), vis: f.fold_visibility(node.vis), extern_token: node.extern_token, @@ -1932,11 +2380,12 @@ where } } #[cfg(feature = "full")] -pub fn fold_item_fn<F>(f: &mut F, node: ItemFn) -> ItemFn +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_item_fn<F>(f: &mut F, node: crate::ItemFn) -> crate::ItemFn where F: Fold + ?Sized, { - ItemFn { + crate::ItemFn { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), vis: f.fold_visibility(node.vis), sig: f.fold_signature(node.sig), @@ -1944,11 +2393,15 @@ where } } #[cfg(feature = "full")] -pub fn fold_item_foreign_mod<F>(f: &mut F, node: ItemForeignMod) -> ItemForeignMod +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_item_foreign_mod<F>( + f: &mut F, + node: crate::ItemForeignMod, +) -> crate::ItemForeignMod where F: Fold + ?Sized, { - ItemForeignMod { + crate::ItemForeignMod { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), unsafety: node.unsafety, abi: f.fold_abi(node.abi), @@ -1957,11 +2410,12 @@ where } } #[cfg(feature = "full")] -pub fn fold_item_impl<F>(f: &mut F, node: ItemImpl) -> ItemImpl +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_item_impl<F>(f: &mut F, node: crate::ItemImpl) -> crate::ItemImpl where F: 
Fold + ?Sized, { - ItemImpl { + crate::ItemImpl { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), defaultness: node.defaultness, unsafety: node.unsafety, @@ -1974,11 +2428,12 @@ where } } #[cfg(feature = "full")] -pub fn fold_item_macro<F>(f: &mut F, node: ItemMacro) -> ItemMacro +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_item_macro<F>(f: &mut F, node: crate::ItemMacro) -> crate::ItemMacro where F: Fold + ?Sized, { - ItemMacro { + crate::ItemMacro { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), ident: (node.ident).map(|it| f.fold_ident(it)), mac: f.fold_macro(node.mac), @@ -1986,11 +2441,12 @@ where } } #[cfg(feature = "full")] -pub fn fold_item_mod<F>(f: &mut F, node: ItemMod) -> ItemMod +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_item_mod<F>(f: &mut F, node: crate::ItemMod) -> crate::ItemMod where F: Fold + ?Sized, { - ItemMod { + crate::ItemMod { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), vis: f.fold_visibility(node.vis), unsafety: node.unsafety, @@ -2002,11 +2458,12 @@ where } } #[cfg(feature = "full")] -pub fn fold_item_static<F>(f: &mut F, node: ItemStatic) -> ItemStatic +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_item_static<F>(f: &mut F, node: crate::ItemStatic) -> crate::ItemStatic where F: Fold + ?Sized, { - ItemStatic { + crate::ItemStatic { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), vis: f.fold_visibility(node.vis), static_token: node.static_token, @@ -2020,11 +2477,12 @@ where } } #[cfg(feature = "full")] -pub fn fold_item_struct<F>(f: &mut F, node: ItemStruct) -> ItemStruct +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_item_struct<F>(f: &mut F, node: crate::ItemStruct) -> crate::ItemStruct where F: Fold + ?Sized, { - ItemStruct { + crate::ItemStruct { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), vis: f.fold_visibility(node.vis), struct_token: node.struct_token, @@ -2035,11 +2493,12 @@ 
where } } #[cfg(feature = "full")] -pub fn fold_item_trait<F>(f: &mut F, node: ItemTrait) -> ItemTrait +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_item_trait<F>(f: &mut F, node: crate::ItemTrait) -> crate::ItemTrait where F: Fold + ?Sized, { - ItemTrait { + crate::ItemTrait { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), vis: f.fold_visibility(node.vis), unsafety: node.unsafety, @@ -2058,11 +2517,15 @@ where } } #[cfg(feature = "full")] -pub fn fold_item_trait_alias<F>(f: &mut F, node: ItemTraitAlias) -> ItemTraitAlias +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_item_trait_alias<F>( + f: &mut F, + node: crate::ItemTraitAlias, +) -> crate::ItemTraitAlias where F: Fold + ?Sized, { - ItemTraitAlias { + crate::ItemTraitAlias { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), vis: f.fold_visibility(node.vis), trait_token: node.trait_token, @@ -2074,11 +2537,12 @@ where } } #[cfg(feature = "full")] -pub fn fold_item_type<F>(f: &mut F, node: ItemType) -> ItemType +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_item_type<F>(f: &mut F, node: crate::ItemType) -> crate::ItemType where F: Fold + ?Sized, { - ItemType { + crate::ItemType { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), vis: f.fold_visibility(node.vis), type_token: node.type_token, @@ -2090,11 +2554,12 @@ where } } #[cfg(feature = "full")] -pub fn fold_item_union<F>(f: &mut F, node: ItemUnion) -> ItemUnion +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_item_union<F>(f: &mut F, node: crate::ItemUnion) -> crate::ItemUnion where F: Fold + ?Sized, { - ItemUnion { + crate::ItemUnion { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), vis: f.fold_visibility(node.vis), union_token: node.union_token, @@ -2104,11 +2569,12 @@ where } } #[cfg(feature = "full")] -pub fn fold_item_use<F>(f: &mut F, node: ItemUse) -> ItemUse +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn 
fold_item_use<F>(f: &mut F, node: crate::ItemUse) -> crate::ItemUse where F: Fold + ?Sized, { - ItemUse { + crate::ItemUse { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), vis: f.fold_visibility(node.vis), use_token: node.use_token, @@ -2118,61 +2584,68 @@ where } } #[cfg(feature = "full")] -pub fn fold_label<F>(f: &mut F, node: Label) -> Label +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_label<F>(f: &mut F, node: crate::Label) -> crate::Label where F: Fold + ?Sized, { - Label { + crate::Label { name: f.fold_lifetime(node.name), colon_token: node.colon_token, } } -pub fn fold_lifetime<F>(f: &mut F, node: Lifetime) -> Lifetime +pub fn fold_lifetime<F>(f: &mut F, node: crate::Lifetime) -> crate::Lifetime where F: Fold + ?Sized, { - Lifetime { + crate::Lifetime { apostrophe: f.fold_span(node.apostrophe), ident: f.fold_ident(node.ident), } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_lifetime_param<F>(f: &mut F, node: LifetimeParam) -> LifetimeParam +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_lifetime_param<F>( + f: &mut F, + node: crate::LifetimeParam, +) -> crate::LifetimeParam where F: Fold + ?Sized, { - LifetimeParam { + crate::LifetimeParam { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), lifetime: f.fold_lifetime(node.lifetime), colon_token: node.colon_token, bounds: FoldHelper::lift(node.bounds, |it| f.fold_lifetime(it)), } } -pub fn fold_lit<F>(f: &mut F, node: Lit) -> Lit +pub fn fold_lit<F>(f: &mut F, node: crate::Lit) -> crate::Lit where F: Fold + ?Sized, { match node { - Lit::Str(_binding_0) => Lit::Str(f.fold_lit_str(_binding_0)), - Lit::ByteStr(_binding_0) => Lit::ByteStr(f.fold_lit_byte_str(_binding_0)), - Lit::Byte(_binding_0) => Lit::Byte(f.fold_lit_byte(_binding_0)), - Lit::Char(_binding_0) => Lit::Char(f.fold_lit_char(_binding_0)), - Lit::Int(_binding_0) => Lit::Int(f.fold_lit_int(_binding_0)), - Lit::Float(_binding_0) => 
Lit::Float(f.fold_lit_float(_binding_0)), - Lit::Bool(_binding_0) => Lit::Bool(f.fold_lit_bool(_binding_0)), - Lit::Verbatim(_binding_0) => Lit::Verbatim(_binding_0), + crate::Lit::Str(_binding_0) => crate::Lit::Str(f.fold_lit_str(_binding_0)), + crate::Lit::ByteStr(_binding_0) => { + crate::Lit::ByteStr(f.fold_lit_byte_str(_binding_0)) + } + crate::Lit::Byte(_binding_0) => crate::Lit::Byte(f.fold_lit_byte(_binding_0)), + crate::Lit::Char(_binding_0) => crate::Lit::Char(f.fold_lit_char(_binding_0)), + crate::Lit::Int(_binding_0) => crate::Lit::Int(f.fold_lit_int(_binding_0)), + crate::Lit::Float(_binding_0) => crate::Lit::Float(f.fold_lit_float(_binding_0)), + crate::Lit::Bool(_binding_0) => crate::Lit::Bool(f.fold_lit_bool(_binding_0)), + crate::Lit::Verbatim(_binding_0) => crate::Lit::Verbatim(_binding_0), } } -pub fn fold_lit_bool<F>(f: &mut F, node: LitBool) -> LitBool +pub fn fold_lit_bool<F>(f: &mut F, node: crate::LitBool) -> crate::LitBool where F: Fold + ?Sized, { - LitBool { + crate::LitBool { value: node.value, span: f.fold_span(node.span), } } -pub fn fold_lit_byte<F>(f: &mut F, node: LitByte) -> LitByte +pub fn fold_lit_byte<F>(f: &mut F, node: crate::LitByte) -> crate::LitByte where F: Fold + ?Sized, { @@ -2181,7 +2654,7 @@ where node.set_span(span); node } -pub fn fold_lit_byte_str<F>(f: &mut F, node: LitByteStr) -> LitByteStr +pub fn fold_lit_byte_str<F>(f: &mut F, node: crate::LitByteStr) -> crate::LitByteStr where F: Fold + ?Sized, { @@ -2190,7 +2663,7 @@ where node.set_span(span); node } -pub fn fold_lit_char<F>(f: &mut F, node: LitChar) -> LitChar +pub fn fold_lit_char<F>(f: &mut F, node: crate::LitChar) -> crate::LitChar where F: Fold + ?Sized, { @@ -2199,7 +2672,7 @@ where node.set_span(span); node } -pub fn fold_lit_float<F>(f: &mut F, node: LitFloat) -> LitFloat +pub fn fold_lit_float<F>(f: &mut F, node: crate::LitFloat) -> crate::LitFloat where F: Fold + ?Sized, { @@ -2208,7 +2681,7 @@ where node.set_span(span); node } -pub fn 
fold_lit_int<F>(f: &mut F, node: LitInt) -> LitInt +pub fn fold_lit_int<F>(f: &mut F, node: crate::LitInt) -> crate::LitInt where F: Fold + ?Sized, { @@ -2217,7 +2690,7 @@ where node.set_span(span); node } -pub fn fold_lit_str<F>(f: &mut F, node: LitStr) -> LitStr +pub fn fold_lit_str<F>(f: &mut F, node: crate::LitStr) -> crate::LitStr where F: Fold + ?Sized, { @@ -2227,11 +2700,12 @@ where node } #[cfg(feature = "full")] -pub fn fold_local<F>(f: &mut F, node: Local) -> Local +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_local<F>(f: &mut F, node: crate::Local) -> crate::Local where F: Fold + ?Sized, { - Local { + crate::Local { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), let_token: node.let_token, pat: f.fold_pat(node.pat), @@ -2240,22 +2714,24 @@ where } } #[cfg(feature = "full")] -pub fn fold_local_init<F>(f: &mut F, node: LocalInit) -> LocalInit +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_local_init<F>(f: &mut F, node: crate::LocalInit) -> crate::LocalInit where F: Fold + ?Sized, { - LocalInit { + crate::LocalInit { eq_token: node.eq_token, expr: Box::new(f.fold_expr(*node.expr)), diverge: (node.diverge).map(|it| ((it).0, Box::new(f.fold_expr(*(it).1)))), } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_macro<F>(f: &mut F, node: Macro) -> Macro +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_macro<F>(f: &mut F, node: crate::Macro) -> crate::Macro where F: Fold + ?Sized, { - Macro { + crate::Macro { path: f.fold_path(node.path), bang_token: node.bang_token, delimiter: f.fold_macro_delimiter(node.delimiter), @@ -2263,108 +2739,136 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_macro_delimiter<F>(f: &mut F, node: MacroDelimiter) -> MacroDelimiter +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_macro_delimiter<F>( + f: &mut F, + node: crate::MacroDelimiter, +) -> crate::MacroDelimiter 
where F: Fold + ?Sized, { match node { - MacroDelimiter::Paren(_binding_0) => MacroDelimiter::Paren(_binding_0), - MacroDelimiter::Brace(_binding_0) => MacroDelimiter::Brace(_binding_0), - MacroDelimiter::Bracket(_binding_0) => MacroDelimiter::Bracket(_binding_0), + crate::MacroDelimiter::Paren(_binding_0) => { + crate::MacroDelimiter::Paren(_binding_0) + } + crate::MacroDelimiter::Brace(_binding_0) => { + crate::MacroDelimiter::Brace(_binding_0) + } + crate::MacroDelimiter::Bracket(_binding_0) => { + crate::MacroDelimiter::Bracket(_binding_0) + } } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_member<F>(f: &mut F, node: Member) -> Member +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_member<F>(f: &mut F, node: crate::Member) -> crate::Member where F: Fold + ?Sized, { match node { - Member::Named(_binding_0) => Member::Named(f.fold_ident(_binding_0)), - Member::Unnamed(_binding_0) => Member::Unnamed(f.fold_index(_binding_0)), + crate::Member::Named(_binding_0) => { + crate::Member::Named(f.fold_ident(_binding_0)) + } + crate::Member::Unnamed(_binding_0) => { + crate::Member::Unnamed(f.fold_index(_binding_0)) + } } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_meta<F>(f: &mut F, node: Meta) -> Meta +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_meta<F>(f: &mut F, node: crate::Meta) -> crate::Meta where F: Fold + ?Sized, { match node { - Meta::Path(_binding_0) => Meta::Path(f.fold_path(_binding_0)), - Meta::List(_binding_0) => Meta::List(f.fold_meta_list(_binding_0)), - Meta::NameValue(_binding_0) => { - Meta::NameValue(f.fold_meta_name_value(_binding_0)) + crate::Meta::Path(_binding_0) => crate::Meta::Path(f.fold_path(_binding_0)), + crate::Meta::List(_binding_0) => crate::Meta::List(f.fold_meta_list(_binding_0)), + crate::Meta::NameValue(_binding_0) => { + crate::Meta::NameValue(f.fold_meta_name_value(_binding_0)) } } } #[cfg(any(feature = 
"derive", feature = "full"))] -pub fn fold_meta_list<F>(f: &mut F, node: MetaList) -> MetaList +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_meta_list<F>(f: &mut F, node: crate::MetaList) -> crate::MetaList where F: Fold + ?Sized, { - MetaList { + crate::MetaList { path: f.fold_path(node.path), delimiter: f.fold_macro_delimiter(node.delimiter), tokens: node.tokens, } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_meta_name_value<F>(f: &mut F, node: MetaNameValue) -> MetaNameValue +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_meta_name_value<F>( + f: &mut F, + node: crate::MetaNameValue, +) -> crate::MetaNameValue where F: Fold + ?Sized, { - MetaNameValue { + crate::MetaNameValue { path: f.fold_path(node.path), eq_token: node.eq_token, value: f.fold_expr(node.value), } } #[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] pub fn fold_parenthesized_generic_arguments<F>( f: &mut F, - node: ParenthesizedGenericArguments, -) -> ParenthesizedGenericArguments + node: crate::ParenthesizedGenericArguments, +) -> crate::ParenthesizedGenericArguments where F: Fold + ?Sized, { - ParenthesizedGenericArguments { + crate::ParenthesizedGenericArguments { paren_token: node.paren_token, inputs: FoldHelper::lift(node.inputs, |it| f.fold_type(it)), output: f.fold_return_type(node.output), } } #[cfg(feature = "full")] -pub fn fold_pat<F>(f: &mut F, node: Pat) -> Pat +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_pat<F>(f: &mut F, node: crate::Pat) -> crate::Pat where F: Fold + ?Sized, { match node { - Pat::Const(_binding_0) => Pat::Const(f.fold_expr_const(_binding_0)), - Pat::Ident(_binding_0) => Pat::Ident(f.fold_pat_ident(_binding_0)), - Pat::Lit(_binding_0) => Pat::Lit(f.fold_expr_lit(_binding_0)), - Pat::Macro(_binding_0) => Pat::Macro(f.fold_expr_macro(_binding_0)), - Pat::Or(_binding_0) => 
Pat::Or(f.fold_pat_or(_binding_0)), - Pat::Paren(_binding_0) => Pat::Paren(f.fold_pat_paren(_binding_0)), - Pat::Path(_binding_0) => Pat::Path(f.fold_expr_path(_binding_0)), - Pat::Range(_binding_0) => Pat::Range(f.fold_expr_range(_binding_0)), - Pat::Reference(_binding_0) => Pat::Reference(f.fold_pat_reference(_binding_0)), - Pat::Rest(_binding_0) => Pat::Rest(f.fold_pat_rest(_binding_0)), - Pat::Slice(_binding_0) => Pat::Slice(f.fold_pat_slice(_binding_0)), - Pat::Struct(_binding_0) => Pat::Struct(f.fold_pat_struct(_binding_0)), - Pat::Tuple(_binding_0) => Pat::Tuple(f.fold_pat_tuple(_binding_0)), - Pat::TupleStruct(_binding_0) => { - Pat::TupleStruct(f.fold_pat_tuple_struct(_binding_0)) + crate::Pat::Const(_binding_0) => crate::Pat::Const(f.fold_expr_const(_binding_0)), + crate::Pat::Ident(_binding_0) => crate::Pat::Ident(f.fold_pat_ident(_binding_0)), + crate::Pat::Lit(_binding_0) => crate::Pat::Lit(f.fold_expr_lit(_binding_0)), + crate::Pat::Macro(_binding_0) => crate::Pat::Macro(f.fold_expr_macro(_binding_0)), + crate::Pat::Or(_binding_0) => crate::Pat::Or(f.fold_pat_or(_binding_0)), + crate::Pat::Paren(_binding_0) => crate::Pat::Paren(f.fold_pat_paren(_binding_0)), + crate::Pat::Path(_binding_0) => crate::Pat::Path(f.fold_expr_path(_binding_0)), + crate::Pat::Range(_binding_0) => crate::Pat::Range(f.fold_expr_range(_binding_0)), + crate::Pat::Reference(_binding_0) => { + crate::Pat::Reference(f.fold_pat_reference(_binding_0)) + } + crate::Pat::Rest(_binding_0) => crate::Pat::Rest(f.fold_pat_rest(_binding_0)), + crate::Pat::Slice(_binding_0) => crate::Pat::Slice(f.fold_pat_slice(_binding_0)), + crate::Pat::Struct(_binding_0) => { + crate::Pat::Struct(f.fold_pat_struct(_binding_0)) } - Pat::Type(_binding_0) => Pat::Type(f.fold_pat_type(_binding_0)), - Pat::Verbatim(_binding_0) => Pat::Verbatim(_binding_0), - Pat::Wild(_binding_0) => Pat::Wild(f.fold_pat_wild(_binding_0)), + crate::Pat::Tuple(_binding_0) => crate::Pat::Tuple(f.fold_pat_tuple(_binding_0)), + 
crate::Pat::TupleStruct(_binding_0) => { + crate::Pat::TupleStruct(f.fold_pat_tuple_struct(_binding_0)) + } + crate::Pat::Type(_binding_0) => crate::Pat::Type(f.fold_pat_type(_binding_0)), + crate::Pat::Verbatim(_binding_0) => crate::Pat::Verbatim(_binding_0), + crate::Pat::Wild(_binding_0) => crate::Pat::Wild(f.fold_pat_wild(_binding_0)), } } #[cfg(feature = "full")] -pub fn fold_pat_ident<F>(f: &mut F, node: PatIdent) -> PatIdent +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_pat_ident<F>(f: &mut F, node: crate::PatIdent) -> crate::PatIdent where F: Fold + ?Sized, { - PatIdent { + crate::PatIdent { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), by_ref: node.by_ref, mutability: node.mutability, @@ -2373,33 +2877,36 @@ where } } #[cfg(feature = "full")] -pub fn fold_pat_or<F>(f: &mut F, node: PatOr) -> PatOr +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_pat_or<F>(f: &mut F, node: crate::PatOr) -> crate::PatOr where F: Fold + ?Sized, { - PatOr { + crate::PatOr { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), leading_vert: node.leading_vert, cases: FoldHelper::lift(node.cases, |it| f.fold_pat(it)), } } #[cfg(feature = "full")] -pub fn fold_pat_paren<F>(f: &mut F, node: PatParen) -> PatParen +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_pat_paren<F>(f: &mut F, node: crate::PatParen) -> crate::PatParen where F: Fold + ?Sized, { - PatParen { + crate::PatParen { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), paren_token: node.paren_token, pat: Box::new(f.fold_pat(*node.pat)), } } #[cfg(feature = "full")] -pub fn fold_pat_reference<F>(f: &mut F, node: PatReference) -> PatReference +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_pat_reference<F>(f: &mut F, node: crate::PatReference) -> crate::PatReference where F: Fold + ?Sized, { - PatReference { + crate::PatReference { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), and_token: 
node.and_token, mutability: node.mutability, @@ -2407,32 +2914,35 @@ where } } #[cfg(feature = "full")] -pub fn fold_pat_rest<F>(f: &mut F, node: PatRest) -> PatRest +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_pat_rest<F>(f: &mut F, node: crate::PatRest) -> crate::PatRest where F: Fold + ?Sized, { - PatRest { + crate::PatRest { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), dot2_token: node.dot2_token, } } #[cfg(feature = "full")] -pub fn fold_pat_slice<F>(f: &mut F, node: PatSlice) -> PatSlice +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_pat_slice<F>(f: &mut F, node: crate::PatSlice) -> crate::PatSlice where F: Fold + ?Sized, { - PatSlice { + crate::PatSlice { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), bracket_token: node.bracket_token, elems: FoldHelper::lift(node.elems, |it| f.fold_pat(it)), } } #[cfg(feature = "full")] -pub fn fold_pat_struct<F>(f: &mut F, node: PatStruct) -> PatStruct +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_pat_struct<F>(f: &mut F, node: crate::PatStruct) -> crate::PatStruct where F: Fold + ?Sized, { - PatStruct { + crate::PatStruct { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), qself: (node.qself).map(|it| f.fold_qself(it)), path: f.fold_path(node.path), @@ -2442,22 +2952,27 @@ where } } #[cfg(feature = "full")] -pub fn fold_pat_tuple<F>(f: &mut F, node: PatTuple) -> PatTuple +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_pat_tuple<F>(f: &mut F, node: crate::PatTuple) -> crate::PatTuple where F: Fold + ?Sized, { - PatTuple { + crate::PatTuple { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), paren_token: node.paren_token, elems: FoldHelper::lift(node.elems, |it| f.fold_pat(it)), } } #[cfg(feature = "full")] -pub fn fold_pat_tuple_struct<F>(f: &mut F, node: PatTupleStruct) -> PatTupleStruct +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_pat_tuple_struct<F>( + f: &mut F, + node: 
crate::PatTupleStruct, +) -> crate::PatTupleStruct where F: Fold + ?Sized, { - PatTupleStruct { + crate::PatTupleStruct { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), qself: (node.qself).map(|it| f.fold_qself(it)), path: f.fold_path(node.path), @@ -2466,11 +2981,12 @@ where } } #[cfg(feature = "full")] -pub fn fold_pat_type<F>(f: &mut F, node: PatType) -> PatType +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_pat_type<F>(f: &mut F, node: crate::PatType) -> crate::PatType where F: Fold + ?Sized, { - PatType { + crate::PatType { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), pat: Box::new(f.fold_pat(*node.pat)), colon_token: node.colon_token, @@ -2478,74 +2994,86 @@ where } } #[cfg(feature = "full")] -pub fn fold_pat_wild<F>(f: &mut F, node: PatWild) -> PatWild +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_pat_wild<F>(f: &mut F, node: crate::PatWild) -> crate::PatWild where F: Fold + ?Sized, { - PatWild { + crate::PatWild { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), underscore_token: node.underscore_token, } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_path<F>(f: &mut F, node: Path) -> Path +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_path<F>(f: &mut F, node: crate::Path) -> crate::Path where F: Fold + ?Sized, { - Path { + crate::Path { leading_colon: node.leading_colon, segments: FoldHelper::lift(node.segments, |it| f.fold_path_segment(it)), } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_path_arguments<F>(f: &mut F, node: PathArguments) -> PathArguments +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_path_arguments<F>( + f: &mut F, + node: crate::PathArguments, +) -> crate::PathArguments where F: Fold + ?Sized, { match node { - PathArguments::None => PathArguments::None, - PathArguments::AngleBracketed(_binding_0) => { - PathArguments::AngleBracketed( + 
crate::PathArguments::None => crate::PathArguments::None, + crate::PathArguments::AngleBracketed(_binding_0) => { + crate::PathArguments::AngleBracketed( f.fold_angle_bracketed_generic_arguments(_binding_0), ) } - PathArguments::Parenthesized(_binding_0) => { - PathArguments::Parenthesized( + crate::PathArguments::Parenthesized(_binding_0) => { + crate::PathArguments::Parenthesized( f.fold_parenthesized_generic_arguments(_binding_0), ) } } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_path_segment<F>(f: &mut F, node: PathSegment) -> PathSegment +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_path_segment<F>(f: &mut F, node: crate::PathSegment) -> crate::PathSegment where F: Fold + ?Sized, { - PathSegment { + crate::PathSegment { ident: f.fold_ident(node.ident), arguments: f.fold_path_arguments(node.arguments), } } #[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] pub fn fold_predicate_lifetime<F>( f: &mut F, - node: PredicateLifetime, -) -> PredicateLifetime + node: crate::PredicateLifetime, +) -> crate::PredicateLifetime where F: Fold + ?Sized, { - PredicateLifetime { + crate::PredicateLifetime { lifetime: f.fold_lifetime(node.lifetime), colon_token: node.colon_token, bounds: FoldHelper::lift(node.bounds, |it| f.fold_lifetime(it)), } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_predicate_type<F>(f: &mut F, node: PredicateType) -> PredicateType +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_predicate_type<F>( + f: &mut F, + node: crate::PredicateType, +) -> crate::PredicateType where F: Fold + ?Sized, { - PredicateType { + crate::PredicateType { lifetimes: (node.lifetimes).map(|it| f.fold_bound_lifetimes(it)), bounded_ty: f.fold_type(node.bounded_ty), colon_token: node.colon_token, @@ -2553,11 +3081,12 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn 
fold_qself<F>(f: &mut F, node: QSelf) -> QSelf +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_qself<F>(f: &mut F, node: crate::QSelf) -> crate::QSelf where F: Fold + ?Sized, { - QSelf { + crate::QSelf { lt_token: node.lt_token, ty: Box::new(f.fold_type(*node.ty)), position: node.position, @@ -2566,21 +3095,25 @@ where } } #[cfg(feature = "full")] -pub fn fold_range_limits<F>(f: &mut F, node: RangeLimits) -> RangeLimits +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_range_limits<F>(f: &mut F, node: crate::RangeLimits) -> crate::RangeLimits where F: Fold + ?Sized, { match node { - RangeLimits::HalfOpen(_binding_0) => RangeLimits::HalfOpen(_binding_0), - RangeLimits::Closed(_binding_0) => RangeLimits::Closed(_binding_0), + crate::RangeLimits::HalfOpen(_binding_0) => { + crate::RangeLimits::HalfOpen(_binding_0) + } + crate::RangeLimits::Closed(_binding_0) => crate::RangeLimits::Closed(_binding_0), } } #[cfg(feature = "full")] -pub fn fold_receiver<F>(f: &mut F, node: Receiver) -> Receiver +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_receiver<F>(f: &mut F, node: crate::Receiver) -> crate::Receiver where F: Fold + ?Sized, { - Receiver { + crate::Receiver { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), reference: (node.reference) .map(|it| ((it).0, ((it).1).map(|it| f.fold_lifetime(it)))), @@ -2591,23 +3124,25 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_return_type<F>(f: &mut F, node: ReturnType) -> ReturnType +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_return_type<F>(f: &mut F, node: crate::ReturnType) -> crate::ReturnType where F: Fold + ?Sized, { match node { - ReturnType::Default => ReturnType::Default, - ReturnType::Type(_binding_0, _binding_1) => { - ReturnType::Type(_binding_0, Box::new(f.fold_type(*_binding_1))) + crate::ReturnType::Default => crate::ReturnType::Default, + 
crate::ReturnType::Type(_binding_0, _binding_1) => { + crate::ReturnType::Type(_binding_0, Box::new(f.fold_type(*_binding_1))) } } } #[cfg(feature = "full")] -pub fn fold_signature<F>(f: &mut F, node: Signature) -> Signature +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_signature<F>(f: &mut F, node: crate::Signature) -> crate::Signature where F: Fold + ?Sized, { - Signature { + crate::Signature { constness: node.constness, asyncness: node.asyncness, unsafety: node.unsafety, @@ -2621,53 +3156,64 @@ where output: f.fold_return_type(node.output), } } -pub fn fold_span<F>(f: &mut F, node: Span) -> Span +pub fn fold_span<F>(f: &mut F, node: proc_macro2::Span) -> proc_macro2::Span where F: Fold + ?Sized, { node } #[cfg(feature = "full")] -pub fn fold_static_mutability<F>(f: &mut F, node: StaticMutability) -> StaticMutability +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_static_mutability<F>( + f: &mut F, + node: crate::StaticMutability, +) -> crate::StaticMutability where F: Fold + ?Sized, { match node { - StaticMutability::Mut(_binding_0) => StaticMutability::Mut(_binding_0), - StaticMutability::None => StaticMutability::None, + crate::StaticMutability::Mut(_binding_0) => { + crate::StaticMutability::Mut(_binding_0) + } + crate::StaticMutability::None => crate::StaticMutability::None, } } #[cfg(feature = "full")] -pub fn fold_stmt<F>(f: &mut F, node: Stmt) -> Stmt +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_stmt<F>(f: &mut F, node: crate::Stmt) -> crate::Stmt where F: Fold + ?Sized, { match node { - Stmt::Local(_binding_0) => Stmt::Local(f.fold_local(_binding_0)), - Stmt::Item(_binding_0) => Stmt::Item(f.fold_item(_binding_0)), - Stmt::Expr(_binding_0, _binding_1) => { - Stmt::Expr(f.fold_expr(_binding_0), _binding_1) + crate::Stmt::Local(_binding_0) => crate::Stmt::Local(f.fold_local(_binding_0)), + crate::Stmt::Item(_binding_0) => crate::Stmt::Item(f.fold_item(_binding_0)), + crate::Stmt::Expr(_binding_0, 
_binding_1) => { + crate::Stmt::Expr(f.fold_expr(_binding_0), _binding_1) + } + crate::Stmt::Macro(_binding_0) => { + crate::Stmt::Macro(f.fold_stmt_macro(_binding_0)) } - Stmt::Macro(_binding_0) => Stmt::Macro(f.fold_stmt_macro(_binding_0)), } } #[cfg(feature = "full")] -pub fn fold_stmt_macro<F>(f: &mut F, node: StmtMacro) -> StmtMacro +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_stmt_macro<F>(f: &mut F, node: crate::StmtMacro) -> crate::StmtMacro where F: Fold + ?Sized, { - StmtMacro { + crate::StmtMacro { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), mac: f.fold_macro(node.mac), semi_token: node.semi_token, } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_trait_bound<F>(f: &mut F, node: TraitBound) -> TraitBound +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_trait_bound<F>(f: &mut F, node: crate::TraitBound) -> crate::TraitBound where F: Fold + ?Sized, { - TraitBound { + crate::TraitBound { paren_token: node.paren_token, modifier: f.fold_trait_bound_modifier(node.modifier), lifetimes: (node.lifetimes).map(|it| f.fold_bound_lifetimes(it)), @@ -2675,43 +3221,53 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] pub fn fold_trait_bound_modifier<F>( f: &mut F, - node: TraitBoundModifier, -) -> TraitBoundModifier + node: crate::TraitBoundModifier, +) -> crate::TraitBoundModifier where F: Fold + ?Sized, { match node { - TraitBoundModifier::None => TraitBoundModifier::None, - TraitBoundModifier::Maybe(_binding_0) => TraitBoundModifier::Maybe(_binding_0), + crate::TraitBoundModifier::None => crate::TraitBoundModifier::None, + crate::TraitBoundModifier::Maybe(_binding_0) => { + crate::TraitBoundModifier::Maybe(_binding_0) + } } } #[cfg(feature = "full")] -pub fn fold_trait_item<F>(f: &mut F, node: TraitItem) -> TraitItem +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn 
fold_trait_item<F>(f: &mut F, node: crate::TraitItem) -> crate::TraitItem where F: Fold + ?Sized, { match node { - TraitItem::Const(_binding_0) => { - TraitItem::Const(f.fold_trait_item_const(_binding_0)) + crate::TraitItem::Const(_binding_0) => { + crate::TraitItem::Const(f.fold_trait_item_const(_binding_0)) + } + crate::TraitItem::Fn(_binding_0) => { + crate::TraitItem::Fn(f.fold_trait_item_fn(_binding_0)) } - TraitItem::Fn(_binding_0) => TraitItem::Fn(f.fold_trait_item_fn(_binding_0)), - TraitItem::Type(_binding_0) => { - TraitItem::Type(f.fold_trait_item_type(_binding_0)) + crate::TraitItem::Type(_binding_0) => { + crate::TraitItem::Type(f.fold_trait_item_type(_binding_0)) } - TraitItem::Macro(_binding_0) => { - TraitItem::Macro(f.fold_trait_item_macro(_binding_0)) + crate::TraitItem::Macro(_binding_0) => { + crate::TraitItem::Macro(f.fold_trait_item_macro(_binding_0)) } - TraitItem::Verbatim(_binding_0) => TraitItem::Verbatim(_binding_0), + crate::TraitItem::Verbatim(_binding_0) => crate::TraitItem::Verbatim(_binding_0), } } #[cfg(feature = "full")] -pub fn fold_trait_item_const<F>(f: &mut F, node: TraitItemConst) -> TraitItemConst +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_trait_item_const<F>( + f: &mut F, + node: crate::TraitItemConst, +) -> crate::TraitItemConst where F: Fold + ?Sized, { - TraitItemConst { + crate::TraitItemConst { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), const_token: node.const_token, ident: f.fold_ident(node.ident), @@ -2723,11 +3279,12 @@ where } } #[cfg(feature = "full")] -pub fn fold_trait_item_fn<F>(f: &mut F, node: TraitItemFn) -> TraitItemFn +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_trait_item_fn<F>(f: &mut F, node: crate::TraitItemFn) -> crate::TraitItemFn where F: Fold + ?Sized, { - TraitItemFn { + crate::TraitItemFn { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), sig: f.fold_signature(node.sig), default: (node.default).map(|it| f.fold_block(it)), @@ 
-2735,22 +3292,30 @@ where } } #[cfg(feature = "full")] -pub fn fold_trait_item_macro<F>(f: &mut F, node: TraitItemMacro) -> TraitItemMacro +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_trait_item_macro<F>( + f: &mut F, + node: crate::TraitItemMacro, +) -> crate::TraitItemMacro where F: Fold + ?Sized, { - TraitItemMacro { + crate::TraitItemMacro { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), mac: f.fold_macro(node.mac), semi_token: node.semi_token, } } #[cfg(feature = "full")] -pub fn fold_trait_item_type<F>(f: &mut F, node: TraitItemType) -> TraitItemType +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_trait_item_type<F>( + f: &mut F, + node: crate::TraitItemType, +) -> crate::TraitItemType where F: Fold + ?Sized, { - TraitItemType { + crate::TraitItemType { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), type_token: node.type_token, ident: f.fold_ident(node.ident), @@ -2762,38 +3327,60 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_type<F>(f: &mut F, node: Type) -> Type +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_type<F>(f: &mut F, node: crate::Type) -> crate::Type where F: Fold + ?Sized, { match node { - Type::Array(_binding_0) => Type::Array(f.fold_type_array(_binding_0)), - Type::BareFn(_binding_0) => Type::BareFn(f.fold_type_bare_fn(_binding_0)), - Type::Group(_binding_0) => Type::Group(f.fold_type_group(_binding_0)), - Type::ImplTrait(_binding_0) => { - Type::ImplTrait(f.fold_type_impl_trait(_binding_0)) + crate::Type::Array(_binding_0) => { + crate::Type::Array(f.fold_type_array(_binding_0)) + } + crate::Type::BareFn(_binding_0) => { + crate::Type::BareFn(f.fold_type_bare_fn(_binding_0)) + } + crate::Type::Group(_binding_0) => { + crate::Type::Group(f.fold_type_group(_binding_0)) + } + crate::Type::ImplTrait(_binding_0) => { + crate::Type::ImplTrait(f.fold_type_impl_trait(_binding_0)) } - Type::Infer(_binding_0) => 
Type::Infer(f.fold_type_infer(_binding_0)), - Type::Macro(_binding_0) => Type::Macro(f.fold_type_macro(_binding_0)), - Type::Never(_binding_0) => Type::Never(f.fold_type_never(_binding_0)), - Type::Paren(_binding_0) => Type::Paren(f.fold_type_paren(_binding_0)), - Type::Path(_binding_0) => Type::Path(f.fold_type_path(_binding_0)), - Type::Ptr(_binding_0) => Type::Ptr(f.fold_type_ptr(_binding_0)), - Type::Reference(_binding_0) => Type::Reference(f.fold_type_reference(_binding_0)), - Type::Slice(_binding_0) => Type::Slice(f.fold_type_slice(_binding_0)), - Type::TraitObject(_binding_0) => { - Type::TraitObject(f.fold_type_trait_object(_binding_0)) + crate::Type::Infer(_binding_0) => { + crate::Type::Infer(f.fold_type_infer(_binding_0)) } - Type::Tuple(_binding_0) => Type::Tuple(f.fold_type_tuple(_binding_0)), - Type::Verbatim(_binding_0) => Type::Verbatim(_binding_0), + crate::Type::Macro(_binding_0) => { + crate::Type::Macro(f.fold_type_macro(_binding_0)) + } + crate::Type::Never(_binding_0) => { + crate::Type::Never(f.fold_type_never(_binding_0)) + } + crate::Type::Paren(_binding_0) => { + crate::Type::Paren(f.fold_type_paren(_binding_0)) + } + crate::Type::Path(_binding_0) => crate::Type::Path(f.fold_type_path(_binding_0)), + crate::Type::Ptr(_binding_0) => crate::Type::Ptr(f.fold_type_ptr(_binding_0)), + crate::Type::Reference(_binding_0) => { + crate::Type::Reference(f.fold_type_reference(_binding_0)) + } + crate::Type::Slice(_binding_0) => { + crate::Type::Slice(f.fold_type_slice(_binding_0)) + } + crate::Type::TraitObject(_binding_0) => { + crate::Type::TraitObject(f.fold_type_trait_object(_binding_0)) + } + crate::Type::Tuple(_binding_0) => { + crate::Type::Tuple(f.fold_type_tuple(_binding_0)) + } + crate::Type::Verbatim(_binding_0) => crate::Type::Verbatim(_binding_0), } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_type_array<F>(f: &mut F, node: TypeArray) -> TypeArray +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = 
"full"))))] +pub fn fold_type_array<F>(f: &mut F, node: crate::TypeArray) -> crate::TypeArray where F: Fold + ?Sized, { - TypeArray { + crate::TypeArray { bracket_token: node.bracket_token, elem: Box::new(f.fold_type(*node.elem)), semi_token: node.semi_token, @@ -2801,11 +3388,12 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_type_bare_fn<F>(f: &mut F, node: TypeBareFn) -> TypeBareFn +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_type_bare_fn<F>(f: &mut F, node: crate::TypeBareFn) -> crate::TypeBareFn where F: Fold + ?Sized, { - TypeBareFn { + crate::TypeBareFn { lifetimes: (node.lifetimes).map(|it| f.fold_bound_lifetimes(it)), unsafety: node.unsafety, abi: (node.abi).map(|it| f.fold_abi(it)), @@ -2817,58 +3405,67 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_type_group<F>(f: &mut F, node: TypeGroup) -> TypeGroup +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_type_group<F>(f: &mut F, node: crate::TypeGroup) -> crate::TypeGroup where F: Fold + ?Sized, { - TypeGroup { + crate::TypeGroup { group_token: node.group_token, elem: Box::new(f.fold_type(*node.elem)), } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_type_impl_trait<F>(f: &mut F, node: TypeImplTrait) -> TypeImplTrait +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_type_impl_trait<F>( + f: &mut F, + node: crate::TypeImplTrait, +) -> crate::TypeImplTrait where F: Fold + ?Sized, { - TypeImplTrait { + crate::TypeImplTrait { impl_token: node.impl_token, bounds: FoldHelper::lift(node.bounds, |it| f.fold_type_param_bound(it)), } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_type_infer<F>(f: &mut F, node: TypeInfer) -> TypeInfer +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_type_infer<F>(f: &mut F, node: crate::TypeInfer) -> crate::TypeInfer where F: Fold + 
?Sized, { - TypeInfer { + crate::TypeInfer { underscore_token: node.underscore_token, } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_type_macro<F>(f: &mut F, node: TypeMacro) -> TypeMacro +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_type_macro<F>(f: &mut F, node: crate::TypeMacro) -> crate::TypeMacro where F: Fold + ?Sized, { - TypeMacro { + crate::TypeMacro { mac: f.fold_macro(node.mac), } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_type_never<F>(f: &mut F, node: TypeNever) -> TypeNever +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_type_never<F>(f: &mut F, node: crate::TypeNever) -> crate::TypeNever where F: Fold + ?Sized, { - TypeNever { + crate::TypeNever { bang_token: node.bang_token, } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_type_param<F>(f: &mut F, node: TypeParam) -> TypeParam +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_type_param<F>(f: &mut F, node: crate::TypeParam) -> crate::TypeParam where F: Fold + ?Sized, { - TypeParam { + crate::TypeParam { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), ident: f.fold_ident(node.ident), colon_token: node.colon_token, @@ -2878,46 +3475,55 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_type_param_bound<F>(f: &mut F, node: TypeParamBound) -> TypeParamBound +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_type_param_bound<F>( + f: &mut F, + node: crate::TypeParamBound, +) -> crate::TypeParamBound where F: Fold + ?Sized, { match node { - TypeParamBound::Trait(_binding_0) => { - TypeParamBound::Trait(f.fold_trait_bound(_binding_0)) + crate::TypeParamBound::Trait(_binding_0) => { + crate::TypeParamBound::Trait(f.fold_trait_bound(_binding_0)) + } + crate::TypeParamBound::Lifetime(_binding_0) => { + 
crate::TypeParamBound::Lifetime(f.fold_lifetime(_binding_0)) } - TypeParamBound::Lifetime(_binding_0) => { - TypeParamBound::Lifetime(f.fold_lifetime(_binding_0)) + crate::TypeParamBound::Verbatim(_binding_0) => { + crate::TypeParamBound::Verbatim(_binding_0) } - TypeParamBound::Verbatim(_binding_0) => TypeParamBound::Verbatim(_binding_0), } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_type_paren<F>(f: &mut F, node: TypeParen) -> TypeParen +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_type_paren<F>(f: &mut F, node: crate::TypeParen) -> crate::TypeParen where F: Fold + ?Sized, { - TypeParen { + crate::TypeParen { paren_token: node.paren_token, elem: Box::new(f.fold_type(*node.elem)), } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_type_path<F>(f: &mut F, node: TypePath) -> TypePath +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_type_path<F>(f: &mut F, node: crate::TypePath) -> crate::TypePath where F: Fold + ?Sized, { - TypePath { + crate::TypePath { qself: (node.qself).map(|it| f.fold_qself(it)), path: f.fold_path(node.path), } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_type_ptr<F>(f: &mut F, node: TypePtr) -> TypePtr +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_type_ptr<F>(f: &mut F, node: crate::TypePtr) -> crate::TypePtr where F: Fold + ?Sized, { - TypePtr { + crate::TypePtr { star_token: node.star_token, const_token: node.const_token, mutability: node.mutability, @@ -2925,11 +3531,15 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_type_reference<F>(f: &mut F, node: TypeReference) -> TypeReference +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_type_reference<F>( + f: &mut F, + node: crate::TypeReference, +) -> crate::TypeReference where F: Fold + ?Sized, { - TypeReference { + crate::TypeReference { and_token: 
node.and_token, lifetime: (node.lifetime).map(|it| f.fold_lifetime(it)), mutability: node.mutability, @@ -2937,115 +3547,139 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_type_slice<F>(f: &mut F, node: TypeSlice) -> TypeSlice +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_type_slice<F>(f: &mut F, node: crate::TypeSlice) -> crate::TypeSlice where F: Fold + ?Sized, { - TypeSlice { + crate::TypeSlice { bracket_token: node.bracket_token, elem: Box::new(f.fold_type(*node.elem)), } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_type_trait_object<F>(f: &mut F, node: TypeTraitObject) -> TypeTraitObject +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_type_trait_object<F>( + f: &mut F, + node: crate::TypeTraitObject, +) -> crate::TypeTraitObject where F: Fold + ?Sized, { - TypeTraitObject { + crate::TypeTraitObject { dyn_token: node.dyn_token, bounds: FoldHelper::lift(node.bounds, |it| f.fold_type_param_bound(it)), } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_type_tuple<F>(f: &mut F, node: TypeTuple) -> TypeTuple +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_type_tuple<F>(f: &mut F, node: crate::TypeTuple) -> crate::TypeTuple where F: Fold + ?Sized, { - TypeTuple { + crate::TypeTuple { paren_token: node.paren_token, elems: FoldHelper::lift(node.elems, |it| f.fold_type(it)), } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_un_op<F>(f: &mut F, node: UnOp) -> UnOp +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_un_op<F>(f: &mut F, node: crate::UnOp) -> crate::UnOp where F: Fold + ?Sized, { match node { - UnOp::Deref(_binding_0) => UnOp::Deref(_binding_0), - UnOp::Not(_binding_0) => UnOp::Not(_binding_0), - UnOp::Neg(_binding_0) => UnOp::Neg(_binding_0), + crate::UnOp::Deref(_binding_0) => crate::UnOp::Deref(_binding_0), + 
crate::UnOp::Not(_binding_0) => crate::UnOp::Not(_binding_0), + crate::UnOp::Neg(_binding_0) => crate::UnOp::Neg(_binding_0), } } #[cfg(feature = "full")] -pub fn fold_use_glob<F>(f: &mut F, node: UseGlob) -> UseGlob +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_use_glob<F>(f: &mut F, node: crate::UseGlob) -> crate::UseGlob where F: Fold + ?Sized, { - UseGlob { + crate::UseGlob { star_token: node.star_token, } } #[cfg(feature = "full")] -pub fn fold_use_group<F>(f: &mut F, node: UseGroup) -> UseGroup +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_use_group<F>(f: &mut F, node: crate::UseGroup) -> crate::UseGroup where F: Fold + ?Sized, { - UseGroup { + crate::UseGroup { brace_token: node.brace_token, items: FoldHelper::lift(node.items, |it| f.fold_use_tree(it)), } } #[cfg(feature = "full")] -pub fn fold_use_name<F>(f: &mut F, node: UseName) -> UseName +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_use_name<F>(f: &mut F, node: crate::UseName) -> crate::UseName where F: Fold + ?Sized, { - UseName { + crate::UseName { ident: f.fold_ident(node.ident), } } #[cfg(feature = "full")] -pub fn fold_use_path<F>(f: &mut F, node: UsePath) -> UsePath +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_use_path<F>(f: &mut F, node: crate::UsePath) -> crate::UsePath where F: Fold + ?Sized, { - UsePath { + crate::UsePath { ident: f.fold_ident(node.ident), colon2_token: node.colon2_token, tree: Box::new(f.fold_use_tree(*node.tree)), } } #[cfg(feature = "full")] -pub fn fold_use_rename<F>(f: &mut F, node: UseRename) -> UseRename +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_use_rename<F>(f: &mut F, node: crate::UseRename) -> crate::UseRename where F: Fold + ?Sized, { - UseRename { + crate::UseRename { ident: f.fold_ident(node.ident), as_token: node.as_token, rename: f.fold_ident(node.rename), } } #[cfg(feature = "full")] -pub fn fold_use_tree<F>(f: &mut F, node: UseTree) -> UseTree +#[cfg_attr(doc_cfg, 
doc(cfg(feature = "full")))] +pub fn fold_use_tree<F>(f: &mut F, node: crate::UseTree) -> crate::UseTree where F: Fold + ?Sized, { match node { - UseTree::Path(_binding_0) => UseTree::Path(f.fold_use_path(_binding_0)), - UseTree::Name(_binding_0) => UseTree::Name(f.fold_use_name(_binding_0)), - UseTree::Rename(_binding_0) => UseTree::Rename(f.fold_use_rename(_binding_0)), - UseTree::Glob(_binding_0) => UseTree::Glob(f.fold_use_glob(_binding_0)), - UseTree::Group(_binding_0) => UseTree::Group(f.fold_use_group(_binding_0)), + crate::UseTree::Path(_binding_0) => { + crate::UseTree::Path(f.fold_use_path(_binding_0)) + } + crate::UseTree::Name(_binding_0) => { + crate::UseTree::Name(f.fold_use_name(_binding_0)) + } + crate::UseTree::Rename(_binding_0) => { + crate::UseTree::Rename(f.fold_use_rename(_binding_0)) + } + crate::UseTree::Glob(_binding_0) => { + crate::UseTree::Glob(f.fold_use_glob(_binding_0)) + } + crate::UseTree::Group(_binding_0) => { + crate::UseTree::Group(f.fold_use_group(_binding_0)) + } } } #[cfg(feature = "full")] -pub fn fold_variadic<F>(f: &mut F, node: Variadic) -> Variadic +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn fold_variadic<F>(f: &mut F, node: crate::Variadic) -> crate::Variadic where F: Fold + ?Sized, { - Variadic { + crate::Variadic { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), pat: (node.pat).map(|it| (Box::new(f.fold_pat(*(it).0)), (it).1)), dots: node.dots, @@ -3053,11 +3687,12 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_variant<F>(f: &mut F, node: Variant) -> Variant +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_variant<F>(f: &mut F, node: crate::Variant) -> crate::Variant where F: Fold + ?Sized, { - Variant { + crate::Variant { attrs: FoldHelper::lift(node.attrs, |it| f.fold_attribute(it)), ident: f.fold_ident(node.ident), fields: f.fold_fields(node.fields), @@ -3065,11 +3700,15 @@ where } } #[cfg(any(feature = "derive", 
feature = "full"))] -pub fn fold_vis_restricted<F>(f: &mut F, node: VisRestricted) -> VisRestricted +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_vis_restricted<F>( + f: &mut F, + node: crate::VisRestricted, +) -> crate::VisRestricted where F: Fold + ?Sized, { - VisRestricted { + crate::VisRestricted { pub_token: node.pub_token, paren_token: node.paren_token, in_token: node.in_token, @@ -3077,39 +3716,45 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_visibility<F>(f: &mut F, node: Visibility) -> Visibility +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_visibility<F>(f: &mut F, node: crate::Visibility) -> crate::Visibility where F: Fold + ?Sized, { match node { - Visibility::Public(_binding_0) => Visibility::Public(_binding_0), - Visibility::Restricted(_binding_0) => { - Visibility::Restricted(f.fold_vis_restricted(_binding_0)) + crate::Visibility::Public(_binding_0) => crate::Visibility::Public(_binding_0), + crate::Visibility::Restricted(_binding_0) => { + crate::Visibility::Restricted(f.fold_vis_restricted(_binding_0)) } - Visibility::Inherited => Visibility::Inherited, + crate::Visibility::Inherited => crate::Visibility::Inherited, } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_where_clause<F>(f: &mut F, node: WhereClause) -> WhereClause +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_where_clause<F>(f: &mut F, node: crate::WhereClause) -> crate::WhereClause where F: Fold + ?Sized, { - WhereClause { + crate::WhereClause { where_token: node.where_token, predicates: FoldHelper::lift(node.predicates, |it| f.fold_where_predicate(it)), } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn fold_where_predicate<F>(f: &mut F, node: WherePredicate) -> WherePredicate +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn fold_where_predicate<F>( + f: &mut F, + node: 
crate::WherePredicate, +) -> crate::WherePredicate where F: Fold + ?Sized, { match node { - WherePredicate::Lifetime(_binding_0) => { - WherePredicate::Lifetime(f.fold_predicate_lifetime(_binding_0)) + crate::WherePredicate::Lifetime(_binding_0) => { + crate::WherePredicate::Lifetime(f.fold_predicate_lifetime(_binding_0)) } - WherePredicate::Type(_binding_0) => { - WherePredicate::Type(f.fold_predicate_type(_binding_0)) + crate::WherePredicate::Type(_binding_0) => { + crate::WherePredicate::Type(f.fold_predicate_type(_binding_0)) } } } diff --git a/vendor/syn/src/gen/hash.rs b/vendor/syn/src/gen/hash.rs index 40dfc57..7ead139 100644 --- a/vendor/syn/src/gen/hash.rs +++ b/vendor/syn/src/gen/hash.rs @@ -3,11 +3,10 @@ #[cfg(any(feature = "derive", feature = "full"))] use crate::tt::TokenStreamHelper; -use crate::*; use std::hash::{Hash, Hasher}; #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for Abi { +impl Hash for crate::Abi { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -17,7 +16,7 @@ impl Hash for Abi { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for AngleBracketedGenericArguments { +impl Hash for crate::AngleBracketedGenericArguments { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -28,7 +27,7 @@ impl Hash for AngleBracketedGenericArguments { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for Arm { +impl Hash for crate::Arm { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -42,7 +41,7 @@ impl Hash for Arm { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for AssocConst { +impl Hash for crate::AssocConst { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -54,7 +53,7 @@ impl Hash for AssocConst { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, 
doc(cfg(feature = "extra-traits")))] -impl Hash for AssocType { +impl Hash for crate::AssocType { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -66,16 +65,16 @@ impl Hash for AssocType { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for AttrStyle { +impl Hash for crate::AttrStyle { fn hash<H>(&self, state: &mut H) where H: Hasher, { match self { - AttrStyle::Outer => { + crate::AttrStyle::Outer => { state.write_u8(0u8); } - AttrStyle::Inner(_) => { + crate::AttrStyle::Inner(_) => { state.write_u8(1u8); } } @@ -83,7 +82,7 @@ impl Hash for AttrStyle { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for Attribute { +impl Hash for crate::Attribute { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -94,7 +93,7 @@ impl Hash for Attribute { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for BareFnArg { +impl Hash for crate::BareFnArg { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -106,7 +105,7 @@ impl Hash for BareFnArg { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for BareVariadic { +impl Hash for crate::BareVariadic { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -118,94 +117,94 @@ impl Hash for BareVariadic { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for BinOp { +impl Hash for crate::BinOp { fn hash<H>(&self, state: &mut H) where H: Hasher, { match self { - BinOp::Add(_) => { + crate::BinOp::Add(_) => { state.write_u8(0u8); } - BinOp::Sub(_) => { + crate::BinOp::Sub(_) => { state.write_u8(1u8); } - BinOp::Mul(_) => { + crate::BinOp::Mul(_) => { state.write_u8(2u8); } - BinOp::Div(_) => { + crate::BinOp::Div(_) => { state.write_u8(3u8); } - BinOp::Rem(_) => { + 
crate::BinOp::Rem(_) => { state.write_u8(4u8); } - BinOp::And(_) => { + crate::BinOp::And(_) => { state.write_u8(5u8); } - BinOp::Or(_) => { + crate::BinOp::Or(_) => { state.write_u8(6u8); } - BinOp::BitXor(_) => { + crate::BinOp::BitXor(_) => { state.write_u8(7u8); } - BinOp::BitAnd(_) => { + crate::BinOp::BitAnd(_) => { state.write_u8(8u8); } - BinOp::BitOr(_) => { + crate::BinOp::BitOr(_) => { state.write_u8(9u8); } - BinOp::Shl(_) => { + crate::BinOp::Shl(_) => { state.write_u8(10u8); } - BinOp::Shr(_) => { + crate::BinOp::Shr(_) => { state.write_u8(11u8); } - BinOp::Eq(_) => { + crate::BinOp::Eq(_) => { state.write_u8(12u8); } - BinOp::Lt(_) => { + crate::BinOp::Lt(_) => { state.write_u8(13u8); } - BinOp::Le(_) => { + crate::BinOp::Le(_) => { state.write_u8(14u8); } - BinOp::Ne(_) => { + crate::BinOp::Ne(_) => { state.write_u8(15u8); } - BinOp::Ge(_) => { + crate::BinOp::Ge(_) => { state.write_u8(16u8); } - BinOp::Gt(_) => { + crate::BinOp::Gt(_) => { state.write_u8(17u8); } - BinOp::AddAssign(_) => { + crate::BinOp::AddAssign(_) => { state.write_u8(18u8); } - BinOp::SubAssign(_) => { + crate::BinOp::SubAssign(_) => { state.write_u8(19u8); } - BinOp::MulAssign(_) => { + crate::BinOp::MulAssign(_) => { state.write_u8(20u8); } - BinOp::DivAssign(_) => { + crate::BinOp::DivAssign(_) => { state.write_u8(21u8); } - BinOp::RemAssign(_) => { + crate::BinOp::RemAssign(_) => { state.write_u8(22u8); } - BinOp::BitXorAssign(_) => { + crate::BinOp::BitXorAssign(_) => { state.write_u8(23u8); } - BinOp::BitAndAssign(_) => { + crate::BinOp::BitAndAssign(_) => { state.write_u8(24u8); } - BinOp::BitOrAssign(_) => { + crate::BinOp::BitOrAssign(_) => { state.write_u8(25u8); } - BinOp::ShlAssign(_) => { + crate::BinOp::ShlAssign(_) => { state.write_u8(26u8); } - BinOp::ShrAssign(_) => { + crate::BinOp::ShrAssign(_) => { state.write_u8(27u8); } } @@ -213,7 +212,7 @@ impl Hash for BinOp { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash 
for Block { +impl Hash for crate::Block { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -223,7 +222,7 @@ impl Hash for Block { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for BoundLifetimes { +impl Hash for crate::BoundLifetimes { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -233,7 +232,7 @@ impl Hash for BoundLifetimes { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ConstParam { +impl Hash for crate::ConstParam { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -247,7 +246,7 @@ impl Hash for ConstParam { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for Constraint { +impl Hash for crate::Constraint { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -259,21 +258,21 @@ impl Hash for Constraint { } #[cfg(feature = "derive")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for Data { +impl Hash for crate::Data { fn hash<H>(&self, state: &mut H) where H: Hasher, { match self { - Data::Struct(v0) => { + crate::Data::Struct(v0) => { state.write_u8(0u8); v0.hash(state); } - Data::Enum(v0) => { + crate::Data::Enum(v0) => { state.write_u8(1u8); v0.hash(state); } - Data::Union(v0) => { + crate::Data::Union(v0) => { state.write_u8(2u8); v0.hash(state); } @@ -282,7 +281,7 @@ impl Hash for Data { } #[cfg(feature = "derive")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for DataEnum { +impl Hash for crate::DataEnum { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -292,7 +291,7 @@ impl Hash for DataEnum { } #[cfg(feature = "derive")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for DataStruct { +impl Hash for crate::DataStruct { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -303,7 +302,7 @@ impl Hash for DataStruct { } #[cfg(feature = "derive")] 
#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for DataUnion { +impl Hash for crate::DataUnion { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -313,7 +312,7 @@ impl Hash for DataUnion { } #[cfg(feature = "derive")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for DeriveInput { +impl Hash for crate::DeriveInput { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -327,192 +326,189 @@ impl Hash for DeriveInput { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for Expr { +impl Hash for crate::Expr { fn hash<H>(&self, state: &mut H) where H: Hasher, { match self { #[cfg(feature = "full")] - Expr::Array(v0) => { + crate::Expr::Array(v0) => { state.write_u8(0u8); v0.hash(state); } #[cfg(feature = "full")] - Expr::Assign(v0) => { + crate::Expr::Assign(v0) => { state.write_u8(1u8); v0.hash(state); } #[cfg(feature = "full")] - Expr::Async(v0) => { + crate::Expr::Async(v0) => { state.write_u8(2u8); v0.hash(state); } #[cfg(feature = "full")] - Expr::Await(v0) => { + crate::Expr::Await(v0) => { state.write_u8(3u8); v0.hash(state); } - Expr::Binary(v0) => { + crate::Expr::Binary(v0) => { state.write_u8(4u8); v0.hash(state); } #[cfg(feature = "full")] - Expr::Block(v0) => { + crate::Expr::Block(v0) => { state.write_u8(5u8); v0.hash(state); } #[cfg(feature = "full")] - Expr::Break(v0) => { + crate::Expr::Break(v0) => { state.write_u8(6u8); v0.hash(state); } - Expr::Call(v0) => { + crate::Expr::Call(v0) => { state.write_u8(7u8); v0.hash(state); } - Expr::Cast(v0) => { + crate::Expr::Cast(v0) => { state.write_u8(8u8); v0.hash(state); } #[cfg(feature = "full")] - Expr::Closure(v0) => { + crate::Expr::Closure(v0) => { state.write_u8(9u8); v0.hash(state); } #[cfg(feature = "full")] - Expr::Const(v0) => { + crate::Expr::Const(v0) => { state.write_u8(10u8); v0.hash(state); } #[cfg(feature = "full")] - Expr::Continue(v0) => { + crate::Expr::Continue(v0) => { 
state.write_u8(11u8); v0.hash(state); } - Expr::Field(v0) => { + crate::Expr::Field(v0) => { state.write_u8(12u8); v0.hash(state); } #[cfg(feature = "full")] - Expr::ForLoop(v0) => { + crate::Expr::ForLoop(v0) => { state.write_u8(13u8); v0.hash(state); } - Expr::Group(v0) => { + crate::Expr::Group(v0) => { state.write_u8(14u8); v0.hash(state); } #[cfg(feature = "full")] - Expr::If(v0) => { + crate::Expr::If(v0) => { state.write_u8(15u8); v0.hash(state); } - Expr::Index(v0) => { + crate::Expr::Index(v0) => { state.write_u8(16u8); v0.hash(state); } #[cfg(feature = "full")] - Expr::Infer(v0) => { + crate::Expr::Infer(v0) => { state.write_u8(17u8); v0.hash(state); } #[cfg(feature = "full")] - Expr::Let(v0) => { + crate::Expr::Let(v0) => { state.write_u8(18u8); v0.hash(state); } - Expr::Lit(v0) => { + crate::Expr::Lit(v0) => { state.write_u8(19u8); v0.hash(state); } #[cfg(feature = "full")] - Expr::Loop(v0) => { + crate::Expr::Loop(v0) => { state.write_u8(20u8); v0.hash(state); } - Expr::Macro(v0) => { + crate::Expr::Macro(v0) => { state.write_u8(21u8); v0.hash(state); } #[cfg(feature = "full")] - Expr::Match(v0) => { + crate::Expr::Match(v0) => { state.write_u8(22u8); v0.hash(state); } - #[cfg(feature = "full")] - Expr::MethodCall(v0) => { + crate::Expr::MethodCall(v0) => { state.write_u8(23u8); v0.hash(state); } - Expr::Paren(v0) => { + crate::Expr::Paren(v0) => { state.write_u8(24u8); v0.hash(state); } - Expr::Path(v0) => { + crate::Expr::Path(v0) => { state.write_u8(25u8); v0.hash(state); } #[cfg(feature = "full")] - Expr::Range(v0) => { + crate::Expr::Range(v0) => { state.write_u8(26u8); v0.hash(state); } - #[cfg(feature = "full")] - Expr::Reference(v0) => { + crate::Expr::Reference(v0) => { state.write_u8(27u8); v0.hash(state); } #[cfg(feature = "full")] - Expr::Repeat(v0) => { + crate::Expr::Repeat(v0) => { state.write_u8(28u8); v0.hash(state); } #[cfg(feature = "full")] - Expr::Return(v0) => { + crate::Expr::Return(v0) => { state.write_u8(29u8); v0.hash(state); 
} - #[cfg(feature = "full")] - Expr::Struct(v0) => { + crate::Expr::Struct(v0) => { state.write_u8(30u8); v0.hash(state); } #[cfg(feature = "full")] - Expr::Try(v0) => { + crate::Expr::Try(v0) => { state.write_u8(31u8); v0.hash(state); } #[cfg(feature = "full")] - Expr::TryBlock(v0) => { + crate::Expr::TryBlock(v0) => { state.write_u8(32u8); v0.hash(state); } #[cfg(feature = "full")] - Expr::Tuple(v0) => { + crate::Expr::Tuple(v0) => { state.write_u8(33u8); v0.hash(state); } - Expr::Unary(v0) => { + crate::Expr::Unary(v0) => { state.write_u8(34u8); v0.hash(state); } #[cfg(feature = "full")] - Expr::Unsafe(v0) => { + crate::Expr::Unsafe(v0) => { state.write_u8(35u8); v0.hash(state); } - Expr::Verbatim(v0) => { + crate::Expr::Verbatim(v0) => { state.write_u8(36u8); TokenStreamHelper(v0).hash(state); } #[cfg(feature = "full")] - Expr::While(v0) => { + crate::Expr::While(v0) => { state.write_u8(37u8); v0.hash(state); } #[cfg(feature = "full")] - Expr::Yield(v0) => { + crate::Expr::Yield(v0) => { state.write_u8(38u8); v0.hash(state); } @@ -523,7 +519,7 @@ impl Hash for Expr { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ExprArray { +impl Hash for crate::ExprArray { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -534,7 +530,7 @@ impl Hash for ExprArray { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ExprAssign { +impl Hash for crate::ExprAssign { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -546,7 +542,7 @@ impl Hash for ExprAssign { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ExprAsync { +impl Hash for crate::ExprAsync { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -558,7 +554,7 @@ impl Hash for ExprAsync { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ExprAwait { +impl Hash for crate::ExprAwait { fn hash<H>(&self, state: &mut H) where 
H: Hasher, @@ -569,7 +565,7 @@ impl Hash for ExprAwait { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ExprBinary { +impl Hash for crate::ExprBinary { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -582,7 +578,7 @@ impl Hash for ExprBinary { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ExprBlock { +impl Hash for crate::ExprBlock { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -594,7 +590,7 @@ impl Hash for ExprBlock { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ExprBreak { +impl Hash for crate::ExprBreak { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -606,7 +602,7 @@ impl Hash for ExprBreak { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ExprCall { +impl Hash for crate::ExprCall { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -618,7 +614,7 @@ impl Hash for ExprCall { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ExprCast { +impl Hash for crate::ExprCast { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -630,7 +626,7 @@ impl Hash for ExprCast { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ExprClosure { +impl Hash for crate::ExprClosure { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -648,7 +644,7 @@ impl Hash for ExprClosure { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ExprConst { +impl Hash for crate::ExprConst { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -659,7 +655,7 @@ impl Hash for ExprConst { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ExprContinue { +impl Hash for crate::ExprContinue { fn hash<H>(&self, state: 
&mut H) where H: Hasher, @@ -670,7 +666,7 @@ impl Hash for ExprContinue { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ExprField { +impl Hash for crate::ExprField { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -682,7 +678,7 @@ impl Hash for ExprField { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ExprForLoop { +impl Hash for crate::ExprForLoop { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -696,7 +692,7 @@ impl Hash for ExprForLoop { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ExprGroup { +impl Hash for crate::ExprGroup { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -707,7 +703,7 @@ impl Hash for ExprGroup { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ExprIf { +impl Hash for crate::ExprIf { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -720,7 +716,7 @@ impl Hash for ExprIf { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ExprIndex { +impl Hash for crate::ExprIndex { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -732,7 +728,7 @@ impl Hash for ExprIndex { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ExprInfer { +impl Hash for crate::ExprInfer { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -742,7 +738,7 @@ impl Hash for ExprInfer { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ExprLet { +impl Hash for crate::ExprLet { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -754,7 +750,7 @@ impl Hash for ExprLet { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ExprLit { +impl Hash for crate::ExprLit { fn 
hash<H>(&self, state: &mut H) where H: Hasher, @@ -765,7 +761,7 @@ impl Hash for ExprLit { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ExprLoop { +impl Hash for crate::ExprLoop { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -777,7 +773,7 @@ impl Hash for ExprLoop { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ExprMacro { +impl Hash for crate::ExprMacro { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -788,7 +784,7 @@ impl Hash for ExprMacro { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ExprMatch { +impl Hash for crate::ExprMatch { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -798,9 +794,9 @@ impl Hash for ExprMatch { self.arms.hash(state); } } -#[cfg(feature = "full")] +#[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ExprMethodCall { +impl Hash for crate::ExprMethodCall { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -814,7 +810,7 @@ impl Hash for ExprMethodCall { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ExprParen { +impl Hash for crate::ExprParen { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -825,7 +821,7 @@ impl Hash for ExprParen { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ExprPath { +impl Hash for crate::ExprPath { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -837,7 +833,7 @@ impl Hash for ExprPath { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ExprRange { +impl Hash for crate::ExprRange { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -848,9 +844,9 @@ impl Hash for ExprRange { self.end.hash(state); } } -#[cfg(feature = "full")] 
+#[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ExprReference { +impl Hash for crate::ExprReference { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -862,7 +858,7 @@ impl Hash for ExprReference { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ExprRepeat { +impl Hash for crate::ExprRepeat { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -874,7 +870,7 @@ impl Hash for ExprRepeat { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ExprReturn { +impl Hash for crate::ExprReturn { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -883,9 +879,9 @@ impl Hash for ExprReturn { self.expr.hash(state); } } -#[cfg(feature = "full")] +#[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ExprStruct { +impl Hash for crate::ExprStruct { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -900,7 +896,7 @@ impl Hash for ExprStruct { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ExprTry { +impl Hash for crate::ExprTry { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -911,7 +907,7 @@ impl Hash for ExprTry { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ExprTryBlock { +impl Hash for crate::ExprTryBlock { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -922,7 +918,7 @@ impl Hash for ExprTryBlock { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ExprTuple { +impl Hash for crate::ExprTuple { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -933,7 +929,7 @@ impl Hash for ExprTuple { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ExprUnary { +impl Hash for crate::ExprUnary { fn hash<H>(&self, 
state: &mut H) where H: Hasher, @@ -945,7 +941,7 @@ impl Hash for ExprUnary { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ExprUnsafe { +impl Hash for crate::ExprUnsafe { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -956,7 +952,7 @@ impl Hash for ExprUnsafe { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ExprWhile { +impl Hash for crate::ExprWhile { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -969,7 +965,7 @@ impl Hash for ExprWhile { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ExprYield { +impl Hash for crate::ExprYield { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -980,7 +976,7 @@ impl Hash for ExprYield { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for Field { +impl Hash for crate::Field { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -995,13 +991,13 @@ impl Hash for Field { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for FieldMutability { +impl Hash for crate::FieldMutability { fn hash<H>(&self, state: &mut H) where H: Hasher, { match self { - FieldMutability::None => { + crate::FieldMutability::None => { state.write_u8(0u8); } } @@ -1009,7 +1005,7 @@ impl Hash for FieldMutability { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for FieldPat { +impl Hash for crate::FieldPat { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -1020,9 +1016,9 @@ impl Hash for FieldPat { self.pat.hash(state); } } -#[cfg(feature = "full")] +#[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for FieldValue { +impl Hash for crate::FieldValue { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -1035,21 +1031,21 @@ impl Hash 
for FieldValue { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for Fields { +impl Hash for crate::Fields { fn hash<H>(&self, state: &mut H) where H: Hasher, { match self { - Fields::Named(v0) => { + crate::Fields::Named(v0) => { state.write_u8(0u8); v0.hash(state); } - Fields::Unnamed(v0) => { + crate::Fields::Unnamed(v0) => { state.write_u8(1u8); v0.hash(state); } - Fields::Unit => { + crate::Fields::Unit => { state.write_u8(2u8); } } @@ -1057,7 +1053,7 @@ impl Hash for Fields { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for FieldsNamed { +impl Hash for crate::FieldsNamed { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -1067,7 +1063,7 @@ impl Hash for FieldsNamed { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for FieldsUnnamed { +impl Hash for crate::FieldsUnnamed { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -1077,7 +1073,7 @@ impl Hash for FieldsUnnamed { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for File { +impl Hash for crate::File { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -1089,17 +1085,17 @@ impl Hash for File { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for FnArg { +impl Hash for crate::FnArg { fn hash<H>(&self, state: &mut H) where H: Hasher, { match self { - FnArg::Receiver(v0) => { + crate::FnArg::Receiver(v0) => { state.write_u8(0u8); v0.hash(state); } - FnArg::Typed(v0) => { + crate::FnArg::Typed(v0) => { state.write_u8(1u8); v0.hash(state); } @@ -1108,29 +1104,29 @@ impl Hash for FnArg { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ForeignItem { +impl Hash for crate::ForeignItem { fn hash<H>(&self, state: &mut H) where H: Hasher, { match self { 
- ForeignItem::Fn(v0) => { + crate::ForeignItem::Fn(v0) => { state.write_u8(0u8); v0.hash(state); } - ForeignItem::Static(v0) => { + crate::ForeignItem::Static(v0) => { state.write_u8(1u8); v0.hash(state); } - ForeignItem::Type(v0) => { + crate::ForeignItem::Type(v0) => { state.write_u8(2u8); v0.hash(state); } - ForeignItem::Macro(v0) => { + crate::ForeignItem::Macro(v0) => { state.write_u8(3u8); v0.hash(state); } - ForeignItem::Verbatim(v0) => { + crate::ForeignItem::Verbatim(v0) => { state.write_u8(4u8); TokenStreamHelper(v0).hash(state); } @@ -1139,7 +1135,7 @@ impl Hash for ForeignItem { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ForeignItemFn { +impl Hash for crate::ForeignItemFn { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -1151,7 +1147,7 @@ impl Hash for ForeignItemFn { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ForeignItemMacro { +impl Hash for crate::ForeignItemMacro { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -1163,7 +1159,7 @@ impl Hash for ForeignItemMacro { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ForeignItemStatic { +impl Hash for crate::ForeignItemStatic { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -1177,7 +1173,7 @@ impl Hash for ForeignItemStatic { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ForeignItemType { +impl Hash for crate::ForeignItemType { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -1190,33 +1186,33 @@ impl Hash for ForeignItemType { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for GenericArgument { +impl Hash for crate::GenericArgument { fn hash<H>(&self, state: &mut H) where H: Hasher, { match self { - GenericArgument::Lifetime(v0) => { + crate::GenericArgument::Lifetime(v0) => { state.write_u8(0u8); 
v0.hash(state); } - GenericArgument::Type(v0) => { + crate::GenericArgument::Type(v0) => { state.write_u8(1u8); v0.hash(state); } - GenericArgument::Const(v0) => { + crate::GenericArgument::Const(v0) => { state.write_u8(2u8); v0.hash(state); } - GenericArgument::AssocType(v0) => { + crate::GenericArgument::AssocType(v0) => { state.write_u8(3u8); v0.hash(state); } - GenericArgument::AssocConst(v0) => { + crate::GenericArgument::AssocConst(v0) => { state.write_u8(4u8); v0.hash(state); } - GenericArgument::Constraint(v0) => { + crate::GenericArgument::Constraint(v0) => { state.write_u8(5u8); v0.hash(state); } @@ -1225,21 +1221,21 @@ impl Hash for GenericArgument { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for GenericParam { +impl Hash for crate::GenericParam { fn hash<H>(&self, state: &mut H) where H: Hasher, { match self { - GenericParam::Lifetime(v0) => { + crate::GenericParam::Lifetime(v0) => { state.write_u8(0u8); v0.hash(state); } - GenericParam::Type(v0) => { + crate::GenericParam::Type(v0) => { state.write_u8(1u8); v0.hash(state); } - GenericParam::Const(v0) => { + crate::GenericParam::Const(v0) => { state.write_u8(2u8); v0.hash(state); } @@ -1248,7 +1244,7 @@ impl Hash for GenericParam { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for Generics { +impl Hash for crate::Generics { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -1261,29 +1257,29 @@ impl Hash for Generics { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ImplItem { +impl Hash for crate::ImplItem { fn hash<H>(&self, state: &mut H) where H: Hasher, { match self { - ImplItem::Const(v0) => { + crate::ImplItem::Const(v0) => { state.write_u8(0u8); v0.hash(state); } - ImplItem::Fn(v0) => { + crate::ImplItem::Fn(v0) => { state.write_u8(1u8); v0.hash(state); } - ImplItem::Type(v0) => { + 
crate::ImplItem::Type(v0) => { state.write_u8(2u8); v0.hash(state); } - ImplItem::Macro(v0) => { + crate::ImplItem::Macro(v0) => { state.write_u8(3u8); v0.hash(state); } - ImplItem::Verbatim(v0) => { + crate::ImplItem::Verbatim(v0) => { state.write_u8(4u8); TokenStreamHelper(v0).hash(state); } @@ -1292,7 +1288,7 @@ impl Hash for ImplItem { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ImplItemConst { +impl Hash for crate::ImplItemConst { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -1308,7 +1304,7 @@ impl Hash for ImplItemConst { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ImplItemFn { +impl Hash for crate::ImplItemFn { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -1322,7 +1318,7 @@ impl Hash for ImplItemFn { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ImplItemMacro { +impl Hash for crate::ImplItemMacro { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -1334,7 +1330,7 @@ impl Hash for ImplItemMacro { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ImplItemType { +impl Hash for crate::ImplItemType { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -1349,7 +1345,7 @@ impl Hash for ImplItemType { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ImplRestriction { +impl Hash for crate::ImplRestriction { fn hash<H>(&self, _state: &mut H) where H: Hasher, @@ -1359,73 +1355,73 @@ impl Hash for ImplRestriction { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for Item { +impl Hash for crate::Item { fn hash<H>(&self, state: &mut H) where H: Hasher, { match self { - Item::Const(v0) => { + crate::Item::Const(v0) => { state.write_u8(0u8); v0.hash(state); } - Item::Enum(v0) => { + crate::Item::Enum(v0) => { state.write_u8(1u8); v0.hash(state); } - 
Item::ExternCrate(v0) => { + crate::Item::ExternCrate(v0) => { state.write_u8(2u8); v0.hash(state); } - Item::Fn(v0) => { + crate::Item::Fn(v0) => { state.write_u8(3u8); v0.hash(state); } - Item::ForeignMod(v0) => { + crate::Item::ForeignMod(v0) => { state.write_u8(4u8); v0.hash(state); } - Item::Impl(v0) => { + crate::Item::Impl(v0) => { state.write_u8(5u8); v0.hash(state); } - Item::Macro(v0) => { + crate::Item::Macro(v0) => { state.write_u8(6u8); v0.hash(state); } - Item::Mod(v0) => { + crate::Item::Mod(v0) => { state.write_u8(7u8); v0.hash(state); } - Item::Static(v0) => { + crate::Item::Static(v0) => { state.write_u8(8u8); v0.hash(state); } - Item::Struct(v0) => { + crate::Item::Struct(v0) => { state.write_u8(9u8); v0.hash(state); } - Item::Trait(v0) => { + crate::Item::Trait(v0) => { state.write_u8(10u8); v0.hash(state); } - Item::TraitAlias(v0) => { + crate::Item::TraitAlias(v0) => { state.write_u8(11u8); v0.hash(state); } - Item::Type(v0) => { + crate::Item::Type(v0) => { state.write_u8(12u8); v0.hash(state); } - Item::Union(v0) => { + crate::Item::Union(v0) => { state.write_u8(13u8); v0.hash(state); } - Item::Use(v0) => { + crate::Item::Use(v0) => { state.write_u8(14u8); v0.hash(state); } - Item::Verbatim(v0) => { + crate::Item::Verbatim(v0) => { state.write_u8(15u8); TokenStreamHelper(v0).hash(state); } @@ -1434,7 +1430,7 @@ impl Hash for Item { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ItemConst { +impl Hash for crate::ItemConst { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -1449,7 +1445,7 @@ impl Hash for ItemConst { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ItemEnum { +impl Hash for crate::ItemEnum { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -1463,7 +1459,7 @@ impl Hash for ItemEnum { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ItemExternCrate { +impl Hash for 
crate::ItemExternCrate { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -1476,7 +1472,7 @@ impl Hash for ItemExternCrate { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ItemFn { +impl Hash for crate::ItemFn { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -1489,7 +1485,7 @@ impl Hash for ItemFn { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ItemForeignMod { +impl Hash for crate::ItemForeignMod { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -1502,7 +1498,7 @@ impl Hash for ItemForeignMod { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ItemImpl { +impl Hash for crate::ItemImpl { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -1518,7 +1514,7 @@ impl Hash for ItemImpl { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ItemMacro { +impl Hash for crate::ItemMacro { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -1531,7 +1527,7 @@ impl Hash for ItemMacro { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ItemMod { +impl Hash for crate::ItemMod { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -1546,7 +1542,7 @@ impl Hash for ItemMod { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ItemStatic { +impl Hash for crate::ItemStatic { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -1561,7 +1557,7 @@ impl Hash for ItemStatic { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ItemStruct { +impl Hash for crate::ItemStruct { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -1576,7 +1572,7 @@ impl Hash for ItemStruct { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ItemTrait { +impl Hash for crate::ItemTrait { fn hash<H>(&self, 
state: &mut H) where H: Hasher, @@ -1595,7 +1591,7 @@ impl Hash for ItemTrait { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ItemTraitAlias { +impl Hash for crate::ItemTraitAlias { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -1609,7 +1605,7 @@ impl Hash for ItemTraitAlias { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ItemType { +impl Hash for crate::ItemType { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -1623,7 +1619,7 @@ impl Hash for ItemType { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ItemUnion { +impl Hash for crate::ItemUnion { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -1637,7 +1633,7 @@ impl Hash for ItemUnion { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ItemUse { +impl Hash for crate::ItemUse { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -1650,7 +1646,7 @@ impl Hash for ItemUse { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for Label { +impl Hash for crate::Label { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -1660,7 +1656,7 @@ impl Hash for Label { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for LifetimeParam { +impl Hash for crate::LifetimeParam { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -1672,41 +1668,41 @@ impl Hash for LifetimeParam { } } #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for Lit { +impl Hash for crate::Lit { fn hash<H>(&self, state: &mut H) where H: Hasher, { match self { - Lit::Str(v0) => { + crate::Lit::Str(v0) => { state.write_u8(0u8); v0.hash(state); } - Lit::ByteStr(v0) => { + crate::Lit::ByteStr(v0) => { state.write_u8(1u8); v0.hash(state); } - Lit::Byte(v0) => { + crate::Lit::Byte(v0) => { state.write_u8(2u8); 
v0.hash(state); } - Lit::Char(v0) => { + crate::Lit::Char(v0) => { state.write_u8(3u8); v0.hash(state); } - Lit::Int(v0) => { + crate::Lit::Int(v0) => { state.write_u8(4u8); v0.hash(state); } - Lit::Float(v0) => { + crate::Lit::Float(v0) => { state.write_u8(5u8); v0.hash(state); } - Lit::Bool(v0) => { + crate::Lit::Bool(v0) => { state.write_u8(6u8); v0.hash(state); } - Lit::Verbatim(v0) => { + crate::Lit::Verbatim(v0) => { state.write_u8(7u8); v0.to_string().hash(state); } @@ -1714,7 +1710,7 @@ impl Hash for Lit { } } #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for LitBool { +impl Hash for crate::LitBool { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -1724,7 +1720,7 @@ impl Hash for LitBool { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for Local { +impl Hash for crate::Local { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -1736,7 +1732,7 @@ impl Hash for Local { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for LocalInit { +impl Hash for crate::LocalInit { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -1747,7 +1743,7 @@ impl Hash for LocalInit { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for Macro { +impl Hash for crate::Macro { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -1759,19 +1755,19 @@ impl Hash for Macro { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for MacroDelimiter { +impl Hash for crate::MacroDelimiter { fn hash<H>(&self, state: &mut H) where H: Hasher, { match self { - MacroDelimiter::Paren(_) => { + crate::MacroDelimiter::Paren(_) => { state.write_u8(0u8); } - MacroDelimiter::Brace(_) => { + crate::MacroDelimiter::Brace(_) => { state.write_u8(1u8); } - MacroDelimiter::Bracket(_) => { + crate::MacroDelimiter::Bracket(_) => { state.write_u8(2u8); } } 
@@ -1779,21 +1775,21 @@ impl Hash for MacroDelimiter { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for Meta { +impl Hash for crate::Meta { fn hash<H>(&self, state: &mut H) where H: Hasher, { match self { - Meta::Path(v0) => { + crate::Meta::Path(v0) => { state.write_u8(0u8); v0.hash(state); } - Meta::List(v0) => { + crate::Meta::List(v0) => { state.write_u8(1u8); v0.hash(state); } - Meta::NameValue(v0) => { + crate::Meta::NameValue(v0) => { state.write_u8(2u8); v0.hash(state); } @@ -1802,7 +1798,7 @@ impl Hash for Meta { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for MetaList { +impl Hash for crate::MetaList { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -1814,7 +1810,7 @@ impl Hash for MetaList { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for MetaNameValue { +impl Hash for crate::MetaNameValue { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -1825,7 +1821,7 @@ impl Hash for MetaNameValue { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ParenthesizedGenericArguments { +impl Hash for crate::ParenthesizedGenericArguments { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -1836,77 +1832,77 @@ impl Hash for ParenthesizedGenericArguments { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for Pat { +impl Hash for crate::Pat { fn hash<H>(&self, state: &mut H) where H: Hasher, { match self { - Pat::Const(v0) => { + crate::Pat::Const(v0) => { state.write_u8(0u8); v0.hash(state); } - Pat::Ident(v0) => { + crate::Pat::Ident(v0) => { state.write_u8(1u8); v0.hash(state); } - Pat::Lit(v0) => { + crate::Pat::Lit(v0) => { state.write_u8(2u8); v0.hash(state); } - Pat::Macro(v0) => { + crate::Pat::Macro(v0) => { 
state.write_u8(3u8); v0.hash(state); } - Pat::Or(v0) => { + crate::Pat::Or(v0) => { state.write_u8(4u8); v0.hash(state); } - Pat::Paren(v0) => { + crate::Pat::Paren(v0) => { state.write_u8(5u8); v0.hash(state); } - Pat::Path(v0) => { + crate::Pat::Path(v0) => { state.write_u8(6u8); v0.hash(state); } - Pat::Range(v0) => { + crate::Pat::Range(v0) => { state.write_u8(7u8); v0.hash(state); } - Pat::Reference(v0) => { + crate::Pat::Reference(v0) => { state.write_u8(8u8); v0.hash(state); } - Pat::Rest(v0) => { + crate::Pat::Rest(v0) => { state.write_u8(9u8); v0.hash(state); } - Pat::Slice(v0) => { + crate::Pat::Slice(v0) => { state.write_u8(10u8); v0.hash(state); } - Pat::Struct(v0) => { + crate::Pat::Struct(v0) => { state.write_u8(11u8); v0.hash(state); } - Pat::Tuple(v0) => { + crate::Pat::Tuple(v0) => { state.write_u8(12u8); v0.hash(state); } - Pat::TupleStruct(v0) => { + crate::Pat::TupleStruct(v0) => { state.write_u8(13u8); v0.hash(state); } - Pat::Type(v0) => { + crate::Pat::Type(v0) => { state.write_u8(14u8); v0.hash(state); } - Pat::Verbatim(v0) => { + crate::Pat::Verbatim(v0) => { state.write_u8(15u8); TokenStreamHelper(v0).hash(state); } - Pat::Wild(v0) => { + crate::Pat::Wild(v0) => { state.write_u8(16u8); v0.hash(state); } @@ -1915,7 +1911,7 @@ impl Hash for Pat { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for PatIdent { +impl Hash for crate::PatIdent { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -1929,7 +1925,7 @@ impl Hash for PatIdent { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for PatOr { +impl Hash for crate::PatOr { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -1941,7 +1937,7 @@ impl Hash for PatOr { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for PatParen { +impl Hash for crate::PatParen { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -1952,7 +1948,7 @@ impl Hash for PatParen 
{ } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for PatReference { +impl Hash for crate::PatReference { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -1964,7 +1960,7 @@ impl Hash for PatReference { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for PatRest { +impl Hash for crate::PatRest { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -1974,7 +1970,7 @@ impl Hash for PatRest { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for PatSlice { +impl Hash for crate::PatSlice { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -1985,7 +1981,7 @@ impl Hash for PatSlice { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for PatStruct { +impl Hash for crate::PatStruct { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -1999,7 +1995,7 @@ impl Hash for PatStruct { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for PatTuple { +impl Hash for crate::PatTuple { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -2010,7 +2006,7 @@ impl Hash for PatTuple { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for PatTupleStruct { +impl Hash for crate::PatTupleStruct { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -2023,7 +2019,7 @@ impl Hash for PatTupleStruct { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for PatType { +impl Hash for crate::PatType { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -2035,7 +2031,7 @@ impl Hash for PatType { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for PatWild { +impl Hash for crate::PatWild { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -2045,7 +2041,7 @@ impl Hash for PatWild { } #[cfg(any(feature = "derive", feature = "full"))] 
#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for Path { +impl Hash for crate::Path { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -2056,20 +2052,20 @@ impl Hash for Path { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for PathArguments { +impl Hash for crate::PathArguments { fn hash<H>(&self, state: &mut H) where H: Hasher, { match self { - PathArguments::None => { + crate::PathArguments::None => { state.write_u8(0u8); } - PathArguments::AngleBracketed(v0) => { + crate::PathArguments::AngleBracketed(v0) => { state.write_u8(1u8); v0.hash(state); } - PathArguments::Parenthesized(v0) => { + crate::PathArguments::Parenthesized(v0) => { state.write_u8(2u8); v0.hash(state); } @@ -2078,7 +2074,7 @@ impl Hash for PathArguments { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for PathSegment { +impl Hash for crate::PathSegment { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -2089,7 +2085,7 @@ impl Hash for PathSegment { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for PredicateLifetime { +impl Hash for crate::PredicateLifetime { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -2100,7 +2096,7 @@ impl Hash for PredicateLifetime { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for PredicateType { +impl Hash for crate::PredicateType { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -2112,7 +2108,7 @@ impl Hash for PredicateType { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for QSelf { +impl Hash for crate::QSelf { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -2124,16 +2120,16 @@ impl Hash for QSelf { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, 
doc(cfg(feature = "extra-traits")))] -impl Hash for RangeLimits { +impl Hash for crate::RangeLimits { fn hash<H>(&self, state: &mut H) where H: Hasher, { match self { - RangeLimits::HalfOpen(_) => { + crate::RangeLimits::HalfOpen(_) => { state.write_u8(0u8); } - RangeLimits::Closed(_) => { + crate::RangeLimits::Closed(_) => { state.write_u8(1u8); } } @@ -2141,7 +2137,7 @@ impl Hash for RangeLimits { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for Receiver { +impl Hash for crate::Receiver { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -2155,16 +2151,16 @@ impl Hash for Receiver { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for ReturnType { +impl Hash for crate::ReturnType { fn hash<H>(&self, state: &mut H) where H: Hasher, { match self { - ReturnType::Default => { + crate::ReturnType::Default => { state.write_u8(0u8); } - ReturnType::Type(_, v1) => { + crate::ReturnType::Type(_, v1) => { state.write_u8(1u8); v1.hash(state); } @@ -2173,7 +2169,7 @@ impl Hash for ReturnType { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for Signature { +impl Hash for crate::Signature { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -2191,16 +2187,16 @@ impl Hash for Signature { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for StaticMutability { +impl Hash for crate::StaticMutability { fn hash<H>(&self, state: &mut H) where H: Hasher, { match self { - StaticMutability::Mut(_) => { + crate::StaticMutability::Mut(_) => { state.write_u8(0u8); } - StaticMutability::None => { + crate::StaticMutability::None => { state.write_u8(1u8); } } @@ -2208,26 +2204,26 @@ impl Hash for StaticMutability { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for Stmt { +impl Hash for crate::Stmt { fn hash<H>(&self, state: &mut 
H) where H: Hasher, { match self { - Stmt::Local(v0) => { + crate::Stmt::Local(v0) => { state.write_u8(0u8); v0.hash(state); } - Stmt::Item(v0) => { + crate::Stmt::Item(v0) => { state.write_u8(1u8); v0.hash(state); } - Stmt::Expr(v0, v1) => { + crate::Stmt::Expr(v0, v1) => { state.write_u8(2u8); v0.hash(state); v1.hash(state); } - Stmt::Macro(v0) => { + crate::Stmt::Macro(v0) => { state.write_u8(3u8); v0.hash(state); } @@ -2236,7 +2232,7 @@ impl Hash for Stmt { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for StmtMacro { +impl Hash for crate::StmtMacro { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -2248,7 +2244,7 @@ impl Hash for StmtMacro { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for TraitBound { +impl Hash for crate::TraitBound { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -2261,16 +2257,16 @@ impl Hash for TraitBound { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for TraitBoundModifier { +impl Hash for crate::TraitBoundModifier { fn hash<H>(&self, state: &mut H) where H: Hasher, { match self { - TraitBoundModifier::None => { + crate::TraitBoundModifier::None => { state.write_u8(0u8); } - TraitBoundModifier::Maybe(_) => { + crate::TraitBoundModifier::Maybe(_) => { state.write_u8(1u8); } } @@ -2278,29 +2274,29 @@ impl Hash for TraitBoundModifier { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for TraitItem { +impl Hash for crate::TraitItem { fn hash<H>(&self, state: &mut H) where H: Hasher, { match self { - TraitItem::Const(v0) => { + crate::TraitItem::Const(v0) => { state.write_u8(0u8); v0.hash(state); } - TraitItem::Fn(v0) => { + crate::TraitItem::Fn(v0) => { state.write_u8(1u8); v0.hash(state); } - TraitItem::Type(v0) => { + crate::TraitItem::Type(v0) => { state.write_u8(2u8); v0.hash(state); } 
- TraitItem::Macro(v0) => { + crate::TraitItem::Macro(v0) => { state.write_u8(3u8); v0.hash(state); } - TraitItem::Verbatim(v0) => { + crate::TraitItem::Verbatim(v0) => { state.write_u8(4u8); TokenStreamHelper(v0).hash(state); } @@ -2309,7 +2305,7 @@ impl Hash for TraitItem { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for TraitItemConst { +impl Hash for crate::TraitItemConst { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -2323,7 +2319,7 @@ impl Hash for TraitItemConst { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for TraitItemFn { +impl Hash for crate::TraitItemFn { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -2336,7 +2332,7 @@ impl Hash for TraitItemFn { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for TraitItemMacro { +impl Hash for crate::TraitItemMacro { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -2348,7 +2344,7 @@ impl Hash for TraitItemMacro { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for TraitItemType { +impl Hash for crate::TraitItemType { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -2363,69 +2359,69 @@ impl Hash for TraitItemType { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for Type { +impl Hash for crate::Type { fn hash<H>(&self, state: &mut H) where H: Hasher, { match self { - Type::Array(v0) => { + crate::Type::Array(v0) => { state.write_u8(0u8); v0.hash(state); } - Type::BareFn(v0) => { + crate::Type::BareFn(v0) => { state.write_u8(1u8); v0.hash(state); } - Type::Group(v0) => { + crate::Type::Group(v0) => { state.write_u8(2u8); v0.hash(state); } - Type::ImplTrait(v0) => { + crate::Type::ImplTrait(v0) => { state.write_u8(3u8); v0.hash(state); } - Type::Infer(v0) => { + crate::Type::Infer(v0) => { state.write_u8(4u8); v0.hash(state); } - 
Type::Macro(v0) => { + crate::Type::Macro(v0) => { state.write_u8(5u8); v0.hash(state); } - Type::Never(v0) => { + crate::Type::Never(v0) => { state.write_u8(6u8); v0.hash(state); } - Type::Paren(v0) => { + crate::Type::Paren(v0) => { state.write_u8(7u8); v0.hash(state); } - Type::Path(v0) => { + crate::Type::Path(v0) => { state.write_u8(8u8); v0.hash(state); } - Type::Ptr(v0) => { + crate::Type::Ptr(v0) => { state.write_u8(9u8); v0.hash(state); } - Type::Reference(v0) => { + crate::Type::Reference(v0) => { state.write_u8(10u8); v0.hash(state); } - Type::Slice(v0) => { + crate::Type::Slice(v0) => { state.write_u8(11u8); v0.hash(state); } - Type::TraitObject(v0) => { + crate::Type::TraitObject(v0) => { state.write_u8(12u8); v0.hash(state); } - Type::Tuple(v0) => { + crate::Type::Tuple(v0) => { state.write_u8(13u8); v0.hash(state); } - Type::Verbatim(v0) => { + crate::Type::Verbatim(v0) => { state.write_u8(14u8); TokenStreamHelper(v0).hash(state); } @@ -2434,7 +2430,7 @@ impl Hash for Type { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for TypeArray { +impl Hash for crate::TypeArray { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -2445,7 +2441,7 @@ impl Hash for TypeArray { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for TypeBareFn { +impl Hash for crate::TypeBareFn { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -2460,7 +2456,7 @@ impl Hash for TypeBareFn { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for TypeGroup { +impl Hash for crate::TypeGroup { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -2470,7 +2466,7 @@ impl Hash for TypeGroup { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for TypeImplTrait { +impl Hash for crate::TypeImplTrait { fn 
hash<H>(&self, state: &mut H) where H: Hasher, @@ -2480,7 +2476,7 @@ impl Hash for TypeImplTrait { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for TypeInfer { +impl Hash for crate::TypeInfer { fn hash<H>(&self, _state: &mut H) where H: Hasher, @@ -2488,7 +2484,7 @@ impl Hash for TypeInfer { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for TypeMacro { +impl Hash for crate::TypeMacro { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -2498,7 +2494,7 @@ impl Hash for TypeMacro { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for TypeNever { +impl Hash for crate::TypeNever { fn hash<H>(&self, _state: &mut H) where H: Hasher, @@ -2506,7 +2502,7 @@ impl Hash for TypeNever { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for TypeParam { +impl Hash for crate::TypeParam { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -2521,21 +2517,21 @@ impl Hash for TypeParam { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for TypeParamBound { +impl Hash for crate::TypeParamBound { fn hash<H>(&self, state: &mut H) where H: Hasher, { match self { - TypeParamBound::Trait(v0) => { + crate::TypeParamBound::Trait(v0) => { state.write_u8(0u8); v0.hash(state); } - TypeParamBound::Lifetime(v0) => { + crate::TypeParamBound::Lifetime(v0) => { state.write_u8(1u8); v0.hash(state); } - TypeParamBound::Verbatim(v0) => { + crate::TypeParamBound::Verbatim(v0) => { state.write_u8(2u8); TokenStreamHelper(v0).hash(state); } @@ -2544,7 +2540,7 @@ impl Hash for TypeParamBound { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for TypeParen { +impl Hash for 
crate::TypeParen { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -2554,7 +2550,7 @@ impl Hash for TypeParen { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for TypePath { +impl Hash for crate::TypePath { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -2565,7 +2561,7 @@ impl Hash for TypePath { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for TypePtr { +impl Hash for crate::TypePtr { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -2577,7 +2573,7 @@ impl Hash for TypePtr { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for TypeReference { +impl Hash for crate::TypeReference { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -2589,7 +2585,7 @@ impl Hash for TypeReference { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for TypeSlice { +impl Hash for crate::TypeSlice { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -2599,7 +2595,7 @@ impl Hash for TypeSlice { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for TypeTraitObject { +impl Hash for crate::TypeTraitObject { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -2610,7 +2606,7 @@ impl Hash for TypeTraitObject { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for TypeTuple { +impl Hash for crate::TypeTuple { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -2620,19 +2616,19 @@ impl Hash for TypeTuple { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for UnOp { +impl Hash for crate::UnOp { fn hash<H>(&self, state: &mut H) where H: Hasher, { match self { - UnOp::Deref(_) => { + 
crate::UnOp::Deref(_) => { state.write_u8(0u8); } - UnOp::Not(_) => { + crate::UnOp::Not(_) => { state.write_u8(1u8); } - UnOp::Neg(_) => { + crate::UnOp::Neg(_) => { state.write_u8(2u8); } } @@ -2640,7 +2636,7 @@ impl Hash for UnOp { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for UseGlob { +impl Hash for crate::UseGlob { fn hash<H>(&self, _state: &mut H) where H: Hasher, @@ -2648,7 +2644,7 @@ impl Hash for UseGlob { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for UseGroup { +impl Hash for crate::UseGroup { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -2658,7 +2654,7 @@ impl Hash for UseGroup { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for UseName { +impl Hash for crate::UseName { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -2668,7 +2664,7 @@ impl Hash for UseName { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for UsePath { +impl Hash for crate::UsePath { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -2679,7 +2675,7 @@ impl Hash for UsePath { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for UseRename { +impl Hash for crate::UseRename { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -2690,29 +2686,29 @@ impl Hash for UseRename { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for UseTree { +impl Hash for crate::UseTree { fn hash<H>(&self, state: &mut H) where H: Hasher, { match self { - UseTree::Path(v0) => { + crate::UseTree::Path(v0) => { state.write_u8(0u8); v0.hash(state); } - UseTree::Name(v0) => { + crate::UseTree::Name(v0) => { state.write_u8(1u8); v0.hash(state); } - UseTree::Rename(v0) => { + crate::UseTree::Rename(v0) => { state.write_u8(2u8); v0.hash(state); } - UseTree::Glob(v0) => { + crate::UseTree::Glob(v0) => { 
state.write_u8(3u8); v0.hash(state); } - UseTree::Group(v0) => { + crate::UseTree::Group(v0) => { state.write_u8(4u8); v0.hash(state); } @@ -2721,7 +2717,7 @@ impl Hash for UseTree { } #[cfg(feature = "full")] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for Variadic { +impl Hash for crate::Variadic { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -2733,7 +2729,7 @@ impl Hash for Variadic { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for Variant { +impl Hash for crate::Variant { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -2746,7 +2742,7 @@ impl Hash for Variant { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for VisRestricted { +impl Hash for crate::VisRestricted { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -2757,20 +2753,20 @@ impl Hash for VisRestricted { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for Visibility { +impl Hash for crate::Visibility { fn hash<H>(&self, state: &mut H) where H: Hasher, { match self { - Visibility::Public(_) => { + crate::Visibility::Public(_) => { state.write_u8(0u8); } - Visibility::Restricted(v0) => { + crate::Visibility::Restricted(v0) => { state.write_u8(1u8); v0.hash(state); } - Visibility::Inherited => { + crate::Visibility::Inherited => { state.write_u8(2u8); } } @@ -2778,7 +2774,7 @@ impl Hash for Visibility { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for WhereClause { +impl Hash for crate::WhereClause { fn hash<H>(&self, state: &mut H) where H: Hasher, @@ -2788,17 +2784,17 @@ impl Hash for WhereClause { } #[cfg(any(feature = "derive", feature = "full"))] #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] -impl Hash for WherePredicate { +impl Hash for crate::WherePredicate 
{ fn hash<H>(&self, state: &mut H) where H: Hasher, { match self { - WherePredicate::Lifetime(v0) => { + crate::WherePredicate::Lifetime(v0) => { state.write_u8(0u8); v0.hash(state); } - WherePredicate::Type(v0) => { + crate::WherePredicate::Type(v0) => { state.write_u8(1u8); v0.hash(state); } diff --git a/vendor/syn/src/gen/visit.rs b/vendor/syn/src/gen/visit.rs index 9eaa24f..5d87e63 100644 --- a/vendor/syn/src/gen/visit.rs +++ b/vendor/syn/src/gen/visit.rs @@ -2,10 +2,9 @@ // It is not intended for manual editing. #![allow(unused_variables)] +#![allow(clippy::needless_pass_by_ref_mut)] #[cfg(any(feature = "full", feature = "derive"))] use crate::punctuated::Punctuated; -use crate::*; -use proc_macro2::Span; #[cfg(feature = "full")] macro_rules! full { ($e:expr) => { @@ -28,731 +27,903 @@ macro_rules! skip { /// [module documentation]: self pub trait Visit<'ast> { #[cfg(any(feature = "derive", feature = "full"))] - fn visit_abi(&mut self, i: &'ast Abi) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_abi(&mut self, i: &'ast crate::Abi) { visit_abi(self, i); } #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] fn visit_angle_bracketed_generic_arguments( &mut self, - i: &'ast AngleBracketedGenericArguments, + i: &'ast crate::AngleBracketedGenericArguments, ) { visit_angle_bracketed_generic_arguments(self, i); } #[cfg(feature = "full")] - fn visit_arm(&mut self, i: &'ast Arm) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_arm(&mut self, i: &'ast crate::Arm) { visit_arm(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_assoc_const(&mut self, i: &'ast AssocConst) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_assoc_const(&mut self, i: &'ast crate::AssocConst) { visit_assoc_const(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_assoc_type(&mut self, i: 
&'ast AssocType) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_assoc_type(&mut self, i: &'ast crate::AssocType) { visit_assoc_type(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_attr_style(&mut self, i: &'ast AttrStyle) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_attr_style(&mut self, i: &'ast crate::AttrStyle) { visit_attr_style(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_attribute(&mut self, i: &'ast Attribute) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_attribute(&mut self, i: &'ast crate::Attribute) { visit_attribute(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_bare_fn_arg(&mut self, i: &'ast BareFnArg) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_bare_fn_arg(&mut self, i: &'ast crate::BareFnArg) { visit_bare_fn_arg(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_bare_variadic(&mut self, i: &'ast BareVariadic) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_bare_variadic(&mut self, i: &'ast crate::BareVariadic) { visit_bare_variadic(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_bin_op(&mut self, i: &'ast BinOp) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_bin_op(&mut self, i: &'ast crate::BinOp) { visit_bin_op(self, i); } #[cfg(feature = "full")] - fn visit_block(&mut self, i: &'ast Block) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_block(&mut self, i: &'ast crate::Block) { visit_block(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_bound_lifetimes(&mut self, i: &'ast BoundLifetimes) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_bound_lifetimes(&mut self, i: &'ast 
crate::BoundLifetimes) { visit_bound_lifetimes(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_const_param(&mut self, i: &'ast ConstParam) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_const_param(&mut self, i: &'ast crate::ConstParam) { visit_const_param(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_constraint(&mut self, i: &'ast Constraint) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_constraint(&mut self, i: &'ast crate::Constraint) { visit_constraint(self, i); } #[cfg(feature = "derive")] - fn visit_data(&mut self, i: &'ast Data) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] + fn visit_data(&mut self, i: &'ast crate::Data) { visit_data(self, i); } #[cfg(feature = "derive")] - fn visit_data_enum(&mut self, i: &'ast DataEnum) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] + fn visit_data_enum(&mut self, i: &'ast crate::DataEnum) { visit_data_enum(self, i); } #[cfg(feature = "derive")] - fn visit_data_struct(&mut self, i: &'ast DataStruct) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] + fn visit_data_struct(&mut self, i: &'ast crate::DataStruct) { visit_data_struct(self, i); } #[cfg(feature = "derive")] - fn visit_data_union(&mut self, i: &'ast DataUnion) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] + fn visit_data_union(&mut self, i: &'ast crate::DataUnion) { visit_data_union(self, i); } #[cfg(feature = "derive")] - fn visit_derive_input(&mut self, i: &'ast DeriveInput) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] + fn visit_derive_input(&mut self, i: &'ast crate::DeriveInput) { visit_derive_input(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_expr(&mut self, i: &'ast Expr) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr(&mut self, i: &'ast crate::Expr) { visit_expr(self, i); } #[cfg(feature = "full")] - fn 
visit_expr_array(&mut self, i: &'ast ExprArray) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_array(&mut self, i: &'ast crate::ExprArray) { visit_expr_array(self, i); } #[cfg(feature = "full")] - fn visit_expr_assign(&mut self, i: &'ast ExprAssign) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_assign(&mut self, i: &'ast crate::ExprAssign) { visit_expr_assign(self, i); } #[cfg(feature = "full")] - fn visit_expr_async(&mut self, i: &'ast ExprAsync) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_async(&mut self, i: &'ast crate::ExprAsync) { visit_expr_async(self, i); } #[cfg(feature = "full")] - fn visit_expr_await(&mut self, i: &'ast ExprAwait) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_await(&mut self, i: &'ast crate::ExprAwait) { visit_expr_await(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_expr_binary(&mut self, i: &'ast ExprBinary) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_binary(&mut self, i: &'ast crate::ExprBinary) { visit_expr_binary(self, i); } #[cfg(feature = "full")] - fn visit_expr_block(&mut self, i: &'ast ExprBlock) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_block(&mut self, i: &'ast crate::ExprBlock) { visit_expr_block(self, i); } #[cfg(feature = "full")] - fn visit_expr_break(&mut self, i: &'ast ExprBreak) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_break(&mut self, i: &'ast crate::ExprBreak) { visit_expr_break(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_expr_call(&mut self, i: &'ast ExprCall) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_call(&mut self, i: &'ast crate::ExprCall) { visit_expr_call(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_expr_cast(&mut self, i: &'ast ExprCast) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = 
"derive", feature = "full"))))] + fn visit_expr_cast(&mut self, i: &'ast crate::ExprCast) { visit_expr_cast(self, i); } #[cfg(feature = "full")] - fn visit_expr_closure(&mut self, i: &'ast ExprClosure) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_closure(&mut self, i: &'ast crate::ExprClosure) { visit_expr_closure(self, i); } #[cfg(feature = "full")] - fn visit_expr_const(&mut self, i: &'ast ExprConst) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_const(&mut self, i: &'ast crate::ExprConst) { visit_expr_const(self, i); } #[cfg(feature = "full")] - fn visit_expr_continue(&mut self, i: &'ast ExprContinue) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_continue(&mut self, i: &'ast crate::ExprContinue) { visit_expr_continue(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_expr_field(&mut self, i: &'ast ExprField) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_field(&mut self, i: &'ast crate::ExprField) { visit_expr_field(self, i); } #[cfg(feature = "full")] - fn visit_expr_for_loop(&mut self, i: &'ast ExprForLoop) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_for_loop(&mut self, i: &'ast crate::ExprForLoop) { visit_expr_for_loop(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_expr_group(&mut self, i: &'ast ExprGroup) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_group(&mut self, i: &'ast crate::ExprGroup) { visit_expr_group(self, i); } #[cfg(feature = "full")] - fn visit_expr_if(&mut self, i: &'ast ExprIf) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_if(&mut self, i: &'ast crate::ExprIf) { visit_expr_if(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_expr_index(&mut self, i: &'ast ExprIndex) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn 
visit_expr_index(&mut self, i: &'ast crate::ExprIndex) { visit_expr_index(self, i); } #[cfg(feature = "full")] - fn visit_expr_infer(&mut self, i: &'ast ExprInfer) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_infer(&mut self, i: &'ast crate::ExprInfer) { visit_expr_infer(self, i); } #[cfg(feature = "full")] - fn visit_expr_let(&mut self, i: &'ast ExprLet) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_let(&mut self, i: &'ast crate::ExprLet) { visit_expr_let(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_expr_lit(&mut self, i: &'ast ExprLit) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_lit(&mut self, i: &'ast crate::ExprLit) { visit_expr_lit(self, i); } #[cfg(feature = "full")] - fn visit_expr_loop(&mut self, i: &'ast ExprLoop) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_loop(&mut self, i: &'ast crate::ExprLoop) { visit_expr_loop(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_expr_macro(&mut self, i: &'ast ExprMacro) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_macro(&mut self, i: &'ast crate::ExprMacro) { visit_expr_macro(self, i); } #[cfg(feature = "full")] - fn visit_expr_match(&mut self, i: &'ast ExprMatch) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_match(&mut self, i: &'ast crate::ExprMatch) { visit_expr_match(self, i); } - #[cfg(feature = "full")] - fn visit_expr_method_call(&mut self, i: &'ast ExprMethodCall) { + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_method_call(&mut self, i: &'ast crate::ExprMethodCall) { visit_expr_method_call(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_expr_paren(&mut self, i: &'ast ExprParen) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = 
"full"))))] + fn visit_expr_paren(&mut self, i: &'ast crate::ExprParen) { visit_expr_paren(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_expr_path(&mut self, i: &'ast ExprPath) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_path(&mut self, i: &'ast crate::ExprPath) { visit_expr_path(self, i); } #[cfg(feature = "full")] - fn visit_expr_range(&mut self, i: &'ast ExprRange) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_range(&mut self, i: &'ast crate::ExprRange) { visit_expr_range(self, i); } - #[cfg(feature = "full")] - fn visit_expr_reference(&mut self, i: &'ast ExprReference) { + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_reference(&mut self, i: &'ast crate::ExprReference) { visit_expr_reference(self, i); } #[cfg(feature = "full")] - fn visit_expr_repeat(&mut self, i: &'ast ExprRepeat) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_repeat(&mut self, i: &'ast crate::ExprRepeat) { visit_expr_repeat(self, i); } #[cfg(feature = "full")] - fn visit_expr_return(&mut self, i: &'ast ExprReturn) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_return(&mut self, i: &'ast crate::ExprReturn) { visit_expr_return(self, i); } - #[cfg(feature = "full")] - fn visit_expr_struct(&mut self, i: &'ast ExprStruct) { + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_struct(&mut self, i: &'ast crate::ExprStruct) { visit_expr_struct(self, i); } #[cfg(feature = "full")] - fn visit_expr_try(&mut self, i: &'ast ExprTry) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_try(&mut self, i: &'ast crate::ExprTry) { visit_expr_try(self, i); } #[cfg(feature = "full")] - fn visit_expr_try_block(&mut self, i: &'ast ExprTryBlock) { + #[cfg_attr(doc_cfg, 
doc(cfg(feature = "full")))] + fn visit_expr_try_block(&mut self, i: &'ast crate::ExprTryBlock) { visit_expr_try_block(self, i); } #[cfg(feature = "full")] - fn visit_expr_tuple(&mut self, i: &'ast ExprTuple) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_tuple(&mut self, i: &'ast crate::ExprTuple) { visit_expr_tuple(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_expr_unary(&mut self, i: &'ast ExprUnary) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_unary(&mut self, i: &'ast crate::ExprUnary) { visit_expr_unary(self, i); } #[cfg(feature = "full")] - fn visit_expr_unsafe(&mut self, i: &'ast ExprUnsafe) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_unsafe(&mut self, i: &'ast crate::ExprUnsafe) { visit_expr_unsafe(self, i); } #[cfg(feature = "full")] - fn visit_expr_while(&mut self, i: &'ast ExprWhile) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_while(&mut self, i: &'ast crate::ExprWhile) { visit_expr_while(self, i); } #[cfg(feature = "full")] - fn visit_expr_yield(&mut self, i: &'ast ExprYield) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_yield(&mut self, i: &'ast crate::ExprYield) { visit_expr_yield(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_field(&mut self, i: &'ast Field) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_field(&mut self, i: &'ast crate::Field) { visit_field(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_field_mutability(&mut self, i: &'ast FieldMutability) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_field_mutability(&mut self, i: &'ast crate::FieldMutability) { visit_field_mutability(self, i); } #[cfg(feature = "full")] - fn visit_field_pat(&mut self, i: &'ast FieldPat) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn 
visit_field_pat(&mut self, i: &'ast crate::FieldPat) { visit_field_pat(self, i); } - #[cfg(feature = "full")] - fn visit_field_value(&mut self, i: &'ast FieldValue) { + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_field_value(&mut self, i: &'ast crate::FieldValue) { visit_field_value(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_fields(&mut self, i: &'ast Fields) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_fields(&mut self, i: &'ast crate::Fields) { visit_fields(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_fields_named(&mut self, i: &'ast FieldsNamed) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_fields_named(&mut self, i: &'ast crate::FieldsNamed) { visit_fields_named(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_fields_unnamed(&mut self, i: &'ast FieldsUnnamed) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_fields_unnamed(&mut self, i: &'ast crate::FieldsUnnamed) { visit_fields_unnamed(self, i); } #[cfg(feature = "full")] - fn visit_file(&mut self, i: &'ast File) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_file(&mut self, i: &'ast crate::File) { visit_file(self, i); } #[cfg(feature = "full")] - fn visit_fn_arg(&mut self, i: &'ast FnArg) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_fn_arg(&mut self, i: &'ast crate::FnArg) { visit_fn_arg(self, i); } #[cfg(feature = "full")] - fn visit_foreign_item(&mut self, i: &'ast ForeignItem) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_foreign_item(&mut self, i: &'ast crate::ForeignItem) { visit_foreign_item(self, i); } #[cfg(feature = "full")] - fn visit_foreign_item_fn(&mut self, i: &'ast ForeignItemFn) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn 
visit_foreign_item_fn(&mut self, i: &'ast crate::ForeignItemFn) { visit_foreign_item_fn(self, i); } #[cfg(feature = "full")] - fn visit_foreign_item_macro(&mut self, i: &'ast ForeignItemMacro) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_foreign_item_macro(&mut self, i: &'ast crate::ForeignItemMacro) { visit_foreign_item_macro(self, i); } #[cfg(feature = "full")] - fn visit_foreign_item_static(&mut self, i: &'ast ForeignItemStatic) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_foreign_item_static(&mut self, i: &'ast crate::ForeignItemStatic) { visit_foreign_item_static(self, i); } #[cfg(feature = "full")] - fn visit_foreign_item_type(&mut self, i: &'ast ForeignItemType) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_foreign_item_type(&mut self, i: &'ast crate::ForeignItemType) { visit_foreign_item_type(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_generic_argument(&mut self, i: &'ast GenericArgument) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_generic_argument(&mut self, i: &'ast crate::GenericArgument) { visit_generic_argument(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_generic_param(&mut self, i: &'ast GenericParam) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_generic_param(&mut self, i: &'ast crate::GenericParam) { visit_generic_param(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_generics(&mut self, i: &'ast Generics) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_generics(&mut self, i: &'ast crate::Generics) { visit_generics(self, i); } - fn visit_ident(&mut self, i: &'ast Ident) { + fn visit_ident(&mut self, i: &'ast proc_macro2::Ident) { visit_ident(self, i); } #[cfg(feature = "full")] - fn visit_impl_item(&mut self, i: &'ast ImplItem) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn 
visit_impl_item(&mut self, i: &'ast crate::ImplItem) { visit_impl_item(self, i); } #[cfg(feature = "full")] - fn visit_impl_item_const(&mut self, i: &'ast ImplItemConst) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_impl_item_const(&mut self, i: &'ast crate::ImplItemConst) { visit_impl_item_const(self, i); } #[cfg(feature = "full")] - fn visit_impl_item_fn(&mut self, i: &'ast ImplItemFn) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_impl_item_fn(&mut self, i: &'ast crate::ImplItemFn) { visit_impl_item_fn(self, i); } #[cfg(feature = "full")] - fn visit_impl_item_macro(&mut self, i: &'ast ImplItemMacro) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_impl_item_macro(&mut self, i: &'ast crate::ImplItemMacro) { visit_impl_item_macro(self, i); } #[cfg(feature = "full")] - fn visit_impl_item_type(&mut self, i: &'ast ImplItemType) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_impl_item_type(&mut self, i: &'ast crate::ImplItemType) { visit_impl_item_type(self, i); } #[cfg(feature = "full")] - fn visit_impl_restriction(&mut self, i: &'ast ImplRestriction) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_impl_restriction(&mut self, i: &'ast crate::ImplRestriction) { visit_impl_restriction(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_index(&mut self, i: &'ast Index) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_index(&mut self, i: &'ast crate::Index) { visit_index(self, i); } #[cfg(feature = "full")] - fn visit_item(&mut self, i: &'ast Item) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item(&mut self, i: &'ast crate::Item) { visit_item(self, i); } #[cfg(feature = "full")] - fn visit_item_const(&mut self, i: &'ast ItemConst) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_const(&mut self, i: &'ast crate::ItemConst) { visit_item_const(self, i); } #[cfg(feature = "full")] - fn 
visit_item_enum(&mut self, i: &'ast ItemEnum) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_enum(&mut self, i: &'ast crate::ItemEnum) { visit_item_enum(self, i); } #[cfg(feature = "full")] - fn visit_item_extern_crate(&mut self, i: &'ast ItemExternCrate) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_extern_crate(&mut self, i: &'ast crate::ItemExternCrate) { visit_item_extern_crate(self, i); } #[cfg(feature = "full")] - fn visit_item_fn(&mut self, i: &'ast ItemFn) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_fn(&mut self, i: &'ast crate::ItemFn) { visit_item_fn(self, i); } #[cfg(feature = "full")] - fn visit_item_foreign_mod(&mut self, i: &'ast ItemForeignMod) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_foreign_mod(&mut self, i: &'ast crate::ItemForeignMod) { visit_item_foreign_mod(self, i); } #[cfg(feature = "full")] - fn visit_item_impl(&mut self, i: &'ast ItemImpl) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_impl(&mut self, i: &'ast crate::ItemImpl) { visit_item_impl(self, i); } #[cfg(feature = "full")] - fn visit_item_macro(&mut self, i: &'ast ItemMacro) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_macro(&mut self, i: &'ast crate::ItemMacro) { visit_item_macro(self, i); } #[cfg(feature = "full")] - fn visit_item_mod(&mut self, i: &'ast ItemMod) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_mod(&mut self, i: &'ast crate::ItemMod) { visit_item_mod(self, i); } #[cfg(feature = "full")] - fn visit_item_static(&mut self, i: &'ast ItemStatic) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_static(&mut self, i: &'ast crate::ItemStatic) { visit_item_static(self, i); } #[cfg(feature = "full")] - fn visit_item_struct(&mut self, i: &'ast ItemStruct) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_struct(&mut self, i: &'ast crate::ItemStruct) { visit_item_struct(self, 
i); } #[cfg(feature = "full")] - fn visit_item_trait(&mut self, i: &'ast ItemTrait) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_trait(&mut self, i: &'ast crate::ItemTrait) { visit_item_trait(self, i); } #[cfg(feature = "full")] - fn visit_item_trait_alias(&mut self, i: &'ast ItemTraitAlias) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_trait_alias(&mut self, i: &'ast crate::ItemTraitAlias) { visit_item_trait_alias(self, i); } #[cfg(feature = "full")] - fn visit_item_type(&mut self, i: &'ast ItemType) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_type(&mut self, i: &'ast crate::ItemType) { visit_item_type(self, i); } #[cfg(feature = "full")] - fn visit_item_union(&mut self, i: &'ast ItemUnion) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_union(&mut self, i: &'ast crate::ItemUnion) { visit_item_union(self, i); } #[cfg(feature = "full")] - fn visit_item_use(&mut self, i: &'ast ItemUse) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_use(&mut self, i: &'ast crate::ItemUse) { visit_item_use(self, i); } #[cfg(feature = "full")] - fn visit_label(&mut self, i: &'ast Label) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_label(&mut self, i: &'ast crate::Label) { visit_label(self, i); } - fn visit_lifetime(&mut self, i: &'ast Lifetime) { + fn visit_lifetime(&mut self, i: &'ast crate::Lifetime) { visit_lifetime(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_lifetime_param(&mut self, i: &'ast LifetimeParam) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_lifetime_param(&mut self, i: &'ast crate::LifetimeParam) { visit_lifetime_param(self, i); } - fn visit_lit(&mut self, i: &'ast Lit) { + fn visit_lit(&mut self, i: &'ast crate::Lit) { visit_lit(self, i); } - fn visit_lit_bool(&mut self, i: &'ast LitBool) { + fn visit_lit_bool(&mut self, i: &'ast crate::LitBool) { 
visit_lit_bool(self, i); } - fn visit_lit_byte(&mut self, i: &'ast LitByte) { + fn visit_lit_byte(&mut self, i: &'ast crate::LitByte) { visit_lit_byte(self, i); } - fn visit_lit_byte_str(&mut self, i: &'ast LitByteStr) { + fn visit_lit_byte_str(&mut self, i: &'ast crate::LitByteStr) { visit_lit_byte_str(self, i); } - fn visit_lit_char(&mut self, i: &'ast LitChar) { + fn visit_lit_char(&mut self, i: &'ast crate::LitChar) { visit_lit_char(self, i); } - fn visit_lit_float(&mut self, i: &'ast LitFloat) { + fn visit_lit_float(&mut self, i: &'ast crate::LitFloat) { visit_lit_float(self, i); } - fn visit_lit_int(&mut self, i: &'ast LitInt) { + fn visit_lit_int(&mut self, i: &'ast crate::LitInt) { visit_lit_int(self, i); } - fn visit_lit_str(&mut self, i: &'ast LitStr) { + fn visit_lit_str(&mut self, i: &'ast crate::LitStr) { visit_lit_str(self, i); } #[cfg(feature = "full")] - fn visit_local(&mut self, i: &'ast Local) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_local(&mut self, i: &'ast crate::Local) { visit_local(self, i); } #[cfg(feature = "full")] - fn visit_local_init(&mut self, i: &'ast LocalInit) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_local_init(&mut self, i: &'ast crate::LocalInit) { visit_local_init(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_macro(&mut self, i: &'ast Macro) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_macro(&mut self, i: &'ast crate::Macro) { visit_macro(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_macro_delimiter(&mut self, i: &'ast MacroDelimiter) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_macro_delimiter(&mut self, i: &'ast crate::MacroDelimiter) { visit_macro_delimiter(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_member(&mut self, i: &'ast Member) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + 
fn visit_member(&mut self, i: &'ast crate::Member) { visit_member(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_meta(&mut self, i: &'ast Meta) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_meta(&mut self, i: &'ast crate::Meta) { visit_meta(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_meta_list(&mut self, i: &'ast MetaList) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_meta_list(&mut self, i: &'ast crate::MetaList) { visit_meta_list(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_meta_name_value(&mut self, i: &'ast MetaNameValue) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_meta_name_value(&mut self, i: &'ast crate::MetaNameValue) { visit_meta_name_value(self, i); } #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] fn visit_parenthesized_generic_arguments( &mut self, - i: &'ast ParenthesizedGenericArguments, + i: &'ast crate::ParenthesizedGenericArguments, ) { visit_parenthesized_generic_arguments(self, i); } #[cfg(feature = "full")] - fn visit_pat(&mut self, i: &'ast Pat) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_pat(&mut self, i: &'ast crate::Pat) { visit_pat(self, i); } #[cfg(feature = "full")] - fn visit_pat_ident(&mut self, i: &'ast PatIdent) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_pat_ident(&mut self, i: &'ast crate::PatIdent) { visit_pat_ident(self, i); } #[cfg(feature = "full")] - fn visit_pat_or(&mut self, i: &'ast PatOr) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_pat_or(&mut self, i: &'ast crate::PatOr) { visit_pat_or(self, i); } #[cfg(feature = "full")] - fn visit_pat_paren(&mut self, i: &'ast PatParen) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_pat_paren(&mut self, i: &'ast 
crate::PatParen) { visit_pat_paren(self, i); } #[cfg(feature = "full")] - fn visit_pat_reference(&mut self, i: &'ast PatReference) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_pat_reference(&mut self, i: &'ast crate::PatReference) { visit_pat_reference(self, i); } #[cfg(feature = "full")] - fn visit_pat_rest(&mut self, i: &'ast PatRest) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_pat_rest(&mut self, i: &'ast crate::PatRest) { visit_pat_rest(self, i); } #[cfg(feature = "full")] - fn visit_pat_slice(&mut self, i: &'ast PatSlice) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_pat_slice(&mut self, i: &'ast crate::PatSlice) { visit_pat_slice(self, i); } #[cfg(feature = "full")] - fn visit_pat_struct(&mut self, i: &'ast PatStruct) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_pat_struct(&mut self, i: &'ast crate::PatStruct) { visit_pat_struct(self, i); } #[cfg(feature = "full")] - fn visit_pat_tuple(&mut self, i: &'ast PatTuple) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_pat_tuple(&mut self, i: &'ast crate::PatTuple) { visit_pat_tuple(self, i); } #[cfg(feature = "full")] - fn visit_pat_tuple_struct(&mut self, i: &'ast PatTupleStruct) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_pat_tuple_struct(&mut self, i: &'ast crate::PatTupleStruct) { visit_pat_tuple_struct(self, i); } #[cfg(feature = "full")] - fn visit_pat_type(&mut self, i: &'ast PatType) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_pat_type(&mut self, i: &'ast crate::PatType) { visit_pat_type(self, i); } #[cfg(feature = "full")] - fn visit_pat_wild(&mut self, i: &'ast PatWild) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_pat_wild(&mut self, i: &'ast crate::PatWild) { visit_pat_wild(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_path(&mut self, i: &'ast Path) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + 
fn visit_path(&mut self, i: &'ast crate::Path) { visit_path(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_path_arguments(&mut self, i: &'ast PathArguments) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_path_arguments(&mut self, i: &'ast crate::PathArguments) { visit_path_arguments(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_path_segment(&mut self, i: &'ast PathSegment) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_path_segment(&mut self, i: &'ast crate::PathSegment) { visit_path_segment(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_predicate_lifetime(&mut self, i: &'ast PredicateLifetime) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_predicate_lifetime(&mut self, i: &'ast crate::PredicateLifetime) { visit_predicate_lifetime(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_predicate_type(&mut self, i: &'ast PredicateType) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_predicate_type(&mut self, i: &'ast crate::PredicateType) { visit_predicate_type(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_qself(&mut self, i: &'ast QSelf) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_qself(&mut self, i: &'ast crate::QSelf) { visit_qself(self, i); } #[cfg(feature = "full")] - fn visit_range_limits(&mut self, i: &'ast RangeLimits) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_range_limits(&mut self, i: &'ast crate::RangeLimits) { visit_range_limits(self, i); } #[cfg(feature = "full")] - fn visit_receiver(&mut self, i: &'ast Receiver) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_receiver(&mut self, i: &'ast crate::Receiver) { visit_receiver(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn 
visit_return_type(&mut self, i: &'ast ReturnType) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_return_type(&mut self, i: &'ast crate::ReturnType) { visit_return_type(self, i); } #[cfg(feature = "full")] - fn visit_signature(&mut self, i: &'ast Signature) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_signature(&mut self, i: &'ast crate::Signature) { visit_signature(self, i); } - fn visit_span(&mut self, i: &Span) { + fn visit_span(&mut self, i: &proc_macro2::Span) { visit_span(self, i); } #[cfg(feature = "full")] - fn visit_static_mutability(&mut self, i: &'ast StaticMutability) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_static_mutability(&mut self, i: &'ast crate::StaticMutability) { visit_static_mutability(self, i); } #[cfg(feature = "full")] - fn visit_stmt(&mut self, i: &'ast Stmt) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_stmt(&mut self, i: &'ast crate::Stmt) { visit_stmt(self, i); } #[cfg(feature = "full")] - fn visit_stmt_macro(&mut self, i: &'ast StmtMacro) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_stmt_macro(&mut self, i: &'ast crate::StmtMacro) { visit_stmt_macro(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_trait_bound(&mut self, i: &'ast TraitBound) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_trait_bound(&mut self, i: &'ast crate::TraitBound) { visit_trait_bound(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_trait_bound_modifier(&mut self, i: &'ast TraitBoundModifier) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_trait_bound_modifier(&mut self, i: &'ast crate::TraitBoundModifier) { visit_trait_bound_modifier(self, i); } #[cfg(feature = "full")] - fn visit_trait_item(&mut self, i: &'ast TraitItem) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_trait_item(&mut self, i: &'ast 
crate::TraitItem) { visit_trait_item(self, i); } #[cfg(feature = "full")] - fn visit_trait_item_const(&mut self, i: &'ast TraitItemConst) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_trait_item_const(&mut self, i: &'ast crate::TraitItemConst) { visit_trait_item_const(self, i); } #[cfg(feature = "full")] - fn visit_trait_item_fn(&mut self, i: &'ast TraitItemFn) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_trait_item_fn(&mut self, i: &'ast crate::TraitItemFn) { visit_trait_item_fn(self, i); } #[cfg(feature = "full")] - fn visit_trait_item_macro(&mut self, i: &'ast TraitItemMacro) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_trait_item_macro(&mut self, i: &'ast crate::TraitItemMacro) { visit_trait_item_macro(self, i); } #[cfg(feature = "full")] - fn visit_trait_item_type(&mut self, i: &'ast TraitItemType) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_trait_item_type(&mut self, i: &'ast crate::TraitItemType) { visit_trait_item_type(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_type(&mut self, i: &'ast Type) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type(&mut self, i: &'ast crate::Type) { visit_type(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_type_array(&mut self, i: &'ast TypeArray) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_array(&mut self, i: &'ast crate::TypeArray) { visit_type_array(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_type_bare_fn(&mut self, i: &'ast TypeBareFn) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_bare_fn(&mut self, i: &'ast crate::TypeBareFn) { visit_type_bare_fn(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_type_group(&mut self, i: &'ast TypeGroup) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = 
"full"))))] + fn visit_type_group(&mut self, i: &'ast crate::TypeGroup) { visit_type_group(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_type_impl_trait(&mut self, i: &'ast TypeImplTrait) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_impl_trait(&mut self, i: &'ast crate::TypeImplTrait) { visit_type_impl_trait(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_type_infer(&mut self, i: &'ast TypeInfer) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_infer(&mut self, i: &'ast crate::TypeInfer) { visit_type_infer(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_type_macro(&mut self, i: &'ast TypeMacro) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_macro(&mut self, i: &'ast crate::TypeMacro) { visit_type_macro(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_type_never(&mut self, i: &'ast TypeNever) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_never(&mut self, i: &'ast crate::TypeNever) { visit_type_never(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_type_param(&mut self, i: &'ast TypeParam) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_param(&mut self, i: &'ast crate::TypeParam) { visit_type_param(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_type_param_bound(&mut self, i: &'ast TypeParamBound) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_param_bound(&mut self, i: &'ast crate::TypeParamBound) { visit_type_param_bound(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_type_paren(&mut self, i: &'ast TypeParen) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_paren(&mut self, 
i: &'ast crate::TypeParen) { visit_type_paren(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_type_path(&mut self, i: &'ast TypePath) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_path(&mut self, i: &'ast crate::TypePath) { visit_type_path(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_type_ptr(&mut self, i: &'ast TypePtr) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_ptr(&mut self, i: &'ast crate::TypePtr) { visit_type_ptr(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_type_reference(&mut self, i: &'ast TypeReference) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_reference(&mut self, i: &'ast crate::TypeReference) { visit_type_reference(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_type_slice(&mut self, i: &'ast TypeSlice) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_slice(&mut self, i: &'ast crate::TypeSlice) { visit_type_slice(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_type_trait_object(&mut self, i: &'ast TypeTraitObject) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_trait_object(&mut self, i: &'ast crate::TypeTraitObject) { visit_type_trait_object(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_type_tuple(&mut self, i: &'ast TypeTuple) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_tuple(&mut self, i: &'ast crate::TypeTuple) { visit_type_tuple(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_un_op(&mut self, i: &'ast UnOp) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_un_op(&mut self, i: &'ast crate::UnOp) { visit_un_op(self, i); } #[cfg(feature = "full")] 
- fn visit_use_glob(&mut self, i: &'ast UseGlob) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_use_glob(&mut self, i: &'ast crate::UseGlob) { visit_use_glob(self, i); } #[cfg(feature = "full")] - fn visit_use_group(&mut self, i: &'ast UseGroup) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_use_group(&mut self, i: &'ast crate::UseGroup) { visit_use_group(self, i); } #[cfg(feature = "full")] - fn visit_use_name(&mut self, i: &'ast UseName) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_use_name(&mut self, i: &'ast crate::UseName) { visit_use_name(self, i); } #[cfg(feature = "full")] - fn visit_use_path(&mut self, i: &'ast UsePath) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_use_path(&mut self, i: &'ast crate::UsePath) { visit_use_path(self, i); } #[cfg(feature = "full")] - fn visit_use_rename(&mut self, i: &'ast UseRename) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_use_rename(&mut self, i: &'ast crate::UseRename) { visit_use_rename(self, i); } #[cfg(feature = "full")] - fn visit_use_tree(&mut self, i: &'ast UseTree) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_use_tree(&mut self, i: &'ast crate::UseTree) { visit_use_tree(self, i); } #[cfg(feature = "full")] - fn visit_variadic(&mut self, i: &'ast Variadic) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_variadic(&mut self, i: &'ast crate::Variadic) { visit_variadic(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_variant(&mut self, i: &'ast Variant) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_variant(&mut self, i: &'ast crate::Variant) { visit_variant(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_vis_restricted(&mut self, i: &'ast VisRestricted) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_vis_restricted(&mut self, i: &'ast crate::VisRestricted) { 
visit_vis_restricted(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_visibility(&mut self, i: &'ast Visibility) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_visibility(&mut self, i: &'ast crate::Visibility) { visit_visibility(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_where_clause(&mut self, i: &'ast WhereClause) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_where_clause(&mut self, i: &'ast crate::WhereClause) { visit_where_clause(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_where_predicate(&mut self, i: &'ast WherePredicate) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_where_predicate(&mut self, i: &'ast crate::WherePredicate) { visit_where_predicate(self, i); } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_abi<'ast, V>(v: &mut V, node: &'ast Abi) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_abi<'ast, V>(v: &mut V, node: &'ast crate::Abi) where V: Visit<'ast> + ?Sized, { @@ -762,9 +933,10 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] pub fn visit_angle_bracketed_generic_arguments<'ast, V>( v: &mut V, - node: &'ast AngleBracketedGenericArguments, + node: &'ast crate::AngleBracketedGenericArguments, ) where V: Visit<'ast> + ?Sized, @@ -778,7 +950,8 @@ where skip!(node.gt_token); } #[cfg(feature = "full")] -pub fn visit_arm<'ast, V>(v: &mut V, node: &'ast Arm) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_arm<'ast, V>(v: &mut V, node: &'ast crate::Arm) where V: Visit<'ast> + ?Sized, { @@ -795,7 +968,8 @@ where skip!(node.comma); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_assoc_const<'ast, V>(v: &mut V, node: &'ast AssocConst) +#[cfg_attr(doc_cfg, 
doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_assoc_const<'ast, V>(v: &mut V, node: &'ast crate::AssocConst) where V: Visit<'ast> + ?Sized, { @@ -807,7 +981,8 @@ where v.visit_expr(&node.value); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_assoc_type<'ast, V>(v: &mut V, node: &'ast AssocType) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_assoc_type<'ast, V>(v: &mut V, node: &'ast crate::AssocType) where V: Visit<'ast> + ?Sized, { @@ -819,19 +994,21 @@ where v.visit_type(&node.ty); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_attr_style<'ast, V>(v: &mut V, node: &'ast AttrStyle) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_attr_style<'ast, V>(v: &mut V, node: &'ast crate::AttrStyle) where V: Visit<'ast> + ?Sized, { match node { - AttrStyle::Outer => {} - AttrStyle::Inner(_binding_0) => { + crate::AttrStyle::Outer => {} + crate::AttrStyle::Inner(_binding_0) => { skip!(_binding_0); } } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_attribute<'ast, V>(v: &mut V, node: &'ast Attribute) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_attribute<'ast, V>(v: &mut V, node: &'ast crate::Attribute) where V: Visit<'ast> + ?Sized, { @@ -841,7 +1018,8 @@ where v.visit_meta(&node.meta); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_bare_fn_arg<'ast, V>(v: &mut V, node: &'ast BareFnArg) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_bare_fn_arg<'ast, V>(v: &mut V, node: &'ast crate::BareFnArg) where V: Visit<'ast> + ?Sized, { @@ -855,7 +1033,8 @@ where v.visit_type(&node.ty); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_bare_variadic<'ast, V>(v: &mut V, node: &'ast BareVariadic) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_bare_variadic<'ast, V>(v: 
&mut V, node: &'ast crate::BareVariadic) where V: Visit<'ast> + ?Sized, { @@ -870,99 +1049,101 @@ where skip!(node.comma); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_bin_op<'ast, V>(v: &mut V, node: &'ast BinOp) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_bin_op<'ast, V>(v: &mut V, node: &'ast crate::BinOp) where V: Visit<'ast> + ?Sized, { match node { - BinOp::Add(_binding_0) => { + crate::BinOp::Add(_binding_0) => { skip!(_binding_0); } - BinOp::Sub(_binding_0) => { + crate::BinOp::Sub(_binding_0) => { skip!(_binding_0); } - BinOp::Mul(_binding_0) => { + crate::BinOp::Mul(_binding_0) => { skip!(_binding_0); } - BinOp::Div(_binding_0) => { + crate::BinOp::Div(_binding_0) => { skip!(_binding_0); } - BinOp::Rem(_binding_0) => { + crate::BinOp::Rem(_binding_0) => { skip!(_binding_0); } - BinOp::And(_binding_0) => { + crate::BinOp::And(_binding_0) => { skip!(_binding_0); } - BinOp::Or(_binding_0) => { + crate::BinOp::Or(_binding_0) => { skip!(_binding_0); } - BinOp::BitXor(_binding_0) => { + crate::BinOp::BitXor(_binding_0) => { skip!(_binding_0); } - BinOp::BitAnd(_binding_0) => { + crate::BinOp::BitAnd(_binding_0) => { skip!(_binding_0); } - BinOp::BitOr(_binding_0) => { + crate::BinOp::BitOr(_binding_0) => { skip!(_binding_0); } - BinOp::Shl(_binding_0) => { + crate::BinOp::Shl(_binding_0) => { skip!(_binding_0); } - BinOp::Shr(_binding_0) => { + crate::BinOp::Shr(_binding_0) => { skip!(_binding_0); } - BinOp::Eq(_binding_0) => { + crate::BinOp::Eq(_binding_0) => { skip!(_binding_0); } - BinOp::Lt(_binding_0) => { + crate::BinOp::Lt(_binding_0) => { skip!(_binding_0); } - BinOp::Le(_binding_0) => { + crate::BinOp::Le(_binding_0) => { skip!(_binding_0); } - BinOp::Ne(_binding_0) => { + crate::BinOp::Ne(_binding_0) => { skip!(_binding_0); } - BinOp::Ge(_binding_0) => { + crate::BinOp::Ge(_binding_0) => { skip!(_binding_0); } - BinOp::Gt(_binding_0) => { + crate::BinOp::Gt(_binding_0) => { 
skip!(_binding_0); } - BinOp::AddAssign(_binding_0) => { + crate::BinOp::AddAssign(_binding_0) => { skip!(_binding_0); } - BinOp::SubAssign(_binding_0) => { + crate::BinOp::SubAssign(_binding_0) => { skip!(_binding_0); } - BinOp::MulAssign(_binding_0) => { + crate::BinOp::MulAssign(_binding_0) => { skip!(_binding_0); } - BinOp::DivAssign(_binding_0) => { + crate::BinOp::DivAssign(_binding_0) => { skip!(_binding_0); } - BinOp::RemAssign(_binding_0) => { + crate::BinOp::RemAssign(_binding_0) => { skip!(_binding_0); } - BinOp::BitXorAssign(_binding_0) => { + crate::BinOp::BitXorAssign(_binding_0) => { skip!(_binding_0); } - BinOp::BitAndAssign(_binding_0) => { + crate::BinOp::BitAndAssign(_binding_0) => { skip!(_binding_0); } - BinOp::BitOrAssign(_binding_0) => { + crate::BinOp::BitOrAssign(_binding_0) => { skip!(_binding_0); } - BinOp::ShlAssign(_binding_0) => { + crate::BinOp::ShlAssign(_binding_0) => { skip!(_binding_0); } - BinOp::ShrAssign(_binding_0) => { + crate::BinOp::ShrAssign(_binding_0) => { skip!(_binding_0); } } } #[cfg(feature = "full")] -pub fn visit_block<'ast, V>(v: &mut V, node: &'ast Block) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_block<'ast, V>(v: &mut V, node: &'ast crate::Block) where V: Visit<'ast> + ?Sized, { @@ -972,7 +1153,8 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_bound_lifetimes<'ast, V>(v: &mut V, node: &'ast BoundLifetimes) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_bound_lifetimes<'ast, V>(v: &mut V, node: &'ast crate::BoundLifetimes) where V: Visit<'ast> + ?Sized, { @@ -985,7 +1167,8 @@ where skip!(node.gt_token); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_const_param<'ast, V>(v: &mut V, node: &'ast ConstParam) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_const_param<'ast, V>(v: &mut V, node: &'ast crate::ConstParam) where V: Visit<'ast> + ?Sized, { @@ -1002,7 
+1185,8 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_constraint<'ast, V>(v: &mut V, node: &'ast Constraint) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_constraint<'ast, V>(v: &mut V, node: &'ast crate::Constraint) where V: Visit<'ast> + ?Sized, { @@ -1017,24 +1201,26 @@ where } } #[cfg(feature = "derive")] -pub fn visit_data<'ast, V>(v: &mut V, node: &'ast Data) +#[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] +pub fn visit_data<'ast, V>(v: &mut V, node: &'ast crate::Data) where V: Visit<'ast> + ?Sized, { match node { - Data::Struct(_binding_0) => { + crate::Data::Struct(_binding_0) => { v.visit_data_struct(_binding_0); } - Data::Enum(_binding_0) => { + crate::Data::Enum(_binding_0) => { v.visit_data_enum(_binding_0); } - Data::Union(_binding_0) => { + crate::Data::Union(_binding_0) => { v.visit_data_union(_binding_0); } } } #[cfg(feature = "derive")] -pub fn visit_data_enum<'ast, V>(v: &mut V, node: &'ast DataEnum) +#[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] +pub fn visit_data_enum<'ast, V>(v: &mut V, node: &'ast crate::DataEnum) where V: Visit<'ast> + ?Sized, { @@ -1046,7 +1232,8 @@ where } } #[cfg(feature = "derive")] -pub fn visit_data_struct<'ast, V>(v: &mut V, node: &'ast DataStruct) +#[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] +pub fn visit_data_struct<'ast, V>(v: &mut V, node: &'ast crate::DataStruct) where V: Visit<'ast> + ?Sized, { @@ -1055,7 +1242,8 @@ where skip!(node.semi_token); } #[cfg(feature = "derive")] -pub fn visit_data_union<'ast, V>(v: &mut V, node: &'ast DataUnion) +#[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] +pub fn visit_data_union<'ast, V>(v: &mut V, node: &'ast crate::DataUnion) where V: Visit<'ast> + ?Sized, { @@ -1063,7 +1251,8 @@ where v.visit_fields_named(&node.fields); } #[cfg(feature = "derive")] -pub fn visit_derive_input<'ast, V>(v: &mut V, node: &'ast DeriveInput) +#[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] +pub fn 
visit_derive_input<'ast, V>(v: &mut V, node: &'ast crate::DeriveInput) where V: Visit<'ast> + ?Sized, { @@ -1076,132 +1265,134 @@ where v.visit_data(&node.data); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_expr<'ast, V>(v: &mut V, node: &'ast Expr) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr<'ast, V>(v: &mut V, node: &'ast crate::Expr) where V: Visit<'ast> + ?Sized, { match node { - Expr::Array(_binding_0) => { + crate::Expr::Array(_binding_0) => { full!(v.visit_expr_array(_binding_0)); } - Expr::Assign(_binding_0) => { + crate::Expr::Assign(_binding_0) => { full!(v.visit_expr_assign(_binding_0)); } - Expr::Async(_binding_0) => { + crate::Expr::Async(_binding_0) => { full!(v.visit_expr_async(_binding_0)); } - Expr::Await(_binding_0) => { + crate::Expr::Await(_binding_0) => { full!(v.visit_expr_await(_binding_0)); } - Expr::Binary(_binding_0) => { + crate::Expr::Binary(_binding_0) => { v.visit_expr_binary(_binding_0); } - Expr::Block(_binding_0) => { + crate::Expr::Block(_binding_0) => { full!(v.visit_expr_block(_binding_0)); } - Expr::Break(_binding_0) => { + crate::Expr::Break(_binding_0) => { full!(v.visit_expr_break(_binding_0)); } - Expr::Call(_binding_0) => { + crate::Expr::Call(_binding_0) => { v.visit_expr_call(_binding_0); } - Expr::Cast(_binding_0) => { + crate::Expr::Cast(_binding_0) => { v.visit_expr_cast(_binding_0); } - Expr::Closure(_binding_0) => { + crate::Expr::Closure(_binding_0) => { full!(v.visit_expr_closure(_binding_0)); } - Expr::Const(_binding_0) => { + crate::Expr::Const(_binding_0) => { full!(v.visit_expr_const(_binding_0)); } - Expr::Continue(_binding_0) => { + crate::Expr::Continue(_binding_0) => { full!(v.visit_expr_continue(_binding_0)); } - Expr::Field(_binding_0) => { + crate::Expr::Field(_binding_0) => { v.visit_expr_field(_binding_0); } - Expr::ForLoop(_binding_0) => { + crate::Expr::ForLoop(_binding_0) => { full!(v.visit_expr_for_loop(_binding_0)); } - 
Expr::Group(_binding_0) => { + crate::Expr::Group(_binding_0) => { v.visit_expr_group(_binding_0); } - Expr::If(_binding_0) => { + crate::Expr::If(_binding_0) => { full!(v.visit_expr_if(_binding_0)); } - Expr::Index(_binding_0) => { + crate::Expr::Index(_binding_0) => { v.visit_expr_index(_binding_0); } - Expr::Infer(_binding_0) => { + crate::Expr::Infer(_binding_0) => { full!(v.visit_expr_infer(_binding_0)); } - Expr::Let(_binding_0) => { + crate::Expr::Let(_binding_0) => { full!(v.visit_expr_let(_binding_0)); } - Expr::Lit(_binding_0) => { + crate::Expr::Lit(_binding_0) => { v.visit_expr_lit(_binding_0); } - Expr::Loop(_binding_0) => { + crate::Expr::Loop(_binding_0) => { full!(v.visit_expr_loop(_binding_0)); } - Expr::Macro(_binding_0) => { + crate::Expr::Macro(_binding_0) => { v.visit_expr_macro(_binding_0); } - Expr::Match(_binding_0) => { + crate::Expr::Match(_binding_0) => { full!(v.visit_expr_match(_binding_0)); } - Expr::MethodCall(_binding_0) => { - full!(v.visit_expr_method_call(_binding_0)); + crate::Expr::MethodCall(_binding_0) => { + v.visit_expr_method_call(_binding_0); } - Expr::Paren(_binding_0) => { + crate::Expr::Paren(_binding_0) => { v.visit_expr_paren(_binding_0); } - Expr::Path(_binding_0) => { + crate::Expr::Path(_binding_0) => { v.visit_expr_path(_binding_0); } - Expr::Range(_binding_0) => { + crate::Expr::Range(_binding_0) => { full!(v.visit_expr_range(_binding_0)); } - Expr::Reference(_binding_0) => { - full!(v.visit_expr_reference(_binding_0)); + crate::Expr::Reference(_binding_0) => { + v.visit_expr_reference(_binding_0); } - Expr::Repeat(_binding_0) => { + crate::Expr::Repeat(_binding_0) => { full!(v.visit_expr_repeat(_binding_0)); } - Expr::Return(_binding_0) => { + crate::Expr::Return(_binding_0) => { full!(v.visit_expr_return(_binding_0)); } - Expr::Struct(_binding_0) => { - full!(v.visit_expr_struct(_binding_0)); + crate::Expr::Struct(_binding_0) => { + v.visit_expr_struct(_binding_0); } - Expr::Try(_binding_0) => { + 
crate::Expr::Try(_binding_0) => { full!(v.visit_expr_try(_binding_0)); } - Expr::TryBlock(_binding_0) => { + crate::Expr::TryBlock(_binding_0) => { full!(v.visit_expr_try_block(_binding_0)); } - Expr::Tuple(_binding_0) => { + crate::Expr::Tuple(_binding_0) => { full!(v.visit_expr_tuple(_binding_0)); } - Expr::Unary(_binding_0) => { + crate::Expr::Unary(_binding_0) => { v.visit_expr_unary(_binding_0); } - Expr::Unsafe(_binding_0) => { + crate::Expr::Unsafe(_binding_0) => { full!(v.visit_expr_unsafe(_binding_0)); } - Expr::Verbatim(_binding_0) => { + crate::Expr::Verbatim(_binding_0) => { skip!(_binding_0); } - Expr::While(_binding_0) => { + crate::Expr::While(_binding_0) => { full!(v.visit_expr_while(_binding_0)); } - Expr::Yield(_binding_0) => { + crate::Expr::Yield(_binding_0) => { full!(v.visit_expr_yield(_binding_0)); } } } #[cfg(feature = "full")] -pub fn visit_expr_array<'ast, V>(v: &mut V, node: &'ast ExprArray) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_array<'ast, V>(v: &mut V, node: &'ast crate::ExprArray) where V: Visit<'ast> + ?Sized, { @@ -1215,7 +1406,8 @@ where } } #[cfg(feature = "full")] -pub fn visit_expr_assign<'ast, V>(v: &mut V, node: &'ast ExprAssign) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_assign<'ast, V>(v: &mut V, node: &'ast crate::ExprAssign) where V: Visit<'ast> + ?Sized, { @@ -1227,7 +1419,8 @@ where v.visit_expr(&*node.right); } #[cfg(feature = "full")] -pub fn visit_expr_async<'ast, V>(v: &mut V, node: &'ast ExprAsync) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_async<'ast, V>(v: &mut V, node: &'ast crate::ExprAsync) where V: Visit<'ast> + ?Sized, { @@ -1239,7 +1432,8 @@ where v.visit_block(&node.block); } #[cfg(feature = "full")] -pub fn visit_expr_await<'ast, V>(v: &mut V, node: &'ast ExprAwait) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_await<'ast, V>(v: &mut V, node: &'ast crate::ExprAwait) where V: Visit<'ast> + ?Sized, { @@ 
-1251,7 +1445,8 @@ where skip!(node.await_token); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_expr_binary<'ast, V>(v: &mut V, node: &'ast ExprBinary) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_binary<'ast, V>(v: &mut V, node: &'ast crate::ExprBinary) where V: Visit<'ast> + ?Sized, { @@ -1263,7 +1458,8 @@ where v.visit_expr(&*node.right); } #[cfg(feature = "full")] -pub fn visit_expr_block<'ast, V>(v: &mut V, node: &'ast ExprBlock) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_block<'ast, V>(v: &mut V, node: &'ast crate::ExprBlock) where V: Visit<'ast> + ?Sized, { @@ -1276,7 +1472,8 @@ where v.visit_block(&node.block); } #[cfg(feature = "full")] -pub fn visit_expr_break<'ast, V>(v: &mut V, node: &'ast ExprBreak) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_break<'ast, V>(v: &mut V, node: &'ast crate::ExprBreak) where V: Visit<'ast> + ?Sized, { @@ -1292,7 +1489,8 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_expr_call<'ast, V>(v: &mut V, node: &'ast ExprCall) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_call<'ast, V>(v: &mut V, node: &'ast crate::ExprCall) where V: Visit<'ast> + ?Sized, { @@ -1307,7 +1505,8 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_expr_cast<'ast, V>(v: &mut V, node: &'ast ExprCast) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_cast<'ast, V>(v: &mut V, node: &'ast crate::ExprCast) where V: Visit<'ast> + ?Sized, { @@ -1319,7 +1518,8 @@ where v.visit_type(&*node.ty); } #[cfg(feature = "full")] -pub fn visit_expr_closure<'ast, V>(v: &mut V, node: &'ast ExprClosure) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_closure<'ast, V>(v: &mut V, node: &'ast crate::ExprClosure) where V: Visit<'ast> + ?Sized, { @@ -1343,7 +1543,8 @@ where 
v.visit_expr(&*node.body); } #[cfg(feature = "full")] -pub fn visit_expr_const<'ast, V>(v: &mut V, node: &'ast ExprConst) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_const<'ast, V>(v: &mut V, node: &'ast crate::ExprConst) where V: Visit<'ast> + ?Sized, { @@ -1354,7 +1555,8 @@ where v.visit_block(&node.block); } #[cfg(feature = "full")] -pub fn visit_expr_continue<'ast, V>(v: &mut V, node: &'ast ExprContinue) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_continue<'ast, V>(v: &mut V, node: &'ast crate::ExprContinue) where V: Visit<'ast> + ?Sized, { @@ -1367,7 +1569,8 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_expr_field<'ast, V>(v: &mut V, node: &'ast ExprField) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_field<'ast, V>(v: &mut V, node: &'ast crate::ExprField) where V: Visit<'ast> + ?Sized, { @@ -1379,7 +1582,8 @@ where v.visit_member(&node.member); } #[cfg(feature = "full")] -pub fn visit_expr_for_loop<'ast, V>(v: &mut V, node: &'ast ExprForLoop) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_for_loop<'ast, V>(v: &mut V, node: &'ast crate::ExprForLoop) where V: Visit<'ast> + ?Sized, { @@ -1396,7 +1600,8 @@ where v.visit_block(&node.body); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_expr_group<'ast, V>(v: &mut V, node: &'ast ExprGroup) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_group<'ast, V>(v: &mut V, node: &'ast crate::ExprGroup) where V: Visit<'ast> + ?Sized, { @@ -1407,7 +1612,8 @@ where v.visit_expr(&*node.expr); } #[cfg(feature = "full")] -pub fn visit_expr_if<'ast, V>(v: &mut V, node: &'ast ExprIf) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_if<'ast, V>(v: &mut V, node: &'ast crate::ExprIf) where V: Visit<'ast> + ?Sized, { @@ -1423,7 +1629,8 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn 
visit_expr_index<'ast, V>(v: &mut V, node: &'ast ExprIndex) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_index<'ast, V>(v: &mut V, node: &'ast crate::ExprIndex) where V: Visit<'ast> + ?Sized, { @@ -1435,7 +1642,8 @@ where v.visit_expr(&*node.index); } #[cfg(feature = "full")] -pub fn visit_expr_infer<'ast, V>(v: &mut V, node: &'ast ExprInfer) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_infer<'ast, V>(v: &mut V, node: &'ast crate::ExprInfer) where V: Visit<'ast> + ?Sized, { @@ -1445,7 +1653,8 @@ where skip!(node.underscore_token); } #[cfg(feature = "full")] -pub fn visit_expr_let<'ast, V>(v: &mut V, node: &'ast ExprLet) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_let<'ast, V>(v: &mut V, node: &'ast crate::ExprLet) where V: Visit<'ast> + ?Sized, { @@ -1458,7 +1667,8 @@ where v.visit_expr(&*node.expr); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_expr_lit<'ast, V>(v: &mut V, node: &'ast ExprLit) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_lit<'ast, V>(v: &mut V, node: &'ast crate::ExprLit) where V: Visit<'ast> + ?Sized, { @@ -1468,7 +1678,8 @@ where v.visit_lit(&node.lit); } #[cfg(feature = "full")] -pub fn visit_expr_loop<'ast, V>(v: &mut V, node: &'ast ExprLoop) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_loop<'ast, V>(v: &mut V, node: &'ast crate::ExprLoop) where V: Visit<'ast> + ?Sized, { @@ -1482,7 +1693,8 @@ where v.visit_block(&node.body); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_expr_macro<'ast, V>(v: &mut V, node: &'ast ExprMacro) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_macro<'ast, V>(v: &mut V, node: &'ast crate::ExprMacro) where V: Visit<'ast> + ?Sized, { @@ -1492,7 +1704,8 @@ where v.visit_macro(&node.mac); } #[cfg(feature = "full")] -pub fn visit_expr_match<'ast, V>(v: &mut V, node: 
&'ast ExprMatch) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_match<'ast, V>(v: &mut V, node: &'ast crate::ExprMatch) where V: Visit<'ast> + ?Sized, { @@ -1506,8 +1719,9 @@ where v.visit_arm(it); } } -#[cfg(feature = "full")] -pub fn visit_expr_method_call<'ast, V>(v: &mut V, node: &'ast ExprMethodCall) +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_method_call<'ast, V>(v: &mut V, node: &'ast crate::ExprMethodCall) where V: Visit<'ast> + ?Sized, { @@ -1527,7 +1741,8 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_expr_paren<'ast, V>(v: &mut V, node: &'ast ExprParen) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_paren<'ast, V>(v: &mut V, node: &'ast crate::ExprParen) where V: Visit<'ast> + ?Sized, { @@ -1538,7 +1753,8 @@ where v.visit_expr(&*node.expr); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_expr_path<'ast, V>(v: &mut V, node: &'ast ExprPath) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_path<'ast, V>(v: &mut V, node: &'ast crate::ExprPath) where V: Visit<'ast> + ?Sized, { @@ -1551,7 +1767,8 @@ where v.visit_path(&node.path); } #[cfg(feature = "full")] -pub fn visit_expr_range<'ast, V>(v: &mut V, node: &'ast ExprRange) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_range<'ast, V>(v: &mut V, node: &'ast crate::ExprRange) where V: Visit<'ast> + ?Sized, { @@ -1566,8 +1783,9 @@ where v.visit_expr(&**it); } } -#[cfg(feature = "full")] -pub fn visit_expr_reference<'ast, V>(v: &mut V, node: &'ast ExprReference) +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_reference<'ast, V>(v: &mut V, node: &'ast crate::ExprReference) where V: Visit<'ast> + ?Sized, { @@ -1579,7 +1797,8 @@ where 
v.visit_expr(&*node.expr); } #[cfg(feature = "full")] -pub fn visit_expr_repeat<'ast, V>(v: &mut V, node: &'ast ExprRepeat) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_repeat<'ast, V>(v: &mut V, node: &'ast crate::ExprRepeat) where V: Visit<'ast> + ?Sized, { @@ -1592,7 +1811,8 @@ where v.visit_expr(&*node.len); } #[cfg(feature = "full")] -pub fn visit_expr_return<'ast, V>(v: &mut V, node: &'ast ExprReturn) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_return<'ast, V>(v: &mut V, node: &'ast crate::ExprReturn) where V: Visit<'ast> + ?Sized, { @@ -1604,8 +1824,9 @@ where v.visit_expr(&**it); } } -#[cfg(feature = "full")] -pub fn visit_expr_struct<'ast, V>(v: &mut V, node: &'ast ExprStruct) +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_struct<'ast, V>(v: &mut V, node: &'ast crate::ExprStruct) where V: Visit<'ast> + ?Sized, { @@ -1627,7 +1848,8 @@ where } } #[cfg(feature = "full")] -pub fn visit_expr_try<'ast, V>(v: &mut V, node: &'ast ExprTry) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_try<'ast, V>(v: &mut V, node: &'ast crate::ExprTry) where V: Visit<'ast> + ?Sized, { @@ -1638,7 +1860,8 @@ where skip!(node.question_token); } #[cfg(feature = "full")] -pub fn visit_expr_try_block<'ast, V>(v: &mut V, node: &'ast ExprTryBlock) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_try_block<'ast, V>(v: &mut V, node: &'ast crate::ExprTryBlock) where V: Visit<'ast> + ?Sized, { @@ -1649,7 +1872,8 @@ where v.visit_block(&node.block); } #[cfg(feature = "full")] -pub fn visit_expr_tuple<'ast, V>(v: &mut V, node: &'ast ExprTuple) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_tuple<'ast, V>(v: &mut V, node: &'ast crate::ExprTuple) where V: Visit<'ast> + ?Sized, { @@ -1663,7 +1887,8 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_expr_unary<'ast, 
V>(v: &mut V, node: &'ast ExprUnary) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_unary<'ast, V>(v: &mut V, node: &'ast crate::ExprUnary) where V: Visit<'ast> + ?Sized, { @@ -1674,7 +1899,8 @@ where v.visit_expr(&*node.expr); } #[cfg(feature = "full")] -pub fn visit_expr_unsafe<'ast, V>(v: &mut V, node: &'ast ExprUnsafe) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_unsafe<'ast, V>(v: &mut V, node: &'ast crate::ExprUnsafe) where V: Visit<'ast> + ?Sized, { @@ -1685,7 +1911,8 @@ where v.visit_block(&node.block); } #[cfg(feature = "full")] -pub fn visit_expr_while<'ast, V>(v: &mut V, node: &'ast ExprWhile) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_while<'ast, V>(v: &mut V, node: &'ast crate::ExprWhile) where V: Visit<'ast> + ?Sized, { @@ -1700,7 +1927,8 @@ where v.visit_block(&node.body); } #[cfg(feature = "full")] -pub fn visit_expr_yield<'ast, V>(v: &mut V, node: &'ast ExprYield) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_yield<'ast, V>(v: &mut V, node: &'ast crate::ExprYield) where V: Visit<'ast> + ?Sized, { @@ -1713,7 +1941,8 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_field<'ast, V>(v: &mut V, node: &'ast Field) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_field<'ast, V>(v: &mut V, node: &'ast crate::Field) where V: Visit<'ast> + ?Sized, { @@ -1729,16 +1958,18 @@ where v.visit_type(&node.ty); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_field_mutability<'ast, V>(v: &mut V, node: &'ast FieldMutability) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_field_mutability<'ast, V>(v: &mut V, node: &'ast crate::FieldMutability) where V: Visit<'ast> + ?Sized, { match node { - FieldMutability::None => {} + crate::FieldMutability::None => {} } } #[cfg(feature = "full")] -pub fn visit_field_pat<'ast, V>(v: &mut V, 
node: &'ast FieldPat) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_field_pat<'ast, V>(v: &mut V, node: &'ast crate::FieldPat) where V: Visit<'ast> + ?Sized, { @@ -1749,8 +1980,9 @@ where skip!(node.colon_token); v.visit_pat(&*node.pat); } -#[cfg(feature = "full")] -pub fn visit_field_value<'ast, V>(v: &mut V, node: &'ast FieldValue) +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_field_value<'ast, V>(v: &mut V, node: &'ast crate::FieldValue) where V: Visit<'ast> + ?Sized, { @@ -1762,22 +1994,24 @@ where v.visit_expr(&node.expr); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_fields<'ast, V>(v: &mut V, node: &'ast Fields) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_fields<'ast, V>(v: &mut V, node: &'ast crate::Fields) where V: Visit<'ast> + ?Sized, { match node { - Fields::Named(_binding_0) => { + crate::Fields::Named(_binding_0) => { v.visit_fields_named(_binding_0); } - Fields::Unnamed(_binding_0) => { + crate::Fields::Unnamed(_binding_0) => { v.visit_fields_unnamed(_binding_0); } - Fields::Unit => {} + crate::Fields::Unit => {} } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_fields_named<'ast, V>(v: &mut V, node: &'ast FieldsNamed) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_fields_named<'ast, V>(v: &mut V, node: &'ast crate::FieldsNamed) where V: Visit<'ast> + ?Sized, { @@ -1788,7 +2022,8 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_fields_unnamed<'ast, V>(v: &mut V, node: &'ast FieldsUnnamed) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_fields_unnamed<'ast, V>(v: &mut V, node: &'ast crate::FieldsUnnamed) where V: Visit<'ast> + ?Sized, { @@ -1799,7 +2034,8 @@ where } } #[cfg(feature = "full")] -pub fn visit_file<'ast, V>(v: &mut V, node: &'ast File) 
+#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_file<'ast, V>(v: &mut V, node: &'ast crate::File) where V: Visit<'ast> + ?Sized, { @@ -1812,44 +2048,47 @@ where } } #[cfg(feature = "full")] -pub fn visit_fn_arg<'ast, V>(v: &mut V, node: &'ast FnArg) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_fn_arg<'ast, V>(v: &mut V, node: &'ast crate::FnArg) where V: Visit<'ast> + ?Sized, { match node { - FnArg::Receiver(_binding_0) => { + crate::FnArg::Receiver(_binding_0) => { v.visit_receiver(_binding_0); } - FnArg::Typed(_binding_0) => { + crate::FnArg::Typed(_binding_0) => { v.visit_pat_type(_binding_0); } } } #[cfg(feature = "full")] -pub fn visit_foreign_item<'ast, V>(v: &mut V, node: &'ast ForeignItem) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_foreign_item<'ast, V>(v: &mut V, node: &'ast crate::ForeignItem) where V: Visit<'ast> + ?Sized, { match node { - ForeignItem::Fn(_binding_0) => { + crate::ForeignItem::Fn(_binding_0) => { v.visit_foreign_item_fn(_binding_0); } - ForeignItem::Static(_binding_0) => { + crate::ForeignItem::Static(_binding_0) => { v.visit_foreign_item_static(_binding_0); } - ForeignItem::Type(_binding_0) => { + crate::ForeignItem::Type(_binding_0) => { v.visit_foreign_item_type(_binding_0); } - ForeignItem::Macro(_binding_0) => { + crate::ForeignItem::Macro(_binding_0) => { v.visit_foreign_item_macro(_binding_0); } - ForeignItem::Verbatim(_binding_0) => { + crate::ForeignItem::Verbatim(_binding_0) => { skip!(_binding_0); } } } #[cfg(feature = "full")] -pub fn visit_foreign_item_fn<'ast, V>(v: &mut V, node: &'ast ForeignItemFn) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_foreign_item_fn<'ast, V>(v: &mut V, node: &'ast crate::ForeignItemFn) where V: Visit<'ast> + ?Sized, { @@ -1861,7 +2100,8 @@ where skip!(node.semi_token); } #[cfg(feature = "full")] -pub fn visit_foreign_item_macro<'ast, V>(v: &mut V, node: &'ast ForeignItemMacro) +#[cfg_attr(doc_cfg, doc(cfg(feature = 
"full")))] +pub fn visit_foreign_item_macro<'ast, V>(v: &mut V, node: &'ast crate::ForeignItemMacro) where V: Visit<'ast> + ?Sized, { @@ -1872,7 +2112,11 @@ where skip!(node.semi_token); } #[cfg(feature = "full")] -pub fn visit_foreign_item_static<'ast, V>(v: &mut V, node: &'ast ForeignItemStatic) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_foreign_item_static<'ast, V>( + v: &mut V, + node: &'ast crate::ForeignItemStatic, +) where V: Visit<'ast> + ?Sized, { @@ -1888,7 +2132,8 @@ where skip!(node.semi_token); } #[cfg(feature = "full")] -pub fn visit_foreign_item_type<'ast, V>(v: &mut V, node: &'ast ForeignItemType) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_foreign_item_type<'ast, V>(v: &mut V, node: &'ast crate::ForeignItemType) where V: Visit<'ast> + ?Sized, { @@ -1902,50 +2147,53 @@ where skip!(node.semi_token); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_generic_argument<'ast, V>(v: &mut V, node: &'ast GenericArgument) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_generic_argument<'ast, V>(v: &mut V, node: &'ast crate::GenericArgument) where V: Visit<'ast> + ?Sized, { match node { - GenericArgument::Lifetime(_binding_0) => { + crate::GenericArgument::Lifetime(_binding_0) => { v.visit_lifetime(_binding_0); } - GenericArgument::Type(_binding_0) => { + crate::GenericArgument::Type(_binding_0) => { v.visit_type(_binding_0); } - GenericArgument::Const(_binding_0) => { + crate::GenericArgument::Const(_binding_0) => { v.visit_expr(_binding_0); } - GenericArgument::AssocType(_binding_0) => { + crate::GenericArgument::AssocType(_binding_0) => { v.visit_assoc_type(_binding_0); } - GenericArgument::AssocConst(_binding_0) => { + crate::GenericArgument::AssocConst(_binding_0) => { v.visit_assoc_const(_binding_0); } - GenericArgument::Constraint(_binding_0) => { + crate::GenericArgument::Constraint(_binding_0) => { v.visit_constraint(_binding_0); } } } #[cfg(any(feature = 
"derive", feature = "full"))] -pub fn visit_generic_param<'ast, V>(v: &mut V, node: &'ast GenericParam) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_generic_param<'ast, V>(v: &mut V, node: &'ast crate::GenericParam) where V: Visit<'ast> + ?Sized, { match node { - GenericParam::Lifetime(_binding_0) => { + crate::GenericParam::Lifetime(_binding_0) => { v.visit_lifetime_param(_binding_0); } - GenericParam::Type(_binding_0) => { + crate::GenericParam::Type(_binding_0) => { v.visit_type_param(_binding_0); } - GenericParam::Const(_binding_0) => { + crate::GenericParam::Const(_binding_0) => { v.visit_const_param(_binding_0); } } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_generics<'ast, V>(v: &mut V, node: &'ast Generics) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_generics<'ast, V>(v: &mut V, node: &'ast crate::Generics) where V: Visit<'ast> + ?Sized, { @@ -1959,37 +2207,39 @@ where v.visit_where_clause(it); } } -pub fn visit_ident<'ast, V>(v: &mut V, node: &'ast Ident) +pub fn visit_ident<'ast, V>(v: &mut V, node: &'ast proc_macro2::Ident) where V: Visit<'ast> + ?Sized, { v.visit_span(&node.span()); } #[cfg(feature = "full")] -pub fn visit_impl_item<'ast, V>(v: &mut V, node: &'ast ImplItem) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_impl_item<'ast, V>(v: &mut V, node: &'ast crate::ImplItem) where V: Visit<'ast> + ?Sized, { match node { - ImplItem::Const(_binding_0) => { + crate::ImplItem::Const(_binding_0) => { v.visit_impl_item_const(_binding_0); } - ImplItem::Fn(_binding_0) => { + crate::ImplItem::Fn(_binding_0) => { v.visit_impl_item_fn(_binding_0); } - ImplItem::Type(_binding_0) => { + crate::ImplItem::Type(_binding_0) => { v.visit_impl_item_type(_binding_0); } - ImplItem::Macro(_binding_0) => { + crate::ImplItem::Macro(_binding_0) => { v.visit_impl_item_macro(_binding_0); } - ImplItem::Verbatim(_binding_0) => { + 
crate::ImplItem::Verbatim(_binding_0) => { skip!(_binding_0); } } } #[cfg(feature = "full")] -pub fn visit_impl_item_const<'ast, V>(v: &mut V, node: &'ast ImplItemConst) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_impl_item_const<'ast, V>(v: &mut V, node: &'ast crate::ImplItemConst) where V: Visit<'ast> + ?Sized, { @@ -2008,7 +2258,8 @@ where skip!(node.semi_token); } #[cfg(feature = "full")] -pub fn visit_impl_item_fn<'ast, V>(v: &mut V, node: &'ast ImplItemFn) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_impl_item_fn<'ast, V>(v: &mut V, node: &'ast crate::ImplItemFn) where V: Visit<'ast> + ?Sized, { @@ -2021,7 +2272,8 @@ where v.visit_block(&node.block); } #[cfg(feature = "full")] -pub fn visit_impl_item_macro<'ast, V>(v: &mut V, node: &'ast ImplItemMacro) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_impl_item_macro<'ast, V>(v: &mut V, node: &'ast crate::ImplItemMacro) where V: Visit<'ast> + ?Sized, { @@ -2032,7 +2284,8 @@ where skip!(node.semi_token); } #[cfg(feature = "full")] -pub fn visit_impl_item_type<'ast, V>(v: &mut V, node: &'ast ImplItemType) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_impl_item_type<'ast, V>(v: &mut V, node: &'ast crate::ImplItemType) where V: Visit<'ast> + ?Sized, { @@ -2049,14 +2302,16 @@ where skip!(node.semi_token); } #[cfg(feature = "full")] -pub fn visit_impl_restriction<'ast, V>(v: &mut V, node: &'ast ImplRestriction) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_impl_restriction<'ast, V>(v: &mut V, node: &'ast crate::ImplRestriction) where V: Visit<'ast> + ?Sized, { match *node {} } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_index<'ast, V>(v: &mut V, node: &'ast Index) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_index<'ast, V>(v: &mut V, node: &'ast crate::Index) where V: Visit<'ast> + ?Sized, { @@ -2064,63 +2319,65 @@ where v.visit_span(&node.span); } #[cfg(feature = 
"full")] -pub fn visit_item<'ast, V>(v: &mut V, node: &'ast Item) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item<'ast, V>(v: &mut V, node: &'ast crate::Item) where V: Visit<'ast> + ?Sized, { match node { - Item::Const(_binding_0) => { + crate::Item::Const(_binding_0) => { v.visit_item_const(_binding_0); } - Item::Enum(_binding_0) => { + crate::Item::Enum(_binding_0) => { v.visit_item_enum(_binding_0); } - Item::ExternCrate(_binding_0) => { + crate::Item::ExternCrate(_binding_0) => { v.visit_item_extern_crate(_binding_0); } - Item::Fn(_binding_0) => { + crate::Item::Fn(_binding_0) => { v.visit_item_fn(_binding_0); } - Item::ForeignMod(_binding_0) => { + crate::Item::ForeignMod(_binding_0) => { v.visit_item_foreign_mod(_binding_0); } - Item::Impl(_binding_0) => { + crate::Item::Impl(_binding_0) => { v.visit_item_impl(_binding_0); } - Item::Macro(_binding_0) => { + crate::Item::Macro(_binding_0) => { v.visit_item_macro(_binding_0); } - Item::Mod(_binding_0) => { + crate::Item::Mod(_binding_0) => { v.visit_item_mod(_binding_0); } - Item::Static(_binding_0) => { + crate::Item::Static(_binding_0) => { v.visit_item_static(_binding_0); } - Item::Struct(_binding_0) => { + crate::Item::Struct(_binding_0) => { v.visit_item_struct(_binding_0); } - Item::Trait(_binding_0) => { + crate::Item::Trait(_binding_0) => { v.visit_item_trait(_binding_0); } - Item::TraitAlias(_binding_0) => { + crate::Item::TraitAlias(_binding_0) => { v.visit_item_trait_alias(_binding_0); } - Item::Type(_binding_0) => { + crate::Item::Type(_binding_0) => { v.visit_item_type(_binding_0); } - Item::Union(_binding_0) => { + crate::Item::Union(_binding_0) => { v.visit_item_union(_binding_0); } - Item::Use(_binding_0) => { + crate::Item::Use(_binding_0) => { v.visit_item_use(_binding_0); } - Item::Verbatim(_binding_0) => { + crate::Item::Verbatim(_binding_0) => { skip!(_binding_0); } } } #[cfg(feature = "full")] -pub fn visit_item_const<'ast, V>(v: &mut V, node: &'ast ItemConst) 
+#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_const<'ast, V>(v: &mut V, node: &'ast crate::ItemConst) where V: Visit<'ast> + ?Sized, { @@ -2138,7 +2395,8 @@ where skip!(node.semi_token); } #[cfg(feature = "full")] -pub fn visit_item_enum<'ast, V>(v: &mut V, node: &'ast ItemEnum) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_enum<'ast, V>(v: &mut V, node: &'ast crate::ItemEnum) where V: Visit<'ast> + ?Sized, { @@ -2156,7 +2414,8 @@ where } } #[cfg(feature = "full")] -pub fn visit_item_extern_crate<'ast, V>(v: &mut V, node: &'ast ItemExternCrate) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_extern_crate<'ast, V>(v: &mut V, node: &'ast crate::ItemExternCrate) where V: Visit<'ast> + ?Sized, { @@ -2174,7 +2433,8 @@ where skip!(node.semi_token); } #[cfg(feature = "full")] -pub fn visit_item_fn<'ast, V>(v: &mut V, node: &'ast ItemFn) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_fn<'ast, V>(v: &mut V, node: &'ast crate::ItemFn) where V: Visit<'ast> + ?Sized, { @@ -2186,7 +2446,8 @@ where v.visit_block(&*node.block); } #[cfg(feature = "full")] -pub fn visit_item_foreign_mod<'ast, V>(v: &mut V, node: &'ast ItemForeignMod) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_foreign_mod<'ast, V>(v: &mut V, node: &'ast crate::ItemForeignMod) where V: Visit<'ast> + ?Sized, { @@ -2201,7 +2462,8 @@ where } } #[cfg(feature = "full")] -pub fn visit_item_impl<'ast, V>(v: &mut V, node: &'ast ItemImpl) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_impl<'ast, V>(v: &mut V, node: &'ast crate::ItemImpl) where V: Visit<'ast> + ?Sized, { @@ -2224,7 +2486,8 @@ where } } #[cfg(feature = "full")] -pub fn visit_item_macro<'ast, V>(v: &mut V, node: &'ast ItemMacro) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_macro<'ast, V>(v: &mut V, node: &'ast crate::ItemMacro) where V: Visit<'ast> + ?Sized, { @@ -2238,7 +2501,8 @@ where 
skip!(node.semi_token); } #[cfg(feature = "full")] -pub fn visit_item_mod<'ast, V>(v: &mut V, node: &'ast ItemMod) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_mod<'ast, V>(v: &mut V, node: &'ast crate::ItemMod) where V: Visit<'ast> + ?Sized, { @@ -2258,7 +2522,8 @@ where skip!(node.semi); } #[cfg(feature = "full")] -pub fn visit_item_static<'ast, V>(v: &mut V, node: &'ast ItemStatic) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_static<'ast, V>(v: &mut V, node: &'ast crate::ItemStatic) where V: Visit<'ast> + ?Sized, { @@ -2276,7 +2541,8 @@ where skip!(node.semi_token); } #[cfg(feature = "full")] -pub fn visit_item_struct<'ast, V>(v: &mut V, node: &'ast ItemStruct) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_struct<'ast, V>(v: &mut V, node: &'ast crate::ItemStruct) where V: Visit<'ast> + ?Sized, { @@ -2291,7 +2557,8 @@ where skip!(node.semi_token); } #[cfg(feature = "full")] -pub fn visit_item_trait<'ast, V>(v: &mut V, node: &'ast ItemTrait) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_trait<'ast, V>(v: &mut V, node: &'ast crate::ItemTrait) where V: Visit<'ast> + ?Sized, { @@ -2318,7 +2585,8 @@ where } } #[cfg(feature = "full")] -pub fn visit_item_trait_alias<'ast, V>(v: &mut V, node: &'ast ItemTraitAlias) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_trait_alias<'ast, V>(v: &mut V, node: &'ast crate::ItemTraitAlias) where V: Visit<'ast> + ?Sized, { @@ -2337,7 +2605,8 @@ where skip!(node.semi_token); } #[cfg(feature = "full")] -pub fn visit_item_type<'ast, V>(v: &mut V, node: &'ast ItemType) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_type<'ast, V>(v: &mut V, node: &'ast crate::ItemType) where V: Visit<'ast> + ?Sized, { @@ -2353,7 +2622,8 @@ where skip!(node.semi_token); } #[cfg(feature = "full")] -pub fn visit_item_union<'ast, V>(v: &mut V, node: &'ast ItemUnion) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn 
visit_item_union<'ast, V>(v: &mut V, node: &'ast crate::ItemUnion) where V: Visit<'ast> + ?Sized, { @@ -2367,7 +2637,8 @@ where v.visit_fields_named(&node.fields); } #[cfg(feature = "full")] -pub fn visit_item_use<'ast, V>(v: &mut V, node: &'ast ItemUse) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_use<'ast, V>(v: &mut V, node: &'ast crate::ItemUse) where V: Visit<'ast> + ?Sized, { @@ -2381,14 +2652,15 @@ where skip!(node.semi_token); } #[cfg(feature = "full")] -pub fn visit_label<'ast, V>(v: &mut V, node: &'ast Label) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_label<'ast, V>(v: &mut V, node: &'ast crate::Label) where V: Visit<'ast> + ?Sized, { v.visit_lifetime(&node.name); skip!(node.colon_token); } -pub fn visit_lifetime<'ast, V>(v: &mut V, node: &'ast Lifetime) +pub fn visit_lifetime<'ast, V>(v: &mut V, node: &'ast crate::Lifetime) where V: Visit<'ast> + ?Sized, { @@ -2396,7 +2668,8 @@ where v.visit_ident(&node.ident); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_lifetime_param<'ast, V>(v: &mut V, node: &'ast LifetimeParam) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_lifetime_param<'ast, V>(v: &mut V, node: &'ast crate::LifetimeParam) where V: Visit<'ast> + ?Sized, { @@ -2410,70 +2683,71 @@ where v.visit_lifetime(it); } } -pub fn visit_lit<'ast, V>(v: &mut V, node: &'ast Lit) +pub fn visit_lit<'ast, V>(v: &mut V, node: &'ast crate::Lit) where V: Visit<'ast> + ?Sized, { match node { - Lit::Str(_binding_0) => { + crate::Lit::Str(_binding_0) => { v.visit_lit_str(_binding_0); } - Lit::ByteStr(_binding_0) => { + crate::Lit::ByteStr(_binding_0) => { v.visit_lit_byte_str(_binding_0); } - Lit::Byte(_binding_0) => { + crate::Lit::Byte(_binding_0) => { v.visit_lit_byte(_binding_0); } - Lit::Char(_binding_0) => { + crate::Lit::Char(_binding_0) => { v.visit_lit_char(_binding_0); } - Lit::Int(_binding_0) => { + crate::Lit::Int(_binding_0) => { 
v.visit_lit_int(_binding_0); } - Lit::Float(_binding_0) => { + crate::Lit::Float(_binding_0) => { v.visit_lit_float(_binding_0); } - Lit::Bool(_binding_0) => { + crate::Lit::Bool(_binding_0) => { v.visit_lit_bool(_binding_0); } - Lit::Verbatim(_binding_0) => { + crate::Lit::Verbatim(_binding_0) => { skip!(_binding_0); } } } -pub fn visit_lit_bool<'ast, V>(v: &mut V, node: &'ast LitBool) +pub fn visit_lit_bool<'ast, V>(v: &mut V, node: &'ast crate::LitBool) where V: Visit<'ast> + ?Sized, { skip!(node.value); v.visit_span(&node.span); } -pub fn visit_lit_byte<'ast, V>(v: &mut V, node: &'ast LitByte) +pub fn visit_lit_byte<'ast, V>(v: &mut V, node: &'ast crate::LitByte) where V: Visit<'ast> + ?Sized, {} -pub fn visit_lit_byte_str<'ast, V>(v: &mut V, node: &'ast LitByteStr) +pub fn visit_lit_byte_str<'ast, V>(v: &mut V, node: &'ast crate::LitByteStr) where V: Visit<'ast> + ?Sized, {} -pub fn visit_lit_char<'ast, V>(v: &mut V, node: &'ast LitChar) +pub fn visit_lit_char<'ast, V>(v: &mut V, node: &'ast crate::LitChar) where V: Visit<'ast> + ?Sized, {} -pub fn visit_lit_float<'ast, V>(v: &mut V, node: &'ast LitFloat) +pub fn visit_lit_float<'ast, V>(v: &mut V, node: &'ast crate::LitFloat) where V: Visit<'ast> + ?Sized, {} -pub fn visit_lit_int<'ast, V>(v: &mut V, node: &'ast LitInt) +pub fn visit_lit_int<'ast, V>(v: &mut V, node: &'ast crate::LitInt) where V: Visit<'ast> + ?Sized, {} -pub fn visit_lit_str<'ast, V>(v: &mut V, node: &'ast LitStr) +pub fn visit_lit_str<'ast, V>(v: &mut V, node: &'ast crate::LitStr) where V: Visit<'ast> + ?Sized, {} #[cfg(feature = "full")] -pub fn visit_local<'ast, V>(v: &mut V, node: &'ast Local) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_local<'ast, V>(v: &mut V, node: &'ast crate::Local) where V: Visit<'ast> + ?Sized, { @@ -2488,7 +2762,8 @@ where skip!(node.semi_token); } #[cfg(feature = "full")] -pub fn visit_local_init<'ast, V>(v: &mut V, node: &'ast LocalInit) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub 
fn visit_local_init<'ast, V>(v: &mut V, node: &'ast crate::LocalInit) where V: Visit<'ast> + ?Sized, { @@ -2500,7 +2775,8 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_macro<'ast, V>(v: &mut V, node: &'ast Macro) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_macro<'ast, V>(v: &mut V, node: &'ast crate::Macro) where V: Visit<'ast> + ?Sized, { @@ -2510,55 +2786,59 @@ where skip!(node.tokens); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_macro_delimiter<'ast, V>(v: &mut V, node: &'ast MacroDelimiter) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_macro_delimiter<'ast, V>(v: &mut V, node: &'ast crate::MacroDelimiter) where V: Visit<'ast> + ?Sized, { match node { - MacroDelimiter::Paren(_binding_0) => { + crate::MacroDelimiter::Paren(_binding_0) => { skip!(_binding_0); } - MacroDelimiter::Brace(_binding_0) => { + crate::MacroDelimiter::Brace(_binding_0) => { skip!(_binding_0); } - MacroDelimiter::Bracket(_binding_0) => { + crate::MacroDelimiter::Bracket(_binding_0) => { skip!(_binding_0); } } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_member<'ast, V>(v: &mut V, node: &'ast Member) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_member<'ast, V>(v: &mut V, node: &'ast crate::Member) where V: Visit<'ast> + ?Sized, { match node { - Member::Named(_binding_0) => { + crate::Member::Named(_binding_0) => { v.visit_ident(_binding_0); } - Member::Unnamed(_binding_0) => { + crate::Member::Unnamed(_binding_0) => { v.visit_index(_binding_0); } } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_meta<'ast, V>(v: &mut V, node: &'ast Meta) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_meta<'ast, V>(v: &mut V, node: &'ast crate::Meta) where V: Visit<'ast> + ?Sized, { match node { - Meta::Path(_binding_0) => { + 
crate::Meta::Path(_binding_0) => { v.visit_path(_binding_0); } - Meta::List(_binding_0) => { + crate::Meta::List(_binding_0) => { v.visit_meta_list(_binding_0); } - Meta::NameValue(_binding_0) => { + crate::Meta::NameValue(_binding_0) => { v.visit_meta_name_value(_binding_0); } } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_meta_list<'ast, V>(v: &mut V, node: &'ast MetaList) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_meta_list<'ast, V>(v: &mut V, node: &'ast crate::MetaList) where V: Visit<'ast> + ?Sized, { @@ -2567,7 +2847,8 @@ where skip!(node.tokens); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_meta_name_value<'ast, V>(v: &mut V, node: &'ast MetaNameValue) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_meta_name_value<'ast, V>(v: &mut V, node: &'ast crate::MetaNameValue) where V: Visit<'ast> + ?Sized, { @@ -2576,9 +2857,10 @@ where v.visit_expr(&node.value); } #[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] pub fn visit_parenthesized_generic_arguments<'ast, V>( v: &mut V, - node: &'ast ParenthesizedGenericArguments, + node: &'ast crate::ParenthesizedGenericArguments, ) where V: Visit<'ast> + ?Sized, @@ -2591,66 +2873,68 @@ where v.visit_return_type(&node.output); } #[cfg(feature = "full")] -pub fn visit_pat<'ast, V>(v: &mut V, node: &'ast Pat) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_pat<'ast, V>(v: &mut V, node: &'ast crate::Pat) where V: Visit<'ast> + ?Sized, { match node { - Pat::Const(_binding_0) => { + crate::Pat::Const(_binding_0) => { v.visit_expr_const(_binding_0); } - Pat::Ident(_binding_0) => { + crate::Pat::Ident(_binding_0) => { v.visit_pat_ident(_binding_0); } - Pat::Lit(_binding_0) => { + crate::Pat::Lit(_binding_0) => { v.visit_expr_lit(_binding_0); } - Pat::Macro(_binding_0) => { + crate::Pat::Macro(_binding_0) => { 
v.visit_expr_macro(_binding_0); } - Pat::Or(_binding_0) => { + crate::Pat::Or(_binding_0) => { v.visit_pat_or(_binding_0); } - Pat::Paren(_binding_0) => { + crate::Pat::Paren(_binding_0) => { v.visit_pat_paren(_binding_0); } - Pat::Path(_binding_0) => { + crate::Pat::Path(_binding_0) => { v.visit_expr_path(_binding_0); } - Pat::Range(_binding_0) => { + crate::Pat::Range(_binding_0) => { v.visit_expr_range(_binding_0); } - Pat::Reference(_binding_0) => { + crate::Pat::Reference(_binding_0) => { v.visit_pat_reference(_binding_0); } - Pat::Rest(_binding_0) => { + crate::Pat::Rest(_binding_0) => { v.visit_pat_rest(_binding_0); } - Pat::Slice(_binding_0) => { + crate::Pat::Slice(_binding_0) => { v.visit_pat_slice(_binding_0); } - Pat::Struct(_binding_0) => { + crate::Pat::Struct(_binding_0) => { v.visit_pat_struct(_binding_0); } - Pat::Tuple(_binding_0) => { + crate::Pat::Tuple(_binding_0) => { v.visit_pat_tuple(_binding_0); } - Pat::TupleStruct(_binding_0) => { + crate::Pat::TupleStruct(_binding_0) => { v.visit_pat_tuple_struct(_binding_0); } - Pat::Type(_binding_0) => { + crate::Pat::Type(_binding_0) => { v.visit_pat_type(_binding_0); } - Pat::Verbatim(_binding_0) => { + crate::Pat::Verbatim(_binding_0) => { skip!(_binding_0); } - Pat::Wild(_binding_0) => { + crate::Pat::Wild(_binding_0) => { v.visit_pat_wild(_binding_0); } } } #[cfg(feature = "full")] -pub fn visit_pat_ident<'ast, V>(v: &mut V, node: &'ast PatIdent) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_pat_ident<'ast, V>(v: &mut V, node: &'ast crate::PatIdent) where V: Visit<'ast> + ?Sized, { @@ -2666,7 +2950,8 @@ where } } #[cfg(feature = "full")] -pub fn visit_pat_or<'ast, V>(v: &mut V, node: &'ast PatOr) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_pat_or<'ast, V>(v: &mut V, node: &'ast crate::PatOr) where V: Visit<'ast> + ?Sized, { @@ -2680,7 +2965,8 @@ where } } #[cfg(feature = "full")] -pub fn visit_pat_paren<'ast, V>(v: &mut V, node: &'ast PatParen) 
+#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_pat_paren<'ast, V>(v: &mut V, node: &'ast crate::PatParen) where V: Visit<'ast> + ?Sized, { @@ -2691,7 +2977,8 @@ where v.visit_pat(&*node.pat); } #[cfg(feature = "full")] -pub fn visit_pat_reference<'ast, V>(v: &mut V, node: &'ast PatReference) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_pat_reference<'ast, V>(v: &mut V, node: &'ast crate::PatReference) where V: Visit<'ast> + ?Sized, { @@ -2703,7 +2990,8 @@ where v.visit_pat(&*node.pat); } #[cfg(feature = "full")] -pub fn visit_pat_rest<'ast, V>(v: &mut V, node: &'ast PatRest) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_pat_rest<'ast, V>(v: &mut V, node: &'ast crate::PatRest) where V: Visit<'ast> + ?Sized, { @@ -2713,7 +3001,8 @@ where skip!(node.dot2_token); } #[cfg(feature = "full")] -pub fn visit_pat_slice<'ast, V>(v: &mut V, node: &'ast PatSlice) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_pat_slice<'ast, V>(v: &mut V, node: &'ast crate::PatSlice) where V: Visit<'ast> + ?Sized, { @@ -2727,7 +3016,8 @@ where } } #[cfg(feature = "full")] -pub fn visit_pat_struct<'ast, V>(v: &mut V, node: &'ast PatStruct) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_pat_struct<'ast, V>(v: &mut V, node: &'ast crate::PatStruct) where V: Visit<'ast> + ?Sized, { @@ -2748,7 +3038,8 @@ where } } #[cfg(feature = "full")] -pub fn visit_pat_tuple<'ast, V>(v: &mut V, node: &'ast PatTuple) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_pat_tuple<'ast, V>(v: &mut V, node: &'ast crate::PatTuple) where V: Visit<'ast> + ?Sized, { @@ -2762,7 +3053,8 @@ where } } #[cfg(feature = "full")] -pub fn visit_pat_tuple_struct<'ast, V>(v: &mut V, node: &'ast PatTupleStruct) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_pat_tuple_struct<'ast, V>(v: &mut V, node: &'ast crate::PatTupleStruct) where V: Visit<'ast> + ?Sized, { @@ -2780,7 +3072,8 @@ where } } #[cfg(feature = "full")] 
-pub fn visit_pat_type<'ast, V>(v: &mut V, node: &'ast PatType) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_pat_type<'ast, V>(v: &mut V, node: &'ast crate::PatType) where V: Visit<'ast> + ?Sized, { @@ -2792,7 +3085,8 @@ where v.visit_type(&*node.ty); } #[cfg(feature = "full")] -pub fn visit_pat_wild<'ast, V>(v: &mut V, node: &'ast PatWild) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_pat_wild<'ast, V>(v: &mut V, node: &'ast crate::PatWild) where V: Visit<'ast> + ?Sized, { @@ -2802,7 +3096,8 @@ where skip!(node.underscore_token); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_path<'ast, V>(v: &mut V, node: &'ast Path) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_path<'ast, V>(v: &mut V, node: &'ast crate::Path) where V: Visit<'ast> + ?Sized, { @@ -2813,22 +3108,24 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_path_arguments<'ast, V>(v: &mut V, node: &'ast PathArguments) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_path_arguments<'ast, V>(v: &mut V, node: &'ast crate::PathArguments) where V: Visit<'ast> + ?Sized, { match node { - PathArguments::None => {} - PathArguments::AngleBracketed(_binding_0) => { + crate::PathArguments::None => {} + crate::PathArguments::AngleBracketed(_binding_0) => { v.visit_angle_bracketed_generic_arguments(_binding_0); } - PathArguments::Parenthesized(_binding_0) => { + crate::PathArguments::Parenthesized(_binding_0) => { v.visit_parenthesized_generic_arguments(_binding_0); } } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_path_segment<'ast, V>(v: &mut V, node: &'ast PathSegment) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_path_segment<'ast, V>(v: &mut V, node: &'ast crate::PathSegment) where V: Visit<'ast> + ?Sized, { @@ -2836,7 +3133,8 @@ where v.visit_path_arguments(&node.arguments); } 
#[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_predicate_lifetime<'ast, V>(v: &mut V, node: &'ast PredicateLifetime) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_predicate_lifetime<'ast, V>(v: &mut V, node: &'ast crate::PredicateLifetime) where V: Visit<'ast> + ?Sized, { @@ -2848,7 +3146,8 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_predicate_type<'ast, V>(v: &mut V, node: &'ast PredicateType) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_predicate_type<'ast, V>(v: &mut V, node: &'ast crate::PredicateType) where V: Visit<'ast> + ?Sized, { @@ -2863,7 +3162,8 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_qself<'ast, V>(v: &mut V, node: &'ast QSelf) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_qself<'ast, V>(v: &mut V, node: &'ast crate::QSelf) where V: Visit<'ast> + ?Sized, { @@ -2874,21 +3174,23 @@ where skip!(node.gt_token); } #[cfg(feature = "full")] -pub fn visit_range_limits<'ast, V>(v: &mut V, node: &'ast RangeLimits) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_range_limits<'ast, V>(v: &mut V, node: &'ast crate::RangeLimits) where V: Visit<'ast> + ?Sized, { match node { - RangeLimits::HalfOpen(_binding_0) => { + crate::RangeLimits::HalfOpen(_binding_0) => { skip!(_binding_0); } - RangeLimits::Closed(_binding_0) => { + crate::RangeLimits::Closed(_binding_0) => { skip!(_binding_0); } } } #[cfg(feature = "full")] -pub fn visit_receiver<'ast, V>(v: &mut V, node: &'ast Receiver) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_receiver<'ast, V>(v: &mut V, node: &'ast crate::Receiver) where V: Visit<'ast> + ?Sized, { @@ -2907,20 +3209,22 @@ where v.visit_type(&*node.ty); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_return_type<'ast, V>(v: &mut V, node: &'ast ReturnType) +#[cfg_attr(doc_cfg, 
doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_return_type<'ast, V>(v: &mut V, node: &'ast crate::ReturnType) where V: Visit<'ast> + ?Sized, { match node { - ReturnType::Default => {} - ReturnType::Type(_binding_0, _binding_1) => { + crate::ReturnType::Default => {} + crate::ReturnType::Type(_binding_0, _binding_1) => { skip!(_binding_0); v.visit_type(&**_binding_1); } } } #[cfg(feature = "full")] -pub fn visit_signature<'ast, V>(v: &mut V, node: &'ast Signature) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_signature<'ast, V>(v: &mut V, node: &'ast crate::Signature) where V: Visit<'ast> + ?Sized, { @@ -2943,45 +3247,48 @@ where } v.visit_return_type(&node.output); } -pub fn visit_span<'ast, V>(v: &mut V, node: &Span) +pub fn visit_span<'ast, V>(v: &mut V, node: &proc_macro2::Span) where V: Visit<'ast> + ?Sized, {} #[cfg(feature = "full")] -pub fn visit_static_mutability<'ast, V>(v: &mut V, node: &'ast StaticMutability) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_static_mutability<'ast, V>(v: &mut V, node: &'ast crate::StaticMutability) where V: Visit<'ast> + ?Sized, { match node { - StaticMutability::Mut(_binding_0) => { + crate::StaticMutability::Mut(_binding_0) => { skip!(_binding_0); } - StaticMutability::None => {} + crate::StaticMutability::None => {} } } #[cfg(feature = "full")] -pub fn visit_stmt<'ast, V>(v: &mut V, node: &'ast Stmt) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_stmt<'ast, V>(v: &mut V, node: &'ast crate::Stmt) where V: Visit<'ast> + ?Sized, { match node { - Stmt::Local(_binding_0) => { + crate::Stmt::Local(_binding_0) => { v.visit_local(_binding_0); } - Stmt::Item(_binding_0) => { + crate::Stmt::Item(_binding_0) => { v.visit_item(_binding_0); } - Stmt::Expr(_binding_0, _binding_1) => { + crate::Stmt::Expr(_binding_0, _binding_1) => { v.visit_expr(_binding_0); skip!(_binding_1); } - Stmt::Macro(_binding_0) => { + crate::Stmt::Macro(_binding_0) => { 
v.visit_stmt_macro(_binding_0); } } } #[cfg(feature = "full")] -pub fn visit_stmt_macro<'ast, V>(v: &mut V, node: &'ast StmtMacro) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_stmt_macro<'ast, V>(v: &mut V, node: &'ast crate::StmtMacro) where V: Visit<'ast> + ?Sized, { @@ -2992,7 +3299,8 @@ where skip!(node.semi_token); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_trait_bound<'ast, V>(v: &mut V, node: &'ast TraitBound) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_trait_bound<'ast, V>(v: &mut V, node: &'ast crate::TraitBound) where V: Visit<'ast> + ?Sized, { @@ -3004,42 +3312,48 @@ where v.visit_path(&node.path); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_trait_bound_modifier<'ast, V>(v: &mut V, node: &'ast TraitBoundModifier) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_trait_bound_modifier<'ast, V>( + v: &mut V, + node: &'ast crate::TraitBoundModifier, +) where V: Visit<'ast> + ?Sized, { match node { - TraitBoundModifier::None => {} - TraitBoundModifier::Maybe(_binding_0) => { + crate::TraitBoundModifier::None => {} + crate::TraitBoundModifier::Maybe(_binding_0) => { skip!(_binding_0); } } } #[cfg(feature = "full")] -pub fn visit_trait_item<'ast, V>(v: &mut V, node: &'ast TraitItem) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_trait_item<'ast, V>(v: &mut V, node: &'ast crate::TraitItem) where V: Visit<'ast> + ?Sized, { match node { - TraitItem::Const(_binding_0) => { + crate::TraitItem::Const(_binding_0) => { v.visit_trait_item_const(_binding_0); } - TraitItem::Fn(_binding_0) => { + crate::TraitItem::Fn(_binding_0) => { v.visit_trait_item_fn(_binding_0); } - TraitItem::Type(_binding_0) => { + crate::TraitItem::Type(_binding_0) => { v.visit_trait_item_type(_binding_0); } - TraitItem::Macro(_binding_0) => { + crate::TraitItem::Macro(_binding_0) => { v.visit_trait_item_macro(_binding_0); } - 
TraitItem::Verbatim(_binding_0) => { + crate::TraitItem::Verbatim(_binding_0) => { skip!(_binding_0); } } } #[cfg(feature = "full")] -pub fn visit_trait_item_const<'ast, V>(v: &mut V, node: &'ast TraitItemConst) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_trait_item_const<'ast, V>(v: &mut V, node: &'ast crate::TraitItemConst) where V: Visit<'ast> + ?Sized, { @@ -3058,7 +3372,8 @@ where skip!(node.semi_token); } #[cfg(feature = "full")] -pub fn visit_trait_item_fn<'ast, V>(v: &mut V, node: &'ast TraitItemFn) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_trait_item_fn<'ast, V>(v: &mut V, node: &'ast crate::TraitItemFn) where V: Visit<'ast> + ?Sized, { @@ -3072,7 +3387,8 @@ where skip!(node.semi_token); } #[cfg(feature = "full")] -pub fn visit_trait_item_macro<'ast, V>(v: &mut V, node: &'ast TraitItemMacro) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_trait_item_macro<'ast, V>(v: &mut V, node: &'ast crate::TraitItemMacro) where V: Visit<'ast> + ?Sized, { @@ -3083,7 +3399,8 @@ where skip!(node.semi_token); } #[cfg(feature = "full")] -pub fn visit_trait_item_type<'ast, V>(v: &mut V, node: &'ast TraitItemType) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_trait_item_type<'ast, V>(v: &mut V, node: &'ast crate::TraitItemType) where V: Visit<'ast> + ?Sized, { @@ -3105,60 +3422,62 @@ where skip!(node.semi_token); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_type<'ast, V>(v: &mut V, node: &'ast Type) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type<'ast, V>(v: &mut V, node: &'ast crate::Type) where V: Visit<'ast> + ?Sized, { match node { - Type::Array(_binding_0) => { + crate::Type::Array(_binding_0) => { v.visit_type_array(_binding_0); } - Type::BareFn(_binding_0) => { + crate::Type::BareFn(_binding_0) => { v.visit_type_bare_fn(_binding_0); } - Type::Group(_binding_0) => { + crate::Type::Group(_binding_0) => { 
v.visit_type_group(_binding_0); } - Type::ImplTrait(_binding_0) => { + crate::Type::ImplTrait(_binding_0) => { v.visit_type_impl_trait(_binding_0); } - Type::Infer(_binding_0) => { + crate::Type::Infer(_binding_0) => { v.visit_type_infer(_binding_0); } - Type::Macro(_binding_0) => { + crate::Type::Macro(_binding_0) => { v.visit_type_macro(_binding_0); } - Type::Never(_binding_0) => { + crate::Type::Never(_binding_0) => { v.visit_type_never(_binding_0); } - Type::Paren(_binding_0) => { + crate::Type::Paren(_binding_0) => { v.visit_type_paren(_binding_0); } - Type::Path(_binding_0) => { + crate::Type::Path(_binding_0) => { v.visit_type_path(_binding_0); } - Type::Ptr(_binding_0) => { + crate::Type::Ptr(_binding_0) => { v.visit_type_ptr(_binding_0); } - Type::Reference(_binding_0) => { + crate::Type::Reference(_binding_0) => { v.visit_type_reference(_binding_0); } - Type::Slice(_binding_0) => { + crate::Type::Slice(_binding_0) => { v.visit_type_slice(_binding_0); } - Type::TraitObject(_binding_0) => { + crate::Type::TraitObject(_binding_0) => { v.visit_type_trait_object(_binding_0); } - Type::Tuple(_binding_0) => { + crate::Type::Tuple(_binding_0) => { v.visit_type_tuple(_binding_0); } - Type::Verbatim(_binding_0) => { + crate::Type::Verbatim(_binding_0) => { skip!(_binding_0); } } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_type_array<'ast, V>(v: &mut V, node: &'ast TypeArray) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_array<'ast, V>(v: &mut V, node: &'ast crate::TypeArray) where V: Visit<'ast> + ?Sized, { @@ -3168,7 +3487,8 @@ where v.visit_expr(&node.len); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_type_bare_fn<'ast, V>(v: &mut V, node: &'ast TypeBareFn) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_bare_fn<'ast, V>(v: &mut V, node: &'ast crate::TypeBareFn) where V: Visit<'ast> + ?Sized, { @@ -3191,7 +3511,8 @@ where 
v.visit_return_type(&node.output); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_type_group<'ast, V>(v: &mut V, node: &'ast TypeGroup) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_group<'ast, V>(v: &mut V, node: &'ast crate::TypeGroup) where V: Visit<'ast> + ?Sized, { @@ -3199,7 +3520,8 @@ where v.visit_type(&*node.elem); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_type_impl_trait<'ast, V>(v: &mut V, node: &'ast TypeImplTrait) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_impl_trait<'ast, V>(v: &mut V, node: &'ast crate::TypeImplTrait) where V: Visit<'ast> + ?Sized, { @@ -3210,28 +3532,32 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_type_infer<'ast, V>(v: &mut V, node: &'ast TypeInfer) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_infer<'ast, V>(v: &mut V, node: &'ast crate::TypeInfer) where V: Visit<'ast> + ?Sized, { skip!(node.underscore_token); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_type_macro<'ast, V>(v: &mut V, node: &'ast TypeMacro) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_macro<'ast, V>(v: &mut V, node: &'ast crate::TypeMacro) where V: Visit<'ast> + ?Sized, { v.visit_macro(&node.mac); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_type_never<'ast, V>(v: &mut V, node: &'ast TypeNever) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_never<'ast, V>(v: &mut V, node: &'ast crate::TypeNever) where V: Visit<'ast> + ?Sized, { skip!(node.bang_token); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_type_param<'ast, V>(v: &mut V, node: &'ast TypeParam) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_param<'ast, V>(v: &mut V, node: &'ast 
crate::TypeParam) where V: Visit<'ast> + ?Sized, { @@ -3250,24 +3576,26 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_type_param_bound<'ast, V>(v: &mut V, node: &'ast TypeParamBound) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_param_bound<'ast, V>(v: &mut V, node: &'ast crate::TypeParamBound) where V: Visit<'ast> + ?Sized, { match node { - TypeParamBound::Trait(_binding_0) => { + crate::TypeParamBound::Trait(_binding_0) => { v.visit_trait_bound(_binding_0); } - TypeParamBound::Lifetime(_binding_0) => { + crate::TypeParamBound::Lifetime(_binding_0) => { v.visit_lifetime(_binding_0); } - TypeParamBound::Verbatim(_binding_0) => { + crate::TypeParamBound::Verbatim(_binding_0) => { skip!(_binding_0); } } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_type_paren<'ast, V>(v: &mut V, node: &'ast TypeParen) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_paren<'ast, V>(v: &mut V, node: &'ast crate::TypeParen) where V: Visit<'ast> + ?Sized, { @@ -3275,7 +3603,8 @@ where v.visit_type(&*node.elem); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_type_path<'ast, V>(v: &mut V, node: &'ast TypePath) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_path<'ast, V>(v: &mut V, node: &'ast crate::TypePath) where V: Visit<'ast> + ?Sized, { @@ -3285,7 +3614,8 @@ where v.visit_path(&node.path); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_type_ptr<'ast, V>(v: &mut V, node: &'ast TypePtr) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_ptr<'ast, V>(v: &mut V, node: &'ast crate::TypePtr) where V: Visit<'ast> + ?Sized, { @@ -3295,7 +3625,8 @@ where v.visit_type(&*node.elem); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_type_reference<'ast, V>(v: &mut V, node: &'ast TypeReference) +#[cfg_attr(doc_cfg, 
doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_reference<'ast, V>(v: &mut V, node: &'ast crate::TypeReference) where V: Visit<'ast> + ?Sized, { @@ -3307,7 +3638,8 @@ where v.visit_type(&*node.elem); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_type_slice<'ast, V>(v: &mut V, node: &'ast TypeSlice) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_slice<'ast, V>(v: &mut V, node: &'ast crate::TypeSlice) where V: Visit<'ast> + ?Sized, { @@ -3315,7 +3647,8 @@ where v.visit_type(&*node.elem); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_type_trait_object<'ast, V>(v: &mut V, node: &'ast TypeTraitObject) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_trait_object<'ast, V>(v: &mut V, node: &'ast crate::TypeTraitObject) where V: Visit<'ast> + ?Sized, { @@ -3326,7 +3659,8 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_type_tuple<'ast, V>(v: &mut V, node: &'ast TypeTuple) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_tuple<'ast, V>(v: &mut V, node: &'ast crate::TypeTuple) where V: Visit<'ast> + ?Sized, { @@ -3337,31 +3671,34 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_un_op<'ast, V>(v: &mut V, node: &'ast UnOp) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_un_op<'ast, V>(v: &mut V, node: &'ast crate::UnOp) where V: Visit<'ast> + ?Sized, { match node { - UnOp::Deref(_binding_0) => { + crate::UnOp::Deref(_binding_0) => { skip!(_binding_0); } - UnOp::Not(_binding_0) => { + crate::UnOp::Not(_binding_0) => { skip!(_binding_0); } - UnOp::Neg(_binding_0) => { + crate::UnOp::Neg(_binding_0) => { skip!(_binding_0); } } } #[cfg(feature = "full")] -pub fn visit_use_glob<'ast, V>(v: &mut V, node: &'ast UseGlob) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn 
visit_use_glob<'ast, V>(v: &mut V, node: &'ast crate::UseGlob) where V: Visit<'ast> + ?Sized, { skip!(node.star_token); } #[cfg(feature = "full")] -pub fn visit_use_group<'ast, V>(v: &mut V, node: &'ast UseGroup) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_use_group<'ast, V>(v: &mut V, node: &'ast crate::UseGroup) where V: Visit<'ast> + ?Sized, { @@ -3372,14 +3709,16 @@ where } } #[cfg(feature = "full")] -pub fn visit_use_name<'ast, V>(v: &mut V, node: &'ast UseName) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_use_name<'ast, V>(v: &mut V, node: &'ast crate::UseName) where V: Visit<'ast> + ?Sized, { v.visit_ident(&node.ident); } #[cfg(feature = "full")] -pub fn visit_use_path<'ast, V>(v: &mut V, node: &'ast UsePath) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_use_path<'ast, V>(v: &mut V, node: &'ast crate::UsePath) where V: Visit<'ast> + ?Sized, { @@ -3388,7 +3727,8 @@ where v.visit_use_tree(&*node.tree); } #[cfg(feature = "full")] -pub fn visit_use_rename<'ast, V>(v: &mut V, node: &'ast UseRename) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_use_rename<'ast, V>(v: &mut V, node: &'ast crate::UseRename) where V: Visit<'ast> + ?Sized, { @@ -3397,30 +3737,32 @@ where v.visit_ident(&node.rename); } #[cfg(feature = "full")] -pub fn visit_use_tree<'ast, V>(v: &mut V, node: &'ast UseTree) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_use_tree<'ast, V>(v: &mut V, node: &'ast crate::UseTree) where V: Visit<'ast> + ?Sized, { match node { - UseTree::Path(_binding_0) => { + crate::UseTree::Path(_binding_0) => { v.visit_use_path(_binding_0); } - UseTree::Name(_binding_0) => { + crate::UseTree::Name(_binding_0) => { v.visit_use_name(_binding_0); } - UseTree::Rename(_binding_0) => { + crate::UseTree::Rename(_binding_0) => { v.visit_use_rename(_binding_0); } - UseTree::Glob(_binding_0) => { + crate::UseTree::Glob(_binding_0) => { v.visit_use_glob(_binding_0); } - 
UseTree::Group(_binding_0) => { + crate::UseTree::Group(_binding_0) => { v.visit_use_group(_binding_0); } } } #[cfg(feature = "full")] -pub fn visit_variadic<'ast, V>(v: &mut V, node: &'ast Variadic) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_variadic<'ast, V>(v: &mut V, node: &'ast crate::Variadic) where V: Visit<'ast> + ?Sized, { @@ -3435,7 +3777,8 @@ where skip!(node.comma); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_variant<'ast, V>(v: &mut V, node: &'ast Variant) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_variant<'ast, V>(v: &mut V, node: &'ast crate::Variant) where V: Visit<'ast> + ?Sized, { @@ -3450,7 +3793,8 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_vis_restricted<'ast, V>(v: &mut V, node: &'ast VisRestricted) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_vis_restricted<'ast, V>(v: &mut V, node: &'ast crate::VisRestricted) where V: Visit<'ast> + ?Sized, { @@ -3460,22 +3804,24 @@ where v.visit_path(&*node.path); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_visibility<'ast, V>(v: &mut V, node: &'ast Visibility) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_visibility<'ast, V>(v: &mut V, node: &'ast crate::Visibility) where V: Visit<'ast> + ?Sized, { match node { - Visibility::Public(_binding_0) => { + crate::Visibility::Public(_binding_0) => { skip!(_binding_0); } - Visibility::Restricted(_binding_0) => { + crate::Visibility::Restricted(_binding_0) => { v.visit_vis_restricted(_binding_0); } - Visibility::Inherited => {} + crate::Visibility::Inherited => {} } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_where_clause<'ast, V>(v: &mut V, node: &'ast WhereClause) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_where_clause<'ast, V>(v: &mut V, node: &'ast crate::WhereClause) 
where V: Visit<'ast> + ?Sized, { @@ -3486,15 +3832,16 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_where_predicate<'ast, V>(v: &mut V, node: &'ast WherePredicate) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_where_predicate<'ast, V>(v: &mut V, node: &'ast crate::WherePredicate) where V: Visit<'ast> + ?Sized, { match node { - WherePredicate::Lifetime(_binding_0) => { + crate::WherePredicate::Lifetime(_binding_0) => { v.visit_predicate_lifetime(_binding_0); } - WherePredicate::Type(_binding_0) => { + crate::WherePredicate::Type(_binding_0) => { v.visit_predicate_type(_binding_0); } } diff --git a/vendor/syn/src/gen/visit_mut.rs b/vendor/syn/src/gen/visit_mut.rs index 83bd1cc..f35fc09 100644 --- a/vendor/syn/src/gen/visit_mut.rs +++ b/vendor/syn/src/gen/visit_mut.rs @@ -2,10 +2,9 @@ // It is not intended for manual editing. #![allow(unused_variables)] +#![allow(clippy::needless_pass_by_ref_mut)] #[cfg(any(feature = "full", feature = "derive"))] use crate::punctuated::Punctuated; -use crate::*; -use proc_macro2::Span; #[cfg(feature = "full")] macro_rules! full { ($e:expr) => { @@ -29,731 +28,903 @@ macro_rules! 
skip { /// [module documentation]: self pub trait VisitMut { #[cfg(any(feature = "derive", feature = "full"))] - fn visit_abi_mut(&mut self, i: &mut Abi) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_abi_mut(&mut self, i: &mut crate::Abi) { visit_abi_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] fn visit_angle_bracketed_generic_arguments_mut( &mut self, - i: &mut AngleBracketedGenericArguments, + i: &mut crate::AngleBracketedGenericArguments, ) { visit_angle_bracketed_generic_arguments_mut(self, i); } #[cfg(feature = "full")] - fn visit_arm_mut(&mut self, i: &mut Arm) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_arm_mut(&mut self, i: &mut crate::Arm) { visit_arm_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_assoc_const_mut(&mut self, i: &mut AssocConst) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_assoc_const_mut(&mut self, i: &mut crate::AssocConst) { visit_assoc_const_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_assoc_type_mut(&mut self, i: &mut AssocType) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_assoc_type_mut(&mut self, i: &mut crate::AssocType) { visit_assoc_type_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_attr_style_mut(&mut self, i: &mut AttrStyle) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_attr_style_mut(&mut self, i: &mut crate::AttrStyle) { visit_attr_style_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_attribute_mut(&mut self, i: &mut Attribute) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_attribute_mut(&mut self, i: &mut crate::Attribute) { visit_attribute_mut(self, i); } #[cfg(any(feature = 
"derive", feature = "full"))] - fn visit_bare_fn_arg_mut(&mut self, i: &mut BareFnArg) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_bare_fn_arg_mut(&mut self, i: &mut crate::BareFnArg) { visit_bare_fn_arg_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_bare_variadic_mut(&mut self, i: &mut BareVariadic) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_bare_variadic_mut(&mut self, i: &mut crate::BareVariadic) { visit_bare_variadic_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_bin_op_mut(&mut self, i: &mut BinOp) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_bin_op_mut(&mut self, i: &mut crate::BinOp) { visit_bin_op_mut(self, i); } #[cfg(feature = "full")] - fn visit_block_mut(&mut self, i: &mut Block) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_block_mut(&mut self, i: &mut crate::Block) { visit_block_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_bound_lifetimes_mut(&mut self, i: &mut BoundLifetimes) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_bound_lifetimes_mut(&mut self, i: &mut crate::BoundLifetimes) { visit_bound_lifetimes_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_const_param_mut(&mut self, i: &mut ConstParam) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_const_param_mut(&mut self, i: &mut crate::ConstParam) { visit_const_param_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_constraint_mut(&mut self, i: &mut Constraint) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_constraint_mut(&mut self, i: &mut crate::Constraint) { visit_constraint_mut(self, i); } #[cfg(feature = "derive")] - fn visit_data_mut(&mut self, i: &mut Data) { + 
#[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] + fn visit_data_mut(&mut self, i: &mut crate::Data) { visit_data_mut(self, i); } #[cfg(feature = "derive")] - fn visit_data_enum_mut(&mut self, i: &mut DataEnum) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] + fn visit_data_enum_mut(&mut self, i: &mut crate::DataEnum) { visit_data_enum_mut(self, i); } #[cfg(feature = "derive")] - fn visit_data_struct_mut(&mut self, i: &mut DataStruct) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] + fn visit_data_struct_mut(&mut self, i: &mut crate::DataStruct) { visit_data_struct_mut(self, i); } #[cfg(feature = "derive")] - fn visit_data_union_mut(&mut self, i: &mut DataUnion) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] + fn visit_data_union_mut(&mut self, i: &mut crate::DataUnion) { visit_data_union_mut(self, i); } #[cfg(feature = "derive")] - fn visit_derive_input_mut(&mut self, i: &mut DeriveInput) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] + fn visit_derive_input_mut(&mut self, i: &mut crate::DeriveInput) { visit_derive_input_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_expr_mut(&mut self, i: &mut Expr) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_mut(&mut self, i: &mut crate::Expr) { visit_expr_mut(self, i); } #[cfg(feature = "full")] - fn visit_expr_array_mut(&mut self, i: &mut ExprArray) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_array_mut(&mut self, i: &mut crate::ExprArray) { visit_expr_array_mut(self, i); } #[cfg(feature = "full")] - fn visit_expr_assign_mut(&mut self, i: &mut ExprAssign) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_assign_mut(&mut self, i: &mut crate::ExprAssign) { visit_expr_assign_mut(self, i); } #[cfg(feature = "full")] - fn visit_expr_async_mut(&mut self, i: &mut ExprAsync) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_async_mut(&mut self, i: &mut 
crate::ExprAsync) { visit_expr_async_mut(self, i); } #[cfg(feature = "full")] - fn visit_expr_await_mut(&mut self, i: &mut ExprAwait) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_await_mut(&mut self, i: &mut crate::ExprAwait) { visit_expr_await_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_expr_binary_mut(&mut self, i: &mut ExprBinary) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_binary_mut(&mut self, i: &mut crate::ExprBinary) { visit_expr_binary_mut(self, i); } #[cfg(feature = "full")] - fn visit_expr_block_mut(&mut self, i: &mut ExprBlock) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_block_mut(&mut self, i: &mut crate::ExprBlock) { visit_expr_block_mut(self, i); } #[cfg(feature = "full")] - fn visit_expr_break_mut(&mut self, i: &mut ExprBreak) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_break_mut(&mut self, i: &mut crate::ExprBreak) { visit_expr_break_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_expr_call_mut(&mut self, i: &mut ExprCall) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_call_mut(&mut self, i: &mut crate::ExprCall) { visit_expr_call_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_expr_cast_mut(&mut self, i: &mut ExprCast) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_cast_mut(&mut self, i: &mut crate::ExprCast) { visit_expr_cast_mut(self, i); } #[cfg(feature = "full")] - fn visit_expr_closure_mut(&mut self, i: &mut ExprClosure) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_closure_mut(&mut self, i: &mut crate::ExprClosure) { visit_expr_closure_mut(self, i); } #[cfg(feature = "full")] - fn visit_expr_const_mut(&mut self, i: &mut ExprConst) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_const_mut(&mut 
self, i: &mut crate::ExprConst) { visit_expr_const_mut(self, i); } #[cfg(feature = "full")] - fn visit_expr_continue_mut(&mut self, i: &mut ExprContinue) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_continue_mut(&mut self, i: &mut crate::ExprContinue) { visit_expr_continue_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_expr_field_mut(&mut self, i: &mut ExprField) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_field_mut(&mut self, i: &mut crate::ExprField) { visit_expr_field_mut(self, i); } #[cfg(feature = "full")] - fn visit_expr_for_loop_mut(&mut self, i: &mut ExprForLoop) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_for_loop_mut(&mut self, i: &mut crate::ExprForLoop) { visit_expr_for_loop_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_expr_group_mut(&mut self, i: &mut ExprGroup) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_group_mut(&mut self, i: &mut crate::ExprGroup) { visit_expr_group_mut(self, i); } #[cfg(feature = "full")] - fn visit_expr_if_mut(&mut self, i: &mut ExprIf) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_if_mut(&mut self, i: &mut crate::ExprIf) { visit_expr_if_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_expr_index_mut(&mut self, i: &mut ExprIndex) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_index_mut(&mut self, i: &mut crate::ExprIndex) { visit_expr_index_mut(self, i); } #[cfg(feature = "full")] - fn visit_expr_infer_mut(&mut self, i: &mut ExprInfer) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_infer_mut(&mut self, i: &mut crate::ExprInfer) { visit_expr_infer_mut(self, i); } #[cfg(feature = "full")] - fn visit_expr_let_mut(&mut self, i: &mut ExprLet) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn 
visit_expr_let_mut(&mut self, i: &mut crate::ExprLet) { visit_expr_let_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_expr_lit_mut(&mut self, i: &mut ExprLit) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_lit_mut(&mut self, i: &mut crate::ExprLit) { visit_expr_lit_mut(self, i); } #[cfg(feature = "full")] - fn visit_expr_loop_mut(&mut self, i: &mut ExprLoop) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_loop_mut(&mut self, i: &mut crate::ExprLoop) { visit_expr_loop_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_expr_macro_mut(&mut self, i: &mut ExprMacro) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_macro_mut(&mut self, i: &mut crate::ExprMacro) { visit_expr_macro_mut(self, i); } #[cfg(feature = "full")] - fn visit_expr_match_mut(&mut self, i: &mut ExprMatch) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_match_mut(&mut self, i: &mut crate::ExprMatch) { visit_expr_match_mut(self, i); } - #[cfg(feature = "full")] - fn visit_expr_method_call_mut(&mut self, i: &mut ExprMethodCall) { + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_method_call_mut(&mut self, i: &mut crate::ExprMethodCall) { visit_expr_method_call_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_expr_paren_mut(&mut self, i: &mut ExprParen) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_paren_mut(&mut self, i: &mut crate::ExprParen) { visit_expr_paren_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_expr_path_mut(&mut self, i: &mut ExprPath) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_path_mut(&mut self, i: &mut crate::ExprPath) { visit_expr_path_mut(self, i); } 
#[cfg(feature = "full")] - fn visit_expr_range_mut(&mut self, i: &mut ExprRange) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_range_mut(&mut self, i: &mut crate::ExprRange) { visit_expr_range_mut(self, i); } - #[cfg(feature = "full")] - fn visit_expr_reference_mut(&mut self, i: &mut ExprReference) { + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_reference_mut(&mut self, i: &mut crate::ExprReference) { visit_expr_reference_mut(self, i); } #[cfg(feature = "full")] - fn visit_expr_repeat_mut(&mut self, i: &mut ExprRepeat) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_repeat_mut(&mut self, i: &mut crate::ExprRepeat) { visit_expr_repeat_mut(self, i); } #[cfg(feature = "full")] - fn visit_expr_return_mut(&mut self, i: &mut ExprReturn) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_return_mut(&mut self, i: &mut crate::ExprReturn) { visit_expr_return_mut(self, i); } - #[cfg(feature = "full")] - fn visit_expr_struct_mut(&mut self, i: &mut ExprStruct) { + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_struct_mut(&mut self, i: &mut crate::ExprStruct) { visit_expr_struct_mut(self, i); } #[cfg(feature = "full")] - fn visit_expr_try_mut(&mut self, i: &mut ExprTry) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_try_mut(&mut self, i: &mut crate::ExprTry) { visit_expr_try_mut(self, i); } #[cfg(feature = "full")] - fn visit_expr_try_block_mut(&mut self, i: &mut ExprTryBlock) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_try_block_mut(&mut self, i: &mut crate::ExprTryBlock) { visit_expr_try_block_mut(self, i); } #[cfg(feature = "full")] - fn visit_expr_tuple_mut(&mut self, i: &mut ExprTuple) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_tuple_mut(&mut self, i: 
&mut crate::ExprTuple) { visit_expr_tuple_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_expr_unary_mut(&mut self, i: &mut ExprUnary) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_expr_unary_mut(&mut self, i: &mut crate::ExprUnary) { visit_expr_unary_mut(self, i); } #[cfg(feature = "full")] - fn visit_expr_unsafe_mut(&mut self, i: &mut ExprUnsafe) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_unsafe_mut(&mut self, i: &mut crate::ExprUnsafe) { visit_expr_unsafe_mut(self, i); } #[cfg(feature = "full")] - fn visit_expr_while_mut(&mut self, i: &mut ExprWhile) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_while_mut(&mut self, i: &mut crate::ExprWhile) { visit_expr_while_mut(self, i); } #[cfg(feature = "full")] - fn visit_expr_yield_mut(&mut self, i: &mut ExprYield) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_expr_yield_mut(&mut self, i: &mut crate::ExprYield) { visit_expr_yield_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_field_mut(&mut self, i: &mut Field) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_field_mut(&mut self, i: &mut crate::Field) { visit_field_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_field_mutability_mut(&mut self, i: &mut FieldMutability) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_field_mutability_mut(&mut self, i: &mut crate::FieldMutability) { visit_field_mutability_mut(self, i); } #[cfg(feature = "full")] - fn visit_field_pat_mut(&mut self, i: &mut FieldPat) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_field_pat_mut(&mut self, i: &mut crate::FieldPat) { visit_field_pat_mut(self, i); } - #[cfg(feature = "full")] - fn visit_field_value_mut(&mut self, i: &mut FieldValue) { + #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, 
doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_field_value_mut(&mut self, i: &mut crate::FieldValue) { visit_field_value_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_fields_mut(&mut self, i: &mut Fields) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_fields_mut(&mut self, i: &mut crate::Fields) { visit_fields_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_fields_named_mut(&mut self, i: &mut FieldsNamed) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_fields_named_mut(&mut self, i: &mut crate::FieldsNamed) { visit_fields_named_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_fields_unnamed_mut(&mut self, i: &mut FieldsUnnamed) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_fields_unnamed_mut(&mut self, i: &mut crate::FieldsUnnamed) { visit_fields_unnamed_mut(self, i); } #[cfg(feature = "full")] - fn visit_file_mut(&mut self, i: &mut File) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_file_mut(&mut self, i: &mut crate::File) { visit_file_mut(self, i); } #[cfg(feature = "full")] - fn visit_fn_arg_mut(&mut self, i: &mut FnArg) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_fn_arg_mut(&mut self, i: &mut crate::FnArg) { visit_fn_arg_mut(self, i); } #[cfg(feature = "full")] - fn visit_foreign_item_mut(&mut self, i: &mut ForeignItem) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_foreign_item_mut(&mut self, i: &mut crate::ForeignItem) { visit_foreign_item_mut(self, i); } #[cfg(feature = "full")] - fn visit_foreign_item_fn_mut(&mut self, i: &mut ForeignItemFn) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_foreign_item_fn_mut(&mut self, i: &mut crate::ForeignItemFn) { visit_foreign_item_fn_mut(self, i); } #[cfg(feature = "full")] - fn visit_foreign_item_macro_mut(&mut self, i: 
&mut ForeignItemMacro) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_foreign_item_macro_mut(&mut self, i: &mut crate::ForeignItemMacro) { visit_foreign_item_macro_mut(self, i); } #[cfg(feature = "full")] - fn visit_foreign_item_static_mut(&mut self, i: &mut ForeignItemStatic) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_foreign_item_static_mut(&mut self, i: &mut crate::ForeignItemStatic) { visit_foreign_item_static_mut(self, i); } #[cfg(feature = "full")] - fn visit_foreign_item_type_mut(&mut self, i: &mut ForeignItemType) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_foreign_item_type_mut(&mut self, i: &mut crate::ForeignItemType) { visit_foreign_item_type_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_generic_argument_mut(&mut self, i: &mut GenericArgument) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_generic_argument_mut(&mut self, i: &mut crate::GenericArgument) { visit_generic_argument_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_generic_param_mut(&mut self, i: &mut GenericParam) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_generic_param_mut(&mut self, i: &mut crate::GenericParam) { visit_generic_param_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_generics_mut(&mut self, i: &mut Generics) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_generics_mut(&mut self, i: &mut crate::Generics) { visit_generics_mut(self, i); } - fn visit_ident_mut(&mut self, i: &mut Ident) { + fn visit_ident_mut(&mut self, i: &mut proc_macro2::Ident) { visit_ident_mut(self, i); } #[cfg(feature = "full")] - fn visit_impl_item_mut(&mut self, i: &mut ImplItem) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_impl_item_mut(&mut self, i: &mut crate::ImplItem) { visit_impl_item_mut(self, i); } #[cfg(feature 
= "full")] - fn visit_impl_item_const_mut(&mut self, i: &mut ImplItemConst) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_impl_item_const_mut(&mut self, i: &mut crate::ImplItemConst) { visit_impl_item_const_mut(self, i); } #[cfg(feature = "full")] - fn visit_impl_item_fn_mut(&mut self, i: &mut ImplItemFn) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_impl_item_fn_mut(&mut self, i: &mut crate::ImplItemFn) { visit_impl_item_fn_mut(self, i); } #[cfg(feature = "full")] - fn visit_impl_item_macro_mut(&mut self, i: &mut ImplItemMacro) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_impl_item_macro_mut(&mut self, i: &mut crate::ImplItemMacro) { visit_impl_item_macro_mut(self, i); } #[cfg(feature = "full")] - fn visit_impl_item_type_mut(&mut self, i: &mut ImplItemType) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_impl_item_type_mut(&mut self, i: &mut crate::ImplItemType) { visit_impl_item_type_mut(self, i); } #[cfg(feature = "full")] - fn visit_impl_restriction_mut(&mut self, i: &mut ImplRestriction) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_impl_restriction_mut(&mut self, i: &mut crate::ImplRestriction) { visit_impl_restriction_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_index_mut(&mut self, i: &mut Index) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_index_mut(&mut self, i: &mut crate::Index) { visit_index_mut(self, i); } #[cfg(feature = "full")] - fn visit_item_mut(&mut self, i: &mut Item) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_mut(&mut self, i: &mut crate::Item) { visit_item_mut(self, i); } #[cfg(feature = "full")] - fn visit_item_const_mut(&mut self, i: &mut ItemConst) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_const_mut(&mut self, i: &mut crate::ItemConst) { visit_item_const_mut(self, i); } #[cfg(feature = "full")] - fn visit_item_enum_mut(&mut 
self, i: &mut ItemEnum) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_enum_mut(&mut self, i: &mut crate::ItemEnum) { visit_item_enum_mut(self, i); } #[cfg(feature = "full")] - fn visit_item_extern_crate_mut(&mut self, i: &mut ItemExternCrate) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_extern_crate_mut(&mut self, i: &mut crate::ItemExternCrate) { visit_item_extern_crate_mut(self, i); } #[cfg(feature = "full")] - fn visit_item_fn_mut(&mut self, i: &mut ItemFn) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_fn_mut(&mut self, i: &mut crate::ItemFn) { visit_item_fn_mut(self, i); } #[cfg(feature = "full")] - fn visit_item_foreign_mod_mut(&mut self, i: &mut ItemForeignMod) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_foreign_mod_mut(&mut self, i: &mut crate::ItemForeignMod) { visit_item_foreign_mod_mut(self, i); } #[cfg(feature = "full")] - fn visit_item_impl_mut(&mut self, i: &mut ItemImpl) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_impl_mut(&mut self, i: &mut crate::ItemImpl) { visit_item_impl_mut(self, i); } #[cfg(feature = "full")] - fn visit_item_macro_mut(&mut self, i: &mut ItemMacro) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_macro_mut(&mut self, i: &mut crate::ItemMacro) { visit_item_macro_mut(self, i); } #[cfg(feature = "full")] - fn visit_item_mod_mut(&mut self, i: &mut ItemMod) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_mod_mut(&mut self, i: &mut crate::ItemMod) { visit_item_mod_mut(self, i); } #[cfg(feature = "full")] - fn visit_item_static_mut(&mut self, i: &mut ItemStatic) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_static_mut(&mut self, i: &mut crate::ItemStatic) { visit_item_static_mut(self, i); } #[cfg(feature = "full")] - fn visit_item_struct_mut(&mut self, i: &mut ItemStruct) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_struct_mut(&mut 
self, i: &mut crate::ItemStruct) { visit_item_struct_mut(self, i); } #[cfg(feature = "full")] - fn visit_item_trait_mut(&mut self, i: &mut ItemTrait) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_trait_mut(&mut self, i: &mut crate::ItemTrait) { visit_item_trait_mut(self, i); } #[cfg(feature = "full")] - fn visit_item_trait_alias_mut(&mut self, i: &mut ItemTraitAlias) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_trait_alias_mut(&mut self, i: &mut crate::ItemTraitAlias) { visit_item_trait_alias_mut(self, i); } #[cfg(feature = "full")] - fn visit_item_type_mut(&mut self, i: &mut ItemType) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_type_mut(&mut self, i: &mut crate::ItemType) { visit_item_type_mut(self, i); } #[cfg(feature = "full")] - fn visit_item_union_mut(&mut self, i: &mut ItemUnion) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_union_mut(&mut self, i: &mut crate::ItemUnion) { visit_item_union_mut(self, i); } #[cfg(feature = "full")] - fn visit_item_use_mut(&mut self, i: &mut ItemUse) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_item_use_mut(&mut self, i: &mut crate::ItemUse) { visit_item_use_mut(self, i); } #[cfg(feature = "full")] - fn visit_label_mut(&mut self, i: &mut Label) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_label_mut(&mut self, i: &mut crate::Label) { visit_label_mut(self, i); } - fn visit_lifetime_mut(&mut self, i: &mut Lifetime) { + fn visit_lifetime_mut(&mut self, i: &mut crate::Lifetime) { visit_lifetime_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_lifetime_param_mut(&mut self, i: &mut LifetimeParam) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_lifetime_param_mut(&mut self, i: &mut crate::LifetimeParam) { visit_lifetime_param_mut(self, i); } - fn visit_lit_mut(&mut self, i: &mut Lit) { + fn visit_lit_mut(&mut self, i: &mut crate::Lit) { 
visit_lit_mut(self, i); } - fn visit_lit_bool_mut(&mut self, i: &mut LitBool) { + fn visit_lit_bool_mut(&mut self, i: &mut crate::LitBool) { visit_lit_bool_mut(self, i); } - fn visit_lit_byte_mut(&mut self, i: &mut LitByte) { + fn visit_lit_byte_mut(&mut self, i: &mut crate::LitByte) { visit_lit_byte_mut(self, i); } - fn visit_lit_byte_str_mut(&mut self, i: &mut LitByteStr) { + fn visit_lit_byte_str_mut(&mut self, i: &mut crate::LitByteStr) { visit_lit_byte_str_mut(self, i); } - fn visit_lit_char_mut(&mut self, i: &mut LitChar) { + fn visit_lit_char_mut(&mut self, i: &mut crate::LitChar) { visit_lit_char_mut(self, i); } - fn visit_lit_float_mut(&mut self, i: &mut LitFloat) { + fn visit_lit_float_mut(&mut self, i: &mut crate::LitFloat) { visit_lit_float_mut(self, i); } - fn visit_lit_int_mut(&mut self, i: &mut LitInt) { + fn visit_lit_int_mut(&mut self, i: &mut crate::LitInt) { visit_lit_int_mut(self, i); } - fn visit_lit_str_mut(&mut self, i: &mut LitStr) { + fn visit_lit_str_mut(&mut self, i: &mut crate::LitStr) { visit_lit_str_mut(self, i); } #[cfg(feature = "full")] - fn visit_local_mut(&mut self, i: &mut Local) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_local_mut(&mut self, i: &mut crate::Local) { visit_local_mut(self, i); } #[cfg(feature = "full")] - fn visit_local_init_mut(&mut self, i: &mut LocalInit) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_local_init_mut(&mut self, i: &mut crate::LocalInit) { visit_local_init_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_macro_mut(&mut self, i: &mut Macro) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_macro_mut(&mut self, i: &mut crate::Macro) { visit_macro_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_macro_delimiter_mut(&mut self, i: &mut MacroDelimiter) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_macro_delimiter_mut(&mut self, i: 
&mut crate::MacroDelimiter) { visit_macro_delimiter_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_member_mut(&mut self, i: &mut Member) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_member_mut(&mut self, i: &mut crate::Member) { visit_member_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_meta_mut(&mut self, i: &mut Meta) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_meta_mut(&mut self, i: &mut crate::Meta) { visit_meta_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_meta_list_mut(&mut self, i: &mut MetaList) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_meta_list_mut(&mut self, i: &mut crate::MetaList) { visit_meta_list_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_meta_name_value_mut(&mut self, i: &mut MetaNameValue) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_meta_name_value_mut(&mut self, i: &mut crate::MetaNameValue) { visit_meta_name_value_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] fn visit_parenthesized_generic_arguments_mut( &mut self, - i: &mut ParenthesizedGenericArguments, + i: &mut crate::ParenthesizedGenericArguments, ) { visit_parenthesized_generic_arguments_mut(self, i); } #[cfg(feature = "full")] - fn visit_pat_mut(&mut self, i: &mut Pat) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_pat_mut(&mut self, i: &mut crate::Pat) { visit_pat_mut(self, i); } #[cfg(feature = "full")] - fn visit_pat_ident_mut(&mut self, i: &mut PatIdent) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_pat_ident_mut(&mut self, i: &mut crate::PatIdent) { visit_pat_ident_mut(self, i); } #[cfg(feature = "full")] - fn visit_pat_or_mut(&mut self, i: &mut PatOr) { 
+ #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_pat_or_mut(&mut self, i: &mut crate::PatOr) { visit_pat_or_mut(self, i); } #[cfg(feature = "full")] - fn visit_pat_paren_mut(&mut self, i: &mut PatParen) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_pat_paren_mut(&mut self, i: &mut crate::PatParen) { visit_pat_paren_mut(self, i); } #[cfg(feature = "full")] - fn visit_pat_reference_mut(&mut self, i: &mut PatReference) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_pat_reference_mut(&mut self, i: &mut crate::PatReference) { visit_pat_reference_mut(self, i); } #[cfg(feature = "full")] - fn visit_pat_rest_mut(&mut self, i: &mut PatRest) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_pat_rest_mut(&mut self, i: &mut crate::PatRest) { visit_pat_rest_mut(self, i); } #[cfg(feature = "full")] - fn visit_pat_slice_mut(&mut self, i: &mut PatSlice) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_pat_slice_mut(&mut self, i: &mut crate::PatSlice) { visit_pat_slice_mut(self, i); } #[cfg(feature = "full")] - fn visit_pat_struct_mut(&mut self, i: &mut PatStruct) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_pat_struct_mut(&mut self, i: &mut crate::PatStruct) { visit_pat_struct_mut(self, i); } #[cfg(feature = "full")] - fn visit_pat_tuple_mut(&mut self, i: &mut PatTuple) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_pat_tuple_mut(&mut self, i: &mut crate::PatTuple) { visit_pat_tuple_mut(self, i); } #[cfg(feature = "full")] - fn visit_pat_tuple_struct_mut(&mut self, i: &mut PatTupleStruct) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_pat_tuple_struct_mut(&mut self, i: &mut crate::PatTupleStruct) { visit_pat_tuple_struct_mut(self, i); } #[cfg(feature = "full")] - fn visit_pat_type_mut(&mut self, i: &mut PatType) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_pat_type_mut(&mut self, i: &mut crate::PatType) { visit_pat_type_mut(self, i); 
} #[cfg(feature = "full")] - fn visit_pat_wild_mut(&mut self, i: &mut PatWild) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_pat_wild_mut(&mut self, i: &mut crate::PatWild) { visit_pat_wild_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_path_mut(&mut self, i: &mut Path) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_path_mut(&mut self, i: &mut crate::Path) { visit_path_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_path_arguments_mut(&mut self, i: &mut PathArguments) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_path_arguments_mut(&mut self, i: &mut crate::PathArguments) { visit_path_arguments_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_path_segment_mut(&mut self, i: &mut PathSegment) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_path_segment_mut(&mut self, i: &mut crate::PathSegment) { visit_path_segment_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_predicate_lifetime_mut(&mut self, i: &mut PredicateLifetime) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_predicate_lifetime_mut(&mut self, i: &mut crate::PredicateLifetime) { visit_predicate_lifetime_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_predicate_type_mut(&mut self, i: &mut PredicateType) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_predicate_type_mut(&mut self, i: &mut crate::PredicateType) { visit_predicate_type_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_qself_mut(&mut self, i: &mut QSelf) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_qself_mut(&mut self, i: &mut crate::QSelf) { visit_qself_mut(self, i); } #[cfg(feature = "full")] - fn 
visit_range_limits_mut(&mut self, i: &mut RangeLimits) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_range_limits_mut(&mut self, i: &mut crate::RangeLimits) { visit_range_limits_mut(self, i); } #[cfg(feature = "full")] - fn visit_receiver_mut(&mut self, i: &mut Receiver) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_receiver_mut(&mut self, i: &mut crate::Receiver) { visit_receiver_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_return_type_mut(&mut self, i: &mut ReturnType) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_return_type_mut(&mut self, i: &mut crate::ReturnType) { visit_return_type_mut(self, i); } #[cfg(feature = "full")] - fn visit_signature_mut(&mut self, i: &mut Signature) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_signature_mut(&mut self, i: &mut crate::Signature) { visit_signature_mut(self, i); } - fn visit_span_mut(&mut self, i: &mut Span) { + fn visit_span_mut(&mut self, i: &mut proc_macro2::Span) { visit_span_mut(self, i); } #[cfg(feature = "full")] - fn visit_static_mutability_mut(&mut self, i: &mut StaticMutability) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_static_mutability_mut(&mut self, i: &mut crate::StaticMutability) { visit_static_mutability_mut(self, i); } #[cfg(feature = "full")] - fn visit_stmt_mut(&mut self, i: &mut Stmt) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_stmt_mut(&mut self, i: &mut crate::Stmt) { visit_stmt_mut(self, i); } #[cfg(feature = "full")] - fn visit_stmt_macro_mut(&mut self, i: &mut StmtMacro) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_stmt_macro_mut(&mut self, i: &mut crate::StmtMacro) { visit_stmt_macro_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_trait_bound_mut(&mut self, i: &mut TraitBound) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn 
visit_trait_bound_mut(&mut self, i: &mut crate::TraitBound) { visit_trait_bound_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_trait_bound_modifier_mut(&mut self, i: &mut TraitBoundModifier) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_trait_bound_modifier_mut(&mut self, i: &mut crate::TraitBoundModifier) { visit_trait_bound_modifier_mut(self, i); } #[cfg(feature = "full")] - fn visit_trait_item_mut(&mut self, i: &mut TraitItem) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_trait_item_mut(&mut self, i: &mut crate::TraitItem) { visit_trait_item_mut(self, i); } #[cfg(feature = "full")] - fn visit_trait_item_const_mut(&mut self, i: &mut TraitItemConst) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_trait_item_const_mut(&mut self, i: &mut crate::TraitItemConst) { visit_trait_item_const_mut(self, i); } #[cfg(feature = "full")] - fn visit_trait_item_fn_mut(&mut self, i: &mut TraitItemFn) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_trait_item_fn_mut(&mut self, i: &mut crate::TraitItemFn) { visit_trait_item_fn_mut(self, i); } #[cfg(feature = "full")] - fn visit_trait_item_macro_mut(&mut self, i: &mut TraitItemMacro) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_trait_item_macro_mut(&mut self, i: &mut crate::TraitItemMacro) { visit_trait_item_macro_mut(self, i); } #[cfg(feature = "full")] - fn visit_trait_item_type_mut(&mut self, i: &mut TraitItemType) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_trait_item_type_mut(&mut self, i: &mut crate::TraitItemType) { visit_trait_item_type_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_type_mut(&mut self, i: &mut Type) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_mut(&mut self, i: &mut crate::Type) { visit_type_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn 
visit_type_array_mut(&mut self, i: &mut TypeArray) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_array_mut(&mut self, i: &mut crate::TypeArray) { visit_type_array_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_type_bare_fn_mut(&mut self, i: &mut TypeBareFn) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_bare_fn_mut(&mut self, i: &mut crate::TypeBareFn) { visit_type_bare_fn_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_type_group_mut(&mut self, i: &mut TypeGroup) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_group_mut(&mut self, i: &mut crate::TypeGroup) { visit_type_group_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_type_impl_trait_mut(&mut self, i: &mut TypeImplTrait) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_impl_trait_mut(&mut self, i: &mut crate::TypeImplTrait) { visit_type_impl_trait_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_type_infer_mut(&mut self, i: &mut TypeInfer) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_infer_mut(&mut self, i: &mut crate::TypeInfer) { visit_type_infer_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_type_macro_mut(&mut self, i: &mut TypeMacro) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_macro_mut(&mut self, i: &mut crate::TypeMacro) { visit_type_macro_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_type_never_mut(&mut self, i: &mut TypeNever) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_never_mut(&mut self, i: &mut crate::TypeNever) { visit_type_never_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] 
- fn visit_type_param_mut(&mut self, i: &mut TypeParam) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_param_mut(&mut self, i: &mut crate::TypeParam) { visit_type_param_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_type_param_bound_mut(&mut self, i: &mut TypeParamBound) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_param_bound_mut(&mut self, i: &mut crate::TypeParamBound) { visit_type_param_bound_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_type_paren_mut(&mut self, i: &mut TypeParen) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_paren_mut(&mut self, i: &mut crate::TypeParen) { visit_type_paren_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_type_path_mut(&mut self, i: &mut TypePath) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_path_mut(&mut self, i: &mut crate::TypePath) { visit_type_path_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_type_ptr_mut(&mut self, i: &mut TypePtr) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_ptr_mut(&mut self, i: &mut crate::TypePtr) { visit_type_ptr_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_type_reference_mut(&mut self, i: &mut TypeReference) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_reference_mut(&mut self, i: &mut crate::TypeReference) { visit_type_reference_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_type_slice_mut(&mut self, i: &mut TypeSlice) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_slice_mut(&mut self, i: &mut crate::TypeSlice) { visit_type_slice_mut(self, i); } #[cfg(any(feature = "derive", feature = 
"full"))] - fn visit_type_trait_object_mut(&mut self, i: &mut TypeTraitObject) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_trait_object_mut(&mut self, i: &mut crate::TypeTraitObject) { visit_type_trait_object_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_type_tuple_mut(&mut self, i: &mut TypeTuple) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_type_tuple_mut(&mut self, i: &mut crate::TypeTuple) { visit_type_tuple_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_un_op_mut(&mut self, i: &mut UnOp) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_un_op_mut(&mut self, i: &mut crate::UnOp) { visit_un_op_mut(self, i); } #[cfg(feature = "full")] - fn visit_use_glob_mut(&mut self, i: &mut UseGlob) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_use_glob_mut(&mut self, i: &mut crate::UseGlob) { visit_use_glob_mut(self, i); } #[cfg(feature = "full")] - fn visit_use_group_mut(&mut self, i: &mut UseGroup) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_use_group_mut(&mut self, i: &mut crate::UseGroup) { visit_use_group_mut(self, i); } #[cfg(feature = "full")] - fn visit_use_name_mut(&mut self, i: &mut UseName) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_use_name_mut(&mut self, i: &mut crate::UseName) { visit_use_name_mut(self, i); } #[cfg(feature = "full")] - fn visit_use_path_mut(&mut self, i: &mut UsePath) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_use_path_mut(&mut self, i: &mut crate::UsePath) { visit_use_path_mut(self, i); } #[cfg(feature = "full")] - fn visit_use_rename_mut(&mut self, i: &mut UseRename) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_use_rename_mut(&mut self, i: &mut crate::UseRename) { visit_use_rename_mut(self, i); } #[cfg(feature = "full")] - fn visit_use_tree_mut(&mut self, 
i: &mut UseTree) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_use_tree_mut(&mut self, i: &mut crate::UseTree) { visit_use_tree_mut(self, i); } #[cfg(feature = "full")] - fn visit_variadic_mut(&mut self, i: &mut Variadic) { + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] + fn visit_variadic_mut(&mut self, i: &mut crate::Variadic) { visit_variadic_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_variant_mut(&mut self, i: &mut Variant) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_variant_mut(&mut self, i: &mut crate::Variant) { visit_variant_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_vis_restricted_mut(&mut self, i: &mut VisRestricted) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_vis_restricted_mut(&mut self, i: &mut crate::VisRestricted) { visit_vis_restricted_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_visibility_mut(&mut self, i: &mut Visibility) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_visibility_mut(&mut self, i: &mut crate::Visibility) { visit_visibility_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_where_clause_mut(&mut self, i: &mut WhereClause) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_where_clause_mut(&mut self, i: &mut crate::WhereClause) { visit_where_clause_mut(self, i); } #[cfg(any(feature = "derive", feature = "full"))] - fn visit_where_predicate_mut(&mut self, i: &mut WherePredicate) { + #[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] + fn visit_where_predicate_mut(&mut self, i: &mut crate::WherePredicate) { visit_where_predicate_mut(self, i); } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_abi_mut<V>(v: &mut V, node: &mut Abi) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", 
feature = "full"))))] +pub fn visit_abi_mut<V>(v: &mut V, node: &mut crate::Abi) where V: VisitMut + ?Sized, { @@ -763,9 +934,10 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] pub fn visit_angle_bracketed_generic_arguments_mut<V>( v: &mut V, - node: &mut AngleBracketedGenericArguments, + node: &mut crate::AngleBracketedGenericArguments, ) where V: VisitMut + ?Sized, @@ -779,7 +951,8 @@ where skip!(node.gt_token); } #[cfg(feature = "full")] -pub fn visit_arm_mut<V>(v: &mut V, node: &mut Arm) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_arm_mut<V>(v: &mut V, node: &mut crate::Arm) where V: VisitMut + ?Sized, { @@ -796,7 +969,8 @@ where skip!(node.comma); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_assoc_const_mut<V>(v: &mut V, node: &mut AssocConst) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_assoc_const_mut<V>(v: &mut V, node: &mut crate::AssocConst) where V: VisitMut + ?Sized, { @@ -808,7 +982,8 @@ where v.visit_expr_mut(&mut node.value); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_assoc_type_mut<V>(v: &mut V, node: &mut AssocType) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_assoc_type_mut<V>(v: &mut V, node: &mut crate::AssocType) where V: VisitMut + ?Sized, { @@ -820,19 +995,21 @@ where v.visit_type_mut(&mut node.ty); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_attr_style_mut<V>(v: &mut V, node: &mut AttrStyle) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_attr_style_mut<V>(v: &mut V, node: &mut crate::AttrStyle) where V: VisitMut + ?Sized, { match node { - AttrStyle::Outer => {} - AttrStyle::Inner(_binding_0) => { + crate::AttrStyle::Outer => {} + crate::AttrStyle::Inner(_binding_0) => { skip!(_binding_0); } } } #[cfg(any(feature = "derive", feature = 
"full"))] -pub fn visit_attribute_mut<V>(v: &mut V, node: &mut Attribute) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_attribute_mut<V>(v: &mut V, node: &mut crate::Attribute) where V: VisitMut + ?Sized, { @@ -842,7 +1019,8 @@ where v.visit_meta_mut(&mut node.meta); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_bare_fn_arg_mut<V>(v: &mut V, node: &mut BareFnArg) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_bare_fn_arg_mut<V>(v: &mut V, node: &mut crate::BareFnArg) where V: VisitMut + ?Sized, { @@ -856,7 +1034,8 @@ where v.visit_type_mut(&mut node.ty); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_bare_variadic_mut<V>(v: &mut V, node: &mut BareVariadic) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_bare_variadic_mut<V>(v: &mut V, node: &mut crate::BareVariadic) where V: VisitMut + ?Sized, { @@ -871,99 +1050,101 @@ where skip!(node.comma); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_bin_op_mut<V>(v: &mut V, node: &mut BinOp) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_bin_op_mut<V>(v: &mut V, node: &mut crate::BinOp) where V: VisitMut + ?Sized, { match node { - BinOp::Add(_binding_0) => { + crate::BinOp::Add(_binding_0) => { skip!(_binding_0); } - BinOp::Sub(_binding_0) => { + crate::BinOp::Sub(_binding_0) => { skip!(_binding_0); } - BinOp::Mul(_binding_0) => { + crate::BinOp::Mul(_binding_0) => { skip!(_binding_0); } - BinOp::Div(_binding_0) => { + crate::BinOp::Div(_binding_0) => { skip!(_binding_0); } - BinOp::Rem(_binding_0) => { + crate::BinOp::Rem(_binding_0) => { skip!(_binding_0); } - BinOp::And(_binding_0) => { + crate::BinOp::And(_binding_0) => { skip!(_binding_0); } - BinOp::Or(_binding_0) => { + crate::BinOp::Or(_binding_0) => { skip!(_binding_0); } - BinOp::BitXor(_binding_0) => { + crate::BinOp::BitXor(_binding_0) => { 
skip!(_binding_0); } - BinOp::BitAnd(_binding_0) => { + crate::BinOp::BitAnd(_binding_0) => { skip!(_binding_0); } - BinOp::BitOr(_binding_0) => { + crate::BinOp::BitOr(_binding_0) => { skip!(_binding_0); } - BinOp::Shl(_binding_0) => { + crate::BinOp::Shl(_binding_0) => { skip!(_binding_0); } - BinOp::Shr(_binding_0) => { + crate::BinOp::Shr(_binding_0) => { skip!(_binding_0); } - BinOp::Eq(_binding_0) => { + crate::BinOp::Eq(_binding_0) => { skip!(_binding_0); } - BinOp::Lt(_binding_0) => { + crate::BinOp::Lt(_binding_0) => { skip!(_binding_0); } - BinOp::Le(_binding_0) => { + crate::BinOp::Le(_binding_0) => { skip!(_binding_0); } - BinOp::Ne(_binding_0) => { + crate::BinOp::Ne(_binding_0) => { skip!(_binding_0); } - BinOp::Ge(_binding_0) => { + crate::BinOp::Ge(_binding_0) => { skip!(_binding_0); } - BinOp::Gt(_binding_0) => { + crate::BinOp::Gt(_binding_0) => { skip!(_binding_0); } - BinOp::AddAssign(_binding_0) => { + crate::BinOp::AddAssign(_binding_0) => { skip!(_binding_0); } - BinOp::SubAssign(_binding_0) => { + crate::BinOp::SubAssign(_binding_0) => { skip!(_binding_0); } - BinOp::MulAssign(_binding_0) => { + crate::BinOp::MulAssign(_binding_0) => { skip!(_binding_0); } - BinOp::DivAssign(_binding_0) => { + crate::BinOp::DivAssign(_binding_0) => { skip!(_binding_0); } - BinOp::RemAssign(_binding_0) => { + crate::BinOp::RemAssign(_binding_0) => { skip!(_binding_0); } - BinOp::BitXorAssign(_binding_0) => { + crate::BinOp::BitXorAssign(_binding_0) => { skip!(_binding_0); } - BinOp::BitAndAssign(_binding_0) => { + crate::BinOp::BitAndAssign(_binding_0) => { skip!(_binding_0); } - BinOp::BitOrAssign(_binding_0) => { + crate::BinOp::BitOrAssign(_binding_0) => { skip!(_binding_0); } - BinOp::ShlAssign(_binding_0) => { + crate::BinOp::ShlAssign(_binding_0) => { skip!(_binding_0); } - BinOp::ShrAssign(_binding_0) => { + crate::BinOp::ShrAssign(_binding_0) => { skip!(_binding_0); } } } #[cfg(feature = "full")] -pub fn visit_block_mut<V>(v: &mut V, node: &mut Block) 
+#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_block_mut<V>(v: &mut V, node: &mut crate::Block) where V: VisitMut + ?Sized, { @@ -973,7 +1154,8 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_bound_lifetimes_mut<V>(v: &mut V, node: &mut BoundLifetimes) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_bound_lifetimes_mut<V>(v: &mut V, node: &mut crate::BoundLifetimes) where V: VisitMut + ?Sized, { @@ -986,7 +1168,8 @@ where skip!(node.gt_token); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_const_param_mut<V>(v: &mut V, node: &mut ConstParam) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_const_param_mut<V>(v: &mut V, node: &mut crate::ConstParam) where V: VisitMut + ?Sized, { @@ -1003,7 +1186,8 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_constraint_mut<V>(v: &mut V, node: &mut Constraint) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_constraint_mut<V>(v: &mut V, node: &mut crate::Constraint) where V: VisitMut + ?Sized, { @@ -1018,24 +1202,26 @@ where } } #[cfg(feature = "derive")] -pub fn visit_data_mut<V>(v: &mut V, node: &mut Data) +#[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] +pub fn visit_data_mut<V>(v: &mut V, node: &mut crate::Data) where V: VisitMut + ?Sized, { match node { - Data::Struct(_binding_0) => { + crate::Data::Struct(_binding_0) => { v.visit_data_struct_mut(_binding_0); } - Data::Enum(_binding_0) => { + crate::Data::Enum(_binding_0) => { v.visit_data_enum_mut(_binding_0); } - Data::Union(_binding_0) => { + crate::Data::Union(_binding_0) => { v.visit_data_union_mut(_binding_0); } } } #[cfg(feature = "derive")] -pub fn visit_data_enum_mut<V>(v: &mut V, node: &mut DataEnum) +#[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] +pub fn visit_data_enum_mut<V>(v: &mut V, node: &mut crate::DataEnum) where V: VisitMut + ?Sized, { @@ 
-1047,7 +1233,8 @@ where } } #[cfg(feature = "derive")] -pub fn visit_data_struct_mut<V>(v: &mut V, node: &mut DataStruct) +#[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] +pub fn visit_data_struct_mut<V>(v: &mut V, node: &mut crate::DataStruct) where V: VisitMut + ?Sized, { @@ -1056,7 +1243,8 @@ where skip!(node.semi_token); } #[cfg(feature = "derive")] -pub fn visit_data_union_mut<V>(v: &mut V, node: &mut DataUnion) +#[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] +pub fn visit_data_union_mut<V>(v: &mut V, node: &mut crate::DataUnion) where V: VisitMut + ?Sized, { @@ -1064,7 +1252,8 @@ where v.visit_fields_named_mut(&mut node.fields); } #[cfg(feature = "derive")] -pub fn visit_derive_input_mut<V>(v: &mut V, node: &mut DeriveInput) +#[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] +pub fn visit_derive_input_mut<V>(v: &mut V, node: &mut crate::DeriveInput) where V: VisitMut + ?Sized, { @@ -1077,132 +1266,134 @@ where v.visit_data_mut(&mut node.data); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_expr_mut<V>(v: &mut V, node: &mut Expr) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_mut<V>(v: &mut V, node: &mut crate::Expr) where V: VisitMut + ?Sized, { match node { - Expr::Array(_binding_0) => { + crate::Expr::Array(_binding_0) => { full!(v.visit_expr_array_mut(_binding_0)); } - Expr::Assign(_binding_0) => { + crate::Expr::Assign(_binding_0) => { full!(v.visit_expr_assign_mut(_binding_0)); } - Expr::Async(_binding_0) => { + crate::Expr::Async(_binding_0) => { full!(v.visit_expr_async_mut(_binding_0)); } - Expr::Await(_binding_0) => { + crate::Expr::Await(_binding_0) => { full!(v.visit_expr_await_mut(_binding_0)); } - Expr::Binary(_binding_0) => { + crate::Expr::Binary(_binding_0) => { v.visit_expr_binary_mut(_binding_0); } - Expr::Block(_binding_0) => { + crate::Expr::Block(_binding_0) => { full!(v.visit_expr_block_mut(_binding_0)); } - Expr::Break(_binding_0) => { + 
crate::Expr::Break(_binding_0) => { full!(v.visit_expr_break_mut(_binding_0)); } - Expr::Call(_binding_0) => { + crate::Expr::Call(_binding_0) => { v.visit_expr_call_mut(_binding_0); } - Expr::Cast(_binding_0) => { + crate::Expr::Cast(_binding_0) => { v.visit_expr_cast_mut(_binding_0); } - Expr::Closure(_binding_0) => { + crate::Expr::Closure(_binding_0) => { full!(v.visit_expr_closure_mut(_binding_0)); } - Expr::Const(_binding_0) => { + crate::Expr::Const(_binding_0) => { full!(v.visit_expr_const_mut(_binding_0)); } - Expr::Continue(_binding_0) => { + crate::Expr::Continue(_binding_0) => { full!(v.visit_expr_continue_mut(_binding_0)); } - Expr::Field(_binding_0) => { + crate::Expr::Field(_binding_0) => { v.visit_expr_field_mut(_binding_0); } - Expr::ForLoop(_binding_0) => { + crate::Expr::ForLoop(_binding_0) => { full!(v.visit_expr_for_loop_mut(_binding_0)); } - Expr::Group(_binding_0) => { + crate::Expr::Group(_binding_0) => { v.visit_expr_group_mut(_binding_0); } - Expr::If(_binding_0) => { + crate::Expr::If(_binding_0) => { full!(v.visit_expr_if_mut(_binding_0)); } - Expr::Index(_binding_0) => { + crate::Expr::Index(_binding_0) => { v.visit_expr_index_mut(_binding_0); } - Expr::Infer(_binding_0) => { + crate::Expr::Infer(_binding_0) => { full!(v.visit_expr_infer_mut(_binding_0)); } - Expr::Let(_binding_0) => { + crate::Expr::Let(_binding_0) => { full!(v.visit_expr_let_mut(_binding_0)); } - Expr::Lit(_binding_0) => { + crate::Expr::Lit(_binding_0) => { v.visit_expr_lit_mut(_binding_0); } - Expr::Loop(_binding_0) => { + crate::Expr::Loop(_binding_0) => { full!(v.visit_expr_loop_mut(_binding_0)); } - Expr::Macro(_binding_0) => { + crate::Expr::Macro(_binding_0) => { v.visit_expr_macro_mut(_binding_0); } - Expr::Match(_binding_0) => { + crate::Expr::Match(_binding_0) => { full!(v.visit_expr_match_mut(_binding_0)); } - Expr::MethodCall(_binding_0) => { - full!(v.visit_expr_method_call_mut(_binding_0)); + crate::Expr::MethodCall(_binding_0) => { + 
v.visit_expr_method_call_mut(_binding_0); } - Expr::Paren(_binding_0) => { + crate::Expr::Paren(_binding_0) => { v.visit_expr_paren_mut(_binding_0); } - Expr::Path(_binding_0) => { + crate::Expr::Path(_binding_0) => { v.visit_expr_path_mut(_binding_0); } - Expr::Range(_binding_0) => { + crate::Expr::Range(_binding_0) => { full!(v.visit_expr_range_mut(_binding_0)); } - Expr::Reference(_binding_0) => { - full!(v.visit_expr_reference_mut(_binding_0)); + crate::Expr::Reference(_binding_0) => { + v.visit_expr_reference_mut(_binding_0); } - Expr::Repeat(_binding_0) => { + crate::Expr::Repeat(_binding_0) => { full!(v.visit_expr_repeat_mut(_binding_0)); } - Expr::Return(_binding_0) => { + crate::Expr::Return(_binding_0) => { full!(v.visit_expr_return_mut(_binding_0)); } - Expr::Struct(_binding_0) => { - full!(v.visit_expr_struct_mut(_binding_0)); + crate::Expr::Struct(_binding_0) => { + v.visit_expr_struct_mut(_binding_0); } - Expr::Try(_binding_0) => { + crate::Expr::Try(_binding_0) => { full!(v.visit_expr_try_mut(_binding_0)); } - Expr::TryBlock(_binding_0) => { + crate::Expr::TryBlock(_binding_0) => { full!(v.visit_expr_try_block_mut(_binding_0)); } - Expr::Tuple(_binding_0) => { + crate::Expr::Tuple(_binding_0) => { full!(v.visit_expr_tuple_mut(_binding_0)); } - Expr::Unary(_binding_0) => { + crate::Expr::Unary(_binding_0) => { v.visit_expr_unary_mut(_binding_0); } - Expr::Unsafe(_binding_0) => { + crate::Expr::Unsafe(_binding_0) => { full!(v.visit_expr_unsafe_mut(_binding_0)); } - Expr::Verbatim(_binding_0) => { + crate::Expr::Verbatim(_binding_0) => { skip!(_binding_0); } - Expr::While(_binding_0) => { + crate::Expr::While(_binding_0) => { full!(v.visit_expr_while_mut(_binding_0)); } - Expr::Yield(_binding_0) => { + crate::Expr::Yield(_binding_0) => { full!(v.visit_expr_yield_mut(_binding_0)); } } } #[cfg(feature = "full")] -pub fn visit_expr_array_mut<V>(v: &mut V, node: &mut ExprArray) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn 
visit_expr_array_mut<V>(v: &mut V, node: &mut crate::ExprArray) where V: VisitMut + ?Sized, { @@ -1216,7 +1407,8 @@ where } } #[cfg(feature = "full")] -pub fn visit_expr_assign_mut<V>(v: &mut V, node: &mut ExprAssign) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_assign_mut<V>(v: &mut V, node: &mut crate::ExprAssign) where V: VisitMut + ?Sized, { @@ -1228,7 +1420,8 @@ where v.visit_expr_mut(&mut *node.right); } #[cfg(feature = "full")] -pub fn visit_expr_async_mut<V>(v: &mut V, node: &mut ExprAsync) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_async_mut<V>(v: &mut V, node: &mut crate::ExprAsync) where V: VisitMut + ?Sized, { @@ -1240,7 +1433,8 @@ where v.visit_block_mut(&mut node.block); } #[cfg(feature = "full")] -pub fn visit_expr_await_mut<V>(v: &mut V, node: &mut ExprAwait) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_await_mut<V>(v: &mut V, node: &mut crate::ExprAwait) where V: VisitMut + ?Sized, { @@ -1252,7 +1446,8 @@ where skip!(node.await_token); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_expr_binary_mut<V>(v: &mut V, node: &mut ExprBinary) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_binary_mut<V>(v: &mut V, node: &mut crate::ExprBinary) where V: VisitMut + ?Sized, { @@ -1264,7 +1459,8 @@ where v.visit_expr_mut(&mut *node.right); } #[cfg(feature = "full")] -pub fn visit_expr_block_mut<V>(v: &mut V, node: &mut ExprBlock) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_block_mut<V>(v: &mut V, node: &mut crate::ExprBlock) where V: VisitMut + ?Sized, { @@ -1277,7 +1473,8 @@ where v.visit_block_mut(&mut node.block); } #[cfg(feature = "full")] -pub fn visit_expr_break_mut<V>(v: &mut V, node: &mut ExprBreak) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_break_mut<V>(v: &mut V, node: &mut crate::ExprBreak) where V: VisitMut + ?Sized, { @@ -1293,7 +1490,8 @@ where } } 
#[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_expr_call_mut<V>(v: &mut V, node: &mut ExprCall) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_call_mut<V>(v: &mut V, node: &mut crate::ExprCall) where V: VisitMut + ?Sized, { @@ -1308,7 +1506,8 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_expr_cast_mut<V>(v: &mut V, node: &mut ExprCast) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_cast_mut<V>(v: &mut V, node: &mut crate::ExprCast) where V: VisitMut + ?Sized, { @@ -1320,7 +1519,8 @@ where v.visit_type_mut(&mut *node.ty); } #[cfg(feature = "full")] -pub fn visit_expr_closure_mut<V>(v: &mut V, node: &mut ExprClosure) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_closure_mut<V>(v: &mut V, node: &mut crate::ExprClosure) where V: VisitMut + ?Sized, { @@ -1344,7 +1544,8 @@ where v.visit_expr_mut(&mut *node.body); } #[cfg(feature = "full")] -pub fn visit_expr_const_mut<V>(v: &mut V, node: &mut ExprConst) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_const_mut<V>(v: &mut V, node: &mut crate::ExprConst) where V: VisitMut + ?Sized, { @@ -1355,7 +1556,8 @@ where v.visit_block_mut(&mut node.block); } #[cfg(feature = "full")] -pub fn visit_expr_continue_mut<V>(v: &mut V, node: &mut ExprContinue) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_continue_mut<V>(v: &mut V, node: &mut crate::ExprContinue) where V: VisitMut + ?Sized, { @@ -1368,7 +1570,8 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_expr_field_mut<V>(v: &mut V, node: &mut ExprField) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_field_mut<V>(v: &mut V, node: &mut crate::ExprField) where V: VisitMut + ?Sized, { @@ -1380,7 +1583,8 @@ where v.visit_member_mut(&mut node.member); } #[cfg(feature = "full")] -pub fn 
visit_expr_for_loop_mut<V>(v: &mut V, node: &mut ExprForLoop) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_for_loop_mut<V>(v: &mut V, node: &mut crate::ExprForLoop) where V: VisitMut + ?Sized, { @@ -1397,7 +1601,8 @@ where v.visit_block_mut(&mut node.body); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_expr_group_mut<V>(v: &mut V, node: &mut ExprGroup) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_group_mut<V>(v: &mut V, node: &mut crate::ExprGroup) where V: VisitMut + ?Sized, { @@ -1408,7 +1613,8 @@ where v.visit_expr_mut(&mut *node.expr); } #[cfg(feature = "full")] -pub fn visit_expr_if_mut<V>(v: &mut V, node: &mut ExprIf) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_if_mut<V>(v: &mut V, node: &mut crate::ExprIf) where V: VisitMut + ?Sized, { @@ -1424,7 +1630,8 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_expr_index_mut<V>(v: &mut V, node: &mut ExprIndex) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_index_mut<V>(v: &mut V, node: &mut crate::ExprIndex) where V: VisitMut + ?Sized, { @@ -1436,7 +1643,8 @@ where v.visit_expr_mut(&mut *node.index); } #[cfg(feature = "full")] -pub fn visit_expr_infer_mut<V>(v: &mut V, node: &mut ExprInfer) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_infer_mut<V>(v: &mut V, node: &mut crate::ExprInfer) where V: VisitMut + ?Sized, { @@ -1446,7 +1654,8 @@ where skip!(node.underscore_token); } #[cfg(feature = "full")] -pub fn visit_expr_let_mut<V>(v: &mut V, node: &mut ExprLet) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_let_mut<V>(v: &mut V, node: &mut crate::ExprLet) where V: VisitMut + ?Sized, { @@ -1459,7 +1668,8 @@ where v.visit_expr_mut(&mut *node.expr); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_expr_lit_mut<V>(v: &mut V, node: &mut ExprLit) +#[cfg_attr(doc_cfg, 
doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_lit_mut<V>(v: &mut V, node: &mut crate::ExprLit) where V: VisitMut + ?Sized, { @@ -1469,7 +1679,8 @@ where v.visit_lit_mut(&mut node.lit); } #[cfg(feature = "full")] -pub fn visit_expr_loop_mut<V>(v: &mut V, node: &mut ExprLoop) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_loop_mut<V>(v: &mut V, node: &mut crate::ExprLoop) where V: VisitMut + ?Sized, { @@ -1483,7 +1694,8 @@ where v.visit_block_mut(&mut node.body); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_expr_macro_mut<V>(v: &mut V, node: &mut ExprMacro) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_macro_mut<V>(v: &mut V, node: &mut crate::ExprMacro) where V: VisitMut + ?Sized, { @@ -1493,7 +1705,8 @@ where v.visit_macro_mut(&mut node.mac); } #[cfg(feature = "full")] -pub fn visit_expr_match_mut<V>(v: &mut V, node: &mut ExprMatch) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_match_mut<V>(v: &mut V, node: &mut crate::ExprMatch) where V: VisitMut + ?Sized, { @@ -1507,8 +1720,9 @@ where v.visit_arm_mut(it); } } -#[cfg(feature = "full")] -pub fn visit_expr_method_call_mut<V>(v: &mut V, node: &mut ExprMethodCall) +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_method_call_mut<V>(v: &mut V, node: &mut crate::ExprMethodCall) where V: VisitMut + ?Sized, { @@ -1528,7 +1742,8 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_expr_paren_mut<V>(v: &mut V, node: &mut ExprParen) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_paren_mut<V>(v: &mut V, node: &mut crate::ExprParen) where V: VisitMut + ?Sized, { @@ -1539,7 +1754,8 @@ where v.visit_expr_mut(&mut *node.expr); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_expr_path_mut<V>(v: &mut V, node: 
&mut ExprPath) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_path_mut<V>(v: &mut V, node: &mut crate::ExprPath) where V: VisitMut + ?Sized, { @@ -1552,7 +1768,8 @@ where v.visit_path_mut(&mut node.path); } #[cfg(feature = "full")] -pub fn visit_expr_range_mut<V>(v: &mut V, node: &mut ExprRange) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_range_mut<V>(v: &mut V, node: &mut crate::ExprRange) where V: VisitMut + ?Sized, { @@ -1567,8 +1784,9 @@ where v.visit_expr_mut(&mut **it); } } -#[cfg(feature = "full")] -pub fn visit_expr_reference_mut<V>(v: &mut V, node: &mut ExprReference) +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_reference_mut<V>(v: &mut V, node: &mut crate::ExprReference) where V: VisitMut + ?Sized, { @@ -1580,7 +1798,8 @@ where v.visit_expr_mut(&mut *node.expr); } #[cfg(feature = "full")] -pub fn visit_expr_repeat_mut<V>(v: &mut V, node: &mut ExprRepeat) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_repeat_mut<V>(v: &mut V, node: &mut crate::ExprRepeat) where V: VisitMut + ?Sized, { @@ -1593,7 +1812,8 @@ where v.visit_expr_mut(&mut *node.len); } #[cfg(feature = "full")] -pub fn visit_expr_return_mut<V>(v: &mut V, node: &mut ExprReturn) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_return_mut<V>(v: &mut V, node: &mut crate::ExprReturn) where V: VisitMut + ?Sized, { @@ -1605,8 +1825,9 @@ where v.visit_expr_mut(&mut **it); } } -#[cfg(feature = "full")] -pub fn visit_expr_struct_mut<V>(v: &mut V, node: &mut ExprStruct) +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_struct_mut<V>(v: &mut V, node: &mut crate::ExprStruct) where V: VisitMut + ?Sized, { @@ -1628,7 +1849,8 @@ where } } #[cfg(feature = "full")] -pub fn visit_expr_try_mut<V>(v: &mut V, 
node: &mut ExprTry) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_try_mut<V>(v: &mut V, node: &mut crate::ExprTry) where V: VisitMut + ?Sized, { @@ -1639,7 +1861,8 @@ where skip!(node.question_token); } #[cfg(feature = "full")] -pub fn visit_expr_try_block_mut<V>(v: &mut V, node: &mut ExprTryBlock) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_try_block_mut<V>(v: &mut V, node: &mut crate::ExprTryBlock) where V: VisitMut + ?Sized, { @@ -1650,7 +1873,8 @@ where v.visit_block_mut(&mut node.block); } #[cfg(feature = "full")] -pub fn visit_expr_tuple_mut<V>(v: &mut V, node: &mut ExprTuple) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_tuple_mut<V>(v: &mut V, node: &mut crate::ExprTuple) where V: VisitMut + ?Sized, { @@ -1664,7 +1888,8 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_expr_unary_mut<V>(v: &mut V, node: &mut ExprUnary) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_expr_unary_mut<V>(v: &mut V, node: &mut crate::ExprUnary) where V: VisitMut + ?Sized, { @@ -1675,7 +1900,8 @@ where v.visit_expr_mut(&mut *node.expr); } #[cfg(feature = "full")] -pub fn visit_expr_unsafe_mut<V>(v: &mut V, node: &mut ExprUnsafe) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_unsafe_mut<V>(v: &mut V, node: &mut crate::ExprUnsafe) where V: VisitMut + ?Sized, { @@ -1686,7 +1912,8 @@ where v.visit_block_mut(&mut node.block); } #[cfg(feature = "full")] -pub fn visit_expr_while_mut<V>(v: &mut V, node: &mut ExprWhile) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_while_mut<V>(v: &mut V, node: &mut crate::ExprWhile) where V: VisitMut + ?Sized, { @@ -1701,7 +1928,8 @@ where v.visit_block_mut(&mut node.body); } #[cfg(feature = "full")] -pub fn visit_expr_yield_mut<V>(v: &mut V, node: &mut ExprYield) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_expr_yield_mut<V>(v: &mut V, node: &mut 
crate::ExprYield) where V: VisitMut + ?Sized, { @@ -1714,7 +1942,8 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_field_mut<V>(v: &mut V, node: &mut Field) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_field_mut<V>(v: &mut V, node: &mut crate::Field) where V: VisitMut + ?Sized, { @@ -1730,16 +1959,18 @@ where v.visit_type_mut(&mut node.ty); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_field_mutability_mut<V>(v: &mut V, node: &mut FieldMutability) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_field_mutability_mut<V>(v: &mut V, node: &mut crate::FieldMutability) where V: VisitMut + ?Sized, { match node { - FieldMutability::None => {} + crate::FieldMutability::None => {} } } #[cfg(feature = "full")] -pub fn visit_field_pat_mut<V>(v: &mut V, node: &mut FieldPat) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_field_pat_mut<V>(v: &mut V, node: &mut crate::FieldPat) where V: VisitMut + ?Sized, { @@ -1750,8 +1981,9 @@ where skip!(node.colon_token); v.visit_pat_mut(&mut *node.pat); } -#[cfg(feature = "full")] -pub fn visit_field_value_mut<V>(v: &mut V, node: &mut FieldValue) +#[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_field_value_mut<V>(v: &mut V, node: &mut crate::FieldValue) where V: VisitMut + ?Sized, { @@ -1763,22 +1995,24 @@ where v.visit_expr_mut(&mut node.expr); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_fields_mut<V>(v: &mut V, node: &mut Fields) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_fields_mut<V>(v: &mut V, node: &mut crate::Fields) where V: VisitMut + ?Sized, { match node { - Fields::Named(_binding_0) => { + crate::Fields::Named(_binding_0) => { v.visit_fields_named_mut(_binding_0); } - Fields::Unnamed(_binding_0) => { + 
crate::Fields::Unnamed(_binding_0) => { v.visit_fields_unnamed_mut(_binding_0); } - Fields::Unit => {} + crate::Fields::Unit => {} } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_fields_named_mut<V>(v: &mut V, node: &mut FieldsNamed) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_fields_named_mut<V>(v: &mut V, node: &mut crate::FieldsNamed) where V: VisitMut + ?Sized, { @@ -1789,7 +2023,8 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_fields_unnamed_mut<V>(v: &mut V, node: &mut FieldsUnnamed) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_fields_unnamed_mut<V>(v: &mut V, node: &mut crate::FieldsUnnamed) where V: VisitMut + ?Sized, { @@ -1800,7 +2035,8 @@ where } } #[cfg(feature = "full")] -pub fn visit_file_mut<V>(v: &mut V, node: &mut File) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_file_mut<V>(v: &mut V, node: &mut crate::File) where V: VisitMut + ?Sized, { @@ -1813,44 +2049,47 @@ where } } #[cfg(feature = "full")] -pub fn visit_fn_arg_mut<V>(v: &mut V, node: &mut FnArg) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_fn_arg_mut<V>(v: &mut V, node: &mut crate::FnArg) where V: VisitMut + ?Sized, { match node { - FnArg::Receiver(_binding_0) => { + crate::FnArg::Receiver(_binding_0) => { v.visit_receiver_mut(_binding_0); } - FnArg::Typed(_binding_0) => { + crate::FnArg::Typed(_binding_0) => { v.visit_pat_type_mut(_binding_0); } } } #[cfg(feature = "full")] -pub fn visit_foreign_item_mut<V>(v: &mut V, node: &mut ForeignItem) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_foreign_item_mut<V>(v: &mut V, node: &mut crate::ForeignItem) where V: VisitMut + ?Sized, { match node { - ForeignItem::Fn(_binding_0) => { + crate::ForeignItem::Fn(_binding_0) => { v.visit_foreign_item_fn_mut(_binding_0); } - ForeignItem::Static(_binding_0) => { + crate::ForeignItem::Static(_binding_0) => { 
v.visit_foreign_item_static_mut(_binding_0); } - ForeignItem::Type(_binding_0) => { + crate::ForeignItem::Type(_binding_0) => { v.visit_foreign_item_type_mut(_binding_0); } - ForeignItem::Macro(_binding_0) => { + crate::ForeignItem::Macro(_binding_0) => { v.visit_foreign_item_macro_mut(_binding_0); } - ForeignItem::Verbatim(_binding_0) => { + crate::ForeignItem::Verbatim(_binding_0) => { skip!(_binding_0); } } } #[cfg(feature = "full")] -pub fn visit_foreign_item_fn_mut<V>(v: &mut V, node: &mut ForeignItemFn) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_foreign_item_fn_mut<V>(v: &mut V, node: &mut crate::ForeignItemFn) where V: VisitMut + ?Sized, { @@ -1862,7 +2101,8 @@ where skip!(node.semi_token); } #[cfg(feature = "full")] -pub fn visit_foreign_item_macro_mut<V>(v: &mut V, node: &mut ForeignItemMacro) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_foreign_item_macro_mut<V>(v: &mut V, node: &mut crate::ForeignItemMacro) where V: VisitMut + ?Sized, { @@ -1873,7 +2113,8 @@ where skip!(node.semi_token); } #[cfg(feature = "full")] -pub fn visit_foreign_item_static_mut<V>(v: &mut V, node: &mut ForeignItemStatic) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_foreign_item_static_mut<V>(v: &mut V, node: &mut crate::ForeignItemStatic) where V: VisitMut + ?Sized, { @@ -1889,7 +2130,8 @@ where skip!(node.semi_token); } #[cfg(feature = "full")] -pub fn visit_foreign_item_type_mut<V>(v: &mut V, node: &mut ForeignItemType) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_foreign_item_type_mut<V>(v: &mut V, node: &mut crate::ForeignItemType) where V: VisitMut + ?Sized, { @@ -1903,50 +2145,53 @@ where skip!(node.semi_token); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_generic_argument_mut<V>(v: &mut V, node: &mut GenericArgument) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_generic_argument_mut<V>(v: &mut V, node: &mut crate::GenericArgument) 
where V: VisitMut + ?Sized, { match node { - GenericArgument::Lifetime(_binding_0) => { + crate::GenericArgument::Lifetime(_binding_0) => { v.visit_lifetime_mut(_binding_0); } - GenericArgument::Type(_binding_0) => { + crate::GenericArgument::Type(_binding_0) => { v.visit_type_mut(_binding_0); } - GenericArgument::Const(_binding_0) => { + crate::GenericArgument::Const(_binding_0) => { v.visit_expr_mut(_binding_0); } - GenericArgument::AssocType(_binding_0) => { + crate::GenericArgument::AssocType(_binding_0) => { v.visit_assoc_type_mut(_binding_0); } - GenericArgument::AssocConst(_binding_0) => { + crate::GenericArgument::AssocConst(_binding_0) => { v.visit_assoc_const_mut(_binding_0); } - GenericArgument::Constraint(_binding_0) => { + crate::GenericArgument::Constraint(_binding_0) => { v.visit_constraint_mut(_binding_0); } } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_generic_param_mut<V>(v: &mut V, node: &mut GenericParam) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_generic_param_mut<V>(v: &mut V, node: &mut crate::GenericParam) where V: VisitMut + ?Sized, { match node { - GenericParam::Lifetime(_binding_0) => { + crate::GenericParam::Lifetime(_binding_0) => { v.visit_lifetime_param_mut(_binding_0); } - GenericParam::Type(_binding_0) => { + crate::GenericParam::Type(_binding_0) => { v.visit_type_param_mut(_binding_0); } - GenericParam::Const(_binding_0) => { + crate::GenericParam::Const(_binding_0) => { v.visit_const_param_mut(_binding_0); } } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_generics_mut<V>(v: &mut V, node: &mut Generics) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_generics_mut<V>(v: &mut V, node: &mut crate::Generics) where V: VisitMut + ?Sized, { @@ -1960,7 +2205,7 @@ where v.visit_where_clause_mut(it); } } -pub fn visit_ident_mut<V>(v: &mut V, node: &mut Ident) +pub fn visit_ident_mut<V>(v: &mut V, node: &mut 
proc_macro2::Ident) where V: VisitMut + ?Sized, { @@ -1969,30 +2214,32 @@ where node.set_span(span); } #[cfg(feature = "full")] -pub fn visit_impl_item_mut<V>(v: &mut V, node: &mut ImplItem) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_impl_item_mut<V>(v: &mut V, node: &mut crate::ImplItem) where V: VisitMut + ?Sized, { match node { - ImplItem::Const(_binding_0) => { + crate::ImplItem::Const(_binding_0) => { v.visit_impl_item_const_mut(_binding_0); } - ImplItem::Fn(_binding_0) => { + crate::ImplItem::Fn(_binding_0) => { v.visit_impl_item_fn_mut(_binding_0); } - ImplItem::Type(_binding_0) => { + crate::ImplItem::Type(_binding_0) => { v.visit_impl_item_type_mut(_binding_0); } - ImplItem::Macro(_binding_0) => { + crate::ImplItem::Macro(_binding_0) => { v.visit_impl_item_macro_mut(_binding_0); } - ImplItem::Verbatim(_binding_0) => { + crate::ImplItem::Verbatim(_binding_0) => { skip!(_binding_0); } } } #[cfg(feature = "full")] -pub fn visit_impl_item_const_mut<V>(v: &mut V, node: &mut ImplItemConst) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_impl_item_const_mut<V>(v: &mut V, node: &mut crate::ImplItemConst) where V: VisitMut + ?Sized, { @@ -2011,7 +2258,8 @@ where skip!(node.semi_token); } #[cfg(feature = "full")] -pub fn visit_impl_item_fn_mut<V>(v: &mut V, node: &mut ImplItemFn) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_impl_item_fn_mut<V>(v: &mut V, node: &mut crate::ImplItemFn) where V: VisitMut + ?Sized, { @@ -2024,7 +2272,8 @@ where v.visit_block_mut(&mut node.block); } #[cfg(feature = "full")] -pub fn visit_impl_item_macro_mut<V>(v: &mut V, node: &mut ImplItemMacro) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_impl_item_macro_mut<V>(v: &mut V, node: &mut crate::ImplItemMacro) where V: VisitMut + ?Sized, { @@ -2035,7 +2284,8 @@ where skip!(node.semi_token); } #[cfg(feature = "full")] -pub fn visit_impl_item_type_mut<V>(v: &mut V, node: &mut ImplItemType) +#[cfg_attr(doc_cfg, 
doc(cfg(feature = "full")))] +pub fn visit_impl_item_type_mut<V>(v: &mut V, node: &mut crate::ImplItemType) where V: VisitMut + ?Sized, { @@ -2052,14 +2302,16 @@ where skip!(node.semi_token); } #[cfg(feature = "full")] -pub fn visit_impl_restriction_mut<V>(v: &mut V, node: &mut ImplRestriction) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_impl_restriction_mut<V>(v: &mut V, node: &mut crate::ImplRestriction) where V: VisitMut + ?Sized, { match *node {} } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_index_mut<V>(v: &mut V, node: &mut Index) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_index_mut<V>(v: &mut V, node: &mut crate::Index) where V: VisitMut + ?Sized, { @@ -2067,63 +2319,65 @@ where v.visit_span_mut(&mut node.span); } #[cfg(feature = "full")] -pub fn visit_item_mut<V>(v: &mut V, node: &mut Item) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_mut<V>(v: &mut V, node: &mut crate::Item) where V: VisitMut + ?Sized, { match node { - Item::Const(_binding_0) => { + crate::Item::Const(_binding_0) => { v.visit_item_const_mut(_binding_0); } - Item::Enum(_binding_0) => { + crate::Item::Enum(_binding_0) => { v.visit_item_enum_mut(_binding_0); } - Item::ExternCrate(_binding_0) => { + crate::Item::ExternCrate(_binding_0) => { v.visit_item_extern_crate_mut(_binding_0); } - Item::Fn(_binding_0) => { + crate::Item::Fn(_binding_0) => { v.visit_item_fn_mut(_binding_0); } - Item::ForeignMod(_binding_0) => { + crate::Item::ForeignMod(_binding_0) => { v.visit_item_foreign_mod_mut(_binding_0); } - Item::Impl(_binding_0) => { + crate::Item::Impl(_binding_0) => { v.visit_item_impl_mut(_binding_0); } - Item::Macro(_binding_0) => { + crate::Item::Macro(_binding_0) => { v.visit_item_macro_mut(_binding_0); } - Item::Mod(_binding_0) => { + crate::Item::Mod(_binding_0) => { v.visit_item_mod_mut(_binding_0); } - Item::Static(_binding_0) => { + crate::Item::Static(_binding_0) => { 
v.visit_item_static_mut(_binding_0); } - Item::Struct(_binding_0) => { + crate::Item::Struct(_binding_0) => { v.visit_item_struct_mut(_binding_0); } - Item::Trait(_binding_0) => { + crate::Item::Trait(_binding_0) => { v.visit_item_trait_mut(_binding_0); } - Item::TraitAlias(_binding_0) => { + crate::Item::TraitAlias(_binding_0) => { v.visit_item_trait_alias_mut(_binding_0); } - Item::Type(_binding_0) => { + crate::Item::Type(_binding_0) => { v.visit_item_type_mut(_binding_0); } - Item::Union(_binding_0) => { + crate::Item::Union(_binding_0) => { v.visit_item_union_mut(_binding_0); } - Item::Use(_binding_0) => { + crate::Item::Use(_binding_0) => { v.visit_item_use_mut(_binding_0); } - Item::Verbatim(_binding_0) => { + crate::Item::Verbatim(_binding_0) => { skip!(_binding_0); } } } #[cfg(feature = "full")] -pub fn visit_item_const_mut<V>(v: &mut V, node: &mut ItemConst) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_const_mut<V>(v: &mut V, node: &mut crate::ItemConst) where V: VisitMut + ?Sized, { @@ -2141,7 +2395,8 @@ where skip!(node.semi_token); } #[cfg(feature = "full")] -pub fn visit_item_enum_mut<V>(v: &mut V, node: &mut ItemEnum) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_enum_mut<V>(v: &mut V, node: &mut crate::ItemEnum) where V: VisitMut + ?Sized, { @@ -2159,7 +2414,8 @@ where } } #[cfg(feature = "full")] -pub fn visit_item_extern_crate_mut<V>(v: &mut V, node: &mut ItemExternCrate) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_extern_crate_mut<V>(v: &mut V, node: &mut crate::ItemExternCrate) where V: VisitMut + ?Sized, { @@ -2177,7 +2433,8 @@ where skip!(node.semi_token); } #[cfg(feature = "full")] -pub fn visit_item_fn_mut<V>(v: &mut V, node: &mut ItemFn) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_fn_mut<V>(v: &mut V, node: &mut crate::ItemFn) where V: VisitMut + ?Sized, { @@ -2189,7 +2446,8 @@ where v.visit_block_mut(&mut *node.block); } #[cfg(feature = 
"full")] -pub fn visit_item_foreign_mod_mut<V>(v: &mut V, node: &mut ItemForeignMod) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_foreign_mod_mut<V>(v: &mut V, node: &mut crate::ItemForeignMod) where V: VisitMut + ?Sized, { @@ -2204,7 +2462,8 @@ where } } #[cfg(feature = "full")] -pub fn visit_item_impl_mut<V>(v: &mut V, node: &mut ItemImpl) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_impl_mut<V>(v: &mut V, node: &mut crate::ItemImpl) where V: VisitMut + ?Sized, { @@ -2227,7 +2486,8 @@ where } } #[cfg(feature = "full")] -pub fn visit_item_macro_mut<V>(v: &mut V, node: &mut ItemMacro) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_macro_mut<V>(v: &mut V, node: &mut crate::ItemMacro) where V: VisitMut + ?Sized, { @@ -2241,7 +2501,8 @@ where skip!(node.semi_token); } #[cfg(feature = "full")] -pub fn visit_item_mod_mut<V>(v: &mut V, node: &mut ItemMod) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_mod_mut<V>(v: &mut V, node: &mut crate::ItemMod) where V: VisitMut + ?Sized, { @@ -2261,7 +2522,8 @@ where skip!(node.semi); } #[cfg(feature = "full")] -pub fn visit_item_static_mut<V>(v: &mut V, node: &mut ItemStatic) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_static_mut<V>(v: &mut V, node: &mut crate::ItemStatic) where V: VisitMut + ?Sized, { @@ -2279,7 +2541,8 @@ where skip!(node.semi_token); } #[cfg(feature = "full")] -pub fn visit_item_struct_mut<V>(v: &mut V, node: &mut ItemStruct) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_struct_mut<V>(v: &mut V, node: &mut crate::ItemStruct) where V: VisitMut + ?Sized, { @@ -2294,7 +2557,8 @@ where skip!(node.semi_token); } #[cfg(feature = "full")] -pub fn visit_item_trait_mut<V>(v: &mut V, node: &mut ItemTrait) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_trait_mut<V>(v: &mut V, node: &mut crate::ItemTrait) where V: VisitMut + ?Sized, { @@ -2321,7 +2585,8 @@ where } 
} #[cfg(feature = "full")] -pub fn visit_item_trait_alias_mut<V>(v: &mut V, node: &mut ItemTraitAlias) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_trait_alias_mut<V>(v: &mut V, node: &mut crate::ItemTraitAlias) where V: VisitMut + ?Sized, { @@ -2340,7 +2605,8 @@ where skip!(node.semi_token); } #[cfg(feature = "full")] -pub fn visit_item_type_mut<V>(v: &mut V, node: &mut ItemType) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_type_mut<V>(v: &mut V, node: &mut crate::ItemType) where V: VisitMut + ?Sized, { @@ -2356,7 +2622,8 @@ where skip!(node.semi_token); } #[cfg(feature = "full")] -pub fn visit_item_union_mut<V>(v: &mut V, node: &mut ItemUnion) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_union_mut<V>(v: &mut V, node: &mut crate::ItemUnion) where V: VisitMut + ?Sized, { @@ -2370,7 +2637,8 @@ where v.visit_fields_named_mut(&mut node.fields); } #[cfg(feature = "full")] -pub fn visit_item_use_mut<V>(v: &mut V, node: &mut ItemUse) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_item_use_mut<V>(v: &mut V, node: &mut crate::ItemUse) where V: VisitMut + ?Sized, { @@ -2384,14 +2652,15 @@ where skip!(node.semi_token); } #[cfg(feature = "full")] -pub fn visit_label_mut<V>(v: &mut V, node: &mut Label) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_label_mut<V>(v: &mut V, node: &mut crate::Label) where V: VisitMut + ?Sized, { v.visit_lifetime_mut(&mut node.name); skip!(node.colon_token); } -pub fn visit_lifetime_mut<V>(v: &mut V, node: &mut Lifetime) +pub fn visit_lifetime_mut<V>(v: &mut V, node: &mut crate::Lifetime) where V: VisitMut + ?Sized, { @@ -2399,7 +2668,8 @@ where v.visit_ident_mut(&mut node.ident); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_lifetime_param_mut<V>(v: &mut V, node: &mut LifetimeParam) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_lifetime_param_mut<V>(v: &mut V, node: &mut 
crate::LifetimeParam) where V: VisitMut + ?Sized, { @@ -2413,70 +2683,71 @@ where v.visit_lifetime_mut(it); } } -pub fn visit_lit_mut<V>(v: &mut V, node: &mut Lit) +pub fn visit_lit_mut<V>(v: &mut V, node: &mut crate::Lit) where V: VisitMut + ?Sized, { match node { - Lit::Str(_binding_0) => { + crate::Lit::Str(_binding_0) => { v.visit_lit_str_mut(_binding_0); } - Lit::ByteStr(_binding_0) => { + crate::Lit::ByteStr(_binding_0) => { v.visit_lit_byte_str_mut(_binding_0); } - Lit::Byte(_binding_0) => { + crate::Lit::Byte(_binding_0) => { v.visit_lit_byte_mut(_binding_0); } - Lit::Char(_binding_0) => { + crate::Lit::Char(_binding_0) => { v.visit_lit_char_mut(_binding_0); } - Lit::Int(_binding_0) => { + crate::Lit::Int(_binding_0) => { v.visit_lit_int_mut(_binding_0); } - Lit::Float(_binding_0) => { + crate::Lit::Float(_binding_0) => { v.visit_lit_float_mut(_binding_0); } - Lit::Bool(_binding_0) => { + crate::Lit::Bool(_binding_0) => { v.visit_lit_bool_mut(_binding_0); } - Lit::Verbatim(_binding_0) => { + crate::Lit::Verbatim(_binding_0) => { skip!(_binding_0); } } } -pub fn visit_lit_bool_mut<V>(v: &mut V, node: &mut LitBool) +pub fn visit_lit_bool_mut<V>(v: &mut V, node: &mut crate::LitBool) where V: VisitMut + ?Sized, { skip!(node.value); v.visit_span_mut(&mut node.span); } -pub fn visit_lit_byte_mut<V>(v: &mut V, node: &mut LitByte) +pub fn visit_lit_byte_mut<V>(v: &mut V, node: &mut crate::LitByte) where V: VisitMut + ?Sized, {} -pub fn visit_lit_byte_str_mut<V>(v: &mut V, node: &mut LitByteStr) +pub fn visit_lit_byte_str_mut<V>(v: &mut V, node: &mut crate::LitByteStr) where V: VisitMut + ?Sized, {} -pub fn visit_lit_char_mut<V>(v: &mut V, node: &mut LitChar) +pub fn visit_lit_char_mut<V>(v: &mut V, node: &mut crate::LitChar) where V: VisitMut + ?Sized, {} -pub fn visit_lit_float_mut<V>(v: &mut V, node: &mut LitFloat) +pub fn visit_lit_float_mut<V>(v: &mut V, node: &mut crate::LitFloat) where V: VisitMut + ?Sized, {} -pub fn visit_lit_int_mut<V>(v: &mut V, node: 
&mut LitInt) +pub fn visit_lit_int_mut<V>(v: &mut V, node: &mut crate::LitInt) where V: VisitMut + ?Sized, {} -pub fn visit_lit_str_mut<V>(v: &mut V, node: &mut LitStr) +pub fn visit_lit_str_mut<V>(v: &mut V, node: &mut crate::LitStr) where V: VisitMut + ?Sized, {} #[cfg(feature = "full")] -pub fn visit_local_mut<V>(v: &mut V, node: &mut Local) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_local_mut<V>(v: &mut V, node: &mut crate::Local) where V: VisitMut + ?Sized, { @@ -2491,7 +2762,8 @@ where skip!(node.semi_token); } #[cfg(feature = "full")] -pub fn visit_local_init_mut<V>(v: &mut V, node: &mut LocalInit) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_local_init_mut<V>(v: &mut V, node: &mut crate::LocalInit) where V: VisitMut + ?Sized, { @@ -2503,7 +2775,8 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_macro_mut<V>(v: &mut V, node: &mut Macro) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_macro_mut<V>(v: &mut V, node: &mut crate::Macro) where V: VisitMut + ?Sized, { @@ -2513,55 +2786,59 @@ where skip!(node.tokens); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_macro_delimiter_mut<V>(v: &mut V, node: &mut MacroDelimiter) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_macro_delimiter_mut<V>(v: &mut V, node: &mut crate::MacroDelimiter) where V: VisitMut + ?Sized, { match node { - MacroDelimiter::Paren(_binding_0) => { + crate::MacroDelimiter::Paren(_binding_0) => { skip!(_binding_0); } - MacroDelimiter::Brace(_binding_0) => { + crate::MacroDelimiter::Brace(_binding_0) => { skip!(_binding_0); } - MacroDelimiter::Bracket(_binding_0) => { + crate::MacroDelimiter::Bracket(_binding_0) => { skip!(_binding_0); } } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_member_mut<V>(v: &mut V, node: &mut Member) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn 
visit_member_mut<V>(v: &mut V, node: &mut crate::Member) where V: VisitMut + ?Sized, { match node { - Member::Named(_binding_0) => { + crate::Member::Named(_binding_0) => { v.visit_ident_mut(_binding_0); } - Member::Unnamed(_binding_0) => { + crate::Member::Unnamed(_binding_0) => { v.visit_index_mut(_binding_0); } } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_meta_mut<V>(v: &mut V, node: &mut Meta) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_meta_mut<V>(v: &mut V, node: &mut crate::Meta) where V: VisitMut + ?Sized, { match node { - Meta::Path(_binding_0) => { + crate::Meta::Path(_binding_0) => { v.visit_path_mut(_binding_0); } - Meta::List(_binding_0) => { + crate::Meta::List(_binding_0) => { v.visit_meta_list_mut(_binding_0); } - Meta::NameValue(_binding_0) => { + crate::Meta::NameValue(_binding_0) => { v.visit_meta_name_value_mut(_binding_0); } } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_meta_list_mut<V>(v: &mut V, node: &mut MetaList) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_meta_list_mut<V>(v: &mut V, node: &mut crate::MetaList) where V: VisitMut + ?Sized, { @@ -2570,7 +2847,8 @@ where skip!(node.tokens); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_meta_name_value_mut<V>(v: &mut V, node: &mut MetaNameValue) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_meta_name_value_mut<V>(v: &mut V, node: &mut crate::MetaNameValue) where V: VisitMut + ?Sized, { @@ -2579,9 +2857,10 @@ where v.visit_expr_mut(&mut node.value); } #[cfg(any(feature = "derive", feature = "full"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] pub fn visit_parenthesized_generic_arguments_mut<V>( v: &mut V, - node: &mut ParenthesizedGenericArguments, + node: &mut crate::ParenthesizedGenericArguments, ) where V: VisitMut + ?Sized, @@ -2594,66 +2873,68 @@ where 
v.visit_return_type_mut(&mut node.output); } #[cfg(feature = "full")] -pub fn visit_pat_mut<V>(v: &mut V, node: &mut Pat) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_pat_mut<V>(v: &mut V, node: &mut crate::Pat) where V: VisitMut + ?Sized, { match node { - Pat::Const(_binding_0) => { + crate::Pat::Const(_binding_0) => { v.visit_expr_const_mut(_binding_0); } - Pat::Ident(_binding_0) => { + crate::Pat::Ident(_binding_0) => { v.visit_pat_ident_mut(_binding_0); } - Pat::Lit(_binding_0) => { + crate::Pat::Lit(_binding_0) => { v.visit_expr_lit_mut(_binding_0); } - Pat::Macro(_binding_0) => { + crate::Pat::Macro(_binding_0) => { v.visit_expr_macro_mut(_binding_0); } - Pat::Or(_binding_0) => { + crate::Pat::Or(_binding_0) => { v.visit_pat_or_mut(_binding_0); } - Pat::Paren(_binding_0) => { + crate::Pat::Paren(_binding_0) => { v.visit_pat_paren_mut(_binding_0); } - Pat::Path(_binding_0) => { + crate::Pat::Path(_binding_0) => { v.visit_expr_path_mut(_binding_0); } - Pat::Range(_binding_0) => { + crate::Pat::Range(_binding_0) => { v.visit_expr_range_mut(_binding_0); } - Pat::Reference(_binding_0) => { + crate::Pat::Reference(_binding_0) => { v.visit_pat_reference_mut(_binding_0); } - Pat::Rest(_binding_0) => { + crate::Pat::Rest(_binding_0) => { v.visit_pat_rest_mut(_binding_0); } - Pat::Slice(_binding_0) => { + crate::Pat::Slice(_binding_0) => { v.visit_pat_slice_mut(_binding_0); } - Pat::Struct(_binding_0) => { + crate::Pat::Struct(_binding_0) => { v.visit_pat_struct_mut(_binding_0); } - Pat::Tuple(_binding_0) => { + crate::Pat::Tuple(_binding_0) => { v.visit_pat_tuple_mut(_binding_0); } - Pat::TupleStruct(_binding_0) => { + crate::Pat::TupleStruct(_binding_0) => { v.visit_pat_tuple_struct_mut(_binding_0); } - Pat::Type(_binding_0) => { + crate::Pat::Type(_binding_0) => { v.visit_pat_type_mut(_binding_0); } - Pat::Verbatim(_binding_0) => { + crate::Pat::Verbatim(_binding_0) => { skip!(_binding_0); } - Pat::Wild(_binding_0) => { + 
crate::Pat::Wild(_binding_0) => { v.visit_pat_wild_mut(_binding_0); } } } #[cfg(feature = "full")] -pub fn visit_pat_ident_mut<V>(v: &mut V, node: &mut PatIdent) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_pat_ident_mut<V>(v: &mut V, node: &mut crate::PatIdent) where V: VisitMut + ?Sized, { @@ -2669,7 +2950,8 @@ where } } #[cfg(feature = "full")] -pub fn visit_pat_or_mut<V>(v: &mut V, node: &mut PatOr) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_pat_or_mut<V>(v: &mut V, node: &mut crate::PatOr) where V: VisitMut + ?Sized, { @@ -2683,7 +2965,8 @@ where } } #[cfg(feature = "full")] -pub fn visit_pat_paren_mut<V>(v: &mut V, node: &mut PatParen) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_pat_paren_mut<V>(v: &mut V, node: &mut crate::PatParen) where V: VisitMut + ?Sized, { @@ -2694,7 +2977,8 @@ where v.visit_pat_mut(&mut *node.pat); } #[cfg(feature = "full")] -pub fn visit_pat_reference_mut<V>(v: &mut V, node: &mut PatReference) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_pat_reference_mut<V>(v: &mut V, node: &mut crate::PatReference) where V: VisitMut + ?Sized, { @@ -2706,7 +2990,8 @@ where v.visit_pat_mut(&mut *node.pat); } #[cfg(feature = "full")] -pub fn visit_pat_rest_mut<V>(v: &mut V, node: &mut PatRest) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_pat_rest_mut<V>(v: &mut V, node: &mut crate::PatRest) where V: VisitMut + ?Sized, { @@ -2716,7 +3001,8 @@ where skip!(node.dot2_token); } #[cfg(feature = "full")] -pub fn visit_pat_slice_mut<V>(v: &mut V, node: &mut PatSlice) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_pat_slice_mut<V>(v: &mut V, node: &mut crate::PatSlice) where V: VisitMut + ?Sized, { @@ -2730,7 +3016,8 @@ where } } #[cfg(feature = "full")] -pub fn visit_pat_struct_mut<V>(v: &mut V, node: &mut PatStruct) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_pat_struct_mut<V>(v: &mut V, node: &mut crate::PatStruct) where V: 
VisitMut + ?Sized, { @@ -2751,7 +3038,8 @@ where } } #[cfg(feature = "full")] -pub fn visit_pat_tuple_mut<V>(v: &mut V, node: &mut PatTuple) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_pat_tuple_mut<V>(v: &mut V, node: &mut crate::PatTuple) where V: VisitMut + ?Sized, { @@ -2765,7 +3053,8 @@ where } } #[cfg(feature = "full")] -pub fn visit_pat_tuple_struct_mut<V>(v: &mut V, node: &mut PatTupleStruct) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_pat_tuple_struct_mut<V>(v: &mut V, node: &mut crate::PatTupleStruct) where V: VisitMut + ?Sized, { @@ -2783,7 +3072,8 @@ where } } #[cfg(feature = "full")] -pub fn visit_pat_type_mut<V>(v: &mut V, node: &mut PatType) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_pat_type_mut<V>(v: &mut V, node: &mut crate::PatType) where V: VisitMut + ?Sized, { @@ -2795,7 +3085,8 @@ where v.visit_type_mut(&mut *node.ty); } #[cfg(feature = "full")] -pub fn visit_pat_wild_mut<V>(v: &mut V, node: &mut PatWild) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_pat_wild_mut<V>(v: &mut V, node: &mut crate::PatWild) where V: VisitMut + ?Sized, { @@ -2805,7 +3096,8 @@ where skip!(node.underscore_token); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_path_mut<V>(v: &mut V, node: &mut Path) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_path_mut<V>(v: &mut V, node: &mut crate::Path) where V: VisitMut + ?Sized, { @@ -2816,22 +3108,24 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_path_arguments_mut<V>(v: &mut V, node: &mut PathArguments) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_path_arguments_mut<V>(v: &mut V, node: &mut crate::PathArguments) where V: VisitMut + ?Sized, { match node { - PathArguments::None => {} - PathArguments::AngleBracketed(_binding_0) => { + crate::PathArguments::None => {} + crate::PathArguments::AngleBracketed(_binding_0) 
=> { v.visit_angle_bracketed_generic_arguments_mut(_binding_0); } - PathArguments::Parenthesized(_binding_0) => { + crate::PathArguments::Parenthesized(_binding_0) => { v.visit_parenthesized_generic_arguments_mut(_binding_0); } } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_path_segment_mut<V>(v: &mut V, node: &mut PathSegment) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_path_segment_mut<V>(v: &mut V, node: &mut crate::PathSegment) where V: VisitMut + ?Sized, { @@ -2839,7 +3133,8 @@ where v.visit_path_arguments_mut(&mut node.arguments); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_predicate_lifetime_mut<V>(v: &mut V, node: &mut PredicateLifetime) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_predicate_lifetime_mut<V>(v: &mut V, node: &mut crate::PredicateLifetime) where V: VisitMut + ?Sized, { @@ -2851,7 +3146,8 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_predicate_type_mut<V>(v: &mut V, node: &mut PredicateType) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_predicate_type_mut<V>(v: &mut V, node: &mut crate::PredicateType) where V: VisitMut + ?Sized, { @@ -2866,7 +3162,8 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_qself_mut<V>(v: &mut V, node: &mut QSelf) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_qself_mut<V>(v: &mut V, node: &mut crate::QSelf) where V: VisitMut + ?Sized, { @@ -2877,21 +3174,23 @@ where skip!(node.gt_token); } #[cfg(feature = "full")] -pub fn visit_range_limits_mut<V>(v: &mut V, node: &mut RangeLimits) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_range_limits_mut<V>(v: &mut V, node: &mut crate::RangeLimits) where V: VisitMut + ?Sized, { match node { - RangeLimits::HalfOpen(_binding_0) => { + crate::RangeLimits::HalfOpen(_binding_0) => { 
skip!(_binding_0); } - RangeLimits::Closed(_binding_0) => { + crate::RangeLimits::Closed(_binding_0) => { skip!(_binding_0); } } } #[cfg(feature = "full")] -pub fn visit_receiver_mut<V>(v: &mut V, node: &mut Receiver) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_receiver_mut<V>(v: &mut V, node: &mut crate::Receiver) where V: VisitMut + ?Sized, { @@ -2910,20 +3209,22 @@ where v.visit_type_mut(&mut *node.ty); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_return_type_mut<V>(v: &mut V, node: &mut ReturnType) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_return_type_mut<V>(v: &mut V, node: &mut crate::ReturnType) where V: VisitMut + ?Sized, { match node { - ReturnType::Default => {} - ReturnType::Type(_binding_0, _binding_1) => { + crate::ReturnType::Default => {} + crate::ReturnType::Type(_binding_0, _binding_1) => { skip!(_binding_0); v.visit_type_mut(&mut **_binding_1); } } } #[cfg(feature = "full")] -pub fn visit_signature_mut<V>(v: &mut V, node: &mut Signature) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_signature_mut<V>(v: &mut V, node: &mut crate::Signature) where V: VisitMut + ?Sized, { @@ -2946,45 +3247,48 @@ where } v.visit_return_type_mut(&mut node.output); } -pub fn visit_span_mut<V>(v: &mut V, node: &mut Span) +pub fn visit_span_mut<V>(v: &mut V, node: &mut proc_macro2::Span) where V: VisitMut + ?Sized, {} #[cfg(feature = "full")] -pub fn visit_static_mutability_mut<V>(v: &mut V, node: &mut StaticMutability) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_static_mutability_mut<V>(v: &mut V, node: &mut crate::StaticMutability) where V: VisitMut + ?Sized, { match node { - StaticMutability::Mut(_binding_0) => { + crate::StaticMutability::Mut(_binding_0) => { skip!(_binding_0); } - StaticMutability::None => {} + crate::StaticMutability::None => {} } } #[cfg(feature = "full")] -pub fn visit_stmt_mut<V>(v: &mut V, node: &mut Stmt) 
+#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_stmt_mut<V>(v: &mut V, node: &mut crate::Stmt) where V: VisitMut + ?Sized, { match node { - Stmt::Local(_binding_0) => { + crate::Stmt::Local(_binding_0) => { v.visit_local_mut(_binding_0); } - Stmt::Item(_binding_0) => { + crate::Stmt::Item(_binding_0) => { v.visit_item_mut(_binding_0); } - Stmt::Expr(_binding_0, _binding_1) => { + crate::Stmt::Expr(_binding_0, _binding_1) => { v.visit_expr_mut(_binding_0); skip!(_binding_1); } - Stmt::Macro(_binding_0) => { + crate::Stmt::Macro(_binding_0) => { v.visit_stmt_macro_mut(_binding_0); } } } #[cfg(feature = "full")] -pub fn visit_stmt_macro_mut<V>(v: &mut V, node: &mut StmtMacro) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_stmt_macro_mut<V>(v: &mut V, node: &mut crate::StmtMacro) where V: VisitMut + ?Sized, { @@ -2995,7 +3299,8 @@ where skip!(node.semi_token); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_trait_bound_mut<V>(v: &mut V, node: &mut TraitBound) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_trait_bound_mut<V>(v: &mut V, node: &mut crate::TraitBound) where V: VisitMut + ?Sized, { @@ -3007,42 +3312,45 @@ where v.visit_path_mut(&mut node.path); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_trait_bound_modifier_mut<V>(v: &mut V, node: &mut TraitBoundModifier) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_trait_bound_modifier_mut<V>(v: &mut V, node: &mut crate::TraitBoundModifier) where V: VisitMut + ?Sized, { match node { - TraitBoundModifier::None => {} - TraitBoundModifier::Maybe(_binding_0) => { + crate::TraitBoundModifier::None => {} + crate::TraitBoundModifier::Maybe(_binding_0) => { skip!(_binding_0); } } } #[cfg(feature = "full")] -pub fn visit_trait_item_mut<V>(v: &mut V, node: &mut TraitItem) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_trait_item_mut<V>(v: &mut V, node: &mut 
crate::TraitItem) where V: VisitMut + ?Sized, { match node { - TraitItem::Const(_binding_0) => { + crate::TraitItem::Const(_binding_0) => { v.visit_trait_item_const_mut(_binding_0); } - TraitItem::Fn(_binding_0) => { + crate::TraitItem::Fn(_binding_0) => { v.visit_trait_item_fn_mut(_binding_0); } - TraitItem::Type(_binding_0) => { + crate::TraitItem::Type(_binding_0) => { v.visit_trait_item_type_mut(_binding_0); } - TraitItem::Macro(_binding_0) => { + crate::TraitItem::Macro(_binding_0) => { v.visit_trait_item_macro_mut(_binding_0); } - TraitItem::Verbatim(_binding_0) => { + crate::TraitItem::Verbatim(_binding_0) => { skip!(_binding_0); } } } #[cfg(feature = "full")] -pub fn visit_trait_item_const_mut<V>(v: &mut V, node: &mut TraitItemConst) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_trait_item_const_mut<V>(v: &mut V, node: &mut crate::TraitItemConst) where V: VisitMut + ?Sized, { @@ -3061,7 +3369,8 @@ where skip!(node.semi_token); } #[cfg(feature = "full")] -pub fn visit_trait_item_fn_mut<V>(v: &mut V, node: &mut TraitItemFn) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_trait_item_fn_mut<V>(v: &mut V, node: &mut crate::TraitItemFn) where V: VisitMut + ?Sized, { @@ -3075,7 +3384,8 @@ where skip!(node.semi_token); } #[cfg(feature = "full")] -pub fn visit_trait_item_macro_mut<V>(v: &mut V, node: &mut TraitItemMacro) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_trait_item_macro_mut<V>(v: &mut V, node: &mut crate::TraitItemMacro) where V: VisitMut + ?Sized, { @@ -3086,7 +3396,8 @@ where skip!(node.semi_token); } #[cfg(feature = "full")] -pub fn visit_trait_item_type_mut<V>(v: &mut V, node: &mut TraitItemType) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_trait_item_type_mut<V>(v: &mut V, node: &mut crate::TraitItemType) where V: VisitMut + ?Sized, { @@ -3108,60 +3419,62 @@ where skip!(node.semi_token); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_type_mut<V>(v: &mut V, 
node: &mut Type) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_mut<V>(v: &mut V, node: &mut crate::Type) where V: VisitMut + ?Sized, { match node { - Type::Array(_binding_0) => { + crate::Type::Array(_binding_0) => { v.visit_type_array_mut(_binding_0); } - Type::BareFn(_binding_0) => { + crate::Type::BareFn(_binding_0) => { v.visit_type_bare_fn_mut(_binding_0); } - Type::Group(_binding_0) => { + crate::Type::Group(_binding_0) => { v.visit_type_group_mut(_binding_0); } - Type::ImplTrait(_binding_0) => { + crate::Type::ImplTrait(_binding_0) => { v.visit_type_impl_trait_mut(_binding_0); } - Type::Infer(_binding_0) => { + crate::Type::Infer(_binding_0) => { v.visit_type_infer_mut(_binding_0); } - Type::Macro(_binding_0) => { + crate::Type::Macro(_binding_0) => { v.visit_type_macro_mut(_binding_0); } - Type::Never(_binding_0) => { + crate::Type::Never(_binding_0) => { v.visit_type_never_mut(_binding_0); } - Type::Paren(_binding_0) => { + crate::Type::Paren(_binding_0) => { v.visit_type_paren_mut(_binding_0); } - Type::Path(_binding_0) => { + crate::Type::Path(_binding_0) => { v.visit_type_path_mut(_binding_0); } - Type::Ptr(_binding_0) => { + crate::Type::Ptr(_binding_0) => { v.visit_type_ptr_mut(_binding_0); } - Type::Reference(_binding_0) => { + crate::Type::Reference(_binding_0) => { v.visit_type_reference_mut(_binding_0); } - Type::Slice(_binding_0) => { + crate::Type::Slice(_binding_0) => { v.visit_type_slice_mut(_binding_0); } - Type::TraitObject(_binding_0) => { + crate::Type::TraitObject(_binding_0) => { v.visit_type_trait_object_mut(_binding_0); } - Type::Tuple(_binding_0) => { + crate::Type::Tuple(_binding_0) => { v.visit_type_tuple_mut(_binding_0); } - Type::Verbatim(_binding_0) => { + crate::Type::Verbatim(_binding_0) => { skip!(_binding_0); } } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_type_array_mut<V>(v: &mut V, node: &mut TypeArray) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = 
"derive", feature = "full"))))] +pub fn visit_type_array_mut<V>(v: &mut V, node: &mut crate::TypeArray) where V: VisitMut + ?Sized, { @@ -3171,7 +3484,8 @@ where v.visit_expr_mut(&mut node.len); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_type_bare_fn_mut<V>(v: &mut V, node: &mut TypeBareFn) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_bare_fn_mut<V>(v: &mut V, node: &mut crate::TypeBareFn) where V: VisitMut + ?Sized, { @@ -3194,7 +3508,8 @@ where v.visit_return_type_mut(&mut node.output); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_type_group_mut<V>(v: &mut V, node: &mut TypeGroup) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_group_mut<V>(v: &mut V, node: &mut crate::TypeGroup) where V: VisitMut + ?Sized, { @@ -3202,7 +3517,8 @@ where v.visit_type_mut(&mut *node.elem); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_type_impl_trait_mut<V>(v: &mut V, node: &mut TypeImplTrait) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_impl_trait_mut<V>(v: &mut V, node: &mut crate::TypeImplTrait) where V: VisitMut + ?Sized, { @@ -3213,28 +3529,32 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_type_infer_mut<V>(v: &mut V, node: &mut TypeInfer) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_infer_mut<V>(v: &mut V, node: &mut crate::TypeInfer) where V: VisitMut + ?Sized, { skip!(node.underscore_token); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_type_macro_mut<V>(v: &mut V, node: &mut TypeMacro) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_macro_mut<V>(v: &mut V, node: &mut crate::TypeMacro) where V: VisitMut + ?Sized, { v.visit_macro_mut(&mut node.mac); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn 
visit_type_never_mut<V>(v: &mut V, node: &mut TypeNever) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_never_mut<V>(v: &mut V, node: &mut crate::TypeNever) where V: VisitMut + ?Sized, { skip!(node.bang_token); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_type_param_mut<V>(v: &mut V, node: &mut TypeParam) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_param_mut<V>(v: &mut V, node: &mut crate::TypeParam) where V: VisitMut + ?Sized, { @@ -3253,24 +3573,26 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_type_param_bound_mut<V>(v: &mut V, node: &mut TypeParamBound) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_param_bound_mut<V>(v: &mut V, node: &mut crate::TypeParamBound) where V: VisitMut + ?Sized, { match node { - TypeParamBound::Trait(_binding_0) => { + crate::TypeParamBound::Trait(_binding_0) => { v.visit_trait_bound_mut(_binding_0); } - TypeParamBound::Lifetime(_binding_0) => { + crate::TypeParamBound::Lifetime(_binding_0) => { v.visit_lifetime_mut(_binding_0); } - TypeParamBound::Verbatim(_binding_0) => { + crate::TypeParamBound::Verbatim(_binding_0) => { skip!(_binding_0); } } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_type_paren_mut<V>(v: &mut V, node: &mut TypeParen) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_paren_mut<V>(v: &mut V, node: &mut crate::TypeParen) where V: VisitMut + ?Sized, { @@ -3278,7 +3600,8 @@ where v.visit_type_mut(&mut *node.elem); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_type_path_mut<V>(v: &mut V, node: &mut TypePath) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_path_mut<V>(v: &mut V, node: &mut crate::TypePath) where V: VisitMut + ?Sized, { @@ -3288,7 +3611,8 @@ where v.visit_path_mut(&mut node.path); 
} #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_type_ptr_mut<V>(v: &mut V, node: &mut TypePtr) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_ptr_mut<V>(v: &mut V, node: &mut crate::TypePtr) where V: VisitMut + ?Sized, { @@ -3298,7 +3622,8 @@ where v.visit_type_mut(&mut *node.elem); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_type_reference_mut<V>(v: &mut V, node: &mut TypeReference) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_reference_mut<V>(v: &mut V, node: &mut crate::TypeReference) where V: VisitMut + ?Sized, { @@ -3310,7 +3635,8 @@ where v.visit_type_mut(&mut *node.elem); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_type_slice_mut<V>(v: &mut V, node: &mut TypeSlice) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_slice_mut<V>(v: &mut V, node: &mut crate::TypeSlice) where V: VisitMut + ?Sized, { @@ -3318,7 +3644,8 @@ where v.visit_type_mut(&mut *node.elem); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_type_trait_object_mut<V>(v: &mut V, node: &mut TypeTraitObject) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_trait_object_mut<V>(v: &mut V, node: &mut crate::TypeTraitObject) where V: VisitMut + ?Sized, { @@ -3329,7 +3656,8 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_type_tuple_mut<V>(v: &mut V, node: &mut TypeTuple) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_type_tuple_mut<V>(v: &mut V, node: &mut crate::TypeTuple) where V: VisitMut + ?Sized, { @@ -3340,31 +3668,34 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_un_op_mut<V>(v: &mut V, node: &mut UnOp) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_un_op_mut<V>(v: &mut V, node: &mut crate::UnOp) 
where V: VisitMut + ?Sized, { match node { - UnOp::Deref(_binding_0) => { + crate::UnOp::Deref(_binding_0) => { skip!(_binding_0); } - UnOp::Not(_binding_0) => { + crate::UnOp::Not(_binding_0) => { skip!(_binding_0); } - UnOp::Neg(_binding_0) => { + crate::UnOp::Neg(_binding_0) => { skip!(_binding_0); } } } #[cfg(feature = "full")] -pub fn visit_use_glob_mut<V>(v: &mut V, node: &mut UseGlob) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_use_glob_mut<V>(v: &mut V, node: &mut crate::UseGlob) where V: VisitMut + ?Sized, { skip!(node.star_token); } #[cfg(feature = "full")] -pub fn visit_use_group_mut<V>(v: &mut V, node: &mut UseGroup) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_use_group_mut<V>(v: &mut V, node: &mut crate::UseGroup) where V: VisitMut + ?Sized, { @@ -3375,14 +3706,16 @@ where } } #[cfg(feature = "full")] -pub fn visit_use_name_mut<V>(v: &mut V, node: &mut UseName) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_use_name_mut<V>(v: &mut V, node: &mut crate::UseName) where V: VisitMut + ?Sized, { v.visit_ident_mut(&mut node.ident); } #[cfg(feature = "full")] -pub fn visit_use_path_mut<V>(v: &mut V, node: &mut UsePath) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_use_path_mut<V>(v: &mut V, node: &mut crate::UsePath) where V: VisitMut + ?Sized, { @@ -3391,7 +3724,8 @@ where v.visit_use_tree_mut(&mut *node.tree); } #[cfg(feature = "full")] -pub fn visit_use_rename_mut<V>(v: &mut V, node: &mut UseRename) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_use_rename_mut<V>(v: &mut V, node: &mut crate::UseRename) where V: VisitMut + ?Sized, { @@ -3400,30 +3734,32 @@ where v.visit_ident_mut(&mut node.rename); } #[cfg(feature = "full")] -pub fn visit_use_tree_mut<V>(v: &mut V, node: &mut UseTree) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_use_tree_mut<V>(v: &mut V, node: &mut crate::UseTree) where V: VisitMut + ?Sized, { match node { - 
UseTree::Path(_binding_0) => { + crate::UseTree::Path(_binding_0) => { v.visit_use_path_mut(_binding_0); } - UseTree::Name(_binding_0) => { + crate::UseTree::Name(_binding_0) => { v.visit_use_name_mut(_binding_0); } - UseTree::Rename(_binding_0) => { + crate::UseTree::Rename(_binding_0) => { v.visit_use_rename_mut(_binding_0); } - UseTree::Glob(_binding_0) => { + crate::UseTree::Glob(_binding_0) => { v.visit_use_glob_mut(_binding_0); } - UseTree::Group(_binding_0) => { + crate::UseTree::Group(_binding_0) => { v.visit_use_group_mut(_binding_0); } } } #[cfg(feature = "full")] -pub fn visit_variadic_mut<V>(v: &mut V, node: &mut Variadic) +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub fn visit_variadic_mut<V>(v: &mut V, node: &mut crate::Variadic) where V: VisitMut + ?Sized, { @@ -3438,7 +3774,8 @@ where skip!(node.comma); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_variant_mut<V>(v: &mut V, node: &mut Variant) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_variant_mut<V>(v: &mut V, node: &mut crate::Variant) where V: VisitMut + ?Sized, { @@ -3453,7 +3790,8 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_vis_restricted_mut<V>(v: &mut V, node: &mut VisRestricted) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_vis_restricted_mut<V>(v: &mut V, node: &mut crate::VisRestricted) where V: VisitMut + ?Sized, { @@ -3463,22 +3801,24 @@ where v.visit_path_mut(&mut *node.path); } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_visibility_mut<V>(v: &mut V, node: &mut Visibility) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_visibility_mut<V>(v: &mut V, node: &mut crate::Visibility) where V: VisitMut + ?Sized, { match node { - Visibility::Public(_binding_0) => { + crate::Visibility::Public(_binding_0) => { skip!(_binding_0); } - Visibility::Restricted(_binding_0) => { + 
crate::Visibility::Restricted(_binding_0) => { v.visit_vis_restricted_mut(_binding_0); } - Visibility::Inherited => {} + crate::Visibility::Inherited => {} } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_where_clause_mut<V>(v: &mut V, node: &mut WhereClause) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_where_clause_mut<V>(v: &mut V, node: &mut crate::WhereClause) where V: VisitMut + ?Sized, { @@ -3489,15 +3829,16 @@ where } } #[cfg(any(feature = "derive", feature = "full"))] -pub fn visit_where_predicate_mut<V>(v: &mut V, node: &mut WherePredicate) +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "derive", feature = "full"))))] +pub fn visit_where_predicate_mut<V>(v: &mut V, node: &mut crate::WherePredicate) where V: VisitMut + ?Sized, { match node { - WherePredicate::Lifetime(_binding_0) => { + crate::WherePredicate::Lifetime(_binding_0) => { v.visit_predicate_lifetime_mut(_binding_0); } - WherePredicate::Type(_binding_0) => { + crate::WherePredicate::Type(_binding_0) => { v.visit_predicate_type_mut(_binding_0); } } diff --git a/vendor/syn/src/generics.rs b/vendor/syn/src/generics.rs index 44a10da..a3cd5e6 100644 --- a/vendor/syn/src/generics.rs +++ b/vendor/syn/src/generics.rs @@ -1,5 +1,11 @@ -use super::*; +use crate::attr::Attribute; +use crate::expr::Expr; +use crate::ident::Ident; +use crate::lifetime::Lifetime; +use crate::path::Path; use crate::punctuated::{Iter, IterMut, Punctuated}; +use crate::token; +use crate::ty::Type; use proc_macro2::TokenStream; #[cfg(all(feature = "printing", feature = "extra-traits"))] use std::fmt::{self, Debug}; @@ -33,7 +39,7 @@ ast_enum_of_structs! { /// /// This type is a [syntax tree enum]. /// - /// [syntax tree enum]: Expr#syntax-tree-enums + /// [syntax tree enum]: crate::expr::Expr#syntax-tree-enums #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] pub enum GenericParam { /// A lifetime parameter: `'a: 'b + 'c + 'd`. 
@@ -493,7 +499,7 @@ ast_enum_of_structs! { /// /// This type is a [syntax tree enum]. /// - /// [syntax tree enum]: Expr#syntax-tree-enums + /// [syntax tree enum]: crate::expr::Expr#syntax-tree-enums #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] #[non_exhaustive] pub enum WherePredicate { @@ -531,9 +537,22 @@ ast_struct! { #[cfg(feature = "parsing")] pub(crate) mod parsing { - use super::*; - use crate::ext::IdentExt; - use crate::parse::{Parse, ParseStream, Result}; + use crate::attr::Attribute; + use crate::error::Result; + use crate::ext::IdentExt as _; + use crate::generics::{ + BoundLifetimes, ConstParam, GenericParam, Generics, LifetimeParam, PredicateLifetime, + PredicateType, TraitBound, TraitBoundModifier, TypeParam, TypeParamBound, WhereClause, + WherePredicate, + }; + use crate::ident::Ident; + use crate::lifetime::Lifetime; + use crate::parse::{Parse, ParseStream}; + use crate::path::{self, ParenthesizedGenericArguments, Path, PathArguments}; + use crate::punctuated::Punctuated; + use crate::token; + use crate::ty::Type; + use crate::verbatim; #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] impl Parse for Generics { @@ -771,7 +790,7 @@ pub(crate) mod parsing { bound.paren_token = paren_token; if is_tilde_const { - Ok(TypeParamBound::Verbatim(verbatim::between(begin, input))) + Ok(TypeParamBound::Verbatim(verbatim::between(&begin, input))) } else { Ok(TypeParamBound::Trait(bound)) } @@ -971,8 +990,12 @@ pub(crate) mod parsing { #[cfg(feature = "printing")] mod printing { - use super::*; use crate::attr::FilterAttrs; + use crate::generics::{ + BoundLifetimes, ConstParam, GenericParam, Generics, ImplGenerics, LifetimeParam, + PredicateLifetime, PredicateType, TraitBound, TraitBoundModifier, Turbofish, TypeGenerics, + TypeParam, WhereClause, + }; use crate::print::TokensOrDefault; use proc_macro2::TokenStream; use quote::{ToTokens, TokenStreamExt}; diff --git a/vendor/syn/src/group.rs b/vendor/syn/src/group.rs index 
cccbc46..2730233 100644 --- a/vendor/syn/src/group.rs +++ b/vendor/syn/src/group.rs @@ -7,21 +7,27 @@ use proc_macro2::Delimiter; // Not public API. #[doc(hidden)] pub struct Parens<'a> { + #[doc(hidden)] pub token: token::Paren, + #[doc(hidden)] pub content: ParseBuffer<'a>, } // Not public API. #[doc(hidden)] pub struct Braces<'a> { + #[doc(hidden)] pub token: token::Brace, + #[doc(hidden)] pub content: ParseBuffer<'a>, } // Not public API. #[doc(hidden)] pub struct Brackets<'a> { + #[doc(hidden)] pub token: token::Bracket, + #[doc(hidden)] pub content: ParseBuffer<'a>, } @@ -29,7 +35,9 @@ pub struct Brackets<'a> { #[cfg(any(feature = "full", feature = "derive"))] #[doc(hidden)] pub struct Group<'a> { + #[doc(hidden)] pub token: token::Group, + #[doc(hidden)] pub content: ParseBuffer<'a>, } diff --git a/vendor/syn/src/ident.rs b/vendor/syn/src/ident.rs index bd6f3f9..3e57324 100644 --- a/vendor/syn/src/ident.rs +++ b/vendor/syn/src/ident.rs @@ -3,12 +3,13 @@ use crate::lookahead; pub use proc_macro2::Ident; -#[cfg(not(doc))] // rustdoc bug: https://github.com/rust-lang/rust/issues/105735 #[cfg(feature = "parsing")] -#[doc(hidden)] -#[allow(non_snake_case)] -pub fn Ident(marker: lookahead::TokenMarker) -> Ident { - match marker {} +pub_if_not_doc! { + #[doc(hidden)] + #[allow(non_snake_case)] + pub fn Ident(marker: lookahead::TokenMarker) -> Ident { + match marker {} + } } macro_rules! 
ident_from_token { @@ -50,7 +51,8 @@ pub(crate) fn xid_ok(symbol: &str) -> bool { #[cfg(feature = "parsing")] mod parsing { use crate::buffer::Cursor; - use crate::parse::{Parse, ParseStream, Result}; + use crate::error::Result; + use crate::parse::{Parse, ParseStream}; use crate::token::Token; use proc_macro2::Ident; diff --git a/vendor/syn/src/item.rs b/vendor/syn/src/item.rs index 46ccd73..95a1436 100644 --- a/vendor/syn/src/item.rs +++ b/vendor/syn/src/item.rs @@ -1,8 +1,19 @@ -use super::*; +use crate::attr::Attribute; +use crate::data::{Fields, FieldsNamed, Variant}; use crate::derive::{Data, DataEnum, DataStruct, DataUnion, DeriveInput}; +use crate::expr::Expr; +use crate::generics::{Generics, TypeParamBound}; +use crate::ident::Ident; +use crate::lifetime::Lifetime; +use crate::mac::Macro; +use crate::pat::{Pat, PatType}; +use crate::path::Path; use crate::punctuated::Punctuated; +use crate::restriction::Visibility; +use crate::stmt::Block; +use crate::token; +use crate::ty::{Abi, ReturnType, Type}; use proc_macro2::TokenStream; - #[cfg(feature = "parsing")] use std::mem; @@ -13,7 +24,7 @@ ast_enum_of_structs! { /// /// This type is a [syntax tree enum]. /// - /// [syntax tree enum]: Expr#syntax-tree-enums + /// [syntax tree enum]: crate::expr::Expr#syntax-tree-enums #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] #[non_exhaustive] pub enum Item { @@ -70,12 +81,13 @@ ast_enum_of_structs! { // For testing exhaustiveness in downstream code, use the following idiom: // // match item { + // #![cfg_attr(test, deny(non_exhaustive_omitted_patterns))] + // // Item::Const(item) => {...} // Item::Enum(item) => {...} // ... // Item::Verbatim(item) => {...} // - // #[cfg_attr(test, deny(non_exhaustive_omitted_patterns))] // _ => { /* some sane fallback */ } // } // @@ -416,7 +428,7 @@ ast_enum_of_structs! { /// /// This type is a [syntax tree enum]. 
/// - /// [syntax tree enum]: Expr#syntax-tree-enums + /// [syntax tree enum]: crate::expr::Expr#syntax-tree-enums #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] pub enum UseTree { /// A path prefix of imports in a `use` item: `std::...`. @@ -488,7 +500,7 @@ ast_enum_of_structs! { /// /// This type is a [syntax tree enum]. /// - /// [syntax tree enum]: Expr#syntax-tree-enums + /// [syntax tree enum]: crate::expr::Expr#syntax-tree-enums #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] #[non_exhaustive] pub enum ForeignItem { @@ -510,12 +522,13 @@ ast_enum_of_structs! { // For testing exhaustiveness in downstream code, use the following idiom: // // match item { + // #![cfg_attr(test, deny(non_exhaustive_omitted_patterns))] + // // ForeignItem::Fn(item) => {...} // ForeignItem::Static(item) => {...} // ... // ForeignItem::Verbatim(item) => {...} // - // #[cfg_attr(test, deny(non_exhaustive_omitted_patterns))] // _ => { /* some sane fallback */ } // } // @@ -582,7 +595,7 @@ ast_enum_of_structs! { /// /// This type is a [syntax tree enum]. /// - /// [syntax tree enum]: Expr#syntax-tree-enums + /// [syntax tree enum]: crate::expr::Expr#syntax-tree-enums #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] #[non_exhaustive] pub enum TraitItem { @@ -604,12 +617,13 @@ ast_enum_of_structs! { // For testing exhaustiveness in downstream code, use the following idiom: // // match item { + // #![cfg_attr(test, deny(non_exhaustive_omitted_patterns))] + // // TraitItem::Const(item) => {...} // TraitItem::Fn(item) => {...} // ... // TraitItem::Verbatim(item) => {...} // - // #[cfg_attr(test, deny(non_exhaustive_omitted_patterns))] // _ => { /* some sane fallback */ } // } // @@ -678,7 +692,7 @@ ast_enum_of_structs! { /// /// This type is a [syntax tree enum]. 
/// - /// [syntax tree enum]: Expr#syntax-tree-enums + /// [syntax tree enum]: crate::expr::Expr#syntax-tree-enums #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] #[non_exhaustive] pub enum ImplItem { @@ -700,12 +714,13 @@ ast_enum_of_structs! { // For testing exhaustiveness in downstream code, use the following idiom: // // match item { + // #![cfg_attr(test, deny(non_exhaustive_omitted_patterns))] + // // ImplItem::Const(item) => {...} // ImplItem::Fn(item) => {...} // ... // ImplItem::Verbatim(item) => {...} // - // #[cfg_attr(test, deny(non_exhaustive_omitted_patterns))] // _ => { /* some sane fallback */ } // } // @@ -889,105 +904,110 @@ ast_enum! { #[cfg(feature = "parsing")] pub(crate) mod parsing { - use super::*; - use crate::ext::IdentExt; - use crate::parse::discouraged::Speculative; - use crate::parse::{Parse, ParseBuffer, ParseStream, Result}; + use crate::attr::{self, Attribute}; + use crate::derive; + use crate::error::{Error, Result}; + use crate::expr::Expr; + use crate::ext::IdentExt as _; + use crate::generics::{Generics, TypeParamBound}; + use crate::ident::Ident; + use crate::item::{ + FnArg, ForeignItem, ForeignItemFn, ForeignItemMacro, ForeignItemStatic, ForeignItemType, + ImplItem, ImplItemConst, ImplItemFn, ImplItemMacro, ImplItemType, Item, ItemConst, + ItemEnum, ItemExternCrate, ItemFn, ItemForeignMod, ItemImpl, ItemMacro, ItemMod, + ItemStatic, ItemStruct, ItemTrait, ItemTraitAlias, ItemType, ItemUnion, ItemUse, Receiver, + Signature, StaticMutability, TraitItem, TraitItemConst, TraitItemFn, TraitItemMacro, + TraitItemType, UseGlob, UseGroup, UseName, UsePath, UseRename, UseTree, Variadic, + }; + use crate::lifetime::Lifetime; + use crate::lit::LitStr; + use crate::mac::{self, Macro, MacroDelimiter}; + use crate::parse::discouraged::Speculative as _; + use crate::parse::{Parse, ParseBuffer, ParseStream}; + use crate::pat::{Pat, PatType, PatWild}; + use crate::path::Path; + use crate::punctuated::Punctuated; + use 
crate::restriction::Visibility; + use crate::stmt::Block; + use crate::token; + use crate::ty::{Abi, ReturnType, Type, TypePath, TypeReference}; + use crate::verbatim; + use proc_macro2::TokenStream; #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] impl Parse for Item { fn parse(input: ParseStream) -> Result<Self> { let begin = input.fork(); - let mut attrs = input.call(Attribute::parse_outer)?; - let ahead = input.fork(); - let vis: Visibility = ahead.parse()?; + let attrs = input.call(Attribute::parse_outer)?; + parse_rest_of_item(begin, attrs, input) + } + } + + pub(crate) fn parse_rest_of_item( + begin: ParseBuffer, + mut attrs: Vec<Attribute>, + input: ParseStream, + ) -> Result<Item> { + let ahead = input.fork(); + let vis: Visibility = ahead.parse()?; + let lookahead = ahead.lookahead1(); + let mut item = if lookahead.peek(Token![fn]) || peek_signature(&ahead) { + let vis: Visibility = input.parse()?; + let sig: Signature = input.parse()?; + if input.peek(Token![;]) { + input.parse::<Token![;]>()?; + Ok(Item::Verbatim(verbatim::between(&begin, input))) + } else { + parse_rest_of_fn(input, Vec::new(), vis, sig).map(Item::Fn) + } + } else if lookahead.peek(Token![extern]) { + ahead.parse::<Token![extern]>()?; let lookahead = ahead.lookahead1(); - let mut item = if lookahead.peek(Token![fn]) || peek_signature(&ahead) { - let vis: Visibility = input.parse()?; - let sig: Signature = input.parse()?; - if input.peek(Token![;]) { - input.parse::<Token![;]>()?; - Ok(Item::Verbatim(verbatim::between(begin, input))) - } else { - parse_rest_of_fn(input, Vec::new(), vis, sig).map(Item::Fn) - } - } else if lookahead.peek(Token![extern]) { - ahead.parse::<Token![extern]>()?; + if lookahead.peek(Token![crate]) { + input.parse().map(Item::ExternCrate) + } else if lookahead.peek(token::Brace) { + input.parse().map(Item::ForeignMod) + } else if lookahead.peek(LitStr) { + ahead.parse::<LitStr>()?; let lookahead = ahead.lookahead1(); - if lookahead.peek(Token![crate]) { - 
input.parse().map(Item::ExternCrate) - } else if lookahead.peek(token::Brace) { + if lookahead.peek(token::Brace) { input.parse().map(Item::ForeignMod) - } else if lookahead.peek(LitStr) { - ahead.parse::<LitStr>()?; - let lookahead = ahead.lookahead1(); - if lookahead.peek(token::Brace) { - input.parse().map(Item::ForeignMod) - } else { - Err(lookahead.error()) - } } else { Err(lookahead.error()) } - } else if lookahead.peek(Token![use]) { - let allow_crate_root_in_path = true; - match parse_item_use(input, allow_crate_root_in_path)? { - Some(item_use) => Ok(Item::Use(item_use)), - None => Ok(Item::Verbatim(verbatim::between(begin, input))), - } - } else if lookahead.peek(Token![static]) { - let vis = input.parse()?; - let static_token = input.parse()?; - let mutability = input.parse()?; - let ident = input.parse()?; - if input.peek(Token![=]) { - input.parse::<Token![=]>()?; - input.parse::<Expr>()?; - input.parse::<Token![;]>()?; - Ok(Item::Verbatim(verbatim::between(begin, input))) - } else { - let colon_token = input.parse()?; - let ty = input.parse()?; - if input.peek(Token![;]) { - input.parse::<Token![;]>()?; - Ok(Item::Verbatim(verbatim::between(begin, input))) - } else { - Ok(Item::Static(ItemStatic { - attrs: Vec::new(), - vis, - static_token, - mutability, - ident, - colon_token, - ty, - eq_token: input.parse()?, - expr: input.parse()?, - semi_token: input.parse()?, - })) - } - } - } else if lookahead.peek(Token![const]) { - let vis = input.parse()?; - let const_token: Token![const] = input.parse()?; - let lookahead = input.lookahead1(); - let ident = if lookahead.peek(Ident) || lookahead.peek(Token![_]) { - input.call(Ident::parse_any)? - } else { - return Err(lookahead.error()); - }; + } else { + Err(lookahead.error()) + } + } else if lookahead.peek(Token![use]) { + let allow_crate_root_in_path = true; + match parse_item_use(input, allow_crate_root_in_path)? 
{ + Some(item_use) => Ok(Item::Use(item_use)), + None => Ok(Item::Verbatim(verbatim::between(&begin, input))), + } + } else if lookahead.peek(Token![static]) { + let vis = input.parse()?; + let static_token = input.parse()?; + let mutability = input.parse()?; + let ident = input.parse()?; + if input.peek(Token![=]) { + input.parse::<Token![=]>()?; + input.parse::<Expr>()?; + input.parse::<Token![;]>()?; + Ok(Item::Verbatim(verbatim::between(&begin, input))) + } else { let colon_token = input.parse()?; let ty = input.parse()?; if input.peek(Token![;]) { input.parse::<Token![;]>()?; - Ok(Item::Verbatim(verbatim::between(begin, input))) + Ok(Item::Verbatim(verbatim::between(&begin, input))) } else { - Ok(Item::Const(ItemConst { + Ok(Item::Static(ItemStatic { attrs: Vec::new(), vis, - const_token, + static_token, + mutability, ident, - generics: Generics::default(), colon_token, ty, eq_token: input.parse()?, @@ -995,69 +1015,108 @@ pub(crate) mod parsing { semi_token: input.parse()?, })) } - } else if lookahead.peek(Token![unsafe]) { - ahead.parse::<Token![unsafe]>()?; - let lookahead = ahead.lookahead1(); - if lookahead.peek(Token![trait]) - || lookahead.peek(Token![auto]) && ahead.peek2(Token![trait]) + } + } else if lookahead.peek(Token![const]) { + let vis = input.parse()?; + let const_token: Token![const] = input.parse()?; + let lookahead = input.lookahead1(); + let ident = if lookahead.peek(Ident) || lookahead.peek(Token![_]) { + input.call(Ident::parse_any)? + } else { + return Err(lookahead.error()); + }; + let mut generics: Generics = input.parse()?; + let colon_token = input.parse()?; + let ty = input.parse()?; + let value = if let Some(eq_token) = input.parse::<Option<Token![=]>>()? 
{ + let expr: Expr = input.parse()?; + Some((eq_token, expr)) + } else { + None + }; + generics.where_clause = input.parse()?; + let semi_token: Token![;] = input.parse()?; + match value { + Some((eq_token, expr)) + if generics.lt_token.is_none() && generics.where_clause.is_none() => { - input.parse().map(Item::Trait) - } else if lookahead.peek(Token![impl]) { - let allow_verbatim_impl = true; - if let Some(item) = parse_impl(input, allow_verbatim_impl)? { - Ok(Item::Impl(item)) - } else { - Ok(Item::Verbatim(verbatim::between(begin, input))) - } - } else if lookahead.peek(Token![extern]) { - input.parse().map(Item::ForeignMod) - } else if lookahead.peek(Token![mod]) { - input.parse().map(Item::Mod) - } else { - Err(lookahead.error()) + Ok(Item::Const(ItemConst { + attrs: Vec::new(), + vis, + const_token, + ident, + generics, + colon_token, + ty, + eq_token, + expr: Box::new(expr), + semi_token, + })) } - } else if lookahead.peek(Token![mod]) { - input.parse().map(Item::Mod) - } else if lookahead.peek(Token![type]) { - parse_item_type(begin, input) - } else if lookahead.peek(Token![struct]) { - input.parse().map(Item::Struct) - } else if lookahead.peek(Token![enum]) { - input.parse().map(Item::Enum) - } else if lookahead.peek(Token![union]) && ahead.peek2(Ident) { - input.parse().map(Item::Union) - } else if lookahead.peek(Token![trait]) { - input.call(parse_trait_or_trait_alias) - } else if lookahead.peek(Token![auto]) && ahead.peek2(Token![trait]) { - input.parse().map(Item::Trait) - } else if lookahead.peek(Token![impl]) - || lookahead.peek(Token![default]) && !ahead.peek2(Token![!]) + _ => Ok(Item::Verbatim(verbatim::between(&begin, input))), + } + } else if lookahead.peek(Token![unsafe]) { + ahead.parse::<Token![unsafe]>()?; + let lookahead = ahead.lookahead1(); + if lookahead.peek(Token![trait]) + || lookahead.peek(Token![auto]) && ahead.peek2(Token![trait]) { + input.parse().map(Item::Trait) + } else if lookahead.peek(Token![impl]) { let allow_verbatim_impl 
= true; if let Some(item) = parse_impl(input, allow_verbatim_impl)? { Ok(Item::Impl(item)) } else { - Ok(Item::Verbatim(verbatim::between(begin, input))) + Ok(Item::Verbatim(verbatim::between(&begin, input))) } - } else if lookahead.peek(Token![macro]) { - input.advance_to(&ahead); - parse_macro2(begin, vis, input) - } else if vis.is_inherited() - && (lookahead.peek(Ident) - || lookahead.peek(Token![self]) - || lookahead.peek(Token![super]) - || lookahead.peek(Token![crate]) - || lookahead.peek(Token![::])) - { - input.parse().map(Item::Macro) + } else if lookahead.peek(Token![extern]) { + input.parse().map(Item::ForeignMod) + } else if lookahead.peek(Token![mod]) { + input.parse().map(Item::Mod) } else { Err(lookahead.error()) - }?; + } + } else if lookahead.peek(Token![mod]) { + input.parse().map(Item::Mod) + } else if lookahead.peek(Token![type]) { + parse_item_type(begin, input) + } else if lookahead.peek(Token![struct]) { + input.parse().map(Item::Struct) + } else if lookahead.peek(Token![enum]) { + input.parse().map(Item::Enum) + } else if lookahead.peek(Token![union]) && ahead.peek2(Ident) { + input.parse().map(Item::Union) + } else if lookahead.peek(Token![trait]) { + input.call(parse_trait_or_trait_alias) + } else if lookahead.peek(Token![auto]) && ahead.peek2(Token![trait]) { + input.parse().map(Item::Trait) + } else if lookahead.peek(Token![impl]) + || lookahead.peek(Token![default]) && !ahead.peek2(Token![!]) + { + let allow_verbatim_impl = true; + if let Some(item) = parse_impl(input, allow_verbatim_impl)? 
{ + Ok(Item::Impl(item)) + } else { + Ok(Item::Verbatim(verbatim::between(&begin, input))) + } + } else if lookahead.peek(Token![macro]) { + input.advance_to(&ahead); + parse_macro2(begin, vis, input) + } else if vis.is_inherited() + && (lookahead.peek(Ident) + || lookahead.peek(Token![self]) + || lookahead.peek(Token![super]) + || lookahead.peek(Token![crate]) + || lookahead.peek(Token![::])) + { + input.parse().map(Item::Macro) + } else { + Err(lookahead.error()) + }?; - attrs.extend(item.replace_attrs(Vec::new())); - item.replace_attrs(attrs); - Ok(item) - } + attrs.extend(item.replace_attrs(Vec::new())); + item.replace_attrs(attrs); + Ok(item) } struct FlexibleItemType { @@ -1219,7 +1278,7 @@ pub(crate) mod parsing { return Err(lookahead.error()); } - Ok(Item::Verbatim(verbatim::between(begin, input))) + Ok(Item::Verbatim(verbatim::between(&begin, input))) } #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] @@ -1340,22 +1399,28 @@ pub(crate) mod parsing { let content; let brace_token = braced!(content in input); let mut items = Punctuated::new(); - let mut has_crate_root_in_path = false; + let mut has_any_crate_root_in_path = false; loop { if content.is_empty() { break; } - has_crate_root_in_path |= + let this_tree_starts_with_crate_root = allow_crate_root_in_path && content.parse::<Option<Token![::]>>()?.is_some(); - let tree: UseTree = content.parse()?; - items.push_value(tree); + has_any_crate_root_in_path |= this_tree_starts_with_crate_root; + match parse_use_tree( + &content, + allow_crate_root_in_path && !this_tree_starts_with_crate_root, + )? 
{ + Some(tree) => items.push_value(tree), + None => has_any_crate_root_in_path = true, + } if content.is_empty() { break; } let comma: Token![,] = content.parse()?; items.push_punct(comma); } - if has_crate_root_in_path { + if has_any_crate_root_in_path { Ok(None) } else { Ok(Some(UseTree::Group(UseGroup { brace_token, items }))) @@ -1386,24 +1451,34 @@ pub(crate) mod parsing { #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] impl Parse for ItemConst { fn parse(input: ParseStream) -> Result<Self> { + let attrs = input.call(Attribute::parse_outer)?; + let vis: Visibility = input.parse()?; + let const_token: Token![const] = input.parse()?; + + let lookahead = input.lookahead1(); + let ident = if lookahead.peek(Ident) || lookahead.peek(Token![_]) { + input.call(Ident::parse_any)? + } else { + return Err(lookahead.error()); + }; + + let colon_token: Token![:] = input.parse()?; + let ty: Type = input.parse()?; + let eq_token: Token![=] = input.parse()?; + let expr: Expr = input.parse()?; + let semi_token: Token![;] = input.parse()?; + Ok(ItemConst { - attrs: input.call(Attribute::parse_outer)?, - vis: input.parse()?, - const_token: input.parse()?, - ident: { - let lookahead = input.lookahead1(); - if lookahead.peek(Ident) || lookahead.peek(Token![_]) { - input.call(Ident::parse_any)? 
- } else { - return Err(lookahead.error()); - } - }, + attrs, + vis, + const_token, + ident, generics: Generics::default(), - colon_token: input.parse()?, - ty: input.parse()?, - eq_token: input.parse()?, - expr: input.parse()?, - semi_token: input.parse()?, + colon_token, + ty: Box::new(ty), + eq_token, + expr: Box::new(expr), + semi_token, }) } } @@ -1753,7 +1828,7 @@ pub(crate) mod parsing { content.call(Attribute::parse_inner)?; content.call(Block::parse_within)?; - Ok(ForeignItem::Verbatim(verbatim::between(begin, input))) + Ok(ForeignItem::Verbatim(verbatim::between(&begin, input))) } else { Ok(ForeignItem::Fn(ForeignItemFn { attrs: Vec::new(), @@ -1773,7 +1848,7 @@ pub(crate) mod parsing { input.parse::<Token![=]>()?; input.parse::<Expr>()?; input.parse::<Token![;]>()?; - Ok(ForeignItem::Verbatim(verbatim::between(begin, input))) + Ok(ForeignItem::Verbatim(verbatim::between(&begin, input))) } else { Ok(ForeignItem::Static(ForeignItemStatic { attrs: Vec::new(), @@ -1882,7 +1957,7 @@ pub(crate) mod parsing { )?; if colon_token.is_some() || ty.is_some() { - Ok(ForeignItem::Verbatim(verbatim::between(begin, input))) + Ok(ForeignItem::Verbatim(verbatim::between(&begin, input))) } else { Ok(ForeignItem::Type(ForeignItemType { attrs: Vec::new(), @@ -1952,7 +2027,7 @@ pub(crate) mod parsing { let (eq_token, ty) = match ty { Some(ty) if colon_token.is_none() => ty, - _ => return Ok(Item::Verbatim(verbatim::between(begin, input))), + _ => return Ok(Item::Verbatim(verbatim::between(&begin, input))), }; Ok(Item::Type(ItemType { @@ -2210,10 +2285,36 @@ pub(crate) mod parsing { let mut item = if lookahead.peek(Token![fn]) || peek_signature(&ahead) { input.parse().map(TraitItem::Fn) } else if lookahead.peek(Token![const]) { - ahead.parse::<Token![const]>()?; + let const_token: Token![const] = ahead.parse()?; let lookahead = ahead.lookahead1(); if lookahead.peek(Ident) || lookahead.peek(Token![_]) { - input.parse().map(TraitItem::Const) + input.advance_to(&ahead); + let 
ident = input.call(Ident::parse_any)?; + let mut generics: Generics = input.parse()?; + let colon_token: Token![:] = input.parse()?; + let ty: Type = input.parse()?; + let default = if let Some(eq_token) = input.parse::<Option<Token![=]>>()? { + let expr: Expr = input.parse()?; + Some((eq_token, expr)) + } else { + None + }; + generics.where_clause = input.parse()?; + let semi_token: Token![;] = input.parse()?; + if generics.lt_token.is_none() && generics.where_clause.is_none() { + Ok(TraitItem::Const(TraitItemConst { + attrs: Vec::new(), + const_token, + ident, + generics, + colon_token, + ty, + default, + semi_token, + })) + } else { + return Ok(TraitItem::Verbatim(verbatim::between(&begin, input))); + } } else if lookahead.peek(Token![async]) || lookahead.peek(Token![unsafe]) || lookahead.peek(Token![extern]) @@ -2240,7 +2341,7 @@ pub(crate) mod parsing { match (vis, defaultness) { (Visibility::Inherited, None) => {} - _ => return Ok(TraitItem::Verbatim(verbatim::between(begin, input))), + _ => return Ok(TraitItem::Verbatim(verbatim::between(&begin, input))), } let item_attrs = match &mut item { @@ -2259,30 +2360,36 @@ pub(crate) mod parsing { #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] impl Parse for TraitItemConst { fn parse(input: ParseStream) -> Result<Self> { + let attrs = input.call(Attribute::parse_outer)?; + let const_token: Token![const] = input.parse()?; + + let lookahead = input.lookahead1(); + let ident = if lookahead.peek(Ident) || lookahead.peek(Token![_]) { + input.call(Ident::parse_any)? 
+ } else { + return Err(lookahead.error()); + }; + + let colon_token: Token![:] = input.parse()?; + let ty: Type = input.parse()?; + let default = if input.peek(Token![=]) { + let eq_token: Token![=] = input.parse()?; + let default: Expr = input.parse()?; + Some((eq_token, default)) + } else { + None + }; + let semi_token: Token![;] = input.parse()?; + Ok(TraitItemConst { - attrs: input.call(Attribute::parse_outer)?, - const_token: input.parse()?, - ident: { - let lookahead = input.lookahead1(); - if lookahead.peek(Ident) || lookahead.peek(Token![_]) { - input.call(Ident::parse_any)? - } else { - return Err(lookahead.error()); - } - }, + attrs, + const_token, + ident, generics: Generics::default(), - colon_token: input.parse()?, - ty: input.parse()?, - default: { - if input.peek(Token![=]) { - let eq_token: Token![=] = input.parse()?; - let default: Expr = input.parse()?; - Some((eq_token, default)) - } else { - None - } - }, - semi_token: input.parse()?, + colon_token, + ty, + default, + semi_token, }) } } @@ -2358,7 +2465,7 @@ pub(crate) mod parsing { )?; if vis.is_some() { - Ok(TraitItem::Verbatim(verbatim::between(begin, input))) + Ok(TraitItem::Verbatim(verbatim::between(&begin, input))) } else { Ok(TraitItem::Type(TraitItemType { attrs: Vec::new(), @@ -2471,7 +2578,7 @@ pub(crate) mod parsing { self_ty = if polarity.is_none() { first_ty } else { - Type::Verbatim(verbatim::between(begin, input)) + Type::Verbatim(verbatim::between(&begin, input)) }; } @@ -2525,7 +2632,7 @@ pub(crate) mod parsing { if let Some(item) = parse_impl_item_fn(input, allow_omitted_body)? 
{ Ok(ImplItem::Fn(item)) } else { - Ok(ImplItem::Verbatim(verbatim::between(begin, input))) + Ok(ImplItem::Verbatim(verbatim::between(&begin, input))) } } else if lookahead.peek(Token![const]) { input.advance_to(&ahead); @@ -2536,26 +2643,37 @@ pub(crate) mod parsing { } else { return Err(lookahead.error()); }; + let mut generics: Generics = input.parse()?; let colon_token: Token![:] = input.parse()?; let ty: Type = input.parse()?; - if let Some(eq_token) = input.parse()? { - return Ok(ImplItem::Const(ImplItemConst { - attrs, - vis, - defaultness, - const_token, - ident, - generics: Generics::default(), - colon_token, - ty, - eq_token, - expr: input.parse()?, - semi_token: input.parse()?, - })); + let value = if let Some(eq_token) = input.parse::<Option<Token![=]>>()? { + let expr: Expr = input.parse()?; + Some((eq_token, expr)) } else { - input.parse::<Token![;]>()?; - return Ok(ImplItem::Verbatim(verbatim::between(begin, input))); - } + None + }; + generics.where_clause = input.parse()?; + let semi_token: Token![;] = input.parse()?; + return match value { + Some((eq_token, expr)) + if generics.lt_token.is_none() && generics.where_clause.is_none() => + { + Ok(ImplItem::Const(ImplItemConst { + attrs, + vis, + defaultness, + const_token, + ident, + generics, + colon_token, + ty, + eq_token, + expr, + semi_token, + })) + } + _ => Ok(ImplItem::Verbatim(verbatim::between(&begin, input))), + }; } else if lookahead.peek(Token![type]) { parse_impl_item_type(begin, input) } else if vis.is_inherited() @@ -2590,25 +2708,36 @@ pub(crate) mod parsing { #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] impl Parse for ImplItemConst { fn parse(input: ParseStream) -> Result<Self> { + let attrs = input.call(Attribute::parse_outer)?; + let vis: Visibility = input.parse()?; + let defaultness: Option<Token![default]> = input.parse()?; + let const_token: Token![const] = input.parse()?; + + let lookahead = input.lookahead1(); + let ident = if lookahead.peek(Ident) || 
lookahead.peek(Token![_]) { + input.call(Ident::parse_any)? + } else { + return Err(lookahead.error()); + }; + + let colon_token: Token![:] = input.parse()?; + let ty: Type = input.parse()?; + let eq_token: Token![=] = input.parse()?; + let expr: Expr = input.parse()?; + let semi_token: Token![;] = input.parse()?; + Ok(ImplItemConst { - attrs: input.call(Attribute::parse_outer)?, - vis: input.parse()?, - defaultness: input.parse()?, - const_token: input.parse()?, - ident: { - let lookahead = input.lookahead1(); - if lookahead.peek(Ident) || lookahead.peek(Token![_]) { - input.call(Ident::parse_any)? - } else { - return Err(lookahead.error()); - } - }, + attrs, + vis, + defaultness, + const_token, + ident, generics: Generics::default(), - colon_token: input.parse()?, - ty: input.parse()?, - eq_token: input.parse()?, - expr: input.parse()?, - semi_token: input.parse()?, + colon_token, + ty, + eq_token, + expr, + semi_token, }) } } @@ -2700,7 +2829,7 @@ pub(crate) mod parsing { let (eq_token, ty) = match ty { Some(ty) if colon_token.is_none() => ty, - _ => return Ok(ImplItem::Verbatim(verbatim::between(begin, input))), + _ => return Ok(ImplItem::Verbatim(verbatim::between(&begin, input))), }; Ok(ImplItem::Type(ImplItemType { @@ -2763,9 +2892,19 @@ pub(crate) mod parsing { #[cfg(feature = "printing")] mod printing { - use super::*; use crate::attr::FilterAttrs; + use crate::data::Fields; + use crate::item::{ + ForeignItemFn, ForeignItemMacro, ForeignItemStatic, ForeignItemType, ImplItemConst, + ImplItemFn, ImplItemMacro, ImplItemType, ItemConst, ItemEnum, ItemExternCrate, ItemFn, + ItemForeignMod, ItemImpl, ItemMacro, ItemMod, ItemStatic, ItemStruct, ItemTrait, + ItemTraitAlias, ItemType, ItemUnion, ItemUse, Receiver, Signature, StaticMutability, + TraitItemConst, TraitItemFn, TraitItemMacro, TraitItemType, UseGlob, UseGroup, UseName, + UsePath, UseRename, Variadic, + }; + use crate::mac::MacroDelimiter; use crate::print::TokensOrDefault; + use crate::ty::Type; use 
proc_macro2::TokenStream; use quote::{ToTokens, TokenStreamExt}; diff --git a/vendor/syn/src/lib.rs b/vendor/syn/src/lib.rs index 583c405..609fdd5 100644 --- a/vendor/syn/src/lib.rs +++ b/vendor/syn/src/lib.rs @@ -249,8 +249,9 @@ //! dynamic library libproc_macro from rustc toolchain. // Syn types in rustdoc of other crates get linked to here. -#![doc(html_root_url = "https://docs.rs/syn/2.0.15")] +#![doc(html_root_url = "https://docs.rs/syn/2.0.51")] #![cfg_attr(doc_cfg, feature(doc_cfg))] +#![deny(unsafe_op_in_unsafe_fn)] #![allow(non_camel_case_types)] #![allow( clippy::bool_to_int_with_if, @@ -260,11 +261,13 @@ clippy::cast_ptr_alignment, clippy::default_trait_access, clippy::derivable_impls, + clippy::diverging_sub_expression, clippy::doc_markdown, clippy::expl_impl_clone_on_copy, clippy::explicit_auto_deref, clippy::if_not_else, clippy::inherent_to_string, + clippy::into_iter_without_iter, clippy::items_after_statements, clippy::large_enum_variant, clippy::let_underscore_untyped, // https://github.com/rust-lang/rust-clippy/issues/10410 @@ -289,6 +292,8 @@ clippy::too_many_arguments, clippy::too_many_lines, clippy::trivially_copy_pass_by_ref, + clippy::unconditional_recursion, // https://github.com/rust-lang/rust-clippy/issues/12133 + clippy::uninhabited_references, clippy::uninlined_format_args, clippy::unnecessary_box_returns, clippy::unnecessary_unwrap, @@ -296,10 +301,7 @@ clippy::wildcard_imports, )] -#[cfg(all( - not(all(target_arch = "wasm32", any(target_os = "unknown", target_os = "wasi"))), - feature = "proc-macro" -))] +#[cfg(feature = "proc-macro")] extern crate proc_macro; #[macro_use] @@ -315,6 +317,7 @@ pub mod token; #[cfg(any(feature = "full", feature = "derive"))] mod attr; #[cfg(any(feature = "full", feature = "derive"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] pub use crate::attr::{AttrStyle, Attribute, Meta, MetaList, MetaNameValue}; mod bigint; @@ -330,11 +333,13 @@ mod custom_punctuation; 
#[cfg(any(feature = "full", feature = "derive"))] mod data; #[cfg(any(feature = "full", feature = "derive"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] pub use crate::data::{Field, Fields, FieldsNamed, FieldsUnnamed, Variant}; #[cfg(any(feature = "full", feature = "derive"))] mod derive; #[cfg(feature = "derive")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "derive")))] pub use crate::derive::{Data, DataEnum, DataStruct, DataUnion, DeriveInput}; mod drops; @@ -345,14 +350,21 @@ pub use crate::error::{Error, Result}; #[cfg(any(feature = "full", feature = "derive"))] mod expr; #[cfg(feature = "full")] -pub use crate::expr::{Arm, FieldValue, Label, RangeLimits}; +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub use crate::expr::{Arm, Label, RangeLimits}; #[cfg(any(feature = "full", feature = "derive"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] pub use crate::expr::{ - Expr, ExprArray, ExprAssign, ExprAsync, ExprAwait, ExprBinary, ExprBlock, ExprBreak, ExprCall, - ExprCast, ExprClosure, ExprConst, ExprContinue, ExprField, ExprForLoop, ExprGroup, ExprIf, - ExprIndex, ExprInfer, ExprLet, ExprLit, ExprLoop, ExprMacro, ExprMatch, ExprMethodCall, - ExprParen, ExprPath, ExprRange, ExprReference, ExprRepeat, ExprReturn, ExprStruct, ExprTry, - ExprTryBlock, ExprTuple, ExprUnary, ExprUnsafe, ExprWhile, ExprYield, Index, Member, + Expr, ExprBinary, ExprCall, ExprCast, ExprField, ExprIndex, ExprLit, ExprMacro, ExprMethodCall, + ExprParen, ExprPath, ExprReference, ExprStruct, ExprUnary, FieldValue, Index, Member, +}; +#[cfg(any(feature = "full", feature = "derive"))] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] +pub use crate::expr::{ + ExprArray, ExprAssign, ExprAsync, ExprAwait, ExprBlock, ExprBreak, ExprClosure, ExprConst, + ExprContinue, ExprForLoop, ExprGroup, ExprIf, ExprInfer, ExprLet, ExprLoop, ExprMatch, + ExprRange, ExprRepeat, ExprReturn, ExprTry, ExprTryBlock, ExprTuple, ExprUnsafe, ExprWhile, + 
ExprYield, }; #[cfg(feature = "parsing")] @@ -362,25 +374,33 @@ pub mod ext; #[cfg(feature = "full")] mod file; #[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] pub use crate::file::File; #[cfg(any(feature = "full", feature = "derive"))] mod generics; #[cfg(any(feature = "full", feature = "derive"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] pub use crate::generics::{ BoundLifetimes, ConstParam, GenericParam, Generics, LifetimeParam, PredicateLifetime, PredicateType, TraitBound, TraitBoundModifier, TypeParam, TypeParamBound, WhereClause, WherePredicate, }; #[cfg(all(any(feature = "full", feature = "derive"), feature = "printing"))] +#[cfg_attr( + doc_cfg, + doc(cfg(all(any(feature = "full", feature = "derive"), feature = "printing"))) +)] pub use crate::generics::{ImplGenerics, Turbofish, TypeGenerics}; mod ident; +#[doc(inline)] pub use crate::ident::Ident; #[cfg(feature = "full")] mod item; #[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] pub use crate::item::{ FnArg, ForeignItem, ForeignItemFn, ForeignItemMacro, ForeignItemStatic, ForeignItemType, ImplItem, ImplItemConst, ImplItemFn, ImplItemMacro, ImplItemType, ImplRestriction, Item, @@ -391,12 +411,14 @@ pub use crate::item::{ }; mod lifetime; +#[doc(inline)] pub use crate::lifetime::Lifetime; mod lit; -pub use crate::lit::{ - Lit, LitBool, LitByte, LitByteStr, LitChar, LitFloat, LitInt, LitStr, StrStyle, -}; +#[doc(hidden)] // https://github.com/dtolnay/syn/issues/1566 +pub use crate::lit::StrStyle; +#[doc(inline)] +pub use crate::lit::{Lit, LitBool, LitByte, LitByteStr, LitChar, LitFloat, LitInt, LitStr}; #[cfg(feature = "parsing")] mod lookahead; @@ -404,6 +426,7 @@ mod lookahead; #[cfg(any(feature = "full", feature = "derive"))] mod mac; #[cfg(any(feature = "full", feature = "derive"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] pub use crate::mac::{Macro, MacroDelimiter}; #[cfg(all(feature 
= "parsing", any(feature = "full", feature = "derive")))] @@ -416,17 +439,14 @@ pub mod meta; #[cfg(any(feature = "full", feature = "derive"))] mod op; #[cfg(any(feature = "full", feature = "derive"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] pub use crate::op::{BinOp, UnOp}; #[cfg(feature = "parsing")] #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] pub mod parse; -#[cfg(all( - not(all(target_arch = "wasm32", any(target_os = "unknown", target_os = "wasi"))), - feature = "parsing", - feature = "proc-macro" -))] +#[cfg(all(feature = "parsing", feature = "proc-macro"))] mod parse_macro_input; #[cfg(all(feature = "parsing", feature = "printing"))] @@ -435,19 +455,16 @@ mod parse_quote; #[cfg(feature = "full")] mod pat; #[cfg(feature = "full")] -pub use crate::expr::{ - ExprConst as PatConst, ExprLit as PatLit, ExprMacro as PatMacro, ExprPath as PatPath, - ExprRange as PatRange, -}; -#[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] pub use crate::pat::{ - FieldPat, Pat, PatIdent, PatOr, PatParen, PatReference, PatRest, PatSlice, PatStruct, PatTuple, - PatTupleStruct, PatType, PatWild, + FieldPat, Pat, PatConst, PatIdent, PatLit, PatMacro, PatOr, PatParen, PatPath, PatRange, + PatReference, PatRest, PatSlice, PatStruct, PatTuple, PatTupleStruct, PatType, PatWild, }; #[cfg(any(feature = "full", feature = "derive"))] mod path; #[cfg(any(feature = "full", feature = "derive"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] pub use crate::path::{ AngleBracketedGenericArguments, AssocConst, AssocType, Constraint, GenericArgument, ParenthesizedGenericArguments, Path, PathArguments, PathSegment, QSelf, @@ -461,6 +478,7 @@ pub mod punctuated; #[cfg(any(feature = "full", feature = "derive"))] mod restriction; #[cfg(any(feature = "full", feature = "derive"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] pub use crate::restriction::{FieldMutability, 
VisRestricted, Visibility}; mod sealed; @@ -474,6 +492,7 @@ pub mod spanned; #[cfg(feature = "full")] mod stmt; #[cfg(feature = "full")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] pub use crate::stmt::{Block, Local, LocalInit, Stmt, StmtMacro}; mod thread; @@ -484,6 +503,7 @@ mod tt; #[cfg(any(feature = "full", feature = "derive"))] mod ty; #[cfg(any(feature = "full", feature = "derive"))] +#[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] pub use crate::ty::{ Abi, BareFnArg, BareVariadic, ReturnType, Type, TypeArray, TypeBareFn, TypeGroup, TypeImplTrait, TypeInfer, TypeMacro, TypeNever, TypeParen, TypePath, TypePtr, TypeReference, @@ -815,7 +835,18 @@ mod gen { #[path = "../gen_helper.rs"] mod helper; } -pub use crate::gen::*; + +#[cfg(feature = "fold")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "fold")))] +pub use crate::gen::fold; + +#[cfg(feature = "visit")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "visit")))] +pub use crate::gen::visit; + +#[cfg(feature = "visit-mut")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "visit-mut")))] +pub use crate::gen::visit_mut; // Not public API. 
#[doc(hidden)] @@ -860,11 +891,7 @@ pub mod __private; /// expanded.into() /// } /// ``` -#[cfg(all( - not(all(target_arch = "wasm32", any(target_os = "unknown", target_os = "wasi"))), - feature = "parsing", - feature = "proc-macro" -))] +#[cfg(all(feature = "parsing", feature = "proc-macro"))] #[cfg_attr(doc_cfg, doc(cfg(all(feature = "parsing", feature = "proc-macro"))))] pub fn parse<T: parse::Parse>(tokens: proc_macro::TokenStream) -> Result<T> { parse::Parser::parse(T::parse, tokens) diff --git a/vendor/syn/src/lifetime.rs b/vendor/syn/src/lifetime.rs index 96920ad..1df2c53 100644 --- a/vendor/syn/src/lifetime.rs +++ b/vendor/syn/src/lifetime.rs @@ -1,11 +1,10 @@ +#[cfg(feature = "parsing")] +use crate::lookahead; use proc_macro2::{Ident, Span}; use std::cmp::Ordering; use std::fmt::{self, Display}; use std::hash::{Hash, Hasher}; -#[cfg(feature = "parsing")] -use crate::lookahead; - /// A Rust lifetime: `'a`. /// /// Lifetime names must conform to the following rules: @@ -113,16 +112,19 @@ impl Hash for Lifetime { } #[cfg(feature = "parsing")] -#[doc(hidden)] -#[allow(non_snake_case)] -pub fn Lifetime(marker: lookahead::TokenMarker) -> Lifetime { - match marker {} +pub_if_not_doc! 
{ + #[doc(hidden)] + #[allow(non_snake_case)] + pub fn Lifetime(marker: lookahead::TokenMarker) -> Lifetime { + match marker {} + } } #[cfg(feature = "parsing")] pub(crate) mod parsing { - use super::*; - use crate::parse::{Parse, ParseStream, Result}; + use crate::error::Result; + use crate::lifetime::Lifetime; + use crate::parse::{Parse, ParseStream}; #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] impl Parse for Lifetime { @@ -138,7 +140,7 @@ pub(crate) mod parsing { #[cfg(feature = "printing")] mod printing { - use super::*; + use crate::lifetime::Lifetime; use proc_macro2::{Punct, Spacing, TokenStream}; use quote::{ToTokens, TokenStreamExt}; diff --git a/vendor/syn/src/lit.rs b/vendor/syn/src/lit.rs index 662ef8b..67b638d 100644 --- a/vendor/syn/src/lit.rs +++ b/vendor/syn/src/lit.rs @@ -18,7 +18,7 @@ ast_enum_of_structs! { /// /// This type is a [syntax tree enum]. /// - /// [syntax tree enum]: crate::Expr#syntax-tree-enums + /// [syntax tree enum]: crate::expr::Expr#syntax-tree-enums #[non_exhaustive] pub enum Lit { /// A UTF-8 string literal: `"foo"`. @@ -225,10 +225,21 @@ impl LitStr { // Parse string literal into a token stream with every span equal to the // original literal's span. 
+ let span = self.span(); let mut tokens = TokenStream::from_str(&self.value())?; - tokens = respan_token_stream(tokens, self.span()); + tokens = respan_token_stream(tokens, span); - parser.parse2(tokens) + let result = crate::parse::parse_scoped(parser, span, tokens)?; + + let suffix = self.suffix(); + if !suffix.is_empty() { + return Err(Error::new( + self.span(), + format!("unexpected suffix `{}` on string literal", suffix), + )); + } + + Ok(result) } pub fn span(&self) -> Span { @@ -544,141 +555,120 @@ impl LitBool { #[cfg(feature = "extra-traits")] mod debug_impls { - use super::*; + use crate::lit::{LitBool, LitByte, LitByteStr, LitChar, LitFloat, LitInt, LitStr}; use std::fmt::{self, Debug}; #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] impl Debug for LitStr { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl LitStr { - pub(crate) fn debug( - &self, - formatter: &mut fmt::Formatter, - name: &str, - ) -> fmt::Result { - formatter - .debug_struct(name) - .field("token", &format_args!("{}", self.repr.token)) - .finish() - } - } self.debug(formatter, "LitStr") } } + impl LitStr { + pub(crate) fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + formatter + .debug_struct(name) + .field("token", &format_args!("{}", self.repr.token)) + .finish() + } + } + #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] impl Debug for LitByteStr { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl LitByteStr { - pub(crate) fn debug( - &self, - formatter: &mut fmt::Formatter, - name: &str, - ) -> fmt::Result { - formatter - .debug_struct(name) - .field("token", &format_args!("{}", self.repr.token)) - .finish() - } - } self.debug(formatter, "LitByteStr") } } + impl LitByteStr { + pub(crate) fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + formatter + .debug_struct(name) + .field("token", &format_args!("{}", self.repr.token)) + .finish() + } + } + #[cfg_attr(doc_cfg, 
doc(cfg(feature = "extra-traits")))] impl Debug for LitByte { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl LitByte { - pub(crate) fn debug( - &self, - formatter: &mut fmt::Formatter, - name: &str, - ) -> fmt::Result { - formatter - .debug_struct(name) - .field("token", &format_args!("{}", self.repr.token)) - .finish() - } - } self.debug(formatter, "LitByte") } } + impl LitByte { + pub(crate) fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + formatter + .debug_struct(name) + .field("token", &format_args!("{}", self.repr.token)) + .finish() + } + } + #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] impl Debug for LitChar { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl LitChar { - pub(crate) fn debug( - &self, - formatter: &mut fmt::Formatter, - name: &str, - ) -> fmt::Result { - formatter - .debug_struct(name) - .field("token", &format_args!("{}", self.repr.token)) - .finish() - } - } self.debug(formatter, "LitChar") } } + impl LitChar { + pub(crate) fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + formatter + .debug_struct(name) + .field("token", &format_args!("{}", self.repr.token)) + .finish() + } + } + #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] impl Debug for LitInt { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl LitInt { - pub(crate) fn debug( - &self, - formatter: &mut fmt::Formatter, - name: &str, - ) -> fmt::Result { - formatter - .debug_struct(name) - .field("token", &format_args!("{}", self.repr.token)) - .finish() - } - } self.debug(formatter, "LitInt") } } + impl LitInt { + pub(crate) fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + formatter + .debug_struct(name) + .field("token", &format_args!("{}", self.repr.token)) + .finish() + } + } + #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] impl Debug for LitFloat { fn fmt(&self, formatter: &mut fmt::Formatter) -> 
fmt::Result { - impl LitFloat { - pub(crate) fn debug( - &self, - formatter: &mut fmt::Formatter, - name: &str, - ) -> fmt::Result { - formatter - .debug_struct(name) - .field("token", &format_args!("{}", self.repr.token)) - .finish() - } - } self.debug(formatter, "LitFloat") } } + impl LitFloat { + pub(crate) fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + formatter + .debug_struct(name) + .field("token", &format_args!("{}", self.repr.token)) + .finish() + } + } + #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] impl Debug for LitBool { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - impl LitBool { - pub(crate) fn debug( - &self, - formatter: &mut fmt::Formatter, - name: &str, - ) -> fmt::Result { - formatter - .debug_struct(name) - .field("value", &self.value) - .finish() - } - } self.debug(formatter, "LitBool") } } + + impl LitBool { + pub(crate) fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + formatter + .debug_struct(name) + .field("value", &self.value) + .finish() + } + } } #[cfg(feature = "clone-impls")] @@ -748,10 +738,12 @@ macro_rules! lit_extra_traits { } #[cfg(feature = "parsing")] - #[doc(hidden)] - #[allow(non_snake_case)] - pub fn $ty(marker: lookahead::TokenMarker) -> $ty { - match marker {} + pub_if_not_doc! { + #[doc(hidden)] + #[allow(non_snake_case)] + pub fn $ty(marker: lookahead::TokenMarker) -> $ty { + match marker {} + } } }; } @@ -764,38 +756,45 @@ lit_extra_traits!(LitInt); lit_extra_traits!(LitFloat); #[cfg(feature = "parsing")] -#[doc(hidden)] -#[allow(non_snake_case)] -pub fn LitBool(marker: lookahead::TokenMarker) -> LitBool { - match marker {} +pub_if_not_doc! { + #[doc(hidden)] + #[allow(non_snake_case)] + pub fn LitBool(marker: lookahead::TokenMarker) -> LitBool { + match marker {} + } } -ast_enum! { - /// The style of a string literal, either plain quoted or a raw string like - /// `r##"data"##`. 
- pub enum StrStyle #no_visit { - /// An ordinary string like `"data"`. - Cooked, - /// A raw string like `r##"data"##`. - /// - /// The unsigned integer is the number of `#` symbols used. - Raw(usize), - } +/// The style of a string literal, either plain quoted or a raw string like +/// `r##"data"##`. +#[doc(hidden)] // https://github.com/dtolnay/syn/issues/1566 +pub enum StrStyle { + /// An ordinary string like `"data"`. + Cooked, + /// A raw string like `r##"data"##`. + /// + /// The unsigned integer is the number of `#` symbols used. + Raw(usize), } #[cfg(feature = "parsing")] -#[doc(hidden)] -#[allow(non_snake_case)] -pub fn Lit(marker: lookahead::TokenMarker) -> Lit { - match marker {} +pub_if_not_doc! { + #[doc(hidden)] + #[allow(non_snake_case)] + pub fn Lit(marker: lookahead::TokenMarker) -> Lit { + match marker {} + } } #[cfg(feature = "parsing")] pub(crate) mod parsing { - use super::*; use crate::buffer::Cursor; - use crate::parse::{Parse, ParseStream, Result}; - use proc_macro2::Punct; + use crate::error::Result; + use crate::lit::{ + value, Lit, LitBool, LitByte, LitByteStr, LitChar, LitFloat, LitFloatRepr, LitInt, + LitIntRepr, LitStr, + }; + use crate::parse::{Parse, ParseStream}; + use proc_macro2::{Literal, Punct}; #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] impl Parse for Lit { @@ -948,7 +947,7 @@ pub(crate) mod parsing { #[cfg(feature = "printing")] mod printing { - use super::*; + use crate::lit::{LitBool, LitByte, LitByteStr, LitChar, LitFloat, LitInt, LitStr}; use proc_macro2::TokenStream; use quote::{ToTokens, TokenStreamExt}; @@ -1003,8 +1002,12 @@ mod printing { } mod value { - use super::*; use crate::bigint::BigInt; + use crate::lit::{ + Lit, LitBool, LitByte, LitByteStr, LitChar, LitFloat, LitFloatRepr, LitInt, LitIntRepr, + LitRepr, LitStr, + }; + use proc_macro2::{Literal, Span}; use std::char; use std::ops::{Index, RangeFrom}; @@ -1079,6 +1082,7 @@ mod value { // c"...", cr"...", cr#"..."# // TODO: add a Lit::CStr variant? 
b'c' => return Lit::Verbatim(token), + b'(' if repr == "(/*ERROR*/)" => return Lit::Verbatim(token), _ => {} } @@ -1166,7 +1170,7 @@ mod value { b'x' => { let (byte, rest) = backslash_x(s); s = rest; - assert!(byte <= 0x80, "Invalid \\x byte in string literal"); + assert!(byte <= 0x7F, "Invalid \\x byte in string literal"); char::from_u32(u32::from(byte)).unwrap() } b'u' => { @@ -1273,8 +1277,7 @@ mod value { b'"' => b'"', b'\r' | b'\n' => loop { let byte = byte(v, 0); - let ch = char::from_u32(u32::from(byte)).unwrap(); - if ch.is_whitespace() { + if matches!(byte, b' ' | b'\t' | b'\n' | b'\r') { v = &v[1..]; } else { continue 'outer; diff --git a/vendor/syn/src/mac.rs b/vendor/syn/src/mac.rs index 8f687cc..d32cce4 100644 --- a/vendor/syn/src/mac.rs +++ b/vendor/syn/src/mac.rs @@ -1,15 +1,16 @@ -use super::*; +#[cfg(feature = "parsing")] +use crate::error::Result; +#[cfg(feature = "parsing")] +use crate::parse::{Parse, ParseStream, Parser}; +use crate::path::Path; use crate::token::{Brace, Bracket, Paren}; use proc_macro2::extra::DelimSpan; -#[cfg(any(feature = "parsing", feature = "printing"))] +#[cfg(feature = "parsing")] use proc_macro2::Delimiter; use proc_macro2::TokenStream; #[cfg(feature = "parsing")] use proc_macro2::TokenTree; -#[cfg(feature = "parsing")] -use crate::parse::{Parse, ParseStream, Parser, Result}; - ast_struct! { /// A macro invocation: `println!("{}", mac)`. 
#[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] @@ -162,8 +163,10 @@ pub(crate) fn parse_delimiter(input: ParseStream) -> Result<(MacroDelimiter, Tok #[cfg(feature = "parsing")] pub(crate) mod parsing { - use super::*; - use crate::parse::{Parse, ParseStream, Result}; + use crate::error::Result; + use crate::mac::{parse_delimiter, Macro}; + use crate::parse::{Parse, ParseStream}; + use crate::path::Path; #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] impl Parse for Macro { @@ -185,8 +188,9 @@ pub(crate) mod parsing { #[cfg(feature = "printing")] mod printing { - use super::*; - use proc_macro2::TokenStream; + use crate::mac::{Macro, MacroDelimiter}; + use crate::token; + use proc_macro2::{Delimiter, TokenStream}; use quote::ToTokens; impl MacroDelimiter { diff --git a/vendor/syn/src/macros.rs b/vendor/syn/src/macros.rs index 953841b..46d2832 100644 --- a/vendor/syn/src/macros.rs +++ b/vendor/syn/src/macros.rs @@ -4,14 +4,17 @@ )] macro_rules! ast_struct { ( - [$($attrs_pub:tt)*] - struct $name:ident #full $($rest:tt)* + $(#[$attr:meta])* + $pub:ident $struct:ident $name:ident #full $body:tt ) => { + check_keyword_matches!(pub $pub); + check_keyword_matches!(struct $struct); + #[cfg(feature = "full")] - $($attrs_pub)* struct $name $($rest)* + $(#[$attr])* $pub $struct $name $body #[cfg(not(feature = "full"))] - $($attrs_pub)* struct $name { + $(#[$attr])* $pub $struct $name { _noconstruct: ::std::marker::PhantomData<::proc_macro2::Span>, } @@ -24,35 +27,26 @@ macro_rules! ast_struct { }; ( - [$($attrs_pub:tt)*] - struct $name:ident $($rest:tt)* + $(#[$attr:meta])* + $pub:ident $struct:ident $name:ident $body:tt ) => { - $($attrs_pub)* struct $name $($rest)* - }; + check_keyword_matches!(pub $pub); + check_keyword_matches!(struct $struct); - ($($t:tt)*) => { - strip_attrs_pub!(ast_struct!($($t)*)); + $(#[$attr])* $pub $struct $name $body }; } +#[cfg(any(feature = "full", feature = "derive"))] macro_rules! 
ast_enum { - // Drop the `#no_visit` attribute, if present. ( - [$($attrs_pub:tt)*] - enum $name:ident #no_visit $($rest:tt)* - ) => ( - ast_enum!([$($attrs_pub)*] enum $name $($rest)*); - ); + $(#[$enum_attr:meta])* + $pub:ident $enum:ident $name:ident $body:tt + ) => { + check_keyword_matches!(pub $pub); + check_keyword_matches!(enum $enum); - ( - [$($attrs_pub:tt)*] - enum $name:ident $($rest:tt)* - ) => ( - $($attrs_pub)* enum $name $($rest)* - ); - - ($($t:tt)*) => { - strip_attrs_pub!(ast_enum!($($t)*)); + $(#[$enum_attr])* $pub $enum $name $body }; } @@ -60,16 +54,19 @@ macro_rules! ast_enum_of_structs { ( $(#[$enum_attr:meta])* $pub:ident $enum:ident $name:ident $body:tt - $($remaining:tt)* ) => { - ast_enum!($(#[$enum_attr])* $pub $enum $name $body); - ast_enum_of_structs_impl!($pub $enum $name $body $($remaining)*); + check_keyword_matches!(pub $pub); + check_keyword_matches!(enum $enum); + + $(#[$enum_attr])* $pub $enum $name $body + + ast_enum_of_structs_impl!($name $body); }; } macro_rules! ast_enum_of_structs_impl { ( - $pub:ident $enum:ident $name:ident { + $name:ident { $( $(#[cfg $cfg_attr:tt])* $(#[doc $($doc_attr:tt)*])* @@ -77,9 +74,6 @@ macro_rules! ast_enum_of_structs_impl { )* } ) => { - check_keyword_matches!(pub $pub); - check_keyword_matches!(enum $enum); - $($( ast_enum_from_struct!($name::$variant, $($member)::+); )*)* @@ -154,15 +148,29 @@ macro_rules! generate_to_tokens { }; } -macro_rules! strip_attrs_pub { - ($mac:ident!($(#[$m:meta])* $pub:ident $($t:tt)*)) => { +// Rustdoc bug: does not respect the doc(hidden) on some items. +#[cfg(all(doc, feature = "parsing"))] +macro_rules! pub_if_not_doc { + ($(#[$m:meta])* $pub:ident $($item:tt)*) => { + check_keyword_matches!(pub $pub); + + $(#[$m])* + $pub(crate) $($item)* + }; +} + +#[cfg(all(not(doc), feature = "parsing"))] +macro_rules! 
pub_if_not_doc { + ($(#[$m:meta])* $pub:ident $($item:tt)*) => { check_keyword_matches!(pub $pub); - $mac!([$(#[$m])* $pub] $($t)*); + $(#[$m])* + $pub $($item)* }; } macro_rules! check_keyword_matches { (enum enum) => {}; (pub pub) => {}; + (struct struct) => {}; } diff --git a/vendor/syn/src/meta.rs b/vendor/syn/src/meta.rs index b6bcf98..ffeeb26 100644 --- a/vendor/syn/src/meta.rs +++ b/vendor/syn/src/meta.rs @@ -1,8 +1,9 @@ //! Facility for interpreting structured content inside of an `Attribute`. -use crate::ext::IdentExt; +use crate::error::{Error, Result}; +use crate::ext::IdentExt as _; use crate::lit::Lit; -use crate::parse::{Error, ParseStream, Parser, Result}; +use crate::parse::{ParseStream, Parser}; use crate::path::{Path, PathSegment}; use crate::punctuated::Punctuated; use proc_macro2::Ident; @@ -129,7 +130,13 @@ use std::fmt::Display; /// } /// ``` pub fn parser(logic: impl FnMut(ParseNestedMeta) -> Result<()>) -> impl Parser<Output = ()> { - |input: ParseStream| parse_nested_meta(input, logic) + |input: ParseStream| { + if input.is_empty() { + Ok(()) + } else { + parse_nested_meta(input, logic) + } + } } /// Context for parsing a single property in the conventional syntax for diff --git a/vendor/syn/src/op.rs b/vendor/syn/src/op.rs index bff72c8..ff83bd7 100644 --- a/vendor/syn/src/op.rs +++ b/vendor/syn/src/op.rs @@ -78,59 +78,12 @@ ast_enum! 
{ #[cfg(feature = "parsing")] pub(crate) mod parsing { - use super::*; - use crate::parse::{Parse, ParseStream, Result}; - - fn parse_binop(input: ParseStream) -> Result<BinOp> { - if input.peek(Token![&&]) { - input.parse().map(BinOp::And) - } else if input.peek(Token![||]) { - input.parse().map(BinOp::Or) - } else if input.peek(Token![<<]) { - input.parse().map(BinOp::Shl) - } else if input.peek(Token![>>]) { - input.parse().map(BinOp::Shr) - } else if input.peek(Token![==]) { - input.parse().map(BinOp::Eq) - } else if input.peek(Token![<=]) { - input.parse().map(BinOp::Le) - } else if input.peek(Token![!=]) { - input.parse().map(BinOp::Ne) - } else if input.peek(Token![>=]) { - input.parse().map(BinOp::Ge) - } else if input.peek(Token![+]) { - input.parse().map(BinOp::Add) - } else if input.peek(Token![-]) { - input.parse().map(BinOp::Sub) - } else if input.peek(Token![*]) { - input.parse().map(BinOp::Mul) - } else if input.peek(Token![/]) { - input.parse().map(BinOp::Div) - } else if input.peek(Token![%]) { - input.parse().map(BinOp::Rem) - } else if input.peek(Token![^]) { - input.parse().map(BinOp::BitXor) - } else if input.peek(Token![&]) { - input.parse().map(BinOp::BitAnd) - } else if input.peek(Token![|]) { - input.parse().map(BinOp::BitOr) - } else if input.peek(Token![<]) { - input.parse().map(BinOp::Lt) - } else if input.peek(Token![>]) { - input.parse().map(BinOp::Gt) - } else { - Err(input.error("expected binary operator")) - } - } + use crate::error::Result; + use crate::op::{BinOp, UnOp}; + use crate::parse::{Parse, ParseStream}; #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] impl Parse for BinOp { - #[cfg(not(feature = "full"))] - fn parse(input: ParseStream) -> Result<Self> { - parse_binop(input) - } - - #[cfg(feature = "full")] fn parse(input: ParseStream) -> Result<Self> { if input.peek(Token![+=]) { input.parse().map(BinOp::AddAssign) @@ -152,8 +105,44 @@ pub(crate) mod parsing { input.parse().map(BinOp::ShlAssign) } else if 
input.peek(Token![>>=]) { input.parse().map(BinOp::ShrAssign) + } else if input.peek(Token![&&]) { + input.parse().map(BinOp::And) + } else if input.peek(Token![||]) { + input.parse().map(BinOp::Or) + } else if input.peek(Token![<<]) { + input.parse().map(BinOp::Shl) + } else if input.peek(Token![>>]) { + input.parse().map(BinOp::Shr) + } else if input.peek(Token![==]) { + input.parse().map(BinOp::Eq) + } else if input.peek(Token![<=]) { + input.parse().map(BinOp::Le) + } else if input.peek(Token![!=]) { + input.parse().map(BinOp::Ne) + } else if input.peek(Token![>=]) { + input.parse().map(BinOp::Ge) + } else if input.peek(Token![+]) { + input.parse().map(BinOp::Add) + } else if input.peek(Token![-]) { + input.parse().map(BinOp::Sub) + } else if input.peek(Token![*]) { + input.parse().map(BinOp::Mul) + } else if input.peek(Token![/]) { + input.parse().map(BinOp::Div) + } else if input.peek(Token![%]) { + input.parse().map(BinOp::Rem) + } else if input.peek(Token![^]) { + input.parse().map(BinOp::BitXor) + } else if input.peek(Token![&]) { + input.parse().map(BinOp::BitAnd) + } else if input.peek(Token![|]) { + input.parse().map(BinOp::BitOr) + } else if input.peek(Token![<]) { + input.parse().map(BinOp::Lt) + } else if input.peek(Token![>]) { + input.parse().map(BinOp::Gt) } else { - parse_binop(input) + Err(input.error("expected binary operator")) } } } @@ -177,7 +166,7 @@ pub(crate) mod parsing { #[cfg(feature = "printing")] mod printing { - use super::*; + use crate::op::{BinOp, UnOp}; use proc_macro2::TokenStream; use quote::ToTokens; diff --git a/vendor/syn/src/parse.rs b/vendor/syn/src/parse.rs index 61a10d2..8668e0e 100644 --- a/vendor/syn/src/parse.rs +++ b/vendor/syn/src/parse.rs @@ -185,14 +185,9 @@ pub mod discouraged; use crate::buffer::{Cursor, TokenBuffer}; use crate::error; use crate::lookahead; -#[cfg(all( - not(all(target_arch = "wasm32", any(target_os = "unknown", target_os = "wasi"))), - feature = "proc-macro" -))] -use crate::proc_macro; use 
crate::punctuated::Punctuated; use crate::token::Token; -use proc_macro2::{self, Delimiter, Group, Literal, Punct, Span, TokenStream, TokenTree}; +use proc_macro2::{Delimiter, Group, Literal, Punct, Span, TokenStream, TokenTree}; use std::cell::Cell; use std::fmt::{self, Debug, Display}; #[cfg(feature = "extra-traits")] @@ -516,8 +511,8 @@ impl<'a> ParseBuffer<'a> { /// /// - `input.peek(Token![struct])` /// - `input.peek(Token![==])` - /// - `input.peek(Ident)` *(does not accept keywords)* - /// - `input.peek(Ident::peek_any)` + /// - `input.peek(syn::Ident)` *(does not accept keywords)* + /// - `input.peek(syn::Ident::peek_any)` /// - `input.peek(Lifetime)` /// - `input.peek(token::Brace)` /// @@ -1099,6 +1094,58 @@ impl<'a> ParseBuffer<'a> { /// /// Cursors are immutable so no operations you perform against the cursor /// will affect the state of this parse stream. + /// + /// # Example + /// + /// ``` + /// use proc_macro2::TokenStream; + /// use syn::buffer::Cursor; + /// use syn::parse::{ParseStream, Result}; + /// + /// // Run a parser that returns T, but get its output as TokenStream instead of T. + /// // This works without T needing to implement ToTokens. + /// fn recognize_token_stream<T>( + /// recognizer: fn(ParseStream) -> Result<T>, + /// ) -> impl Fn(ParseStream) -> Result<TokenStream> { + /// move |input| { + /// let begin = input.cursor(); + /// recognizer(input)?; + /// let end = input.cursor(); + /// Ok(tokens_between(begin, end)) + /// } + /// } + /// + /// // Collect tokens between two cursors as a TokenStream. 
+ /// fn tokens_between(begin: Cursor, end: Cursor) -> TokenStream { + /// assert!(begin <= end); + /// + /// let mut cursor = begin; + /// let mut tokens = TokenStream::new(); + /// while cursor < end { + /// let (token, next) = cursor.token_tree().unwrap(); + /// tokens.extend(std::iter::once(token)); + /// cursor = next; + /// } + /// tokens + /// } + /// + /// fn main() { + /// use quote::quote; + /// use syn::parse::{Parse, Parser}; + /// use syn::Token; + /// + /// // Parse syn::Type as a TokenStream, surrounded by angle brackets. + /// fn example(input: ParseStream) -> Result<TokenStream> { + /// let _langle: Token![<] = input.parse()?; + /// let ty = recognize_token_stream(syn::Type::parse)(input)?; + /// let _rangle: Token![>] = input.parse()?; + /// Ok(ty) + /// } + /// + /// let tokens = quote! { <fn() -> u8> }; + /// println!("{}", example.parse2(tokens).unwrap()); + /// } + /// ``` pub fn cursor(&self) -> Cursor<'a> { self.cell.get() } @@ -1198,10 +1245,7 @@ pub trait Parser: Sized { /// /// This function will check that the input is fully parsed. If there are /// any unparsed tokens at the end of the stream, an error is returned. - #[cfg(all( - not(all(target_arch = "wasm32", any(target_os = "unknown", target_os = "wasi"))), - feature = "proc-macro" - ))] + #[cfg(feature = "proc-macro")] #[cfg_attr(doc_cfg, doc(cfg(feature = "proc-macro")))] fn parse(self, tokens: proc_macro::TokenStream) -> Result<Self::Output> { self.parse2(proc_macro2::TokenStream::from(tokens)) @@ -1222,7 +1266,6 @@ pub trait Parser: Sized { // Not public API. 
#[doc(hidden)] - #[cfg(any(feature = "full", feature = "derive"))] fn __parse_scoped(self, scope: Span, tokens: TokenStream) -> Result<Self::Output> { let _ = scope; self.parse2(tokens) @@ -1254,7 +1297,6 @@ where } } - #[cfg(any(feature = "full", feature = "derive"))] fn __parse_scoped(self, scope: Span, tokens: TokenStream) -> Result<Self::Output> { let buf = TokenBuffer::new2(tokens); let cursor = buf.begin(); @@ -1270,7 +1312,6 @@ where } } -#[cfg(any(feature = "full", feature = "derive"))] pub(crate) fn parse_scoped<F: Parser>(f: F, scope: Span, tokens: TokenStream) -> Result<F::Output> { f.__parse_scoped(scope, tokens) } diff --git a/vendor/syn/src/parse_quote.rs b/vendor/syn/src/parse_quote.rs index f512943..1e3cacf 100644 --- a/vendor/syn/src/parse_quote.rs +++ b/vendor/syn/src/parse_quote.rs @@ -107,7 +107,8 @@ macro_rules! parse_quote_spanned { //////////////////////////////////////////////////////////////////////////////// // Can parse any type that implements Parse. -use crate::parse::{Parse, ParseStream, Parser, Result}; +use crate::error::Result; +use crate::parse::{Parse, ParseStream, Parser}; use proc_macro2::TokenStream; // Not public API. 
@@ -120,6 +121,7 @@ pub fn parse<T: ParseQuote>(token_stream: TokenStream) -> T { } } +#[doc(hidden)] pub trait ParseQuote: Sized { fn parse(input: ParseStream) -> Result<Self>; } @@ -135,7 +137,7 @@ impl<T: Parse> ParseQuote for T { use crate::punctuated::Punctuated; #[cfg(any(feature = "full", feature = "derive"))] -use crate::{attr, Attribute}; +use crate::{attr, Attribute, Field, FieldMutability, Ident, Type, Visibility}; #[cfg(feature = "full")] use crate::{Block, Pat, Stmt}; @@ -150,6 +152,36 @@ impl ParseQuote for Attribute { } } +#[cfg(any(feature = "full", feature = "derive"))] +impl ParseQuote for Field { + fn parse(input: ParseStream) -> Result<Self> { + let attrs = input.call(Attribute::parse_outer)?; + let vis: Visibility = input.parse()?; + + let ident: Option<Ident>; + let colon_token: Option<Token![:]>; + let is_named = input.peek(Ident) && input.peek2(Token![:]) && !input.peek2(Token![::]); + if is_named { + ident = Some(input.parse()?); + colon_token = Some(input.parse()?); + } else { + ident = None; + colon_token = None; + } + + let ty: Type = input.parse()?; + + Ok(Field { + attrs, + vis, + mutability: FieldMutability::None, + ident, + colon_token, + ty, + }) + } +} + #[cfg(feature = "full")] impl ParseQuote for Pat { fn parse(input: ParseStream) -> Result<Self> { diff --git a/vendor/syn/src/pat.rs b/vendor/syn/src/pat.rs index 2e6376b..c029bae 100644 --- a/vendor/syn/src/pat.rs +++ b/vendor/syn/src/pat.rs @@ -1,7 +1,17 @@ -use super::*; +use crate::attr::Attribute; +use crate::expr::Member; +use crate::ident::Ident; +use crate::path::{Path, QSelf}; use crate::punctuated::Punctuated; +use crate::token; +use crate::ty::Type; use proc_macro2::TokenStream; +pub use crate::expr::{ + ExprConst as PatConst, ExprLit as PatLit, ExprMacro as PatMacro, ExprPath as PatPath, + ExprRange as PatRange, +}; + ast_enum_of_structs! { /// A pattern in a local binding, function signature, match expression, or /// various other places. 
@@ -10,7 +20,7 @@ ast_enum_of_structs! { /// /// This type is a [syntax tree enum]. /// - /// [syntax tree enum]: Expr#syntax-tree-enums + /// [syntax tree enum]: crate::expr::Expr#syntax-tree-enums #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] #[non_exhaustive] pub enum Pat { @@ -74,12 +84,13 @@ ast_enum_of_structs! { // For testing exhaustiveness in downstream code, use the following idiom: // // match pat { + // #![cfg_attr(test, deny(non_exhaustive_omitted_patterns))] + // // Pat::Box(pat) => {...} // Pat::Ident(pat) => {...} // ... // Pat::Wild(pat) => {...} // - // #[cfg_attr(test, deny(non_exhaustive_omitted_patterns))] // _ => { /* some sane fallback */ } // } // @@ -117,6 +128,7 @@ ast_struct! { ast_struct! { /// A parenthesized pattern: `(A | B)`. + #[cfg_attr(doc_cfg, doc(cfg(feature = "full")))] pub struct PatParen { pub attrs: Vec<Attribute>, pub paren_token: token::Paren, @@ -225,10 +237,26 @@ ast_struct! { #[cfg(feature = "parsing")] pub(crate) mod parsing { - use super::*; - use crate::ext::IdentExt; - use crate::parse::{ParseBuffer, ParseStream, Result}; - use crate::path; + use crate::attr::Attribute; + use crate::error::{self, Result}; + use crate::expr::{ + Expr, ExprConst, ExprLit, ExprMacro, ExprPath, ExprRange, Member, RangeLimits, + }; + use crate::ext::IdentExt as _; + use crate::ident::Ident; + use crate::lit::Lit; + use crate::mac::{self, Macro}; + use crate::parse::{Parse, ParseBuffer, ParseStream}; + use crate::pat::{ + FieldPat, Pat, PatIdent, PatOr, PatParen, PatReference, PatRest, PatSlice, PatStruct, + PatTuple, PatTupleStruct, PatType, PatWild, + }; + use crate::path::{self, Path, QSelf}; + use crate::punctuated::Punctuated; + use crate::stmt::Block; + use crate::token; + use crate::verbatim; + use proc_macro2::TokenStream; #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] impl Pat { @@ -354,6 +382,18 @@ pub(crate) mod parsing { } } + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for PatType { + fn 
parse(input: ParseStream) -> Result<Self> { + Ok(PatType { + attrs: Vec::new(), + pat: Box::new(Pat::parse_single(input)?), + colon_token: input.parse()?, + ty: input.parse()?, + }) + } + } + fn multi_pat_impl(input: ParseStream, leading_vert: Option<Token![|]>) -> Result<Pat> { let mut pat = Pat::parse_single(input)?; if leading_vert.is_some() @@ -422,7 +462,7 @@ pub(crate) mod parsing { fn pat_box(begin: ParseBuffer, input: ParseStream) -> Result<Pat> { input.parse::<Token![box]>()?; Pat::parse_single(input)?; - Ok(Pat::Verbatim(verbatim::between(begin, input))) + Ok(Pat::Verbatim(verbatim::between(&begin, input))) } fn pat_ident(input: ParseStream) -> Result<PatIdent> { @@ -506,15 +546,6 @@ pub(crate) mod parsing { }) } - impl Member { - fn is_unnamed(&self) -> bool { - match self { - Member::Named(_) => false, - Member::Unnamed(_) => true, - } - } - } - fn field_pat(input: ParseStream) -> Result<FieldPat> { let begin = input.fork(); let boxed: Option<Token![box]> = input.parse()?; @@ -528,7 +559,7 @@ pub(crate) mod parsing { }?; if boxed.is_none() && by_ref.is_none() && mutability.is_none() && input.peek(Token![:]) - || member.is_unnamed() + || !member.is_named() { return Ok(FieldPat { attrs: Vec::new(), @@ -544,7 +575,7 @@ pub(crate) mod parsing { }; let pat = if boxed.is_some() { - Pat::Verbatim(verbatim::between(begin, input)) + Pat::Verbatim(verbatim::between(&begin, input)) } else { Pat::Ident(PatIdent { attrs: Vec::new(), @@ -762,14 +793,18 @@ pub(crate) mod parsing { content.call(Attribute::parse_inner)?; content.call(Block::parse_within)?; - Ok(verbatim::between(begin, input)) + Ok(verbatim::between(&begin, input)) } } #[cfg(feature = "printing")] mod printing { - use super::*; use crate::attr::FilterAttrs; + use crate::pat::{ + FieldPat, Pat, PatIdent, PatOr, PatParen, PatReference, PatRest, PatSlice, PatStruct, + PatTuple, PatTupleStruct, PatType, PatWild, + }; + use crate::path; use proc_macro2::TokenStream; use quote::{ToTokens, TokenStreamExt}; @@ 
-856,6 +891,15 @@ mod printing { tokens.append_all(self.attrs.outer()); self.paren_token.surround(tokens, |tokens| { self.elems.to_tokens(tokens); + // If there is only one element, a trailing comma is needed to + // distinguish PatTuple from PatParen, unless this is `(..)` + // which is a tuple pattern even without comma. + if self.elems.len() == 1 + && !self.elems.trailing_punct() + && !matches!(self.elems[0], Pat::Rest { .. }) + { + <Token![,]>::default().to_tokens(tokens); + } }); } } diff --git a/vendor/syn/src/path.rs b/vendor/syn/src/path.rs index e99a3f8..3f58945 100644 --- a/vendor/syn/src/path.rs +++ b/vendor/syn/src/path.rs @@ -1,5 +1,12 @@ -use super::*; +#[cfg(feature = "parsing")] +use crate::error::Result; +use crate::expr::Expr; +use crate::generics::TypeParamBound; +use crate::ident::Ident; +use crate::lifetime::Lifetime; use crate::punctuated::Punctuated; +use crate::token; +use crate::ty::{ReturnType, Type}; ast_struct! { /// A path at which a named item is exported (e.g. `std::collections::HashMap`). @@ -53,8 +60,9 @@ impl Path { /// } /// } /// ``` - pub fn is_ident<I: ?Sized>(&self, ident: &I) -> bool + pub fn is_ident<I>(&self, ident: &I) -> bool where + I: ?Sized, Ident: PartialEq<I>, { match self.get_ident() { @@ -81,6 +89,19 @@ impl Path { None } } + + /// An error if this path is not a single ident, as defined in `get_ident`. + #[cfg(feature = "parsing")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + pub fn require_ident(&self) -> Result<&Ident> { + self.get_ident().ok_or_else(|| { + crate::error::new2( + self.segments.first().unwrap().ident.span(), + self.segments.last().unwrap().ident.span(), + "expected this path to be an identifier", + ) + }) + } } ast_struct! { @@ -261,10 +282,28 @@ ast_struct! 
{ #[cfg(feature = "parsing")] pub(crate) mod parsing { - use super::*; - - use crate::ext::IdentExt; - use crate::parse::{Parse, ParseStream, Result}; + use crate::error::Result; + #[cfg(feature = "full")] + use crate::expr::ExprBlock; + use crate::expr::{Expr, ExprPath}; + use crate::ext::IdentExt as _; + #[cfg(feature = "full")] + use crate::generics::TypeParamBound; + use crate::ident::Ident; + use crate::lifetime::Lifetime; + use crate::lit::Lit; + use crate::parse::{Parse, ParseStream}; + #[cfg(feature = "full")] + use crate::path::Constraint; + use crate::path::{ + AngleBracketedGenericArguments, AssocConst, AssocType, GenericArgument, + ParenthesizedGenericArguments, Path, PathArguments, PathSegment, QSelf, + }; + use crate::punctuated::Punctuated; + use crate::token; + use crate::ty::{ReturnType, Type}; + #[cfg(not(feature = "full"))] + use crate::verbatim; #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] impl Parse for Path { @@ -368,7 +407,6 @@ pub(crate) mod parsing { return Ok(Expr::Lit(lit)); } - #[cfg(feature = "full")] if input.peek(Ident) { let ident: Ident = input.parse()?; return Ok(Expr::Path(ExprPath { @@ -391,7 +429,7 @@ pub(crate) mod parsing { let content; braced!(content in input); content.parse::<Expr>()?; - let verbatim = verbatim::between(begin, input); + let verbatim = verbatim::between(&begin, input); return Ok(Expr::Verbatim(verbatim)); } } @@ -411,7 +449,10 @@ pub(crate) mod parsing { Self::do_parse(Some(colon2_token), input) } - fn do_parse(colon2_token: Option<Token![::]>, input: ParseStream) -> Result<Self> { + pub(crate) fn do_parse( + colon2_token: Option<Token![::]>, + input: ParseStream, + ) -> Result<Self> { Ok(AngleBracketedGenericArguments { colon2_token, lt_token: input.parse()?, @@ -647,8 +688,17 @@ pub(crate) mod parsing { #[cfg(feature = "printing")] pub(crate) mod printing { - use super::*; + use crate::expr::Expr; + use crate::path::{ + AngleBracketedGenericArguments, AssocConst, AssocType, Constraint, 
GenericArgument, + ParenthesizedGenericArguments, Path, PathArguments, PathSegment, QSelf, + }; use crate::print::TokensOrDefault; + #[cfg(feature = "parsing")] + use crate::spanned::Spanned; + use crate::token; + #[cfg(feature = "parsing")] + use proc_macro2::Span; use proc_macro2::TokenStream; use quote::ToTokens; use std::cmp; @@ -692,10 +742,21 @@ pub(crate) mod printing { GenericArgument::Lifetime(lt) => lt.to_tokens(tokens), GenericArgument::Type(ty) => ty.to_tokens(tokens), GenericArgument::Const(expr) => match expr { - Expr::Lit(_) => expr.to_tokens(tokens), + Expr::Lit(expr) => expr.to_tokens(tokens), + + Expr::Path(expr) + if expr.attrs.is_empty() + && expr.qself.is_none() + && expr.path.get_ident().is_some() => + { + expr.to_tokens(tokens); + } #[cfg(feature = "full")] - Expr::Block(_) => expr.to_tokens(tokens), + Expr::Block(expr) => expr.to_tokens(tokens), + + #[cfg(not(feature = "full"))] + Expr::Verbatim(expr) => expr.to_tokens(tokens), // ERROR CORRECTION: Add braces to make sure that the // generated code is valid. @@ -826,4 +887,21 @@ pub(crate) mod printing { segment.to_tokens(tokens); } } + + #[cfg(feature = "parsing")] + #[cfg_attr(doc_cfg, doc(cfg(all(feature = "parsing", feature = "printing"))))] + impl Spanned for QSelf { + fn span(&self) -> Span { + struct QSelfDelimiters<'a>(&'a QSelf); + + impl<'a> ToTokens for QSelfDelimiters<'a> { + fn to_tokens(&self, tokens: &mut TokenStream) { + self.0.lt_token.to_tokens(tokens); + self.0.gt_token.to_tokens(tokens); + } + } + + QSelfDelimiters(self).span() + } + } } diff --git a/vendor/syn/src/punctuated.rs b/vendor/syn/src/punctuated.rs index a427808..d644354 100644 --- a/vendor/syn/src/punctuated.rs +++ b/vendor/syn/src/punctuated.rs @@ -20,6 +20,13 @@ //! ~~~~^ ~~~~^ ~~~~ //! 
``` +use crate::drops::{NoDrop, TrivialDrop}; +#[cfg(feature = "parsing")] +use crate::error::Result; +#[cfg(feature = "parsing")] +use crate::parse::{Parse, ParseStream}; +#[cfg(feature = "parsing")] +use crate::token::Token; #[cfg(feature = "extra-traits")] use std::fmt::{self, Debug}; #[cfg(feature = "extra-traits")] @@ -31,12 +38,6 @@ use std::option; use std::slice; use std::vec; -use crate::drops::{NoDrop, TrivialDrop}; -#[cfg(feature = "parsing")] -use crate::parse::{Parse, ParseStream, Result}; -#[cfg(feature = "parsing")] -use crate::token::Token; - /// **A punctuated sequence of syntax tree nodes of type `T` separated by /// punctuation of type `P`.** /// @@ -369,6 +370,11 @@ where last: self.last.clone(), } } + + fn clone_from(&mut self, other: &Self) { + self.inner.clone_from(&other.inner); + self.last.clone_from(&other.last); + } } #[cfg(feature = "extra-traits")] @@ -1069,7 +1075,7 @@ impl<T, P> IndexMut<usize> for Punctuated<T, P> { #[cfg(feature = "printing")] mod printing { - use super::*; + use crate::punctuated::{Pair, Punctuated}; use proc_macro2::TokenStream; use quote::{ToTokens, TokenStreamExt}; diff --git a/vendor/syn/src/restriction.rs b/vendor/syn/src/restriction.rs index 97c7f5a..70d2853 100644 --- a/vendor/syn/src/restriction.rs +++ b/vendor/syn/src/restriction.rs @@ -1,4 +1,5 @@ -use super::*; +use crate::path::Path; +use crate::token; ast_enum! { /// The visibility level of an item: inherited or `pub` or @@ -8,7 +9,7 @@ ast_enum! { /// /// This type is a [syntax tree enum]. /// - /// [syntax tree enum]: Expr#syntax-tree-enums + /// [syntax tree enum]: crate::expr::Expr#syntax-tree-enums #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] pub enum Visibility { /// A public visibility level: `pub`. @@ -57,10 +58,14 @@ ast_enum! 
{ #[cfg(feature = "parsing")] pub(crate) mod parsing { - use super::*; - use crate::ext::IdentExt; - use crate::parse::discouraged::Speculative; - use crate::parse::{Parse, ParseStream, Result}; + use crate::error::Result; + use crate::ext::IdentExt as _; + use crate::ident::Ident; + use crate::parse::discouraged::Speculative as _; + use crate::parse::{Parse, ParseStream}; + use crate::path::Path; + use crate::restriction::{VisRestricted, Visibility}; + use crate::token; #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] impl Parse for Visibility { @@ -141,7 +146,7 @@ pub(crate) mod parsing { #[cfg(feature = "printing")] mod printing { - use super::*; + use crate::restriction::{VisRestricted, Visibility}; use proc_macro2::TokenStream; use quote::ToTokens; diff --git a/vendor/syn/src/span.rs b/vendor/syn/src/span.rs index 50a26b8..eb27794 100644 --- a/vendor/syn/src/span.rs +++ b/vendor/syn/src/span.rs @@ -1,6 +1,7 @@ use proc_macro2::extra::DelimSpan; use proc_macro2::{Delimiter, Group, Span, TokenStream}; +#[doc(hidden)] pub trait IntoSpans<S> { fn into_spans(self) -> S; } diff --git a/vendor/syn/src/spanned.rs b/vendor/syn/src/spanned.rs index 7e101d2..17b69e9 100644 --- a/vendor/syn/src/spanned.rs +++ b/vendor/syn/src/spanned.rs @@ -108,8 +108,11 @@ impl<T: ?Sized + ToTokens> Spanned for T { } mod private { - use super::*; + use crate::spanned::ToTokens; pub trait Sealed {} impl<T: ?Sized + ToTokens> Sealed for T {} + + #[cfg(any(feature = "full", feature = "derive"))] + impl Sealed for crate::QSelf {} } diff --git a/vendor/syn/src/stmt.rs b/vendor/syn/src/stmt.rs index b5434f7..8a987a5 100644 --- a/vendor/syn/src/stmt.rs +++ b/vendor/syn/src/stmt.rs @@ -1,4 +1,9 @@ -use super::*; +use crate::attr::Attribute; +use crate::expr::Expr; +use crate::item::Item; +use crate::mac::Macro; +use crate::pat::Pat; +use crate::token; ast_struct! { /// A braced block containing Rust statements. @@ -74,9 +79,19 @@ ast_struct! 
{ #[cfg(feature = "parsing")] pub(crate) mod parsing { - use super::*; - use crate::parse::discouraged::Speculative; - use crate::parse::{Parse, ParseStream, Result}; + use crate::attr::Attribute; + use crate::error::Result; + use crate::expr::{self, Expr, ExprBlock, ExprMacro}; + use crate::ident::Ident; + use crate::item; + use crate::mac::{self, Macro}; + use crate::parse::discouraged::Speculative as _; + use crate::parse::{Parse, ParseStream}; + use crate::pat::{Pat, PatType}; + use crate::path::Path; + use crate::stmt::{Block, Local, LocalInit, Stmt, StmtMacro}; + use crate::token; + use crate::ty::Type; use proc_macro2::TokenStream; struct AllowNoSemi(bool); @@ -180,7 +195,8 @@ pub(crate) mod parsing { } fn parse_stmt(input: ParseStream, allow_nosemi: AllowNoSemi) -> Result<Stmt> { - let mut attrs = input.call(Attribute::parse_outer)?; + let begin = input.fork(); + let attrs = input.call(Attribute::parse_outer)?; // brace-style macros; paren and bracket macros get parsed as // expression statements. 
@@ -199,7 +215,7 @@ pub(crate) mod parsing { } } - if input.peek(Token![let]) { + if input.peek(Token![let]) && !input.peek(token::Group) { stmt_local(input, attrs).map(Stmt::Local) } else if input.peek(Token![pub]) || input.peek(Token![crate]) && !input.peek2(Token![::]) @@ -238,9 +254,7 @@ pub(crate) mod parsing { || input.peek(Token![macro]) || is_item_macro { - let mut item: Item = input.parse()?; - attrs.extend(item.replace_attrs(Vec::new())); - item.replace_attrs(attrs); + let item = item::parsing::parse_rest_of_item(begin, attrs, input)?; Ok(Stmt::Item(item)) } else { stmt_expr(input, allow_nosemi, attrs) @@ -396,7 +410,8 @@ pub(crate) mod parsing { #[cfg(feature = "printing")] mod printing { - use super::*; + use crate::expr; + use crate::stmt::{Block, Local, Stmt, StmtMacro}; use proc_macro2::TokenStream; use quote::{ToTokens, TokenStreamExt}; diff --git a/vendor/syn/src/thread.rs b/vendor/syn/src/thread.rs index 63fdea8..b33d248 100644 --- a/vendor/syn/src/thread.rs +++ b/vendor/syn/src/thread.rs @@ -12,6 +12,9 @@ pub(crate) struct ThreadBound<T> { unsafe impl<T> Sync for ThreadBound<T> {} // Send bound requires Copy, as otherwise Drop could run in the wrong place. +// +// Today Copy and Drop are mutually exclusive so `T: Copy` implies `T: !Drop`. +// This impl needs to be revisited if that restriction is relaxed in the future. unsafe impl<T: Copy> Send for ThreadBound<T> {} impl<T> ThreadBound<T> { @@ -40,11 +43,18 @@ impl<T: Debug> Debug for ThreadBound<T> { } } -impl<T: Clone> Clone for ThreadBound<T> { +// Copy the bytes of T, even if the currently running thread is the "wrong" +// thread. This is fine as long as the original thread is not simultaneously +// mutating this value via interior mutability, which would be a data race. +// +// Currently `T: Copy` is sufficient to guarantee that T contains no interior +// mutability, because _all_ interior mutability in Rust is built on +// std::cell::UnsafeCell, which has no Copy impl. 
This impl needs to be +// revisited if that restriction is relaxed in the future. +impl<T: Copy> Copy for ThreadBound<T> {} + +impl<T: Copy> Clone for ThreadBound<T> { fn clone(&self) -> Self { - ThreadBound { - value: self.value.clone(), - thread_id: self.thread_id, - } + *self } } diff --git a/vendor/syn/src/token.rs b/vendor/syn/src/token.rs index c140571..05d8f56 100644 --- a/vendor/syn/src/token.rs +++ b/vendor/syn/src/token.rs @@ -88,6 +88,8 @@ //! [Printing]: https://docs.rs/quote/1.0/quote/trait.ToTokens.html //! [`Span`]: https://docs.rs/proc-macro2/1.0/proc_macro2/struct.Span.html +#[cfg(feature = "parsing")] +pub(crate) use self::private::CustomToken; use self::private::WithSpan; #[cfg(feature = "parsing")] use crate::buffer::Cursor; @@ -134,7 +136,9 @@ pub trait Token: private::Sealed { fn display() -> &'static str; } -mod private { +pub(crate) mod private { + #[cfg(feature = "parsing")] + use crate::buffer::Cursor; use proc_macro2::Span; #[cfg(feature = "parsing")] @@ -143,9 +147,18 @@ mod private { /// Support writing `token.span` rather than `token.spans[0]` on tokens that /// hold a single span. #[repr(transparent)] + #[allow(unknown_lints, repr_transparent_external_private_fields)] // False positive: https://github.com/rust-lang/rust/issues/78586#issuecomment-1722680482 pub struct WithSpan { pub span: Span, } + + // Not public API. + #[doc(hidden)] + #[cfg(feature = "parsing")] + pub trait CustomToken { + fn peek(cursor: Cursor) -> bool; + fn display() -> &'static str; + } } #[cfg(feature = "parsing")] @@ -217,14 +230,6 @@ impl_low_level_token!("punctuation token" Punct punct); impl_low_level_token!("literal" Literal literal); impl_low_level_token!("token" TokenTree token_tree); -// Not public API. -#[doc(hidden)] -#[cfg(feature = "parsing")] -pub trait CustomToken { - fn peek(cursor: Cursor) -> bool; - fn display() -> &'static str; -} - #[cfg(feature = "parsing")] impl<T: CustomToken> private::Sealed for T {} @@ -365,6 +370,7 @@ macro_rules! 
define_punctuation_structs { ($($token:literal pub struct $name:ident/$len:tt #[doc = $usage:literal])*) => { $( #[cfg_attr(not(doc), repr(transparent))] + #[allow(unknown_lints, repr_transparent_external_private_fields)] // False positive: https://github.com/rust-lang/rust/issues/78586#issuecomment-1722680482 #[doc = concat!('`', $token, '`')] /// /// Usage: @@ -840,6 +846,67 @@ define_delimiters! { /// A type-macro that expands to the name of the Rust type representation of a /// given token. /// +/// As a type, `Token!` is commonly used in the type of struct fields, the type +/// of a `let` statement, or in turbofish for a `parse` function. +/// +/// ``` +/// use syn::{Ident, Token}; +/// use syn::parse::{Parse, ParseStream, Result}; +/// +/// // `struct Foo;` +/// pub struct UnitStruct { +/// struct_token: Token![struct], +/// ident: Ident, +/// semi_token: Token![;], +/// } +/// +/// impl Parse for UnitStruct { +/// fn parse(input: ParseStream) -> Result<Self> { +/// let struct_token: Token![struct] = input.parse()?; +/// let ident: Ident = input.parse()?; +/// let semi_token = input.parse::<Token![;]>()?; +/// Ok(UnitStruct { struct_token, ident, semi_token }) +/// } +/// } +/// ``` +/// +/// As an expression, `Token!` is used for peeking tokens or instantiating +/// tokens from a span. 
+/// +/// ``` +/// # use syn::{Ident, Token}; +/// # use syn::parse::{Parse, ParseStream, Result}; +/// # +/// # struct UnitStruct { +/// # struct_token: Token![struct], +/// # ident: Ident, +/// # semi_token: Token![;], +/// # } +/// # +/// # impl Parse for UnitStruct { +/// # fn parse(input: ParseStream) -> Result<Self> { +/// # unimplemented!() +/// # } +/// # } +/// # +/// fn make_unit_struct(name: Ident) -> UnitStruct { +/// let span = name.span(); +/// UnitStruct { +/// struct_token: Token![struct](span), +/// ident: name, +/// semi_token: Token![;](span), +/// } +/// } +/// +/// # fn parse(input: ParseStream) -> Result<()> { +/// if input.peek(Token![struct]) { +/// let unit_struct: UnitStruct = input.parse()?; +/// /* ... */ +/// } +/// # Ok(()) +/// # } +/// ``` +/// /// See the [token module] documentation for details and examples. /// /// [token module]: crate::token @@ -974,6 +1041,7 @@ pub(crate) mod parsing { } } + #[doc(hidden)] pub fn punct<const N: usize>(input: ParseStream, token: &str) -> Result<[Span; N]> { let mut spans = [input.span(); N]; punct_helper(input, token, &mut spans)?; @@ -1006,6 +1074,7 @@ pub(crate) mod parsing { }) } + #[doc(hidden)] pub fn peek_punct(mut cursor: Cursor, token: &str) -> bool { for (i, ch) in token.chars().enumerate() { match cursor.punct() { @@ -1033,6 +1102,7 @@ pub(crate) mod printing { use proc_macro2::{Delimiter, Group, Ident, Punct, Spacing, Span, TokenStream}; use quote::TokenStreamExt; + #[doc(hidden)] pub fn punct(s: &str, spans: &[Span], tokens: &mut TokenStream) { assert_eq!(s.len(), spans.len()); diff --git a/vendor/syn/src/ty.rs b/vendor/syn/src/ty.rs index 9282ba4..55c3139 100644 --- a/vendor/syn/src/ty.rs +++ b/vendor/syn/src/ty.rs @@ -1,5 +1,13 @@ -use super::*; +use crate::attr::Attribute; +use crate::expr::Expr; +use crate::generics::{BoundLifetimes, TypeParamBound}; +use crate::ident::Ident; +use crate::lifetime::Lifetime; +use crate::lit::LitStr; +use crate::mac::Macro; +use crate::path::{Path, 
QSelf}; use crate::punctuated::Punctuated; +use crate::token; use proc_macro2::TokenStream; ast_enum_of_structs! { @@ -9,7 +17,7 @@ ast_enum_of_structs! { /// /// This type is a [syntax tree enum]. /// - /// [syntax tree enum]: Expr#syntax-tree-enums + /// [syntax tree enum]: crate::expr::Expr#syntax-tree-enums #[cfg_attr(doc_cfg, doc(cfg(any(feature = "full", feature = "derive"))))] #[non_exhaustive] pub enum Type { @@ -64,12 +72,13 @@ ast_enum_of_structs! { // For testing exhaustiveness in downstream code, use the following idiom: // // match ty { + // #![cfg_attr(test, deny(non_exhaustive_omitted_patterns))] + // // Type::Array(ty) => {...} // Type::BareFn(ty) => {...} // ... // Type::Verbatim(ty) => {...} // - // #[cfg_attr(test, deny(non_exhaustive_omitted_patterns))] // _ => { /* some sane fallback */ } // } // @@ -263,10 +272,24 @@ ast_enum! { #[cfg(feature = "parsing")] pub(crate) mod parsing { - use super::*; - use crate::ext::IdentExt; - use crate::parse::{Parse, ParseStream, Result}; + use crate::attr::Attribute; + use crate::error::{self, Result}; + use crate::ext::IdentExt as _; + use crate::generics::{BoundLifetimes, TraitBound, TraitBoundModifier, TypeParamBound}; + use crate::ident::Ident; + use crate::lifetime::Lifetime; + use crate::mac::{self, Macro}; + use crate::parse::{Parse, ParseStream}; use crate::path; + use crate::path::{Path, PathArguments, QSelf}; + use crate::punctuated::Punctuated; + use crate::token; + use crate::ty::{ + Abi, BareFnArg, BareVariadic, ReturnType, Type, TypeArray, TypeBareFn, TypeGroup, + TypeImplTrait, TypeInfer, TypeMacro, TypeNever, TypeParen, TypePath, TypePtr, + TypeReference, TypeSlice, TypeTraitObject, TypeTuple, + }; + use crate::verbatim; use proc_macro2::Span; #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] @@ -525,7 +548,7 @@ pub(crate) mod parsing { let star_token: Option<Token![*]> = input.parse()?; let bounds = TypeTraitObject::parse_bounds(dyn_span, input, allow_plus)?; return Ok(if 
star_token.is_some() { - Type::Verbatim(verbatim::between(begin, input)) + Type::Verbatim(verbatim::between(&begin, input)) } else { Type::TraitObject(TypeTraitObject { dyn_token: Some(dyn_token), @@ -947,7 +970,7 @@ pub(crate) mod parsing { Some(ty) if !has_mut_self => ty, _ => { name = None; - Type::Verbatim(verbatim::between(begin, input)) + Type::Verbatim(verbatim::between(&begin, input)) } }; @@ -993,9 +1016,14 @@ pub(crate) mod parsing { #[cfg(feature = "printing")] mod printing { - use super::*; use crate::attr::FilterAttrs; + use crate::path; use crate::print::TokensOrDefault; + use crate::ty::{ + Abi, BareFnArg, BareVariadic, ReturnType, TypeArray, TypeBareFn, TypeGroup, TypeImplTrait, + TypeInfer, TypeMacro, TypeNever, TypeParen, TypePath, TypePtr, TypeReference, TypeSlice, + TypeTraitObject, TypeTuple, + }; use proc_macro2::TokenStream; use quote::{ToTokens, TokenStreamExt}; diff --git a/vendor/syn/src/verbatim.rs b/vendor/syn/src/verbatim.rs index 436d873..54dc1cf 100644 --- a/vendor/syn/src/verbatim.rs +++ b/vendor/syn/src/verbatim.rs @@ -1,9 +1,9 @@ -use crate::parse::{ParseBuffer, ParseStream}; +use crate::parse::ParseStream; use proc_macro2::{Delimiter, TokenStream}; use std::cmp::Ordering; use std::iter; -pub(crate) fn between<'a>(begin: ParseBuffer<'a>, end: ParseStream<'a>) -> TokenStream { +pub(crate) fn between<'a>(begin: ParseStream<'a>, end: ParseStream<'a>) -> TokenStream { let end = end.cursor(); let mut cursor = begin.cursor(); assert!(crate::buffer::same_buffer(end, cursor)); diff --git a/vendor/syn/tests/common/eq.rs b/vendor/syn/tests/common/eq.rs index 2f64121..30ab9a1 100644 --- a/vendor/syn/tests/common/eq.rs +++ b/vendor/syn/tests/common/eq.rs @@ -13,7 +13,6 @@ use rustc_ast::ast::Arm; use rustc_ast::ast::AssocConstraint; use rustc_ast::ast::AssocConstraintKind; use rustc_ast::ast::AssocItemKind; -use rustc_ast::ast::Async; use rustc_ast::ast::AttrArgs; use rustc_ast::ast::AttrArgsEq; use rustc_ast::ast::AttrId; @@ -27,14 +26,19 @@ 
use rustc_ast::ast::BindingAnnotation; use rustc_ast::ast::Block; use rustc_ast::ast::BlockCheckMode; use rustc_ast::ast::BorrowKind; +use rustc_ast::ast::BoundAsyncness; +use rustc_ast::ast::BoundConstness; +use rustc_ast::ast::BoundPolarity; use rustc_ast::ast::ByRef; use rustc_ast::ast::CaptureBy; use rustc_ast::ast::Closure; use rustc_ast::ast::ClosureBinder; use rustc_ast::ast::Const; use rustc_ast::ast::ConstItem; +use rustc_ast::ast::CoroutineKind; use rustc_ast::ast::Crate; use rustc_ast::ast::Defaultness; +use rustc_ast::ast::Delegation; use rustc_ast::ast::DelimArgs; use rustc_ast::ast::EnumDef; use rustc_ast::ast::Expr; @@ -48,6 +52,7 @@ use rustc_ast::ast::FnDecl; use rustc_ast::ast::FnHeader; use rustc_ast::ast::FnRetTy; use rustc_ast::ast::FnSig; +use rustc_ast::ast::ForLoopKind; use rustc_ast::ast::ForeignItemKind; use rustc_ast::ast::ForeignMod; use rustc_ast::ast::FormatAlignment; @@ -64,6 +69,7 @@ use rustc_ast::ast::FormatOptions; use rustc_ast::ast::FormatPlaceholder; use rustc_ast::ast::FormatSign; use rustc_ast::ast::FormatTrait; +use rustc_ast::ast::GenBlockKind; use rustc_ast::ast::GenericArg; use rustc_ast::ast::GenericArgs; use rustc_ast::ast::GenericBound; @@ -92,7 +98,6 @@ use rustc_ast::ast::Local; use rustc_ast::ast::LocalKind; use rustc_ast::ast::MacCall; use rustc_ast::ast::MacCallStmt; -use rustc_ast::ast::MacDelimiter; use rustc_ast::ast::MacStmtStyle; use rustc_ast::ast::MacroDef; use rustc_ast::ast::MetaItemLit; @@ -108,6 +113,7 @@ use rustc_ast::ast::Param; use rustc_ast::ast::ParenthesizedArgs; use rustc_ast::ast::Pat; use rustc_ast::ast::PatField; +use rustc_ast::ast::PatFieldsRest; use rustc_ast::ast::PatKind; use rustc_ast::ast::Path; use rustc_ast::ast::PathSegment; @@ -125,7 +131,7 @@ use rustc_ast::ast::StructExpr; use rustc_ast::ast::StructRest; use rustc_ast::ast::Term; use rustc_ast::ast::Trait; -use rustc_ast::ast::TraitBoundModifier; +use rustc_ast::ast::TraitBoundModifiers; use rustc_ast::ast::TraitObjectSyntax; use 
rustc_ast::ast::TraitRef; use rustc_ast::ast::Ty; @@ -148,15 +154,18 @@ use rustc_ast::ast::WhereEqPredicate; use rustc_ast::ast::WherePredicate; use rustc_ast::ast::WhereRegionPredicate; use rustc_ast::ptr::P; -use rustc_ast::token::{self, CommentKind, Delimiter, Lit, Nonterminal, Token, TokenKind}; +use rustc_ast::token::{ + self, CommentKind, Delimiter, IdentIsRaw, Lit, Nonterminal, Token, TokenKind, +}; use rustc_ast::tokenstream::{ - AttrTokenStream, AttrTokenTree, AttributesData, DelimSpan, LazyAttrTokenStream, Spacing, - TokenStream, TokenTree, + AttrTokenStream, AttrTokenTree, AttributesData, DelimSpacing, DelimSpan, LazyAttrTokenStream, + Spacing, TokenStream, TokenTree, }; +use rustc_data_structures::packed::Pu128; use rustc_data_structures::sync::Lrc; use rustc_span::source_map::Spanned; use rustc_span::symbol::{sym, Ident}; -use rustc_span::{Span, Symbol, SyntaxContext, DUMMY_SP}; +use rustc_span::{ErrorGuaranteed, Span, Symbol, SyntaxContext, DUMMY_SP}; use std::collections::HashMap; use std::hash::{BuildHasher, Hash}; use thin_vec::ThinVec; @@ -291,11 +300,13 @@ spanless_eq_partial_eq!(u128); spanless_eq_partial_eq!(usize); spanless_eq_partial_eq!(char); spanless_eq_partial_eq!(String); +spanless_eq_partial_eq!(Pu128); spanless_eq_partial_eq!(Symbol); spanless_eq_partial_eq!(CommentKind); spanless_eq_partial_eq!(Delimiter); spanless_eq_partial_eq!(InlineAsmOptions); spanless_eq_partial_eq!(token::LitKind); +spanless_eq_partial_eq!(ErrorGuaranteed); macro_rules! 
spanless_eq_struct { { @@ -455,17 +466,19 @@ spanless_eq_struct!(AttributesData; attrs tokens); spanless_eq_struct!(BareFnTy; unsafety ext generic_params decl decl_span); spanless_eq_struct!(BindingAnnotation; 0 1); spanless_eq_struct!(Block; stmts id rules span tokens could_be_bare_literal); -spanless_eq_struct!(Closure; binder capture_clause constness asyncness movability fn_decl body !fn_decl_span !fn_arg_span); -spanless_eq_struct!(ConstItem; defaultness ty expr); +spanless_eq_struct!(Closure; binder capture_clause constness coroutine_kind movability fn_decl body !fn_decl_span !fn_arg_span); +spanless_eq_struct!(ConstItem; defaultness generics ty expr); spanless_eq_struct!(Crate; attrs items spans id is_placeholder); +spanless_eq_struct!(Delegation; id qself path body); spanless_eq_struct!(DelimArgs; dspan delim tokens); +spanless_eq_struct!(DelimSpacing; open close); spanless_eq_struct!(EnumDef; variants); spanless_eq_struct!(Expr; id kind span attrs !tokens); spanless_eq_struct!(ExprField; attrs id span ident expr is_shorthand is_placeholder); spanless_eq_struct!(FieldDef; attrs id span vis ident ty is_placeholder); spanless_eq_struct!(Fn; defaultness generics sig body); spanless_eq_struct!(FnDecl; inputs output); -spanless_eq_struct!(FnHeader; constness asyncness unsafety ext); +spanless_eq_struct!(FnHeader; constness coroutine_kind unsafety ext); spanless_eq_struct!(FnSig; header decl span); spanless_eq_struct!(ForeignMod; unsafety abi items); spanless_eq_struct!(FormatArgPosition; index kind span); @@ -483,7 +496,7 @@ spanless_eq_struct!(Label; ident); spanless_eq_struct!(Lifetime; id ident); spanless_eq_struct!(Lit; kind symbol suffix); spanless_eq_struct!(Local; pat ty kind id span attrs !tokens); -spanless_eq_struct!(MacCall; path args prior_type_ascription); +spanless_eq_struct!(MacCall; path args); spanless_eq_struct!(MacCallStmt; mac style attrs tokens); spanless_eq_struct!(MacroDef; body macro_rules); spanless_eq_struct!(MetaItemLit; symbol suffix 
kind span); @@ -504,6 +517,7 @@ spanless_eq_struct!(StrLit; symbol suffix symbol_unescaped style span); spanless_eq_struct!(StructExpr; qself path fields rest); spanless_eq_struct!(Token; kind span); spanless_eq_struct!(Trait; unsafety is_auto generics bounds items); +spanless_eq_struct!(TraitBoundModifiers; constness asyncness polarity); spanless_eq_struct!(TraitRef; path ref_id); spanless_eq_struct!(Ty; id kind span tokens); spanless_eq_struct!(TyAlias; defaultness generics where_clauses !where_predicates_split bounds ty); @@ -517,23 +531,26 @@ spanless_eq_struct!(WhereEqPredicate; span lhs_ty rhs_ty); spanless_eq_struct!(WhereRegionPredicate; span lifetime bounds); spanless_eq_enum!(AngleBracketedArg; Arg(0) Constraint(0)); spanless_eq_enum!(AssocConstraintKind; Equality(term) Bound(bounds)); -spanless_eq_enum!(AssocItemKind; Const(0) Fn(0) Type(0) MacCall(0)); -spanless_eq_enum!(Async; Yes(span closure_id return_impl_trait_id) No); +spanless_eq_enum!(AssocItemKind; Const(0) Fn(0) Type(0) MacCall(0) Delegation(0)); spanless_eq_enum!(AttrArgs; Empty Delimited(0) Eq(0 1)); spanless_eq_enum!(AttrArgsEq; Ast(0) Hir(0)); spanless_eq_enum!(AttrStyle; Outer Inner); -spanless_eq_enum!(AttrTokenTree; Token(0 1) Delimited(0 1 2) Attributes(0)); +spanless_eq_enum!(AttrTokenTree; Token(0 1) Delimited(0 1 2 3) Attributes(0)); spanless_eq_enum!(BinOpKind; Add Sub Mul Div Rem And Or BitXor BitAnd BitOr Shl Shr Eq Lt Le Ne Ge Gt); spanless_eq_enum!(BlockCheckMode; Default Unsafe(0)); spanless_eq_enum!(BorrowKind; Ref Raw); +spanless_eq_enum!(BoundAsyncness; Normal Async(0)); +spanless_eq_enum!(BoundConstness; Never Always(0) Maybe(0)); +spanless_eq_enum!(BoundPolarity; Positive Negative(0) Maybe(0)); spanless_eq_enum!(ByRef; Yes No); -spanless_eq_enum!(CaptureBy; Value Ref); +spanless_eq_enum!(CaptureBy; Value(move_kw) Ref); spanless_eq_enum!(ClosureBinder; NotPresent For(span generic_params)); spanless_eq_enum!(Const; Yes(0) No); spanless_eq_enum!(Defaultness; Default(0) 
Final); spanless_eq_enum!(Extern; None Implicit(0) Explicit(0 1)); spanless_eq_enum!(FloatTy; F32 F64); spanless_eq_enum!(FnRetTy; Default(0) Ty(0)); +spanless_eq_enum!(ForLoopKind; For ForAwait); spanless_eq_enum!(ForeignItemKind; Static(0 1 2) Fn(0) TyAlias(0) MacCall(0)); spanless_eq_enum!(FormatAlignment; Left Right Center); spanless_eq_enum!(FormatArgPositionKind; Implicit Number Named); @@ -543,6 +560,7 @@ spanless_eq_enum!(FormatCount; Literal(0) Argument(0)); spanless_eq_enum!(FormatDebugHex; Lower Upper); spanless_eq_enum!(FormatSign; Plus Minus); spanless_eq_enum!(FormatTrait; Display Debug LowerExp UpperExp Octal Pointer Binary LowerHex UpperHex); +spanless_eq_enum!(GenBlockKind; Async Gen AsyncGen); spanless_eq_enum!(GenericArg; Lifetime(0) Type(0) Const(0)); spanless_eq_enum!(GenericArgs; AngleBracketed(0) Parenthesized(0)); spanless_eq_enum!(GenericBound; Trait(0 1) Outlives(0)); @@ -556,49 +574,54 @@ spanless_eq_enum!(IsAuto; Yes No); spanless_eq_enum!(LitFloatType; Suffixed(0) Unsuffixed); spanless_eq_enum!(LitIntType; Signed(0) Unsigned(0) Unsuffixed); spanless_eq_enum!(LocalKind; Decl Init(0) InitElse(0 1)); -spanless_eq_enum!(MacDelimiter; Parenthesis Bracket Brace); spanless_eq_enum!(MacStmtStyle; Semicolon Braces NoBraces); spanless_eq_enum!(ModKind; Loaded(0 1 2) Unloaded); spanless_eq_enum!(Movability; Static Movable); spanless_eq_enum!(Mutability; Mut Not); +spanless_eq_enum!(PatFieldsRest; Rest None); spanless_eq_enum!(RangeEnd; Included(0) Excluded); spanless_eq_enum!(RangeLimits; HalfOpen Closed); spanless_eq_enum!(StmtKind; Local(0) Item(0) Expr(0) Semi(0) Empty MacCall(0)); spanless_eq_enum!(StrStyle; Cooked Raw(0)); spanless_eq_enum!(StructRest; Base(0) Rest(0) None); spanless_eq_enum!(Term; Ty(0) Const(0)); -spanless_eq_enum!(TokenTree; Token(0 1) Delimited(0 1 2)); -spanless_eq_enum!(TraitBoundModifier; None Maybe MaybeConst MaybeConstMaybe); +spanless_eq_enum!(TokenTree; Token(0 1) Delimited(0 1 2 3)); 
spanless_eq_enum!(TraitObjectSyntax; Dyn DynStar None); spanless_eq_enum!(UintTy; Usize U8 U16 U32 U64 U128); spanless_eq_enum!(UnOp; Deref Not Neg); spanless_eq_enum!(Unsafe; Yes(0) No); spanless_eq_enum!(UnsafeSource; CompilerGenerated UserProvided); spanless_eq_enum!(UseTreeKind; Simple(0) Nested(0) Glob); -spanless_eq_enum!(VariantData; Struct(0 1) Tuple(0 1) Unit(0)); +spanless_eq_enum!(VariantData; Struct(fields recovered) Tuple(0 1) Unit(0)); spanless_eq_enum!(VisibilityKind; Public Restricted(path id shorthand) Inherited); spanless_eq_enum!(WherePredicate; BoundPredicate(0) RegionPredicate(0) EqPredicate(0)); +spanless_eq_enum!(CoroutineKind; Async(span closure_id return_impl_trait_id) + Gen(span closure_id return_impl_trait_id) + AsyncGen(span closure_id return_impl_trait_id)); spanless_eq_enum!(ExprKind; Array(0) ConstBlock(0) Call(0 1) MethodCall(0) - Tup(0) Binary(0 1 2) Unary(0 1) Lit(0) Cast(0 1) Type(0 1) Let(0 1 2) - If(0 1 2) While(0 1 2) ForLoop(0 1 2 3) Loop(0 1 2) Match(0 1) Closure(0) - Block(0 1) Async(0 1) Await(0) TryBlock(0) Assign(0 1 2) AssignOp(0 1 2) - Field(0 1) Index(0 1) Underscore Range(0 1 2) Path(0 1) AddrOf(0 1 2) - Break(0 1) Continue(0) Ret(0) InlineAsm(0) MacCall(0) Struct(0) Repeat(0 1) - Paren(0) Try(0) Yield(0) Yeet(0) IncludedBytes(0) FormatArgs(0) Err); + Tup(0) Binary(0 1 2) Unary(0 1) Lit(0) Cast(0 1) Type(0 1) Let(0 1 2 3) + If(0 1 2) While(0 1 2) ForLoop(pat iter body label kind) Loop(0 1 2) + Match(0 1) Closure(0) Block(0 1) Gen(0 1 2) Await(0 1) TryBlock(0) + Assign(0 1 2) AssignOp(0 1 2) Field(0 1) Index(0 1 2) Underscore + Range(0 1 2) Path(0 1) AddrOf(0 1 2) Break(0 1) Continue(0) Ret(0) + InlineAsm(0) OffsetOf(0 1) MacCall(0) Struct(0) Repeat(0 1) Paren(0) Try(0) + Yield(0) Yeet(0) Become(0) IncludedBytes(0) FormatArgs(0) Err); spanless_eq_enum!(InlineAsmOperand; In(reg expr) Out(reg late expr) InOut(reg late expr) SplitInOut(reg late in_expr out_expr) Const(anon_const) Sym(sym)); spanless_eq_enum!(ItemKind; 
ExternCrate(0) Use(0) Static(0) Const(0) Fn(0) Mod(0 1) ForeignMod(0) GlobalAsm(0) TyAlias(0) Enum(0 1) Struct(0 1) - Union(0 1) Trait(0) TraitAlias(0 1) Impl(0) MacCall(0) MacroDef(0)); -spanless_eq_enum!(LitKind; Str(0 1) ByteStr(0 1) Byte(0) Char(0) Int(0 1) - Float(0 1) Bool(0) Err); + Union(0 1) Trait(0) TraitAlias(0 1) Impl(0) MacCall(0) MacroDef(0) + Delegation(0)); +spanless_eq_enum!(LitKind; Str(0 1) ByteStr(0 1) CStr(0 1) Byte(0) Char(0) + Int(0 1) Float(0 1) Bool(0) Err(0)); spanless_eq_enum!(PatKind; Wild Ident(0 1 2) Struct(0 1 2 3) TupleStruct(0 1 2) Or(0) Path(0 1) Tuple(0) Box(0) Ref(0 1) Lit(0) Range(0 1 2) Slice(0) Rest - Paren(0) MacCall(0)); + Never Paren(0) MacCall(0) Err(0)); spanless_eq_enum!(TyKind; Slice(0) Array(0 1) Ptr(0) Ref(0 1) BareFn(0) Never - Tup(0) Path(0 1) TraitObject(0 1) ImplTrait(0 1) Paren(0) Typeof(0) Infer - ImplicitSelf MacCall(0) Err CVarArgs); + Tup(0) AnonStruct(0 1) AnonUnion(0 1) Path(0 1) TraitObject(0 1) + ImplTrait(0 1) Paren(0) Typeof(0) Infer ImplicitSelf MacCall(0) CVarArgs + Dummy Err(0)); impl SpanlessEq for Ident { fn eq(&self, other: &Self) -> bool { @@ -634,8 +657,8 @@ impl SpanlessEq for Param { } = other; SpanlessEq::eq(id, id2) && SpanlessEq::eq(is_placeholder, is_placeholder2) - && (matches!(ty.kind, TyKind::Err) - || matches!(ty2.kind, TyKind::Err) + && (matches!(ty.kind, TyKind::Err(_)) + || matches!(ty2.kind, TyKind::Err(_)) || SpanlessEq::eq(attrs, attrs2) && SpanlessEq::eq(ty, ty2) && SpanlessEq::eq(pat, pat2)) @@ -651,12 +674,15 @@ impl SpanlessEq for TokenKind { _ => false, }, (TokenKind::Interpolated(this), TokenKind::Interpolated(other)) => { - match (this.as_ref(), other.as_ref()) { - (Nonterminal::NtExpr(this), Nonterminal::NtExpr(other)) => { - SpanlessEq::eq(this, other) + let (this, this_span) = this.as_ref(); + let (other, other_span) = other.as_ref(); + SpanlessEq::eq(this_span, other_span) + && match (this, other) { + (Nonterminal::NtExpr(this), Nonterminal::NtExpr(other)) => { + 
SpanlessEq::eq(this, other) + } + _ => this == other, } - _ => this == other, - } } _ => self == other, } @@ -721,14 +747,14 @@ fn doc_comment<'a>( } } let stream = match trees.next() { - Some(TokenTree::Delimited(_span, Delimiter::Bracket, stream)) => stream, + Some(TokenTree::Delimited(_span, _spacing, Delimiter::Bracket, stream)) => stream, _ => return false, }; let mut trees = stream.trees(); match trees.next() { Some(TokenTree::Token( Token { - kind: TokenKind::Ident(symbol, false), + kind: TokenKind::Ident(symbol, IdentIsRaw::No), span: _, }, _spacing, @@ -765,7 +791,7 @@ fn is_escaped_literal_token(token: &Token, unescaped: Symbol) -> bool { Token { kind: TokenKind::Interpolated(nonterminal), span: _, - } => match nonterminal.as_ref() { + } => match &nonterminal.0 { Nonterminal::NtExpr(expr) => match &expr.kind { ExprKind::Lit(lit) => is_escaped_lit(lit, unescaped), _ => false, diff --git a/vendor/syn/tests/common/parse.rs b/vendor/syn/tests/common/parse.rs index 73be101..dce0066 100644 --- a/vendor/syn/tests/common/parse.rs +++ b/vendor/syn/tests/common/parse.rs @@ -25,7 +25,7 @@ pub fn librustc_expr(input: &str) -> Option<P<ast::Expr>> { .parse_expr(); match e { Ok(expr) => Some(expr), - Err(mut diagnostic) => { + Err(diagnostic) => { diagnostic.emit(); None } diff --git a/vendor/syn/tests/macros/mod.rs b/vendor/syn/tests/macros/mod.rs index 5ca88b0..1c1bacf 100644 --- a/vendor/syn/tests/macros/mod.rs +++ b/vendor/syn/tests/macros/mod.rs @@ -3,7 +3,8 @@ #[path = "../debug/mod.rs"] pub mod debug; -use syn::parse::{Parse, Result}; +use std::str::FromStr; +use syn::parse::Result; macro_rules! errorf { ($($tt:tt)*) => {{ @@ -35,17 +36,25 @@ macro_rules! snapshot { macro_rules! 
snapshot_impl { (($expr:ident) as $t:ty, @$snapshot:literal) => { - let $expr = crate::macros::Tokens::parse::<$t>($expr).unwrap(); + let tokens = crate::macros::TryIntoTokens::try_into_tokens($expr).unwrap(); + let $expr: $t = syn::parse_quote!(#tokens); let debug = crate::macros::debug::Lite(&$expr); if !cfg!(miri) { - insta::assert_debug_snapshot!(debug, @$snapshot); + #[allow(clippy::needless_raw_string_hashes)] // https://github.com/mitsuhiko/insta/issues/389 + { + insta::assert_debug_snapshot!(debug, @$snapshot); + } } }; (($($expr:tt)*) as $t:ty, @$snapshot:literal) => {{ - let syntax_tree = crate::macros::Tokens::parse::<$t>($($expr)*).unwrap(); + let tokens = crate::macros::TryIntoTokens::try_into_tokens($($expr)*).unwrap(); + let syntax_tree: $t = syn::parse_quote!(#tokens); let debug = crate::macros::debug::Lite(&syntax_tree); if !cfg!(miri) { - insta::assert_debug_snapshot!(debug, @$snapshot); + #[allow(clippy::needless_raw_string_hashes)] + { + insta::assert_debug_snapshot!(debug, @$snapshot); + } } syntax_tree }}; @@ -53,7 +62,10 @@ macro_rules! snapshot_impl { let syntax_tree = $($expr)*; let debug = crate::macros::debug::Lite(&syntax_tree); if !cfg!(miri) { - insta::assert_debug_snapshot!(debug, @$snapshot); + #[allow(clippy::needless_raw_string_hashes)] + { + insta::assert_debug_snapshot!(debug, @$snapshot); + } } syntax_tree }}; @@ -62,18 +74,20 @@ macro_rules! 
snapshot_impl { }; } -pub trait Tokens { - fn parse<T: Parse>(self) -> Result<T>; +pub trait TryIntoTokens { + #[allow(dead_code)] + fn try_into_tokens(self) -> Result<proc_macro2::TokenStream>; } -impl<'a> Tokens for &'a str { - fn parse<T: Parse>(self) -> Result<T> { - syn::parse_str(self) +impl<'a> TryIntoTokens for &'a str { + fn try_into_tokens(self) -> Result<proc_macro2::TokenStream> { + let tokens = proc_macro2::TokenStream::from_str(self)?; + Ok(tokens) } } -impl Tokens for proc_macro2::TokenStream { - fn parse<T: Parse>(self) -> Result<T> { - syn::parse2(self) +impl TryIntoTokens for proc_macro2::TokenStream { + fn try_into_tokens(self) -> Result<proc_macro2::TokenStream> { + Ok(self) } } diff --git a/vendor/syn/tests/repo/mod.rs b/vendor/syn/tests/repo/mod.rs index cec42a6..6aedd2c 100644 --- a/vendor/syn/tests/repo/mod.rs +++ b/vendor/syn/tests/repo/mod.rs @@ -13,20 +13,50 @@ use std::path::{Path, PathBuf}; use tar::Archive; use walkdir::{DirEntry, WalkDir}; -const REVISION: &str = "5e1d3299a290026b85787bc9c7e72bcc53ac283f"; +const REVISION: &str = "b10cfcd65fd7f7b1ab9beb34798b2108de003452"; #[rustfmt::skip] static EXCLUDE_FILES: &[&str] = &[ + // TODO: CStr literals: c"…", cr"…" + // https://github.com/dtolnay/syn/issues/1502 + "src/tools/clippy/tests/ui/needless_raw_string.rs", + "src/tools/clippy/tests/ui/needless_raw_string_hashes.rs", + "src/tools/rust-analyzer/crates/parser/test_data/parser/inline/ok/0085_expr_literals.rs", + + // TODO: explicit tail calls: `become _g()` + // https://github.com/dtolnay/syn/issues/1501 + "tests/ui/explicit-tail-calls/return-lifetime-sub.rs", + // TODO: non-lifetime binders: `where for<'a, T> &'a Struct<T>: Trait` // https://github.com/dtolnay/syn/issues/1435 + "src/tools/rustfmt/tests/source/issue_5721.rs", + "src/tools/rustfmt/tests/source/non-lifetime-binders.rs", + "src/tools/rustfmt/tests/target/issue_5721.rs", + "src/tools/rustfmt/tests/target/non-lifetime-binders.rs", 
"tests/rustdoc-json/non_lifetime_binders.rs", + "tests/rustdoc/inline_cross/auxiliary/non_lifetime_binders.rs", "tests/rustdoc/non_lifetime_binders.rs", // TODO: return type notation: `where T: Trait<method(): Send>` // https://github.com/dtolnay/syn/issues/1434 + "src/tools/rust-analyzer/crates/parser/test_data/parser/inline/ok/0208_associated_return_type_bounds.rs", "tests/ui/associated-type-bounds/return-type-notation/basic.rs", "tests/ui/feature-gates/feature-gate-return_type_notation.rs", + // TODO: lazy type alias syntax with where-clause in trailing position + // https://github.com/dtolnay/syn/issues/1525 + "tests/rustdoc/typedef-inner-variants-lazy_type_alias.rs", + + // TODO: gen blocks and functions + // https://github.com/dtolnay/syn/issues/1526 + "compiler/rustc_codegen_cranelift/example/gen_block_iterate.rs", + "tests/ui/coroutine/gen_block_is_iter.rs", + "tests/ui/coroutine/gen_block_iterate.rs", + + // TODO: struct literal in match guard + // https://github.com/dtolnay/syn/issues/1527 + "tests/ui/parser/struct-literal-in-match-guard.rs", + // Compile-fail expr parameter in const generic position: f::<1 + 2>() "tests/ui/const-generics/early/closing-args-token.rs", "tests/ui/const-generics/early/const-expression-parameter.rs", @@ -37,11 +67,14 @@ static EXCLUDE_FILES: &[&str] = &[ // Need at least one trait in impl Trait, no such type as impl 'static "tests/ui/type-alias-impl-trait/generic_type_does_not_live_long_enough.rs", + // Negative polarity trait bound: `where T: !Copy` + "src/tools/rustfmt/tests/target/negative-bounds.rs", + // Lifetime bound inside for<>: `T: ~const ?for<'a: 'b> Trait<'a>` - "tests/ui/rfc-2632-const-trait-impl/tilde-const-syntax.rs", + "tests/ui/rfcs/rfc-2632-const-trait-impl/tilde-const-syntax.rs", // Const impl that is not a trait impl: `impl ~const T {}` - "tests/ui/rfc-2632-const-trait-impl/syntax.rs", + "tests/ui/rfcs/rfc-2632-const-trait-impl/syntax.rs", // Deprecated anonymous parameter syntax in traits 
"src/tools/rustfmt/tests/source/trait.rs", @@ -63,6 +96,7 @@ static EXCLUDE_FILES: &[&str] = &[ "src/tools/rust-analyzer/crates/parser/test_data/parser/inline/ok/0004_value_parameters_no_patterns.rs", "src/tools/rust-analyzer/crates/parser/test_data/parser/inline/ok/0104_path_fn_trait_args.rs", "src/tools/rust-analyzer/crates/parser/test_data/parser/inline/ok/0202_typepathfn_with_coloncolon.rs", + "src/tools/rust-analyzer/crates/parser/test_data/parser/inline/ok/0209_bare_dyn_types_with_paren_as_generic_args.rs", "src/tools/rustfmt/tests/source/attrib.rs", "src/tools/rustfmt/tests/source/closure.rs", "src/tools/rustfmt/tests/source/existential_type.rs", @@ -85,8 +119,8 @@ static EXCLUDE_FILES: &[&str] = &[ "tests/codegen-units/item-collection/non-generic-closures.rs", "tests/debuginfo/recursive-enum.rs", "tests/pretty/closure-reform-pretty.rs", - "tests/run-make-fulldeps/reproducible-build-2/reproducible-build.rs", - "tests/run-make-fulldeps/reproducible-build/reproducible-build.rs", + "tests/run-make/reproducible-build-2/reproducible-build.rs", + "tests/run-make/reproducible-build/reproducible-build.rs", "tests/ui/auxiliary/typeid-intrinsic-aux1.rs", "tests/ui/auxiliary/typeid-intrinsic-aux2.rs", "tests/ui/impl-trait/generic-with-implicit-hrtb-without-dyn.rs", @@ -95,19 +129,11 @@ static EXCLUDE_FILES: &[&str] = &[ "tests/ui/lifetimes/bare-trait-object.rs", "tests/ui/parser/bounds-obj-parens.rs", - // Old type ascription expression syntax - "src/tools/rustfmt/tests/source/type-ascription.rs", - "src/tools/rustfmt/tests/target/type-ascription.rs", - - // Obsolete box syntax - "src/tools/rust-analyzer/crates/parser/test_data/parser/inline/ok/0132_box_expr.rs", - // Invalid unparenthesized range pattern inside slice pattern: `[1..]` "tests/ui/consts/miri_unleashed/const_refers_to_static_cross_crate.rs", // Various extensions to Rust syntax made up by rust-analyzer "src/tools/rust-analyzer/crates/parser/test_data/parser/inline/ok/0012_type_item_where_clause.rs", - 
"src/tools/rust-analyzer/crates/parser/test_data/parser/inline/ok/0040_crate_keyword_vis.rs", "src/tools/rust-analyzer/crates/parser/test_data/parser/inline/ok/0058_range_pat.rs", "src/tools/rust-analyzer/crates/parser/test_data/parser/inline/ok/0123_param_list_vararg.rs", "src/tools/rust-analyzer/crates/parser/test_data/parser/inline/ok/0131_existential_type.rs", @@ -280,21 +306,38 @@ pub fn clone_rust() { if needs_clone { download_and_unpack().unwrap(); } + let mut missing = String::new(); let test_src = Path::new("tests/rust"); + + let mut exclude_files_set = BTreeSet::new(); for exclude in EXCLUDE_FILES { + if !exclude_files_set.insert(exclude) { + panic!("duplicate path in EXCLUDE_FILES: {}", exclude); + } + for dir in EXCLUDE_DIRS { + if Path::new(exclude).starts_with(dir) { + panic!("excluded file {} is inside an excluded dir", exclude); + } + } if !test_src.join(exclude).is_file() { missing += "\ntests/rust/"; missing += exclude; } } + + let mut exclude_dirs_set = BTreeSet::new(); for exclude in EXCLUDE_DIRS { + if !exclude_dirs_set.insert(exclude) { + panic!("duplicate path in EXCLUDE_DIRS: {}", exclude); + } if !test_src.join(exclude).is_dir() { missing += "\ntests/rust/"; missing += exclude; missing += "/"; } } + if !missing.is_empty() { panic!("excluded test file does not exist:{}\n", missing); } diff --git a/vendor/syn/tests/test_asyncness.rs b/vendor/syn/tests/test_asyncness.rs index 6bc5a16..9968934 100644 --- a/vendor/syn/tests/test_asyncness.rs +++ b/vendor/syn/tests/test_asyncness.rs @@ -18,7 +18,9 @@ fn test_async_fn() { generics: Generics, output: ReturnType::Default, }, - block: Block, + block: Block { + stmts: [], + }, } "###); } @@ -32,7 +34,9 @@ fn test_async_closure() { asyncness: Some, output: ReturnType::Default, body: Expr::Block { - block: Block, + block: Block { + stmts: [], + }, }, } "###); diff --git a/vendor/syn/tests/test_derive_input.rs b/vendor/syn/tests/test_derive_input.rs index 3ec6aec..c3d31eb 100644 --- 
a/vendor/syn/tests/test_derive_input.rs +++ b/vendor/syn/tests/test_derive_input.rs @@ -78,6 +78,7 @@ fn test_struct() { }, }, }, + Token![,], Field { vis: Visibility::Public, ident: Some("attrs"), @@ -157,6 +158,7 @@ fn test_union() { colon_token: Some, ty: Type::Tuple, }, + Token![,], Field { vis: Visibility::Inherited, ident: Some("value"), @@ -232,6 +234,7 @@ fn test_enum() { GenericParam::Type(TypeParam { ident: "T", }), + Token![,], GenericParam::Type(TypeParam { ident: "E", }), @@ -259,6 +262,7 @@ fn test_enum() { ], }, }, + Token![,], Variant { ident: "Err", fields: Fields::Unnamed { @@ -278,6 +282,7 @@ fn test_enum() { ], }, }, + Token![,], Variant { ident: "Surprise", fields: Fields::Unit, @@ -285,6 +290,7 @@ fn test_enum() { lit: 0isize, }), }, + Token![,], Variant { ident: "ProcMacroHack", fields: Fields::Unit, @@ -294,6 +300,7 @@ fn test_enum() { Expr::Lit { lit: 0, }, + Token![,], Expr::Lit { lit: "data", }, @@ -363,6 +370,7 @@ fn test_attr_with_mod_style_path_with_self() { PathSegment { ident: "foo", }, + Token![::], PathSegment { ident: "self", }, @@ -386,6 +394,7 @@ fn test_attr_with_mod_style_path_with_self() { PathSegment { ident: "foo", }, + Token![::], PathSegment { ident: "self", }, @@ -426,6 +435,7 @@ fn test_pub_restricted() { PathSegment { ident: "m", }, + Token![::], PathSegment { ident: "n", }, @@ -589,6 +599,7 @@ fn test_fields_on_named_struct() { }, }, }, + Token![,], Field { vis: Visibility::Public, ident: Some("bar"), @@ -603,6 +614,7 @@ fn test_fields_on_named_struct() { }, }, }, + Token![,], ], }, }, @@ -674,6 +686,7 @@ fn test_fields_on_tuple_struct() { }, }, }, + Token![,], Field { vis: Visibility::Public, ty: Type::Path { @@ -751,6 +764,7 @@ fn test_ambiguous_crate() { PathSegment { ident: "crate", }, + Token![::], PathSegment { ident: "X", }, diff --git a/vendor/syn/tests/test_expr.rs b/vendor/syn/tests/test_expr.rs index c7230c6..daf0d63 100644 --- a/vendor/syn/tests/test_expr.rs +++ b/vendor/syn/tests/test_expr.rs @@ -1,11 
+1,12 @@ -#![allow(clippy::uninlined_format_args)] +#![allow(clippy::single_element_loop, clippy::uninlined_format_args)] #[macro_use] mod macros; -use proc_macro2::{Delimiter, Group, Ident, Punct, Spacing, Span, TokenStream, TokenTree}; -use quote::quote; -use syn::{Expr, ExprRange}; +use proc_macro2::{Delimiter, Group}; +use quote::{quote, ToTokens as _}; +use syn::punctuated::Punctuated; +use syn::{parse_quote, token, Expr, ExprRange, ExprTuple, Stmt, Token}; #[test] fn test_expr_parse() { @@ -85,7 +86,7 @@ fn test_tuple_multi_index() { assert_eq!(expected, syn::parse_str(input).unwrap()); } - for tokens in vec![ + for tokens in [ quote!(tuple.0.0), quote!(tuple .0.0), quote!(tuple. 0.0), @@ -100,10 +101,8 @@ fn test_tuple_multi_index() { #[test] fn test_macro_variable_func() { // mimics the token stream corresponding to `$fn()` - let tokens = TokenStream::from_iter(vec![ - TokenTree::Group(Group::new(Delimiter::None, quote! { f })), - TokenTree::Group(Group::new(Delimiter::Parenthesis, TokenStream::new())), - ]); + let path = Group::new(Delimiter::None, quote!(f)); + let tokens = quote!(#path()); snapshot!(tokens as Expr, @r###" Expr::Call { @@ -121,12 +120,8 @@ fn test_macro_variable_func() { } "###); - let tokens = TokenStream::from_iter(vec![ - TokenTree::Punct(Punct::new('#', Spacing::Alone)), - TokenTree::Group(Group::new(Delimiter::Bracket, quote! { outside })), - TokenTree::Group(Group::new(Delimiter::None, quote! { #[inside] f })), - TokenTree::Group(Group::new(Delimiter::Parenthesis, TokenStream::new())), - ]); + let path = Group::new(Delimiter::None, quote! { #[inside] f }); + let tokens = quote!(#[outside] #path()); snapshot!(tokens as Expr, @r###" Expr::Call { @@ -172,11 +167,8 @@ fn test_macro_variable_func() { #[test] fn test_macro_variable_macro() { // mimics the token stream corresponding to `$macro!()` - let tokens = TokenStream::from_iter(vec![ - TokenTree::Group(Group::new(Delimiter::None, quote! 
{ m })), - TokenTree::Punct(Punct::new('!', Spacing::Alone)), - TokenTree::Group(Group::new(Delimiter::Parenthesis, TokenStream::new())), - ]); + let mac = Group::new(Delimiter::None, quote!(m)); + let tokens = quote!(#mac!()); snapshot!(tokens as Expr, @r###" Expr::Macro { @@ -198,10 +190,8 @@ fn test_macro_variable_macro() { #[test] fn test_macro_variable_struct() { // mimics the token stream corresponding to `$struct {}` - let tokens = TokenStream::from_iter(vec![ - TokenTree::Group(Group::new(Delimiter::None, quote! { S })), - TokenTree::Group(Group::new(Delimiter::Brace, TokenStream::new())), - ]); + let s = Group::new(Delimiter::None, quote! { S }); + let tokens = quote!(#s {}); snapshot!(tokens as Expr, @r###" Expr::Struct { @@ -216,23 +206,36 @@ fn test_macro_variable_struct() { "###); } +#[test] +fn test_macro_variable_unary() { + // mimics the token stream corresponding to `$expr.method()` where expr is `&self` + let inner = Group::new(Delimiter::None, quote!(&self)); + let tokens = quote!(#inner.method()); + snapshot!(tokens as Expr, @r###" + Expr::MethodCall { + receiver: Expr::Group { + expr: Expr::Reference { + expr: Expr::Path { + path: Path { + segments: [ + PathSegment { + ident: "self", + }, + ], + }, + }, + }, + }, + method: "method", + } + "###); +} + #[test] fn test_macro_variable_match_arm() { // mimics the token stream corresponding to `match v { _ => $expr }` - let tokens = TokenStream::from_iter(vec![ - TokenTree::Ident(Ident::new("match", Span::call_site())), - TokenTree::Ident(Ident::new("v", Span::call_site())), - TokenTree::Group(Group::new( - Delimiter::Brace, - TokenStream::from_iter(vec![ - TokenTree::Punct(Punct::new('_', Spacing::Alone)), - TokenTree::Punct(Punct::new('=', Spacing::Joint)), - TokenTree::Punct(Punct::new('>', Spacing::Alone)), - TokenTree::Group(Group::new(Delimiter::None, quote! { #[a] () })), - ]), - )), - ]); - + let expr = Group::new(Delimiter::None, quote! 
{ #[a] () }); + let tokens = quote!(match v { _ => #expr }); snapshot!(tokens as Expr, @r###" Expr::Match { expr: Expr::Path { @@ -267,6 +270,40 @@ fn test_macro_variable_match_arm() { ], } "###); + + let expr = Group::new(Delimiter::None, quote!(loop {} + 1)); + let tokens = quote!(match v { _ => #expr }); + snapshot!(tokens as Expr, @r###" + Expr::Match { + expr: Expr::Path { + path: Path { + segments: [ + PathSegment { + ident: "v", + }, + ], + }, + }, + arms: [ + Arm { + pat: Pat::Wild, + body: Expr::Group { + expr: Expr::Binary { + left: Expr::Loop { + body: Block { + stmts: [], + }, + }, + op: BinOp::Add, + right: Expr::Lit { + lit: 1, + }, + }, + }, + }, + ], + } + "###); } // https://github.com/dtolnay/syn/issues/1019 @@ -310,3 +347,194 @@ fn test_ranges() { syn::parse_str::<Expr>("lo...").unwrap_err(); syn::parse_str::<Expr>("lo...hi").unwrap_err(); } + +#[test] +fn test_ambiguous_label() { + for stmt in [ + quote! { + return 'label: loop { break 'label 42; }; + }, + quote! { + break ('label: loop { break 'label 42; }); + }, + quote! { + break 1 + 'label: loop { break 'label 42; }; + }, + quote! { + break 'outer 'inner: loop { break 'inner 42; }; + }, + ] { + syn::parse2::<Stmt>(stmt).unwrap(); + } + + for stmt in [ + // Parentheses required. See https://github.com/rust-lang/rust/pull/87026. + quote! 
{ + break 'label: loop { break 'label 42; }; + }, + ] { + syn::parse2::<Stmt>(stmt).unwrap_err(); + } +} + +#[test] +fn test_extended_interpolated_path() { + let path = Group::new(Delimiter::None, quote!(a::b)); + + let tokens = quote!(if #path {}); + snapshot!(tokens as Expr, @r###" + Expr::If { + cond: Expr::Group { + expr: Expr::Path { + path: Path { + segments: [ + PathSegment { + ident: "a", + }, + Token![::], + PathSegment { + ident: "b", + }, + ], + }, + }, + }, + then_branch: Block { + stmts: [], + }, + } + "###); + + let tokens = quote!(#path {}); + snapshot!(tokens as Expr, @r###" + Expr::Struct { + path: Path { + segments: [ + PathSegment { + ident: "a", + }, + Token![::], + PathSegment { + ident: "b", + }, + ], + }, + } + "###); + + let tokens = quote!(#path :: c); + snapshot!(tokens as Expr, @r###" + Expr::Path { + path: Path { + segments: [ + PathSegment { + ident: "a", + }, + Token![::], + PathSegment { + ident: "b", + }, + Token![::], + PathSegment { + ident: "c", + }, + ], + }, + } + "###); + + let nested = Group::new(Delimiter::None, quote!(a::b || true)); + let tokens = quote!(if #nested && false {}); + snapshot!(tokens as Expr, @r###" + Expr::If { + cond: Expr::Binary { + left: Expr::Group { + expr: Expr::Binary { + left: Expr::Path { + path: Path { + segments: [ + PathSegment { + ident: "a", + }, + Token![::], + PathSegment { + ident: "b", + }, + ], + }, + }, + op: BinOp::Or, + right: Expr::Lit { + lit: Lit::Bool { + value: true, + }, + }, + }, + }, + op: BinOp::And, + right: Expr::Lit { + lit: Lit::Bool { + value: false, + }, + }, + }, + then_branch: Block { + stmts: [], + }, + } + "###); +} + +#[test] +fn test_tuple_comma() { + let mut expr = ExprTuple { + attrs: Vec::new(), + paren_token: token::Paren::default(), + elems: Punctuated::new(), + }; + snapshot!(expr.to_token_stream() as Expr, @"Expr::Tuple"); + + expr.elems.push_value(parse_quote!(continue)); + // Must not parse to Expr::Paren + snapshot!(expr.to_token_stream() as Expr, @r###" + 
Expr::Tuple { + elems: [ + Expr::Continue, + Token![,], + ], + } + "###); + + expr.elems.push_punct(<Token![,]>::default()); + snapshot!(expr.to_token_stream() as Expr, @r###" + Expr::Tuple { + elems: [ + Expr::Continue, + Token![,], + ], + } + "###); + + expr.elems.push_value(parse_quote!(continue)); + snapshot!(expr.to_token_stream() as Expr, @r###" + Expr::Tuple { + elems: [ + Expr::Continue, + Token![,], + Expr::Continue, + ], + } + "###); + + expr.elems.push_punct(<Token![,]>::default()); + snapshot!(expr.to_token_stream() as Expr, @r###" + Expr::Tuple { + elems: [ + Expr::Continue, + Token![,], + Expr::Continue, + Token![,], + ], + } + "###); +} diff --git a/vendor/syn/tests/test_generics.rs b/vendor/syn/tests/test_generics.rs index 51119ad..3faf0db 100644 --- a/vendor/syn/tests/test_generics.rs +++ b/vendor/syn/tests/test_generics.rs @@ -28,6 +28,7 @@ fn test_split_for_impl() { ident: "a", }, }), + Token![,], GenericParam::Lifetime(LifetimeParam { lifetime: Lifetime { ident: "b", @@ -39,6 +40,7 @@ fn test_split_for_impl() { }, ], }), + Token![,], GenericParam::Type(TypeParam { attrs: [ Attribute { @@ -227,6 +229,7 @@ fn test_fn_precedence_in_where_clause() { ], }, }), + Token![+], TypeParamBound::Trait(TraitBound { path: Path { segments: [ @@ -238,12 +241,15 @@ fn test_fn_precedence_in_where_clause() { }), ], }), + Token![,], ], }), }, output: ReturnType::Default, }, - block: Block, + block: Block { + stmts: [], + }, } "###); diff --git a/vendor/syn/tests/test_item.rs b/vendor/syn/tests/test_item.rs index 9b0e1c9..db9e3ab 100644 --- a/vendor/syn/tests/test_item.rs +++ b/vendor/syn/tests/test_item.rs @@ -38,7 +38,9 @@ fn test_macro_variable_attr() { generics: Generics, output: ReturnType::Default, }, - block: Block, + block: Block { + stmts: [], + }, } "###); } @@ -228,6 +230,7 @@ fn test_supertraits() { ], }, }), + Token![+], ], } "###); @@ -316,11 +319,14 @@ fn test_impl_trait_trailing_plus() { ], }, }), + Token![+], ], }, ), }, - block: Block, + block: 
Block { + stmts: [], + }, } "###); } diff --git a/vendor/syn/tests/test_lit.rs b/vendor/syn/tests/test_lit.rs index 82d2290..bc50136 100644 --- a/vendor/syn/tests/test_lit.rs +++ b/vendor/syn/tests/test_lit.rs @@ -1,6 +1,7 @@ #![allow( clippy::float_cmp, clippy::non_ascii_literal, + clippy::single_match_else, clippy::uninlined_format_args )] @@ -13,14 +14,13 @@ use std::str::FromStr; use syn::{Lit, LitFloat, LitInt, LitStr}; fn lit(s: &str) -> Lit { - match TokenStream::from_str(s) - .unwrap() - .into_iter() - .next() - .unwrap() - { - TokenTree::Literal(lit) => Lit::new(lit), - _ => panic!(), + let mut tokens = TokenStream::from_str(s).unwrap().into_iter(); + match tokens.next().unwrap() { + TokenTree::Literal(lit) => { + assert!(tokens.next().is_none()); + Lit::new(lit) + } + wrong => panic!("{:?}", wrong), } } diff --git a/vendor/syn/tests/test_meta.rs b/vendor/syn/tests/test_meta.rs index 91a9807..d991c38 100644 --- a/vendor/syn/tests/test_meta.rs +++ b/vendor/syn/tests/test_meta.rs @@ -144,6 +144,7 @@ fn test_parse_path() { PathSegment { ident: "serde", }, + Token![::], PathSegment { ident: "Serialize", }, diff --git a/vendor/syn/tests/test_parse_buffer.rs b/vendor/syn/tests/test_parse_buffer.rs index f2ca59c..2205b50 100644 --- a/vendor/syn/tests/test_parse_buffer.rs +++ b/vendor/syn/tests/test_parse_buffer.rs @@ -1,7 +1,8 @@ #![allow(clippy::non_ascii_literal)] use proc_macro2::{Delimiter, Group, Punct, Spacing, TokenStream, TokenTree}; -use syn::parse::{discouraged::Speculative, Parse, ParseStream, Parser, Result}; +use syn::parse::discouraged::Speculative as _; +use syn::parse::{Parse, ParseStream, Parser, Result}; use syn::{parenthesized, Token}; #[test] diff --git a/vendor/syn/tests/test_parse_quote.rs b/vendor/syn/tests/test_parse_quote.rs new file mode 100644 index 0000000..73aae70 --- /dev/null +++ b/vendor/syn/tests/test_parse_quote.rs @@ -0,0 +1,164 @@ +#[macro_use] +mod macros; + +use syn::punctuated::Punctuated; +use syn::{parse_quote, Attribute, 
Field, Lit, Pat, Stmt, Token}; + +#[test] +fn test_attribute() { + let attr: Attribute = parse_quote!(#[test]); + snapshot!(attr, @r###" + Attribute { + style: AttrStyle::Outer, + meta: Meta::Path { + segments: [ + PathSegment { + ident: "test", + }, + ], + }, + } + "###); + + let attr: Attribute = parse_quote!(#![no_std]); + snapshot!(attr, @r###" + Attribute { + style: AttrStyle::Inner, + meta: Meta::Path { + segments: [ + PathSegment { + ident: "no_std", + }, + ], + }, + } + "###); +} + +#[test] +fn test_field() { + let field: Field = parse_quote!(pub enabled: bool); + snapshot!(field, @r###" + Field { + vis: Visibility::Public, + ident: Some("enabled"), + colon_token: Some, + ty: Type::Path { + path: Path { + segments: [ + PathSegment { + ident: "bool", + }, + ], + }, + }, + } + "###); + + let field: Field = parse_quote!(primitive::bool); + snapshot!(field, @r###" + Field { + vis: Visibility::Inherited, + ty: Type::Path { + path: Path { + segments: [ + PathSegment { + ident: "primitive", + }, + Token![::], + PathSegment { + ident: "bool", + }, + ], + }, + }, + } + "###); +} + +#[test] +fn test_pat() { + let pat: Pat = parse_quote!(Some(false) | None); + snapshot!(&pat, @r###" + Pat::Or { + cases: [ + Pat::TupleStruct { + path: Path { + segments: [ + PathSegment { + ident: "Some", + }, + ], + }, + elems: [ + Pat::Lit(ExprLit { + lit: Lit::Bool { + value: false, + }, + }), + ], + }, + Token![|], + Pat::Ident { + ident: "None", + }, + ], + } + "###); + + let boxed_pat: Box<Pat> = parse_quote!(Some(false) | None); + assert_eq!(*boxed_pat, pat); +} + +#[test] +fn test_punctuated() { + let punctuated: Punctuated<Lit, Token![|]> = parse_quote!(true | true); + snapshot!(punctuated, @r###" + [ + Lit::Bool { + value: true, + }, + Token![|], + Lit::Bool { + value: true, + }, + ] + "###); + + let punctuated: Punctuated<Lit, Token![|]> = parse_quote!(true | true |); + snapshot!(punctuated, @r###" + [ + Lit::Bool { + value: true, + }, + Token![|], + Lit::Bool { + value: 
true, + }, + Token![|], + ] + "###); +} + +#[test] +fn test_vec_stmt() { + let stmts: Vec<Stmt> = parse_quote! { + let _; + true + }; + snapshot!(stmts, @r###" + [ + Stmt::Local { + pat: Pat::Wild, + }, + Stmt::Expr( + Expr::Lit { + lit: Lit::Bool { + value: true, + }, + }, + None, + ), + ] + "###); +} diff --git a/vendor/syn/tests/test_parse_stream.rs b/vendor/syn/tests/test_parse_stream.rs index 2265dfe..6e4a5a5 100644 --- a/vendor/syn/tests/test_parse_stream.rs +++ b/vendor/syn/tests/test_parse_stream.rs @@ -1,6 +1,6 @@ #![allow(clippy::let_underscore_untyped)] -use syn::ext::IdentExt; +use syn::ext::IdentExt as _; use syn::parse::ParseStream; use syn::{Ident, Token}; diff --git a/vendor/syn/tests/test_pat.rs b/vendor/syn/tests/test_pat.rs index cab7aa7..7b5f8b0 100644 --- a/vendor/syn/tests/test_pat.rs +++ b/vendor/syn/tests/test_pat.rs @@ -4,9 +4,10 @@ mod macros; use proc_macro2::{Delimiter, Group, TokenStream, TokenTree}; -use quote::quote; +use quote::{quote, ToTokens as _}; use syn::parse::Parser; -use syn::{Item, Pat, Stmt}; +use syn::punctuated::Punctuated; +use syn::{parse_quote, token, Item, Pat, PatTuple, Stmt, Token}; #[test] fn test_pat_ident() { @@ -95,3 +96,57 @@ fn test_ranges() { Pat::parse_single.parse_str("[_, (..=hi), _]").unwrap(); Pat::parse_single.parse_str("[_, lo..=hi, _]").unwrap(); } + +#[test] +fn test_tuple_comma() { + let mut expr = PatTuple { + attrs: Vec::new(), + paren_token: token::Paren::default(), + elems: Punctuated::new(), + }; + snapshot!(expr.to_token_stream() as Pat, @"Pat::Tuple"); + + expr.elems.push_value(parse_quote!(_)); + // Must not parse to Pat::Paren + snapshot!(expr.to_token_stream() as Pat, @r###" + Pat::Tuple { + elems: [ + Pat::Wild, + Token![,], + ], + } + "###); + + expr.elems.push_punct(<Token![,]>::default()); + snapshot!(expr.to_token_stream() as Pat, @r###" + Pat::Tuple { + elems: [ + Pat::Wild, + Token![,], + ], + } + "###); + + expr.elems.push_value(parse_quote!(_)); + snapshot!(expr.to_token_stream() 
as Pat, @r###" + Pat::Tuple { + elems: [ + Pat::Wild, + Token![,], + Pat::Wild, + ], + } + "###); + + expr.elems.push_punct(<Token![,]>::default()); + snapshot!(expr.to_token_stream() as Pat, @r###" + Pat::Tuple { + elems: [ + Pat::Wild, + Token![,], + Pat::Wild, + Token![,], + ], + } + "###); +} diff --git a/vendor/syn/tests/test_path.rs b/vendor/syn/tests/test_path.rs index 6aded74..2873441 100644 --- a/vendor/syn/tests/test_path.rs +++ b/vendor/syn/tests/test_path.rs @@ -24,6 +24,7 @@ fn parse_interpolated_leading_component() { PathSegment { ident: "first", }, + Token![::], PathSegment { ident: "rest", }, @@ -39,6 +40,7 @@ fn parse_interpolated_leading_component() { PathSegment { ident: "first", }, + Token![::], PathSegment { ident: "rest", }, diff --git a/vendor/syn/tests/test_precedence.rs b/vendor/syn/tests/test_precedence.rs index b49577f..026bece 100644 --- a/vendor/syn/tests/test_precedence.rs +++ b/vendor/syn/tests/test_precedence.rs @@ -1,8 +1,25 @@ +//! This test does the following for every file in the rust-lang/rust repo: +//! +//! 1. Parse the file using syn into a syn::File. +//! 2. Extract every syn::Expr from the file. +//! 3. Print each expr to a string of source code. +//! 4. Parse the source code using librustc_parse into a rustc_ast::Expr. +//! 5. For both the syn::Expr and rustc_ast::Expr, crawl the syntax tree to +//! insert parentheses surrounding every subexpression. +//! 6. Serialize the fully parenthesized syn::Expr to a string of source code. +//! 7. Parse the fully parenthesized source code using librustc_parse. +//! 8. Compare the rustc_ast::Expr resulting from parenthesizing using rustc +//! data structures vs syn data structures, ignoring spans. If they agree, +//! rustc's parser and syn's parser have identical handling of expression +//! precedence. 
+ #![cfg(not(syn_disable_nightly_tests))] #![cfg(not(miri))] #![recursion_limit = "1024"] #![feature(rustc_private)] #![allow( + clippy::blocks_in_conditions, + clippy::doc_markdown, clippy::explicit_deref_methods, clippy::let_underscore_untyped, clippy::manual_assert, @@ -13,29 +30,17 @@ clippy::uninlined_format_args )] -//! The tests in this module do the following: -//! -//! 1. Parse a given expression in both `syn` and `librustc`. -//! 2. Fold over the expression adding brackets around each subexpression (with -//! some complications - see the `syn_brackets` and `librustc_brackets` -//! methods). -//! 3. Serialize the `syn` expression back into a string, and re-parse it with -//! `librustc`. -//! 4. Respan all of the expressions, replacing the spans with the default -//! spans. -//! 5. Compare the expressions with one another, if they are not equal fail. - extern crate rustc_ast; extern crate rustc_ast_pretty; extern crate rustc_data_structures; extern crate rustc_driver; extern crate rustc_span; +extern crate smallvec; extern crate thin_vec; use crate::common::eq::SpanlessEq; use crate::common::parse; -use quote::quote; -use regex::Regex; +use quote::ToTokens; use rustc_ast::ast; use rustc_ast::ptr::P; use rustc_ast_pretty::pprust; @@ -65,12 +70,8 @@ fn test_rustc_precedence() { let passed = AtomicUsize::new(0); let failed = AtomicUsize::new(0); - // 2018 edition is hard - let edition_regex = Regex::new(r"\b(async|try)[!(]").unwrap(); - repo::for_each_rust_file(|path| { let content = fs::read_to_string(path).unwrap(); - let content = edition_regex.replace_all(&content, "_$0"); let (l_passed, l_failed) = match syn::parse_file(&content) { Ok(file) => { @@ -116,41 +117,65 @@ fn test_expressions(path: &Path, edition: Edition, exprs: Vec<syn::Expr>) -> (us rustc_span::create_session_if_not_set_then(edition, |_| { for expr in exprs { - let raw = quote!(#expr).to_string(); - - let librustc_ast = if let Some(e) = librustc_parse_and_rewrite(&raw) { + let source_code = 
expr.to_token_stream().to_string(); + let librustc_ast = if let Some(e) = librustc_parse_and_rewrite(&source_code) { e } else { failed += 1; - errorf!("\nFAIL {} - librustc failed to parse raw\n", path.display()); + errorf!( + "\nFAIL {} - librustc failed to parse original\n", + path.display(), + ); continue; }; - let syn_expr = syn_brackets(expr); - let syn_ast = if let Some(e) = parse::librustc_expr("e!(#syn_expr).to_string()) { + let syn_parenthesized_code = + syn_parenthesize(expr.clone()).to_token_stream().to_string(); + let syn_ast = if let Some(e) = parse::librustc_expr(&syn_parenthesized_code) { e } else { failed += 1; errorf!( - "\nFAIL {} - librustc failed to parse bracketed\n", + "\nFAIL {} - librustc failed to parse parenthesized\n", path.display(), ); continue; }; - if SpanlessEq::eq(&syn_ast, &librustc_ast) { - passed += 1; - } else { + if !SpanlessEq::eq(&syn_ast, &librustc_ast) { failed += 1; - let syn_program = pprust::expr_to_string(&syn_ast); - let librustc_program = pprust::expr_to_string(&librustc_ast); + let syn_pretty = pprust::expr_to_string(&syn_ast); + let librustc_pretty = pprust::expr_to_string(&librustc_ast); errorf!( "\nFAIL {}\n{}\nsyn != rustc\n{}\n", path.display(), - syn_program, - librustc_program, + syn_pretty, + librustc_pretty, + ); + continue; + } + + let expr_invisible = make_parens_invisible(expr); + let Ok(reparsed_expr_invisible) = syn::parse2(expr_invisible.to_token_stream()) else { + failed += 1; + errorf!( + "\nFAIL {} - syn failed to parse invisible delimiters\n{}\n", + path.display(), + source_code, + ); + continue; + }; + if expr_invisible != reparsed_expr_invisible { + failed += 1; + errorf!( + "\nFAIL {} - mismatch after parsing invisible delimiters\n{}\n", + path.display(), + source_code, ); + continue; } + + passed += 1; } }); @@ -158,31 +183,37 @@ fn test_expressions(path: &Path, edition: Edition, exprs: Vec<syn::Expr>) -> (us } fn librustc_parse_and_rewrite(input: &str) -> Option<P<ast::Expr>> { - 
parse::librustc_expr(input).and_then(librustc_brackets) + parse::librustc_expr(input).map(librustc_parenthesize) } -/// Wrap every expression which is not already wrapped in parens with parens, to -/// reveal the precedence of the parsed expressions, and produce a stringified -/// form of the resulting expression. -/// -/// This method operates on librustc objects. -fn librustc_brackets(mut librustc_expr: P<ast::Expr>) -> Option<P<ast::Expr>> { +fn librustc_parenthesize(mut librustc_expr: P<ast::Expr>) -> P<ast::Expr> { use rustc_ast::ast::{ - Attribute, BinOpKind, Block, BorrowKind, Expr, ExprField, ExprKind, GenericArg, - GenericBound, Local, LocalKind, Pat, Stmt, StmtKind, StructExpr, StructRest, - TraitBoundModifier, Ty, + AssocItem, AssocItemKind, Attribute, BinOpKind, Block, BorrowKind, BoundConstness, Expr, + ExprField, ExprKind, GenericArg, GenericBound, ItemKind, Local, LocalKind, Pat, Stmt, + StmtKind, StructExpr, StructRest, TraitBoundModifiers, Ty, }; use rustc_ast::mut_visit::{ - noop_visit_generic_arg, noop_visit_local, noop_visit_param_bound, MutVisitor, + noop_flat_map_assoc_item, noop_visit_generic_arg, noop_visit_item_kind, noop_visit_local, + noop_visit_param_bound, MutVisitor, }; use rustc_data_structures::flat_map_in_place::FlatMapInPlace; use rustc_span::DUMMY_SP; + use smallvec::SmallVec; use std::mem; use std::ops::DerefMut; use thin_vec::ThinVec; - struct BracketsVisitor { - failed: bool, + struct FullyParenthesize; + + fn contains_let_chain(expr: &Expr) -> bool { + match &expr.kind { + ExprKind::Let(..) 
=> true, + ExprKind::Binary(binop, left, right) => { + binop.node == BinOpKind::And + && (contains_let_chain(left) || contains_let_chain(right)) + } + _ => false, + } } fn flat_map_field<T: MutVisitor>(mut f: ExprField, vis: &mut T) -> Vec<ExprField> { @@ -236,17 +267,12 @@ fn librustc_brackets(mut librustc_expr: P<ast::Expr>) -> Option<P<ast::Expr>> { } } - impl MutVisitor for BracketsVisitor { + impl MutVisitor for FullyParenthesize { fn visit_expr(&mut self, e: &mut P<Expr>) { noop_visit_expr(e, self); match e.kind { ExprKind::Block(..) | ExprKind::If(..) | ExprKind::Let(..) => {} - ExprKind::Binary(binop, ref left, ref right) - if match (&left.kind, binop.node, &right.kind) { - (ExprKind::Let(..), BinOpKind::And, _) - | (_, BinOpKind::And, ExprKind::Let(..)) => true, - _ => false, - } => {} + ExprKind::Binary(..) if contains_let_chain(e) => {} _ => { let inner = mem::replace( e, @@ -279,7 +305,10 @@ fn librustc_brackets(mut librustc_expr: P<ast::Expr>) -> Option<P<ast::Expr>> { match bound { GenericBound::Trait( _, - TraitBoundModifier::MaybeConst | TraitBoundModifier::MaybeConstMaybe, + TraitBoundModifiers { + constness: BoundConstness::Maybe(_), + .. 
+ }, ) => {} _ => noop_visit_param_bound(bound, self), } @@ -300,6 +329,39 @@ fn librustc_brackets(mut librustc_expr: P<ast::Expr>) -> Option<P<ast::Expr>> { } } + fn visit_item_kind(&mut self, item: &mut ItemKind) { + match item { + ItemKind::Const(const_item) + if !const_item.generics.params.is_empty() + || !const_item.generics.where_clause.predicates.is_empty() => {} + _ => noop_visit_item_kind(item, self), + } + } + + fn flat_map_trait_item(&mut self, item: P<AssocItem>) -> SmallVec<[P<AssocItem>; 1]> { + match &item.kind { + AssocItemKind::Const(const_item) + if !const_item.generics.params.is_empty() + || !const_item.generics.where_clause.predicates.is_empty() => + { + SmallVec::from([item]) + } + _ => noop_flat_map_assoc_item(item, self), + } + } + + fn flat_map_impl_item(&mut self, item: P<AssocItem>) -> SmallVec<[P<AssocItem>; 1]> { + match &item.kind { + AssocItemKind::Const(const_item) + if !const_item.generics.params.is_empty() + || !const_item.generics.where_clause.predicates.is_empty() => + { + SmallVec::from([item]) + } + _ => noop_flat_map_assoc_item(item, self), + } + } + // We don't want to look at expressions that might appear in patterns or // types yet. We'll look into comparing those in the future. For now // focus on expressions appearing in other places. @@ -316,46 +378,53 @@ fn librustc_brackets(mut librustc_expr: P<ast::Expr>) -> Option<P<ast::Expr>> { } } - let mut folder = BracketsVisitor { failed: false }; + let mut folder = FullyParenthesize; folder.visit_expr(&mut librustc_expr); - if folder.failed { - None - } else { - Some(librustc_expr) - } + librustc_expr } -/// Wrap every expression which is not already wrapped in parens with parens, to -/// reveal the precedence of the parsed expressions, and produce a stringified -/// form of the resulting expression. 
-fn syn_brackets(syn_expr: syn::Expr) -> syn::Expr { +fn syn_parenthesize(syn_expr: syn::Expr) -> syn::Expr { use syn::fold::{fold_expr, fold_generic_argument, Fold}; use syn::{token, BinOp, Expr, ExprParen, GenericArgument, MetaNameValue, Pat, Stmt, Type}; - struct ParenthesizeEveryExpr; + struct FullyParenthesize; + + fn parenthesize(expr: Expr) -> Expr { + Expr::Paren(ExprParen { + attrs: Vec::new(), + expr: Box::new(expr), + paren_token: token::Paren::default(), + }) + } fn needs_paren(expr: &Expr) -> bool { match expr { Expr::Group(_) => unreachable!(), Expr::If(_) | Expr::Unsafe(_) | Expr::Block(_) | Expr::Let(_) => false, - Expr::Binary(bin) => match (&*bin.left, bin.op, &*bin.right) { - (Expr::Let(_), BinOp::And(_), _) | (_, BinOp::And(_), Expr::Let(_)) => false, - _ => true, - }, + Expr::Binary(_) => !contains_let_chain(expr), _ => true, } } - impl Fold for ParenthesizeEveryExpr { + fn contains_let_chain(expr: &Expr) -> bool { + match expr { + Expr::Let(_) => true, + Expr::Binary(expr) => { + matches!(expr.op, BinOp::And(_)) + && (contains_let_chain(&expr.left) || contains_let_chain(&expr.right)) + } + _ => false, + } + } + + impl Fold for FullyParenthesize { fn fold_expr(&mut self, expr: Expr) -> Expr { - if needs_paren(&expr) { - Expr::Paren(ExprParen { - attrs: Vec::new(), - expr: Box::new(fold_expr(self, expr)), - paren_token: token::Paren::default(), - }) + let needs_paren = needs_paren(&expr); + let folded = fold_expr(self, expr); + if needs_paren { + parenthesize(folded) } else { - fold_expr(self, expr) + folded } } @@ -396,10 +465,48 @@ fn syn_brackets(syn_expr: syn::Expr) -> syn::Expr { } } - let mut folder = ParenthesizeEveryExpr; + let mut folder = FullyParenthesize; folder.fold_expr(syn_expr) } +fn make_parens_invisible(expr: syn::Expr) -> syn::Expr { + use syn::fold::{fold_expr, fold_stmt, Fold}; + use syn::{token, Expr, ExprGroup, ExprParen, Stmt}; + + struct MakeParensInvisible; + + impl Fold for MakeParensInvisible { + fn fold_expr(&mut 
self, mut expr: Expr) -> Expr { + if let Expr::Paren(paren) = expr { + expr = Expr::Group(ExprGroup { + attrs: paren.attrs, + group_token: token::Group(paren.paren_token.span.join()), + expr: paren.expr, + }); + } + fold_expr(self, expr) + } + + fn fold_stmt(&mut self, stmt: Stmt) -> Stmt { + if let Stmt::Expr(expr @ (Expr::Binary(_) | Expr::Cast(_)), None) = stmt { + Stmt::Expr( + Expr::Paren(ExprParen { + attrs: Vec::new(), + paren_token: token::Paren::default(), + expr: Box::new(fold_expr(self, expr)), + }), + None, + ) + } else { + fold_stmt(self, stmt) + } + } + } + + let mut folder = MakeParensInvisible; + folder.fold_expr(expr) +} + /// Walk through a crate collecting all expressions we can find in it. fn collect_exprs(file: syn::File) -> Vec<syn::Expr> { use syn::fold::Fold; diff --git a/vendor/syn/tests/test_round_trip.rs b/vendor/syn/tests/test_round_trip.rs index 0ef47b2..a320785 100644 --- a/vendor/syn/tests/test_round_trip.rs +++ b/vendor/syn/tests/test_round_trip.rs @@ -3,6 +3,7 @@ #![recursion_limit = "1024"] #![feature(rustc_private)] #![allow( + clippy::blocks_in_conditions, clippy::manual_assert, clippy::manual_let_else, clippy::match_like_matches_macro, @@ -33,6 +34,7 @@ use rustc_errors::{translation, Diagnostic, PResult}; use rustc_session::parse::ParseSess; use rustc_span::source_map::FilePathMapping; use rustc_span::FileName; +use std::borrow::Cow; use std::fs; use std::panic; use std::path::Path; @@ -104,7 +106,7 @@ fn test(path: &Path, failed: &AtomicUsize, abort_after: usize) { }; let after = match librustc_parse(back, &sess) { Ok(after) => after, - Err(mut diagnostic) => { + Err(diagnostic) => { errorf!("=== {}: librustc failed to parse", path.display()); diagnostic.emit(); return Err(false); @@ -154,7 +156,7 @@ fn librustc_parse(content: String, sess: &ParseSess) -> PResult<Crate> { parse::parse_crate_from_source_str(name, content, sess) } -fn translate_message(diagnostic: &Diagnostic) -> String { +fn translate_message(diagnostic: 
&Diagnostic) -> Cow<'static, str> { thread_local! { static FLUENT_BUNDLE: LazyFallbackBundle = { let locale_resources = rustc_driver::DEFAULT_LOCALE_RESOURCES.to_vec(); @@ -163,11 +165,11 @@ fn translate_message(diagnostic: &Diagnostic) -> String { }; } - let message = &diagnostic.message[0].0; - let args = translation::to_fluent_args(diagnostic.args()); + let message = &diagnostic.messages[0].0; + let args = translation::to_fluent_args(diagnostic.args.iter()); let (identifier, attr) = match message { - DiagnosticMessage::Str(msg) | DiagnosticMessage::Eager(msg) => return msg.clone(), + DiagnosticMessage::Str(msg) | DiagnosticMessage::Translated(msg) => return msg.clone(), DiagnosticMessage::FluentIdentifier(identifier, attr) => (identifier, attr), }; @@ -186,7 +188,7 @@ fn translate_message(diagnostic: &Diagnostic) -> String { let mut err = Vec::new(); let translated = fluent_bundle.format_pattern(value, Some(&args), &mut err); assert!(err.is_empty()); - translated.into_owned() + Cow::Owned(translated.into_owned()) }) } diff --git a/vendor/syn/tests/test_shebang.rs b/vendor/syn/tests/test_shebang.rs index 4c2a204..8439161 100644 --- a/vendor/syn/tests/test_shebang.rs +++ b/vendor/syn/tests/test_shebang.rs @@ -18,7 +18,9 @@ fn test_basic() { generics: Generics, output: ReturnType::Default, }, - block: Block, + block: Block { + stmts: [], + }, }, ], } @@ -55,7 +57,9 @@ fn test_comment() { generics: Generics, output: ReturnType::Default, }, - block: Block, + block: Block { + stmts: [], + }, }, ], } diff --git a/vendor/syn/tests/test_size.rs b/vendor/syn/tests/test_size.rs index 864c011..d64a3ab 100644 --- a/vendor/syn/tests/test_size.rs +++ b/vendor/syn/tests/test_size.rs @@ -17,20 +17,20 @@ fn test_item_size() { assert_eq!(mem::size_of::<Item>(), 360); } -#[rustversion::attr(before(2022-11-24), ignore)] +#[rustversion::attr(before(2023-04-29), ignore)] #[test] fn test_type_size() { - assert_eq!(mem::size_of::<Type>(), 240); + assert_eq!(mem::size_of::<Type>(), 232); 
} -#[rustversion::attr(before(2021-10-11), ignore)] +#[rustversion::attr(before(2023-04-29), ignore)] #[test] fn test_pat_size() { - assert_eq!(mem::size_of::<Pat>(), 192); + assert_eq!(mem::size_of::<Pat>(), 184); } -#[rustversion::attr(before(2022-09-09), ignore)] +#[rustversion::attr(before(2023-12-20), ignore)] #[test] fn test_lit_size() { - assert_eq!(mem::size_of::<Lit>(), 32); + assert_eq!(mem::size_of::<Lit>(), 24); } diff --git a/vendor/syn/tests/test_stmt.rs b/vendor/syn/tests/test_stmt.rs index bc57685..61890a4 100644 --- a/vendor/syn/tests/test_stmt.rs +++ b/vendor/syn/tests/test_stmt.rs @@ -8,8 +8,9 @@ mod macros; use proc_macro2::{Delimiter, Group, Ident, Span, TokenStream, TokenTree}; -use quote::quote; -use syn::Stmt; +use quote::{quote, ToTokens as _}; +use syn::parse::Parser as _; +use syn::{Block, Stmt}; #[test] fn test_raw_operator() { @@ -67,7 +68,6 @@ fn test_none_group() { TokenTree::Group(Group::new(Delimiter::Brace, TokenStream::new())), ]), ))]); - snapshot!(tokens as Stmt, @r###" Stmt::Item(Item::Fn { vis: Visibility::Inherited, @@ -77,9 +77,37 @@ fn test_none_group() { generics: Generics, output: ReturnType::Default, }, - block: Block, + block: Block { + stmts: [], + }, }) "###); + + let tokens = Group::new(Delimiter::None, quote!(let None = None)).to_token_stream(); + let stmts = Block::parse_within.parse2(tokens).unwrap(); + snapshot!(stmts, @r###" + [ + Stmt::Expr( + Expr::Group { + expr: Expr::Let { + pat: Pat::Ident { + ident: "None", + }, + expr: Expr::Path { + path: Path { + segments: [ + PathSegment { + ident: "None", + }, + ], + }, + }, + }, + }, + None, + ), + ] + "###); } #[test] @@ -234,3 +262,61 @@ fn test_macros() { }) "###); } + +#[test] +fn test_early_parse_loop() { + // The following is an Expr::Loop followed by Expr::Tuple. It is not an + // Expr::Call. + let tokens = quote! 
{ + loop {} + () + }; + + let stmts = Block::parse_within.parse2(tokens).unwrap(); + + snapshot!(stmts, @r###" + [ + Stmt::Expr( + Expr::Loop { + body: Block { + stmts: [], + }, + }, + None, + ), + Stmt::Expr( + Expr::Tuple, + None, + ), + ] + "###); + + let tokens = quote! { + 'a: loop {} + () + }; + + let stmts = Block::parse_within.parse2(tokens).unwrap(); + + snapshot!(stmts, @r###" + [ + Stmt::Expr( + Expr::Loop { + label: Some(Label { + name: Lifetime { + ident: "a", + }, + }), + body: Block { + stmts: [], + }, + }, + None, + ), + Stmt::Expr( + Expr::Tuple, + None, + ), + ] + "###); +} diff --git a/vendor/syn/tests/test_ty.rs b/vendor/syn/tests/test_ty.rs index a400a76..0645393 100644 --- a/vendor/syn/tests/test_ty.rs +++ b/vendor/syn/tests/test_ty.rs @@ -4,8 +4,9 @@ mod macros; use proc_macro2::{Delimiter, Group, Ident, Punct, Spacing, Span, TokenStream, TokenTree}; -use quote::quote; -use syn::Type; +use quote::{quote, ToTokens as _}; +use syn::punctuated::Punctuated; +use syn::{parse_quote, token, Token, Type, TypeTuple}; #[test] fn test_mut_self() { @@ -170,6 +171,7 @@ fn test_group_colons() { ], }, }, + Token![::], PathSegment { ident: "Item", }, @@ -245,6 +247,7 @@ fn test_trait_object() { ], }, }), + Token![+], TypeParamBound::Lifetime { ident: "static", }, @@ -260,6 +263,7 @@ fn test_trait_object() { TypeParamBound::Lifetime { ident: "a", }, + Token![+], TypeParamBound::Trait(TraitBound { path: Path { segments: [ @@ -294,6 +298,7 @@ fn test_trailing_plus() { ], }, }), + Token![+], ], } "###); @@ -313,6 +318,7 @@ fn test_trailing_plus() { ], }, }), + Token![+], ], } "###); @@ -331,6 +337,60 @@ fn test_trailing_plus() { ], }, }), + Token![+], + ], + } + "###); +} + +#[test] +fn test_tuple_comma() { + let mut expr = TypeTuple { + paren_token: token::Paren::default(), + elems: Punctuated::new(), + }; + snapshot!(expr.to_token_stream() as Type, @"Type::Tuple"); + + expr.elems.push_value(parse_quote!(_)); + // Must not parse to Type::Paren + 
snapshot!(expr.to_token_stream() as Type, @r###" + Type::Tuple { + elems: [ + Type::Infer, + Token![,], + ], + } + "###); + + expr.elems.push_punct(<Token![,]>::default()); + snapshot!(expr.to_token_stream() as Type, @r###" + Type::Tuple { + elems: [ + Type::Infer, + Token![,], + ], + } + "###); + + expr.elems.push_value(parse_quote!(_)); + snapshot!(expr.to_token_stream() as Type, @r###" + Type::Tuple { + elems: [ + Type::Infer, + Token![,], + Type::Infer, + ], + } + "###); + + expr.elems.push_punct(<Token![,]>::default()); + snapshot!(expr.to_token_stream() as Type, @r###" + Type::Tuple { + elems: [ + Type::Infer, + Token![,], + Type::Infer, + Token![,], ], } "###); diff --git a/vendor/tinyrick-0.0.13/.cargo-checksum.json b/vendor/tinyrick-0.0.13/.cargo-checksum.json new file mode 100644 index 0000000..d35b88e --- /dev/null +++ b/vendor/tinyrick-0.0.13/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"Cargo.lock":"99670f6141c3cf3e56dee55651543584c7a317acf0e22d8d68e0bb5267b63265","Cargo.toml":"097c224cc65407cbcc2662dd64c0aa63e70bbe47be0dbe6bafea633c233293e3","Cross.toml":"5ea46353870ddf25126cd53fd2d6ec8c0fc991ba06e17a93130d354ba444bdf4","DEVELOPMENT.md":"d9e2c7c92b48d893a1313da417b62cd8c8b7a58cf62747065fdd9a54c721255e","LICENSE.md":"692fd932aac63bc63ba3b4908850cdcc1a38cee1527a07440355811028656c38","README.md":"3f71dfa6c728f861faa80bf074d4ba1925a6ddce19107b301603471de3b8eca9","install.mk":"7430f7a209f12b2b45e3d3becc8152b825528c7909b6c75917eafc57427eb7e4","makefile":"69fc8044b284f138491c474595a98191b46756691c5ac3cda4f1695e96179041","sample.envrc":"cedd4bfb23ba835278b0e4df8c77ac1d42bd11a688d736621346bd35195a37ff","src/lib.rs":"4ed464aa7f4ff462cb315f1508240827e6b47aea00ffbefcfcf9f50d62c83d9c","src/tinyrick.rs":"8cdc996761ae236c717b03741a786bd5376a74c76c8bd2e8d248dc18ec98192e"},"package":"8219cde494438520d0714a476f486533e5befb02c6438c9ca741aaa2c43002ff"} \ No newline at end of file diff --git a/vendor/tinyrick-0.0.13/Cargo.toml b/vendor/tinyrick-0.0.13/Cargo.toml new file 
mode 100644 index 0000000..cc7b38a --- /dev/null +++ b/vendor/tinyrick-0.0.13/Cargo.toml @@ -0,0 +1,36 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. + +[package] +name = "tinyrick" +version = "0.0.13" +authors = ["Andrew Pennebaker <andrew.pennebaker@gmail.com>"] +description = "a freeform Rust build system" +homepage = "https://github.com/mcandre/tinyrick" +documentation = "https://docs.rs/tinyrick/" +readme = "README.md" +license = "BSD-2-Clause" + +[lib] +name = "tinyrick" + +[[bin]] +name = "tinyrick" +path = "src/tinyrick.rs" + +[dependencies.die] +version = "0.2.0" + +[dependencies.getopts] +version = "0.2.21" + +[dependencies.lazy_static] +version = "1.4.0" diff --git a/vendor/tinyrick-0.0.13/Cross.toml b/vendor/tinyrick-0.0.13/Cross.toml new file mode 100644 index 0000000..adf04ae --- /dev/null +++ b/vendor/tinyrick-0.0.13/Cross.toml @@ -0,0 +1,15 @@ +[target.x86_64-unknown-netbsd] +pre-build = [ + "mkdir -p /tmp/netbsd", + "curl https://cdn.netbsd.org/pub/NetBSD/NetBSD-9.2/amd64/binary/sets/base.tar.xz -O", + "tar -C /tmp/netbsd -xJf base.tar.xz", + "cp /tmp/netbsd/usr/lib/libexecinfo.so /usr/local/x86_64-unknown-netbsd/lib", + "rm base.tar.xz", + "rm -rf /tmp/netbsd", +] + +[target.aarch64-apple-darwin] +image = "freeznet/aarch64-apple-darwin-cross:11.3.1" + +[target.x86_64-apple-darwin] +image = "freeznet/x86_64-apple-darwin-cross:11.3.1" diff --git a/vendor/tinyrick-0.0.13/DEVELOPMENT.md b/vendor/tinyrick-0.0.13/DEVELOPMENT.md new file mode 100644 index 0000000..dd9a0fd --- /dev/null +++ b/vendor/tinyrick-0.0.13/DEVELOPMENT.md 
@@ -0,0 +1,61 @@ +# OVERVIEW + +tinyrick's own compilation process is compatible with standard `cargo`. We wrap some common workflows with `build` tasks for convenience. + +# BUILDTIME REQUIREMENTS + +* [Rust](https://www.rust-lang.org/en-US/) 1.75.0+ +* a UNIX environment with [coreutils](https://www.gnu.org/software/coreutils/) / [base](http://ftp.freebsd.org/pub/FreeBSD/releases/) / [macOS](https://www.apple.com/macos) / [WSL](https://learn.microsoft.com/en-us/windows/wsl/install) / etc. +* [GNU findutils](https://www.gnu.org/software/findutils/) +* [Docker](https://www.docker.com/) 20.10.21+ +* [zip](https://linux.die.net/man/1/zip) +* a POSIX compliant [make](https://pubs.opengroup.org/onlinepubs/9699919799/utilities/make.html) implementation (e.g. GNU make, BSD make, etc.) +* Provision additional dev tools with `make -f install.mk` + +## Recommended + +* [ASDF](https://asdf-vm.com/) 0.10 (run `asdf reshim` after provisioning) +* [direnv](https://direnv.net/) 2 +* [cargo-cache](https://crates.io/crates/cargo-cache) + +# INSTALL BINARY ARTIFACTS FROM LOCAL SOURCE + +```console +$ make install +``` + +# UNINSTALL BINARY ARTIFACTS + +```console +$ make uninstall +``` + +# AUDIT + +```console +$ make audit +``` + +# BUILD: LINT, DOC, COMPILE, and TEST + +```console +$ make build +``` + +# PUBLISH + +```console +$ make publish +``` + +# PORT + +```console +$ make port +``` + +# CLEAN + +```console +$ make clean +``` diff --git a/vendor/tinyrick-0.0.13/LICENSE.md b/vendor/tinyrick-0.0.13/LICENSE.md new file mode 100644 index 0000000..bff0a40 --- /dev/null +++ b/vendor/tinyrick-0.0.13/LICENSE.md @@ -0,0 +1,26 @@ +Copyright (c) 2018, Andrew Pennebaker +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright, this + list of conditions and the following disclaimer. +2. 
Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OR MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +The views and conclusions contained in the software and documentation are those +of the authors and should not be interpreted as representing official policies, +either expressed or implied, of the FreeBSD project. diff --git a/vendor/tinyrick-0.0.13/README.md b/vendor/tinyrick-0.0.13/README.md new file mode 100644 index 0000000..2d8e021 --- /dev/null +++ b/vendor/tinyrick-0.0.13/README.md @@ -0,0 +1,179 @@ +# tinyrick: a freeform Rust build system + +``` + .---. ^ + o{__ω__ o{ ^0^ -Let me out! +~~ ( // *|* \xx\) xx`|' + = = xxxx&x ' ` +``` + +# EXAMPLE + +```console +$ cd example + +$ tinyrick +running 1 test +test smoketest ... ok + +$ tinyrick -h +Usage: tinyrick [options] + +Options: + -l, --list list available tasks + -h, --help print usage info + -v, --version print version info +``` + +# ABOUT + +I'm tinyrick (TINYRICK!) and I build Rust projects. With tinyrick, you configure your build in the same normal Rust code as the rest of your project. Or keep picking your nose with make, it's up to you. 
+ +Look at my pants! tinyrick! You think my pants are one size fits all? No, of course not! So get the pants that fit you. Get a `tinyrick.rs` that fits your workflow. Task dependency trees, get em while they're hot! Segfaults, get em while they're not. Smarter, butter, faster, stranger. + +Don't shell out, lib out. Your build is more portable that way. tinyricktinyricktinyrick. If you look closely, that last period is actually a *micro* rick rendered in ASCII; even tinier tinyrick! + +# CRATE + +https://crates.io/crates/tinyrick + +# API DOCUMENTATION + +https://docs.rs/tinyrick/latest/tinyrick/ + +# LICENSE + +BSD-2-Clause + +# RUNTIME REQUIREMENTS + +* [Rust](https://www.rust-lang.org/en-US/) 1.75.0+ + +## Recommended + +* [ASDF](https://asdf-vm.com/) 0.10 (run `asdf reshim` after each Rust application binary installation) +* [direnv](https://direnv.net/) 2 +* [cargo-cache](https://crates.io/crates/cargo-cache) + +# SETUP + +## tinyrick.rs + +Write some tasks in a `tinyrick.rs` build configuration script at the top-level directory of your Rust project: + +```rust +fn banner() { + println!("{} {}", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION")); +} + +fn test() { + tinyrick::exec!("cargo", &["test"]); +} + +fn build() { + tinyrick::deps(test); + tinyrick::exec!("cargo", &["build", "--release"]); +} + +fn publish() { + tinyrick::exec!("cargo", &["publish"]); +} + +fn clean() { + tinyrick::exec!("cargo", &["clean"]); +} + +fn main() { + tinyrick::phony!(clean); + tinyrick::wubba_lubba_dub_dub!(build; banner, test, publish, clean); +} +``` + +## Cargo.toml + +Now, wire up the tinyrick command line interface by configuring your top-level `Cargo.toml`: + +```toml +[package] +name = "derpmobile" +description = "hyperadvanced derpmobiles" +version = "3.1.4" + +[dependencies] +tinyrick = { version = "0.0.13", optional = true } + +[features] +letmeout = ["tinyrick"] + +[[bin]] +name = "tinyrick" +path = "tinyrick.rs" +required-features = ["letmeout"] +``` + +Launch a 
terminal session in your project directory. Install and run the tinyrick tool: + +```console +$ cargo install tinyrick +$ tinyrick +``` + +Watch how he behaves... I hope tinyrick is practicing good manners :P + +What happens when you run: + +* `tinyrick banner`? +* `tinyrick test`? +* `tinyrick clean`? +* `tinyrick build`? +* `tinyrick -h`? +* `tinyrick --list`? +* `VERBOSE=1 tinyrick build`? + +I bet the freakin' moon explodes if you run `VERBOSE=1 tinyrick build build build`! (Hold onto your pants~) + +# DEBRIEFING + +Where are my pants? Let's break down the code so far: + +* `fn name() { ... }` declares a task named `name`. +* `deps(requirement)` caches a dependency on task `requirement`. +* `exec!(...)` spawns raw shell command processes. +* `VERBOSE=1` enables command string emission during processing. +* `phony!(...)` disables dependency caching for some tasks. +* `wubba_lubba_dub_dub!(default; ...)` exposes a `default` task and some other tasks to the tinyrick command line. +* `letmeout` is a feature gate, so that neither the tinyrick package, nor your tinyrick binary escape with your Rust package when you `tinyrick publish`. + +# DoN't UsE sHelL cOmMaNdS!1 + +Just because the tinyrick library offers several *supremely convenient* macros for executing shell commands doesn't mean that you should always shell out. No way, man! + +Whenever possible, use regular Rust code, as in the `banner()` example. There's like a ba-jillion [crates](https://crates.io) of prewritten Rust code, so you might as well use it! + +# CONTRIBUTING + +For more details on developing tinyrick itself, see [DEVELOPMENT.md](DEVELOPMENT.md). 
+ +# SEE ALSO + +* Inspired by the excellent [mage](https://magefile.org/) build system for Go projects +* [bb](https://github.com/mcandre/bb), a build system for (g)awk projects +* [cargo](https://doc.rust-lang.org/cargo/reference/build-scripts.html) custom build scripts, primarily for generating Rust source files from other languages +* [cmake](https://cmake.org/) for C/C++ projects +* [dale](https://github.com/mcandre/dale) builds D projects +* [GNU autotools](https://www.gnu.org/software/automake/manual/html_node/Autotools-Introduction.html), a build system for Linux C/C++ projects +* [Gradle](https://gradle.org/), a build system for JVM projects +* [invoke](https://pypi.org/project/invoke/), a Python task runner +* [jelly](https://github.com/mcandre/jelly), a JSON task runner +* [lake](https://luarocks.org/modules/steved/lake), a Lua task runner +* [lichen](https://github.com/mcandre/lichen), a sed task runner +* [POSIX make](https://pubs.opengroup.org/onlinepubs/009695299/utilities/make.html), a task runner standard for C/C++ and various other software projects +* [Rake](https://ruby.github.io/rake/), a task runner for Ruby projects +* [rez](https://github.com/mcandre/rez) builds C/C++ projects +* [Shake](https://shakebuild.com/), a task runner for Haskell projects + +# EVEN MORE EXAMPLES + +* The included [example](example) project provides a fully qualified demonstration of how to build projects with tinyrick. +* For a more practical example, see [ios7crypt-rs](https://github.com/mcandre/ios7crypt-rs), a little *modulino* library + command line tool for *deliciously dumb* password encryption. +* [tinyrick_extras](https://github.com/mcandre/tinyrick_extras) defines some common workflow tasks as plain old Rust functions, that you can sprinkle onto your tinyrick just like any other Rust crate. 
diff --git a/vendor/tinyrick-0.0.13/install.mk b/vendor/tinyrick-0.0.13/install.mk new file mode 100644 index 0000000..210297f --- /dev/null +++ b/vendor/tinyrick-0.0.13/install.mk @@ -0,0 +1,13 @@ +.POSIX: +.SILENT: +.PHONY: all + +all: + rustup component add \ + clippy \ + rustfmt + cargo install --force \ + cargo-audit \ + crit@0.0.7 \ + cross@0.2.5 \ + unmake@0.0.12 diff --git a/vendor/tinyrick-0.0.13/makefile b/vendor/tinyrick-0.0.13/makefile new file mode 100755 index 0000000..3c297df --- /dev/null +++ b/vendor/tinyrick-0.0.13/makefile @@ -0,0 +1,70 @@ +.POSIX: +.SILENT: +.IGNORE: uninstall clean +.PHONY: all \ + audit \ + doc \ + lint \ + clippy \ + rustfmt \ + unmake \ + build \ + port \ + crit \ + test \ + install \ + uninstall \ + publish \ + clean + +BANNER=tinyrick-0.0.13 + +all: build + +test: install + sh -c "cd example && tinyrick -l" + sh -c "cd example && tinyrick -v" + sh -c "cd example && tinyrick -h" + sh -c "cd example && tinyrick" + sh -c "cd example && VERBOSE=1 tinyrick test clippy lint build_debug build_release build doc install unit_test integration_test test banner uninstall clean_cargo clean" + +install: + cargo install --force --path . + +uninstall: + cargo uninstall tinyrick + +audit: + cargo audit + +doc: + cargo doc + +clippy: + cargo clippy + +rustfmt: + cargo fmt + +unmake: + unmake . 
+ +lint: doc clippy rustfmt unmake + +build: lint test + cargo build --release + +publish: + cargo publish + +crit: + crit -b $(BANNER) + +port: crit + sh -c "cd .crit/bin && zip -r $(BANNER).zip $(BANNER)" + +clean: + crit -c + cargo clean + rm -rf example/target + rm -rf example/Cargo.lock diff --git a/vendor/tinyrick-0.0.13/sample.envrc b/vendor/tinyrick-0.0.13/sample.envrc new file mode 100644 index 0000000..e7fe0f8 --- /dev/null +++ b/vendor/tinyrick-0.0.13/sample.envrc @@ -0,0 +1,10 @@ +#!/bin/sh +# +# Install: +# cp sample.envrc .envrc +# +# Uninstall: +# rm .envrc + +source_up +export DOCKER_DEFAULT_PLATFORM='linux/amd64' diff --git a/vendor/tinyrick-0.0.13/src/lib.rs b/vendor/tinyrick-0.0.13/src/lib.rs new file mode 100644 index 0000000..1338b7e --- /dev/null +++ b/vendor/tinyrick-0.0.13/src/lib.rs @@ -0,0 +1,256 @@ +//! Common build patterns + +extern crate lazy_static; + +use std::collections::HashMap; +use std::sync::Mutex; + +/// Cargo toggle +pub static FEATURE: &str = "letmeout"; + +/// Environment name controlling verbosity +pub static VERBOSE_ENVIRONMENT_NAME: &str = "VERBOSE"; + +lazy_static::lazy_static! { + static ref DEPENDENCY_CACHE_MUTEX: Mutex<HashMap<fn(), bool>> = Mutex::new(HashMap::new()); + + pub static ref PHONY_TASK_MUTEX: Mutex<Vec<fn()>> = Mutex::new(Vec::new()); +} + +/// Query common host binary suffix +pub fn binary_suffix() -> String { + if cfg!(windows) { + return ".exe".to_string(); + } + + String::new() +} + +/// Declare a dependency on a task that may panic +pub fn deps(task: fn()) { + let phony: bool = PHONY_TASK_MUTEX.lock().unwrap().contains(&task); + let has_run: bool = DEPENDENCY_CACHE_MUTEX.lock().unwrap().contains_key(&task); + + if phony || !has_run { + task(); + DEPENDENCY_CACHE_MUTEX.lock().unwrap().insert(task, true); + } +} + +/// Declare tasks with no obviously cacheable artifacts. +#[macro_export] +macro_rules! 
phony { + ($t : expr) => { + { + tinyrick::PHONY_TASK_MUTEX + .lock() + .unwrap() + .push($t); + } + }; + ($t : expr, $($u : expr),*) => { + { + let ref mut phony_tasks = tinyrick::PHONY_TASK_MUTEX + .lock() + .unwrap(); + + phony_tasks.push($t); + $( phony_tasks.push($u); )* + } + }; +} + +/// Hey genius, avoid executing commands whenever possible! Look for Rust libraries instead. +/// +/// Executes the given program with the given arguments. +/// Returns the command object. +#[macro_export] +macro_rules! exec_mut_with_arguments { + ($p : expr, $a : expr) => {{ + use std::env::var; + use std::process::Command; + + if var(tinyrick::VERBOSE_ENVIRONMENT_NAME).is_ok() { + println!("{} {}", $p, $a.join(" ")); + } + + Command::new($p).args($a) + }}; +} + +/// Hey genius, avoid executing commands whenever possible! Look for Rust libraries instead. +/// +/// Executes the given program. Can also accept CLI arguments collection. +/// Returns the command object. +#[macro_export] +macro_rules! exec_mut { + ($p : expr) => {{ + let args: &[&str] = &[]; + tinyrick::exec_mut_with_arguments!($p, args) + }}; + ($p : expr, $a : expr) => {{ + tinyrick::exec_mut_with_arguments!($p, $a) + }}; +} + +/// Hey genius, avoid executing commands whenever possible! Look for Rust libraries instead. +/// +/// Executes the given program with the given arguments. +/// Returns the output object. +/// Panics if the command exits with a failure status. +#[macro_export] +macro_rules! exec_output { + ($p : expr) => {{ + tinyrick::exec_mut!($p).output().unwrap() + }}; + ($p : expr, $a : expr) => {{ + tinyrick::exec_mut!($p, $a).output().unwrap() + }}; +} + +/// Hey genius, avoid executing commands whenever possible! Look for Rust libraries instead. +/// +/// Executes the given program with the given arguments. +/// Returns the stdout stream. +/// Panics if the command exits with a failure status. +#[macro_export] +macro_rules! 
exec_stdout { + ($p : expr) => {{ + tinyrick::exec_output!($p).stdout + }}; + ($p : expr, $a : expr) => {{ + tinyrick::exec_output!($p, $a).stdout + }}; +} + +/// Hey genius, avoid executing commands whenever possible! Look for Rust libraries instead. +/// +/// Executes the given program with the given arguments. +/// Returns the stdout stream. +/// Panics if the command exits with a failure status. +#[macro_export] +macro_rules! exec_stderr { + ($p : expr) => {{ + tinyrick::exec_output!($p).stderr + }}; + ($p : expr, $a : expr) => {{ + tinyrick::exec_output!($p, $a).stderr + }}; +} + +/// Hey genius, avoid executing commands whenever possible! Look for Rust libraries instead. +/// +/// Executes the given program with the given arguments. +/// Returns the complete stdout string. +/// Panics if the command exits with a failure status. +#[macro_export] +macro_rules! exec_stdout_utf8 { + ($p : expr) => {{ + String::from_utf8(tinyrick::exec_stdout!($p)).unwrap() + }}; + ($p : expr, $a : expr) => {{ + String::from_utf8(tinyrick::exec_stdout!($p, $a)).unwrap() + }}; +} + +/// Hey genius, avoid executing commands whenever possible! Look for Rust libraries instead. +/// +/// Executes the given program with the given arguments. +/// Returns the complete stderr string. +/// Panics if the command exits with a failure status. +#[macro_export] +macro_rules! exec_stderr_utf8 { + ($p : expr) => {{ + String::from_utf8(tinyrick::exec_stderr!($p)).unwrap() + }}; + ($p : expr, $a : expr) => {{ + String::from_utf8(tinyrick::exec_stderr!($p, $a)).unwrap() + }}; +} + +/// Hey genius, avoid executing commands whenever possible! Look for Rust libraries instead. +/// +/// Executes the given program with the given arguments. +/// Returns the status. +/// Panics if the command could not run to completion. +#[macro_export] +macro_rules! 
exec_status { + ($p : expr) => {{ + tinyrick::exec_mut!($p).status().unwrap() + }}; + ($p : expr, $a : expr) => {{ + tinyrick::exec_mut!($p, $a).status().unwrap() + }}; +} + +/// Hey genius, avoid executing commands whenever possible! Look for Rust libraries instead. +/// +/// Executes the given program with the given arguments. +/// Panics if the command exits with a failure status. +#[macro_export] +macro_rules! exec { + ($p : expr) => {{ + assert!(tinyrick::exec_status!($p).success()); + }}; + ($p : expr, $a : expr) => {{ + assert!(tinyrick::exec_status!($p, $a).success()) + }}; +} + +/// Show registered tasks +#[macro_export] +macro_rules! list_tasks { + ($t : expr) => { + { + use std::process; + + println!("Registered tasks:\n"); + println!("* {}", stringify!($t)); + process::exit(0); + } + }; + ($t : expr, $($u : expr),*) => { + { + use std::process; + + println!("Registered tasks:\n"); + println!("* {}", stringify!($t)); + $(println!("* {}", stringify!($u));)* + process::exit(0); + } + }; +} + +/// Register tasks with CLI entrypoint. +/// The first entry is the default task, +/// When no tasks are named in CLI arguments. +#[macro_export] +macro_rules! wubba_lubba_dub_dub { + ($d : expr ; $($t : expr),*) => { + use std::env; + use std::process; + + let arguments: Vec<String> = env::args().collect(); + + let task_names: Vec<&str> = arguments + .iter() + .skip(1) + .map(String::as_str) + .collect(); + + if task_names.is_empty() { + $d(); + process::exit(0); + } + + for task_name in task_names { + match task_name { + "-l" => tinyrick::list_tasks!($d $(, $t)*), + "--list" => tinyrick::list_tasks!($d $(, $t)*), + stringify!($d) => $d(), + $(stringify!($t) => $t(),)* + _ => panic!("Unknown task {}", task_name) + } + } + }; +} diff --git a/vendor/tinyrick-0.0.13/src/tinyrick.rs b/vendor/tinyrick-0.0.13/src/tinyrick.rs new file mode 100644 index 0000000..a61124f --- /dev/null +++ b/vendor/tinyrick-0.0.13/src/tinyrick.rs @@ -0,0 +1,67 @@ +//! 
CLI tinyrick tool + +extern crate die; +extern crate getopts; +extern crate tinyrick; + +use die::{die, Die}; +use std::env; +use std::path; + +/// Show version information +pub fn banner() { + println!("{} {}", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION")); +} + +/// CLI entrypoint +fn main() { + let brief: String = format!("Usage: {} [options]", env!("CARGO_PKG_NAME")); + + let mut opts: getopts::Options = getopts::Options::new(); + opts.optflag("l", "list", "list available tasks"); + opts.optflag("h", "help", "print usage info"); + opts.optflag("v", "version", "print version info"); + + let usage: String = opts.usage(&brief); + let arguments: Vec<String> = env::args().collect(); + let optmatches: getopts::Matches = opts.parse(&arguments[1..]).die(&usage); + + if optmatches.opt_present("h") { + die!(0; usage); + } + + if optmatches.opt_present("v") { + die!(0; format!("{} {}", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION"))); + } + + let list_tasks: bool = optmatches.opt_present("l"); + let tasks: Vec<String> = optmatches.free; + + tinyrick::exec!( + "cargo", + &[ + "build", + "--bin", + env!("CARGO_PKG_NAME"), + "--features", + tinyrick::FEATURE + ] + ); + + let target_path: &path::Path = path::Path::new("target"); + + let rick_pathbuf: path::PathBuf = target_path.join("debug").join(format!( + "{}{}", + env!("CARGO_PKG_NAME"), + tinyrick::binary_suffix() + )); + + let rick_path: &str = rick_pathbuf.to_str().unwrap(); + + if list_tasks { + tinyrick::exec!(rick_path, &["-l"]); + die!(0); + } + + tinyrick::exec!(rick_path, tasks); +} diff --git a/vendor/tinyrick/.cargo-checksum.json b/vendor/tinyrick/.cargo-checksum.json index dac4740..88196dd 100644 --- a/vendor/tinyrick/.cargo-checksum.json +++ b/vendor/tinyrick/.cargo-checksum.json @@ -1 +1 @@ 
-{"files":{"Cargo.toml":"0a17a4f6b595c7e109dd4e1f671dd718fb774127e73f921259048cd2af84aaf8","DEVELOPMENT.md":"ab6cbb35cb42ca9a3091f2b6102a53a0aac7c38e65ea5e51649fbb7ba2b86161","README.md":"a49d82476c132075b496256f56b7f89458aa23f72b6fd7fa8de0458961f011d6","build.sh":"4bbee692a242410f9745ef86195f66877799d6983ea77f31589e4dbbb8adf483","crosscompile-linux.sh":"230130b57ea14dd8391dd7ae353b98495b8e6dff92c0f919aa7e573caff7f22e","src/lib.rs":"c6304e6f5d2beb5c8a56ed7a4884b9f1751c55ec88ead37863b195338dcf1ce0","src/tinyrick.rs":"db9727aef623f01615ec965bbb09f4baeb26d6488fa9a1399648e570008b8e9a"},"package":"504eac4d92d289782273d1cc4da582078f9b35e85b64c6dddfbf7ed773d7e294"} \ No newline at end of file +{"files":{"Cargo.lock":"1e8b91c307ee1da1e4e884d4db5761c8cb9dab85fa83555f86cfcebe9be6bd03","Cargo.toml":"a7ffc956a85913544bfaf94a1c1d30a5b25e419e74bed7a9579e70998a3534b6","Cross.toml":"5ea46353870ddf25126cd53fd2d6ec8c0fc991ba06e17a93130d354ba444bdf4","DEVELOPMENT.md":"b82234a910fe9195138019ceb158028bd46afa2949ab2310e0a3db6a8822bfff","LICENSE.md":"692fd932aac63bc63ba3b4908850cdcc1a38cee1527a07440355811028656c38","README.md":"4285f0a220d7a0e4319d8c45dfeea9c4220fb431880f2813d2506156f8590ab4","install.mk":"8afc130458ba6d1fba0f06fa13466324a9efcb59b845c26e63afc41231d0274d","makefile":"78476d08314d2b73862cdee087d51e1d0aebb3b3d46bda24e2176a26b00efcbf","sample.envrc":"cedd4bfb23ba835278b0e4df8c77ac1d42bd11a688d736621346bd35195a37ff","src/lib.rs":"4ed464aa7f4ff462cb315f1508240827e6b47aea00ffbefcfcf9f50d62c83d9c","src/tinyrick.rs":"8cdc996761ae236c717b03741a786bd5376a74c76c8bd2e8d248dc18ec98192e"},"package":"0b3b4fc359c03f76007b160be12ba2626986379f43dce9a016a4e473f3708b1f"} \ No newline at end of file diff --git a/vendor/tinyrick/Cargo.toml b/vendor/tinyrick/Cargo.toml index fcbe184..7843a3e 100644 --- a/vendor/tinyrick/Cargo.toml +++ b/vendor/tinyrick/Cargo.toml @@ -3,20 +3,20 @@ # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal 
compatibility # with all versions of Cargo and also rewrite `path` dependencies -# to registry (e.g. crates.io) dependencies +# to registry (e.g., crates.io) dependencies. # -# If you believe there's an error in this file please file an -# issue against the rust-lang/cargo repository. If you're -# editing this file be aware that the upstream Cargo.toml -# will likely look very different (and much more reasonable) +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. [package] name = "tinyrick" -version = "0.0.9" +version = "0.0.14" authors = ["Andrew Pennebaker <andrew.pennebaker@gmail.com>"] description = "a freeform Rust build system" homepage = "https://github.com/mcandre/tinyrick" documentation = "https://docs.rs/tinyrick/" +readme = "README.md" license = "BSD-2-Clause" [lib] @@ -25,8 +25,12 @@ name = "tinyrick" [[bin]] name = "tinyrick" path = "src/tinyrick.rs" + +[dependencies.die] +version = "0.2.0" + [dependencies.getopts] -version = "0.2.18" +version = "0.2.21" [dependencies.lazy_static] -version = "1.1.0" +version = "1.4.0" diff --git a/vendor/tinyrick/Cross.toml b/vendor/tinyrick/Cross.toml new file mode 100644 index 0000000..adf04ae --- /dev/null +++ b/vendor/tinyrick/Cross.toml @@ -0,0 +1,15 @@ +[target.x86_64-unknown-netbsd] +pre-build = [ + "mkdir -p /tmp/netbsd", + "curl https://cdn.netbsd.org/pub/NetBSD/NetBSD-9.2/amd64/binary/sets/base.tar.xz -O", + "tar -C /tmp/netbsd -xJf base.tar.xz", + "cp /tmp/netbsd/usr/lib/libexecinfo.so /usr/local/x86_64-unknown-netbsd/lib", + "rm base.tar.xz", + "rm -rf /tmp/netbsd", +] + +[target.aarch64-apple-darwin] +image = "freeznet/aarch64-apple-darwin-cross:11.3.1" + +[target.x86_64-apple-darwin] +image = "freeznet/x86_64-apple-darwin-cross:11.3.1" diff --git a/vendor/tinyrick/DEVELOPMENT.md b/vendor/tinyrick/DEVELOPMENT.md index ab5a1ff..100fc27 100644 --- 
a/vendor/tinyrick/DEVELOPMENT.md +++ b/vendor/tinyrick/DEVELOPMENT.md @@ -1,47 +1,66 @@ # OVERVIEW -tinyrick's own compilation process is compatible with standard `cargo`. We wrap some common workflows with `build.sh` tasks for convenience. +tinyrick's own compilation process is compatible with standard `cargo`. We wrap some common workflows with `build` tasks for convenience. # BUILDTIME REQUIREMENTS -* [Rust](https://www.rust-lang.org/en-US/) 1.30+ -* [clippy](https://github.com/rust-lang-nursery/rust-clippy) -* [coreutils](https://www.gnu.org/software/coreutils/coreutils.html) -* [zip](https://linux.die.net/man/1/zip) -* [Docker](https://www.docker.com/) +* [Docker](https://www.docker.com/) 20.10.21+ +* POSIX compatible [make](https://pubs.opengroup.org/onlinepubs/9699919799/utilities/make.html) +* [Rust](https://www.rust-lang.org/en-US/) 1.75.0+ +* POSIX compatible [sh](https://pubs.opengroup.org/onlinepubs/9699919799/utilities/sh.html) +* POSIX compatible [tar](https://pubs.opengroup.org/onlinepubs/7908799/xcu/tar.html) +* Provision additional dev tools with `make -f install.mk` + +## Recommended + +* a host capable of running musl/Linux containers (e.g. a GNU/Linux, musl/Linux, macOS, or Windows host) +* [Docker First Aid Kit](https://github.com/mcandre/docker-first-aid-kit) +* Apply `DOCKER_DEFAULT_PLATFORM` = `linux/amd64` environment variable +* [ASDF](https://asdf-vm.com/) 0.10 (run `asdf reshim` after provisioning) +* [cargo-cache](https://crates.io/crates/cargo-cache) +* [direnv](https://direnv.net/) 2 +* a UNIX environment, such as macOS, Linux, BSD, [WSL](https://learn.microsoft.com/en-us/windows/wsl/), etc. + +Non-UNIX environments may produce subtle adverse effects when linting or generating application ports. 
# INSTALL BINARY ARTIFACTS FROM LOCAL SOURCE ```console -$ sh build.sh install +$ make install ``` # UNINSTALL BINARY ARTIFACTS ```console -$ sh build.sh uninstall +$ make uninstall +``` + +# AUDIT + +```console +$ make audit ``` # BUILD: LINT, DOC, COMPILE, and TEST ```console -$ sh build.sh [build] +$ make build ``` # PUBLISH ```console -$ sh build.sh publish +$ make publish ``` # PORT ```console -$ sh build.sh port +$ make port ``` # CLEAN ```console -$ sh build.sh clean +$ make clean ``` diff --git a/vendor/tinyrick/LICENSE.md b/vendor/tinyrick/LICENSE.md new file mode 100644 index 0000000..bff0a40 --- /dev/null +++ b/vendor/tinyrick/LICENSE.md @@ -0,0 +1,26 @@ +Copyright (c) 2018, Andrew Pennebaker +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright, this + list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OR MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +The views and conclusions contained in the software and documentation are those +of the authors and should not be interpreted as representing official policies, +either expressed or implied, of the FreeBSD project. diff --git a/vendor/tinyrick/README.md b/vendor/tinyrick/README.md index fee621d..c131009 100644 --- a/vendor/tinyrick/README.md +++ b/vendor/tinyrick/README.md @@ -39,11 +39,21 @@ https://crates.io/crates/tinyrick # API DOCUMENTATION -https://docs.rs/tinyrick/ +https://docs.rs/tinyrick/latest/tinyrick/ + +# LICENSE + +BSD-2-Clause # RUNTIME REQUIREMENTS -* [Rust](https://www.rust-lang.org/en-US/) 1.30+ +* [Rust](https://www.rust-lang.org/en-US/) 1.75.0+ + +## Recommended + +* [ASDF](https://asdf-vm.com/) 0.10 (run `asdf reshim` after each Rust application binary installation) +* [direnv](https://direnv.net/) 2 +* [cargo-cache](https://crates.io/crates/cargo-cache) # SETUP @@ -53,29 +63,29 @@ Write some tasks in a `tinyrick.rs` build configuration script at the top-level ```rust fn banner() { - println!("{} {}", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION")); + println!("{} {}", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION")); } fn test() { - tinyrick::exec!("cargo", &["test"]); + tinyrick::exec!("cargo", &["test"]); } fn build() { - tinyrick::deps(test); - tinyrick::exec!("cargo", &["build", "--release"]); + tinyrick::deps(test); + tinyrick::exec!("cargo", &["build", "--release"]); } fn publish() { - tinyrick::exec!("cargo", &["publish"]); + tinyrick::exec!("cargo", &["publish"]); } fn clean() { - tinyrick::exec!("cargo", &["clean"]); + tinyrick::exec!("cargo", &["clean"]); } fn main() { - tinyrick::phony!(clean); - tinyrick::wubba_lubba_dub_dub!(build; banner, test, publish, clean); + tinyrick::phony!(clean); + tinyrick::wubba_lubba_dub_dub!(build; banner, test, publish, clean); } ``` @@ -90,7 +100,7 @@ description = "hyperadvanced derpmobiles" version = "3.1.4" [dependencies] -tinyrick = { version = "0.0.9", optional = true } +tinyrick = { 
version = "0.0.14", optional = true } [features] letmeout = ["tinyrick"] @@ -144,9 +154,28 @@ Whenever possible, use regular Rust code, as in the `banner()` example. There's For more details on developing tinyrick itself, see [DEVELOPMENT.md](DEVELOPMENT.md). -# CREDITS +# SEE ALSO * Inspired by the excellent [mage](https://magefile.org/) build system for Go projects +* [bb](https://github.com/mcandre/bb), a build system for (g)awk projects +* [cargo](https://doc.rust-lang.org/cargo/reference/build-scripts.html) custom build scripts, primarily for generating Rust source files from other languages +* [cmake](https://cmake.org/) for C/C++ projects +* [dale](https://github.com/mcandre/dale) builds D projects +* [GNU autotools](https://www.gnu.org/software/automake/manual/html_node/Autotools-Introduction.html), a build system for Linux C/C++ projects +* [Gradle](https://gradle.org/), a build system for JVM projects +* [invoke](https://pypi.org/project/invoke/), a Python task runner +* [jelly](https://github.com/mcandre/jelly), a JSON task runner +* [lake](https://luarocks.org/modules/steved/lake), a Lua task runner +* [Leiningen](https://leiningen.org/) + [lein-exec](https://github.com/kumarshantanu/lein-exec), a Clojure task runner +* [lichen](https://github.com/mcandre/lichen), a sed task runner +* [POSIX make](https://pubs.opengroup.org/onlinepubs/009695299/utilities/make.html), a task runner standard for C/C++ and various other software projects +* [mian](https://github.com/mcandre/mian), a task runner for (Chicken) Scheme Lisp +* [Rake](https://ruby.github.io/rake/), a task runner for Ruby projects +* [Rebar3](https://www.rebar3.org/), a build system for Erlang projects +* [rez](https://github.com/mcandre/rez) builds C/C++ projects +* [sbt](https://www.scala-sbt.org/index.html), a build system for Scala projects +* [Shake](https://shakebuild.com/), a task runner for Haskell projects +* [yao](https://github.com/mcandre/yao), a task runner for Common LISP projects # 
EVEN MORE EXAMPLES diff --git a/vendor/tinyrick/build.sh b/vendor/tinyrick/build.sh deleted file mode 100755 index f7febdb..0000000 --- a/vendor/tinyrick/build.sh +++ /dev/null @@ -1,90 +0,0 @@ -#!/bin/sh -# -# Avoid make in order to work around https://github.com/rust-lang/rust/issues/52801 - -PACKAGE='tinyrick' -VERSION='0.0.9' -ARCHIVE="${PACKAGE}-${VERSION}.zip" -BIN="target/debug/$PACKAGE" - -test() { - install && - sh -c "cd example && tinyrick -l" && - sh -c "cd example && tinyrick -v" && - sh -c "cd example && tinyrick -h" && - sh -c "cd example && tinyrick" && - sh -c "cd example && VERBOSE=1 tinyrick test clippy lint build_debug build_release build doc install unit_test integration_test test banner uninstall clean_cargo clean" -} - -install_binaries() { - cargo install --force --path . -} - -install() { - install_binaries -} - -uninstall() { - cargo uninstall tinyrick -} - -clippy() { - cargo clippy -} - -doc() { - cargo doc -} - -lint() { - doc && - clippy -} - -build() { - lint && - test && - cargo build --release -} - -publish() { - cargo publish -} - -crosscompile() { - sh crosscompile-linux.sh x86_64 gnu && - sh crosscompile-linux.sh x86_64 musl -} - -port() { - crosscompile && - zip "$ARCHIVE" \ - target/x86_64-unknown-linux-gnu/release/tinyrick target/x86_64-unknown-linux-musl/release/tinyrick -} - -clean_example() { - rm -rf example/target; - rm -rf example/Cargo.lock -} - -clean_cargo() { - cargo clean -} - -clean_ports() { - rm *.zip -} - -clean() { - clean_example; - clean_cargo; - clean_ports -} - -if [ -z "$1" ]; then - build -fi - -for task in "$@"; do - "$task" -done diff --git a/vendor/tinyrick/crosscompile-linux.sh b/vendor/tinyrick/crosscompile-linux.sh deleted file mode 100755 index 539d895..0000000 --- a/vendor/tinyrick/crosscompile-linux.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash -set -euo pipefail - -usage() { - echo -e "Usage: $0 <arch> <libc>\n" - - echo -e "Architectures:\n" - echo -e "x86_64\n" - - echo -e "libc's:\n" - echo 
"gnu" - echo "musl" - - exit 1 -} - -if [ "$#" -ne 2 ]; then - usage -fi - -arch="$1" -libc="$2" - -host_root_src_dir="$(cd "$(dirname "$0")" && pwd)" - -docker run -v "${host_root_src_dir}:/src" \ - "mcandre/docker-rustup:${arch}-${libc}" \ - sh -c "cd /src && cargo build --release --target ${arch}-unknown-linux-${libc}" diff --git a/vendor/tinyrick/install.mk b/vendor/tinyrick/install.mk new file mode 100644 index 0000000..2531551 --- /dev/null +++ b/vendor/tinyrick/install.mk @@ -0,0 +1,13 @@ +.POSIX: +.SILENT: +.PHONY: all + +all: + rustup component add \ + clippy \ + rustfmt + cargo install --force \ + cargo-audit \ + crit@0.0.7 \ + cross@0.2.5 \ + unmake@0.0.16 diff --git a/vendor/tinyrick/makefile b/vendor/tinyrick/makefile new file mode 100755 index 0000000..7c2c820 --- /dev/null +++ b/vendor/tinyrick/makefile @@ -0,0 +1,71 @@ +.POSIX: +.SILENT: +.IGNORE: uninstall clean +.PHONY: all \ + audit \ + doc \ + lint \ + clippy \ + rustfmt \ + unmake \ + build \ + port \ + crit \ + test \ + install \ + uninstall \ + publish \ + clean + +BANNER=tinyrick-0.0.14 + +all: build + +test: install + sh -c "cd example && tinyrick -l" + sh -c "cd example && tinyrick -v" + sh -c "cd example && tinyrick -h" + sh -c "cd example && tinyrick" + sh -c "cd example && VERBOSE=1 tinyrick test clippy lint build_debug build_release build doc install unit_test integration_test test banner uninstall clean_cargo clean" + +install: + cargo install --force --path . + +uninstall: + cargo uninstall tinyrick + +audit: + cargo audit + +doc: + cargo doc + +clippy: + cargo clippy + +rustfmt: + cargo fmt + +unmake: + unmake . + unmake -n . 
+ +lint: doc clippy rustfmt unmake + +build: lint test + cargo build --release + +publish: + cargo publish + +crit: + crit -b $(BANNER) + +port: crit + sh -c "cd .crit/bin && tar czf $(BANNER).tgz $(BANNER)" + +clean: + crit -c + cargo clean + rm -rf example/target + rm -rf example/Cargo.lock diff --git a/vendor/tinyrick/sample.envrc b/vendor/tinyrick/sample.envrc new file mode 100644 index 0000000..e7fe0f8 --- /dev/null +++ b/vendor/tinyrick/sample.envrc @@ -0,0 +1,10 @@ +#!/bin/sh +# +# Install: +# cp sample.envrc .envrc +# +# Uninstall: +# rm .envrc + +source_up +export DOCKER_DEFAULT_PLATFORM='linux/amd64' diff --git a/vendor/tinyrick/src/lib.rs b/vendor/tinyrick/src/lib.rs index 2f2cffa..1338b7e 100644 --- a/vendor/tinyrick/src/lib.rs +++ b/vendor/tinyrick/src/lib.rs @@ -6,71 +6,58 @@ use std::collections::HashMap; use std::sync::Mutex; /// Cargo toggle -pub static FEATURE : &str = "letmeout"; +pub static FEATURE: &str = "letmeout"; /// Environment name controlling verbosity -pub static VERBOSE_ENVIRONMENT_NAME : &str = "VERBOSE"; +pub static VERBOSE_ENVIRONMENT_NAME: &str = "VERBOSE"; lazy_static::lazy_static! { - static ref DEPENDENCY_CACHE_MUTEX : Mutex<HashMap<fn(), bool>> = Mutex::new(HashMap::new()); -} + static ref DEPENDENCY_CACHE_MUTEX: Mutex<HashMap<fn(), bool>> = Mutex::new(HashMap::new()); -lazy_static::lazy_static! 
{ - pub static ref PHONY_TASK_MUTEX : Mutex<Vec<fn()>> = Mutex::new(Vec::new()); + pub static ref PHONY_TASK_MUTEX: Mutex<Vec<fn()>> = Mutex::new(Vec::new()); } /// Query common host binary suffix pub fn binary_suffix() -> String { - if cfg!(windows) { - ".exe".to_string() - } else { - "".to_string() - } + if cfg!(windows) { + return ".exe".to_string(); + } + + String::new() } /// Declare a dependency on a task that may panic pub fn deps(task: fn()) { - let phony : bool = PHONY_TASK_MUTEX - .lock() - .unwrap() - .contains(&task); - - let has_run = DEPENDENCY_CACHE_MUTEX - .lock() - .unwrap() - .contains_key(&task); - - if phony || !has_run { - task(); + let phony: bool = PHONY_TASK_MUTEX.lock().unwrap().contains(&task); + let has_run: bool = DEPENDENCY_CACHE_MUTEX.lock().unwrap().contains_key(&task); - DEPENDENCY_CACHE_MUTEX - .lock() - .unwrap() - .insert(task, true); - } + if phony || !has_run { + task(); + DEPENDENCY_CACHE_MUTEX.lock().unwrap().insert(task, true); + } } /// Declare tasks with no obviously cacheable artifacts. #[macro_export] macro_rules! phony { - ($t : expr) => { - { - tinyrick::PHONY_TASK_MUTEX - .lock() - .unwrap() - .push($t); - } - }; - ($t : expr, $($u : expr),*) => { - { - let ref mut phony_tasks = tinyrick::PHONY_TASK_MUTEX - .lock() - .unwrap(); - - phony_tasks.push($t); - $( phony_tasks.push($u); )* - } - }; + ($t : expr) => { + { + tinyrick::PHONY_TASK_MUTEX + .lock() + .unwrap() + .push($t); + } + }; + ($t : expr, $($u : expr),*) => { + { + let ref mut phony_tasks = tinyrick::PHONY_TASK_MUTEX + .lock() + .unwrap(); + + phony_tasks.push($t); + $( phony_tasks.push($u); )* + } + }; } /// Hey genius, avoid executing commands whenever possible! Look for Rust libraries instead. @@ -79,19 +66,16 @@ macro_rules! phony { /// Returns the command object. #[macro_export] macro_rules! 
exec_mut_with_arguments { - ($p : expr, $a : expr) => { - { - use std::env::var; - use std::process::Command; + ($p : expr, $a : expr) => {{ + use std::env::var; + use std::process::Command; - if var(tinyrick::VERBOSE_ENVIRONMENT_NAME).is_ok() { - println!("{} {}", $p, $a.join(" ")); - } + if var(tinyrick::VERBOSE_ENVIRONMENT_NAME).is_ok() { + println!("{} {}", $p, $a.join(" ")); + } - Command::new($p) - .args($a) - } - }; + Command::new($p).args($a) + }}; } /// Hey genius, avoid executing commands whenever possible! Look for Rust libraries instead. @@ -100,17 +84,13 @@ macro_rules! exec_mut_with_arguments { /// Returns the command object. #[macro_export] macro_rules! exec_mut { - ($p : expr) => { - { - let args : &[&str] = &[]; - tinyrick::exec_mut_with_arguments!($p, args) - } - }; - ($p : expr, $a : expr) => { - { - tinyrick::exec_mut_with_arguments!($p, $a) - } - }; + ($p : expr) => {{ + let args: &[&str] = &[]; + tinyrick::exec_mut_with_arguments!($p, args) + }}; + ($p : expr, $a : expr) => {{ + tinyrick::exec_mut_with_arguments!($p, $a) + }}; } /// Hey genius, avoid executing commands whenever possible! Look for Rust libraries instead. @@ -120,20 +100,12 @@ macro_rules! exec_mut { /// Panics if the command exits with a failure status. #[macro_export] macro_rules! exec_output { - ($p : expr) => { - { - tinyrick::exec_mut!($p) - .output() - .unwrap() - } - }; - ($p : expr, $a : expr) => { - { - tinyrick::exec_mut!($p, $a) - .output() - .unwrap() - } - }; + ($p : expr) => {{ + tinyrick::exec_mut!($p).output().unwrap() + }}; + ($p : expr, $a : expr) => {{ + tinyrick::exec_mut!($p, $a).output().unwrap() + }}; } /// Hey genius, avoid executing commands whenever possible! Look for Rust libraries instead. @@ -143,18 +115,12 @@ macro_rules! exec_output { /// Panics if the command exits with a failure status. #[macro_export] macro_rules! 
exec_stdout { - ($p : expr) => { - { - tinyrick::exec_output!($p) - .stdout - } - }; - ($p : expr, $a : expr) => { - { - tinyrick::exec_output!($p, $a) - .stdout - } - }; + ($p : expr) => {{ + tinyrick::exec_output!($p).stdout + }}; + ($p : expr, $a : expr) => {{ + tinyrick::exec_output!($p, $a).stdout + }}; } /// Hey genius, avoid executing commands whenever possible! Look for Rust libraries instead. @@ -164,18 +130,12 @@ macro_rules! exec_stdout { /// Panics if the command exits with a failure status. #[macro_export] macro_rules! exec_stderr { - ($p : expr) => { - { - tinyrick::exec_output!($p) - .stderr - } - }; - ($p : expr, $a : expr) => { - { - tinyrick::exec_output!($p, $a) - .stderr - } - }; + ($p : expr) => {{ + tinyrick::exec_output!($p).stderr + }}; + ($p : expr, $a : expr) => {{ + tinyrick::exec_output!($p, $a).stderr + }}; } /// Hey genius, avoid executing commands whenever possible! Look for Rust libraries instead. @@ -185,18 +145,12 @@ macro_rules! exec_stderr { /// Panics if the command exits with a failure status. #[macro_export] macro_rules! exec_stdout_utf8 { - ($p : expr) => { - { - String::from_utf8(tinyrick::exec_stdout!($p)) - .unwrap() - } - }; - ($p : expr, $a : expr) => { - { - String::from_utf8(tinyrick::exec_stdout!($p, $a)) - .unwrap() - } - }; + ($p : expr) => {{ + String::from_utf8(tinyrick::exec_stdout!($p)).unwrap() + }}; + ($p : expr, $a : expr) => {{ + String::from_utf8(tinyrick::exec_stdout!($p, $a)).unwrap() + }}; } /// Hey genius, avoid executing commands whenever possible! Look for Rust libraries instead. @@ -206,18 +160,12 @@ macro_rules! exec_stdout_utf8 { /// Panics if the command exits with a failure status. #[macro_export] macro_rules! 
exec_stderr_utf8 { - ($p : expr) => { - { - String::from_utf8(tinyrick::exec_stderr!($p)) - .unwrap() - } - }; - ($p : expr, $a : expr) => { - { - String::from_utf8(tinyrick::exec_stderr!($p, $a)) - .unwrap() - } - }; + ($p : expr) => {{ + String::from_utf8(tinyrick::exec_stderr!($p)).unwrap() + }}; + ($p : expr, $a : expr) => {{ + String::from_utf8(tinyrick::exec_stderr!($p, $a)).unwrap() + }}; } /// Hey genius, avoid executing commands whenever possible! Look for Rust libraries instead. @@ -227,20 +175,12 @@ macro_rules! exec_stderr_utf8 { /// Panics if the command could not run to completion. #[macro_export] macro_rules! exec_status { - ($p : expr) => { - { - tinyrick::exec_mut!($p) - .status() - .unwrap() - } - }; - ($p : expr, $a : expr) => { - { - tinyrick::exec_mut!($p, $a) - .status() - .unwrap() - } - }; + ($p : expr) => {{ + tinyrick::exec_mut!($p).status().unwrap() + }}; + ($p : expr, $a : expr) => {{ + tinyrick::exec_mut!($p, $a).status().unwrap() + }}; } /// Hey genius, avoid executing commands whenever possible! Look for Rust libraries instead. @@ -249,51 +189,36 @@ macro_rules! exec_status { /// Panics if the command exits with a failure status. #[macro_export] macro_rules! exec { - ($p : expr) => { - { - assert!( - tinyrick::exec_status!($p) - .success() - ); - } - }; - ($p : expr, $a : expr) => { - { - assert!( - tinyrick::exec_status!($p, $a) - .success() - ) - } - }; + ($p : expr) => {{ + assert!(tinyrick::exec_status!($p).success()); + }}; + ($p : expr, $a : expr) => {{ + assert!(tinyrick::exec_status!($p, $a).success()) + }}; } - /// Show registered tasks #[macro_export] macro_rules! 
list_tasks { - ($t : expr) => { - { - use std::process::exit; - - println!("Registered tasks:\n"); - - println!("* {}", stringify!($t)); + ($t : expr) => { + { + use std::process; - exit(0); - } - }; - ($t : expr, $($u : expr),*) => { - { - use std::process::exit; - - println!("Registered tasks:\n"); - - println!("* {}", stringify!($t)); - $(println!("* {}", stringify!($u));)* - - exit(0); - } - }; + println!("Registered tasks:\n"); + println!("* {}", stringify!($t)); + process::exit(0); + } + }; + ($t : expr, $($u : expr),*) => { + { + use std::process; + + println!("Registered tasks:\n"); + println!("* {}", stringify!($t)); + $(println!("* {}", stringify!($u));)* + process::exit(0); + } + }; } /// Register tasks with CLI entrypoint. @@ -301,31 +226,31 @@ macro_rules! list_tasks { /// When no tasks are named in CLI arguments. #[macro_export] macro_rules! wubba_lubba_dub_dub { - ($d : expr ; $($t : expr),*) => { - use std::env::args; - use std::process::exit; + ($d : expr ; $($t : expr),*) => { + use std::env; + use std::process; - let arguments : Vec<String> = args() - .collect(); + let arguments: Vec<String> = env::args().collect(); - let task_names : Vec<&str> = arguments - .iter() - .skip(1) - .map(String::as_str) - .collect(); + let task_names: Vec<&str> = arguments + .iter() + .skip(1) + .map(String::as_str) + .collect(); - if task_names.len() == 0 { - $d(); - } else { - for task_name in task_names { - match task_name { - "-l" => tinyrick::list_tasks!($d $(, $t)*), - "--list" => tinyrick::list_tasks!($d $(, $t)*), - stringify!($d) => $d(), - $(stringify!($t) => $t(),)* - _ => panic!("Unknown task {}", task_name) + if task_names.is_empty() { + $d(); + process::exit(0); } - } - } - }; + + for task_name in task_names { + match task_name { + "-l" => tinyrick::list_tasks!($d $(, $t)*), + "--list" => tinyrick::list_tasks!($d $(, $t)*), + stringify!($d) => $d(), + $(stringify!($t) => $t(),)* + _ => panic!("Unknown task {}", task_name) + } + } + }; } diff --git 
a/vendor/tinyrick/src/tinyrick.rs b/vendor/tinyrick/src/tinyrick.rs index e273497..a61124f 100644 --- a/vendor/tinyrick/src/tinyrick.rs +++ b/vendor/tinyrick/src/tinyrick.rs @@ -1,76 +1,67 @@ //! CLI tinyrick tool +extern crate die; extern crate getopts; extern crate tinyrick; +use die::{die, Die}; use std::env; use std::path; -use std::process; - -// Show short CLI spec -fn usage(brief : &str, opts : &getopts::Options) { - println!("{}", (*opts).usage(brief)); -} /// Show version information pub fn banner() { - println!("{} {}", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION")); + println!("{} {}", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION")); } /// CLI entrypoint fn main() { - let arguments : Vec<String> = env::args() - .collect(); + let brief: String = format!("Usage: {} [options]", env!("CARGO_PKG_NAME")); + + let mut opts: getopts::Options = getopts::Options::new(); + opts.optflag("l", "list", "list available tasks"); + opts.optflag("h", "help", "print usage info"); + opts.optflag("v", "version", "print version info"); - let brief = format!("Usage: {} [options]", env!("CARGO_PKG_NAME")); + let usage: String = opts.usage(&brief); + let arguments: Vec<String> = env::args().collect(); + let optmatches: getopts::Matches = opts.parse(&arguments[1..]).die(&usage); - let mut opts : getopts::Options = getopts::Options::new(); - opts.optflag("l", "list", "list available tasks"); - opts.optflag("h", "help", "print usage info"); - opts.optflag("v", "version", "print version info"); + if optmatches.opt_present("h") { + die!(0; usage); + } - match opts.parse(&arguments[1..]) { - Err(_) => { - usage(&brief, &opts); - process::abort(); - }, - Ok(optmatches) => { - let list_tasks = optmatches.opt_present("l"); + if optmatches.opt_present("v") { + die!(0; format!("{} {}", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION"))); + } - if optmatches.opt_present("h") { - usage(&brief, &opts); - process::exit(0); - } else if optmatches.opt_present("v") { - banner(); - 
process::exit(0); - } else { - let tasks = optmatches.free; + let list_tasks: bool = optmatches.opt_present("l"); + let tasks: Vec<String> = optmatches.free; - tinyrick::exec!( - "cargo", - &[ - "build", - "--bin", env!("CARGO_PKG_NAME"), - "--features", tinyrick::FEATURE - ] - ); + tinyrick::exec!( + "cargo", + &[ + "build", + "--bin", + env!("CARGO_PKG_NAME"), + "--features", + tinyrick::FEATURE + ] + ); - let target_path : &path::Path = path::Path::new("target"); + let target_path: &path::Path = path::Path::new("target"); - let rick_pathbuf : path::PathBuf = target_path - .join("debug") - .join(&format!("{}{}", env!("CARGO_PKG_NAME"), tinyrick::binary_suffix())); + let rick_pathbuf: path::PathBuf = target_path.join("debug").join(format!( + "{}{}", + env!("CARGO_PKG_NAME"), + tinyrick::binary_suffix() + )); - let rick_path : &str = rick_pathbuf - .to_str() - .unwrap(); + let rick_path: &str = rick_pathbuf.to_str().unwrap(); - if list_tasks { - tinyrick::exec!(rick_path, &["-l"]); - } else { - tinyrick::exec!(rick_path, tasks); - } - } + if list_tasks { + tinyrick::exec!(rick_path, &["-l"]); + die!(0); } - } + + tinyrick::exec!(rick_path, tasks); } diff --git a/vendor/tinyrick_extras/.cargo-checksum.json b/vendor/tinyrick_extras/.cargo-checksum.json index ba226ab..28b26d0 100644 --- a/vendor/tinyrick_extras/.cargo-checksum.json +++ b/vendor/tinyrick_extras/.cargo-checksum.json @@ -1 +1 @@ -{"files":{"Cargo.lock":"da198fcdf14b6a63bb62e67b6bd33e4740452bc304d12f71f988a1b970d6ce54","Cargo.toml":"81f4de08e15070dd6fe9a2b92d4b3c5ac5b9543213fb768865ef30993e81aa82","DEVELOPMENT.md":"327f69f87c56ffb70f81978f1b9704d051dc6690e1e3f3a31c7f8a8060caae20","README.md":"00dee732b98c8596daea3e513f088bf56fb0a4fa0b6d85e87989162b1cc9a8b3","src/lib.rs":"d09a356cfed5ba83e70d7d80983f249455467821413a9844aaa558f3623f845f","tinyrick.rs":"d94d99260868b7156b074d4a5c1d89d1f06826fdd2ae5d6a1e029159f8413933"},"package":"a59a496a47794576c8fb1d8fd9727f6338d98a95acf9337400f91f8e891c041b"} \ No newline 
at end of file +{"files":{"Cargo.lock":"6361f9ccec7eb7a205735e93f430cf2cff7a3b5dbcf564eaa6ec67114c370e1d","Cargo.toml":"30f5a2dfba3062ea9c959c2713a0fe0913f61ecb72beb78cba41429ee02e0068","DEVELOPMENT.md":"525f535c9556135d478bc07628c04d45b141f9e8ef3d8b57cc95ab485e7a7d56","LICENSE.md":"692fd932aac63bc63ba3b4908850cdcc1a38cee1527a07440355811028656c38","README.md":"f7fcddd061f63fb2ce0dede8fbb8d0b126066262cd1f9eefc856a27790d5807c","makefile":"4e6d5ef5ae5f4aca6cbb4ff33b884e24bc7ad8e72dbf45f9a71720ebfcde4033","src/lib.rs":"8a6b3a2b5348de9b3cd0de6c3a363de1c3c65e5a61199c4a4c32600ecd3dd84f","tinyrick.rs":"c49eaf976d62332e5f5d6841d5d82600b15a6ee68b277b411cc159c6a3449816"},"package":"4aae31f75079b9c67acd8a1b381f447727050a5596010dfc259a6179c0a8f8a8"} \ No newline at end of file diff --git a/vendor/tinyrick_extras/Cargo.toml b/vendor/tinyrick_extras/Cargo.toml index c8af778..0d01651 100644 --- a/vendor/tinyrick_extras/Cargo.toml +++ b/vendor/tinyrick_extras/Cargo.toml @@ -11,7 +11,7 @@ [package] name = "tinyrick_extras" -version = "0.0.6" +version = "0.0.8" authors = ["Andrew Pennebaker <andrew.pennebaker@gmail.com>"] description = "common tasks for tinyrick projects" homepage = "https://github.com/mcandre/tinyrick_extras" @@ -28,7 +28,7 @@ path = "tinyrick.rs" required-features = ["letmeout"] [dependencies.tinyrick] -version = "0.0.9" +version = "0.0.13" [features] letmeout = [] diff --git a/vendor/tinyrick_extras/DEVELOPMENT.md b/vendor/tinyrick_extras/DEVELOPMENT.md index b5178b5..3ed79c9 100644 --- a/vendor/tinyrick_extras/DEVELOPMENT.md +++ b/vendor/tinyrick_extras/DEVELOPMENT.md @@ -4,13 +4,16 @@ tinyrick_extras' own compilation process is compatible with standard cargo. 
We w # BUILDTIME REQUIREMENTS -* [Rust](https://www.rust-lang.org/en-US/) 1.68.2+ with `rustup component add clippy rustfmt` and `cargo install cargo-audit@0.17.5 tinyrick@0.0.9` +* POSIX compatible [make](https://pubs.opengroup.org/onlinepubs/9699919799/utilities/make.html) +* [Rust](https://www.rust-lang.org/en-US/) 1.75.0+ +* Provision additional dev tools with `make` ## Recommended -* [ASDF](https://asdf-vm.com/) 0.10 (run `asdf reshim` after each Rust application binary installation) -* [direnv](https://direnv.net/) 2 +* [ASDF](https://asdf-vm.com/) 0.10 (run `asdf reshim` after provisioning) * [cargo-cache](https://crates.io/crates/cargo-cache) +* [direnv](https://direnv.net/) 2 +* POSIX compatible [tar](https://pubs.opengroup.org/onlinepubs/7908799/xcu/tar.html) # SECURITY AUDIT diff --git a/vendor/tinyrick_extras/LICENSE.md b/vendor/tinyrick_extras/LICENSE.md new file mode 100644 index 0000000..bff0a40 --- /dev/null +++ b/vendor/tinyrick_extras/LICENSE.md @@ -0,0 +1,26 @@ +Copyright (c) 2018, Andrew Pennebaker +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright, this + list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OR MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +The views and conclusions contained in the software and documentation are those +of the authors and should not be interpreted as representing official policies, +either expressed or implied, of the FreeBSD project. diff --git a/vendor/tinyrick_extras/README.md b/vendor/tinyrick_extras/README.md index 933ca75..598e70e 100644 --- a/vendor/tinyrick_extras/README.md +++ b/vendor/tinyrick_extras/README.md @@ -32,16 +32,21 @@ https://crates.io/crates/tinyrick_extras https://docs.rs/tinyrick_extras/latest/tinyrick_extras/ +# LICENSE + +BSD-2-Clause + # RUNTIME REQUIREMENTS -* [Rust](https://www.rust-lang.org/en-US/) 1.68.2+ +* [Rust](https://www.rust-lang.org/en-US/) 1.75.0+ ## Recommended * [ASDF](https://asdf-vm.com/) 0.10 (run `asdf reshim` after each Rust application binary installation) -* [direnv](https://direnv.net/) 2 * [cargo-cache](https://crates.io/crates/cargo-cache) * [crit](https://github.com/mcandre/crit) ports Rust applications +* [direnv](https://direnv.net/) 2 +* POSIX compatible [tar](https://pubs.opengroup.org/onlinepubs/7908799/xcu/tar.html) # CONTRIBUTING diff --git a/vendor/tinyrick_extras/makefile b/vendor/tinyrick_extras/makefile new file mode 100644 index 0000000..d08891b --- /dev/null +++ b/vendor/tinyrick_extras/makefile @@ -0,0 +1,12 @@ +.POSIX: +.SILENT: +.PHONY: all + +all: + rustup component add \ + clippy \ + rustfmt + cargo install --force \ + cargo-audit \ + tinyrick@0.0.13 \ + unmake@0.0.16 diff --git 
a/vendor/tinyrick_extras/src/lib.rs b/vendor/tinyrick_extras/src/lib.rs index 1267de7..89933b6 100644 --- a/vendor/tinyrick_extras/src/lib.rs +++ b/vendor/tinyrick_extras/src/lib.rs @@ -46,6 +46,26 @@ pub fn crit(args: Vec<String>) { .success()); } +/// Compress binaries. +/// +/// artifacts_path denotes a build directory root, +/// where a software project houses porting artifacts. +/// +/// port_basename denotes an archive directory root within the artifacts_path, +/// generally of the form "<app-name>-<version>". +pub fn archive(artifacts_path: String, port_basename: String) { + let artifacts_path_str: &str = &artifacts_path; + let port_basename_str: &str = &port_basename; + let archive_basename: &str = &format!("{}.tgz", port_basename_str); + assert!( + tinyrick::exec_mut!("tar", &["czf", archive_basename, port_basename_str]) + .current_dir(artifacts_path_str) + .status() + .unwrap() + .success() + ); +} + /// Uninstall artifacts pub fn uninstall_binaries() { tinyrick::exec!("cargo", &["uninstall"]); diff --git a/vendor/tinyrick_extras/tinyrick.rs b/vendor/tinyrick_extras/tinyrick.rs index 8cbafc8..b30f87c 100644 --- a/vendor/tinyrick_extras/tinyrick.rs +++ b/vendor/tinyrick_extras/tinyrick.rs @@ -23,11 +23,18 @@ fn rustfmt() { tinyrick_extras::rustfmt(); } +/// Run unmake +fn unmake() { + tinyrick::exec!("unmake", &["."]); + tinyrick::exec!("unmake", &["-n", "."]); +} + /// Validate documentation and run linters fn lint() { tinyrick::deps(doc); tinyrick::deps(clippy); tinyrick::deps(rustfmt); + tinyrick::deps(unmake); } /// Doc, lint, and run tests @@ -69,6 +76,7 @@ fn main() { audit, clippy, rustfmt, + unmake, lint, test, publish, diff --git a/vendor/unicode-ident/.cargo-checksum.json b/vendor/unicode-ident/.cargo-checksum.json index de029ff..9a708d8 100644 --- a/vendor/unicode-ident/.cargo-checksum.json +++ b/vendor/unicode-ident/.cargo-checksum.json @@ -1 +1 @@ 
-{"files":{"Cargo.toml":"7085b9b35cd2bdbe4fd1e48c33faaceab6fd0c8bab96366baac698d1e6fa9a88","LICENSE-APACHE":"62c7a1e35f56406896d7aa7ca52d0cc0d272ac022b5d2796e7d6905db8a3636a","LICENSE-MIT":"23f18e03dc49df91622fe2a76176497404e46ced8a715d9d2b67a7446571cca3","LICENSE-UNICODE":"68f5b9f5ea36881a0942ba02f558e9e1faf76cc09cb165ad801744c61b738844","README.md":"4e3b8b33ce66d038b932b0824e9c5f459893c2004ce68065fb8b68c98692d298","benches/xid.rs":"a61f61ecc7d5124c759cdeb55ab74470ab69f2f3ca37613da65f16e0e5e33487","src/lib.rs":"778e23eb823541d946b5e5c0ca32dcd67db4e347c5d605ab05dac6f6fbce98e3","src/tables.rs":"b4609d6c2e2ba44fba8cdbcec271325ff196afba8001dee805be95424219f01b","tests/compare.rs":"89c4dc4f745064a9f734667b1d960596a10b8cb019a8ed1c5b9512678a866ad5","tests/fst/mod.rs":"69a3aaf59acd8bca962ecc6234be56be8c0934ab79b253162f10eb881523901f","tests/fst/xid_continue.fst":"0624500413ac318fee8424eecdad70397f911e3beae52231bfca295bb1bb9e04","tests/fst/xid_start.fst":"cc36f4f1149a4004ea7e2075cfb54756328b571946fda526be508cf5ed53dbdb","tests/roaring/mod.rs":"784f65a48477fab7549620c7843c7ad6da533f69a18abca1172f6acb95045e53","tests/static_size.rs":"8d9aff4d6e4e846aa18dc47b527753f7768fbc7d552d2d66450c4debe6c4466c","tests/tables/mod.rs":"e6949172d10fc4b2431ce7546269bfd4f9146454c8c3e31faf5e5d80c16a8ab6","tests/tables/tables.rs":"5194ac98137a3b61322213f2f8e8b83ff925ffcdd79e93a2ec414ef944dc63a3","tests/trie/mod.rs":"d4acbb716bcbaf80660039797f45e138ed8bbd66749fa3b19b1a971574679cc9","tests/trie/trie.rs":"dbd7de5fe601159643a4c6febed06793f812e8d71010b0ec78f2557353a976b2"},"package":"e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4"} \ No newline at end of file 
+{"files":{"Cargo.toml":"7b10355305359d5feefb120329396a8823ce903cd66612d7d27612d51e6ceced","LICENSE-APACHE":"62c7a1e35f56406896d7aa7ca52d0cc0d272ac022b5d2796e7d6905db8a3636a","LICENSE-MIT":"23f18e03dc49df91622fe2a76176497404e46ced8a715d9d2b67a7446571cca3","LICENSE-UNICODE":"68f5b9f5ea36881a0942ba02f558e9e1faf76cc09cb165ad801744c61b738844","README.md":"eff1f30712e93cc160101c25bf31738448c284b90636deb3e3a651cb9ad20dd1","benches/xid.rs":"a61f61ecc7d5124c759cdeb55ab74470ab69f2f3ca37613da65f16e0e5e33487","src/lib.rs":"2673969775cff349816e3fb30f62476a802523fe4940482288b75bd747cbe748","src/tables.rs":"ffe8e252eabccf261385865cb781b3d76c9f32f6f9503d00196a30fb92d80b29","tests/compare.rs":"62471ffb157744cac6faae1adafdbdf785349d7eb6dc2ff4b4941c9d618397f9","tests/fst/mod.rs":"69a3aaf59acd8bca962ecc6234be56be8c0934ab79b253162f10eb881523901f","tests/fst/xid_continue.fst":"41fc751514b8bde658544d5fe7e100115d299d41897af855934b9f4ebda9d3a2","tests/fst/xid_start.fst":"ffa5e2bfe7dd5f6738fbe4b7a3e6e2083c9777191c54f8291a80d558ec4e2dd2","tests/roaring/mod.rs":"784f65a48477fab7549620c7843c7ad6da533f69a18abca1172f6acb95045e53","tests/static_size.rs":"4524332c1e424cb987d7cee1f47a98aea9ed7b256303a3828eda5aa1d06da240","tests/tables/mod.rs":"e6949172d10fc4b2431ce7546269bfd4f9146454c8c3e31faf5e5d80c16a8ab6","tests/tables/tables.rs":"011404dab8a3958da6e18a1fe9406c191675e6f49bf30ce813e3d05f582e750b","tests/trie/mod.rs":"d4acbb716bcbaf80660039797f45e138ed8bbd66749fa3b19b1a971574679cc9","tests/trie/trie.rs":"3c1ca56062f1b3ffdf2ae2063d3fee8d362b90082778056181b5c95e2e242ad8"},"package":"3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"} \ No newline at end of file diff --git a/vendor/unicode-ident/Cargo.toml b/vendor/unicode-ident/Cargo.toml index 05687ca..1c40dde 100644 --- a/vendor/unicode-ident/Cargo.toml +++ b/vendor/unicode-ident/Cargo.toml @@ -13,7 +13,7 @@ edition = "2018" rust-version = "1.31" name = "unicode-ident" -version = "1.0.8" +version = "1.0.12" authors = ["David Tolnay 
<dtolnay@gmail.com>"] description = "Determine whether characters have the XID_Start or XID_Continue properties according to Unicode Standard Annex #31" documentation = "https://docs.rs/unicode-ident" @@ -25,11 +25,13 @@ keywords = [ categories = [ "development-tools::procedural-macro-helpers", "no-std", + "no-std::no-alloc", ] license = "(MIT OR Apache-2.0) AND Unicode-DFS-2016" repository = "https://github.com/dtolnay/unicode-ident" [package.metadata.docs.rs] +rustdoc-args = ["--generate-link-to-definition"] targets = ["x86_64-unknown-linux-gnu"] [lib] @@ -40,7 +42,7 @@ name = "xid" harness = false [dev-dependencies.criterion] -version = "0.4" +version = "0.5" default-features = false [dev-dependencies.fst] diff --git a/vendor/unicode-ident/README.md b/vendor/unicode-ident/README.md index dfb943b..0e9af82 100644 --- a/vendor/unicode-ident/README.md +++ b/vendor/unicode-ident/README.md @@ -41,10 +41,10 @@ different ratios of ASCII to non-ASCII codepoints in the input data. | | static storage | 0% nonascii | 1% | 10% | 100% nonascii | |---|---|---|---|---|---| -| **`unicode-ident`** | 10.0 K | 0.96 ns | 0.95 ns | 1.09 ns | 1.55 ns | +| **`unicode-ident`** | 10.1 K | 0.96 ns | 0.95 ns | 1.09 ns | 1.55 ns | | **`unicode-xid`** | 11.5 K | 1.88 ns | 2.14 ns | 3.48 ns | 15.63 ns | | **`ucd-trie`** | 10.2 K | 1.29 ns | 1.28 ns | 1.36 ns | 2.15 ns | -| **`fst`** | 138 K | 55.1 ns | 54.9 ns | 53.2 ns | 28.5 ns | +| **`fst`** | 139 K | 55.1 ns | 54.9 ns | 53.2 ns | 28.5 ns | | **`roaring`** | 66.1 K | 2.78 ns | 3.09 ns | 3.37 ns | 4.70 ns | Source code for the benchmark is provided in the *bench* directory of this repo diff --git a/vendor/unicode-ident/src/lib.rs b/vendor/unicode-ident/src/lib.rs index b20ecc4..f890886 100644 --- a/vendor/unicode-ident/src/lib.rs +++ b/vendor/unicode-ident/src/lib.rs @@ -43,10 +43,10 @@ //! //! | | static storage | 0% nonascii | 1% | 10% | 100% nonascii | //! |---|---|---|---|---|---| -//! 
| **`unicode-ident`** | 9.75 K | 0.96 ns | 0.95 ns | 1.09 ns | 1.55 ns | -//! | **`unicode-xid`** | 11.34 K | 1.88 ns | 2.14 ns | 3.48 ns | 15.63 ns | -//! | **`ucd-trie`** | 9.95 K | 1.29 ns | 1.28 ns | 1.36 ns | 2.15 ns | -//! | **`fst`** | 133 K | 55.1 ns | 54.9 ns | 53.2 ns | 28.5 ns | +//! | **`unicode-ident`** | 10.1 K | 0.96 ns | 0.95 ns | 1.09 ns | 1.55 ns | +//! | **`unicode-xid`** | 11.5 K | 1.88 ns | 2.14 ns | 3.48 ns | 15.63 ns | +//! | **`ucd-trie`** | 10.2 K | 1.29 ns | 1.28 ns | 1.36 ns | 2.15 ns | +//! | **`fst`** | 139 K | 55.1 ns | 54.9 ns | 53.2 ns | 28.5 ns | //! | **`roaring`** | 66.1 K | 2.78 ns | 3.09 ns | 3.37 ns | 4.70 ns | //! //! Source code for the benchmark is provided in the *bench* directory of this @@ -242,7 +242,7 @@ //! this data structure is straight-line code with no need for branching. #![no_std] -#![doc(html_root_url = "https://docs.rs/unicode-ident/1.0.8")] +#![doc(html_root_url = "https://docs.rs/unicode-ident/1.0.12")] #![allow(clippy::doc_markdown, clippy::must_use_candidate)] #[rustfmt::skip] diff --git a/vendor/unicode-ident/src/tables.rs b/vendor/unicode-ident/src/tables.rs index b355f30..bb691b5 100644 --- a/vendor/unicode-ident/src/tables.rs +++ b/vendor/unicode-ident/src/tables.rs @@ -52,7 +52,7 @@ pub(crate) static TRIE_START: Align8<[u8; 402]> = Align8([ 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0xE0, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x52, 0xE3, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0xE6, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, - 0x05, 0x05, 0x05, 0x05, 0x05, 0xE1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0xE9, 0x00, 0x00, + 0x05, 0x05, 0x05, 0x05, 0x05, 0xE1, 0x05, 0xE9, 0x00, 0x00, 0x00, 0x00, 0x05, 0xEB, 0x00, 0x00, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0xE4, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0xE7, ]); @@ -81,7 +81,7 @@ pub(crate) static TRIE_CONTINUE: Align8<[u8; 1793]> = 
Align8([ 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0xE0, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x52, 0xE3, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0xE6, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, - 0x05, 0x05, 0x05, 0x05, 0x05, 0xE1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0xE9, 0x00, 0x00, + 0x05, 0x05, 0x05, 0x05, 0x05, 0xE1, 0x05, 0xE9, 0x00, 0x00, 0x00, 0x00, 0x05, 0xEB, 0x00, 0x00, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0xE4, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0xE7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -173,7 +173,7 @@ pub(crate) static TRIE_CONTINUE: Align8<[u8; 1793]> = Align8([ 0xC2, ]); -pub(crate) static LEAF: Align64<[u8; 7520]> = Align64([ +pub(crate) static LEAF: Align64<[u8; 7584]> = Align64([ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -316,7 +316,7 @@ pub(crate) static LEAF: Align64<[u8; 7520]> = Align64([ 0x00, 0x00, 0xFF, 0x1F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x84, 0xFC, 0x2F, 0x3F, 0x50, 0xFD, 0xFF, 0xF3, 0xE0, 0x43, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x01, 0x00, 0x10, 0x00, 0x00, 0x00, 0x02, 0x80, + 0x00, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x01, 0x00, 0x10, 0x00, 0x00, 0x00, 0x02, 0x80, 0x00, 0x00, 0xFF, 0x1F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x1F, 0xE2, 0xFF, 0x01, 0x00, 0x84, 0xFC, 0x2F, 0x3F, 0x50, 0xFD, 0xFF, 0xF3, 0xE0, 0x43, 0x00, 
0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -333,7 +333,7 @@ pub(crate) static LEAF: Align64<[u8; 7520]> = Align64([ 0xE0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xE0, 0x00, 0x00, 0x00, 0xFE, 0xFF, 0x3E, 0x1F, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0x7F, 0xE6, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xF7, + 0xFF, 0xFF, 0x7F, 0xE6, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xE0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, @@ -384,7 +384,7 @@ pub(crate) static LEAF: Align64<[u8; 7520]> = Align64([ 0xFF, 0xFF, 0xFF, 0x3F, 0xFF, 0xFF, 0xFF, 0x7F, 0xFC, 0xFC, 0xFC, 0x1C, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0x00, 0x00, 0xFF, 0xFF, 0x18, 0x00, 0x00, 0xE0, 0x00, 0x00, 0x00, 0x00, 0x8A, 0xAA, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x1F, - 0x00, 0x00, 0xFF, 0x03, 0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x07, 0xC0, 0xFF, 0xFF, 0xFF, + 0x00, 0x00, 0xFF, 0x03, 0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x07, 0xE0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F, 0xFC, 0xFC, 0xFC, 0x1C, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xEF, 0xFF, 0xFF, 0x7F, 0xFF, 0xFF, 0xB7, 0xFF, 0x3F, 0xFF, 0x3F, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x07, @@ -627,7 +627,7 @@ pub(crate) static LEAF: Align64<[u8; 7520]> = Align64([ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x3F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, @@ -640,6 +640,10 @@ pub(crate) static LEAF: Align64<[u8; 7520]> = Align64([ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x3F, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0x3F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, diff --git a/vendor/unicode-ident/tests/compare.rs b/vendor/unicode-ident/tests/compare.rs index 764a7e7..14ee22f 100644 --- a/vendor/unicode-ident/tests/compare.rs +++ b/vendor/unicode-ident/tests/compare.rs @@ -14,6 +14,8 @@ fn compare_all_implementations() { let 
thought_to_be_continue = unicode_ident::is_xid_continue(ch); // unicode-xid + // FIXME: unicode-xid does not support Unicode 15.1.0 yet. + /* assert_eq!( thought_to_be_start, unicode_xid::UnicodeXID::is_xid_start(ch), @@ -24,6 +26,7 @@ fn compare_all_implementations() { unicode_xid::UnicodeXID::is_xid_continue(ch), "{ch:?}", ); + */ // ucd-trie assert_eq!( diff --git a/vendor/unicode-ident/tests/fst/xid_continue.fst b/vendor/unicode-ident/tests/fst/xid_continue.fst index 05e7b1a..3024c81 100644 Binary files a/vendor/unicode-ident/tests/fst/xid_continue.fst and b/vendor/unicode-ident/tests/fst/xid_continue.fst differ diff --git a/vendor/unicode-ident/tests/fst/xid_start.fst b/vendor/unicode-ident/tests/fst/xid_start.fst index a5975b6..3f5a46b 100644 Binary files a/vendor/unicode-ident/tests/fst/xid_start.fst and b/vendor/unicode-ident/tests/fst/xid_start.fst differ diff --git a/vendor/unicode-ident/tests/static_size.rs b/vendor/unicode-ident/tests/static_size.rs index 610adf3..2df3537 100644 --- a/vendor/unicode-ident/tests/static_size.rs +++ b/vendor/unicode-ident/tests/static_size.rs @@ -13,7 +13,7 @@ fn test_size() { + size_of_val(&tables::TRIE_START) + size_of_val(&tables::TRIE_CONTINUE) + size_of_val(&tables::LEAF); - assert_eq!(10016, size); + assert_eq!(10080, size); } #[test] @@ -23,7 +23,7 @@ fn test_xid_size() { mod tables; let size = size_of_val(tables::XID_START) + size_of_val(tables::XID_CONTINUE); - assert_eq!(11528, size); + assert_eq!(11544, size); let _ = tables::BY_NAME; } @@ -70,7 +70,7 @@ fn test_trieset_size() { + size_of_val(tree3_level2) + size_of_val(tree3_level3); - assert_eq!(10208, start_size + continue_size); + assert_eq!(10200, start_size + continue_size); let _ = trie::BY_NAME; } @@ -80,7 +80,7 @@ fn test_fst_size() { let xid_start_fst = include_bytes!("fst/xid_start.fst"); let xid_continue_fst = include_bytes!("fst/xid_continue.fst"); let size = xid_start_fst.len() + xid_continue_fst.len(); - assert_eq!(137749, size); + 
assert_eq!(138736, size); } #[test] diff --git a/vendor/unicode-ident/tests/tables/tables.rs b/vendor/unicode-ident/tests/tables/tables.rs index 30aeee9..ba7b061 100644 --- a/vendor/unicode-ident/tests/tables/tables.rs +++ b/vendor/unicode-ident/tests/tables/tables.rs @@ -2,9 +2,9 @@ // // ucd-generate property-bool UCD --include XID_Start,XID_Continue // -// Unicode version: 15.0.0. +// Unicode version: 15.1.0. // -// ucd-generate 0.2.13 is available on crates.io. +// ucd-generate 0.3.0 is available on crates.io. pub const BY_NAME: &'static [(&'static str, &'static [(u32, u32)])] = &[ ("XID_Continue", XID_CONTINUE), ("XID_Start", XID_START), @@ -70,123 +70,123 @@ pub const XID_CONTINUE: &'static [(u32, u32)] = &[ (8016, 8023), (8025, 8025), (8027, 8027), (8029, 8029), (8031, 8061), (8064, 8116), (8118, 8124), (8126, 8126), (8130, 8132), (8134, 8140), (8144, 8147), (8150, 8155), (8160, 8172), (8178, 8180), (8182, 8188), - (8255, 8256), (8276, 8276), (8305, 8305), (8319, 8319), (8336, 8348), - (8400, 8412), (8417, 8417), (8421, 8432), (8450, 8450), (8455, 8455), - (8458, 8467), (8469, 8469), (8472, 8477), (8484, 8484), (8486, 8486), - (8488, 8488), (8490, 8505), (8508, 8511), (8517, 8521), (8526, 8526), - (8544, 8584), (11264, 11492), (11499, 11507), (11520, 11557), + (8204, 8205), (8255, 8256), (8276, 8276), (8305, 8305), (8319, 8319), + (8336, 8348), (8400, 8412), (8417, 8417), (8421, 8432), (8450, 8450), + (8455, 8455), (8458, 8467), (8469, 8469), (8472, 8477), (8484, 8484), + (8486, 8486), (8488, 8488), (8490, 8505), (8508, 8511), (8517, 8521), + (8526, 8526), (8544, 8584), (11264, 11492), (11499, 11507), (11520, 11557), (11559, 11559), (11565, 11565), (11568, 11623), (11631, 11631), (11647, 11670), (11680, 11686), (11688, 11694), (11696, 11702), (11704, 11710), (11712, 11718), (11720, 11726), (11728, 11734), (11736, 11742), (11744, 11775), (12293, 12295), (12321, 12335), (12337, 12341), (12344, 12348), (12353, 12438), (12441, 12442), - (12445, 12447), (12449, 
12538), (12540, 12543), (12549, 12591), - (12593, 12686), (12704, 12735), (12784, 12799), (13312, 19903), - (19968, 42124), (42192, 42237), (42240, 42508), (42512, 42539), - (42560, 42607), (42612, 42621), (42623, 42737), (42775, 42783), - (42786, 42888), (42891, 42954), (42960, 42961), (42963, 42963), - (42965, 42969), (42994, 43047), (43052, 43052), (43072, 43123), - (43136, 43205), (43216, 43225), (43232, 43255), (43259, 43259), - (43261, 43309), (43312, 43347), (43360, 43388), (43392, 43456), - (43471, 43481), (43488, 43518), (43520, 43574), (43584, 43597), - (43600, 43609), (43616, 43638), (43642, 43714), (43739, 43741), - (43744, 43759), (43762, 43766), (43777, 43782), (43785, 43790), - (43793, 43798), (43808, 43814), (43816, 43822), (43824, 43866), - (43868, 43881), (43888, 44010), (44012, 44013), (44016, 44025), - (44032, 55203), (55216, 55238), (55243, 55291), (63744, 64109), - (64112, 64217), (64256, 64262), (64275, 64279), (64285, 64296), - (64298, 64310), (64312, 64316), (64318, 64318), (64320, 64321), - (64323, 64324), (64326, 64433), (64467, 64605), (64612, 64829), - (64848, 64911), (64914, 64967), (65008, 65017), (65024, 65039), - (65056, 65071), (65075, 65076), (65101, 65103), (65137, 65137), - (65139, 65139), (65143, 65143), (65145, 65145), (65147, 65147), - (65149, 65149), (65151, 65276), (65296, 65305), (65313, 65338), - (65343, 65343), (65345, 65370), (65382, 65470), (65474, 65479), - (65482, 65487), (65490, 65495), (65498, 65500), (65536, 65547), - (65549, 65574), (65576, 65594), (65596, 65597), (65599, 65613), - (65616, 65629), (65664, 65786), (65856, 65908), (66045, 66045), - (66176, 66204), (66208, 66256), (66272, 66272), (66304, 66335), - (66349, 66378), (66384, 66426), (66432, 66461), (66464, 66499), - (66504, 66511), (66513, 66517), (66560, 66717), (66720, 66729), - (66736, 66771), (66776, 66811), (66816, 66855), (66864, 66915), - (66928, 66938), (66940, 66954), (66956, 66962), (66964, 66965), - (66967, 66977), (66979, 66993), (66995, 
67001), (67003, 67004), - (67072, 67382), (67392, 67413), (67424, 67431), (67456, 67461), - (67463, 67504), (67506, 67514), (67584, 67589), (67592, 67592), - (67594, 67637), (67639, 67640), (67644, 67644), (67647, 67669), - (67680, 67702), (67712, 67742), (67808, 67826), (67828, 67829), - (67840, 67861), (67872, 67897), (67968, 68023), (68030, 68031), - (68096, 68099), (68101, 68102), (68108, 68115), (68117, 68119), - (68121, 68149), (68152, 68154), (68159, 68159), (68192, 68220), - (68224, 68252), (68288, 68295), (68297, 68326), (68352, 68405), - (68416, 68437), (68448, 68466), (68480, 68497), (68608, 68680), - (68736, 68786), (68800, 68850), (68864, 68903), (68912, 68921), - (69248, 69289), (69291, 69292), (69296, 69297), (69373, 69404), - (69415, 69415), (69424, 69456), (69488, 69509), (69552, 69572), - (69600, 69622), (69632, 69702), (69734, 69749), (69759, 69818), - (69826, 69826), (69840, 69864), (69872, 69881), (69888, 69940), - (69942, 69951), (69956, 69959), (69968, 70003), (70006, 70006), - (70016, 70084), (70089, 70092), (70094, 70106), (70108, 70108), - (70144, 70161), (70163, 70199), (70206, 70209), (70272, 70278), - (70280, 70280), (70282, 70285), (70287, 70301), (70303, 70312), - (70320, 70378), (70384, 70393), (70400, 70403), (70405, 70412), - (70415, 70416), (70419, 70440), (70442, 70448), (70450, 70451), - (70453, 70457), (70459, 70468), (70471, 70472), (70475, 70477), - (70480, 70480), (70487, 70487), (70493, 70499), (70502, 70508), - (70512, 70516), (70656, 70730), (70736, 70745), (70750, 70753), - (70784, 70853), (70855, 70855), (70864, 70873), (71040, 71093), - (71096, 71104), (71128, 71133), (71168, 71232), (71236, 71236), - (71248, 71257), (71296, 71352), (71360, 71369), (71424, 71450), - (71453, 71467), (71472, 71481), (71488, 71494), (71680, 71738), - (71840, 71913), (71935, 71942), (71945, 71945), (71948, 71955), - (71957, 71958), (71960, 71989), (71991, 71992), (71995, 72003), - (72016, 72025), (72096, 72103), (72106, 72151), (72154, 
72161), - (72163, 72164), (72192, 72254), (72263, 72263), (72272, 72345), - (72349, 72349), (72368, 72440), (72704, 72712), (72714, 72758), - (72760, 72768), (72784, 72793), (72818, 72847), (72850, 72871), - (72873, 72886), (72960, 72966), (72968, 72969), (72971, 73014), - (73018, 73018), (73020, 73021), (73023, 73031), (73040, 73049), - (73056, 73061), (73063, 73064), (73066, 73102), (73104, 73105), - (73107, 73112), (73120, 73129), (73440, 73462), (73472, 73488), - (73490, 73530), (73534, 73538), (73552, 73561), (73648, 73648), - (73728, 74649), (74752, 74862), (74880, 75075), (77712, 77808), - (77824, 78895), (78912, 78933), (82944, 83526), (92160, 92728), - (92736, 92766), (92768, 92777), (92784, 92862), (92864, 92873), - (92880, 92909), (92912, 92916), (92928, 92982), (92992, 92995), - (93008, 93017), (93027, 93047), (93053, 93071), (93760, 93823), - (93952, 94026), (94031, 94087), (94095, 94111), (94176, 94177), - (94179, 94180), (94192, 94193), (94208, 100343), (100352, 101589), - (101632, 101640), (110576, 110579), (110581, 110587), (110589, 110590), - (110592, 110882), (110898, 110898), (110928, 110930), (110933, 110933), - (110948, 110951), (110960, 111355), (113664, 113770), (113776, 113788), - (113792, 113800), (113808, 113817), (113821, 113822), (118528, 118573), - (118576, 118598), (119141, 119145), (119149, 119154), (119163, 119170), - (119173, 119179), (119210, 119213), (119362, 119364), (119808, 119892), - (119894, 119964), (119966, 119967), (119970, 119970), (119973, 119974), - (119977, 119980), (119982, 119993), (119995, 119995), (119997, 120003), - (120005, 120069), (120071, 120074), (120077, 120084), (120086, 120092), - (120094, 120121), (120123, 120126), (120128, 120132), (120134, 120134), - (120138, 120144), (120146, 120485), (120488, 120512), (120514, 120538), - (120540, 120570), (120572, 120596), (120598, 120628), (120630, 120654), - (120656, 120686), (120688, 120712), (120714, 120744), (120746, 120770), - (120772, 120779), (120782, 
120831), (121344, 121398), (121403, 121452), - (121461, 121461), (121476, 121476), (121499, 121503), (121505, 121519), - (122624, 122654), (122661, 122666), (122880, 122886), (122888, 122904), - (122907, 122913), (122915, 122916), (122918, 122922), (122928, 122989), - (123023, 123023), (123136, 123180), (123184, 123197), (123200, 123209), - (123214, 123214), (123536, 123566), (123584, 123641), (124112, 124153), - (124896, 124902), (124904, 124907), (124909, 124910), (124912, 124926), - (124928, 125124), (125136, 125142), (125184, 125259), (125264, 125273), - (126464, 126467), (126469, 126495), (126497, 126498), (126500, 126500), - (126503, 126503), (126505, 126514), (126516, 126519), (126521, 126521), - (126523, 126523), (126530, 126530), (126535, 126535), (126537, 126537), - (126539, 126539), (126541, 126543), (126545, 126546), (126548, 126548), - (126551, 126551), (126553, 126553), (126555, 126555), (126557, 126557), - (126559, 126559), (126561, 126562), (126564, 126564), (126567, 126570), - (126572, 126578), (126580, 126583), (126585, 126588), (126590, 126590), - (126592, 126601), (126603, 126619), (126625, 126627), (126629, 126633), - (126635, 126651), (130032, 130041), (131072, 173791), (173824, 177977), - (177984, 178205), (178208, 183969), (183984, 191456), (194560, 195101), + (12445, 12447), (12449, 12543), (12549, 12591), (12593, 12686), + (12704, 12735), (12784, 12799), (13312, 19903), (19968, 42124), + (42192, 42237), (42240, 42508), (42512, 42539), (42560, 42607), + (42612, 42621), (42623, 42737), (42775, 42783), (42786, 42888), + (42891, 42954), (42960, 42961), (42963, 42963), (42965, 42969), + (42994, 43047), (43052, 43052), (43072, 43123), (43136, 43205), + (43216, 43225), (43232, 43255), (43259, 43259), (43261, 43309), + (43312, 43347), (43360, 43388), (43392, 43456), (43471, 43481), + (43488, 43518), (43520, 43574), (43584, 43597), (43600, 43609), + (43616, 43638), (43642, 43714), (43739, 43741), (43744, 43759), + (43762, 43766), (43777, 43782), 
(43785, 43790), (43793, 43798), + (43808, 43814), (43816, 43822), (43824, 43866), (43868, 43881), + (43888, 44010), (44012, 44013), (44016, 44025), (44032, 55203), + (55216, 55238), (55243, 55291), (63744, 64109), (64112, 64217), + (64256, 64262), (64275, 64279), (64285, 64296), (64298, 64310), + (64312, 64316), (64318, 64318), (64320, 64321), (64323, 64324), + (64326, 64433), (64467, 64605), (64612, 64829), (64848, 64911), + (64914, 64967), (65008, 65017), (65024, 65039), (65056, 65071), + (65075, 65076), (65101, 65103), (65137, 65137), (65139, 65139), + (65143, 65143), (65145, 65145), (65147, 65147), (65149, 65149), + (65151, 65276), (65296, 65305), (65313, 65338), (65343, 65343), + (65345, 65370), (65381, 65470), (65474, 65479), (65482, 65487), + (65490, 65495), (65498, 65500), (65536, 65547), (65549, 65574), + (65576, 65594), (65596, 65597), (65599, 65613), (65616, 65629), + (65664, 65786), (65856, 65908), (66045, 66045), (66176, 66204), + (66208, 66256), (66272, 66272), (66304, 66335), (66349, 66378), + (66384, 66426), (66432, 66461), (66464, 66499), (66504, 66511), + (66513, 66517), (66560, 66717), (66720, 66729), (66736, 66771), + (66776, 66811), (66816, 66855), (66864, 66915), (66928, 66938), + (66940, 66954), (66956, 66962), (66964, 66965), (66967, 66977), + (66979, 66993), (66995, 67001), (67003, 67004), (67072, 67382), + (67392, 67413), (67424, 67431), (67456, 67461), (67463, 67504), + (67506, 67514), (67584, 67589), (67592, 67592), (67594, 67637), + (67639, 67640), (67644, 67644), (67647, 67669), (67680, 67702), + (67712, 67742), (67808, 67826), (67828, 67829), (67840, 67861), + (67872, 67897), (67968, 68023), (68030, 68031), (68096, 68099), + (68101, 68102), (68108, 68115), (68117, 68119), (68121, 68149), + (68152, 68154), (68159, 68159), (68192, 68220), (68224, 68252), + (68288, 68295), (68297, 68326), (68352, 68405), (68416, 68437), + (68448, 68466), (68480, 68497), (68608, 68680), (68736, 68786), + (68800, 68850), (68864, 68903), (68912, 68921), 
(69248, 69289), + (69291, 69292), (69296, 69297), (69373, 69404), (69415, 69415), + (69424, 69456), (69488, 69509), (69552, 69572), (69600, 69622), + (69632, 69702), (69734, 69749), (69759, 69818), (69826, 69826), + (69840, 69864), (69872, 69881), (69888, 69940), (69942, 69951), + (69956, 69959), (69968, 70003), (70006, 70006), (70016, 70084), + (70089, 70092), (70094, 70106), (70108, 70108), (70144, 70161), + (70163, 70199), (70206, 70209), (70272, 70278), (70280, 70280), + (70282, 70285), (70287, 70301), (70303, 70312), (70320, 70378), + (70384, 70393), (70400, 70403), (70405, 70412), (70415, 70416), + (70419, 70440), (70442, 70448), (70450, 70451), (70453, 70457), + (70459, 70468), (70471, 70472), (70475, 70477), (70480, 70480), + (70487, 70487), (70493, 70499), (70502, 70508), (70512, 70516), + (70656, 70730), (70736, 70745), (70750, 70753), (70784, 70853), + (70855, 70855), (70864, 70873), (71040, 71093), (71096, 71104), + (71128, 71133), (71168, 71232), (71236, 71236), (71248, 71257), + (71296, 71352), (71360, 71369), (71424, 71450), (71453, 71467), + (71472, 71481), (71488, 71494), (71680, 71738), (71840, 71913), + (71935, 71942), (71945, 71945), (71948, 71955), (71957, 71958), + (71960, 71989), (71991, 71992), (71995, 72003), (72016, 72025), + (72096, 72103), (72106, 72151), (72154, 72161), (72163, 72164), + (72192, 72254), (72263, 72263), (72272, 72345), (72349, 72349), + (72368, 72440), (72704, 72712), (72714, 72758), (72760, 72768), + (72784, 72793), (72818, 72847), (72850, 72871), (72873, 72886), + (72960, 72966), (72968, 72969), (72971, 73014), (73018, 73018), + (73020, 73021), (73023, 73031), (73040, 73049), (73056, 73061), + (73063, 73064), (73066, 73102), (73104, 73105), (73107, 73112), + (73120, 73129), (73440, 73462), (73472, 73488), (73490, 73530), + (73534, 73538), (73552, 73561), (73648, 73648), (73728, 74649), + (74752, 74862), (74880, 75075), (77712, 77808), (77824, 78895), + (78912, 78933), (82944, 83526), (92160, 92728), (92736, 92766), + 
(92768, 92777), (92784, 92862), (92864, 92873), (92880, 92909), + (92912, 92916), (92928, 92982), (92992, 92995), (93008, 93017), + (93027, 93047), (93053, 93071), (93760, 93823), (93952, 94026), + (94031, 94087), (94095, 94111), (94176, 94177), (94179, 94180), + (94192, 94193), (94208, 100343), (100352, 101589), (101632, 101640), + (110576, 110579), (110581, 110587), (110589, 110590), (110592, 110882), + (110898, 110898), (110928, 110930), (110933, 110933), (110948, 110951), + (110960, 111355), (113664, 113770), (113776, 113788), (113792, 113800), + (113808, 113817), (113821, 113822), (118528, 118573), (118576, 118598), + (119141, 119145), (119149, 119154), (119163, 119170), (119173, 119179), + (119210, 119213), (119362, 119364), (119808, 119892), (119894, 119964), + (119966, 119967), (119970, 119970), (119973, 119974), (119977, 119980), + (119982, 119993), (119995, 119995), (119997, 120003), (120005, 120069), + (120071, 120074), (120077, 120084), (120086, 120092), (120094, 120121), + (120123, 120126), (120128, 120132), (120134, 120134), (120138, 120144), + (120146, 120485), (120488, 120512), (120514, 120538), (120540, 120570), + (120572, 120596), (120598, 120628), (120630, 120654), (120656, 120686), + (120688, 120712), (120714, 120744), (120746, 120770), (120772, 120779), + (120782, 120831), (121344, 121398), (121403, 121452), (121461, 121461), + (121476, 121476), (121499, 121503), (121505, 121519), (122624, 122654), + (122661, 122666), (122880, 122886), (122888, 122904), (122907, 122913), + (122915, 122916), (122918, 122922), (122928, 122989), (123023, 123023), + (123136, 123180), (123184, 123197), (123200, 123209), (123214, 123214), + (123536, 123566), (123584, 123641), (124112, 124153), (124896, 124902), + (124904, 124907), (124909, 124910), (124912, 124926), (124928, 125124), + (125136, 125142), (125184, 125259), (125264, 125273), (126464, 126467), + (126469, 126495), (126497, 126498), (126500, 126500), (126503, 126503), + (126505, 126514), (126516, 126519), 
(126521, 126521), (126523, 126523), + (126530, 126530), (126535, 126535), (126537, 126537), (126539, 126539), + (126541, 126543), (126545, 126546), (126548, 126548), (126551, 126551), + (126553, 126553), (126555, 126555), (126557, 126557), (126559, 126559), + (126561, 126562), (126564, 126564), (126567, 126570), (126572, 126578), + (126580, 126583), (126585, 126588), (126590, 126590), (126592, 126601), + (126603, 126619), (126625, 126627), (126629, 126633), (126635, 126651), + (130032, 130041), (131072, 173791), (173824, 177977), (177984, 178205), + (178208, 183969), (183984, 191456), (191472, 192093), (194560, 195101), (196608, 201546), (201552, 205743), (917760, 917999), ]; @@ -342,6 +342,6 @@ pub const XID_START: &'static [(u32, u32)] = &[ (126567, 126570), (126572, 126578), (126580, 126583), (126585, 126588), (126590, 126590), (126592, 126601), (126603, 126619), (126625, 126627), (126629, 126633), (126635, 126651), (131072, 173791), (173824, 177977), - (177984, 178205), (178208, 183969), (183984, 191456), (194560, 195101), - (196608, 201546), (201552, 205743), + (177984, 178205), (178208, 183969), (183984, 191456), (191472, 192093), + (194560, 195101), (196608, 201546), (201552, 205743), ]; diff --git a/vendor/unicode-ident/tests/trie/trie.rs b/vendor/unicode-ident/tests/trie/trie.rs index 821e7cb..fc805f4 100644 --- a/vendor/unicode-ident/tests/trie/trie.rs +++ b/vendor/unicode-ident/tests/trie/trie.rs @@ -2,9 +2,9 @@ // // ucd-generate property-bool UCD --include XID_Start,XID_Continue --trie-set // -// Unicode version: 15.0.0. +// Unicode version: 15.1.0. // -// ucd-generate 0.2.13 is available on crates.io. +// ucd-generate 0.3.0 is available on crates.io. 
pub const BY_NAME: &'static [(&'static str, &'static ::ucd_trie::TrieSet)] = &[ ("XID_Continue", XID_CONTINUE), ("XID_Start", XID_START), @@ -33,7 +33,7 @@ pub const XID_CONTINUE: &'static ::ucd_trie::TrieSet = &::ucd_trie::TrieSet { 75, 76, 77, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 4, 4, 4, 79, 80, 81, 82, 83, 78, 78, 78, - 78, 78, 78, 78, 78, 84, 42, 85, 86, 87, 4, 88, 89, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 84, 42, 85, 4, 86, 4, 87, 88, 78, 78, 78, 78, 78, 78, 78, 78, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, @@ -52,15 +52,15 @@ pub const XID_CONTINUE: &'static ::ucd_trie::TrieSet = &::ucd_trie::TrieSet { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 90, 91, 4, 4, 4, 4, 92, 93, 4, 94, 95, 4, 96, 97, 98, 62, 4, - 99, 100, 101, 4, 102, 103, 104, 4, 105, 106, 107, 4, 108, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 89, 90, 4, 4, 4, 4, 91, 92, 4, 93, 94, 4, 95, 96, 97, 62, 4, + 98, 99, 100, 4, 101, 102, 103, 4, 104, 105, 106, 4, 107, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 109, 110, 78, 78, + 4, 4, 4, 4, 4, 4, 
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 108, 109, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, @@ -68,8 +68,8 @@ pub const XID_CONTINUE: &'static ::ucd_trie::TrieSet = &::ucd_trie::TrieSet { 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, - 78, 78, 78, 78, 4, 4, 4, 4, 4, 100, 4, 111, 112, 113, 94, 114, 4, 115, 4, - 4, 116, 117, 118, 119, 120, 121, 4, 122, 123, 124, 125, 126, + 78, 78, 78, 78, 4, 4, 4, 4, 4, 99, 4, 110, 111, 112, 93, 113, 4, 114, 4, + 4, 115, 116, 117, 118, 119, 120, 4, 121, 122, 123, 124, 125, ], tree2_level2: &[ 0x3FFFFFFFFFFF, 0xFFFF07FF0FFFFFFF, 0xFFFFFFFFFF007EFF, @@ -92,13 +92,13 @@ pub const XID_CONTINUE: &'static ::ucd_trie::TrieSet = &::ucd_trie::TrieSet { 0x7FFF, 0xFF80003FF1FFF, 0xFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFF, 0x3FFFFFFFFFFFE3FF, 0xE7FFFFFFFFFF01FF, 0x7FFFFFFFFF70000, 0xFFFFFFFF3F3FFFFF, 0x3FFFFFFFAAFF3F3F, 0x5FDFFFFFFFFFFFFF, - 0x1FDC1FFF0FCF1FDC, 0x8000000000000000, 0x8002000000100001, 0x1FFF0000, + 0x1FDC1FFF0FCF1FDC, 0x8000000000003000, 0x8002000000100001, 0x1FFF0000, 0x1FFE21FFF0000, 0xF3FFFD503F2FFC84, 0xFFFFFFFF000043E0, 0x1FF, 0, 0xFF81FFFFFFFFF, 0xFFFF20BFFFFFFFFF, 0x800080FFFFFFFFFF, 0x7F7F7F7F007FFFFF, 0xFFFFFFFF7F7F7F7F, 0x1F3EFFFE000000E0, - 0xFFFFFFFEE67FFFFF, 0xF7FFFFFFFFFFFFFF, 0xFFFEFFFFFFFFFFE0, - 0xFFFFFFFF00007FFF, 0xFFFF000000000000, 0x1FFF, 0x3FFFFFFFFFFF0000, - 0xFFFFFFF1FFF, 0xBFF0FFFFFFFFFFFF, 0x3FFFFFFFFFFFF, 0xFFFFFFFCFF800000, + 0xFFFFFFFEE67FFFFF, 0xFFFEFFFFFFFFFFE0, 0xFFFFFFFF00007FFF, + 0xFFFF000000000000, 0x1FFF, 0x3FFFFFFFFFFF0000, 0xFFFFFFF1FFF, + 0xBFF0FFFFFFFFFFFF, 0x3FFFFFFFFFFFF, 0xFFFFFFFCFF800000, 0xFFFFFFFFFFFFF9FF, 0xFFFC000003EB07FF, 0x10FFFFFFFFFF, 
0xE8FFFFFF03FF003F, 0xFFFF3FFFFFFFFFFF, 0x1FFFFFFF000FFFFF, 0x7FFFFFFF03FF8001, 0x7FFFFFFFFFFFFF, 0xFC7FFFFF03FF3FFF, @@ -108,7 +108,7 @@ pub const XID_CONTINUE: &'static ::ucd_trie::TrieSet = &::ucd_trie::TrieSet { 0xFFFFFFF03FFFFFFF, 0x3FFFFFFFFFFFFFFF, 0xFFFFFFFFFFFF0000, 0xFFFFFFFFFFFCFFFF, 0x3FF0000000000FF, 0x18FFFF0000FFFF, 0xAA8A00000000E000, 0x1FFFFFFFFFFFFFFF, 0x87FFFFFE03FF0000, - 0xFFFFFFC007FFFFFE, 0x7FFFFFFFFFFFFFFF, 0x1CFCFCFC, + 0xFFFFFFE007FFFFFE, 0x7FFFFFFFFFFFFFFF, 0x1CFCFCFC, ], tree3_level1: &[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 5, 9, 10, 11, 12, 13, 14, 7, 7, 7, 7, 7, 7, 7, @@ -174,18 +174,18 @@ pub const XID_CONTINUE: &'static ::ucd_trie::TrieSet = &::ucd_trie::TrieSet { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 138, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 139, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, - 2, 2, 2, 140, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 141, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 139, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 140, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, + 2, 2, 2, 2, 140, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 141, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 87, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 2, 2, 2, 2, 2, 2, 2, 2, 87, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 87, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 87, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, ], tree3_level3: &[ 0xB7FFFF7FFFFFEFFF, 0x3FFF3FFF, 0xFFFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFF, 0, @@ -223,7 +223,7 @@ pub const XID_CONTINUE: &'static ::ucd_trie::TrieSet = &::ucd_trie::TrieSet { 0x3FFFFFFFFFFF, 0x8000, 0x3FFF1FFFFFFFFFFF, 0x43FF, 0x7FFFFFFF0000, 0x3FFFFFFFFFFFFFF, 0x3FFFFFFFFFF0000, 0x7FFF6F7F00000000, 0x7F001F, 0x3FF0FFF, 0xAF7FE96FFFFFFEF, 0x5EF7F796AA96EA84, 0xFFFFBEE0FFFFBFF, - 0x3FF000000000000, 0xFFFFFFFF, 0xFFFF0003FFFFFFFF, 0x1FFFFFFFF, + 0x3FF000000000000, 0xFFFFFFFF, 0xFFFF0003FFFFFFFF, 0xFFFF0001FFFFFFFF, 0x3FFFFFFF, 0xFFFFFFFFFFFF07FF, ], }; @@ -397,15 +397,16 @@ pub const XID_START: &'static ::ucd_trie::TrieSet = &::ucd_trie::TrieSet { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 128, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 129, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 129, 2, 2, 2, 2, 2, 2, 2, 2, 2, 130, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 130, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 131, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 130, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 131, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 55, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 55, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, ], tree3_level3: &[ 0xB7FFFF7FFFFFEFFF, 0x3FFF3FFF, 0xFFFFFFFFFFFFFFFF, 0x7FFFFFFFFFFFFFF, 0, @@ -439,6 +440,6 @@ pub const XID_START: &'static ::ucd_trie::TrieSet = &::ucd_trie::TrieSet { 0xFFFF000000000000, 0x3FFFFFFFFFFF, 0x3F801FFFFFFFFFFF, 0x4000, 0xFFFFFFF0000, 0x7FFF6F7F00000000, 0x1F, 0x80F, 0xAF7FE96FFFFFFEF, 0x5EF7F796AA96EA84, 0xFFFFBEE0FFFFBFF, 0xFFFFFFFF, 0x3FFFFFFFFFFFFFF, - 0xFFFF0003FFFFFFFF, 0x1FFFFFFFF, 0x3FFFFFFF, 0xFFFFFFFFFFFF07FF, + 0xFFFF0003FFFFFFFF, 0xFFFF0001FFFFFFFF, 0x3FFFFFFF, 0xFFFFFFFFFFFF07FF, ], }; diff --git a/vendor/unicode-width/.cargo-checksum.json b/vendor/unicode-width/.cargo-checksum.json index 5c7b922..c1608bd 100644 --- a/vendor/unicode-width/.cargo-checksum.json +++ b/vendor/unicode-width/.cargo-checksum.json @@ -1 +1 @@ 
-{"files":{"COPYRIGHT":"23860c2a7b5d96b21569afedf033469bab9fe14a1b24a35068b8641c578ce24d","Cargo.toml":"f22e31fb3559e916864820719a09ab3adbf80301440e1702acf827210bbf76df","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"7b63ecd5f1902af1b63729947373683c32745c16a10e8e6292e2e2dcd7e90ae0","README.md":"8a041a4305fb318f5c2cb284046f8480796521d0e829023b0441b5e8469490eb","scripts/unicode.py":"0c53095ef99395338399f9ad218b4481cffcf63774fd61871ed32efb242419f8","src/lib.rs":"38c44436eac069bd8d11203f31ecfef8adfe92da1fce19ba00bdd25aa3fbbe20","src/tables.rs":"c6ddb420c289517bb92973199fd2987b9608f29fc10bb33b5290f39b301ce92f","src/tests.rs":"ff9f331210861ba78040f119a0f6ccfacf5b2ca1ebee430784de0858fad01860"},"package":"c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b"} \ No newline at end of file +{"files":{"COPYRIGHT":"23860c2a7b5d96b21569afedf033469bab9fe14a1b24a35068b8641c578ce24d","Cargo.toml":"09233bddd9dcdd71355ad523e7293bc8764bb96f891e8968bea9b8aaf86ed314","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"7b63ecd5f1902af1b63729947373683c32745c16a10e8e6292e2e2dcd7e90ae0","README.md":"8a041a4305fb318f5c2cb284046f8480796521d0e829023b0441b5e8469490eb","scripts/unicode.py":"d7884ff41ca9c54cbe97a8f9c909b0a8cd74bc8e6190b0c89bf3001a38d9d763","src/lib.rs":"baa30ab3913bde7d8e766a2fbccfda96f06dd499bebda899249fc7495d5ad6f7","src/tables.rs":"7201dff937e3b0f61f4fcad9cfb230fb0c44bb9082301e566351b95d7525605e","src/tests.rs":"ff9f331210861ba78040f119a0f6ccfacf5b2ca1ebee430784de0858fad01860"},"package":"e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85"} \ No newline at end of file diff --git a/vendor/unicode-width/Cargo.toml b/vendor/unicode-width/Cargo.toml index 77aded9..6dcfacd 100644 --- a/vendor/unicode-width/Cargo.toml +++ b/vendor/unicode-width/Cargo.toml @@ -11,7 +11,7 @@ [package] name = "unicode-width" -version = "0.1.10" +version = "0.1.11" authors = [ 
"kwantam <kwantam@gmail.com>", "Manish Goregaokar <manishsmail@gmail.com>", diff --git a/vendor/unicode-width/scripts/unicode.py b/vendor/unicode-width/scripts/unicode.py index 2efb0b6..a6d58c5 100755 --- a/vendor/unicode-width/scripts/unicode.py +++ b/vendor/unicode-width/scripts/unicode.py @@ -106,9 +106,9 @@ def load_east_asian_widths() -> "list[EffectiveWidth]": `Ambiguous` chracters are assigned `EffectiveWidth.AMBIGUOUS`.""" with fetch_open("EastAsianWidth.txt") as eaw: # matches a width assignment for a single codepoint, i.e. "1F336;N # ..." - single = re.compile(r"^([0-9A-F]+);(\w+) +# (\w+)") + single = re.compile(r"^([0-9A-F]+)\s+;\s+(\w+) +# (\w+)") # matches a width assignment for a range of codepoints, i.e. "3001..3003;W # ..." - multiple = re.compile(r"^([0-9A-F]+)\.\.([0-9A-F]+);(\w+) +# (\w+)") + multiple = re.compile(r"^([0-9A-F]+)\.\.([0-9A-F]+)\s+;\s+(\w+) +# (\w+)") # map between width category code and condensed width width_codes = { **{c: EffectiveWidth.NARROW for c in ["N", "Na", "H"]}, diff --git a/vendor/unicode-width/src/lib.rs b/vendor/unicode-width/src/lib.rs index 1ee35c8..fac45fc 100644 --- a/vendor/unicode-width/src/lib.rs +++ b/vendor/unicode-width/src/lib.rs @@ -59,8 +59,6 @@ extern crate test; use tables::charwidth as cw; pub use tables::UNICODE_VERSION; -use core::ops::Add; - mod tables; #[cfg(test)] @@ -121,11 +119,11 @@ pub trait UnicodeWidthStr { impl UnicodeWidthStr for str { #[inline] fn width(&self) -> usize { - self.chars().map(|c| cw::width(c, false).unwrap_or(0)).fold(0, Add::add) + self.chars().map(|c| cw::width(c, false).unwrap_or(0)).sum() } #[inline] fn width_cjk(&self) -> usize { - self.chars().map(|c| cw::width(c, true).unwrap_or(0)).fold(0, Add::add) + self.chars().map(|c| cw::width(c, true).unwrap_or(0)).sum() } } diff --git a/vendor/unicode-width/src/tables.rs b/vendor/unicode-width/src/tables.rs index 439c69c..791d7a8 100644 --- a/vendor/unicode-width/src/tables.rs +++ b/vendor/unicode-width/src/tables.rs @@ 
-12,7 +12,7 @@ /// The version of [Unicode](http://www.unicode.org/) /// that this version of unicode-width is based on. -pub const UNICODE_VERSION: (u8, u8, u8) = (15, 0, 0); +pub const UNICODE_VERSION: (u8, u8, u8) = (15, 1, 0); pub mod charwidth { use core::option::Option::{self, None, Some}; @@ -395,13 +395,13 @@ pub mod charwidth { 0x00, 0x00, 0x00, 0x00, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0x9A, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0x55, 0x55, 0x55, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0x5A, 0x55, 0x55, 0x55, - 0x55, 0x55, 0x55, 0xAA, 0xAA, 0xAA, 0x55, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, + 0x55, 0x55, 0x55, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0x0A, 0xA0, 0xAA, 0xAA, 0xAA, 0x6A, 0xA9, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0x6A, 0x81, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0x55, 0xA9, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xA9, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0x6A, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, - 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0x55, 0x55, 0x55, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, + 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0x55, 0x55, 0x95, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0x6A, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xFF, 0xFF, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0x56, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, diff --git a/vendor/walkdir/.cargo-checksum.json b/vendor/walkdir/.cargo-checksum.json index d90dfa1..dba6351 100644 --- a/vendor/walkdir/.cargo-checksum.json +++ b/vendor/walkdir/.cargo-checksum.json @@ -1 +1 @@ 
-{"files":{"COPYING":"01c266bced4a434da0051174d6bee16a4c82cf634e2679b6155d40d75012390f","Cargo.toml":"ca0ae974da5ac27e19ee2644ad445056f121f74ece668103c48f6a9efbc09c71","LICENSE-MIT":"0f96a83840e146e43c0ec96a22ec1f392e0680e6c1226e6f3ba87e0740af850f","README.md":"5fc28c8211fe74af2889a2eb859127f8d543e0f94be80659c307105ed831f84b","UNLICENSE":"7e12e5df4bae12cb21581ba157ced20e1986a0508dd10d0e8a4ab9a4cf94e85c","compare/nftw.c":"6b900b0ac78ce8ece1995f834b194fa14d87d744cabba8f475f0e10b21722c56","compare/walk.py":"d49e26d0b8b2b201d00f2f46bf1f9db46f873c27332da679c9a7adbbf54462d2","rustfmt.toml":"1ca600239a27401c4a43f363cf3f38183a212affc1f31bff3ae93234bbaec228","src/dent.rs":"ca573f4533370a09851579f5940f7cd9bd121b2f30ec51d29a40afdce984683b","src/error.rs":"ba58bf6f59d196567435d4b66699a928cc237fc7c8df01dc37ab822509905b7c","src/lib.rs":"d964635f63cd73487c10a4e32440c30d9d25c43baca02d2f5313b48cd9a40e1c","src/tests/mod.rs":"bbce9174bfdbb4a81a9000f702be056b63b439cb8f1bcac64b8569368627d56d","src/tests/recursive.rs":"1afd9bb86cb04e8fead4d1e075efe00f6e16d7cf17bb3014f8c94e8adf869bea","src/tests/util.rs":"ca72ef96f82bb87d8c93d13d581ebb65efcd53fffa87097a84437acecbc30faa","src/util.rs":"14e0da711cad4825ead21446cd61a1444fd49bab853a8a239d8cb74b2caab351"},"package":"36df944cda56c7d8d8b7496af378e6b16de9284591917d307c9b4d313c44e698"} \ No newline at end of file 
+{"files":{"COPYING":"01c266bced4a434da0051174d6bee16a4c82cf634e2679b6155d40d75012390f","Cargo.toml":"e7c4db380e4ac3135c94dd92324d64c89b988d0b62d1aa41c9d2b5448b0e9f70","LICENSE-MIT":"0f96a83840e146e43c0ec96a22ec1f392e0680e6c1226e6f3ba87e0740af850f","README.md":"5fc28c8211fe74af2889a2eb859127f8d543e0f94be80659c307105ed831f84b","UNLICENSE":"7e12e5df4bae12cb21581ba157ced20e1986a0508dd10d0e8a4ab9a4cf94e85c","compare/nftw.c":"6b900b0ac78ce8ece1995f834b194fa14d87d744cabba8f475f0e10b21722c56","compare/walk.py":"d49e26d0b8b2b201d00f2f46bf1f9db46f873c27332da679c9a7adbbf54462d2","rustfmt.toml":"1ca600239a27401c4a43f363cf3f38183a212affc1f31bff3ae93234bbaec228","src/dent.rs":"ca573f4533370a09851579f5940f7cd9bd121b2f30ec51d29a40afdce984683b","src/error.rs":"ba58bf6f59d196567435d4b66699a928cc237fc7c8df01dc37ab822509905b7c","src/lib.rs":"cee55b7b95cc8e8613ee47aae6a7ee47d3b6258e690128ff69f0d4da1feed374","src/tests/mod.rs":"bbce9174bfdbb4a81a9000f702be056b63b439cb8f1bcac64b8569368627d56d","src/tests/recursive.rs":"b6305e7cc9f905ce6b7328ac9fb5b07e5a73fa549c0b84ef890fb442d1bbcb7c","src/tests/util.rs":"ca72ef96f82bb87d8c93d13d581ebb65efcd53fffa87097a84437acecbc30faa","src/util.rs":"14e0da711cad4825ead21446cd61a1444fd49bab853a8a239d8cb74b2caab351"},"package":"d71d857dc86794ca4c280d616f7da00d2dbfd8cd788846559a6813e6aa4b54ee"} \ No newline at end of file diff --git a/vendor/walkdir/Cargo.toml b/vendor/walkdir/Cargo.toml index 7db9263..4c29a20 100644 --- a/vendor/walkdir/Cargo.toml +++ b/vendor/walkdir/Cargo.toml @@ -12,7 +12,7 @@ [package] edition = "2018" name = "walkdir" -version = "2.3.3" +version = "2.4.0" authors = ["Andrew Gallant <jamslam@gmail.com>"] exclude = [ "/ci/*", diff --git a/vendor/walkdir/src/lib.rs b/vendor/walkdir/src/lib.rs index 4d41515..edf702e 100644 --- a/vendor/walkdir/src/lib.rs +++ b/vendor/walkdir/src/lib.rs @@ -237,6 +237,7 @@ pub struct WalkDir { struct WalkDirOptions { follow_links: bool, + follow_root_links: bool, max_open: usize, min_depth: usize, 
max_depth: usize, @@ -265,6 +266,7 @@ impl fmt::Debug for WalkDirOptions { }; f.debug_struct("WalkDirOptions") .field("follow_links", &self.follow_links) + .field("follow_root_link", &self.follow_root_links) .field("max_open", &self.max_open) .field("min_depth", &self.min_depth) .field("max_depth", &self.max_depth) @@ -287,6 +289,7 @@ impl WalkDir { WalkDir { opts: WalkDirOptions { follow_links: false, + follow_root_links: true, max_open: 10, min_depth: 0, max_depth: ::std::usize::MAX, @@ -344,6 +347,25 @@ impl WalkDir { self } + /// Follow symbolic links if these are the root of the traversal. + /// By default, this is enabled. + /// + /// When `yes` is `true`, symbolic links on root paths are followed + /// which is effective if the symbolic link points to a directory. + /// If a symbolic link is broken or is involved in a loop, an error is yielded + /// as the first entry of the traversal. + /// + /// When enabled, the yielded [`DirEntry`] values represent the target of + /// the link while the path corresponds to the link. See the [`DirEntry`] + /// type for more details, and all future entries will be contained within + /// the resolved directory behind the symbolic link of the root path. + /// + /// [`DirEntry`]: struct.DirEntry.html + pub fn follow_root_links(mut self, yes: bool) -> Self { + self.opts.follow_root_links = yes; + self + } + /// Set the maximum number of simultaneously open file descriptors used /// by the iterator. /// @@ -830,7 +852,10 @@ impl IntoIter { } else { itry!(self.push(&dent)); } - } else if dent.depth() == 0 && dent.file_type().is_symlink() { + } else if dent.depth() == 0 + && dent.file_type().is_symlink() + && self.opts.follow_root_links + { // As a special case, if we are processing a root entry, then we // always follow it even if it's a symlink and follow_links is // false. 
We are careful to not let this change the semantics of diff --git a/vendor/walkdir/src/tests/recursive.rs b/vendor/walkdir/src/tests/recursive.rs index 4119f46..e415b91 100644 --- a/vendor/walkdir/src/tests/recursive.rs +++ b/vendor/walkdir/src/tests/recursive.rs @@ -383,7 +383,76 @@ fn sym_root_file_follow() { } #[test] -fn sym_root_dir_nofollow() { +fn broken_sym_root_dir_nofollow_and_root_nofollow() { + let dir = Dir::tmp(); + dir.symlink_dir("broken", "a-link"); + + let wd = WalkDir::new(dir.join("a-link")) + .follow_links(false) + .follow_root_links(false); + let r = dir.run_recursive(wd); + let ents = r.sorted_ents(); + assert_eq!(ents.len(), 1); + let link = &ents[0]; + assert_eq!(dir.join("a-link"), link.path()); + assert!(link.path_is_symlink()); +} + +#[test] +fn broken_sym_root_dir_follow_and_root_nofollow() { + let dir = Dir::tmp(); + dir.symlink_dir("broken", "a-link"); + + let wd = WalkDir::new(dir.join("a-link")) + .follow_links(true) + .follow_root_links(false); + let r = dir.run_recursive(wd); + assert!(r.sorted_ents().is_empty()); + assert_eq!( + r.errs().len(), + 1, + "broken symlink cannot be traversed - they are followed if symlinks are followed" + ); +} + +#[test] +fn broken_sym_root_dir_root_is_always_followed() { + let dir = Dir::tmp(); + dir.symlink_dir("broken", "a-link"); + + for follow_symlinks in &[true, false] { + let wd = + WalkDir::new(dir.join("a-link")).follow_links(*follow_symlinks); + let r = dir.run_recursive(wd); + assert!(r.sorted_ents().is_empty()); + assert_eq!( + r.errs().len(), + 1, + "broken symlink in roots cannot be traversed, they are always followed" + ); + } +} + +#[test] +fn sym_root_dir_nofollow_root_nofollow() { + let dir = Dir::tmp(); + dir.mkdirp("a"); + dir.symlink_dir("a", "a-link"); + dir.touch("a/zzz"); + + let wd = WalkDir::new(dir.join("a-link")).follow_root_links(false); + let r = dir.run_recursive(wd); + r.assert_no_errors(); + + let ents = r.sorted_ents(); + assert_eq!(1, ents.len()); + let link = 
&ents[0]; + assert_eq!(dir.join("a-link"), link.path()); + assert_eq!(0, link.depth()); +} + +#[test] +fn sym_root_dir_nofollow_root_follow() { let dir = Dir::tmp(); dir.mkdirp("a"); dir.symlink_dir("a", "a-link"); diff --git a/vendor/winapi-util/.cargo-checksum.json b/vendor/winapi-util/.cargo-checksum.json index deb1761..698de40 100644 --- a/vendor/winapi-util/.cargo-checksum.json +++ b/vendor/winapi-util/.cargo-checksum.json @@ -1 +1 @@ -{"files":{"COPYING":"01c266bced4a434da0051174d6bee16a4c82cf634e2679b6155d40d75012390f","Cargo.toml":"8d5e3ed912b57104e1448a8437c840a5d5b9842e5cbb371429f9e879cb45fc56","LICENSE-MIT":"cb3c929a05e6cbc9de9ab06a4c57eeb60ca8c724bef6c138c87d3a577e27aa14","README.md":"b97062c8af7a615725a19394c8d26a19460a1840979196f0c119a1c1432d15f1","UNLICENSE":"7e12e5df4bae12cb21581ba157ced20e1986a0508dd10d0e8a4ab9a4cf94e85c","rustfmt.toml":"1ca600239a27401c4a43f363cf3f38183a212affc1f31bff3ae93234bbaec228","src/console.rs":"41d3d1d7501e9e4f8836d642bf8a848b690112fb707b040cabe7f8f1f0e4a692","src/file.rs":"5e2b5f60de7f8a2eeeafe165701eb0e442a0bafbf6df6e2e0d92fdccafd7a8bf","src/lib.rs":"36aba07c7bd526e341382b6471212ccfef8030f27b73a607f2539b307ccff8d1","src/win.rs":"146b65eae8fe2fe3c567609d7d7be0a983f607fc9d3fcdb5cd7ca6f2cc7bde33"},"package":"70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178"} \ No newline at end of file 
+{"files":{"COPYING":"01c266bced4a434da0051174d6bee16a4c82cf634e2679b6155d40d75012390f","Cargo.toml":"602f5489f96ad637fc730bc17aeedb1d1dc842f0f8fefb4a6748251a8acf0e46","LICENSE-MIT":"cb3c929a05e6cbc9de9ab06a4c57eeb60ca8c724bef6c138c87d3a577e27aa14","README.md":"16ae300efe881b2c09fd167c4c298b3dafb23783e980eaa925433f324ff1d725","UNLICENSE":"7e12e5df4bae12cb21581ba157ced20e1986a0508dd10d0e8a4ab9a4cf94e85c","rustfmt.toml":"1ca600239a27401c4a43f363cf3f38183a212affc1f31bff3ae93234bbaec228","src/console.rs":"431fda2243a16dc1ecc8bdb634b580f492f788cb25e589fb513448eb559b77df","src/file.rs":"35c2610719d4febe1ef630d7ebcff836fad3b0e270aa1e0af443d4c27e375872","src/lib.rs":"c3a72d1581dcc03fdd66d697c34ede87c978ef042645c4babf6348290a3c2126","src/sysinfo.rs":"245fc118dceb155b7bb7696e2d7abcbae19f6790e37e7e41c02b9e9a9db7ec65","src/win.rs":"ee7f0200ed3ddc4a93f25aba0b507dcbce062ed2f82fe505cfae727e4894c287"},"package":"f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596"} \ No newline at end of file diff --git a/vendor/winapi-util/Cargo.toml b/vendor/winapi-util/Cargo.toml index fe6933c..b3ac743 100644 --- a/vendor/winapi-util/Cargo.toml +++ b/vendor/winapi-util/Cargo.toml @@ -3,28 +3,49 @@ # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies -# to registry (e.g., crates.io) dependencies +# to registry (e.g., crates.io) dependencies. # -# If you believe there's an error in this file please file an -# issue against the rust-lang/cargo repository. If you're -# editing this file be aware that the upstream Cargo.toml -# will likely look very different (and much more reasonable) +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. 
[package] -edition = "2018" +edition = "2021" name = "winapi-util" -version = "0.1.5" +version = "0.1.6" authors = ["Andrew Gallant <jamslam@gmail.com>"] description = "A dumping ground for high level safe wrappers over winapi." homepage = "https://github.com/BurntSushi/winapi-util" documentation = "https://docs.rs/winapi-util" readme = "README.md" -keywords = ["windows", "winapi", "util", "win"] -categories = ["os::windows-apis", "external-ffi-bindings"] +keywords = [ + "windows", + "winapi", + "util", + "win", +] +categories = [ + "os::windows-apis", + "external-ffi-bindings", +] license = "Unlicense/MIT" repository = "https://github.com/BurntSushi/winapi-util" + [package.metadata.docs.rs] targets = ["x86_64-pc-windows-msvc"] + [target."cfg(windows)".dependencies.winapi] version = "0.3" -features = ["std", "consoleapi", "errhandlingapi", "fileapi", "minwindef", "processenv", "winbase", "wincon", "winerror", "winnt"] +features = [ + "std", + "consoleapi", + "errhandlingapi", + "fileapi", + "minwindef", + "processenv", + "sysinfoapi", + "winbase", + "wincon", + "winerror", + "winnt", +] diff --git a/vendor/winapi-util/README.md b/vendor/winapi-util/README.md index 79e4730..3f9ebb6 100644 --- a/vendor/winapi-util/README.md +++ b/vendor/winapi-util/README.md @@ -16,12 +16,7 @@ https://docs.rs/winapi-util ### Usage -Add this to your `Cargo.toml`: - -```toml -[dependencies] -winapi-util = "0.1" -``` +Run `cargo add winapi-util` to add this dependency to your `Cargo.toml` file. ### Notes @@ -46,7 +41,7 @@ got things right is most appreciated. ### Minimum Rust version policy -This crate's minimum supported `rustc` version is `1.34.0`. +This crate's minimum supported `rustc` version is `1.72.0`. The current policy is that the minimum Rust version required to use this crate can be increased in non-breaking version updates. 
For example, if `crate 1.0` diff --git a/vendor/winapi-util/src/console.rs b/vendor/winapi-util/src/console.rs index 233d5c9..44af5bd 100644 --- a/vendor/winapi-util/src/console.rs +++ b/vendor/winapi-util/src/console.rs @@ -1,13 +1,16 @@ -use std::io; -use std::mem; - -use winapi::shared::minwindef::WORD; -use winapi::um::consoleapi::{GetConsoleMode, SetConsoleMode}; -use winapi::um::wincon::{ - self, GetConsoleScreenBufferInfo, SetConsoleTextAttribute, - CONSOLE_SCREEN_BUFFER_INFO, FOREGROUND_BLUE as FG_BLUE, - FOREGROUND_GREEN as FG_GREEN, FOREGROUND_INTENSITY as FG_INTENSITY, - FOREGROUND_RED as FG_RED, +use std::{io, mem}; + +use winapi::{ + shared::minwindef::WORD, + um::{ + consoleapi::{GetConsoleMode, SetConsoleMode}, + wincon::{ + self, GetConsoleScreenBufferInfo, SetConsoleTextAttribute, + CONSOLE_SCREEN_BUFFER_INFO, FOREGROUND_BLUE as FG_BLUE, + FOREGROUND_GREEN as FG_GREEN, + FOREGROUND_INTENSITY as FG_INTENSITY, FOREGROUND_RED as FG_RED, + }, + }, }; use crate::{AsHandleRef, HandleRef}; @@ -208,7 +211,7 @@ impl Console { let h = kind.handle(); let info = screen_buffer_info(&h)?; let attr = TextAttributes::from_word(info.attributes()); - Ok(Console { kind: kind, start_attr: attr, cur_attr: attr }) + Ok(Console { kind, start_attr: attr, cur_attr: attr }) } /// Create a new Console to stdout. 
diff --git a/vendor/winapi-util/src/file.rs b/vendor/winapi-util/src/file.rs index 56a1e41..cdbcca3 100644 --- a/vendor/winapi-util/src/file.rs +++ b/vendor/winapi-util/src/file.rs @@ -1,13 +1,16 @@ -use std::io; -use std::mem; - -use winapi::shared::minwindef::FILETIME; -use winapi::shared::winerror::NO_ERROR; -use winapi::um::errhandlingapi::GetLastError; -use winapi::um::fileapi::{ - GetFileInformationByHandle, GetFileType, BY_HANDLE_FILE_INFORMATION, +use std::{io, mem}; + +use winapi::{ + shared::{minwindef::FILETIME, winerror::NO_ERROR}, + um::{ + errhandlingapi::GetLastError, + fileapi::{ + GetFileInformationByHandle, GetFileType, + BY_HANDLE_FILE_INFORMATION, + }, + winnt, + }, }; -use winapi::um::winnt; use crate::AsHandleRef; diff --git a/vendor/winapi-util/src/lib.rs b/vendor/winapi-util/src/lib.rs index 0bb259d..41fa683 100644 --- a/vendor/winapi-util/src/lib.rs +++ b/vendor/winapi-util/src/lib.rs @@ -29,4 +29,7 @@ pub mod console; #[cfg(windows)] pub mod file; #[cfg(windows)] +/// Safe routines for querying various Windows specific properties. +pub mod sysinfo; +#[cfg(windows)] mod win; diff --git a/vendor/winapi-util/src/sysinfo.rs b/vendor/winapi-util/src/sysinfo.rs new file mode 100644 index 0000000..eb3a564 --- /dev/null +++ b/vendor/winapi-util/src/sysinfo.rs @@ -0,0 +1,153 @@ +use std::{ffi::OsString, io}; + +use winapi::um::sysinfoapi::{GetComputerNameExW, COMPUTER_NAME_FORMAT}; + +/// The type of name to be retrieved by [`get_computer_name`]. +#[derive(Clone, Copy, Debug)] +#[non_exhaustive] +pub enum ComputerNameKind { + /// The name of the DNS domain assigned to the local computer. If the local + /// computer is a node in a cluster, lpBuffer receives the DNS domain name + /// of the cluster virtual server. + DnsDomain, + /// The fully qualified DNS name that uniquely identifies the local + /// computer. This name is a combination of the DNS host name and the DNS + /// domain name, using the form HostName.DomainName. 
If the local computer + /// is a node in a cluster, lpBuffer receives the fully qualified DNS name + /// of the cluster virtual server. + DnsFullyQualified, + /// The DNS host name of the local computer. If the local computer is a + /// node in a cluster, lpBuffer receives the DNS host name of the cluster + /// virtual server. + DnsHostname, + /// The NetBIOS name of the local computer. If the local computer is a node + /// in a cluster, lpBuffer receives the NetBIOS name of the cluster virtual + /// server. + NetBios, + /// The name of the DNS domain assigned to the local computer. If the local + /// computer is a node in a cluster, lpBuffer receives the DNS domain name + /// of the local computer, not the name of the cluster virtual server. + PhysicalDnsDomain, + /// The fully qualified DNS name that uniquely identifies the computer. If + /// the local computer is a node in a cluster, lpBuffer receives the fully + /// qualified DNS name of the local computer, not the name of the cluster + /// virtual server. + /// + /// The fully qualified DNS name is a combination of the DNS host name and + /// the DNS domain name, using the form HostName.DomainName. + PhysicalDnsFullyQualified, + /// The DNS host name of the local computer. If the local computer is a + /// node in a cluster, lpBuffer receives the DNS host name of the local + /// computer, not the name of the cluster virtual server. + PhysicalDnsHostname, + /// The NetBIOS name of the local computer. If the local computer is a node + /// in a cluster, lpBuffer receives the NetBIOS name of the local computer, + /// not the name of the cluster virtual server. 
+ PhysicalNetBios, +} + +impl ComputerNameKind { + fn to_format(&self) -> COMPUTER_NAME_FORMAT { + use self::ComputerNameKind::*; + use winapi::um::sysinfoapi; + + match *self { + DnsDomain => sysinfoapi::ComputerNameDnsDomain, + DnsFullyQualified => sysinfoapi::ComputerNameDnsFullyQualified, + DnsHostname => sysinfoapi::ComputerNameDnsHostname, + NetBios => sysinfoapi::ComputerNameNetBIOS, + PhysicalDnsDomain => sysinfoapi::ComputerNamePhysicalDnsDomain, + PhysicalDnsFullyQualified => { + sysinfoapi::ComputerNamePhysicalDnsFullyQualified + } + PhysicalDnsHostname => sysinfoapi::ComputerNamePhysicalDnsHostname, + PhysicalNetBios => sysinfoapi::ComputerNamePhysicalNetBIOS, + } + } +} +/// Retrieves a NetBIOS or DNS name associated with the local computer. +/// +/// The names are established at system startup, when the system reads them +/// from the registry. +/// +/// This corresponds to calling [`GetComputerNameExW`]. +/// +/// [`GetComputerNameExW`]: https://learn.microsoft.com/en-us/windows/win32/api/sysinfoapi/nf-sysinfoapi-getcomputernameexw +pub fn get_computer_name(kind: ComputerNameKind) -> io::Result<OsString> { + use std::os::windows::ffi::OsStringExt; + + let format = kind.to_format(); + let mut len1 = 0; + // SAFETY: As documented, we call this with a null pointer which will in + // turn cause this routine to write the required buffer size fo `len1`. + // Also, we explicitly ignore the return value since we expect this call to + // fail given that the destination buffer is too small by design. + let _ = + unsafe { GetComputerNameExW(format, std::ptr::null_mut(), &mut len1) }; + + let len = match usize::try_from(len1) { + Ok(len) => len, + Err(_) => { + return Err(io::Error::new( + io::ErrorKind::Other, + "GetComputerNameExW buffer length overflowed usize", + )) + } + }; + let mut buf = vec![0; len]; + let mut len2 = len1; + // SAFETY: We pass a valid pointer to an appropriately sized Vec<u16>. 
+ let rc = + unsafe { GetComputerNameExW(format, buf.as_mut_ptr(), &mut len2) }; + if rc == 0 { + return Err(io::Error::last_os_error()); + } + // Apparently, the subsequent call writes the number of characters written + // to the buffer to `len2` but not including the NUL terminator. Notice + // that in the first call above, the length written to `len1` *does* + // include the NUL terminator. Therefore, we expect `len1` to be at least + // one greater than `len2`. If not, then something weird has happened and + // we report an error. + if len1 <= len2 { + let msg = format!( + "GetComputerNameExW buffer length mismatch, \ + expected length strictly less than {} \ + but got {}", + len1, len2, + ); + return Err(io::Error::new(io::ErrorKind::Other, msg)); + } + let len = usize::try_from(len2).expect("len1 fits implies len2 fits"); + Ok(OsString::from_wide(&buf[..len])) +} + +#[cfg(test)] +mod tests { + use super::*; + + // This test doesn't really check anything other than that we can + // successfully query all kinds of computer names. We just print them out + // since there aren't really any properties about the names that we can + // assert. + // + // We specifically run this test in CI with --nocapture so that we can see + // the output. 
+ #[test] + fn itworks() { + let kinds = [ + ComputerNameKind::DnsDomain, + ComputerNameKind::DnsFullyQualified, + ComputerNameKind::DnsHostname, + ComputerNameKind::NetBios, + ComputerNameKind::PhysicalDnsDomain, + ComputerNameKind::PhysicalDnsFullyQualified, + ComputerNameKind::PhysicalDnsHostname, + ComputerNameKind::PhysicalNetBios, + ]; + for kind in kinds { + let result = get_computer_name(kind); + let name = result.unwrap(); + println!("{kind:?}: {name:?}"); + } + } +} diff --git a/vendor/winapi-util/src/win.rs b/vendor/winapi-util/src/win.rs index 9c77c0d..f64585e 100644 --- a/vendor/winapi-util/src/win.rs +++ b/vendor/winapi-util/src/win.rs @@ -1,10 +1,10 @@ -use std::fs::File; -use std::io; -use std::os::windows::io::{ - AsRawHandle, FromRawHandle, IntoRawHandle, RawHandle, +use std::{ + fs::File, + io, + os::windows::io::{AsRawHandle, FromRawHandle, IntoRawHandle, RawHandle}, + path::Path, + process, }; -use std::path::Path; -use std::process; /// A handle represents an owned and valid Windows handle to a file-like /// object.

{ + #[cfg_attr(feature = "perf-inline", inline(always))] + fn find(&self, haystack: &[u8], span: Span) -> Option { + (&**self).find(haystack, span) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn prefix(&self, haystack: &[u8], span: Span) -> Option { + (&**self).prefix(haystack, span) + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn memory_usage(&self) -> usize { + (&**self).memory_usage() + } + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn is_fast(&self) -> bool { + (&**self).is_fast() + } +} + +/// A type that encapsulates the selection of a prefilter algorithm from a +/// sequence of needles. +/// +/// The existence of this type is a little tricky, because we don't (currently) +/// use it for performing a search. Instead, we really only consume it by +/// converting the underlying prefilter into a trait object, whether that be +/// `dyn PrefilterI` or `dyn Strategy` (for the meta regex engine). In order +/// to avoid re-copying the prefilter selection logic, we isolate it here, and +/// then force anything downstream that wants to convert it to a trait object +/// to do trivial case analysis on it. +/// +/// One wonders whether we *should* use an enum instead of a trait object. +/// At time of writing, I chose trait objects based on instinct because 1) I +/// knew I wasn't going to inline anything and 2) there would potentially be +/// many different choices. However, as of time of writing, I haven't actually +/// compared the trait object approach to the enum approach. That probably +/// should be litigated, but I ran out of steam. +/// +/// Note that if the `alloc` feature is disabled, then values of this type +/// are (and should) never be constructed. Also, in practice, for any of the +/// prefilters to be selected, you'll need at least one of the `perf-literal-*` +/// features enabled. 
+#[derive(Clone, Debug)] +pub(crate) enum Choice { + Memchr(Memchr), + Memchr2(Memchr2), + Memchr3(Memchr3), + Memmem(Memmem), + Teddy(Teddy), + ByteSet(ByteSet), + AhoCorasick(AhoCorasick), +} + +impl Choice { + /// Select what is believed to be the best prefilter algorithm for the + /// match semantics and sequence of needles given. + /// + /// This selection algorithm uses the needles as given without any + /// modification. For example, if `[bar]` is given, then this doesn't + /// try to select `memchr` for `b`. Instead, it would select `memmem` + /// for `bar`. If callers would want `memchr` selected for `[bar]`, then + /// callers should massages the literals themselves. That is, callers are + /// responsible for heuristics surrounding which sequence of literals is + /// best. + /// + /// What this selection algorithm does is attempt to use the fastest + /// prefilter that works for the literals given. So if `[a, b]`, is given, + /// then `memchr2` is selected. + /// + /// Of course, which prefilter is selected is also subject to what + /// is available. For example, if `alloc` isn't enabled, then + /// that limits which prefilters can be selected. Similarly, if + /// `perf-literal-substring` isn't enabled, then nothing from the `memchr` + /// crate can be returned. + pub(crate) fn new>( + kind: MatchKind, + needles: &[B], + ) -> Option { + // An empty set means the regex matches nothing, so no sense in + // building a prefilter. + if needles.len() == 0 { + debug!("prefilter building failed: found empty set of literals"); + return None; + } + // If the regex can match the empty string, then the prefilter + // will by definition match at every position. This is obviously + // completely ineffective. 
+ if needles.iter().any(|n| n.as_ref().is_empty()) { + debug!("prefilter building failed: literals match empty string"); + return None; + } + // BREADCRUMBS: Perhaps the literal optimizer should special case + // sequences of length two or three if the leading bytes of each are + // "rare"? Or perhaps, if there are two or three total possible leading + // bytes, regardless of the number of literals, and all are rare... + // Then well, perhaps we should use memchr2 or memchr3 in those cases? + if let Some(pre) = Memchr::new(kind, needles) { + debug!("prefilter built: memchr"); + return Some(Choice::Memchr(pre)); + } + if let Some(pre) = Memchr2::new(kind, needles) { + debug!("prefilter built: memchr2"); + return Some(Choice::Memchr2(pre)); + } + if let Some(pre) = Memchr3::new(kind, needles) { + debug!("prefilter built: memchr3"); + return Some(Choice::Memchr3(pre)); + } + if let Some(pre) = Memmem::new(kind, needles) { + debug!("prefilter built: memmem"); + return Some(Choice::Memmem(pre)); + } + if let Some(pre) = Teddy::new(kind, needles) { + debug!("prefilter built: teddy"); + return Some(Choice::Teddy(pre)); + } + if let Some(pre) = ByteSet::new(kind, needles) { + debug!("prefilter built: byteset"); + return Some(Choice::ByteSet(pre)); + } + if let Some(pre) = AhoCorasick::new(kind, needles) { + debug!("prefilter built: aho-corasick"); + return Some(Choice::AhoCorasick(pre)); + } + debug!("prefilter building failed: no strategy could be found"); + None + } +} + +/// Extracts all of the prefix literals from the given HIR expressions into a +/// single `Seq`. The literals in the sequence are ordered with respect to the +/// order of the given HIR expressions and consistent with the match semantics +/// given. +/// +/// The sequence returned is "optimized." That is, they may be shrunk or even +/// truncated according to heuristics with the intent of making them more +/// useful as a prefilter. 
(Which translates to both using faster algorithms +/// and minimizing the false positive rate.) +/// +/// Note that this erases any connection between the literals and which pattern +/// (or patterns) they came from. +/// +/// The match kind given must correspond to the match semantics of the regex +/// that is represented by the HIRs given. The match semantics may change the +/// literal sequence returned. +#[cfg(feature = "syntax")] +pub(crate) fn prefixes(kind: MatchKind, hirs: &[H]) -> literal::Seq +where + H: core::borrow::Borrow, +{ + let mut extractor = literal::Extractor::new(); + extractor.kind(literal::ExtractKind::Prefix); + + let mut prefixes = literal::Seq::empty(); + for hir in hirs { + prefixes.union(&mut extractor.extract(hir.borrow())); + } + debug!( + "prefixes (len={:?}, exact={:?}) extracted before optimization: {:?}", + prefixes.len(), + prefixes.is_exact(), + prefixes + ); + match kind { + MatchKind::All => { + prefixes.sort(); + prefixes.dedup(); + } + MatchKind::LeftmostFirst => { + prefixes.optimize_for_prefix_by_preference(); + } + } + debug!( + "prefixes (len={:?}, exact={:?}) extracted after optimization: {:?}", + prefixes.len(), + prefixes.is_exact(), + prefixes + ); + prefixes +} + +/// Like `prefixes`, but for all suffixes of all matches for the given HIRs. 
+#[cfg(feature = "syntax")] +pub(crate) fn suffixes(kind: MatchKind, hirs: &[H]) -> literal::Seq +where + H: core::borrow::Borrow, +{ + let mut extractor = literal::Extractor::new(); + extractor.kind(literal::ExtractKind::Suffix); + + let mut suffixes = literal::Seq::empty(); + for hir in hirs { + suffixes.union(&mut extractor.extract(hir.borrow())); + } + debug!( + "suffixes (len={:?}, exact={:?}) extracted before optimization: {:?}", + suffixes.len(), + suffixes.is_exact(), + suffixes + ); + match kind { + MatchKind::All => { + suffixes.sort(); + suffixes.dedup(); + } + MatchKind::LeftmostFirst => { + suffixes.optimize_for_suffix_by_preference(); + } + } + debug!( + "suffixes (len={:?}, exact={:?}) extracted after optimization: {:?}", + suffixes.len(), + suffixes.is_exact(), + suffixes + ); + suffixes +} diff --git a/vendor/regex-automata/src/util/prefilter/teddy.rs b/vendor/regex-automata/src/util/prefilter/teddy.rs new file mode 100644 index 0000000..fc79f2b --- /dev/null +++ b/vendor/regex-automata/src/util/prefilter/teddy.rs @@ -0,0 +1,160 @@ +use crate::util::{ + prefilter::PrefilterI, + search::{MatchKind, Span}, +}; + +#[derive(Clone, Debug)] +pub(crate) struct Teddy { + #[cfg(not(feature = "perf-literal-multisubstring"))] + _unused: (), + /// The actual Teddy searcher. + /// + /// Technically, it's possible that Teddy doesn't actually get used, since + /// Teddy does require its haystack to at least be of a certain size + /// (usually around the size of whatever vector is being used, so ~16 + /// or ~32 bytes). For haystacks shorter than that, the implementation + /// currently uses Rabin-Karp. + #[cfg(feature = "perf-literal-multisubstring")] + searcher: aho_corasick::packed::Searcher, + /// When running an anchored search, the packed searcher can't handle it so + /// we defer to Aho-Corasick itself. Kind of sad, but changing the packed + /// searchers to support anchored search would be difficult at worst and + /// annoying at best. 
Since packed searchers only apply to small numbers of + /// literals, we content ourselves that this is not much of an added cost. + /// (That packed searchers only work with a small number of literals is + /// also why we use a DFA here. Otherwise, the memory usage of a DFA would + /// likely be unacceptable.) + #[cfg(feature = "perf-literal-multisubstring")] + anchored_ac: aho_corasick::dfa::DFA, + /// The length of the smallest literal we look for. + /// + /// We use this as a heuristic to figure out whether this will be "fast" or + /// not. Generally, the longer the better, because longer needles are more + /// discriminating and thus reduce false positive rate. + #[cfg(feature = "perf-literal-multisubstring")] + minimum_len: usize, +} + +impl Teddy { + pub(crate) fn new>( + kind: MatchKind, + needles: &[B], + ) -> Option { + #[cfg(not(feature = "perf-literal-multisubstring"))] + { + None + } + #[cfg(feature = "perf-literal-multisubstring")] + { + // We only really support leftmost-first semantics. In + // theory we could at least support leftmost-longest, as the + // aho-corasick crate does, but regex-automata doesn't know about + // leftmost-longest currently. + // + // And like the aho-corasick prefilter, if we're using `All` + // semantics, then we can still use leftmost semantics for a + // prefilter. (This might be a suspicious choice for the literal + // engine, which uses a prefilter as a regex engine directly, but + // that only happens when using leftmost-first semantics.) 
+ let (packed_match_kind, ac_match_kind) = match kind { + MatchKind::LeftmostFirst | MatchKind::All => ( + aho_corasick::packed::MatchKind::LeftmostFirst, + aho_corasick::MatchKind::LeftmostFirst, + ), + }; + let minimum_len = + needles.iter().map(|n| n.as_ref().len()).min().unwrap_or(0); + let packed = aho_corasick::packed::Config::new() + .match_kind(packed_match_kind) + .builder() + .extend(needles) + .build()?; + let anchored_ac = aho_corasick::dfa::DFA::builder() + .match_kind(ac_match_kind) + .start_kind(aho_corasick::StartKind::Anchored) + .prefilter(false) + .build(needles) + .ok()?; + Some(Teddy { searcher: packed, anchored_ac, minimum_len }) + } + } +} + +impl PrefilterI for Teddy { + fn find(&self, haystack: &[u8], span: Span) -> Option { + #[cfg(not(feature = "perf-literal-multisubstring"))] + { + unreachable!() + } + #[cfg(feature = "perf-literal-multisubstring")] + { + let ac_span = + aho_corasick::Span { start: span.start, end: span.end }; + self.searcher + .find_in(haystack, ac_span) + .map(|m| Span { start: m.start(), end: m.end() }) + } + } + + fn prefix(&self, haystack: &[u8], span: Span) -> Option { + #[cfg(not(feature = "perf-literal-multisubstring"))] + { + unreachable!() + } + #[cfg(feature = "perf-literal-multisubstring")] + { + use aho_corasick::automaton::Automaton; + let input = aho_corasick::Input::new(haystack) + .anchored(aho_corasick::Anchored::Yes) + .span(span.start..span.end); + self.anchored_ac + .try_find(&input) + // OK because we build the DFA with anchored support. 
+ .expect("aho-corasick DFA should never fail") + .map(|m| Span { start: m.start(), end: m.end() }) + } + } + + fn memory_usage(&self) -> usize { + #[cfg(not(feature = "perf-literal-multisubstring"))] + { + unreachable!() + } + #[cfg(feature = "perf-literal-multisubstring")] + { + use aho_corasick::automaton::Automaton; + self.searcher.memory_usage() + self.anchored_ac.memory_usage() + } + } + + fn is_fast(&self) -> bool { + #[cfg(not(feature = "perf-literal-multisubstring"))] + { + unreachable!() + } + #[cfg(feature = "perf-literal-multisubstring")] + { + // Teddy is usually quite fast, but I have seen some cases where + // a large number of literals can overwhelm it and make it not so + // fast. We make an educated but conservative guess at a limit, at + // which point, we're not so comfortable thinking Teddy is "fast." + // + // Well... this used to incorporate a "limit" on the *number* + // of literals, but I have since changed it to a minimum on the + // *smallest* literal. Namely, when there is a very small literal + // (1 or 2 bytes), it is far more likely that it leads to a higher + // false positive rate. (Although, of course, not always. For + // example, 'zq' is likely to have a very low false positive rate.) + // But when we have 3 bytes, we have a really good chance of being + // quite discriminatory and thus fast. + // + // We may still want to add some kind of limit on the number of + // literals here, but keep in mind that Teddy already has its own + // somewhat small limit (64 at time of writing). The main issue + // here is that if 'is_fast' is false, it opens the door for the + // reverse inner optimization to kick in. We really only want to + // resort to the reverse inner optimization if we absolutely must. 
+ self.minimum_len >= 3 + } + } +} diff --git a/vendor/regex-automata/src/util/primitives.rs b/vendor/regex-automata/src/util/primitives.rs new file mode 100644 index 0000000..5c5d187 --- /dev/null +++ b/vendor/regex-automata/src/util/primitives.rs @@ -0,0 +1,776 @@ +/*! +Lower level primitive types that are useful in a variety of circumstances. + +# Overview + +This list represents the principle types in this module and briefly describes +when you might want to use them. + +* [`PatternID`] - A type that represents the identifier of a regex pattern. +This is probably the most widely used type in this module (which is why it's +also re-exported in the crate root). +* [`StateID`] - A type the represents the identifier of a finite automaton +state. This is used for both NFAs and DFAs, with the notable exception of +the hybrid NFA/DFA. (The hybrid NFA/DFA uses a special purpose "lazy" state +identifier.) +* [`SmallIndex`] - The internal representation of both a `PatternID` and a +`StateID`. Its purpose is to serve as a type that can index memory without +being as big as a `usize` on 64-bit targets. The main idea behind this type +is that there are many things in regex engines that will, in practice, never +overflow a 32-bit integer. (For example, like the number of patterns in a regex +or the number of states in an NFA.) Thus, a `SmallIndex` can be used to index +memory without peppering `as` casts everywhere. Moreover, it forces callers +to handle errors in the case where, somehow, the value would otherwise overflow +either a 32-bit integer or a `usize` (e.g., on 16-bit targets). +* [`NonMaxUsize`] - Represents a `usize` that cannot be `usize::MAX`. As a +result, `Option` has the same size in memory as a `usize`. This +useful, for example, when representing the offsets of submatches since it +reduces memory usage by a factor of 2. It is a legal optimization since Rust +guarantees that slices never have a length that exceeds `isize::MAX`. 
+*/ + +use core::num::NonZeroUsize; + +#[cfg(feature = "alloc")] +use alloc::vec::Vec; + +use crate::util::int::{Usize, U16, U32, U64}; + +/// A `usize` that can never be `usize::MAX`. +/// +/// This is similar to `core::num::NonZeroUsize`, but instead of not permitting +/// a zero value, this does not permit a max value. +/// +/// This is useful in certain contexts where one wants to optimize the memory +/// usage of things that contain match offsets. Namely, since Rust slices +/// are guaranteed to never have a length exceeding `isize::MAX`, we can use +/// `usize::MAX` as a sentinel to indicate that no match was found. Indeed, +/// types like `Option` have exactly the same size in memory as a +/// `usize`. +/// +/// This type is defined to be `repr(transparent)` for +/// `core::num::NonZeroUsize`, which is in turn defined to be +/// `repr(transparent)` for `usize`. +#[derive(Clone, Copy, Eq, Hash, PartialEq, PartialOrd, Ord)] +#[repr(transparent)] +pub struct NonMaxUsize(NonZeroUsize); + +impl NonMaxUsize { + /// Create a new `NonMaxUsize` from the given value. + /// + /// This returns `None` only when the given value is equal to `usize::MAX`. + #[inline] + pub fn new(value: usize) -> Option { + NonZeroUsize::new(value.wrapping_add(1)).map(NonMaxUsize) + } + + /// Return the underlying `usize` value. The returned value is guaranteed + /// to not equal `usize::MAX`. + #[inline] + pub fn get(self) -> usize { + self.0.get().wrapping_sub(1) + } +} + +// We provide our own Debug impl because seeing the internal repr can be quite +// surprising if you aren't expecting it. e.g., 'NonMaxUsize(5)' vs just '5'. +impl core::fmt::Debug for NonMaxUsize { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + write!(f, "{:?}", self.get()) + } +} + +/// A type that represents a "small" index. +/// +/// The main idea of this type is to provide something that can index memory, +/// but uses less memory than `usize` on 64-bit systems. 
Specifically, its +/// representation is always a `u32` and has `repr(transparent)` enabled. (So +/// it is safe to transmute between a `u32` and a `SmallIndex`.) +/// +/// A small index is typically useful in cases where there is no practical way +/// that the index will overflow a 32-bit integer. A good example of this is +/// an NFA state. If you could somehow build an NFA with `2^30` states, its +/// memory usage would be exorbitant and its runtime execution would be so +/// slow as to be completely worthless. Therefore, this crate generally deems +/// it acceptable to return an error if it would otherwise build an NFA that +/// requires a slice longer than what a 32-bit integer can index. In exchange, +/// we can use 32-bit indices instead of 64-bit indices in various places. +/// +/// This type ensures this by providing a constructor that will return an error +/// if its argument cannot fit into the type. This makes it much easier to +/// handle these sorts of boundary cases that are otherwise extremely subtle. +/// +/// On all targets, this type guarantees that its value will fit in a `u32`, +/// `i32`, `usize` and an `isize`. This means that on 16-bit targets, for +/// example, this type's maximum value will never overflow an `isize`, +/// which means it will never overflow a `i16` even though its internal +/// representation is still a `u32`. +/// +/// The purpose for making the type fit into even signed integer types like +/// `isize` is to guarantee that the difference between any two small indices +/// is itself also a small index. This is useful in certain contexts, e.g., +/// for delta encoding. +/// +/// # Other types +/// +/// The following types wrap `SmallIndex` to provide a more focused use case: +/// +/// * [`PatternID`] is for representing the identifiers of patterns. +/// * [`StateID`] is for representing the identifiers of states in finite +/// automata. It is used for both NFAs and DFAs. 
+/// +/// # Representation +/// +/// This type is always represented internally by a `u32` and is marked as +/// `repr(transparent)`. Thus, this type always has the same representation as +/// a `u32`. It is thus safe to transmute between a `u32` and a `SmallIndex`. +/// +/// # Indexing +/// +/// For convenience, callers may use a `SmallIndex` to index slices. +/// +/// # Safety +/// +/// While a `SmallIndex` is meant to guarantee that its value fits into `usize` +/// without using as much space as a `usize` on all targets, callers must +/// not rely on this property for safety. Callers may choose to rely on this +/// property for correctness however. For example, creating a `SmallIndex` with +/// an invalid value can be done in entirely safe code. This may in turn result +/// in panics or silent logical errors. +#[derive( + Clone, Copy, Debug, Default, Eq, Hash, PartialEq, PartialOrd, Ord, +)] +#[repr(transparent)] +pub struct SmallIndex(u32); + +impl SmallIndex { + /// The maximum index value. + #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] + pub const MAX: SmallIndex = + // FIXME: Use as_usize() once const functions in traits are stable. + SmallIndex::new_unchecked(core::i32::MAX as usize - 1); + + /// The maximum index value. + #[cfg(target_pointer_width = "16")] + pub const MAX: SmallIndex = + SmallIndex::new_unchecked(core::isize::MAX - 1); + + /// The total number of values that can be represented as a small index. + pub const LIMIT: usize = SmallIndex::MAX.as_usize() + 1; + + /// The zero index value. + pub const ZERO: SmallIndex = SmallIndex::new_unchecked(0); + + /// The number of bytes that a single small index uses in memory. + pub const SIZE: usize = core::mem::size_of::(); + + /// Create a new small index. + /// + /// If the given index exceeds [`SmallIndex::MAX`], then this returns + /// an error. 
+ #[inline] + pub fn new(index: usize) -> Result { + SmallIndex::try_from(index) + } + + /// Create a new small index without checking whether the given value + /// exceeds [`SmallIndex::MAX`]. + /// + /// Using this routine with an invalid index value will result in + /// unspecified behavior, but *not* undefined behavior. In particular, an + /// invalid index value is likely to cause panics or possibly even silent + /// logical errors. + /// + /// Callers must never rely on a `SmallIndex` to be within a certain range + /// for memory safety. + #[inline] + pub const fn new_unchecked(index: usize) -> SmallIndex { + // FIXME: Use as_u32() once const functions in traits are stable. + SmallIndex(index as u32) + } + + /// Like [`SmallIndex::new`], but panics if the given index is not valid. + #[inline] + pub fn must(index: usize) -> SmallIndex { + SmallIndex::new(index).expect("invalid small index") + } + + /// Return this small index as a `usize`. This is guaranteed to never + /// overflow `usize`. + #[inline] + pub const fn as_usize(&self) -> usize { + // FIXME: Use as_usize() once const functions in traits are stable. + self.0 as usize + } + + /// Return this small index as a `u64`. This is guaranteed to never + /// overflow. + #[inline] + pub const fn as_u64(&self) -> u64 { + // FIXME: Use u64::from() once const functions in traits are stable. + self.0 as u64 + } + + /// Return the internal `u32` of this small index. This is guaranteed to + /// never overflow `u32`. + #[inline] + pub const fn as_u32(&self) -> u32 { + self.0 + } + + /// Return the internal `u32` of this small index represented as an `i32`. + /// This is guaranteed to never overflow an `i32`. + #[inline] + pub const fn as_i32(&self) -> i32 { + // This is OK because we guarantee that our max value is <= i32::MAX. + self.0 as i32 + } + + /// Returns one more than this small index as a usize. 
+ /// + /// Since a small index has constraints on its maximum value, adding `1` to + /// it will always fit in a `usize`, `u32` and a `i32`. + #[inline] + pub fn one_more(&self) -> usize { + self.as_usize() + 1 + } + + /// Decode this small index from the bytes given using the native endian + /// byte order for the current target. + /// + /// If the decoded integer is not representable as a small index for the + /// current target, then this returns an error. + #[inline] + pub fn from_ne_bytes( + bytes: [u8; 4], + ) -> Result { + let id = u32::from_ne_bytes(bytes); + if id > SmallIndex::MAX.as_u32() { + return Err(SmallIndexError { attempted: u64::from(id) }); + } + Ok(SmallIndex::new_unchecked(id.as_usize())) + } + + /// Decode this small index from the bytes given using the native endian + /// byte order for the current target. + /// + /// This is analogous to [`SmallIndex::new_unchecked`] in that is does not + /// check whether the decoded integer is representable as a small index. + #[inline] + pub fn from_ne_bytes_unchecked(bytes: [u8; 4]) -> SmallIndex { + SmallIndex::new_unchecked(u32::from_ne_bytes(bytes).as_usize()) + } + + /// Return the underlying small index integer as raw bytes in native endian + /// format. 
+ #[inline] + pub fn to_ne_bytes(&self) -> [u8; 4] { + self.0.to_ne_bytes() + } +} + +impl core::ops::Index for [T] { + type Output = T; + + #[inline] + fn index(&self, index: SmallIndex) -> &T { + &self[index.as_usize()] + } +} + +impl core::ops::IndexMut for [T] { + #[inline] + fn index_mut(&mut self, index: SmallIndex) -> &mut T { + &mut self[index.as_usize()] + } +} + +#[cfg(feature = "alloc")] +impl core::ops::Index for Vec { + type Output = T; + + #[inline] + fn index(&self, index: SmallIndex) -> &T { + &self[index.as_usize()] + } +} + +#[cfg(feature = "alloc")] +impl core::ops::IndexMut for Vec { + #[inline] + fn index_mut(&mut self, index: SmallIndex) -> &mut T { + &mut self[index.as_usize()] + } +} + +impl From for SmallIndex { + fn from(index: u8) -> SmallIndex { + SmallIndex::new_unchecked(usize::from(index)) + } +} + +impl TryFrom for SmallIndex { + type Error = SmallIndexError; + + fn try_from(index: u16) -> Result { + if u32::from(index) > SmallIndex::MAX.as_u32() { + return Err(SmallIndexError { attempted: u64::from(index) }); + } + Ok(SmallIndex::new_unchecked(index.as_usize())) + } +} + +impl TryFrom for SmallIndex { + type Error = SmallIndexError; + + fn try_from(index: u32) -> Result { + if index > SmallIndex::MAX.as_u32() { + return Err(SmallIndexError { attempted: u64::from(index) }); + } + Ok(SmallIndex::new_unchecked(index.as_usize())) + } +} + +impl TryFrom for SmallIndex { + type Error = SmallIndexError; + + fn try_from(index: u64) -> Result { + if index > SmallIndex::MAX.as_u64() { + return Err(SmallIndexError { attempted: index }); + } + Ok(SmallIndex::new_unchecked(index.as_usize())) + } +} + +impl TryFrom for SmallIndex { + type Error = SmallIndexError; + + fn try_from(index: usize) -> Result { + if index > SmallIndex::MAX.as_usize() { + return Err(SmallIndexError { attempted: index.as_u64() }); + } + Ok(SmallIndex::new_unchecked(index)) + } +} + +#[cfg(test)] +impl quickcheck::Arbitrary for SmallIndex { + fn arbitrary(gen: &mut 
quickcheck::Gen) -> SmallIndex { + use core::cmp::max; + + let id = max(i32::MIN + 1, i32::arbitrary(gen)).abs(); + if id > SmallIndex::MAX.as_i32() { + SmallIndex::MAX + } else { + SmallIndex::new(usize::try_from(id).unwrap()).unwrap() + } + } +} + +/// This error occurs when a small index could not be constructed. +/// +/// This occurs when given an integer exceeding the maximum small index value. +/// +/// When the `std` feature is enabled, this implements the `Error` trait. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct SmallIndexError { + attempted: u64, +} + +impl SmallIndexError { + /// Returns the value that could not be converted to a small index. + pub fn attempted(&self) -> u64 { + self.attempted + } +} + +#[cfg(feature = "std")] +impl std::error::Error for SmallIndexError {} + +impl core::fmt::Display for SmallIndexError { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + write!( + f, + "failed to create small index from {:?}, which exceeds {:?}", + self.attempted(), + SmallIndex::MAX, + ) + } +} + +#[derive(Clone, Debug)] +pub(crate) struct SmallIndexIter { + rng: core::ops::Range, +} + +impl Iterator for SmallIndexIter { + type Item = SmallIndex; + + fn next(&mut self) -> Option { + if self.rng.start >= self.rng.end { + return None; + } + let next_id = self.rng.start + 1; + let id = core::mem::replace(&mut self.rng.start, next_id); + // new_unchecked is OK since we asserted that the number of + // elements in this iterator will fit in an ID at construction. + Some(SmallIndex::new_unchecked(id)) + } +} + +macro_rules! index_type_impls { + ($name:ident, $err:ident, $iter:ident, $withiter:ident) => { + impl $name { + /// The maximum value. + pub const MAX: $name = $name(SmallIndex::MAX); + + /// The total number of values that can be represented. + pub const LIMIT: usize = SmallIndex::LIMIT; + + /// The zero value. + pub const ZERO: $name = $name(SmallIndex::ZERO); + + /// The number of bytes that a single value uses in memory. 
+ pub const SIZE: usize = SmallIndex::SIZE; + + /// Create a new value that is represented by a "small index." + /// + /// If the given index exceeds the maximum allowed value, then this + /// returns an error. + #[inline] + pub fn new(value: usize) -> Result<$name, $err> { + SmallIndex::new(value).map($name).map_err($err) + } + + /// Create a new value without checking whether the given argument + /// exceeds the maximum. + /// + /// Using this routine with an invalid value will result in + /// unspecified behavior, but *not* undefined behavior. In + /// particular, an invalid ID value is likely to cause panics or + /// possibly even silent logical errors. + /// + /// Callers must never rely on this type to be within a certain + /// range for memory safety. + #[inline] + pub const fn new_unchecked(value: usize) -> $name { + $name(SmallIndex::new_unchecked(value)) + } + + /// Like `new`, but panics if the given value is not valid. + #[inline] + pub fn must(value: usize) -> $name { + $name::new(value).expect(concat!( + "invalid ", + stringify!($name), + " value" + )) + } + + /// Return the internal value as a `usize`. This is guaranteed to + /// never overflow `usize`. + #[inline] + pub const fn as_usize(&self) -> usize { + self.0.as_usize() + } + + /// Return the internal value as a `u64`. This is guaranteed to + /// never overflow. + #[inline] + pub const fn as_u64(&self) -> u64 { + self.0.as_u64() + } + + /// Return the internal value as a `u32`. This is guaranteed to + /// never overflow `u32`. + #[inline] + pub const fn as_u32(&self) -> u32 { + self.0.as_u32() + } + + /// Return the internal value as a i32`. This is guaranteed to + /// never overflow an `i32`. + #[inline] + pub const fn as_i32(&self) -> i32 { + self.0.as_i32() + } + + /// Returns one more than this value as a usize. + /// + /// Since values represented by a "small index" have constraints + /// on their maximum value, adding `1` to it will always fit in a + /// `usize`, `u32` and a `i32`. 
+ #[inline] + pub fn one_more(&self) -> usize { + self.0.one_more() + } + + /// Decode this value from the bytes given using the native endian + /// byte order for the current target. + /// + /// If the decoded integer is not representable as a small index + /// for the current target, then this returns an error. + #[inline] + pub fn from_ne_bytes(bytes: [u8; 4]) -> Result<$name, $err> { + SmallIndex::from_ne_bytes(bytes).map($name).map_err($err) + } + + /// Decode this value from the bytes given using the native endian + /// byte order for the current target. + /// + /// This is analogous to `new_unchecked` in that is does not check + /// whether the decoded integer is representable as a small index. + #[inline] + pub fn from_ne_bytes_unchecked(bytes: [u8; 4]) -> $name { + $name(SmallIndex::from_ne_bytes_unchecked(bytes)) + } + + /// Return the underlying integer as raw bytes in native endian + /// format. + #[inline] + pub fn to_ne_bytes(&self) -> [u8; 4] { + self.0.to_ne_bytes() + } + + /// Returns an iterator over all values from 0 up to and not + /// including the given length. + /// + /// If the given length exceeds this type's limit, then this + /// panics. + pub(crate) fn iter(len: usize) -> $iter { + $iter::new(len) + } + } + + // We write our own Debug impl so that we get things like PatternID(5) + // instead of PatternID(SmallIndex(5)). 
+ impl core::fmt::Debug for $name { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + f.debug_tuple(stringify!($name)).field(&self.as_u32()).finish() + } + } + + impl core::ops::Index<$name> for [T] { + type Output = T; + + #[inline] + fn index(&self, index: $name) -> &T { + &self[index.as_usize()] + } + } + + impl core::ops::IndexMut<$name> for [T] { + #[inline] + fn index_mut(&mut self, index: $name) -> &mut T { + &mut self[index.as_usize()] + } + } + + #[cfg(feature = "alloc")] + impl core::ops::Index<$name> for Vec { + type Output = T; + + #[inline] + fn index(&self, index: $name) -> &T { + &self[index.as_usize()] + } + } + + #[cfg(feature = "alloc")] + impl core::ops::IndexMut<$name> for Vec { + #[inline] + fn index_mut(&mut self, index: $name) -> &mut T { + &mut self[index.as_usize()] + } + } + + impl From for $name { + fn from(value: u8) -> $name { + $name(SmallIndex::from(value)) + } + } + + impl TryFrom for $name { + type Error = $err; + + fn try_from(value: u16) -> Result<$name, $err> { + SmallIndex::try_from(value).map($name).map_err($err) + } + } + + impl TryFrom for $name { + type Error = $err; + + fn try_from(value: u32) -> Result<$name, $err> { + SmallIndex::try_from(value).map($name).map_err($err) + } + } + + impl TryFrom for $name { + type Error = $err; + + fn try_from(value: u64) -> Result<$name, $err> { + SmallIndex::try_from(value).map($name).map_err($err) + } + } + + impl TryFrom for $name { + type Error = $err; + + fn try_from(value: usize) -> Result<$name, $err> { + SmallIndex::try_from(value).map($name).map_err($err) + } + } + + #[cfg(test)] + impl quickcheck::Arbitrary for $name { + fn arbitrary(gen: &mut quickcheck::Gen) -> $name { + $name(SmallIndex::arbitrary(gen)) + } + } + + /// This error occurs when a value could not be constructed. + /// + /// This occurs when given an integer exceeding the maximum allowed + /// value. + /// + /// When the `std` feature is enabled, this implements the `Error` + /// trait. 
+ #[derive(Clone, Debug, Eq, PartialEq)] + pub struct $err(SmallIndexError); + + impl $err { + /// Returns the value that could not be converted to an ID. + pub fn attempted(&self) -> u64 { + self.0.attempted() + } + } + + #[cfg(feature = "std")] + impl std::error::Error for $err {} + + impl core::fmt::Display for $err { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + write!( + f, + "failed to create {} from {:?}, which exceeds {:?}", + stringify!($name), + self.attempted(), + $name::MAX, + ) + } + } + + #[derive(Clone, Debug)] + pub(crate) struct $iter(SmallIndexIter); + + impl $iter { + fn new(len: usize) -> $iter { + assert!( + len <= $name::LIMIT, + "cannot create iterator for {} when number of \ + elements exceed {:?}", + stringify!($name), + $name::LIMIT, + ); + $iter(SmallIndexIter { rng: 0..len }) + } + } + + impl Iterator for $iter { + type Item = $name; + + fn next(&mut self) -> Option<$name> { + self.0.next().map($name) + } + } + + /// An iterator adapter that is like std::iter::Enumerate, but attaches + /// small index values instead. It requires `ExactSizeIterator`. At + /// construction, it ensures that the index of each element in the + /// iterator is representable in the corresponding small index type. + #[derive(Clone, Debug)] + pub(crate) struct $withiter { + it: I, + ids: $iter, + } + + impl $withiter { + fn new(it: I) -> $withiter { + let ids = $name::iter(it.len()); + $withiter { it, ids } + } + } + + impl Iterator for $withiter { + type Item = ($name, I::Item); + + fn next(&mut self) -> Option<($name, I::Item)> { + let item = self.it.next()?; + // Number of elements in this iterator must match, according + // to contract of ExactSizeIterator. + let id = self.ids.next().unwrap(); + Some((id, item)) + } + } + }; +} + +/// The identifier of a regex pattern, represented by a [`SmallIndex`]. +/// +/// The identifier for a pattern corresponds to its relative position among +/// other patterns in a single finite state machine. 
Namely, when building +/// a multi-pattern regex engine, one must supply a sequence of patterns to +/// match. The position (starting at 0) of each pattern in that sequence +/// represents its identifier. This identifier is in turn used to identify and +/// report matches of that pattern in various APIs. +/// +/// See the [`SmallIndex`] type for more information about what it means for +/// a pattern ID to be a "small index." +/// +/// Note that this type is defined in the +/// [`util::primitives`](crate::util::primitives) module, but it is also +/// re-exported at the crate root due to how common it is. +#[derive(Clone, Copy, Default, Eq, Hash, PartialEq, PartialOrd, Ord)] +#[repr(transparent)] +pub struct PatternID(SmallIndex); + +/// The identifier of a finite automaton state, represented by a +/// [`SmallIndex`]. +/// +/// Most regex engines in this crate are built on top of finite automata. Each +/// state in a finite automaton defines transitions from its state to another. +/// Those transitions point to other states via their identifiers, i.e., a +/// `StateID`. Since finite automata tend to contain many transitions, it is +/// much more memory efficient to define state IDs as small indices. +/// +/// See the [`SmallIndex`] type for more information about what it means for +/// a state ID to be a "small index." +#[derive(Clone, Copy, Default, Eq, Hash, PartialEq, PartialOrd, Ord)] +#[repr(transparent)] +pub struct StateID(SmallIndex); + +index_type_impls!(PatternID, PatternIDError, PatternIDIter, WithPatternIDIter); +index_type_impls!(StateID, StateIDError, StateIDIter, WithStateIDIter); + +/// A utility trait that defines a couple of adapters for making it convenient +/// to access indices as "small index" types. We require ExactSizeIterator so +/// that iterator construction can do a single check to make sure the index of +/// each element is representable by its small index type. 
+pub(crate) trait IteratorIndexExt: Iterator { + fn with_pattern_ids(self) -> WithPatternIDIter + where + Self: Sized + ExactSizeIterator, + { + WithPatternIDIter::new(self) + } + + fn with_state_ids(self) -> WithStateIDIter + where + Self: Sized + ExactSizeIterator, + { + WithStateIDIter::new(self) + } +} + +impl IteratorIndexExt for I {} diff --git a/vendor/regex-automata/src/util/search.rs b/vendor/regex-automata/src/util/search.rs new file mode 100644 index 0000000..05b1cff --- /dev/null +++ b/vendor/regex-automata/src/util/search.rs @@ -0,0 +1,1993 @@ +/*! +Types and routines that support the search APIs of most regex engines. + +This sub-module isn't exposed directly, but rather, its contents are exported +at the crate root due to the universality of most of the types and routines in +this module. +*/ + +use core::ops::{Range, RangeBounds}; + +use crate::util::{escape::DebugByte, primitives::PatternID, utf8}; + +/// The parameters for a regex search including the haystack to search. +/// +/// It turns out that regex searches have a few parameters, and in most cases, +/// those parameters have defaults that work in the vast majority of cases. +/// This `Input` type exists to make that common case seamless while also +/// providing an avenue for changing the parameters of a search. In particular, +/// this type enables doing so without a combinatorial explosion of different +/// methods and/or superfluous parameters in the common cases. +/// +/// An `Input` permits configuring the following things: +/// +/// * Search only a substring of a haystack, while taking the broader context +/// into account for resolving look-around assertions. +/// * Indicating whether to search for all patterns in a regex, or to +/// only search for one pattern in particular. +/// * Whether to perform an anchored or unanchored search. +/// * Whether to report a match as early as possible. +/// +/// All of these parameters, except for the haystack, have sensible default +/// values.
This means that the minimal search configuration is simply a call +/// to [`Input::new`] with your haystack. Setting any other parameter is +/// optional. +/// +/// Moreover, for any `H` that implements `AsRef<[u8]>`, there exists a +/// `From for Input` implementation. This is useful because many of the +/// search APIs in this crate accept an `Into`. This means you can +/// provide string or byte strings to these routines directly, and they'll +/// automatically get converted into an `Input` for you. +/// +/// The lifetime parameter `'h` refers to the lifetime of the haystack. +/// +/// # Organization +/// +/// The API of `Input` is split into a few different parts: +/// +/// * A builder-like API that transforms a `Input` by value. Examples: +/// [`Input::span`] and [`Input::anchored`]. +/// * A setter API that permits mutating parameters in place. Examples: +/// [`Input::set_span`] and [`Input::set_anchored`]. +/// * A getter API that permits retrieving any of the search parameters. +/// Examples: [`Input::get_span`] and [`Input::get_anchored`]. +/// * A few convenience getter routines that don't conform to the above naming +/// pattern due to how common they are. Examples: [`Input::haystack`], +/// [`Input::start`] and [`Input::end`]. +/// * Miscellaneous predicates and other helper routines that are useful +/// in some contexts. Examples: [`Input::is_char_boundary`]. +/// +/// A `Input` exposes so much because it is meant to be used by both callers of +/// regex engines _and_ implementors of regex engines. A constraining factor is +/// that regex engines should accept a `&Input` as its lowest level API, which +/// means that implementors should only use the "getter" APIs of a `Input`. +/// +/// # Valid bounds and search termination +/// +/// An `Input` permits setting the bounds of a search via either +/// [`Input::span`] or [`Input::range`]. The bounds set must be valid, or +/// else a panic will occur. 
Bounds are valid if and only if: +/// +/// * The bounds represent a valid range into the input's haystack. +/// * **or** the end bound is a valid ending bound for the haystack *and* +/// the start bound is exactly one greater than the end bound. +/// +/// In the latter case, [`Input::is_done`] will return true and indicates any +/// search receiving such an input should immediately return with no match. +/// +/// Note that while `Input` is used for reverse searches in this crate, the +/// `Input::is_done` predicate assumes a forward search. Because unsigned +/// offsets are used internally, there is no way to tell from only the offsets +/// whether a reverse search is done or not. +/// +/// # Regex engine support +/// +/// Any regex engine accepting an `Input` must support at least the following +/// things: +/// +/// * Searching a `&[u8]` for matches. +/// * Searching a substring of `&[u8]` for a match, such that any match +/// reported must appear entirely within that substring. +/// * For a forwards search, a match should never be reported when +/// [`Input::is_done`] returns true. (For reverse searches, termination should +/// be handled outside of `Input`.) +/// +/// Supporting other aspects of an `Input` is optional, but regex engines +/// should handle aspects they don't support gracefully. How this is done is +/// generally up to the regex engine. This crate generally treats unsupported +/// anchored modes as an error to report for example, but for simplicity, in +/// the meta regex engine, trying to search with an invalid pattern ID just +/// results in no match being reported. +#[derive(Clone)] +pub struct Input<'h> { + haystack: &'h [u8], + span: Span, + anchored: Anchored, + earliest: bool, +} + +impl<'h> Input<'h> { + /// Create a new search configuration for the given haystack.
+ #[inline] + pub fn new>(haystack: &'h H) -> Input<'h> { + // Perform only one call to `haystack.as_ref()` to protect from incorrect + // implementations that return different values from multiple calls. + // This is important because there's code that relies on `span` not being + // out of bounds with respect to the stored `haystack`. + let haystack = haystack.as_ref(); + Input { + haystack, + span: Span { start: 0, end: haystack.len() }, + anchored: Anchored::No, + earliest: false, + } + } + + /// Set the span for this search. + /// + /// This routine does not panic if the span given is not a valid range for + /// this search's haystack. If this search is run with an invalid range, + /// then the most likely outcome is that the actual search execution will + /// panic. + /// + /// This routine is generic over how a span is provided. While + /// a [`Span`] may be given directly, one may also provide a + /// `std::ops::Range`. To provide anything supported by range + /// syntax, use the [`Input::range`] method. + /// + /// The default span is the entire haystack. + /// + /// Note that [`Input::range`] overrides this method and vice versa. + /// + /// # Panics + /// + /// This panics if the given span does not correspond to valid bounds in + /// the haystack or the termination of a search. + /// + /// # Example + /// + /// This example shows how the span of the search can impact whether a + /// match is reported or not. This is particularly relevant for look-around + /// operators, which might take things outside of the span into account + /// when determining whether they match. + /// + /// ``` + /// # if cfg!(miri) { return Ok(()); } // miri takes too long + /// use regex_automata::{ + /// nfa::thompson::pikevm::PikeVM, + /// Match, Input, + /// }; + /// + /// // Look for 'at', but as a distinct word. 
+ /// let re = PikeVM::new(r"\bat\b")?; + /// let mut cache = re.create_cache(); + /// let mut caps = re.create_captures(); + /// + /// // Our haystack contains 'at', but not as a distinct word. + /// let haystack = "batter"; + /// + /// // A standard search finds nothing, as expected. + /// let input = Input::new(haystack); + /// re.search(&mut cache, &input, &mut caps); + /// assert_eq!(None, caps.get_match()); + /// + /// // But if we wanted to search starting at position '1', we might + /// // slice the haystack. If we do this, it's impossible for the \b + /// // anchors to take the surrounding context into account! And thus, + /// // a match is produced. + /// let input = Input::new(&haystack[1..3]); + /// re.search(&mut cache, &input, &mut caps); + /// assert_eq!(Some(Match::must(0, 0..2)), caps.get_match()); + /// + /// // But if we specify the span of the search instead of slicing the + /// // haystack, then the regex engine can "see" outside of the span + /// // and resolve the anchors correctly. + /// let input = Input::new(haystack).span(1..3); + /// re.search(&mut cache, &input, &mut caps); + /// assert_eq!(None, caps.get_match()); + /// + /// # Ok::<(), Box>(()) + /// ``` + /// + /// This may seem a little ham-fisted, but this scenario tends to come up + /// if some other regex engine found the match span and now you need to + /// re-process that span to look for capturing groups. (e.g., Run a faster + /// DFA first, find a match, then run the PikeVM on just the match span to + /// resolve capturing groups.) In order to implement that sort of logic + /// correctly, you need to set the span on the search instead of slicing + /// the haystack directly. + /// + /// The other advantage of using this routine to specify the bounds of the + /// search is that the match offsets are still reported in terms of the + /// original haystack. 
For example, the second search in the example above + /// reported a match at position `0`, even though `at` starts at offset + /// `1` because we sliced the haystack. + #[inline] + pub fn span>(mut self, span: S) -> Input<'h> { + self.set_span(span); + self + } + + /// Like `Input::span`, but accepts any range instead. + /// + /// This routine does not panic if the range given is not a valid range for + /// this search's haystack. If this search is run with an invalid range, + /// then the most likely outcome is that the actual search execution will + /// panic. + /// + /// The default range is the entire haystack. + /// + /// Note that [`Input::span`] overrides this method and vice versa. + /// + /// # Panics + /// + /// This routine will panic if the given range could not be converted + /// to a valid [`Range`]. For example, this would panic when given + /// `0..=usize::MAX` since it cannot be represented using a half-open + /// interval in terms of `usize`. + /// + /// This also panics if the given range does not correspond to valid bounds + /// in the haystack or the termination of a search. + /// + /// # Example + /// + /// ``` + /// use regex_automata::Input; + /// + /// let input = Input::new("foobar"); + /// assert_eq!(0..6, input.get_range()); + /// + /// let input = Input::new("foobar").range(2..=4); + /// assert_eq!(2..5, input.get_range()); + /// ``` + #[inline] + pub fn range>(mut self, range: R) -> Input<'h> { + self.set_range(range); + self + } + + /// Sets the anchor mode of a search. + /// + /// When a search is anchored (so that's [`Anchored::Yes`] or + /// [`Anchored::Pattern`]), a match must begin at the start of a search. + /// When a search is not anchored (that's [`Anchored::No`]), regex engines + /// will behave as if the pattern started with a `(?s-u:.)*?`. This prefix + /// permits a match to appear anywhere. + /// + /// By default, the anchored mode is [`Anchored::No`]. 
+ /// + /// **WARNING:** this is subtly different than using a `^` at the start of + /// your regex. A `^` forces a regex to match exclusively at the start of + /// a haystack, regardless of where you begin your search. In contrast, + /// anchoring a search will allow your regex to match anywhere in your + /// haystack, but the match must start at the beginning of a search. + /// + /// For example, consider the haystack `aba` and the following searches: + /// + /// 1. The regex `^a` is compiled with `Anchored::No` and searches `aba` + /// starting at position `2`. Since `^` requires the match to start at + /// the beginning of the haystack and `2 > 0`, no match is found. + /// 2. The regex `a` is compiled with `Anchored::Yes` and searches `aba` + /// starting at position `2`. This reports a match at `[2, 3]` since + /// the match starts where the search started. Since there is no `^`, + /// there is no requirement for the match to start at the beginning of + /// the haystack. + /// 3. The regex `a` is compiled with `Anchored::Yes` and searches `aba` + /// starting at position `1`. Since `b` corresponds to position `1` and + /// since the search is anchored, it finds no match. While the regex + /// matches at other positions, configuring the search to be anchored + /// requires that it only report a match that begins at the same offset + /// as the beginning of the search. + /// 4. The regex `a` is compiled with `Anchored::No` and searches `aba` + /// starting at position `1`. Since the search is not anchored and + /// the regex does not start with `^`, the search executes as if there + /// is a `(?s:.)*?` prefix that permits it to match anywhere. Thus, it + /// reports a match at `[2, 3]`. + /// + /// Note that the [`Anchored::Pattern`] mode is like `Anchored::Yes`, + /// except it only reports matches for a particular pattern. 
+ /// + /// # Example + /// + /// This demonstrates the differences between an anchored search and + /// a pattern that begins with `^` (as described in the above warning + /// message). + /// + /// ``` + /// use regex_automata::{ + /// nfa::thompson::pikevm::PikeVM, + /// Anchored, Match, Input, + /// }; + /// + /// let haystack = "aba"; + /// + /// let re = PikeVM::new(r"^a")?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// let input = Input::new(haystack).span(2..3).anchored(Anchored::No); + /// re.search(&mut cache, &input, &mut caps); + /// // No match is found because 2 is not the beginning of the haystack, + /// // which is what ^ requires. + /// assert_eq!(None, caps.get_match()); + /// + /// let re = PikeVM::new(r"a")?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// let input = Input::new(haystack).span(2..3).anchored(Anchored::Yes); + /// re.search(&mut cache, &input, &mut caps); + /// // An anchored search can still match anywhere in the haystack, it just + /// // must begin at the start of the search which is '2' in this case. + /// assert_eq!(Some(Match::must(0, 2..3)), caps.get_match()); + /// + /// let re = PikeVM::new(r"a")?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// let input = Input::new(haystack).span(1..3).anchored(Anchored::Yes); + /// re.search(&mut cache, &input, &mut caps); + /// // No match is found since we start searching at offset 1 which + /// // corresponds to 'b'. Since there is no '(?s:.)*?' prefix, no match + /// // is found. + /// assert_eq!(None, caps.get_match()); + /// + /// let re = PikeVM::new(r"a")?; + /// let (mut cache, mut caps) = (re.create_cache(), re.create_captures()); + /// let input = Input::new(haystack).span(1..3).anchored(Anchored::No); + /// re.search(&mut cache, &input, &mut caps); + /// // Since anchored=no, an implicit '(?s:.)*?' prefix was added to the + /// // pattern. 
Even though the search starts at 'b', the 'match anything' + /// // prefix allows the search to match 'a'. + /// let expected = Some(Match::must(0, 2..3)); + /// assert_eq!(expected, caps.get_match()); + /// + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn anchored(mut self, mode: Anchored) -> Input<'h> { + self.set_anchored(mode); + self + } + + /// Whether to execute an "earliest" search or not. + /// + /// When running a non-overlapping search, an "earliest" search will return + /// the match location as early as possible. For example, given a pattern + /// of `foo[0-9]+` and a haystack of `foo12345`, a normal leftmost search + /// will return `foo12345` as a match. But an "earliest" search for regex + /// engines that support "earliest" semantics will return `foo1` as a + /// match, since as soon as the first digit following `foo` is seen, it is + /// known to have found a match. + /// + /// Note that "earliest" semantics generally depend on the regex engine. + /// Different regex engines may determine there is a match at different + /// points. So there is no guarantee that "earliest" matches will always + /// return the same offsets for all regex engines. The "earliest" notion + /// is really about when the particular regex engine determines there is + /// a match rather than a consistent semantic unto itself. This is often + /// useful for implementing "did a match occur or not" predicates, but + /// sometimes the offset is useful as well. + /// + /// This is disabled by default. + /// + /// # Example + /// + /// This example shows the difference between "earliest" searching and + /// normal searching. + /// + /// ``` + /// use regex_automata::{nfa::thompson::pikevm::PikeVM, Match, Input}; + /// + /// let re = PikeVM::new(r"foo[0-9]+")?; + /// let mut cache = re.create_cache(); + /// let mut caps = re.create_captures(); + /// + /// // A normal search implements greediness like you expect. 
+ /// let input = Input::new("foo12345"); + /// re.search(&mut cache, &input, &mut caps); + /// assert_eq!(Some(Match::must(0, 0..8)), caps.get_match()); + /// + /// // When 'earliest' is enabled and the regex engine supports + /// // it, the search will bail once it knows a match has been + /// // found. + /// let input = Input::new("foo12345").earliest(true); + /// re.search(&mut cache, &input, &mut caps); + /// assert_eq!(Some(Match::must(0, 0..4)), caps.get_match()); + /// # Ok::<(), Box>(()) + /// ``` + #[inline] + pub fn earliest(mut self, yes: bool) -> Input<'h> { + self.set_earliest(yes); + self + } + + /// Set the span for this search configuration. + /// + /// This is like the [`Input::span`] method, except this mutates the + /// span in place. + /// + /// This routine is generic over how a span is provided. While + /// a [`Span`] may be given directly, one may also provide a + /// `std::ops::Range`. + /// + /// # Panics + /// + /// This panics if the given span does not correspond to valid bounds in + /// the haystack or the termination of a search. + /// + /// # Example + /// + /// ``` + /// use regex_automata::Input; + /// + /// let mut input = Input::new("foobar"); + /// assert_eq!(0..6, input.get_range()); + /// input.set_span(2..4); + /// assert_eq!(2..4, input.get_range()); + /// ``` + #[inline] + pub fn set_span>(&mut self, span: S) { + let span = span.into(); + assert!( + span.end <= self.haystack.len() + && span.start <= span.end.wrapping_add(1), + "invalid span {:?} for haystack of length {}", + span, + self.haystack.len(), + ); + self.span = span; + } + + /// Set the span for this search configuration given any range. + /// + /// This is like the [`Input::range`] method, except this mutates the + /// span in place. + /// + /// This routine does not panic if the range given is not a valid range for + /// this search's haystack. 
If this search is run with an invalid range, + /// then the most likely outcome is that the actual search execution will + /// panic. + /// + /// # Panics + /// + /// This routine will panic if the given range could not be converted + /// to a valid [`Range`]. For example, this would panic when given + /// `0..=usize::MAX` since it cannot be represented using a half-open + /// interval in terms of `usize`. + /// + /// This also panics if the given span does not correspond to valid bounds + /// in the haystack or the termination of a search. + /// + /// # Example + /// + /// ``` + /// use regex_automata::Input; + /// + /// let mut input = Input::new("foobar"); + /// assert_eq!(0..6, input.get_range()); + /// input.set_range(2..=4); + /// assert_eq!(2..5, input.get_range()); + /// ``` + #[inline] + pub fn set_range>(&mut self, range: R) { + use core::ops::Bound; + + // It's a little weird to convert ranges into spans, and then spans + // back into ranges when we actually slice the haystack. Because + // of that process, we always represent everything as a half-open + // interval. Therefore, handling things like m..=n is a little awkward. + let start = match range.start_bound() { + Bound::Included(&i) => i, + // Can this case ever happen? Range syntax doesn't support it... + Bound::Excluded(&i) => i.checked_add(1).unwrap(), + Bound::Unbounded => 0, + }; + let end = match range.end_bound() { + Bound::Included(&i) => i.checked_add(1).unwrap(), + Bound::Excluded(&i) => i, + Bound::Unbounded => self.haystack().len(), + }; + self.set_span(Span { start, end }); + } + + /// Set the starting offset for the span for this search configuration. + /// + /// This is a convenience routine for only mutating the start of a span + /// without having to set the entire span. + /// + /// # Panics + /// + /// This panics if the span resulting from the new start position does not + /// correspond to valid bounds in the haystack or the termination of a + /// search.
+ /// + /// # Example + /// + /// ``` + /// use regex_automata::Input; + /// + /// let mut input = Input::new("foobar"); + /// assert_eq!(0..6, input.get_range()); + /// input.set_start(5); + /// assert_eq!(5..6, input.get_range()); + /// ``` + #[inline] + pub fn set_start(&mut self, start: usize) { + self.set_span(Span { start, ..self.get_span() }); + } + + /// Set the ending offset for the span for this search configuration. + /// + /// This is a convenience routine for only mutating the end of a span + /// without having to set the entire span. + /// + /// # Panics + /// + /// This panics if the span resulting from the new end position does not + /// correspond to valid bounds in the haystack or the termination of a + /// search. + /// + /// # Example + /// + /// ``` + /// use regex_automata::Input; + /// + /// let mut input = Input::new("foobar"); + /// assert_eq!(0..6, input.get_range()); + /// input.set_end(5); + /// assert_eq!(0..5, input.get_range()); + /// ``` + #[inline] + pub fn set_end(&mut self, end: usize) { + self.set_span(Span { end, ..self.get_span() }); + } + + /// Set the anchor mode of a search. + /// + /// This is like [`Input::anchored`], except it mutates the search + /// configuration in place. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{Anchored, Input, PatternID}; + /// + /// let mut input = Input::new("foobar"); + /// assert_eq!(Anchored::No, input.get_anchored()); + /// + /// let pid = PatternID::must(5); + /// input.set_anchored(Anchored::Pattern(pid)); + /// assert_eq!(Anchored::Pattern(pid), input.get_anchored()); + /// ``` + #[inline] + pub fn set_anchored(&mut self, mode: Anchored) { + self.anchored = mode; + } + + /// Set whether the search should execute in "earliest" mode or not. + /// + /// This is like [`Input::earliest`], except it mutates the search + /// configuration in place. 
+ /// + /// # Example + /// + /// ``` + /// use regex_automata::Input; + /// + /// let mut input = Input::new("foobar"); + /// assert!(!input.get_earliest()); + /// input.set_earliest(true); + /// assert!(input.get_earliest()); + /// ``` + #[inline] + pub fn set_earliest(&mut self, yes: bool) { + self.earliest = yes; + } + + /// Return a borrow of the underlying haystack as a slice of bytes. + /// + /// # Example + /// + /// ``` + /// use regex_automata::Input; + /// + /// let input = Input::new("foobar"); + /// assert_eq!(b"foobar", input.haystack()); + /// ``` + #[inline] + pub fn haystack(&self) -> &[u8] { + self.haystack + } + + /// Return the start position of this search. + /// + /// This is a convenience routine for `search.get_span().start()`. + /// + /// When [`Input::is_done`] is `false`, this is guaranteed to return + /// an offset that is less than or equal to [`Input::end`]. Otherwise, + /// the offset is one greater than [`Input::end`]. + /// + /// # Example + /// + /// ``` + /// use regex_automata::Input; + /// + /// let input = Input::new("foobar"); + /// assert_eq!(0, input.start()); + /// + /// let input = Input::new("foobar").span(2..4); + /// assert_eq!(2, input.start()); + /// ``` + #[inline] + pub fn start(&self) -> usize { + self.get_span().start + } + + /// Return the end position of this search. + /// + /// This is a convenience routine for `search.get_span().end()`. + /// + /// This is guaranteed to return an offset that is a valid exclusive end + /// bound for this input's haystack. + /// + /// # Example + /// + /// ``` + /// use regex_automata::Input; + /// + /// let input = Input::new("foobar"); + /// assert_eq!(6, input.end()); + /// + /// let input = Input::new("foobar").span(2..4); + /// assert_eq!(4, input.end()); + /// ``` + #[inline] + pub fn end(&self) -> usize { + self.get_span().end + } + + /// Return the span for this search configuration. 
+ /// + /// If one was not explicitly set, then the span corresponds to the entire + /// range of the haystack. + /// + /// When [`Input::is_done`] is `false`, the span returned is guaranteed + /// to correspond to valid bounds for this input's haystack. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{Input, Span}; + /// + /// let input = Input::new("foobar"); + /// assert_eq!(Span { start: 0, end: 6 }, input.get_span()); + /// ``` + #[inline] + pub fn get_span(&self) -> Span { + self.span + } + + /// Return the span as a range for this search configuration. + /// + /// If one was not explicitly set, then the span corresponds to the entire + /// range of the haystack. + /// + /// When [`Input::is_done`] is `false`, the range returned is guaranteed + /// to correspond to valid bounds for this input's haystack. + /// + /// # Example + /// + /// ``` + /// use regex_automata::Input; + /// + /// let input = Input::new("foobar"); + /// assert_eq!(0..6, input.get_range()); + /// ``` + #[inline] + pub fn get_range(&self) -> Range { + self.get_span().range() + } + + /// Return the anchored mode for this search configuration. + /// + /// If no anchored mode was set, then it defaults to [`Anchored::No`]. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{Anchored, Input, PatternID}; + /// + /// let mut input = Input::new("foobar"); + /// assert_eq!(Anchored::No, input.get_anchored()); + /// + /// let pid = PatternID::must(5); + /// input.set_anchored(Anchored::Pattern(pid)); + /// assert_eq!(Anchored::Pattern(pid), input.get_anchored()); + /// ``` + #[inline] + pub fn get_anchored(&self) -> Anchored { + self.anchored + } + + /// Return whether this search should execute in "earliest" mode. 
+ /// + /// # Example + /// + /// ``` + /// use regex_automata::Input; + /// + /// let input = Input::new("foobar"); + /// assert!(!input.get_earliest()); + /// ``` + #[inline] + pub fn get_earliest(&self) -> bool { + self.earliest + } + + /// Return true if and only if this search can never return any other + /// matches. + /// + /// This occurs when the start position of this search is greater than the + /// end position of the search. + /// + /// # Example + /// + /// ``` + /// use regex_automata::Input; + /// + /// let mut input = Input::new("foobar"); + /// assert!(!input.is_done()); + /// input.set_start(6); + /// assert!(!input.is_done()); + /// input.set_start(7); + /// assert!(input.is_done()); + /// ``` + #[inline] + pub fn is_done(&self) -> bool { + self.get_span().start > self.get_span().end + } + + /// Returns true if and only if the given offset in this search's haystack + /// falls on a valid UTF-8 encoded codepoint boundary. + /// + /// If the haystack is not valid UTF-8, then the behavior of this routine + /// is unspecified. + /// + /// # Example + /// + /// This shows where codepoint boundaries do and don't exist in valid + /// UTF-8. 
+ /// + /// ``` + /// use regex_automata::Input; + /// + /// let input = Input::new("☃"); + /// assert!(input.is_char_boundary(0)); + /// assert!(!input.is_char_boundary(1)); + /// assert!(!input.is_char_boundary(2)); + /// assert!(input.is_char_boundary(3)); + /// assert!(!input.is_char_boundary(4)); + /// ``` + #[inline] + pub fn is_char_boundary(&self, offset: usize) -> bool { + utf8::is_boundary(self.haystack(), offset) + } +} + +impl<'h> core::fmt::Debug for Input<'h> { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + use crate::util::escape::DebugHaystack; + + f.debug_struct("Input") + .field("haystack", &DebugHaystack(self.haystack())) + .field("span", &self.span) + .field("anchored", &self.anchored) + .field("earliest", &self.earliest) + .finish() + } +} + +impl<'h, H: ?Sized + AsRef<[u8]>> From<&'h H> for Input<'h> { + fn from(haystack: &'h H) -> Input<'h> { + Input::new(haystack) + } +} + +/// A representation of a span reported by a regex engine. +/// +/// A span corresponds to the starting and ending _byte offsets_ of a +/// contiguous region of bytes. The starting offset is inclusive while the +/// ending offset is exclusive. That is, a span is a half-open interval. +/// +/// A span is used to report the offsets of a match, but it is also used to +/// convey which region of a haystack should be searched via routines like +/// [`Input::span`]. +/// +/// This is basically equivalent to a `std::ops::Range`, except this +/// type implements `Copy` which makes it more ergonomic to use in the context +/// of this crate. Like a range, this implements `Index` for `[u8]` and `str`, +/// and `IndexMut` for `[u8]`. For convenience, this also impls `From`, +/// which means things like `Span::from(5..10)` work. +#[derive(Clone, Copy, Eq, Hash, PartialEq)] +pub struct Span { + /// The start offset of the span, inclusive. + pub start: usize, + /// The end offset of the span, exclusive. 
+ pub end: usize, +} + +impl Span { + /// Returns this span as a range. + #[inline] + pub fn range(&self) -> Range { + Range::from(*self) + } + + /// Returns true when this span is empty. That is, when `start >= end`. + #[inline] + pub fn is_empty(&self) -> bool { + self.start >= self.end + } + + /// Returns the length of this span. + /// + /// This returns `0` in precisely the cases that `is_empty` returns `true`. + #[inline] + pub fn len(&self) -> usize { + self.end.saturating_sub(self.start) + } + + /// Returns true when the given offset is contained within this span. + /// + /// Note that an empty span contains no offsets and will always return + /// false. + #[inline] + pub fn contains(&self, offset: usize) -> bool { + !self.is_empty() && self.start <= offset && offset <= self.end + } + + /// Returns a new span with `offset` added to this span's `start` and `end` + /// values. + #[inline] + pub fn offset(&self, offset: usize) -> Span { + Span { start: self.start + offset, end: self.end + offset } + } +} + +impl core::fmt::Debug for Span { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + write!(f, "{}..{}", self.start, self.end) + } +} + +impl core::ops::Index for [u8] { + type Output = [u8]; + + #[inline] + fn index(&self, index: Span) -> &[u8] { + &self[index.range()] + } +} + +impl core::ops::IndexMut for [u8] { + #[inline] + fn index_mut(&mut self, index: Span) -> &mut [u8] { + &mut self[index.range()] + } +} + +impl core::ops::Index for str { + type Output = str; + + #[inline] + fn index(&self, index: Span) -> &str { + &self[index.range()] + } +} + +impl From> for Span { + #[inline] + fn from(range: Range) -> Span { + Span { start: range.start, end: range.end } + } +} + +impl From for Range { + #[inline] + fn from(span: Span) -> Range { + Range { start: span.start, end: span.end } + } +} + +impl PartialEq> for Span { + #[inline] + fn eq(&self, range: &Range) -> bool { + self.start == range.start && self.end == range.end + } +} + +impl 
PartialEq for Range { + #[inline] + fn eq(&self, span: &Span) -> bool { + self.start == span.start && self.end == span.end + } +} + +/// A representation of "half" of a match reported by a DFA. +/// +/// This is called a "half" match because it only includes the end location (or +/// start location for a reverse search) of a match. This corresponds to the +/// information that a single DFA scan can report. Getting the other half of +/// the match requires a second scan with a reversed DFA. +/// +/// A half match also includes the pattern that matched. The pattern is +/// identified by an ID, which corresponds to its position (starting from `0`) +/// relative to other patterns used to construct the corresponding DFA. If only +/// a single pattern is provided to the DFA, then all matches are guaranteed to +/// have a pattern ID of `0`. +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub struct HalfMatch { + /// The pattern ID. + pattern: PatternID, + /// The offset of the match. + /// + /// For forward searches, the offset is exclusive. For reverse searches, + /// the offset is inclusive. + offset: usize, +} + +impl HalfMatch { + /// Create a new half match from a pattern ID and a byte offset. + #[inline] + pub fn new(pattern: PatternID, offset: usize) -> HalfMatch { + HalfMatch { pattern, offset } + } + + /// Create a new half match from a pattern ID and a byte offset. + /// + /// This is like [`HalfMatch::new`], but accepts a `usize` instead of a + /// [`PatternID`]. This panics if the given `usize` is not representable + /// as a `PatternID`. + #[inline] + pub fn must(pattern: usize, offset: usize) -> HalfMatch { + HalfMatch::new(PatternID::new(pattern).unwrap(), offset) + } + + /// Returns the ID of the pattern that matched. + /// + /// The ID of a pattern is derived from the position in which it was + /// originally inserted into the corresponding DFA. The first pattern has + /// identifier `0`, and each subsequent pattern is `1`, `2` and so on. 
+ #[inline] + pub fn pattern(&self) -> PatternID { + self.pattern + } + + /// The position of the match. + /// + /// If this match was produced by a forward search, then the offset is + /// exclusive. If this match was produced by a reverse search, then the + /// offset is inclusive. + #[inline] + pub fn offset(&self) -> usize { + self.offset + } +} + +/// A representation of a match reported by a regex engine. +/// +/// A match has two essential pieces of information: the [`PatternID`] that +/// matches, and the [`Span`] of the match in a haystack. +/// +/// The pattern is identified by an ID, which corresponds to its position +/// (starting from `0`) relative to other patterns used to construct the +/// corresponding regex engine. If only a single pattern is provided, then all +/// matches are guaranteed to have a pattern ID of `0`. +/// +/// Every match reported by a regex engine guarantees that its span has its +/// start offset as less than or equal to its end offset. +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub struct Match { + /// The pattern ID. + pattern: PatternID, + /// The underlying match span. + span: Span, +} + +impl Match { + /// Create a new match from a pattern ID and a span. + /// + /// This constructor is generic over how a span is provided. While + /// a [`Span`] may be given directly, one may also provide a + /// `std::ops::Range`. + /// + /// # Panics + /// + /// This panics if `end < start`. + /// + /// # Example + /// + /// This shows how to create a match for the first pattern in a regex + /// object using convenient range syntax. 
+ /// + /// ``` + /// use regex_automata::{Match, PatternID}; + /// + /// let m = Match::new(PatternID::ZERO, 5..10); + /// assert_eq!(0, m.pattern().as_usize()); + /// assert_eq!(5, m.start()); + /// assert_eq!(10, m.end()); + /// ``` + #[inline] + pub fn new>(pattern: PatternID, span: S) -> Match { + let span: Span = span.into(); + assert!(span.start <= span.end, "invalid match span"); + Match { pattern, span } + } + + /// Create a new match from a pattern ID and a byte offset span. + /// + /// This constructor is generic over how a span is provided. While + /// a [`Span`] may be given directly, one may also provide a + /// `std::ops::Range`. + /// + /// This is like [`Match::new`], but accepts a `usize` instead of a + /// [`PatternID`]. This panics if the given `usize` is not representable + /// as a `PatternID`. + /// + /// # Panics + /// + /// This panics if `end < start` or if `pattern > PatternID::MAX`. + /// + /// # Example + /// + /// This shows how to create a match for the third pattern in a regex + /// object using convenient range syntax. + /// + /// ``` + /// use regex_automata::Match; + /// + /// let m = Match::must(3, 5..10); + /// assert_eq!(3, m.pattern().as_usize()); + /// assert_eq!(5, m.start()); + /// assert_eq!(10, m.end()); + /// ``` + #[inline] + pub fn must>(pattern: usize, span: S) -> Match { + Match::new(PatternID::must(pattern), span) + } + + /// Returns the ID of the pattern that matched. + /// + /// The ID of a pattern is derived from the position in which it was + /// originally inserted into the corresponding regex engine. The first + /// pattern has identifier `0`, and each subsequent pattern is `1`, `2` and + /// so on. + #[inline] + pub fn pattern(&self) -> PatternID { + self.pattern + } + + /// The starting position of the match. + /// + /// This is a convenience routine for `Match::span().start`. + #[inline] + pub fn start(&self) -> usize { + self.span().start + } + + /// The ending position of the match. 
+ /// + /// This is a convenience routine for `Match::span().end`. + #[inline] + pub fn end(&self) -> usize { + self.span().end + } + + /// Returns the match span as a range. + /// + /// This is a convenience routine for `Match::span().range()`. + #[inline] + pub fn range(&self) -> core::ops::Range { + self.span().range() + } + + /// Returns the span for this match. + #[inline] + pub fn span(&self) -> Span { + self.span + } + + /// Returns true when the span in this match is empty. + /// + /// An empty match can only be returned when the regex itself can match + /// the empty string. + #[inline] + pub fn is_empty(&self) -> bool { + self.span().is_empty() + } + + /// Returns the length of this match. + /// + /// This returns `0` in precisely the cases that `is_empty` returns `true`. + #[inline] + pub fn len(&self) -> usize { + self.span().len() + } +} + +/// A set of `PatternID`s. +/// +/// A set of pattern identifiers is useful for recording which patterns have +/// matched a particular haystack. A pattern set _only_ includes pattern +/// identifiers. It does not include offset information. +/// +/// # Example +/// +/// This shows basic usage of a set. +/// +/// ``` +/// use regex_automata::{PatternID, PatternSet}; +/// +/// let pid1 = PatternID::must(5); +/// let pid2 = PatternID::must(8); +/// // Create a new empty set. +/// let mut set = PatternSet::new(10); +/// // Insert pattern IDs. +/// set.insert(pid1); +/// set.insert(pid2); +/// // Test membership. +/// assert!(set.contains(pid1)); +/// assert!(set.contains(pid2)); +/// // Get all members. +/// assert_eq!( +/// vec![5, 8], +/// set.iter().map(|p| p.as_usize()).collect::>(), +/// ); +/// // Clear the set. +/// set.clear(); +/// // Test that it is indeed empty. +/// assert!(set.is_empty()); +/// ``` +#[cfg(feature = "alloc")] +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct PatternSet { + /// The number of patterns set to 'true' in this set. 
+ len: usize, + /// A map from PatternID to boolean of whether a pattern matches or not. + /// + /// This should probably be a bitset, but it's probably unlikely to matter + /// much in practice. + /// + /// The main downside of this representation (and similarly for a bitset) + /// is that iteration scales with the capacity of the set instead of + /// the length of the set. This doesn't seem likely to be a problem in + /// practice. + /// + /// Another alternative is to just use a 'SparseSet' for this. It does use + /// more memory (quite a bit more), but that seems fine I think compared + /// to the memory being used by the regex engine. The real hiccup with + /// it is that it yields pattern IDs in the order they were inserted. + /// Which is actually kind of nice, but at the time of writing, pattern + /// IDs are yielded in ascending order in the regex crate RegexSet API. + /// If we did change to 'SparseSet', we could provide an additional + /// 'iter_match_order' iterator, but keep the ascending order one for + /// compatibility. + which: alloc::boxed::Box<[bool]>, +} + +#[cfg(feature = "alloc")] +impl PatternSet { + /// Create a new set of pattern identifiers with the given capacity. + /// + /// The given capacity typically corresponds to (at least) the number of + /// patterns in a compiled regex object. + /// + /// # Panics + /// + /// This panics if the given capacity exceeds [`PatternID::LIMIT`]. This is + /// impossible if you use the `pattern_len()` method as defined on any of + /// the regex engines in this crate. Namely, a regex will fail to build by + /// returning an error if the number of patterns given to it exceeds the + /// limit. Therefore, the number of patterns in a valid regex is always + /// a correct capacity to provide here. 
+ pub fn new(capacity: usize) -> PatternSet { + assert!( + capacity <= PatternID::LIMIT, + "pattern set capacity exceeds limit of {}", + PatternID::LIMIT, + ); + PatternSet { + len: 0, + which: alloc::vec![false; capacity].into_boxed_slice(), + } + } + + /// Clear this set such that it contains no pattern IDs. + pub fn clear(&mut self) { + self.len = 0; + for matched in self.which.iter_mut() { + *matched = false; + } + } + + /// Return true if and only if the given pattern identifier is in this set. + pub fn contains(&self, pid: PatternID) -> bool { + pid.as_usize() < self.capacity() && self.which[pid] + } + + /// Insert the given pattern identifier into this set and return `true` if + /// the given pattern ID was not previously in this set. + /// + /// If the pattern identifier is already in this set, then this is a no-op. + /// + /// Use [`PatternSet::try_insert`] for a fallible version of this routine. + /// + /// # Panics + /// + /// This panics if this pattern set has insufficient capacity to + /// store the given pattern ID. + pub fn insert(&mut self, pid: PatternID) -> bool { + self.try_insert(pid) + .expect("PatternSet should have sufficient capacity") + } + + /// Insert the given pattern identifier into this set and return `true` if + /// the given pattern ID was not previously in this set. + /// + /// If the pattern identifier is already in this set, then this is a no-op. + /// + /// # Errors + /// + /// This returns an error if this pattern set has insufficient capacity to + /// store the given pattern ID. + pub fn try_insert( + &mut self, + pid: PatternID, + ) -> Result { + if pid.as_usize() >= self.capacity() { + return Err(PatternSetInsertError { + attempted: pid, + capacity: self.capacity(), + }); + } + if self.which[pid] { + return Ok(false); + } + self.len += 1; + self.which[pid] = true; + Ok(true) + } + + /* + // This is currently commented out because it is unused and it is unclear + // whether it's useful or not. What's the harm in having it? 
Well, if + // we ever wanted to change our representation to a 'SparseSet', then + // supporting this method would be a bit tricky. So in order to keep some + // API evolution flexibility, we leave it out for now. + + /// Remove the given pattern identifier from this set. + /// + /// If the pattern identifier was not previously in this set, then this + /// does not change the set and returns `false`. + /// + /// # Panics + /// + /// This panics if `pid` exceeds the capacity of this set. + pub fn remove(&mut self, pid: PatternID) -> bool { + if !self.which[pid] { + return false; + } + self.len -= 1; + self.which[pid] = false; + true + } + */ + + /// Return true if and only if this set has no pattern identifiers in it. + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Return true if and only if this set has the maximum number of pattern + /// identifiers in the set. This occurs precisely when `PatternSet::len() + /// == PatternSet::capacity()`. + /// + /// This particular property is useful to test because it may allow one to + /// stop a search earlier than you might otherwise. Namely, if a search is + /// only reporting which patterns match a haystack and if you know all of + /// the patterns match at a given point, then there's no new information + /// that can be learned by continuing the search. (Because a pattern set + /// does not keep track of offset information.) + pub fn is_full(&self) -> bool { + self.len() == self.capacity() + } + + /// Returns the total number of pattern identifiers in this set. + pub fn len(&self) -> usize { + self.len + } + + /// Returns the total number of pattern identifiers that may be stored + /// in this set. + /// + /// This is guaranteed to be less than or equal to [`PatternID::LIMIT`]. + /// + /// Typically, the capacity of a pattern set matches the number of patterns + /// in a regex object with which you are searching. 
+ pub fn capacity(&self) -> usize { + self.which.len() + } + + /// Returns an iterator over all pattern identifiers in this set. + /// + /// The iterator yields pattern identifiers in ascending order, starting + /// at zero. + pub fn iter(&self) -> PatternSetIter<'_> { + PatternSetIter { it: self.which.iter().enumerate() } + } +} + +/// An error that occurs when a `PatternID` failed to insert into a +/// `PatternSet`. +/// +/// An insert fails when the given `PatternID` exceeds the configured capacity +/// of the `PatternSet`. +/// +/// This error is created by the [`PatternSet::try_insert`] routine. +#[cfg(feature = "alloc")] +#[derive(Clone, Debug)] +pub struct PatternSetInsertError { + attempted: PatternID, + capacity: usize, +} + +#[cfg(feature = "std")] +impl std::error::Error for PatternSetInsertError {} + +#[cfg(feature = "alloc")] +impl core::fmt::Display for PatternSetInsertError { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + write!( + f, + "failed to insert pattern ID {} into pattern set \ + with insufficient capacity of {}", + self.attempted.as_usize(), + self.capacity, + ) + } +} + +/// An iterator over all pattern identifiers in a [`PatternSet`]. +/// +/// The lifetime parameter `'a` refers to the lifetime of the pattern set being +/// iterated over. +/// +/// This iterator is created by the [`PatternSet::iter`] method. +#[cfg(feature = "alloc")] +#[derive(Clone, Debug)] +pub struct PatternSetIter<'a> { + it: core::iter::Enumerate>, +} + +#[cfg(feature = "alloc")] +impl<'a> Iterator for PatternSetIter<'a> { + type Item = PatternID; + + fn next(&mut self) -> Option { + while let Some((index, &yes)) = self.it.next() { + if yes { + // Only valid 'PatternID' values can be inserted into the set + // and construction of the set panics if the capacity would + // permit storing invalid pattern IDs. Thus, 'yes' is only true + // precisely when 'index' corresponds to a valid 'PatternID'. 
+ return Some(PatternID::new_unchecked(index)); + } + } + None + } + + fn size_hint(&self) -> (usize, Option) { + // Only slots set to 'true' are yielded, so the number of remaining + // slots is an upper bound and nothing is guaranteed as a lower bound. + let (_, upper) = self.it.size_hint(); + (0, upper) + } +} + +#[cfg(feature = "alloc")] +impl<'a> DoubleEndedIterator for PatternSetIter<'a> { + fn next_back(&mut self) -> Option { + while let Some((index, &yes)) = self.it.next_back() { + if yes { + // Only valid 'PatternID' values can be inserted into the set + // and construction of the set panics if the capacity would + // permit storing invalid pattern IDs. Thus, 'yes' is only true + // precisely when 'index' corresponds to a valid 'PatternID'. + return Some(PatternID::new_unchecked(index)); + } + } + None + } +} + +/// The type of anchored search to perform. +/// +/// This is *almost* a boolean option. That is, you can either do an unanchored +/// search for any pattern in a regex, or you can do an anchored search for any +/// pattern in a regex. +/// +/// A third option exists that, assuming the regex engine supports it, permits +/// you to do an anchored search for a specific pattern. +/// +/// Note that there is no way to run an unanchored search for a specific +/// pattern. If you need that, you'll need to build separate regexes for each +/// pattern. +/// +/// # Errors +/// +/// If a regex engine does not support the anchored mode selected, then the +/// regex engine will return an error. While any non-trivial regex engine +/// should support at least one of the available anchored modes, there is no +/// singular mode that is guaranteed to be universally supported. Some regex +/// engines might only support unanchored searches (DFAs compiled without +/// anchored starting states) and some regex engines might only support +/// anchored searches (like the one-pass DFA). +/// +/// The specific error returned is a [`MatchError`] with a +/// [`MatchErrorKind::UnsupportedAnchored`] kind. The kind includes the +/// `Anchored` value given that is unsupported. 
+/// +/// Note that regex engines should report "no match" if, for example, an +/// `Anchored::Pattern` is provided with an invalid pattern ID _but_ where +/// anchored searches for a specific pattern are supported. This smooths out +/// behavior such that it's possible to guarantee that an error never occurs +/// based on how the regex engine is configured. All regex engines in this +/// crate report "no match" when searching for an invalid pattern ID, but where +/// searching for a valid pattern ID is otherwise supported. +/// +/// # Example +/// +/// This example shows how to use the various `Anchored` modes to run a +/// search. We use the [`PikeVM`](crate::nfa::thompson::pikevm::PikeVM) +/// because it supports all modes unconditionally. Some regex engines, like +/// the [`onepass::DFA`](crate::dfa::onepass::DFA) cannot support unanchored +/// searches. +/// +/// ``` +/// # if cfg!(miri) { return Ok(()); } // miri takes too long +/// use regex_automata::{ +/// nfa::thompson::pikevm::PikeVM, +/// Anchored, Input, Match, PatternID, +/// }; +/// +/// let re = PikeVM::new_many(&[ +/// r"Mrs. \w+", +/// r"Miss \w+", +/// r"Mr. \w+", +/// r"Ms. \w+", +/// ])?; +/// let mut cache = re.create_cache(); +/// let hay = "Hello Mr. Springsteen!"; +/// +/// // The default is to do an unanchored search. +/// assert_eq!(Some(Match::must(2, 6..21)), re.find(&mut cache, hay)); +/// // Explicitly ask for an unanchored search. Same as above. +/// let input = Input::new(hay).anchored(Anchored::No); +/// assert_eq!(Some(Match::must(2, 6..21)), re.find(&mut cache, input)); +/// +/// // Now try an anchored search. Since the match doesn't start at the +/// // beginning of the haystack, no match is found! +/// let input = Input::new(hay).anchored(Anchored::Yes); +/// assert_eq!(None, re.find(&mut cache, input)); +/// +/// // We can try an anchored search again, but move the location of where +/// // we start the search. 
Note that the offsets reported are still in +/// // terms of the overall haystack and not relative to where we started +/// // the search. +/// let input = Input::new(hay).anchored(Anchored::Yes).range(6..); +/// assert_eq!(Some(Match::must(2, 6..21)), re.find(&mut cache, input)); +/// +/// // Now try an anchored search for a specific pattern. We specifically +/// // choose a pattern that we know doesn't match to prove that the search +/// // only looks for the pattern we provide. +/// let input = Input::new(hay) +/// .anchored(Anchored::Pattern(PatternID::must(1))) +/// .range(6..); +/// assert_eq!(None, re.find(&mut cache, input)); +/// +/// // But if we switch it to the pattern that we know matches, then we find +/// // the match. +/// let input = Input::new(hay) +/// .anchored(Anchored::Pattern(PatternID::must(2))) +/// .range(6..); +/// assert_eq!(Some(Match::must(2, 6..21)), re.find(&mut cache, input)); +/// +/// # Ok::<(), Box>(()) +/// ``` +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum Anchored { + /// Run an unanchored search. This means a match may occur anywhere at or + /// after the start position of the search. + /// + /// This search can return a match for any pattern in the regex. + No, + /// Run an anchored search. This means that a match must begin at the + /// start position of the search. + /// + /// This search can return a match for any pattern in the regex. + Yes, + /// Run an anchored search for a specific pattern. This means that a match + /// must be for the given pattern and must begin at the start position of + /// the search. + Pattern(PatternID), +} + +impl Anchored { + /// Returns true if and only if this anchor mode corresponds to any kind of + /// anchored search. + /// + /// # Example + /// + /// This example shows that both `Anchored::Yes` and `Anchored::Pattern` + /// are considered anchored searches. 
+ /// + /// ``` + /// use regex_automata::{Anchored, PatternID}; + /// + /// assert!(!Anchored::No.is_anchored()); + /// assert!(Anchored::Yes.is_anchored()); + /// assert!(Anchored::Pattern(PatternID::ZERO).is_anchored()); + /// ``` + #[inline] + pub fn is_anchored(&self) -> bool { + matches!(*self, Anchored::Yes | Anchored::Pattern(_)) + } + + /// Returns the pattern ID associated with this configuration if it is an + /// anchored search for a specific pattern. Otherwise `None` is returned. + /// + /// # Example + /// + /// ``` + /// use regex_automata::{Anchored, PatternID}; + /// + /// assert_eq!(None, Anchored::No.pattern()); + /// assert_eq!(None, Anchored::Yes.pattern()); + /// + /// let pid = PatternID::must(5); + /// assert_eq!(Some(pid), Anchored::Pattern(pid).pattern()); + /// ``` + #[inline] + pub fn pattern(&self) -> Option { + match *self { + Anchored::Pattern(pid) => Some(pid), + _ => None, + } + } +} + +/// The kind of match semantics to use for a regex pattern. +/// +/// The default match kind is `LeftmostFirst`, and this corresponds to the +/// match semantics used by most backtracking engines, such as Perl. +/// +/// # Leftmost first or "preference order" match semantics +/// +/// Leftmost-first semantics determine which match to report when there are +/// multiple paths through a regex that match at the same position. The tie is +/// essentially broken by how a backtracker would behave. For example, consider +/// running the regex `foofoofoo|foofoo|foo` on the haystack `foofoo`. In this +/// case, both the `foofoo` and `foo` branches match at position `0`. So should +/// the end of the match be `3` or `6`? +/// +/// A backtracker will conceptually work by trying `foofoofoo` and failing. +/// Then it will try `foofoo`, find the match and stop there. Thus, the +/// leftmost-first match position is `6`. 
This is called "leftmost-first" or +/// "preference order" because the order of the branches as written in the +/// regex pattern is what determines how to break the tie. +/// +/// (Note that leftmost-longest match semantics, which break ties by always +/// taking the longest matching string, are not currently supported by this +/// crate. These match semantics tend to be found in POSIX regex engines.) +/// +/// This example shows how leftmost-first semantics work, and how it even +/// applies to multi-pattern regexes: +/// +/// ``` +/// use regex_automata::{ +/// nfa::thompson::pikevm::PikeVM, +/// Match, +/// }; +/// +/// let re = PikeVM::new_many(&[ +/// r"foofoofoo", +/// r"foofoo", +/// r"foo", +/// ])?; +/// let mut cache = re.create_cache(); +/// let got: Vec = re.find_iter(&mut cache, "foofoo").collect(); +/// let expected = vec![Match::must(1, 0..6)]; +/// assert_eq!(expected, got); +/// +/// # Ok::<(), Box>(()) +/// ``` +/// +/// # All matches +/// +/// The `All` match semantics report any and all matches, and generally will +/// attempt to match as much as possible. It doesn't respect any sort of match +/// priority at all, so things like non-greedy matching don't work in this +/// mode. +/// +/// The fact that non-greedy matching doesn't work generally makes most forms +/// of unanchored non-overlapping searches have unintuitive behavior. Namely, +/// unanchored searches behave as if there is a `(?s-u:.)*?` prefix at the +/// beginning of the pattern, which is specifically non-greedy. Since it will +/// be treated as greedy in `All` match semantics, this generally means that +/// it will first attempt to consume all of the haystack and is likely to wind +/// up skipping matches. +/// +/// Generally speaking, `All` should only be used in two circumstances: +/// +/// * When running an anchored search and there is a desire to match as much as +/// possible. 
For example, when building a reverse regex matcher to find the +/// start of a match after finding the end. In this case, the reverse search +/// is anchored to the end of the match found by the forward search. +/// * When running overlapping searches. Since `All` encodes all possible +/// matches, this is generally what you want for an overlapping search. If you +/// try to use leftmost-first in an overlapping search, it is likely to produce +/// counter-intuitive results since leftmost-first specifically excludes some +/// matches from its underlying finite state machine. +/// +/// This example demonstrates the counter-intuitive behavior of `All` semantics +/// when using a standard leftmost unanchored search: +/// +/// ``` +/// use regex_automata::{ +/// nfa::thompson::pikevm::PikeVM, +/// Match, MatchKind, +/// }; +/// +/// let re = PikeVM::builder() +/// .configure(PikeVM::config().match_kind(MatchKind::All)) +/// .build("foo")?; +/// let hay = "first foo second foo wat"; +/// let mut cache = re.create_cache(); +/// let got: Vec = re.find_iter(&mut cache, hay).collect(); +/// // Notice that it completely skips the first 'foo'! +/// let expected = vec![Match::must(0, 17..20)]; +/// assert_eq!(expected, got); +/// +/// # Ok::<(), Box>(()) +/// ``` +/// +/// This second example shows how `All` semantics are useful for an overlapping +/// search. Note that we use lower level lazy DFA APIs here since the NFA +/// engines only currently support a very limited form of overlapping search. +/// +/// ``` +/// use regex_automata::{ +/// hybrid::dfa::{DFA, OverlappingState}, +/// HalfMatch, Input, MatchKind, +/// }; +/// +/// let re = DFA::builder() +/// // If we didn't set 'All' semantics here, then the regex would only +/// // match 'foo' at offset 3 and nothing else. Why? 
Because the state +/// // machine implements preference order and knows that the 'foofoo' and +/// // 'foofoofoo' branches can never match since 'foo' will always match +/// // when they match and take priority. +/// .configure(DFA::config().match_kind(MatchKind::All)) +/// .build(r"foo|foofoo|foofoofoo")?; +/// let mut cache = re.create_cache(); +/// let mut state = OverlappingState::start(); +/// let input = Input::new("foofoofoo"); +/// let mut got = vec![]; +/// loop { +/// re.try_search_overlapping_fwd(&mut cache, &input, &mut state)?; +/// let m = match state.get_match() { +/// None => break, +/// Some(m) => m, +/// }; +/// got.push(m); +/// } +/// let expected = vec![ +/// HalfMatch::must(0, 3), +/// HalfMatch::must(0, 6), +/// HalfMatch::must(0, 9), +/// ]; +/// assert_eq!(expected, got); +/// +/// # Ok::<(), Box>(()) +/// ``` +#[non_exhaustive] +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum MatchKind { + /// Report all possible matches. + All, + /// Report only the leftmost matches. When multiple leftmost matches exist, + /// report the match corresponding to the part of the regex that appears + /// first in the syntax. + LeftmostFirst, + // There is prior art in RE2 that shows that we should be able to add + // LeftmostLongest too. The tricky part of it is supporting ungreedy + // repetitions. Instead of treating all NFA states as having equivalent + // priority (as in 'All') or treating all NFA states as having distinct + // priority based on order (as in 'LeftmostFirst'), we instead group NFA + // states into sets, and treat members of each set as having equivalent + // priority, but having greater priority than all following members + // of different sets. + // + // However, it's not clear whether it's really worth adding this. After + // all, leftmost-longest can be emulated when using literals by using + // leftmost-first and sorting the literals by length in descending order. + // However, this won't work for arbitrary regexes. 
e.g., `\w|\w\w` will + // always match `a` in `ab` when using leftmost-first, but leftmost-longest + // would match `ab`. +} + +impl MatchKind { + #[cfg(feature = "alloc")] + pub(crate) fn continue_past_first_match(&self) -> bool { + *self == MatchKind::All + } +} + +impl Default for MatchKind { + fn default() -> MatchKind { + MatchKind::LeftmostFirst + } +} + +/// An error indicating that a search stopped before reporting whether a +/// match exists or not. +/// +/// To be very clear, this error type implies that one cannot assume that no +/// matches occur, since the search stopped before completing. That is, if +/// you're looking for information about where a search determined that no +/// match can occur, then this error type does *not* give you that. (Indeed, at +/// the time of writing, if you need such a thing, you have to write your own +/// search routine.) +/// +/// Normally, when one searches for something, the response is either an +/// affirmative "it was found at this location" or a negative "not found at +/// all." However, in some cases, a regex engine can be configured to stop its +/// search before concluding whether a match exists or not. When this happens, +/// it may be important for the caller to know why the regex engine gave up and +/// where in the input it gave up at. This error type exposes the 'why' and the +/// 'where.' +/// +/// For example, the DFAs provided by this library generally cannot correctly +/// implement Unicode word boundaries. Instead, they provide an option to +/// eagerly support them on ASCII text (since Unicode word boundaries are +/// equivalent to ASCII word boundaries when searching ASCII text), but will +/// "give up" if a non-ASCII byte is seen. In such cases, one is usually +/// required to either report the failure to the caller (unergonomic) or +/// otherwise fall back to some other regex engine (ergonomic, but potentially +/// costly). 
+/// +/// More generally, some regex engines offer the ability for callers to specify +/// certain bytes that will trigger the regex engine to automatically quit if +/// they are seen. +/// +/// Still yet, there may be other reasons for a failed match. For example, +/// the hybrid DFA provided by this crate can be configured to give up if it +/// believes that it is not efficient. This in turn permits callers to choose a +/// different regex engine. +/// +/// (Note that DFAs are configured by default to never quit or give up in this +/// fashion. For example, by default, a DFA will fail to build if the regex +/// pattern contains a Unicode word boundary. One needs to opt into the "quit" +/// behavior via options, like +/// [`hybrid::dfa::Config::unicode_word_boundary`](crate::hybrid::dfa::Config::unicode_word_boundary).) +/// +/// There are a couple other ways a search +/// can fail. For example, when using the +/// [`BoundedBacktracker`](crate::nfa::thompson::backtrack::BoundedBacktracker) +/// with a haystack that is too long, or trying to run an unanchored search +/// with a [one-pass DFA](crate::dfa::onepass). +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct MatchError( + #[cfg(feature = "alloc")] alloc::boxed::Box, + #[cfg(not(feature = "alloc"))] MatchErrorKind, +); + +impl MatchError { + /// Create a new error value with the given kind. + /// + /// This is a more verbose version of the kind-specific constructors, + /// e.g., `MatchError::quit`. + pub fn new(kind: MatchErrorKind) -> MatchError { + #[cfg(feature = "alloc")] + { + MatchError(alloc::boxed::Box::new(kind)) + } + #[cfg(not(feature = "alloc"))] + { + MatchError(kind) + } + } + + /// Returns a reference to the underlying error kind. + pub fn kind(&self) -> &MatchErrorKind { + &self.0 + } + + /// Create a new "quit" error. The given `byte` corresponds to the value + /// that tripped a search's quit condition, and `offset` corresponds to the + /// location in the haystack at which the search quit. 
+ /// + /// This is the same as calling `MatchError::new` with a + /// [`MatchErrorKind::Quit`] kind. + pub fn quit(byte: u8, offset: usize) -> MatchError { + MatchError::new(MatchErrorKind::Quit { byte, offset }) + } + + /// Create a new "gave up" error. The given `offset` corresponds to the + /// location in the haystack at which the search gave up. + /// + /// This is the same as calling `MatchError::new` with a + /// [`MatchErrorKind::GaveUp`] kind. + pub fn gave_up(offset: usize) -> MatchError { + MatchError::new(MatchErrorKind::GaveUp { offset }) + } + + /// Create a new "haystack too long" error. The given `len` corresponds to + /// the length of the haystack that was problematic. + /// + /// This is the same as calling `MatchError::new` with a + /// [`MatchErrorKind::HaystackTooLong`] kind. + pub fn haystack_too_long(len: usize) -> MatchError { + MatchError::new(MatchErrorKind::HaystackTooLong { len }) + } + + /// Create a new "unsupported anchored" error. This occurs when the caller + /// requests a search with an anchor mode that is not supported by the + /// regex engine. + /// + /// This is the same as calling `MatchError::new` with a + /// [`MatchErrorKind::UnsupportedAnchored`] kind. + pub fn unsupported_anchored(mode: Anchored) -> MatchError { + MatchError::new(MatchErrorKind::UnsupportedAnchored { mode }) + } +} + +/// The underlying kind of a [`MatchError`]. +/// +/// This is a **non-exhaustive** enum. That means new variants may be added in +/// a semver-compatible release. +#[non_exhaustive] +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum MatchErrorKind { + /// The search saw a "quit" byte at which it was instructed to stop + /// searching. + Quit { + /// The "quit" byte that was observed that caused the search to stop. + byte: u8, + /// The offset at which the quit byte was observed. 
+ offset: usize, + }, + /// The search, based on heuristics, determined that it would be better + /// to stop, typically to provide the caller an opportunity to use an + /// alternative regex engine. + /// + /// Currently, the only way for this to occur is via the lazy DFA and + /// only when it is configured to do so (it will not return this error by + /// default). + GaveUp { + /// The offset at which the search stopped. This corresponds to the + /// position immediately following the last byte scanned. + offset: usize, + }, + /// This error occurs if the haystack given to the regex engine was too + /// long to be searched. This occurs, for example, with regex engines + /// like the bounded backtracker that have a configurable fixed amount of + /// capacity that is tied to the length of the haystack. Anything beyond + /// that configured limit will result in an error at search time. + HaystackTooLong { + /// The length of the haystack that exceeded the limit. + len: usize, + }, + /// An error indicating that a particular type of anchored search was + /// requested, but that the regex engine does not support it. + /// + /// Note that this error should not be returned by a regex engine simply + /// because the pattern ID is invalid (i.e., equal to or exceeds the number + /// of patterns in the regex). In that case, the regex engine should report + /// a non-match. + UnsupportedAnchored { + /// The anchored mode given that is unsupported. 
+ mode: Anchored, + }, +} + +#[cfg(feature = "std")] +impl std::error::Error for MatchError {} + +impl core::fmt::Display for MatchError { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + match *self.kind() { + MatchErrorKind::Quit { byte, offset } => write!( + f, + "quit search after observing byte {:?} at offset {}", + DebugByte(byte), + offset, + ), + MatchErrorKind::GaveUp { offset } => { + write!(f, "gave up searching at offset {}", offset) + } + MatchErrorKind::HaystackTooLong { len } => { + write!(f, "haystack of length {} is too long", len) + } + MatchErrorKind::UnsupportedAnchored { mode: Anchored::Yes } => { + write!(f, "anchored searches are not supported or enabled") + } + MatchErrorKind::UnsupportedAnchored { mode: Anchored::No } => { + write!(f, "unanchored searches are not supported or enabled") + } + MatchErrorKind::UnsupportedAnchored { + mode: Anchored::Pattern(pid), + } => { + write!( + f, + "anchored searches for a specific pattern ({}) are \ + not supported or enabled", + pid.as_usize(), + ) + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + // We test that our 'MatchError' type is the size we expect. This isn't an + // API guarantee, but if the size increases, we really want to make sure we + // decide to do that intentionally. So this should be a speed bump. And in + // general, we should not increase the size without a very good reason. + // + // Why? Because low level search APIs return Result<.., MatchError>. When + // MatchError gets bigger, so to does the Result type. + // + // Now, when 'alloc' is enabled, we do box the error, which de-emphasizes + // the importance of keeping a small error type. But without 'alloc', we + // still want things to be small. 
+ #[test] + fn match_error_size() { + let expected_size = if cfg!(feature = "alloc") { + core::mem::size_of::() + } else { + 2 * core::mem::size_of::() + }; + assert_eq!(expected_size, core::mem::size_of::()); + } + + // Same as above, but for the underlying match error kind. + #[cfg(target_pointer_width = "64")] + #[test] + fn match_error_kind_size() { + let expected_size = 2 * core::mem::size_of::(); + assert_eq!(expected_size, core::mem::size_of::()); + } + + #[cfg(target_pointer_width = "32")] + #[test] + fn match_error_kind_size() { + let expected_size = 3 * core::mem::size_of::(); + assert_eq!(expected_size, core::mem::size_of::()); + } + + #[test] + fn incorrect_asref_guard() { + struct Bad(std::cell::Cell); + + impl AsRef<[u8]> for Bad { + fn as_ref(&self) -> &[u8] { + if self.0.replace(false) { + &[] + } else { + &[0; 1000] + } + } + } + + let bad = Bad(std::cell::Cell::new(true)); + let input = Input::new(&bad); + assert!(input.end() <= input.haystack().len()); + } +} diff --git a/vendor/regex-automata/src/util/sparse_set.rs b/vendor/regex-automata/src/util/sparse_set.rs new file mode 100644 index 0000000..cbaa0b6 --- /dev/null +++ b/vendor/regex-automata/src/util/sparse_set.rs @@ -0,0 +1,239 @@ +/*! +This module defines a sparse set data structure. Its most interesting +properties are: + +* They preserve insertion order. +* Set membership testing is done in constant time. +* Set insertion is done in constant time. +* Clearing the set is done in constant time. + +The cost for doing this is that the capacity of the set needs to be known up +front, and the elements in the set are limited to state identifiers. + +These sets are principally used when traversing an NFA state graph. This +happens at search time, for example, in the PikeVM. It also happens during DFA +determinization. +*/ + +use alloc::{vec, vec::Vec}; + +use crate::util::primitives::StateID; + +/// A pairse of sparse sets. 
+/// +/// This is useful when one needs to compute NFA epsilon closures from a +/// previous set of states derived from an epsilon closure. One set can be the +/// starting states where as the other set can be the destination states after +/// following the transitions for a particular byte of input. +/// +/// There is no significance to 'set1' or 'set2'. They are both sparse sets of +/// the same size. +/// +/// The members of this struct are exposed so that callers may borrow 'set1' +/// and 'set2' individually without being force to borrow both at the same +/// time. +#[derive(Clone, Debug)] +pub(crate) struct SparseSets { + pub(crate) set1: SparseSet, + pub(crate) set2: SparseSet, +} + +impl SparseSets { + /// Create a new pair of sparse sets where each set has the given capacity. + /// + /// This panics if the capacity given is bigger than `StateID::LIMIT`. + pub(crate) fn new(capacity: usize) -> SparseSets { + SparseSets { + set1: SparseSet::new(capacity), + set2: SparseSet::new(capacity), + } + } + + /// Resizes these sparse sets to have the new capacity given. + /// + /// The sets are automatically cleared. + /// + /// This panics if the capacity given is bigger than `StateID::LIMIT`. + #[inline] + pub(crate) fn resize(&mut self, new_capacity: usize) { + self.set1.resize(new_capacity); + self.set2.resize(new_capacity); + } + + /// Clear both sparse sets. + pub(crate) fn clear(&mut self) { + self.set1.clear(); + self.set2.clear(); + } + + /// Swap set1 with set2. + pub(crate) fn swap(&mut self) { + core::mem::swap(&mut self.set1, &mut self.set2); + } + + /// Returns the memory usage, in bytes, used by this pair of sparse sets. + pub(crate) fn memory_usage(&self) -> usize { + self.set1.memory_usage() + self.set2.memory_usage() + } +} + +/// A sparse set used for representing ordered NFA states. +/// +/// This supports constant time addition and membership testing. Clearing an +/// entire set can also be done in constant time. 
Iteration yields elements +/// in the order in which they were inserted. +/// +/// The data structure is based on: https://research.swtch.com/sparse +/// Note though that we don't actually use uninitialized memory. We generally +/// reuse sparse sets, so the initial allocation cost is bareable. However, its +/// other properties listed above are extremely useful. +#[derive(Clone)] +pub(crate) struct SparseSet { + /// The number of elements currently in this set. + len: usize, + /// Dense contains the ids in the order in which they were inserted. + dense: Vec, + /// Sparse maps ids to their location in dense. + /// + /// A state ID is in the set if and only if + /// sparse[id] < len && id == dense[sparse[id]]. + /// + /// Note that these are indices into 'dense'. It's a little weird to use + /// StateID here, but we know our length can never exceed the bounds of + /// StateID (enforced by 'resize') and StateID will be at most 4 bytes + /// where as a usize is likely double that in most cases. + sparse: Vec, +} + +impl SparseSet { + /// Create a new sparse set with the given capacity. + /// + /// Sparse sets have a fixed size and they cannot grow. Attempting to + /// insert more distinct elements than the total capacity of the set will + /// result in a panic. + /// + /// This panics if the capacity given is bigger than `StateID::LIMIT`. + #[inline] + pub(crate) fn new(capacity: usize) -> SparseSet { + let mut set = SparseSet { len: 0, dense: vec![], sparse: vec![] }; + set.resize(capacity); + set + } + + /// Resizes this sparse set to have the new capacity given. + /// + /// This set is automatically cleared. + /// + /// This panics if the capacity given is bigger than `StateID::LIMIT`. 
+ #[inline] + pub(crate) fn resize(&mut self, new_capacity: usize) { + assert!( + new_capacity <= StateID::LIMIT, + "sparse set capacity cannot excced {:?}", + StateID::LIMIT + ); + self.clear(); + self.dense.resize(new_capacity, StateID::ZERO); + self.sparse.resize(new_capacity, StateID::ZERO); + } + + /// Returns the capacity of this set. + /// + /// The capacity represents a fixed limit on the number of distinct + /// elements that are allowed in this set. The capacity cannot be changed. + #[inline] + pub(crate) fn capacity(&self) -> usize { + self.dense.len() + } + + /// Returns the number of elements in this set. + #[inline] + pub(crate) fn len(&self) -> usize { + self.len + } + + /// Returns true if and only if this set is empty. + #[inline] + pub(crate) fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Insert the state ID value into this set and return true if the given + /// state ID was not previously in this set. + /// + /// This operation is idempotent. If the given value is already in this + /// set, then this is a no-op. + /// + /// If more than `capacity` ids are inserted, then this panics. + /// + /// This is marked as inline(always) since the compiler won't inline it + /// otherwise, and it's a fairly hot piece of code in DFA determinization. + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn insert(&mut self, id: StateID) -> bool { + if self.contains(id) { + return false; + } + + let i = self.len(); + assert!( + i < self.capacity(), + "{:?} exceeds capacity of {:?} when inserting {:?}", + i, + self.capacity(), + id, + ); + // OK since i < self.capacity() and self.capacity() is guaranteed to + // be <= StateID::LIMIT. + let index = StateID::new_unchecked(i); + self.dense[index] = id; + self.sparse[id] = index; + self.len += 1; + true + } + + /// Returns true if and only if this set contains the given value. 
+ #[inline] + pub(crate) fn contains(&self, id: StateID) -> bool { + let index = self.sparse[id]; + index.as_usize() < self.len() && self.dense[index] == id + } + + /// Clear this set such that it has no members. + #[inline] + pub(crate) fn clear(&mut self) { + self.len = 0; + } + + #[inline] + pub(crate) fn iter(&self) -> SparseSetIter<'_> { + SparseSetIter(self.dense[..self.len()].iter()) + } + + /// Returns the heap memory usage, in bytes, used by this sparse set. + #[inline] + pub(crate) fn memory_usage(&self) -> usize { + self.dense.len() * StateID::SIZE + self.sparse.len() * StateID::SIZE + } +} + +impl core::fmt::Debug for SparseSet { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + let elements: Vec = self.iter().collect(); + f.debug_tuple("SparseSet").field(&elements).finish() + } +} + +/// An iterator over all elements in a sparse set. +/// +/// The lifetime `'a` refers to the lifetime of the set being iterated over. +#[derive(Debug)] +pub(crate) struct SparseSetIter<'a>(core::slice::Iter<'a, StateID>); + +impl<'a> Iterator for SparseSetIter<'a> { + type Item = StateID; + + #[cfg_attr(feature = "perf-inline", inline(always))] + fn next(&mut self) -> Option { + self.0.next().map(|&id| id) + } +} diff --git a/vendor/regex-automata/src/util/start.rs b/vendor/regex-automata/src/util/start.rs new file mode 100644 index 0000000..2715378 --- /dev/null +++ b/vendor/regex-automata/src/util/start.rs @@ -0,0 +1,479 @@ +/*! +Provides helpers for dealing with start state configurations in DFAs. +*/ + +use crate::util::{ + look::LookMatcher, + search::{Anchored, Input}, + wire::{self, DeserializeError, SerializeError}, +}; + +/// The configuration used to determine a DFA's start state for a search. +/// +/// A DFA has a single starting state in the typical textbook description. That +/// is, it corresponds to the set of all starting states for the NFA that built +/// it, along with their espsilon closures. 
In this crate, however, DFAs have +/// many possible start states due to a few factors: +/// +/// * DFAs support the ability to run either anchored or unanchored searches. +/// Each type of search needs its own start state. For example, an unanchored +/// search requires starting at a state corresponding to a regex with a +/// `(?s-u:.)*?` prefix, which will match through anything. +/// * DFAs also optionally support starting an anchored search for any one +/// specific pattern. Each such pattern requires its own start state. +/// * If a look-behind assertion like `^` or `\b` is used in the regex, then +/// the DFA will need to inspect a single byte immediately before the start of +/// the search to choose the correct start state. +/// +/// Indeed, this configuration precisely encapsulates all of the above factors. +/// The [`Config::anchored`] method sets which kind of anchored search to +/// perform while the [`Config::look_behind`] method provides a way to set +/// the byte that occurs immediately before the start of the search. +/// +/// Generally speaking, this type is only useful when you want to run searches +/// without using an [`Input`]. In particular, an `Input` wants a haystack +/// slice, but callers may not have a contiguous sequence of bytes as a +/// haystack in all cases. This type provides a lower level of control such +/// that callers can provide their own anchored configuration and look-behind +/// byte explicitly. +/// +/// # Example +/// +/// This shows basic usage that permits running a search with a DFA without +/// using the `Input` abstraction. 
+/// +/// ``` +/// use regex_automata::{ +/// dfa::{Automaton, dense}, +/// util::start, +/// Anchored, +/// }; +/// +/// let dfa = dense::DFA::new(r"(?-u)\b\w+\b")?; +/// let haystack = "quartz"; +/// +/// let config = start::Config::new().anchored(Anchored::Yes); +/// let mut state = dfa.start_state(&config)?; +/// for &b in haystack.as_bytes().iter() { +/// state = dfa.next_state(state, b); +/// } +/// state = dfa.next_eoi_state(state); +/// assert!(dfa.is_match_state(state)); +/// +/// # Ok::<(), Box>(()) +/// ``` +/// +/// This example shows how to correctly run a search that doesn't begin at +/// the start of a haystack. Notice how we set the look-behind byte, and as +/// a result, the `\b` assertion does not match. +/// +/// ``` +/// use regex_automata::{ +/// dfa::{Automaton, dense}, +/// util::start, +/// Anchored, +/// }; +/// +/// let dfa = dense::DFA::new(r"(?-u)\b\w+\b")?; +/// let haystack = "quartz"; +/// +/// let config = start::Config::new() +/// .anchored(Anchored::Yes) +/// .look_behind(Some(b'q')); +/// let mut state = dfa.start_state(&config)?; +/// for &b in haystack.as_bytes().iter().skip(1) { +/// state = dfa.next_state(state, b); +/// } +/// state = dfa.next_eoi_state(state); +/// // No match! +/// assert!(!dfa.is_match_state(state)); +/// +/// # Ok::<(), Box>(()) +/// ``` +/// +/// If we had instead not set a look-behind byte, then the DFA would assume +/// that it was starting at the beginning of the haystack, and thus `\b` should +/// match. This in turn would result in erroneously reporting a match: +/// +/// ``` +/// use regex_automata::{ +/// dfa::{Automaton, dense}, +/// util::start, +/// Anchored, +/// }; +/// +/// let dfa = dense::DFA::new(r"(?-u)\b\w+\b")?; +/// let haystack = "quartz"; +/// +/// // Whoops, forgot the look-behind byte... 
+/// let config = start::Config::new().anchored(Anchored::Yes); +/// let mut state = dfa.start_state(&config)?; +/// for &b in haystack.as_bytes().iter().skip(1) { +/// state = dfa.next_state(state, b); +/// } +/// state = dfa.next_eoi_state(state); +/// // And now we get a match unexpectedly. +/// assert!(dfa.is_match_state(state)); +/// +/// # Ok::<(), Box>(()) +/// ``` +#[derive(Clone, Debug)] +pub struct Config { + look_behind: Option, + anchored: Anchored, +} + +impl Config { + /// Create a new default start configuration. + /// + /// The default is an unanchored search that starts at the beginning of the + /// haystack. + pub fn new() -> Config { + Config { anchored: Anchored::No, look_behind: None } + } + + /// A convenience routine for building a start configuration from an + /// [`Input`] for a forward search. + /// + /// This automatically sets the look-behind byte to the byte immediately + /// preceding the start of the search. If the start of the search is at + /// offset `0`, then no look-behind byte is set. + pub fn from_input_forward(input: &Input<'_>) -> Config { + let look_behind = input + .start() + .checked_sub(1) + .and_then(|i| input.haystack().get(i).copied()); + Config { look_behind, anchored: input.get_anchored() } + } + + /// A convenience routine for building a start configuration from an + /// [`Input`] for a reverse search. + /// + /// This automatically sets the look-behind byte to the byte immediately + /// following the end of the search. If the end of the search is at + /// offset `haystack.len()`, then no look-behind byte is set. + pub fn from_input_reverse(input: &Input<'_>) -> Config { + let look_behind = input.haystack().get(input.end()).copied(); + Config { look_behind, anchored: input.get_anchored() } + } + + /// Set the look-behind byte at the start of a search. 
+ /// + /// Unless the search is intended to logically start at the beginning of a + /// haystack, this should _always_ be set to the byte immediately preceding + /// the start of the search. If no look-behind byte is set, then the start + /// configuration will assume it is at the beginning of the haystack. For + /// example, the anchor `^` will match. + /// + /// The default is that no look-behind byte is set. + pub fn look_behind(mut self, byte: Option) -> Config { + self.look_behind = byte; + self + } + + /// Set the anchored mode of a search. + /// + /// The default is an unanchored search. + pub fn anchored(mut self, mode: Anchored) -> Config { + self.anchored = mode; + self + } + + /// Return the look-behind byte in this configuration, if one exists. + pub fn get_look_behind(&self) -> Option { + self.look_behind + } + + /// Return the anchored mode in this configuration. + pub fn get_anchored(&self) -> Anchored { + self.anchored + } +} + +/// A map from every possible byte value to its corresponding starting +/// configuration. +/// +/// This map is used in order to lookup the start configuration for a particular +/// position in a haystack. This start configuration is then used in +/// combination with things like the anchored mode and pattern ID to fully +/// determine the start state. +/// +/// Generally speaking, this map is only used for fully compiled DFAs and lazy +/// DFAs. For NFAs (including the one-pass DFA), the start state is generally +/// selected by virtue of traversing the NFA state graph. DFAs do the same +/// thing, but at build time and not search time. (Well, technically the lazy +/// DFA does it at search time, but it does enough work to cache the full +/// result of the epsilon closure that the NFA engines tend to need to do.) +#[derive(Clone)] +pub(crate) struct StartByteMap { + map: [Start; 256], +} + +impl StartByteMap { + /// Create a new map from byte values to their corresponding starting + /// configurations. 
The map is determined, in part, by how look-around + /// assertions are matched via the matcher given. + pub(crate) fn new(lookm: &LookMatcher) -> StartByteMap { + let mut map = [Start::NonWordByte; 256]; + map[usize::from(b'\n')] = Start::LineLF; + map[usize::from(b'\r')] = Start::LineCR; + map[usize::from(b'_')] = Start::WordByte; + + let mut byte = b'0'; + while byte <= b'9' { + map[usize::from(byte)] = Start::WordByte; + byte += 1; + } + byte = b'A'; + while byte <= b'Z' { + map[usize::from(byte)] = Start::WordByte; + byte += 1; + } + byte = b'a'; + while byte <= b'z' { + map[usize::from(byte)] = Start::WordByte; + byte += 1; + } + + let lineterm = lookm.get_line_terminator(); + // If our line terminator is normal, then it is already handled by + // the LineLF and LineCR configurations. But if it's weird, then we + // overwrite whatever was there before for that terminator with a + // special configuration. The trick here is that if the terminator + // is, say, a word byte like `a`, then callers seeing this start + // configuration need to account for that and build their DFA state as + // if it *also* came from a word byte. + if lineterm != b'\r' && lineterm != b'\n' { + map[usize::from(lineterm)] = Start::CustomLineTerminator; + } + StartByteMap { map } + } + + /// Return the starting configuration for the given look-behind byte. + /// + /// If no look-behind exists, callers should use `Start::Text`. + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn get(&self, byte: u8) -> Start { + self.map[usize::from(byte)] + } + + /// Deserializes a byte class map from the given slice. If the slice is of + /// insufficient length or otherwise contains an impossible mapping, then + /// an error is returned. Upon success, the number of bytes read along with + /// the map are returned. The number of bytes read is always a multiple of + /// 8. 
+ pub(crate) fn from_bytes( + slice: &[u8], + ) -> Result<(StartByteMap, usize), DeserializeError> { + wire::check_slice_len(slice, 256, "start byte map")?; + let mut map = [Start::NonWordByte; 256]; + for (i, &repr) in slice[..256].iter().enumerate() { + map[i] = match Start::from_usize(usize::from(repr)) { + Some(start) => start, + None => { + return Err(DeserializeError::generic( + "found invalid starting configuration", + )) + } + }; + } + Ok((StartByteMap { map }, 256)) + } + + /// Writes this map to the given byte buffer. if the given buffer is too + /// small, then an error is returned. Upon success, the total number of + /// bytes written is returned. The number of bytes written is guaranteed to + /// be a multiple of 8. + pub(crate) fn write_to( + &self, + dst: &mut [u8], + ) -> Result { + let nwrite = self.write_to_len(); + if dst.len() < nwrite { + return Err(SerializeError::buffer_too_small("start byte map")); + } + for (i, &start) in self.map.iter().enumerate() { + dst[i] = start.as_u8(); + } + Ok(nwrite) + } + + /// Returns the total number of bytes written by `write_to`. + pub(crate) fn write_to_len(&self) -> usize { + 256 + } +} + +impl core::fmt::Debug for StartByteMap { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + use crate::util::escape::DebugByte; + + write!(f, "StartByteMap{{")?; + for byte in 0..=255 { + if byte > 0 { + write!(f, ", ")?; + } + let start = self.map[usize::from(byte)]; + write!(f, "{:?} => {:?}", DebugByte(byte), start)?; + } + write!(f, "}}")?; + Ok(()) + } +} + +/// Represents the six possible starting configurations of a DFA search. +/// +/// The starting configuration is determined by inspecting the the beginning +/// of the haystack (up to 1 byte). Ultimately, this along with a pattern ID +/// (if specified) and the type of search (anchored or not) is what selects the +/// start state to use in a DFA. 
+/// +/// As one example, if a DFA only supports unanchored searches and does not +/// support anchored searches for each pattern, then it will have at most 6 +/// distinct start states. (Some start states may be reused if determinization +/// can determine that they will be equivalent.) If the DFA supports both +/// anchored and unanchored searches, then it will have a maximum of 12 +/// distinct start states. Finally, if the DFA also supports anchored searches +/// for each pattern, then it can have up to `12 + (N * 6)` start states, where +/// `N` is the number of patterns. +/// +/// Handling each of these starting configurations in the context of DFA +/// determinization can be *quite* tricky and subtle. But the code is small +/// and can be found at `crate::util::determinize::set_lookbehind_from_start`. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub(crate) enum Start { + /// This occurs when the starting position is not any of the ones below. + NonWordByte = 0, + /// This occurs when the byte immediately preceding the start of the search + /// is an ASCII word byte. + WordByte = 1, + /// This occurs when the starting position of the search corresponds to the + /// beginning of the haystack. + Text = 2, + /// This occurs when the byte immediately preceding the start of the search + /// is a line terminator. Specifically, `\n`. + LineLF = 3, + /// This occurs when the byte immediately preceding the start of the search + /// is a line terminator. Specifically, `\r`. + LineCR = 4, + /// This occurs when a custom line terminator has been set via a + /// `LookMatcher`, and when that line terminator is neither a `\r` or a + /// `\n`. + /// + /// If the custom line terminator is a word byte, then this start + /// configuration is still selected. 
DFAs that implement word boundary + /// assertions will likely need to check whether the custom line terminator + /// is a word byte, in which case, it should behave as if the byte + /// satisfies `\b` in addition to multi-line anchors. + CustomLineTerminator = 5, +} + +impl Start { + /// Return the starting state corresponding to the given integer. If no + /// starting state exists for the given integer, then None is returned. + pub(crate) fn from_usize(n: usize) -> Option { + match n { + 0 => Some(Start::NonWordByte), + 1 => Some(Start::WordByte), + 2 => Some(Start::Text), + 3 => Some(Start::LineLF), + 4 => Some(Start::LineCR), + 5 => Some(Start::CustomLineTerminator), + _ => None, + } + } + + /// Returns the total number of starting state configurations. + pub(crate) fn len() -> usize { + 6 + } + + /// Return this starting configuration as `u8` integer. It is guaranteed to + /// be less than `Start::len()`. + #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn as_u8(&self) -> u8 { + // AFAIK, 'as' is the only way to zero-cost convert an int enum to an + // actual int. + *self as u8 + } + + /// Return this starting configuration as a `usize` integer. It is + /// guaranteed to be less than `Start::len()`. 
+ #[cfg_attr(feature = "perf-inline", inline(always))] + pub(crate) fn as_usize(&self) -> usize { + usize::from(self.as_u8()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn start_fwd_done_range() { + let smap = StartByteMap::new(&LookMatcher::default()); + let input = Input::new("").range(1..0); + let config = Config::from_input_forward(&input); + let start = + config.get_look_behind().map_or(Start::Text, |b| smap.get(b)); + assert_eq!(Start::Text, start); + } + + #[test] + fn start_rev_done_range() { + let smap = StartByteMap::new(&LookMatcher::default()); + let input = Input::new("").range(1..0); + let config = Config::from_input_reverse(&input); + let start = + config.get_look_behind().map_or(Start::Text, |b| smap.get(b)); + assert_eq!(Start::Text, start); + } + + #[test] + fn start_fwd() { + let f = |haystack, start, end| { + let smap = StartByteMap::new(&LookMatcher::default()); + let input = Input::new(haystack).range(start..end); + let config = Config::from_input_forward(&input); + let start = + config.get_look_behind().map_or(Start::Text, |b| smap.get(b)); + start + }; + + assert_eq!(Start::Text, f("", 0, 0)); + assert_eq!(Start::Text, f("abc", 0, 3)); + assert_eq!(Start::Text, f("\nabc", 0, 3)); + + assert_eq!(Start::LineLF, f("\nabc", 1, 3)); + + assert_eq!(Start::LineCR, f("\rabc", 1, 3)); + + assert_eq!(Start::WordByte, f("abc", 1, 3)); + + assert_eq!(Start::NonWordByte, f(" abc", 1, 3)); + } + + #[test] + fn start_rev() { + let f = |haystack, start, end| { + let smap = StartByteMap::new(&LookMatcher::default()); + let input = Input::new(haystack).range(start..end); + let config = Config::from_input_reverse(&input); + let start = + config.get_look_behind().map_or(Start::Text, |b| smap.get(b)); + start + }; + + assert_eq!(Start::Text, f("", 0, 0)); + assert_eq!(Start::Text, f("abc", 0, 3)); + assert_eq!(Start::Text, f("abc\n", 0, 4)); + + assert_eq!(Start::LineLF, f("abc\nz", 0, 3)); + + assert_eq!(Start::LineCR, f("abc\rz", 0, 3)); 
+ + assert_eq!(Start::WordByte, f("abc", 0, 2)); + + assert_eq!(Start::NonWordByte, f("abc ", 0, 3)); + } +} diff --git a/vendor/regex-automata/src/util/syntax.rs b/vendor/regex-automata/src/util/syntax.rs new file mode 100644 index 0000000..78e3cf9 --- /dev/null +++ b/vendor/regex-automata/src/util/syntax.rs @@ -0,0 +1,482 @@ +/*! +Utilities for dealing with the syntax of a regular expression. + +This module currently only exposes a [`Config`] type that +itself represents a wrapper around the configuration for a +[`regex-syntax::ParserBuilder`](regex_syntax::ParserBuilder). The purpose of +this wrapper is to make configuring syntax options very similar to how other +configuration is done throughout this crate. Namely, instead of duplicating +syntax options across every builder (of which there are many), we instead +create small config objects like this one that can be passed around and +composed. +*/ + +use alloc::{vec, vec::Vec}; + +use regex_syntax::{ + ast, + hir::{self, Hir}, + Error, ParserBuilder, +}; + +/// A convenience routine for parsing a pattern into an HIR value with the +/// default configuration. +/// +/// # Example +/// +/// This shows how to parse a pattern into an HIR value: +/// +/// ``` +/// use regex_automata::util::syntax; +/// +/// let hir = syntax::parse(r"([a-z]+)|([0-9]+)")?; +/// assert_eq!(Some(1), hir.properties().static_explicit_captures_len()); +/// +/// # Ok::<(), Box>(()) +/// ``` +pub fn parse(pattern: &str) -> Result { + parse_with(pattern, &Config::default()) +} + +/// A convenience routine for parsing many patterns into HIR value with the +/// default configuration. 
+/// +/// # Example +/// +/// This shows how to parse many patterns into an corresponding HIR values: +/// +/// ``` +/// use { +/// regex_automata::util::syntax, +/// regex_syntax::hir::Properties, +/// }; +/// +/// let hirs = syntax::parse_many(&[ +/// r"([a-z]+)|([0-9]+)", +/// r"foo(A-Z]+)bar", +/// ])?; +/// let props = Properties::union(hirs.iter().map(|h| h.properties())); +/// assert_eq!(Some(1), props.static_explicit_captures_len()); +/// +/// # Ok::<(), Box>(()) +/// ``` +pub fn parse_many>(patterns: &[P]) -> Result, Error> { + parse_many_with(patterns, &Config::default()) +} + +/// A convenience routine for parsing a pattern into an HIR value using a +/// `Config`. +/// +/// # Example +/// +/// This shows how to parse a pattern into an HIR value with a non-default +/// configuration: +/// +/// ``` +/// use regex_automata::util::syntax; +/// +/// let hir = syntax::parse_with( +/// r"^[a-z]+$", +/// &syntax::Config::new().multi_line(true).crlf(true), +/// )?; +/// assert!(hir.properties().look_set().contains_anchor_crlf()); +/// +/// # Ok::<(), Box>(()) +/// ``` +pub fn parse_with(pattern: &str, config: &Config) -> Result { + let mut builder = ParserBuilder::new(); + config.apply(&mut builder); + builder.build().parse(pattern) +} + +/// A convenience routine for parsing many patterns into HIR values using a +/// `Config`. 
+/// +/// # Example +/// +/// This shows how to parse many patterns into an corresponding HIR values +/// with a non-default configuration: +/// +/// ``` +/// use { +/// regex_automata::util::syntax, +/// regex_syntax::hir::Properties, +/// }; +/// +/// let patterns = &[ +/// r"([a-z]+)|([0-9]+)", +/// r"\W", +/// r"foo(A-Z]+)bar", +/// ]; +/// let config = syntax::Config::new().unicode(false).utf8(false); +/// let hirs = syntax::parse_many_with(patterns, &config)?; +/// let props = Properties::union(hirs.iter().map(|h| h.properties())); +/// assert!(!props.is_utf8()); +/// +/// # Ok::<(), Box>(()) +/// ``` +pub fn parse_many_with>( + patterns: &[P], + config: &Config, +) -> Result, Error> { + let mut builder = ParserBuilder::new(); + config.apply(&mut builder); + let mut hirs = vec![]; + for p in patterns.iter() { + hirs.push(builder.build().parse(p.as_ref())?); + } + Ok(hirs) +} + +/// A common set of configuration options that apply to the syntax of a regex. +/// +/// This represents a group of configuration options that specifically apply +/// to how the concrete syntax of a regular expression is interpreted. In +/// particular, they are generally forwarded to the +/// [`ParserBuilder`](https://docs.rs/regex-syntax/*/regex_syntax/struct.ParserBuilder.html) +/// in the +/// [`regex-syntax`](https://docs.rs/regex-syntax) +/// crate when building a regex from its concrete syntax directly. +/// +/// These options are defined as a group since they apply to every regex engine +/// in this crate. Instead of re-defining them on every engine's builder, they +/// are instead provided here as one cohesive unit. +#[derive(Clone, Copy, Debug)] +pub struct Config { + case_insensitive: bool, + multi_line: bool, + dot_matches_new_line: bool, + crlf: bool, + line_terminator: u8, + swap_greed: bool, + ignore_whitespace: bool, + unicode: bool, + utf8: bool, + nest_limit: u32, + octal: bool, +} + +impl Config { + /// Return a new default syntax configuration. 
+ pub fn new() -> Config { + // These defaults match the ones used in regex-syntax. + Config { + case_insensitive: false, + multi_line: false, + dot_matches_new_line: false, + crlf: false, + line_terminator: b'\n', + swap_greed: false, + ignore_whitespace: false, + unicode: true, + utf8: true, + nest_limit: 250, + octal: false, + } + } + + /// Enable or disable the case insensitive flag by default. + /// + /// When Unicode mode is enabled, case insensitivity is Unicode-aware. + /// Specifically, it will apply the "simple" case folding rules as + /// specified by Unicode. + /// + /// By default this is disabled. It may alternatively be selectively + /// enabled in the regular expression itself via the `i` flag. + pub fn case_insensitive(mut self, yes: bool) -> Config { + self.case_insensitive = yes; + self + } + + /// Enable or disable the multi-line matching flag by default. + /// + /// When this is enabled, the `^` and `$` look-around assertions will + /// match immediately after and immediately before a new line character, + /// respectively. Note that the `\A` and `\z` look-around assertions are + /// unaffected by this setting and always correspond to matching at the + /// beginning and end of the input. + /// + /// By default this is disabled. It may alternatively be selectively + /// enabled in the regular expression itself via the `m` flag. + pub fn multi_line(mut self, yes: bool) -> Config { + self.multi_line = yes; + self + } + + /// Enable or disable the "dot matches any character" flag by default. + /// + /// When this is enabled, `.` will match any character. When it's disabled, + /// then `.` will match any character except for a new line character. + /// + /// Note that `.` is impacted by whether the "unicode" setting is enabled + /// or not. When Unicode is enabled (the default), `.` will match any UTF-8 + /// encoding of any Unicode scalar value (sans a new line, depending on + /// whether this "dot matches new line" option is enabled). 
When Unicode + /// mode is disabled, `.` will match any byte instead. Because of this, + /// when Unicode mode is disabled, `.` can only be used when the "allow + /// invalid UTF-8" option is enabled, since `.` could otherwise match + /// invalid UTF-8. + /// + /// By default this is disabled. It may alternatively be selectively + /// enabled in the regular expression itself via the `s` flag. + pub fn dot_matches_new_line(mut self, yes: bool) -> Config { + self.dot_matches_new_line = yes; + self + } + + /// Enable or disable the "CRLF mode" flag by default. + /// + /// By default this is disabled. It may alternatively be selectively + /// enabled in the regular expression itself via the `R` flag. + /// + /// When CRLF mode is enabled, the following happens: + /// + /// * Unless `dot_matches_new_line` is enabled, `.` will match any character + /// except for `\r` and `\n`. + /// * When `multi_line` mode is enabled, `^` and `$` will treat `\r\n`, + /// `\r` and `\n` as line terminators. And in particular, neither will + /// match between a `\r` and a `\n`. + pub fn crlf(mut self, yes: bool) -> Config { + self.crlf = yes; + self + } + + /// Sets the line terminator for use with `(?u-s:.)` and `(?-us:.)`. + /// + /// Namely, instead of `.` (by default) matching everything except for `\n`, + /// this will cause `.` to match everything except for the byte given. + /// + /// If `.` is used in a context where Unicode mode is enabled and this byte + /// isn't ASCII, then an error will be returned. When Unicode mode is + /// disabled, then any byte is permitted, but will return an error if UTF-8 + /// mode is enabled and it is a non-ASCII byte. + /// + /// In short, any ASCII value for a line terminator is always okay. But a + /// non-ASCII byte might result in an error depending on whether Unicode + /// mode or UTF-8 mode are enabled. 
+ /// + /// Note that if `R` mode is enabled then it always takes precedence and + /// the line terminator will be treated as `\r` and `\n` simultaneously. + /// + /// Note also that this *doesn't* impact the look-around assertions + /// `(?m:^)` and `(?m:$)`. That's usually controlled by additional + /// configuration in the regex engine itself. + pub fn line_terminator(mut self, byte: u8) -> Config { + self.line_terminator = byte; + self + } + + /// Enable or disable the "swap greed" flag by default. + /// + /// When this is enabled, `.*` (for example) will become ungreedy and `.*?` + /// will become greedy. + /// + /// By default this is disabled. It may alternatively be selectively + /// enabled in the regular expression itself via the `U` flag. + pub fn swap_greed(mut self, yes: bool) -> Config { + self.swap_greed = yes; + self + } + + /// Enable verbose mode in the regular expression. + /// + /// When enabled, verbose mode permits insignificant whitespace in many + /// places in the regular expression, as well as comments. Comments are + /// started using `#` and continue until the end of the line. + /// + /// By default, this is disabled. It may be selectively enabled in the + /// regular expression by using the `x` flag regardless of this setting. + pub fn ignore_whitespace(mut self, yes: bool) -> Config { + self.ignore_whitespace = yes; + self + } + + /// Enable or disable the Unicode flag (`u`) by default. + /// + /// By default this is **enabled**. It may alternatively be selectively + /// disabled in the regular expression itself via the `u` flag. + /// + /// Note that unless "allow invalid UTF-8" is enabled (it's disabled by + /// default), a regular expression will fail to parse if Unicode mode is + /// disabled and a sub-expression could possibly match invalid UTF-8. + /// + /// **WARNING**: Unicode mode can greatly increase the size of the compiled + /// DFA, which can noticeably impact both memory usage and compilation + /// time.
This is especially noticeable if your regex contains character + /// classes like `\w` that are impacted by whether Unicode is enabled or + /// not. If Unicode is not necessary, you are encouraged to disable it. + pub fn unicode(mut self, yes: bool) -> Config { + self.unicode = yes; + self + } + + /// When disabled, the builder will permit the construction of a regular + /// expression that may match invalid UTF-8. + /// + /// For example, when [`Config::unicode`] is disabled, then + /// expressions like `[^a]` may match invalid UTF-8 since they can match + /// any single byte that is not `a`. By default, these sub-expressions + /// are disallowed to avoid returning offsets that split a UTF-8 + /// encoded codepoint. However, in cases where matching at arbitrary + /// locations is desired, this option can be disabled to permit all such + /// sub-expressions. + /// + /// When enabled (the default), the builder is guaranteed to produce a + /// regex that will only ever match valid UTF-8 (otherwise, the builder + /// will return an error). + pub fn utf8(mut self, yes: bool) -> Config { + self.utf8 = yes; + self + } + + /// Set the nesting limit used for the regular expression parser. + /// + /// The nesting limit controls how deep the abstract syntax tree is allowed + /// to be. If the AST exceeds the given limit (e.g., with too many nested + /// groups), then an error is returned by the parser. + /// + /// The purpose of this limit is to act as a heuristic to prevent stack + /// overflow when building a finite automaton from a regular expression's + /// abstract syntax tree. In particular, construction currently uses + /// recursion. In the future, the implementation may stop using recursion + /// and this option will no longer be necessary. + /// + /// This limit is not checked until the entire AST is parsed. 
Therefore, + /// if callers want to put a limit on the amount of heap space used, then + /// they should impose a limit on the length, in bytes, of the concrete + /// pattern string. In particular, this is viable since the parser will + /// limit itself to heap space proportional to the length of the pattern + /// string. + /// + /// Note that a nest limit of `0` will return a nest limit error for most + /// patterns but not all. For example, a nest limit of `0` permits `a` but + /// not `ab`, since `ab` requires a concatenation AST item, which results + /// in a nest depth of `1`. In general, a nest limit is not something that + /// manifests in an obvious way in the concrete syntax, therefore, it + /// should not be used in a granular way. + pub fn nest_limit(mut self, limit: u32) -> Config { + self.nest_limit = limit; + self + } + + /// Whether to support octal syntax or not. + /// + /// Octal syntax is a little-known way of uttering Unicode codepoints in + /// a regular expression. For example, `a`, `\x61`, `\u0061` and + /// `\141` are all equivalent regular expressions, where the last example + /// shows octal syntax. + /// + /// While supporting octal syntax isn't in and of itself a problem, it does + /// make good error messages harder. That is, in PCRE based regex engines, + /// syntax like `\1` invokes a backreference, which is explicitly + /// unsupported in Rust's regex engine. However, many users expect it to + /// be supported. Therefore, when octal support is disabled, the error + /// message will explicitly mention that backreferences aren't supported. + /// + /// Octal syntax is disabled by default. + pub fn octal(mut self, yes: bool) -> Config { + self.octal = yes; + self + } + + /// Returns whether "unicode" mode is enabled. + pub fn get_unicode(&self) -> bool { + self.unicode + } + + /// Returns whether "case insensitive" mode is enabled. 
+ pub fn get_case_insensitive(&self) -> bool { + self.case_insensitive + } + + /// Returns whether "multi line" mode is enabled. + pub fn get_multi_line(&self) -> bool { + self.multi_line + } + + /// Returns whether "dot matches new line" mode is enabled. + pub fn get_dot_matches_new_line(&self) -> bool { + self.dot_matches_new_line + } + + /// Returns whether "CRLF" mode is enabled. + pub fn get_crlf(&self) -> bool { + self.crlf + } + + /// Returns the line terminator in this syntax configuration. + pub fn get_line_terminator(&self) -> u8 { + self.line_terminator + } + + /// Returns whether "swap greed" mode is enabled. + pub fn get_swap_greed(&self) -> bool { + self.swap_greed + } + + /// Returns whether "ignore whitespace" mode is enabled. + pub fn get_ignore_whitespace(&self) -> bool { + self.ignore_whitespace + } + + /// Returns whether UTF-8 mode is enabled. + pub fn get_utf8(&self) -> bool { + self.utf8 + } + + /// Returns the "nest limit" setting. + pub fn get_nest_limit(&self) -> u32 { + self.nest_limit + } + + /// Returns whether "octal" mode is enabled. + pub fn get_octal(&self) -> bool { + self.octal + } + + /// Applies this configuration to the given parser. + pub(crate) fn apply(&self, builder: &mut ParserBuilder) { + builder + .unicode(self.unicode) + .case_insensitive(self.case_insensitive) + .multi_line(self.multi_line) + .dot_matches_new_line(self.dot_matches_new_line) + .crlf(self.crlf) + .line_terminator(self.line_terminator) + .swap_greed(self.swap_greed) + .ignore_whitespace(self.ignore_whitespace) + .utf8(self.utf8) + .nest_limit(self.nest_limit) + .octal(self.octal); + } + + /// Applies this configuration to the given AST parser. + pub(crate) fn apply_ast(&self, builder: &mut ast::parse::ParserBuilder) { + builder + .ignore_whitespace(self.ignore_whitespace) + .nest_limit(self.nest_limit) + .octal(self.octal); + } + + /// Applies this configuration to the given AST-to-HIR translator. 
+ pub(crate) fn apply_hir( + &self, + builder: &mut hir::translate::TranslatorBuilder, + ) { + builder + .unicode(self.unicode) + .case_insensitive(self.case_insensitive) + .multi_line(self.multi_line) + .crlf(self.crlf) + .dot_matches_new_line(self.dot_matches_new_line) + .line_terminator(self.line_terminator) + .swap_greed(self.swap_greed) + .utf8(self.utf8); + } +} + +impl Default for Config { + fn default() -> Config { + Config::new() + } +} diff --git a/vendor/regex-automata/src/util/unicode_data/mod.rs b/vendor/regex-automata/src/util/unicode_data/mod.rs new file mode 100644 index 0000000..fc7b1c7 --- /dev/null +++ b/vendor/regex-automata/src/util/unicode_data/mod.rs @@ -0,0 +1,17 @@ +// This cfg should match the one in src/util/look.rs that uses perl_word. +#[cfg(all( + // We have to explicitly want to support Unicode word boundaries. + feature = "unicode-word-boundary", + not(all( + // If we don't have regex-syntax at all, then we definitely need to + // bring our own \w data table. + feature = "syntax", + // If unicode-perl is enabled, then regex-syntax/unicode-perl is + // also enabled, which in turn means we can use regex-syntax's + // is_word_character routine (and thus use its data tables). But if + // unicode-perl is not enabled, even if syntax is, then we need to + // bring our own. + feature = "unicode-perl", + )), +))] +pub(crate) mod perl_word; diff --git a/vendor/regex-automata/src/util/unicode_data/perl_word.rs b/vendor/regex-automata/src/util/unicode_data/perl_word.rs new file mode 100644 index 0000000..74d6265 --- /dev/null +++ b/vendor/regex-automata/src/util/unicode_data/perl_word.rs @@ -0,0 +1,781 @@ +// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: +// +// ucd-generate perl-word tmp/ucd-15.0.0/ --chars +// +// Unicode version: 15.0.0. +// +// ucd-generate 0.2.15 is available on crates.io. 
+ +pub const PERL_WORD: &'static [(char, char)] = &[ + ('0', '9'), + ('A', 'Z'), + ('_', '_'), + ('a', 'z'), + ('ª', 'ª'), + ('µ', 'µ'), + ('º', 'º'), + ('À', 'Ö'), + ('Ø', 'ö'), + ('ø', 'ˁ'), + ('ˆ', 'ˑ'), + ('ˠ', 'ˤ'), + ('ˬ', 'ˬ'), + ('ˮ', 'ˮ'), + ('\u{300}', 'ʹ'), + ('Ͷ', 'ͷ'), + ('ͺ', 'ͽ'), + ('Ϳ', 'Ϳ'), + ('Ά', 'Ά'), + ('Έ', 'Ί'), + ('Ό', 'Ό'), + ('Ύ', 'Ρ'), + ('Σ', 'ϵ'), + ('Ϸ', 'ҁ'), + ('\u{483}', 'ԯ'), + ('Ա', 'Ֆ'), + ('ՙ', 'ՙ'), + ('ՠ', 'ֈ'), + ('\u{591}', '\u{5bd}'), + ('\u{5bf}', '\u{5bf}'), + ('\u{5c1}', '\u{5c2}'), + ('\u{5c4}', '\u{5c5}'), + ('\u{5c7}', '\u{5c7}'), + ('א', 'ת'), + ('ׯ', 'ײ'), + ('\u{610}', '\u{61a}'), + ('ؠ', '٩'), + ('ٮ', 'ۓ'), + ('ە', '\u{6dc}'), + ('\u{6df}', '\u{6e8}'), + ('\u{6ea}', 'ۼ'), + ('ۿ', 'ۿ'), + ('ܐ', '\u{74a}'), + ('ݍ', 'ޱ'), + ('߀', 'ߵ'), + ('ߺ', 'ߺ'), + ('\u{7fd}', '\u{7fd}'), + ('ࠀ', '\u{82d}'), + ('ࡀ', '\u{85b}'), + ('ࡠ', 'ࡪ'), + ('ࡰ', 'ࢇ'), + ('ࢉ', 'ࢎ'), + ('\u{898}', '\u{8e1}'), + ('\u{8e3}', '\u{963}'), + ('०', '९'), + ('ॱ', 'ঃ'), + ('অ', 'ঌ'), + ('এ', 'ঐ'), + ('ও', 'ন'), + ('প', 'র'), + ('ল', 'ল'), + ('শ', 'হ'), + ('\u{9bc}', '\u{9c4}'), + ('ে', 'ৈ'), + ('ো', 'ৎ'), + ('\u{9d7}', '\u{9d7}'), + ('ড়', 'ঢ়'), + ('য়', '\u{9e3}'), + ('০', 'ৱ'), + ('ৼ', 'ৼ'), + ('\u{9fe}', '\u{9fe}'), + ('\u{a01}', 'ਃ'), + ('ਅ', 'ਊ'), + ('ਏ', 'ਐ'), + ('ਓ', 'ਨ'), + ('ਪ', 'ਰ'), + ('ਲ', 'ਲ਼'), + ('ਵ', 'ਸ਼'), + ('ਸ', 'ਹ'), + ('\u{a3c}', '\u{a3c}'), + ('ਾ', '\u{a42}'), + ('\u{a47}', '\u{a48}'), + ('\u{a4b}', '\u{a4d}'), + ('\u{a51}', '\u{a51}'), + ('ਖ਼', 'ੜ'), + ('ਫ਼', 'ਫ਼'), + ('੦', '\u{a75}'), + ('\u{a81}', 'ઃ'), + ('અ', 'ઍ'), + ('એ', 'ઑ'), + ('ઓ', 'ન'), + ('પ', 'ર'), + ('લ', 'ળ'), + ('વ', 'હ'), + ('\u{abc}', '\u{ac5}'), + ('\u{ac7}', 'ૉ'), + ('ો', '\u{acd}'), + ('ૐ', 'ૐ'), + ('ૠ', '\u{ae3}'), + ('૦', '૯'), + ('ૹ', '\u{aff}'), + ('\u{b01}', 'ଃ'), + ('ଅ', 'ଌ'), + ('ଏ', 'ଐ'), + ('ଓ', 'ନ'), + ('ପ', 'ର'), + ('ଲ', 'ଳ'), + ('ଵ', 'ହ'), + ('\u{b3c}', '\u{b44}'), + ('େ', 'ୈ'), + ('ୋ', '\u{b4d}'), + ('\u{b55}', '\u{b57}'), + ('ଡ଼', 'ଢ଼'), + 
('ୟ', '\u{b63}'), + ('୦', '୯'), + ('ୱ', 'ୱ'), + ('\u{b82}', 'ஃ'), + ('அ', 'ஊ'), + ('எ', 'ஐ'), + ('ஒ', 'க'), + ('ங', 'ச'), + ('ஜ', 'ஜ'), + ('ஞ', 'ட'), + ('ண', 'த'), + ('ந', 'ப'), + ('ம', 'ஹ'), + ('\u{bbe}', 'ூ'), + ('ெ', 'ை'), + ('ொ', '\u{bcd}'), + ('ௐ', 'ௐ'), + ('\u{bd7}', '\u{bd7}'), + ('௦', '௯'), + ('\u{c00}', 'ఌ'), + ('ఎ', 'ఐ'), + ('ఒ', 'న'), + ('ప', 'హ'), + ('\u{c3c}', 'ౄ'), + ('\u{c46}', '\u{c48}'), + ('\u{c4a}', '\u{c4d}'), + ('\u{c55}', '\u{c56}'), + ('ౘ', 'ౚ'), + ('ౝ', 'ౝ'), + ('ౠ', '\u{c63}'), + ('౦', '౯'), + ('ಀ', 'ಃ'), + ('ಅ', 'ಌ'), + ('ಎ', 'ಐ'), + ('ಒ', 'ನ'), + ('ಪ', 'ಳ'), + ('ವ', 'ಹ'), + ('\u{cbc}', 'ೄ'), + ('\u{cc6}', 'ೈ'), + ('ೊ', '\u{ccd}'), + ('\u{cd5}', '\u{cd6}'), + ('ೝ', 'ೞ'), + ('ೠ', '\u{ce3}'), + ('೦', '೯'), + ('ೱ', 'ೳ'), + ('\u{d00}', 'ഌ'), + ('എ', 'ഐ'), + ('ഒ', '\u{d44}'), + ('െ', 'ൈ'), + ('ൊ', 'ൎ'), + ('ൔ', '\u{d57}'), + ('ൟ', '\u{d63}'), + ('൦', '൯'), + ('ൺ', 'ൿ'), + ('\u{d81}', 'ඃ'), + ('අ', 'ඖ'), + ('ක', 'න'), + ('ඳ', 'ර'), + ('ල', 'ල'), + ('ව', 'ෆ'), + ('\u{dca}', '\u{dca}'), + ('\u{dcf}', '\u{dd4}'), + ('\u{dd6}', '\u{dd6}'), + ('ෘ', '\u{ddf}'), + ('෦', '෯'), + ('ෲ', 'ෳ'), + ('ก', '\u{e3a}'), + ('เ', '\u{e4e}'), + ('๐', '๙'), + ('ກ', 'ຂ'), + ('ຄ', 'ຄ'), + ('ຆ', 'ຊ'), + ('ຌ', 'ຣ'), + ('ລ', 'ລ'), + ('ວ', 'ຽ'), + ('ເ', 'ໄ'), + ('ໆ', 'ໆ'), + ('\u{ec8}', '\u{ece}'), + ('໐', '໙'), + ('ໜ', 'ໟ'), + ('ༀ', 'ༀ'), + ('\u{f18}', '\u{f19}'), + ('༠', '༩'), + ('\u{f35}', '\u{f35}'), + ('\u{f37}', '\u{f37}'), + ('\u{f39}', '\u{f39}'), + ('༾', 'ཇ'), + ('ཉ', 'ཬ'), + ('\u{f71}', '\u{f84}'), + ('\u{f86}', '\u{f97}'), + ('\u{f99}', '\u{fbc}'), + ('\u{fc6}', '\u{fc6}'), + ('က', '၉'), + ('ၐ', '\u{109d}'), + ('Ⴀ', 'Ⴥ'), + ('Ⴧ', 'Ⴧ'), + ('Ⴭ', 'Ⴭ'), + ('ა', 'ჺ'), + ('ჼ', 'ቈ'), + ('ቊ', 'ቍ'), + ('ቐ', 'ቖ'), + ('ቘ', 'ቘ'), + ('ቚ', 'ቝ'), + ('በ', 'ኈ'), + ('ኊ', 'ኍ'), + ('ነ', 'ኰ'), + ('ኲ', 'ኵ'), + ('ኸ', 'ኾ'), + ('ዀ', 'ዀ'), + ('ዂ', 'ዅ'), + ('ወ', 'ዖ'), + ('ዘ', 'ጐ'), + ('ጒ', 'ጕ'), + ('ጘ', 'ፚ'), + ('\u{135d}', '\u{135f}'), + ('ᎀ', 'ᎏ'), + ('Ꭰ', 'Ᏽ'), + ('ᏸ', 'ᏽ'), + ('ᐁ', 
'ᙬ'), + ('ᙯ', 'ᙿ'), + ('ᚁ', 'ᚚ'), + ('ᚠ', 'ᛪ'), + ('ᛮ', 'ᛸ'), + ('ᜀ', '᜕'), + ('ᜟ', '᜴'), + ('ᝀ', '\u{1753}'), + ('ᝠ', 'ᝬ'), + ('ᝮ', 'ᝰ'), + ('\u{1772}', '\u{1773}'), + ('ក', '\u{17d3}'), + ('ៗ', 'ៗ'), + ('ៜ', '\u{17dd}'), + ('០', '៩'), + ('\u{180b}', '\u{180d}'), + ('\u{180f}', '᠙'), + ('ᠠ', 'ᡸ'), + ('ᢀ', 'ᢪ'), + ('ᢰ', 'ᣵ'), + ('ᤀ', 'ᤞ'), + ('\u{1920}', 'ᤫ'), + ('ᤰ', '\u{193b}'), + ('᥆', 'ᥭ'), + ('ᥰ', 'ᥴ'), + ('ᦀ', 'ᦫ'), + ('ᦰ', 'ᧉ'), + ('᧐', '᧙'), + ('ᨀ', '\u{1a1b}'), + ('ᨠ', '\u{1a5e}'), + ('\u{1a60}', '\u{1a7c}'), + ('\u{1a7f}', '᪉'), + ('᪐', '᪙'), + ('ᪧ', 'ᪧ'), + ('\u{1ab0}', '\u{1ace}'), + ('\u{1b00}', 'ᭌ'), + ('᭐', '᭙'), + ('\u{1b6b}', '\u{1b73}'), + ('\u{1b80}', '᯳'), + ('ᰀ', '\u{1c37}'), + ('᱀', '᱉'), + ('ᱍ', 'ᱽ'), + ('ᲀ', 'ᲈ'), + ('Ა', 'Ჺ'), + ('Ჽ', 'Ჿ'), + ('\u{1cd0}', '\u{1cd2}'), + ('\u{1cd4}', 'ᳺ'), + ('ᴀ', 'ἕ'), + ('Ἐ', 'Ἕ'), + ('ἠ', 'ὅ'), + ('Ὀ', 'Ὅ'), + ('ὐ', 'ὗ'), + ('Ὑ', 'Ὑ'), + ('Ὓ', 'Ὓ'), + ('Ὕ', 'Ὕ'), + ('Ὗ', 'ώ'), + ('ᾀ', 'ᾴ'), + ('ᾶ', 'ᾼ'), + ('ι', 'ι'), + ('ῂ', 'ῄ'), + ('ῆ', 'ῌ'), + ('ῐ', 'ΐ'), + ('ῖ', 'Ί'), + ('ῠ', 'Ῥ'), + ('ῲ', 'ῴ'), + ('ῶ', 'ῼ'), + ('\u{200c}', '\u{200d}'), + ('‿', '⁀'), + ('⁔', '⁔'), + ('ⁱ', 'ⁱ'), + ('ⁿ', 'ⁿ'), + ('ₐ', 'ₜ'), + ('\u{20d0}', '\u{20f0}'), + ('ℂ', 'ℂ'), + ('ℇ', 'ℇ'), + ('ℊ', 'ℓ'), + ('ℕ', 'ℕ'), + ('ℙ', 'ℝ'), + ('ℤ', 'ℤ'), + ('Ω', 'Ω'), + ('ℨ', 'ℨ'), + ('K', 'ℭ'), + ('ℯ', 'ℹ'), + ('ℼ', 'ℿ'), + ('ⅅ', 'ⅉ'), + ('ⅎ', 'ⅎ'), + ('Ⅰ', 'ↈ'), + ('Ⓐ', 'ⓩ'), + ('Ⰰ', 'ⳤ'), + ('Ⳬ', 'ⳳ'), + ('ⴀ', 'ⴥ'), + ('ⴧ', 'ⴧ'), + ('ⴭ', 'ⴭ'), + ('ⴰ', 'ⵧ'), + ('ⵯ', 'ⵯ'), + ('\u{2d7f}', 'ⶖ'), + ('ⶠ', 'ⶦ'), + ('ⶨ', 'ⶮ'), + ('ⶰ', 'ⶶ'), + ('ⶸ', 'ⶾ'), + ('ⷀ', 'ⷆ'), + ('ⷈ', 'ⷎ'), + ('ⷐ', 'ⷖ'), + ('ⷘ', 'ⷞ'), + ('\u{2de0}', '\u{2dff}'), + ('ⸯ', 'ⸯ'), + ('々', '〇'), + ('〡', '\u{302f}'), + ('〱', '〵'), + ('〸', '〼'), + ('ぁ', 'ゖ'), + ('\u{3099}', '\u{309a}'), + ('ゝ', 'ゟ'), + ('ァ', 'ヺ'), + ('ー', 'ヿ'), + ('ㄅ', 'ㄯ'), + ('ㄱ', 'ㆎ'), + ('ㆠ', 'ㆿ'), + ('ㇰ', 'ㇿ'), + ('㐀', '䶿'), + ('一', 'ꒌ'), + ('ꓐ', 'ꓽ'), + ('ꔀ', 'ꘌ'), + ('ꘐ', 'ꘫ'), + ('Ꙁ', '\u{a672}'), + 
('\u{a674}', '\u{a67d}'), + ('ꙿ', '\u{a6f1}'), + ('ꜗ', 'ꜟ'), + ('Ꜣ', 'ꞈ'), + ('Ꞌ', 'ꟊ'), + ('Ꟑ', 'ꟑ'), + ('ꟓ', 'ꟓ'), + ('ꟕ', 'ꟙ'), + ('ꟲ', 'ꠧ'), + ('\u{a82c}', '\u{a82c}'), + ('ꡀ', 'ꡳ'), + ('ꢀ', '\u{a8c5}'), + ('꣐', '꣙'), + ('\u{a8e0}', 'ꣷ'), + ('ꣻ', 'ꣻ'), + ('ꣽ', '\u{a92d}'), + ('ꤰ', '꥓'), + ('ꥠ', 'ꥼ'), + ('\u{a980}', '꧀'), + ('ꧏ', '꧙'), + ('ꧠ', 'ꧾ'), + ('ꨀ', '\u{aa36}'), + ('ꩀ', 'ꩍ'), + ('꩐', '꩙'), + ('ꩠ', 'ꩶ'), + ('ꩺ', 'ꫂ'), + ('ꫛ', 'ꫝ'), + ('ꫠ', 'ꫯ'), + ('ꫲ', '\u{aaf6}'), + ('ꬁ', 'ꬆ'), + ('ꬉ', 'ꬎ'), + ('ꬑ', 'ꬖ'), + ('ꬠ', 'ꬦ'), + ('ꬨ', 'ꬮ'), + ('ꬰ', 'ꭚ'), + ('ꭜ', 'ꭩ'), + ('ꭰ', 'ꯪ'), + ('꯬', '\u{abed}'), + ('꯰', '꯹'), + ('가', '힣'), + ('ힰ', 'ퟆ'), + ('ퟋ', 'ퟻ'), + ('豈', '舘'), + ('並', '龎'), + ('ff', 'st'), + ('ﬓ', 'ﬗ'), + ('יִ', 'ﬨ'), + ('שׁ', 'זּ'), + ('טּ', 'לּ'), + ('מּ', 'מּ'), + ('נּ', 'סּ'), + ('ףּ', 'פּ'), + ('צּ', 'ﮱ'), + ('ﯓ', 'ﴽ'), + ('ﵐ', 'ﶏ'), + ('ﶒ', 'ﷇ'), + ('ﷰ', 'ﷻ'), + ('\u{fe00}', '\u{fe0f}'), + ('\u{fe20}', '\u{fe2f}'), + ('︳', '︴'), + ('﹍', '﹏'), + ('ﹰ', 'ﹴ'), + ('ﹶ', 'ﻼ'), + ('0', '9'), + ('A', 'Z'), + ('_', '_'), + ('a', 'z'), + ('ヲ', 'ᄒ'), + ('ᅡ', 'ᅦ'), + ('ᅧ', 'ᅬ'), + ('ᅭ', 'ᅲ'), + ('ᅳ', 'ᅵ'), + ('𐀀', '𐀋'), + ('𐀍', '𐀦'), + ('𐀨', '𐀺'), + ('𐀼', '𐀽'), + ('𐀿', '𐁍'), + ('𐁐', '𐁝'), + ('𐂀', '𐃺'), + ('𐅀', '𐅴'), + ('\u{101fd}', '\u{101fd}'), + ('𐊀', '𐊜'), + ('𐊠', '𐋐'), + ('\u{102e0}', '\u{102e0}'), + ('𐌀', '𐌟'), + ('𐌭', '𐍊'), + ('𐍐', '\u{1037a}'), + ('𐎀', '𐎝'), + ('𐎠', '𐏃'), + ('𐏈', '𐏏'), + ('𐏑', '𐏕'), + ('𐐀', '𐒝'), + ('𐒠', '𐒩'), + ('𐒰', '𐓓'), + ('𐓘', '𐓻'), + ('𐔀', '𐔧'), + ('𐔰', '𐕣'), + ('𐕰', '𐕺'), + ('𐕼', '𐖊'), + ('𐖌', '𐖒'), + ('𐖔', '𐖕'), + ('𐖗', '𐖡'), + ('𐖣', '𐖱'), + ('𐖳', '𐖹'), + ('𐖻', '𐖼'), + ('𐘀', '𐜶'), + ('𐝀', '𐝕'), + ('𐝠', '𐝧'), + ('𐞀', '𐞅'), + ('𐞇', '𐞰'), + ('𐞲', '𐞺'), + ('𐠀', '𐠅'), + ('𐠈', '𐠈'), + ('𐠊', '𐠵'), + ('𐠷', '𐠸'), + ('𐠼', '𐠼'), + ('𐠿', '𐡕'), + ('𐡠', '𐡶'), + ('𐢀', '𐢞'), + ('𐣠', '𐣲'), + ('𐣴', '𐣵'), + ('𐤀', '𐤕'), + ('𐤠', '𐤹'), + ('𐦀', '𐦷'), + ('𐦾', '𐦿'), + ('𐨀', '\u{10a03}'), + ('\u{10a05}', '\u{10a06}'), + ('\u{10a0c}', '𐨓'), + ('𐨕', 
'𐨗'), + ('𐨙', '𐨵'), + ('\u{10a38}', '\u{10a3a}'), + ('\u{10a3f}', '\u{10a3f}'), + ('𐩠', '𐩼'), + ('𐪀', '𐪜'), + ('𐫀', '𐫇'), + ('𐫉', '\u{10ae6}'), + ('𐬀', '𐬵'), + ('𐭀', '𐭕'), + ('𐭠', '𐭲'), + ('𐮀', '𐮑'), + ('𐰀', '𐱈'), + ('𐲀', '𐲲'), + ('𐳀', '𐳲'), + ('𐴀', '\u{10d27}'), + ('𐴰', '𐴹'), + ('𐺀', '𐺩'), + ('\u{10eab}', '\u{10eac}'), + ('𐺰', '𐺱'), + ('\u{10efd}', '𐼜'), + ('𐼧', '𐼧'), + ('𐼰', '\u{10f50}'), + ('𐽰', '\u{10f85}'), + ('𐾰', '𐿄'), + ('𐿠', '𐿶'), + ('𑀀', '\u{11046}'), + ('𑁦', '𑁵'), + ('\u{1107f}', '\u{110ba}'), + ('\u{110c2}', '\u{110c2}'), + ('𑃐', '𑃨'), + ('𑃰', '𑃹'), + ('\u{11100}', '\u{11134}'), + ('𑄶', '𑄿'), + ('𑅄', '𑅇'), + ('𑅐', '\u{11173}'), + ('𑅶', '𑅶'), + ('\u{11180}', '𑇄'), + ('\u{111c9}', '\u{111cc}'), + ('𑇎', '𑇚'), + ('𑇜', '𑇜'), + ('𑈀', '𑈑'), + ('𑈓', '\u{11237}'), + ('\u{1123e}', '\u{11241}'), + ('𑊀', '𑊆'), + ('𑊈', '𑊈'), + ('𑊊', '𑊍'), + ('𑊏', '𑊝'), + ('𑊟', '𑊨'), + ('𑊰', '\u{112ea}'), + ('𑋰', '𑋹'), + ('\u{11300}', '𑌃'), + ('𑌅', '𑌌'), + ('𑌏', '𑌐'), + ('𑌓', '𑌨'), + ('𑌪', '𑌰'), + ('𑌲', '𑌳'), + ('𑌵', '𑌹'), + ('\u{1133b}', '𑍄'), + ('𑍇', '𑍈'), + ('𑍋', '𑍍'), + ('𑍐', '𑍐'), + ('\u{11357}', '\u{11357}'), + ('𑍝', '𑍣'), + ('\u{11366}', '\u{1136c}'), + ('\u{11370}', '\u{11374}'), + ('𑐀', '𑑊'), + ('𑑐', '𑑙'), + ('\u{1145e}', '𑑡'), + ('𑒀', '𑓅'), + ('𑓇', '𑓇'), + ('𑓐', '𑓙'), + ('𑖀', '\u{115b5}'), + ('𑖸', '\u{115c0}'), + ('𑗘', '\u{115dd}'), + ('𑘀', '\u{11640}'), + ('𑙄', '𑙄'), + ('𑙐', '𑙙'), + ('𑚀', '𑚸'), + ('𑛀', '𑛉'), + ('𑜀', '𑜚'), + ('\u{1171d}', '\u{1172b}'), + ('𑜰', '𑜹'), + ('𑝀', '𑝆'), + ('𑠀', '\u{1183a}'), + ('𑢠', '𑣩'), + ('𑣿', '𑤆'), + ('𑤉', '𑤉'), + ('𑤌', '𑤓'), + ('𑤕', '𑤖'), + ('𑤘', '𑤵'), + ('𑤷', '𑤸'), + ('\u{1193b}', '\u{11943}'), + ('𑥐', '𑥙'), + ('𑦠', '𑦧'), + ('𑦪', '\u{119d7}'), + ('\u{119da}', '𑧡'), + ('𑧣', '𑧤'), + ('𑨀', '\u{11a3e}'), + ('\u{11a47}', '\u{11a47}'), + ('𑩐', '\u{11a99}'), + ('𑪝', '𑪝'), + ('𑪰', '𑫸'), + ('𑰀', '𑰈'), + ('𑰊', '\u{11c36}'), + ('\u{11c38}', '𑱀'), + ('𑱐', '𑱙'), + ('𑱲', '𑲏'), + ('\u{11c92}', '\u{11ca7}'), + ('𑲩', '\u{11cb6}'), + ('𑴀', '𑴆'), + ('𑴈', '𑴉'), 
+ ('𑴋', '\u{11d36}'), + ('\u{11d3a}', '\u{11d3a}'), + ('\u{11d3c}', '\u{11d3d}'), + ('\u{11d3f}', '\u{11d47}'), + ('𑵐', '𑵙'), + ('𑵠', '𑵥'), + ('𑵧', '𑵨'), + ('𑵪', '𑶎'), + ('\u{11d90}', '\u{11d91}'), + ('𑶓', '𑶘'), + ('𑶠', '𑶩'), + ('𑻠', '𑻶'), + ('\u{11f00}', '𑼐'), + ('𑼒', '\u{11f3a}'), + ('𑼾', '\u{11f42}'), + ('𑽐', '𑽙'), + ('𑾰', '𑾰'), + ('𒀀', '𒎙'), + ('𒐀', '𒑮'), + ('𒒀', '𒕃'), + ('𒾐', '𒿰'), + ('𓀀', '𓐯'), + ('\u{13440}', '\u{13455}'), + ('𔐀', '𔙆'), + ('𖠀', '𖨸'), + ('𖩀', '𖩞'), + ('𖩠', '𖩩'), + ('𖩰', '𖪾'), + ('𖫀', '𖫉'), + ('𖫐', '𖫭'), + ('\u{16af0}', '\u{16af4}'), + ('𖬀', '\u{16b36}'), + ('𖭀', '𖭃'), + ('𖭐', '𖭙'), + ('𖭣', '𖭷'), + ('𖭽', '𖮏'), + ('𖹀', '𖹿'), + ('𖼀', '𖽊'), + ('\u{16f4f}', '𖾇'), + ('\u{16f8f}', '𖾟'), + ('𖿠', '𖿡'), + ('𖿣', '\u{16fe4}'), + ('𖿰', '𖿱'), + ('𗀀', '𘟷'), + ('𘠀', '𘳕'), + ('𘴀', '𘴈'), + ('𚿰', '𚿳'), + ('𚿵', '𚿻'), + ('𚿽', '𚿾'), + ('𛀀', '𛄢'), + ('𛄲', '𛄲'), + ('𛅐', '𛅒'), + ('𛅕', '𛅕'), + ('𛅤', '𛅧'), + ('𛅰', '𛋻'), + ('𛰀', '𛱪'), + ('𛱰', '𛱼'), + ('𛲀', '𛲈'), + ('𛲐', '𛲙'), + ('\u{1bc9d}', '\u{1bc9e}'), + ('\u{1cf00}', '\u{1cf2d}'), + ('\u{1cf30}', '\u{1cf46}'), + ('\u{1d165}', '\u{1d169}'), + ('𝅭', '\u{1d172}'), + ('\u{1d17b}', '\u{1d182}'), + ('\u{1d185}', '\u{1d18b}'), + ('\u{1d1aa}', '\u{1d1ad}'), + ('\u{1d242}', '\u{1d244}'), + ('𝐀', '𝑔'), + ('𝑖', '𝒜'), + ('𝒞', '𝒟'), + ('𝒢', '𝒢'), + ('𝒥', '𝒦'), + ('𝒩', '𝒬'), + ('𝒮', '𝒹'), + ('𝒻', '𝒻'), + ('𝒽', '𝓃'), + ('𝓅', '𝔅'), + ('𝔇', '𝔊'), + ('𝔍', '𝔔'), + ('𝔖', '𝔜'), + ('𝔞', '𝔹'), + ('𝔻', '𝔾'), + ('𝕀', '𝕄'), + ('𝕆', '𝕆'), + ('𝕊', '𝕐'), + ('𝕒', '𝚥'), + ('𝚨', '𝛀'), + ('𝛂', '𝛚'), + ('𝛜', '𝛺'), + ('𝛼', '𝜔'), + ('𝜖', '𝜴'), + ('𝜶', '𝝎'), + ('𝝐', '𝝮'), + ('𝝰', '𝞈'), + ('𝞊', '𝞨'), + ('𝞪', '𝟂'), + ('𝟄', '𝟋'), + ('𝟎', '𝟿'), + ('\u{1da00}', '\u{1da36}'), + ('\u{1da3b}', '\u{1da6c}'), + ('\u{1da75}', '\u{1da75}'), + ('\u{1da84}', '\u{1da84}'), + ('\u{1da9b}', '\u{1da9f}'), + ('\u{1daa1}', '\u{1daaf}'), + ('𝼀', '𝼞'), + ('𝼥', '𝼪'), + ('\u{1e000}', '\u{1e006}'), + ('\u{1e008}', '\u{1e018}'), + ('\u{1e01b}', '\u{1e021}'), + ('\u{1e023}', 
'\u{1e024}'), + ('\u{1e026}', '\u{1e02a}'), + ('𞀰', '𞁭'), + ('\u{1e08f}', '\u{1e08f}'), + ('𞄀', '𞄬'), + ('\u{1e130}', '𞄽'), + ('𞅀', '𞅉'), + ('𞅎', '𞅎'), + ('𞊐', '\u{1e2ae}'), + ('𞋀', '𞋹'), + ('𞓐', '𞓹'), + ('𞟠', '𞟦'), + ('𞟨', '𞟫'), + ('𞟭', '𞟮'), + ('𞟰', '𞟾'), + ('𞠀', '𞣄'), + ('\u{1e8d0}', '\u{1e8d6}'), + ('𞤀', '𞥋'), + ('𞥐', '𞥙'), + ('𞸀', '𞸃'), + ('𞸅', '𞸟'), + ('𞸡', '𞸢'), + ('𞸤', '𞸤'), + ('𞸧', '𞸧'), + ('𞸩', '𞸲'), + ('𞸴', '𞸷'), + ('𞸹', '𞸹'), + ('𞸻', '𞸻'), + ('𞹂', '𞹂'), + ('𞹇', '𞹇'), + ('𞹉', '𞹉'), + ('𞹋', '𞹋'), + ('𞹍', '𞹏'), + ('𞹑', '𞹒'), + ('𞹔', '𞹔'), + ('𞹗', '𞹗'), + ('𞹙', '𞹙'), + ('𞹛', '𞹛'), + ('𞹝', '𞹝'), + ('𞹟', '𞹟'), + ('𞹡', '𞹢'), + ('𞹤', '𞹤'), + ('𞹧', '𞹪'), + ('𞹬', '𞹲'), + ('𞹴', '𞹷'), + ('𞹹', '𞹼'), + ('𞹾', '𞹾'), + ('𞺀', '𞺉'), + ('𞺋', '𞺛'), + ('𞺡', '𞺣'), + ('𞺥', '𞺩'), + ('𞺫', '𞺻'), + ('🄰', '🅉'), + ('🅐', '🅩'), + ('🅰', '🆉'), + ('🯰', '🯹'), + ('𠀀', '𪛟'), + ('𪜀', '𫜹'), + ('𫝀', '𫠝'), + ('𫠠', '𬺡'), + ('𬺰', '𮯠'), + ('丽', '𪘀'), + ('𰀀', '𱍊'), + ('𱍐', '𲎯'), + ('\u{e0100}', '\u{e01ef}'), +]; diff --git a/vendor/regex-automata/src/util/utf8.rs b/vendor/regex-automata/src/util/utf8.rs new file mode 100644 index 0000000..91b27ef --- /dev/null +++ b/vendor/regex-automata/src/util/utf8.rs @@ -0,0 +1,196 @@ +/*! +Utilities for dealing with UTF-8. + +This module provides some UTF-8 related helper routines, including an +incremental decoder. +*/ + +/// Returns true if and only if the given byte is considered a word character. +/// This only applies to ASCII. +/// +/// This was copied from regex-syntax so that we can use it to determine the +/// starting DFA state while searching without depending on regex-syntax. The +/// definition is never going to change, so there's no maintenance/bit-rot +/// hazard here. +#[cfg_attr(feature = "perf-inline", inline(always))] +pub(crate) fn is_word_byte(b: u8) -> bool { + const fn mkwordset() -> [bool; 256] { + // FIXME: Use as_usize() once const functions in traits are stable. 
+ let mut set = [false; 256]; + set[b'_' as usize] = true; + + let mut byte = b'0'; + while byte <= b'9' { + set[byte as usize] = true; + byte += 1; + } + byte = b'A'; + while byte <= b'Z' { + set[byte as usize] = true; + byte += 1; + } + byte = b'a'; + while byte <= b'z' { + set[byte as usize] = true; + byte += 1; + } + set + } + const WORD: [bool; 256] = mkwordset(); + WORD[b as usize] +} + +/// Decodes the next UTF-8 encoded codepoint from the given byte slice. +/// +/// If no valid encoding of a codepoint exists at the beginning of the given +/// byte slice, then the first byte is returned instead. +/// +/// This returns `None` if and only if `bytes` is empty. +/// +/// This never panics. +/// +/// *WARNING*: This is not designed for performance. If you're looking for a +/// fast UTF-8 decoder, this is not it. If you feel like you need one in this +/// crate, then please file an issue and discuss your use case. +#[cfg_attr(feature = "perf-inline", inline(always))] +pub(crate) fn decode(bytes: &[u8]) -> Option<Result<char, u8>> { + if bytes.is_empty() { + return None; + } + let len = match len(bytes[0]) { + None => return Some(Err(bytes[0])), + Some(len) if len > bytes.len() => return Some(Err(bytes[0])), + Some(1) => return Some(Ok(char::from(bytes[0]))), + Some(len) => len, + }; + match core::str::from_utf8(&bytes[..len]) { + Ok(s) => Some(Ok(s.chars().next().unwrap())), + Err(_) => Some(Err(bytes[0])), + } +} + +/// Decodes the last UTF-8 encoded codepoint from the given byte slice. +/// +/// If no valid encoding of a codepoint exists at the end of the given byte +/// slice, then the last byte is returned instead. +/// +/// This returns `None` if and only if `bytes` is empty.
+#[cfg_attr(feature = "perf-inline", inline(always))] +pub(crate) fn decode_last(bytes: &[u8]) -> Option<Result<char, u8>> { + if bytes.is_empty() { + return None; + } + let mut start = bytes.len() - 1; + let limit = bytes.len().saturating_sub(4); + while start > limit && !is_leading_or_invalid_byte(bytes[start]) { + start -= 1; + } + match decode(&bytes[start..]) { + None => None, + Some(Ok(ch)) => Some(Ok(ch)), + Some(Err(_)) => Some(Err(bytes[bytes.len() - 1])), + } +} + +/// Given a UTF-8 leading byte, this returns the total number of code units +/// in the following encoded codepoint. +/// +/// If the given byte is not a valid UTF-8 leading byte, then this returns +/// `None`. +#[cfg_attr(feature = "perf-inline", inline(always))] +fn len(byte: u8) -> Option<usize> { + if byte <= 0x7F { + return Some(1); + } else if byte & 0b1100_0000 == 0b1000_0000 { + return None; + } else if byte <= 0b1101_1111 { + Some(2) + } else if byte <= 0b1110_1111 { + Some(3) + } else if byte <= 0b1111_0111 { + Some(4) + } else { + None + } +} + +/// Returns true if and only if the given offset in the given bytes falls on a +/// valid UTF-8 encoded codepoint boundary. +/// +/// If `bytes` is not valid UTF-8, then the behavior of this routine is +/// unspecified. +#[cfg_attr(feature = "perf-inline", inline(always))] +pub(crate) fn is_boundary(bytes: &[u8], i: usize) -> bool { + match bytes.get(i) { + // The position at the end of the bytes always represents an empty + // string, which is a valid boundary. But anything after that doesn't + // make much sense to call a valid boundary. + None => i == bytes.len(), + // Other than ASCII (where the most significant bit is never set), + // valid starting bytes always have their most significant two bits + // set, whereas continuation bytes never have their second most + // significant bit set. Therefore, this only returns true when bytes[i] + // corresponds to a byte that begins a valid UTF-8 encoding of a + // Unicode scalar value.
+ Some(&b) => b <= 0b0111_1111 || b >= 0b1100_0000, + } +} + +/// Returns true if and only if the given byte is either a valid leading UTF-8 +/// byte, or is otherwise an invalid byte that can never appear anywhere in a +/// valid UTF-8 sequence. +#[cfg_attr(feature = "perf-inline", inline(always))] +fn is_leading_or_invalid_byte(b: u8) -> bool { + // In the ASCII case, the most significant bit is never set. The leading + // byte of a 2/3/4-byte sequence always has the top two most significant + // bits set. For bytes that can never appear anywhere in valid UTF-8, this + // also returns true, since every such byte has its two most significant + // bits set: + // + // \xC0 :: 11000000 + // \xC1 :: 11000001 + // \xF5 :: 11110101 + // \xF6 :: 11110110 + // \xF7 :: 11110111 + // \xF8 :: 11111000 + // \xF9 :: 11111001 + // \xFA :: 11111010 + // \xFB :: 11111011 + // \xFC :: 11111100 + // \xFD :: 11111101 + // \xFE :: 11111110 + // \xFF :: 11111111 + (b & 0b1100_0000) != 0b1000_0000 +} + +/* +/// Returns the smallest possible index of the next valid UTF-8 sequence +/// starting after `i`. +/// +/// For all inputs, including invalid UTF-8 and any value of `i`, the return +/// value is guaranteed to be greater than `i`. (If there is no value greater +/// than `i` that fits in `usize`, then this panics.) +/// +/// Generally speaking, this should only be called on `text` when it is +/// permitted to assume that it is valid UTF-8 and where either `i >= +/// text.len()` or where `text[i]` is a leading byte of a UTF-8 sequence. +/// +/// NOTE: This method was used in a previous conception of iterators where we +/// specifically tried to skip over empty matches that split a codepoint by +/// simply requiring that our next search begin at the beginning of codepoint. +/// But we ended up changing that technique to always advance by 1 byte and +/// then filter out matches that split a codepoint after-the-fact. Thus, we no +/// longer use this method. 
But I've kept it around in case we want to switch +/// back to this approach. Its guarantees are a little subtle, so I'd prefer +/// not to rebuild it from whole cloth. +pub(crate) fn next(text: &[u8], i: usize) -> usize { + let b = match text.get(i) { + None => return i.checked_add(1).unwrap(), + Some(&b) => b, + }; + // For cases where we see an invalid UTF-8 byte, there isn't much we can do + // other than just start at the next byte. + let inc = len(b).unwrap_or(1); + i.checked_add(inc).unwrap() +} +*/ diff --git a/vendor/regex-automata/src/util/wire.rs b/vendor/regex-automata/src/util/wire.rs new file mode 100644 index 0000000..ecf4fd8 --- /dev/null +++ b/vendor/regex-automata/src/util/wire.rs @@ -0,0 +1,975 @@ +/*! +Types and routines that support the wire format of finite automata. + +Currently, this module just exports a few error types and some small helpers +for deserializing [dense DFAs](crate::dfa::dense::DFA) using correct alignment. +*/ + +/* +A collection of helper functions, types and traits for serializing automata. + +This crate defines its own bespoke serialization mechanism for some structures +provided in the public API, namely, DFAs. A bespoke mechanism was developed +primarily because structures like automata demand a specific binary format. +Attempting to encode their rich structure in an existing serialization +format is just not feasible. Moreover, the format for each structure is +generally designed such that deserialization is cheap. More specifically, that +deserialization can be done in constant time. (The idea being that you can +embed it into your binary or mmap it, and then use it immediately.) + +In order to achieve this, the dense and sparse DFAs in this crate use an +in-memory representation that very closely corresponds to its binary serialized +form. This pervades and complicates everything, and in some cases, requires +dealing with alignment and reasoning about safety. + +This technique does have major advantages. 
In particular, it permits doing +the potentially costly work of compiling a finite state machine in an offline +manner, and then loading it at runtime not only without having to re-compile +the regex, but even without the code required to do the compilation. This, for +example, permits one to use a pre-compiled DFA not only in environments without +Rust's standard library, but also in environments without a heap. + +In the code below, whenever we insert some kind of padding, it's to enforce a +4-byte alignment, unless otherwise noted. Namely, u32 is the only state ID type +supported. (In a previous version of this library, DFAs were generic over the +state ID representation.) + +Also, serialization generally requires the caller to specify endianness, +where as deserialization always assumes native endianness (otherwise cheap +deserialization would be impossible). This implies that serializing a structure +generally requires serializing both its big-endian and little-endian variants, +and then loading the correct one based on the target's endianness. +*/ + +use core::{ + cmp, + convert::{TryFrom, TryInto}, + mem::size_of, +}; + +#[cfg(feature = "alloc")] +use alloc::{vec, vec::Vec}; + +use crate::util::{ + int::Pointer, + primitives::{PatternID, PatternIDError, StateID, StateIDError}, +}; + +/// A hack to align a smaller type `B` with a bigger type `T`. +/// +/// The usual use of this is with `B = [u8]` and `T = u32`. That is, +/// it permits aligning a sequence of bytes on a 4-byte boundary. This +/// is useful in contexts where one wants to embed a serialized [dense +/// DFA](crate::dfa::dense::DFA) into a Rust a program while guaranteeing the +/// alignment required for the DFA. +/// +/// See [`dense::DFA::from_bytes`](crate::dfa::dense::DFA::from_bytes) for an +/// example of how to use this type. +#[repr(C)] +#[derive(Debug)] +pub struct AlignAs { + /// A zero-sized field indicating the alignment we want. 
+ pub _align: [T; 0], + /// A possibly non-sized field containing a sequence of bytes. + pub bytes: B, +} + +/// An error that occurs when serializing an object from this crate. +/// +/// Serialization, as used in this crate, universally refers to the process +/// of transforming a structure (like a DFA) into a custom binary format +/// represented by `&[u8]`. To this end, serialization is generally infallible. +/// However, it can fail when caller provided buffer sizes are too small. When +/// that occurs, a serialization error is reported. +/// +/// A `SerializeError` provides no introspection capabilities. Its only +/// supported operation is conversion to a human readable error message. +/// +/// This error type implements the `std::error::Error` trait only when the +/// `std` feature is enabled. Otherwise, this type is defined in all +/// configurations. +#[derive(Debug)] +pub struct SerializeError { + /// The name of the thing that a buffer is too small for. + /// + /// Currently, the only kind of serialization error is one that is + /// committed by a caller: providing a destination buffer that is too + /// small to fit the serialized object. This makes sense conceptually, + /// since every valid inhabitant of a type should be serializable. + /// + /// This is somewhat exposed in the public API of this crate. For example, + /// the `to_bytes_{big,little}_endian` APIs return a `Vec` and are + /// guaranteed to never panic or error. This is only possible because the + /// implementation guarantees that it will allocate a `Vec` that is + /// big enough. + /// + /// In summary, if a new serialization error kind needs to be added, then + /// it will need careful consideration. 
+ what: &'static str, +} + +impl SerializeError { + pub(crate) fn buffer_too_small(what: &'static str) -> SerializeError { + SerializeError { what } + } +} + +impl core::fmt::Display for SerializeError { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + write!(f, "destination buffer is too small to write {}", self.what) + } +} + +#[cfg(feature = "std")] +impl std::error::Error for SerializeError {} + +/// An error that occurs when deserializing an object defined in this crate. +/// +/// Serialization, as used in this crate, universally refers to the process +/// of transforming a structure (like a DFA) into a custom binary format +/// represented by `&[u8]`. Deserialization, then, refers to the process of +/// cheaply converting this binary format back to the object's in-memory +/// representation as defined in this crate. To the extent possible, +/// deserialization will report this error whenever this process fails. +/// +/// A `DeserializeError` provides no introspection capabilities. Its only +/// supported operation is conversion to a human readable error message. +/// +/// This error type implements the `std::error::Error` trait only when the +/// `std` feature is enabled. Otherwise, this type is defined in all +/// configurations. 
+#[derive(Debug)] +pub struct DeserializeError(DeserializeErrorKind); + +#[derive(Debug)] +enum DeserializeErrorKind { + Generic { msg: &'static str }, + BufferTooSmall { what: &'static str }, + InvalidUsize { what: &'static str }, + VersionMismatch { expected: u32, found: u32 }, + EndianMismatch { expected: u32, found: u32 }, + AlignmentMismatch { alignment: usize, address: usize }, + LabelMismatch { expected: &'static str }, + ArithmeticOverflow { what: &'static str }, + PatternID { err: PatternIDError, what: &'static str }, + StateID { err: StateIDError, what: &'static str }, +} + +impl DeserializeError { + pub(crate) fn generic(msg: &'static str) -> DeserializeError { + DeserializeError(DeserializeErrorKind::Generic { msg }) + } + + pub(crate) fn buffer_too_small(what: &'static str) -> DeserializeError { + DeserializeError(DeserializeErrorKind::BufferTooSmall { what }) + } + + fn invalid_usize(what: &'static str) -> DeserializeError { + DeserializeError(DeserializeErrorKind::InvalidUsize { what }) + } + + fn version_mismatch(expected: u32, found: u32) -> DeserializeError { + DeserializeError(DeserializeErrorKind::VersionMismatch { + expected, + found, + }) + } + + fn endian_mismatch(expected: u32, found: u32) -> DeserializeError { + DeserializeError(DeserializeErrorKind::EndianMismatch { + expected, + found, + }) + } + + fn alignment_mismatch( + alignment: usize, + address: usize, + ) -> DeserializeError { + DeserializeError(DeserializeErrorKind::AlignmentMismatch { + alignment, + address, + }) + } + + fn label_mismatch(expected: &'static str) -> DeserializeError { + DeserializeError(DeserializeErrorKind::LabelMismatch { expected }) + } + + fn arithmetic_overflow(what: &'static str) -> DeserializeError { + DeserializeError(DeserializeErrorKind::ArithmeticOverflow { what }) + } + + fn pattern_id_error( + err: PatternIDError, + what: &'static str, + ) -> DeserializeError { + DeserializeError(DeserializeErrorKind::PatternID { err, what }) + } + + pub(crate) fn 
state_id_error( + err: StateIDError, + what: &'static str, + ) -> DeserializeError { + DeserializeError(DeserializeErrorKind::StateID { err, what }) + } +} + +#[cfg(feature = "std")] +impl std::error::Error for DeserializeError {} + +impl core::fmt::Display for DeserializeError { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + use self::DeserializeErrorKind::*; + + match self.0 { + Generic { msg } => write!(f, "{}", msg), + BufferTooSmall { what } => { + write!(f, "buffer is too small to read {}", what) + } + InvalidUsize { what } => { + write!(f, "{} is too big to fit in a usize", what) + } + VersionMismatch { expected, found } => write!( + f, + "unsupported version: \ + expected version {} but found version {}", + expected, found, + ), + EndianMismatch { expected, found } => write!( + f, + "endianness mismatch: expected 0x{:X} but got 0x{:X}. \ + (Are you trying to load an object serialized with a \ + different endianness?)", + expected, found, + ), + AlignmentMismatch { alignment, address } => write!( + f, + "alignment mismatch: slice starts at address \ + 0x{:X}, which is not aligned to a {} byte boundary", + address, alignment, + ), + LabelMismatch { expected } => write!( + f, + "label mismatch: start of serialized object should \ + contain a NUL terminated {:?} label, but a different \ + label was found", + expected, + ), + ArithmeticOverflow { what } => { + write!(f, "arithmetic overflow for {}", what) + } + PatternID { ref err, what } => { + write!(f, "failed to read pattern ID for {}: {}", what, err) + } + StateID { ref err, what } => { + write!(f, "failed to read state ID for {}: {}", what, err) + } + } + } +} + +/// Safely converts a `&[u32]` to `&[StateID]` with zero cost. +#[cfg_attr(feature = "perf-inline", inline(always))] +pub(crate) fn u32s_to_state_ids(slice: &[u32]) -> &[StateID] { + // SAFETY: This is safe because StateID is defined to have the same memory + // representation as a u32 (it is repr(transparent)). 
While not every u32 + // is a "valid" StateID, callers are not permitted to rely on the validity + // of StateIDs for memory safety. It can only lead to logical errors. (This + // is why StateID::new_unchecked is safe.) + unsafe { + core::slice::from_raw_parts( + slice.as_ptr().cast::(), + slice.len(), + ) + } +} + +/// Safely converts a `&mut [u32]` to `&mut [StateID]` with zero cost. +pub(crate) fn u32s_to_state_ids_mut(slice: &mut [u32]) -> &mut [StateID] { + // SAFETY: This is safe because StateID is defined to have the same memory + // representation as a u32 (it is repr(transparent)). While not every u32 + // is a "valid" StateID, callers are not permitted to rely on the validity + // of StateIDs for memory safety. It can only lead to logical errors. (This + // is why StateID::new_unchecked is safe.) + unsafe { + core::slice::from_raw_parts_mut( + slice.as_mut_ptr().cast::(), + slice.len(), + ) + } +} + +/// Safely converts a `&[u32]` to `&[PatternID]` with zero cost. +#[cfg_attr(feature = "perf-inline", inline(always))] +pub(crate) fn u32s_to_pattern_ids(slice: &[u32]) -> &[PatternID] { + // SAFETY: This is safe because PatternID is defined to have the same + // memory representation as a u32 (it is repr(transparent)). While not + // every u32 is a "valid" PatternID, callers are not permitted to rely + // on the validity of PatternIDs for memory safety. It can only lead to + // logical errors. (This is why PatternID::new_unchecked is safe.) + unsafe { + core::slice::from_raw_parts( + slice.as_ptr().cast::(), + slice.len(), + ) + } +} + +/// Checks that the given slice has an alignment that matches `T`. +/// +/// This is useful for checking that a slice has an appropriate alignment +/// before casting it to a &[T]. Note though that alignment is not itself +/// sufficient to perform the cast for any `T`. 
+pub(crate) fn check_alignment( + slice: &[u8], +) -> Result<(), DeserializeError> { + let alignment = core::mem::align_of::(); + let address = slice.as_ptr().as_usize(); + if address % alignment == 0 { + return Ok(()); + } + Err(DeserializeError::alignment_mismatch(alignment, address)) +} + +/// Reads a possibly empty amount of padding, up to 7 bytes, from the beginning +/// of the given slice. All padding bytes must be NUL bytes. +/// +/// This is useful because it can be theoretically necessary to pad the +/// beginning of a serialized object with NUL bytes to ensure that it starts +/// at a correctly aligned address. These padding bytes should come immediately +/// before the label. +/// +/// This returns the number of bytes read from the given slice. +pub(crate) fn skip_initial_padding(slice: &[u8]) -> usize { + let mut nread = 0; + while nread < 7 && nread < slice.len() && slice[nread] == 0 { + nread += 1; + } + nread +} + +/// Allocate a byte buffer of the given size, along with some initial padding +/// such that `buf[padding..]` has the same alignment as `T`, where the +/// alignment of `T` must be at most `8`. In particular, callers should treat +/// the first N bytes (second return value) as padding bytes that must not be +/// overwritten. In all cases, the following identity holds: +/// +/// ```ignore +/// let (buf, padding) = alloc_aligned_buffer::(SIZE); +/// assert_eq!(SIZE, buf[padding..].len()); +/// ``` +/// +/// In practice, padding is often zero. +/// +/// The requirement for `8` as a maximum here is somewhat arbitrary. In +/// practice, we never need anything bigger in this crate, and so this function +/// does some sanity asserts under the assumption of a max alignment of `8`. +#[cfg(feature = "alloc")] +pub(crate) fn alloc_aligned_buffer(size: usize) -> (Vec, usize) { + // NOTE: This is a kludge because there's no easy way to allocate a Vec + // with an alignment guaranteed to be greater than 1. 
We could create a + // Vec, but this cannot be safely transmuted to a Vec without + // concern, since reallocing or dropping the Vec is UB (different + // alignment than the initial allocation). We could define a wrapper type + // to manage this for us, but it seems like more machinery than it's worth. + let buf = vec![0; size]; + let align = core::mem::align_of::(); + let address = buf.as_ptr().as_usize(); + if address % align == 0 { + return (buf, 0); + } + // Let's try this again. We have to create a totally new alloc with + // the maximum amount of bytes we might need. We can't just extend our + // pre-existing 'buf' because that might create a new alloc with a + // different alignment. + let extra = align - 1; + let mut buf = vec![0; size + extra]; + let address = buf.as_ptr().as_usize(); + // The code below handles the case where 'address' is aligned to T, so if + // we got lucky and 'address' is now aligned to T (when it previously + // wasn't), then we're done. + if address % align == 0 { + buf.truncate(size); + return (buf, 0); + } + let padding = ((address & !(align - 1)).checked_add(align).unwrap()) + .checked_sub(address) + .unwrap(); + assert!(padding <= 7, "padding of {} is bigger than 7", padding); + assert!( + padding <= extra, + "padding of {} is bigger than extra {} bytes", + padding, + extra + ); + buf.truncate(size + padding); + assert_eq!(size + padding, buf.len()); + assert_eq!( + 0, + buf[padding..].as_ptr().as_usize() % align, + "expected end of initial padding to be aligned to {}", + align, + ); + (buf, padding) +} + +/// Reads a NUL terminated label starting at the beginning of the given slice. +/// +/// If a NUL terminated label could not be found, then an error is returned. +/// Similarly, if a label is found but doesn't match the expected label, then +/// an error is returned. +/// +/// Upon success, the total number of bytes read (including padding bytes) is +/// returned. 
+pub(crate) fn read_label( + slice: &[u8], + expected_label: &'static str, +) -> Result { + // Set an upper bound on how many bytes we scan for a NUL. Since no label + // in this crate is longer than 256 bytes, if we can't find one within that + // range, then we have corrupted data. + let first_nul = + slice[..cmp::min(slice.len(), 256)].iter().position(|&b| b == 0); + let first_nul = match first_nul { + Some(first_nul) => first_nul, + None => { + return Err(DeserializeError::generic( + "could not find NUL terminated label \ + at start of serialized object", + )); + } + }; + let len = first_nul + padding_len(first_nul); + if slice.len() < len { + return Err(DeserializeError::generic( + "could not find properly sized label at start of serialized object" + )); + } + if expected_label.as_bytes() != &slice[..first_nul] { + return Err(DeserializeError::label_mismatch(expected_label)); + } + Ok(len) +} + +/// Writes the given label to the buffer as a NUL terminated string. The label +/// given must not contain NUL, otherwise this will panic. Similarly, the label +/// must not be longer than 255 bytes, otherwise this will panic. +/// +/// Additional NUL bytes are written as necessary to ensure that the number of +/// bytes written is always a multiple of 4. +/// +/// Upon success, the total number of bytes written (including padding) is +/// returned. +pub(crate) fn write_label( + label: &str, + dst: &mut [u8], +) -> Result { + let nwrite = write_label_len(label); + if dst.len() < nwrite { + return Err(SerializeError::buffer_too_small("label")); + } + dst[..label.len()].copy_from_slice(label.as_bytes()); + for i in 0..(nwrite - label.len()) { + dst[label.len() + i] = 0; + } + assert_eq!(nwrite % 4, 0); + Ok(nwrite) +} + +/// Returns the total number of bytes (including padding) that would be written +/// for the given label. This panics if the given label contains a NUL byte or +/// is longer than 255 bytes. 
(The size restriction exists so that searching +/// for a label during deserialization can be done in small bounded space.) +pub(crate) fn write_label_len(label: &str) -> usize { + if label.len() > 255 { + panic!("label must not be longer than 255 bytes"); + } + if label.as_bytes().iter().position(|&b| b == 0).is_some() { + panic!("label must not contain NUL bytes"); + } + let label_len = label.len() + 1; // +1 for the NUL terminator + label_len + padding_len(label_len) +} + +/// Reads the endianness check from the beginning of the given slice and +/// confirms that the endianness of the serialized object matches the expected +/// endianness. If the slice is too small or if the endianness check fails, +/// this returns an error. +/// +/// Upon success, the total number of bytes read is returned. +pub(crate) fn read_endianness_check( + slice: &[u8], +) -> Result { + let (n, nr) = try_read_u32(slice, "endianness check")?; + assert_eq!(nr, write_endianness_check_len()); + if n != 0xFEFF { + return Err(DeserializeError::endian_mismatch(0xFEFF, n)); + } + Ok(nr) +} + +/// Writes 0xFEFF as an integer using the given endianness. +/// +/// This is useful for writing into the header of a serialized object. It can +/// be read during deserialization as a sanity check to ensure the proper +/// endianness is used. +/// +/// Upon success, the total number of bytes written is returned. +pub(crate) fn write_endianness_check( + dst: &mut [u8], +) -> Result { + let nwrite = write_endianness_check_len(); + if dst.len() < nwrite { + return Err(SerializeError::buffer_too_small("endianness check")); + } + E::write_u32(0xFEFF, dst); + Ok(nwrite) +} + +/// Returns the number of bytes written by the endianness check. +pub(crate) fn write_endianness_check_len() -> usize { + size_of::() +} + +/// Reads a version number from the beginning of the given slice and confirms +/// that is matches the expected version number given. 
If the slice is too +/// small or if the version numbers aren't equivalent, this returns an error. +/// +/// Upon success, the total number of bytes read is returned. +/// +/// N.B. Currently, we require that the version number is exactly equivalent. +/// In the future, if we bump the version number without a semver bump, then +/// we'll need to relax this a bit and support older versions. +pub(crate) fn read_version( + slice: &[u8], + expected_version: u32, +) -> Result { + let (n, nr) = try_read_u32(slice, "version")?; + assert_eq!(nr, write_version_len()); + if n != expected_version { + return Err(DeserializeError::version_mismatch(expected_version, n)); + } + Ok(nr) +} + +/// Writes the given version number to the beginning of the given slice. +/// +/// This is useful for writing into the header of a serialized object. It can +/// be read during deserialization as a sanity check to ensure that the library +/// code supports the format of the serialized object. +/// +/// Upon success, the total number of bytes written is returned. +pub(crate) fn write_version( + version: u32, + dst: &mut [u8], +) -> Result { + let nwrite = write_version_len(); + if dst.len() < nwrite { + return Err(SerializeError::buffer_too_small("version number")); + } + E::write_u32(version, dst); + Ok(nwrite) +} + +/// Returns the number of bytes written by writing the version number. +pub(crate) fn write_version_len() -> usize { + size_of::() +} + +/// Reads a pattern ID from the given slice. If the slice has insufficient +/// length, then this panics. If the deserialized integer exceeds the pattern +/// ID limit for the current target, then this returns an error. +/// +/// Upon success, this also returns the number of bytes read. 
+pub(crate) fn read_pattern_id( + slice: &[u8], + what: &'static str, +) -> Result<(PatternID, usize), DeserializeError> { + let bytes: [u8; PatternID::SIZE] = + slice[..PatternID::SIZE].try_into().unwrap(); + let pid = PatternID::from_ne_bytes(bytes) + .map_err(|err| DeserializeError::pattern_id_error(err, what))?; + Ok((pid, PatternID::SIZE)) +} + +/// Reads a pattern ID from the given slice. If the slice has insufficient +/// length, then this panics. Otherwise, the deserialized integer is assumed +/// to be a valid pattern ID. +/// +/// This also returns the number of bytes read. +pub(crate) fn read_pattern_id_unchecked(slice: &[u8]) -> (PatternID, usize) { + let pid = PatternID::from_ne_bytes_unchecked( + slice[..PatternID::SIZE].try_into().unwrap(), + ); + (pid, PatternID::SIZE) +} + +/// Write the given pattern ID to the beginning of the given slice of bytes +/// using the specified endianness. The given slice must have length at least +/// `PatternID::SIZE`, or else this panics. Upon success, the total number of +/// bytes written is returned. +pub(crate) fn write_pattern_id( + pid: PatternID, + dst: &mut [u8], +) -> usize { + E::write_u32(pid.as_u32(), dst); + PatternID::SIZE +} + +/// Attempts to read a state ID from the given slice. If the slice has an +/// insufficient number of bytes or if the state ID exceeds the limit for +/// the current target, then this returns an error. +/// +/// Upon success, this also returns the number of bytes read. +pub(crate) fn try_read_state_id( + slice: &[u8], + what: &'static str, +) -> Result<(StateID, usize), DeserializeError> { + if slice.len() < StateID::SIZE { + return Err(DeserializeError::buffer_too_small(what)); + } + read_state_id(slice, what) +} + +/// Reads a state ID from the given slice. If the slice has insufficient +/// length, then this panics. If the deserialized integer exceeds the state ID +/// limit for the current target, then this returns an error. 
+/// +/// Upon success, this also returns the number of bytes read. +pub(crate) fn read_state_id( + slice: &[u8], + what: &'static str, +) -> Result<(StateID, usize), DeserializeError> { + let bytes: [u8; StateID::SIZE] = + slice[..StateID::SIZE].try_into().unwrap(); + let sid = StateID::from_ne_bytes(bytes) + .map_err(|err| DeserializeError::state_id_error(err, what))?; + Ok((sid, StateID::SIZE)) +} + +/// Reads a state ID from the given slice. If the slice has insufficient +/// length, then this panics. Otherwise, the deserialized integer is assumed +/// to be a valid state ID. +/// +/// This also returns the number of bytes read. +pub(crate) fn read_state_id_unchecked(slice: &[u8]) -> (StateID, usize) { + let sid = StateID::from_ne_bytes_unchecked( + slice[..StateID::SIZE].try_into().unwrap(), + ); + (sid, StateID::SIZE) +} + +/// Write the given state ID to the beginning of the given slice of bytes +/// using the specified endianness. The given slice must have length at least +/// `StateID::SIZE`, or else this panics. Upon success, the total number of +/// bytes written is returned. +pub(crate) fn write_state_id( + sid: StateID, + dst: &mut [u8], +) -> usize { + E::write_u32(sid.as_u32(), dst); + StateID::SIZE +} + +/// Try to read a u16 as a usize from the beginning of the given slice in +/// native endian format. If the slice has fewer than 2 bytes or if the +/// deserialized number cannot be represented by usize, then this returns an +/// error. The error message will include the `what` description of what is +/// being deserialized, for better error messages. `what` should be a noun in +/// singular form. +/// +/// Upon success, this also returns the number of bytes read. 
+pub(crate) fn try_read_u16_as_usize( + slice: &[u8], + what: &'static str, +) -> Result<(usize, usize), DeserializeError> { + try_read_u16(slice, what).and_then(|(n, nr)| { + usize::try_from(n) + .map(|n| (n, nr)) + .map_err(|_| DeserializeError::invalid_usize(what)) + }) +} + +/// Try to read a u32 as a usize from the beginning of the given slice in +/// native endian format. If the slice has fewer than 4 bytes or if the +/// deserialized number cannot be represented by usize, then this returns an +/// error. The error message will include the `what` description of what is +/// being deserialized, for better error messages. `what` should be a noun in +/// singular form. +/// +/// Upon success, this also returns the number of bytes read. +pub(crate) fn try_read_u32_as_usize( + slice: &[u8], + what: &'static str, +) -> Result<(usize, usize), DeserializeError> { + try_read_u32(slice, what).and_then(|(n, nr)| { + usize::try_from(n) + .map(|n| (n, nr)) + .map_err(|_| DeserializeError::invalid_usize(what)) + }) +} + +/// Try to read a u16 from the beginning of the given slice in native endian +/// format. If the slice has fewer than 2 bytes, then this returns an error. +/// The error message will include the `what` description of what is being +/// deserialized, for better error messages. `what` should be a noun in +/// singular form. +/// +/// Upon success, this also returns the number of bytes read. +pub(crate) fn try_read_u16( + slice: &[u8], + what: &'static str, +) -> Result<(u16, usize), DeserializeError> { + check_slice_len(slice, size_of::(), what)?; + Ok((read_u16(slice), size_of::())) +} + +/// Try to read a u32 from the beginning of the given slice in native endian +/// format. If the slice has fewer than 4 bytes, then this returns an error. +/// The error message will include the `what` description of what is being +/// deserialized, for better error messages. `what` should be a noun in +/// singular form. 
+/// +/// Upon success, this also returns the number of bytes read. +pub(crate) fn try_read_u32( + slice: &[u8], + what: &'static str, +) -> Result<(u32, usize), DeserializeError> { + check_slice_len(slice, size_of::(), what)?; + Ok((read_u32(slice), size_of::())) +} + +/// Try to read a u128 from the beginning of the given slice in native endian +/// format. If the slice has fewer than 16 bytes, then this returns an error. +/// The error message will include the `what` description of what is being +/// deserialized, for better error messages. `what` should be a noun in +/// singular form. +/// +/// Upon success, this also returns the number of bytes read. +pub(crate) fn try_read_u128( + slice: &[u8], + what: &'static str, +) -> Result<(u128, usize), DeserializeError> { + check_slice_len(slice, size_of::(), what)?; + Ok((read_u128(slice), size_of::())) +} + +/// Read a u16 from the beginning of the given slice in native endian format. +/// If the slice has fewer than 2 bytes, then this panics. +/// +/// Marked as inline to speed up sparse searching which decodes integers from +/// its automaton at search time. +#[cfg_attr(feature = "perf-inline", inline(always))] +pub(crate) fn read_u16(slice: &[u8]) -> u16 { + let bytes: [u8; 2] = slice[..size_of::()].try_into().unwrap(); + u16::from_ne_bytes(bytes) +} + +/// Read a u32 from the beginning of the given slice in native endian format. +/// If the slice has fewer than 4 bytes, then this panics. +/// +/// Marked as inline to speed up sparse searching which decodes integers from +/// its automaton at search time. +#[cfg_attr(feature = "perf-inline", inline(always))] +pub(crate) fn read_u32(slice: &[u8]) -> u32 { + let bytes: [u8; 4] = slice[..size_of::()].try_into().unwrap(); + u32::from_ne_bytes(bytes) +} + +/// Read a u128 from the beginning of the given slice in native endian format. +/// If the slice has fewer than 16 bytes, then this panics. 
+pub(crate) fn read_u128(slice: &[u8]) -> u128 { + let bytes: [u8; 16] = slice[..size_of::()].try_into().unwrap(); + u128::from_ne_bytes(bytes) +} + +/// Checks that the given slice has some minimal length. If it's smaller than +/// the bound given, then a "buffer too small" error is returned with `what` +/// describing what the buffer represents. +pub(crate) fn check_slice_len( + slice: &[T], + at_least_len: usize, + what: &'static str, +) -> Result<(), DeserializeError> { + if slice.len() < at_least_len { + return Err(DeserializeError::buffer_too_small(what)); + } + Ok(()) +} + +/// Multiply the given numbers, and on overflow, return an error that includes +/// 'what' in the error message. +/// +/// This is useful when doing arithmetic with untrusted data. +pub(crate) fn mul( + a: usize, + b: usize, + what: &'static str, +) -> Result { + match a.checked_mul(b) { + Some(c) => Ok(c), + None => Err(DeserializeError::arithmetic_overflow(what)), + } +} + +/// Add the given numbers, and on overflow, return an error that includes +/// 'what' in the error message. +/// +/// This is useful when doing arithmetic with untrusted data. +pub(crate) fn add( + a: usize, + b: usize, + what: &'static str, +) -> Result { + match a.checked_add(b) { + Some(c) => Ok(c), + None => Err(DeserializeError::arithmetic_overflow(what)), + } +} + +/// Shift `a` left by `b`, and on overflow, return an error that includes +/// 'what' in the error message. +/// +/// This is useful when doing arithmetic with untrusted data. +pub(crate) fn shl( + a: usize, + b: usize, + what: &'static str, +) -> Result { + let amount = u32::try_from(b) + .map_err(|_| DeserializeError::arithmetic_overflow(what))?; + match a.checked_shl(amount) { + Some(c) => Ok(c), + None => Err(DeserializeError::arithmetic_overflow(what)), + } +} + +/// Returns the number of additional bytes required to add to the given length +/// in order to make the total length a multiple of 4. The return value is +/// always less than 4. 
+pub(crate) fn padding_len(non_padding_len: usize) -> usize { + (4 - (non_padding_len & 0b11)) & 0b11 +} + +/// A simple trait for writing code generic over endianness. +/// +/// This is similar to what byteorder provides, but we only need a very small +/// subset. +pub(crate) trait Endian { + /// Writes a u16 to the given destination buffer in a particular + /// endianness. If the destination buffer has a length smaller than 2, then + /// this panics. + fn write_u16(n: u16, dst: &mut [u8]); + + /// Writes a u32 to the given destination buffer in a particular + /// endianness. If the destination buffer has a length smaller than 4, then + /// this panics. + fn write_u32(n: u32, dst: &mut [u8]); + + /// Writes a u64 to the given destination buffer in a particular + /// endianness. If the destination buffer has a length smaller than 8, then + /// this panics. + fn write_u64(n: u64, dst: &mut [u8]); + + /// Writes a u128 to the given destination buffer in a particular + /// endianness. If the destination buffer has a length smaller than 16, + /// then this panics. + fn write_u128(n: u128, dst: &mut [u8]); +} + +/// Little endian writing. +pub(crate) enum LE {} +/// Big endian writing. 
+pub(crate) enum BE {} + +#[cfg(target_endian = "little")] +pub(crate) type NE = LE; +#[cfg(target_endian = "big")] +pub(crate) type NE = BE; + +impl Endian for LE { + fn write_u16(n: u16, dst: &mut [u8]) { + dst[..2].copy_from_slice(&n.to_le_bytes()); + } + + fn write_u32(n: u32, dst: &mut [u8]) { + dst[..4].copy_from_slice(&n.to_le_bytes()); + } + + fn write_u64(n: u64, dst: &mut [u8]) { + dst[..8].copy_from_slice(&n.to_le_bytes()); + } + + fn write_u128(n: u128, dst: &mut [u8]) { + dst[..16].copy_from_slice(&n.to_le_bytes()); + } +} + +impl Endian for BE { + fn write_u16(n: u16, dst: &mut [u8]) { + dst[..2].copy_from_slice(&n.to_be_bytes()); + } + + fn write_u32(n: u32, dst: &mut [u8]) { + dst[..4].copy_from_slice(&n.to_be_bytes()); + } + + fn write_u64(n: u64, dst: &mut [u8]) { + dst[..8].copy_from_slice(&n.to_be_bytes()); + } + + fn write_u128(n: u128, dst: &mut [u8]) { + dst[..16].copy_from_slice(&n.to_be_bytes()); + } +} + +#[cfg(all(test, feature = "alloc"))] +mod tests { + use super::*; + + #[test] + fn labels() { + let mut buf = [0; 1024]; + + let nwrite = write_label("fooba", &mut buf).unwrap(); + assert_eq!(nwrite, 8); + assert_eq!(&buf[..nwrite], b"fooba\x00\x00\x00"); + + let nread = read_label(&buf, "fooba").unwrap(); + assert_eq!(nread, 8); + } + + #[test] + #[should_panic] + fn bad_label_interior_nul() { + // interior NULs are not allowed + write_label("foo\x00bar", &mut [0; 1024]).unwrap(); + } + + #[test] + fn bad_label_almost_too_long() { + // ok + write_label(&"z".repeat(255), &mut [0; 1024]).unwrap(); + } + + #[test] + #[should_panic] + fn bad_label_too_long() { + // labels longer than 255 bytes are banned + write_label(&"z".repeat(256), &mut [0; 1024]).unwrap(); + } + + #[test] + fn padding() { + assert_eq!(0, padding_len(8)); + assert_eq!(3, padding_len(9)); + assert_eq!(2, padding_len(10)); + assert_eq!(1, padding_len(11)); + assert_eq!(0, padding_len(12)); + assert_eq!(3, padding_len(13)); + assert_eq!(2, padding_len(14)); + assert_eq!(1, 
padding_len(15)); + assert_eq!(0, padding_len(16)); + } +} diff --git a/vendor/regex-automata/test b/vendor/regex-automata/test new file mode 100755 index 0000000..df3e5ae --- /dev/null +++ b/vendor/regex-automata/test @@ -0,0 +1,95 @@ +#!/bin/bash + +# This is a script that attempts to *approximately* exhaustively run the test +# suite for regex-automata. The main reason for why 'cargo test' isn't enough +# is because of crate features. regex-automata has a ton of them. This script +# tests many of those feature combinations (although not all) to try to get +# decent coverage in a finite amount of time. + +set -e + +# cd to the directory containing this crate's Cargo.toml so that we don't need +# to pass --manifest-path to every `cargo` command. +cd "$(dirname "$0")" + +echo "===== ALL FEATURES TEST ===" +cargo test --all-features + +# Man I don't *want* to have this many crate features, but... I really want +# folks to be able to slim the crate down to just the things they want. But +# the main downside is that I just can't feasibly test every combination of +# features because there are too many of them. Sad, but I'm not sure if there +# is a better alternative. +features=( + "" + "unicode-word-boundary" + "unicode-word-boundary,syntax,unicode-perl" + "unicode-word-boundary,syntax,dfa-build" + "nfa" + "dfa" + "hybrid" + "nfa,dfa" + "nfa,hybrid" + "dfa,hybrid" + "dfa-onepass" + "nfa-pikevm" + "nfa-backtrack" + "std" + "alloc" + "syntax" + "syntax,nfa-pikevm" + "syntax,hybrid" + "perf-literal-substring" + "perf-literal-multisubstring" + "meta" + "meta,nfa-backtrack" + "meta,hybrid" + "meta,dfa-build" + "meta,dfa-onepass" + "meta,nfa,dfa,hybrid,nfa-backtrack" + "meta,nfa,dfa,hybrid,nfa-backtrack,perf-literal-substring" + "meta,nfa,dfa,hybrid,nfa-backtrack,perf-literal-multisubstring" +) +for f in "${features[@]}"; do + echo "===== LIB FEATURES: $f ===" + # It's actually important to do a standard 'cargo build' in addition to a + # 'cargo test'. 
In particular, in the latter case, the dev-dependencies may + # wind up enabling features in dependencies (like memchr) that make it look + # like everything is well, but actually isn't. For example, the 'regex-test' + # dev-dependency uses 'bstr' and enables its 'std' feature, which in turn + # unconditionally enables 'memchr's 'std' feature. Since we're specifically + # looking to test that certain feature combinations work as expected, this + # can lead to things testing okay, but would actually fail to build. Yikes. + cargo build --no-default-features --lib --features "$f" + cargo test --no-default-features --lib --features "$f" +done + +# We can also run the integration test suite on stripped down features too. +# But the test suite doesn't do well with things like 'std' and 'unicode' +# disabled, so we always enable them. +features=( + "std,unicode,syntax,nfa-pikevm" + "std,unicode,syntax,nfa-backtrack" + "std,unicode,syntax,hybrid" + "std,unicode,syntax,dfa-onepass" + "std,unicode,syntax,dfa-search" + "std,unicode,syntax,dfa-build" + "std,unicode,meta" + # This one is a little tricky because it causes the backtracker to get used + # in more instances and results in failing tests for the 'earliest' tests. + # The actual results are semantically consistent with the API guarantee + # (the backtracker tends to report greater offsets because it isn't an FSM), + # but our tests are less flexible than the API guarantee and demand offsets + # reported by FSM regex engines. (Which is... all of them except for the + # backtracker.) 
+ # "std,unicode,meta,nfa-backtrack" + "std,unicode,meta,hybrid" + "std,unicode,meta,dfa-onepass" + "std,unicode,meta,dfa-build" + "std,unicode,meta,nfa,dfa-onepass,hybrid" +) +for f in "${features[@]}"; do + echo "===== INTEGRATION FEATURES: $f ===" + cargo build --no-default-features --lib --features "$f" + cargo test --no-default-features --test integration --features "$f" +done diff --git a/vendor/regex-automata/tests/dfa/api.rs b/vendor/regex-automata/tests/dfa/api.rs new file mode 100644 index 0000000..96e73af --- /dev/null +++ b/vendor/regex-automata/tests/dfa/api.rs @@ -0,0 +1,69 @@ +use std::error::Error; + +use regex_automata::{ + dfa::{dense, Automaton, OverlappingState}, + nfa::thompson, + HalfMatch, Input, MatchError, +}; + +// Tests that quit bytes in the forward direction work correctly. +#[test] +fn quit_fwd() -> Result<(), Box> { + let dfa = dense::Builder::new() + .configure(dense::Config::new().quit(b'x', true)) + .build("[[:word:]]+$")?; + + assert_eq!( + Err(MatchError::quit(b'x', 3)), + dfa.try_search_fwd(&Input::new(b"abcxyz")) + ); + assert_eq!( + dfa.try_search_overlapping_fwd( + &Input::new(b"abcxyz"), + &mut OverlappingState::start() + ), + Err(MatchError::quit(b'x', 3)), + ); + + Ok(()) +} + +// Tests that quit bytes in the reverse direction work correctly. +#[test] +fn quit_rev() -> Result<(), Box> { + let dfa = dense::Builder::new() + .configure(dense::Config::new().quit(b'x', true)) + .thompson(thompson::Config::new().reverse(true)) + .build("^[[:word:]]+")?; + + assert_eq!( + Err(MatchError::quit(b'x', 3)), + dfa.try_search_rev(&Input::new(b"abcxyz")) + ); + + Ok(()) +} + +// Tests that if we heuristically enable Unicode word boundaries but then +// instruct that a non-ASCII byte should NOT be a quit byte, then the builder +// will panic. 
+#[test] +#[should_panic] +fn quit_panics() { + dense::Config::new().unicode_word_boundary(true).quit(b'\xFF', false); +} + +// This tests an interesting case where even if the Unicode word boundary option +// is disabled, setting all non-ASCII bytes to be quit bytes will cause Unicode +// word boundaries to be enabled. +#[test] +fn unicode_word_implicitly_works() -> Result<(), Box> { + let mut config = dense::Config::new(); + for b in 0x80..=0xFF { + config = config.quit(b, true); + } + let dfa = dense::Builder::new().configure(config).build(r"\b")?; + let expected = HalfMatch::must(0, 1); + assert_eq!(Ok(Some(expected)), dfa.try_search_fwd(&Input::new(b" a"))); + Ok(()) +} diff --git a/vendor/regex-automata/tests/dfa/mod.rs b/vendor/regex-automata/tests/dfa/mod.rs new file mode 100644 index 0000000..0d8f539 --- /dev/null +++ b/vendor/regex-automata/tests/dfa/mod.rs @@ -0,0 +1,8 @@ +#[cfg(all(feature = "dfa-build", feature = "dfa-search"))] +mod api; +#[cfg(feature = "dfa-onepass")] +mod onepass; +#[cfg(all(feature = "dfa-build", feature = "dfa-search"))] +mod regression; +#[cfg(all(not(miri), feature = "dfa-build", feature = "dfa-search"))] +mod suite; diff --git a/vendor/regex-automata/tests/dfa/onepass/mod.rs b/vendor/regex-automata/tests/dfa/onepass/mod.rs new file mode 100644 index 0000000..9d6ab47 --- /dev/null +++ b/vendor/regex-automata/tests/dfa/onepass/mod.rs @@ -0,0 +1,2 @@ +#[cfg(not(miri))] +mod suite; diff --git a/vendor/regex-automata/tests/dfa/onepass/suite.rs b/vendor/regex-automata/tests/dfa/onepass/suite.rs new file mode 100644 index 0000000..20bd696 --- /dev/null +++ b/vendor/regex-automata/tests/dfa/onepass/suite.rs @@ -0,0 +1,197 @@ +use { + anyhow::Result, + regex_automata::{ + dfa::onepass::{self, DFA}, + nfa::thompson, + util::{iter, syntax}, + }, + regex_test::{ + CompiledRegex, Match, RegexTest, SearchKind, Span, TestResult, + TestRunner, + }, +}; + +use crate::{create_input, suite, testify_captures, untestify_kind}; + +const EXPANSIONS: 
&[&str] = &["is_match", "find", "captures"]; + +/// Tests the default configuration of the one-pass DFA. +#[test] +fn default() -> Result<()> { + let builder = DFA::builder(); + TestRunner::new()? + .expand(EXPANSIONS, |t| t.compiles()) + .test_iter(suite()?.iter(), compiler(builder)) + .assert(); + Ok(()) +} + +/// Tests the one-pass DFA when 'starts_for_each_pattern' is enabled for all +/// tests. +#[test] +fn starts_for_each_pattern() -> Result<()> { + let mut builder = DFA::builder(); + builder.configure(DFA::config().starts_for_each_pattern(true)); + TestRunner::new()? + .expand(EXPANSIONS, |t| t.compiles()) + .test_iter(suite()?.iter(), compiler(builder)) + .assert(); + Ok(()) +} + +/// Tests the one-pass DFA when byte classes are disabled. +/// +/// N.B. Disabling byte classes doesn't avoid any indirection at search time. +/// All it does is cause every byte value to be its own distinct equivalence +/// class. +#[test] +fn no_byte_classes() -> Result<()> { + let mut builder = DFA::builder(); + builder.configure(DFA::config().byte_classes(false)); + TestRunner::new()? + .expand(EXPANSIONS, |t| t.compiles()) + .test_iter(suite()?.iter(), compiler(builder)) + .assert(); + Ok(()) +} + +fn compiler( + mut builder: onepass::Builder, +) -> impl FnMut(&RegexTest, &[String]) -> Result { + move |test, regexes| { + // Check if our regex contains things that aren't supported by DFAs. + // That is, Unicode word boundaries when searching non-ASCII text. + if !configure_onepass_builder(test, &mut builder) { + return Ok(CompiledRegex::skip()); + } + let re = match builder.build_many(®exes) { + Ok(re) => re, + Err(err) => { + let msg = err.to_string(); + // This is pretty gross, but when a regex fails to compile as + // a one-pass regex, then we want to be OK with that and just + // skip the test. But we have to be careful to only skip it + // when the expected result is that the regex compiles. 
If + // the test is specifically checking that the regex does not + // compile, then we should bubble up that error and allow the + // test to pass. + // + // Since our error types are all generally opaque, we just + // look for an error string. Not great, but not the end of the + // world. + if test.compiles() && msg.contains("not one-pass") { + return Ok(CompiledRegex::skip()); + } + return Err(err.into()); + } + }; + let mut cache = re.create_cache(); + Ok(CompiledRegex::compiled(move |test| -> TestResult { + run_test(&re, &mut cache, test) + })) + } +} + +fn run_test( + re: &DFA, + cache: &mut onepass::Cache, + test: &RegexTest, +) -> TestResult { + let input = create_input(test); + match test.additional_name() { + "is_match" => { + TestResult::matched(re.is_match(cache, input.earliest(true))) + } + "find" => match test.search_kind() { + SearchKind::Earliest | SearchKind::Leftmost => { + let input = + input.earliest(test.search_kind() == SearchKind::Earliest); + let mut caps = re.create_captures(); + let it = iter::Searcher::new(input) + .into_matches_iter(|input| { + re.try_search(cache, input, &mut caps)?; + Ok(caps.get_match()) + }) + .infallible() + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|m| Match { + id: m.pattern().as_usize(), + span: Span { start: m.start(), end: m.end() }, + }); + TestResult::matches(it) + } + SearchKind::Overlapping => { + // The one-pass DFA does not support any kind of overlapping + // search. This is not just a matter of not having the API. + // It's fundamentally incompatible with the one-pass concept. + // If overlapping matches were possible, then the one-pass DFA + // would fail to build. 
+ TestResult::skip() + } + }, + "captures" => match test.search_kind() { + SearchKind::Earliest | SearchKind::Leftmost => { + let input = + input.earliest(test.search_kind() == SearchKind::Earliest); + let it = iter::Searcher::new(input) + .into_captures_iter(re.create_captures(), |input, caps| { + re.try_search(cache, input, caps) + }) + .infallible() + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|caps| testify_captures(&caps)); + TestResult::captures(it) + } + SearchKind::Overlapping => { + // The one-pass DFA does not support any kind of overlapping + // search. This is not just a matter of not having the API. + // It's fundamentally incompatible with the one-pass concept. + // If overlapping matches were possible, then the one-pass DFA + // would fail to build. + TestResult::skip() + } + }, + name => TestResult::fail(&format!("unrecognized test name: {}", name)), + } +} + +/// Configures the given regex builder with all relevant settings on the given +/// regex test. +/// +/// If the regex test has a setting that is unsupported, then this returns +/// false (implying the test should be skipped). +fn configure_onepass_builder( + test: &RegexTest, + builder: &mut onepass::Builder, +) -> bool { + if !test.anchored() { + return false; + } + let match_kind = match untestify_kind(test.match_kind()) { + None => return false, + Some(k) => k, + }; + + let config = DFA::config().match_kind(match_kind); + builder + .configure(config) + .syntax(config_syntax(test)) + .thompson(config_thompson(test)); + true +} + +/// Configuration of a Thompson NFA compiler from a regex test. +fn config_thompson(test: &RegexTest) -> thompson::Config { + let mut lookm = regex_automata::util::look::LookMatcher::new(); + lookm.set_line_terminator(test.line_terminator()); + thompson::Config::new().utf8(test.utf8()).look_matcher(lookm) +} + +/// Configuration of the regex parser from a regex test. 
+fn config_syntax(test: &RegexTest) -> syntax::Config { + syntax::Config::new() + .case_insensitive(test.case_insensitive()) + .unicode(test.unicode()) + .utf8(test.utf8()) + .line_terminator(test.line_terminator()) +} diff --git a/vendor/regex-automata/tests/dfa/regression.rs b/vendor/regex-automata/tests/dfa/regression.rs new file mode 100644 index 0000000..09caffa --- /dev/null +++ b/vendor/regex-automata/tests/dfa/regression.rs @@ -0,0 +1,48 @@ +// A regression test for checking that minimization correctly translates +// whether a state is a match state or not. Previously, it was possible for +// minimization to mark a non-matching state as matching. +#[test] +#[cfg(not(miri))] +fn minimize_sets_correct_match_states() { + use regex_automata::{ + dfa::{dense::DFA, Automaton, StartKind}, + Anchored, Input, + }; + + let pattern = + // This is a subset of the grapheme matching regex. I couldn't seem + // to get a repro any smaller than this unfortunately. + r"(?x) + (?: + \p{gcb=Prepend}* + (?: + (?: + (?: + \p{gcb=L}* + (?:\p{gcb=V}+|\p{gcb=LV}\p{gcb=V}*|\p{gcb=LVT}) + \p{gcb=T}* + ) + | + \p{gcb=L}+ + | + \p{gcb=T}+ + ) + | + \p{Extended_Pictographic} + (?:\p{gcb=Extend}*\p{gcb=ZWJ}\p{Extended_Pictographic})* + | + [^\p{gcb=Control}\p{gcb=CR}\p{gcb=LF}] + ) + [\p{gcb=Extend}\p{gcb=ZWJ}\p{gcb=SpacingMark}]* + ) + "; + + let dfa = DFA::builder() + .configure( + DFA::config().start_kind(StartKind::Anchored).minimize(true), + ) + .build(pattern) + .unwrap(); + let input = Input::new(b"\xE2").anchored(Anchored::Yes); + assert_eq!(Ok(None), dfa.try_search_fwd(&input)); +} diff --git a/vendor/regex-automata/tests/dfa/suite.rs b/vendor/regex-automata/tests/dfa/suite.rs new file mode 100644 index 0000000..8ed6dd0 --- /dev/null +++ b/vendor/regex-automata/tests/dfa/suite.rs @@ -0,0 +1,443 @@ +use { + anyhow::Result, + regex_automata::{ + dfa::{ + self, dense, regex::Regex, sparse, Automaton, OverlappingState, + StartKind, + }, + nfa::thompson, + 
util::{prefilter::Prefilter, syntax}, + Anchored, Input, PatternSet, + }, + regex_test::{ + CompiledRegex, Match, RegexTest, SearchKind, Span, TestResult, + TestRunner, + }, +}; + +use crate::{create_input, suite, untestify_kind}; + +const EXPANSIONS: &[&str] = &["is_match", "find", "which"]; + +/// Runs the test suite with the default configuration. +#[test] +fn unminimized_default() -> Result<()> { + let builder = Regex::builder(); + TestRunner::new()? + .expand(EXPANSIONS, |t| t.compiles()) + .blacklist("expensive") + .test_iter(suite()?.iter(), dense_compiler(builder)) + .assert(); + Ok(()) +} + +/// Runs the test suite with the default configuration and a prefilter enabled, +/// if one can be built. +#[test] +fn unminimized_prefilter() -> Result<()> { + let my_compiler = |test: &RegexTest, regexes: &[String]| { + // Parse regexes as HIRs so we can get literals to build a prefilter. + let mut hirs = vec![]; + for pattern in regexes.iter() { + hirs.push(syntax::parse_with(pattern, &config_syntax(test))?); + } + let kind = match untestify_kind(test.match_kind()) { + None => return Ok(CompiledRegex::skip()), + Some(kind) => kind, + }; + let pre = Prefilter::from_hirs_prefix(kind, &hirs); + let mut builder = Regex::builder(); + builder.dense(dense::DFA::config().prefilter(pre)); + compiler(builder, |_, _, re| { + Ok(CompiledRegex::compiled(move |test| -> TestResult { + run_test(&re, test) + })) + })(test, regexes) + }; + TestRunner::new()? + .expand(EXPANSIONS, |t| t.compiles()) + .blacklist("expensive") + .test_iter(suite()?.iter(), my_compiler) + .assert(); + Ok(()) +} + +/// Runs the test suite with start states specialized. +#[test] +fn unminimized_specialized_start_states() -> Result<()> { + let mut builder = Regex::builder(); + builder.dense(dense::Config::new().specialize_start_states(true)); + + TestRunner::new()? 
+ .expand(EXPANSIONS, |t| t.compiles()) + .blacklist("expensive") + .test_iter(suite()?.iter(), dense_compiler(builder)) + .assert(); + Ok(()) +} + +/// Runs the test suite with byte classes disabled. +#[test] +fn unminimized_no_byte_class() -> Result<()> { + let mut builder = Regex::builder(); + builder.dense(dense::Config::new().byte_classes(false)); + + TestRunner::new()? + .expand(EXPANSIONS, |t| t.compiles()) + .blacklist("expensive") + .test_iter(suite()?.iter(), dense_compiler(builder)) + .assert(); + Ok(()) +} + +/// Runs the test suite with NFA shrinking enabled. +#[test] +fn unminimized_nfa_shrink() -> Result<()> { + let mut builder = Regex::builder(); + builder.thompson(thompson::Config::new().shrink(true)); + + TestRunner::new()? + .expand(EXPANSIONS, |t| t.compiles()) + .blacklist("expensive") + .test_iter(suite()?.iter(), dense_compiler(builder)) + .assert(); + Ok(()) +} + +/// Runs the test suite on a minimized DFA with an otherwise default +/// configuration. +#[test] +fn minimized_default() -> Result<()> { + let mut builder = Regex::builder(); + builder.dense(dense::Config::new().minimize(true)); + TestRunner::new()? + .expand(EXPANSIONS, |t| t.compiles()) + .blacklist("expensive") + .test_iter(suite()?.iter(), dense_compiler(builder)) + .assert(); + Ok(()) +} + +/// Runs the test suite on a minimized DFA with byte classes disabled. +#[test] +fn minimized_no_byte_class() -> Result<()> { + let mut builder = Regex::builder(); + builder.dense(dense::Config::new().minimize(true).byte_classes(false)); + + TestRunner::new()? + .expand(EXPANSIONS, |t| t.compiles()) + .blacklist("expensive") + .test_iter(suite()?.iter(), dense_compiler(builder)) + .assert(); + Ok(()) +} + +/// Runs the test suite on a sparse unminimized DFA. +#[test] +fn sparse_unminimized_default() -> Result<()> { + let builder = Regex::builder(); + TestRunner::new()? 
+ .expand(EXPANSIONS, |t| t.compiles()) + .blacklist("expensive") + .test_iter(suite()?.iter(), sparse_compiler(builder)) + .assert(); + Ok(()) +} + +/// Runs the test suite on a sparse unminimized DFA with prefilters enabled. +#[test] +fn sparse_unminimized_prefilter() -> Result<()> { + let my_compiler = |test: &RegexTest, regexes: &[String]| { + // Parse regexes as HIRs so we can get literals to build a prefilter. + let mut hirs = vec![]; + for pattern in regexes.iter() { + hirs.push(syntax::parse_with(pattern, &config_syntax(test))?); + } + let kind = match untestify_kind(test.match_kind()) { + None => return Ok(CompiledRegex::skip()), + Some(kind) => kind, + }; + let pre = Prefilter::from_hirs_prefix(kind, &hirs); + let mut builder = Regex::builder(); + builder.dense(dense::DFA::config().prefilter(pre)); + compiler(builder, |builder, _, re| { + let fwd = re.forward().to_sparse()?; + let rev = re.reverse().to_sparse()?; + let re = builder.build_from_dfas(fwd, rev); + Ok(CompiledRegex::compiled(move |test| -> TestResult { + run_test(&re, test) + })) + })(test, regexes) + }; + TestRunner::new()? + .expand(EXPANSIONS, |t| t.compiles()) + .blacklist("expensive") + .test_iter(suite()?.iter(), my_compiler) + .assert(); + Ok(()) +} + +/// Another basic sanity test that checks we can serialize and then deserialize +/// a regex, and that the resulting regex can be used for searching correctly. 
+#[test] +fn serialization_unminimized_default() -> Result<()> { + let builder = Regex::builder(); + let my_compiler = |builder| { + compiler(builder, |builder, _, re| { + let builder = builder.clone(); + let (fwd_bytes, _) = re.forward().to_bytes_native_endian(); + let (rev_bytes, _) = re.reverse().to_bytes_native_endian(); + Ok(CompiledRegex::compiled(move |test| -> TestResult { + let fwd: dense::DFA<&[u32]> = + dense::DFA::from_bytes(&fwd_bytes).unwrap().0; + let rev: dense::DFA<&[u32]> = + dense::DFA::from_bytes(&rev_bytes).unwrap().0; + let re = builder.build_from_dfas(fwd, rev); + + run_test(&re, test) + })) + }) + }; + TestRunner::new()? + .expand(EXPANSIONS, |t| t.compiles()) + .blacklist("expensive") + .test_iter(suite()?.iter(), my_compiler(builder)) + .assert(); + Ok(()) +} + +/// A basic sanity test that checks we can serialize and then deserialize a +/// regex using sparse DFAs, and that the resulting regex can be used for +/// searching correctly. +#[test] +fn sparse_serialization_unminimized_default() -> Result<()> { + let builder = Regex::builder(); + let my_compiler = |builder| { + compiler(builder, |builder, _, re| { + let builder = builder.clone(); + let fwd_bytes = re.forward().to_sparse()?.to_bytes_native_endian(); + let rev_bytes = re.reverse().to_sparse()?.to_bytes_native_endian(); + Ok(CompiledRegex::compiled(move |test| -> TestResult { + let fwd: sparse::DFA<&[u8]> = + sparse::DFA::from_bytes(&fwd_bytes).unwrap().0; + let rev: sparse::DFA<&[u8]> = + sparse::DFA::from_bytes(&rev_bytes).unwrap().0; + let re = builder.build_from_dfas(fwd, rev); + run_test(&re, test) + })) + }) + }; + TestRunner::new()? 
+ .expand(EXPANSIONS, |t| t.compiles()) + .blacklist("expensive") + .test_iter(suite()?.iter(), my_compiler(builder)) + .assert(); + Ok(()) +} + +fn dense_compiler( + builder: dfa::regex::Builder, +) -> impl FnMut(&RegexTest, &[String]) -> Result { + compiler(builder, |_, _, re| { + Ok(CompiledRegex::compiled(move |test| -> TestResult { + run_test(&re, test) + })) + }) +} + +fn sparse_compiler( + builder: dfa::regex::Builder, +) -> impl FnMut(&RegexTest, &[String]) -> Result { + compiler(builder, |builder, _, re| { + let fwd = re.forward().to_sparse()?; + let rev = re.reverse().to_sparse()?; + let re = builder.build_from_dfas(fwd, rev); + Ok(CompiledRegex::compiled(move |test| -> TestResult { + run_test(&re, test) + })) + }) +} + +fn compiler( + mut builder: dfa::regex::Builder, + mut create_matcher: impl FnMut( + &dfa::regex::Builder, + Option, + Regex, + ) -> Result, +) -> impl FnMut(&RegexTest, &[String]) -> Result { + move |test, regexes| { + // Parse regexes as HIRs for some analysis below. + let mut hirs = vec![]; + for pattern in regexes.iter() { + hirs.push(syntax::parse_with(pattern, &config_syntax(test))?); + } + + // Get a prefilter in case the test wants it. + let kind = match untestify_kind(test.match_kind()) { + None => return Ok(CompiledRegex::skip()), + Some(kind) => kind, + }; + let pre = Prefilter::from_hirs_prefix(kind, &hirs); + + // Check if our regex contains things that aren't supported by DFAs. + // That is, Unicode word boundaries when searching non-ASCII text. + if !test.haystack().is_ascii() { + for hir in hirs.iter() { + if hir.properties().look_set().contains_word_unicode() { + return Ok(CompiledRegex::skip()); + } + } + } + if !configure_regex_builder(test, &mut builder) { + return Ok(CompiledRegex::skip()); + } + create_matcher(&builder, pre, builder.build_many(®exes)?) 
+ } +} + +fn run_test(re: &Regex, test: &RegexTest) -> TestResult { + let input = create_input(test); + match test.additional_name() { + "is_match" => TestResult::matched(re.is_match(input.earliest(true))), + "find" => match test.search_kind() { + SearchKind::Earliest | SearchKind::Leftmost => { + let input = + input.earliest(test.search_kind() == SearchKind::Earliest); + TestResult::matches( + re.find_iter(input) + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|m| Match { + id: m.pattern().as_usize(), + span: Span { start: m.start(), end: m.end() }, + }), + ) + } + SearchKind::Overlapping => { + try_search_overlapping(re, &input).unwrap() + } + }, + "which" => match test.search_kind() { + SearchKind::Earliest | SearchKind::Leftmost => { + // There are no "which" APIs for standard searches. + TestResult::skip() + } + SearchKind::Overlapping => { + let dfa = re.forward(); + let mut patset = PatternSet::new(dfa.pattern_len()); + dfa.try_which_overlapping_matches(&input, &mut patset) + .unwrap(); + TestResult::which(patset.iter().map(|p| p.as_usize())) + } + }, + name => TestResult::fail(&format!("unrecognized test name: {}", name)), + } +} + +/// Configures the given regex builder with all relevant settings on the given +/// regex test. +/// +/// If the regex test has a setting that is unsupported, then this returns +/// false (implying the test should be skipped). +fn configure_regex_builder( + test: &RegexTest, + builder: &mut dfa::regex::Builder, +) -> bool { + let match_kind = match untestify_kind(test.match_kind()) { + None => return false, + Some(k) => k, + }; + + let starts = if test.anchored() { + StartKind::Anchored + } else { + StartKind::Unanchored + }; + let mut dense_config = dense::Config::new() + .start_kind(starts) + .match_kind(match_kind) + .unicode_word_boundary(true); + // When doing an overlapping search, we might try to find the start of each + // match with a custom search routine. 
In that case, we need to tell the + // reverse search (for the start offset) which pattern to look for. The + // only way that API works is when anchored starting states are compiled + // for each pattern. This does technically also enable it for the forward + // DFA, but we're okay with that. + if test.search_kind() == SearchKind::Overlapping { + dense_config = dense_config.starts_for_each_pattern(true); + } + + builder + .syntax(config_syntax(test)) + .thompson(config_thompson(test)) + .dense(dense_config); + true +} + +/// Configuration of a Thompson NFA compiler from a regex test. +fn config_thompson(test: &RegexTest) -> thompson::Config { + let mut lookm = regex_automata::util::look::LookMatcher::new(); + lookm.set_line_terminator(test.line_terminator()); + thompson::Config::new().utf8(test.utf8()).look_matcher(lookm) +} + +/// Configuration of the regex syntax from a regex test. +fn config_syntax(test: &RegexTest) -> syntax::Config { + syntax::Config::new() + .case_insensitive(test.case_insensitive()) + .unicode(test.unicode()) + .utf8(test.utf8()) + .line_terminator(test.line_terminator()) +} + +/// Execute an overlapping search, and for each match found, also find its +/// overlapping starting positions. +/// +/// N.B. This routine used to be part of the crate API, but 1) it wasn't clear +/// to me how useful it was and 2) it wasn't clear to me what its semantics +/// should be. In particular, a potentially surprising footgun of this routine +/// is that it is worst case *quadratic* in the size of the haystack. Namely, it's +/// possible to report a match at every position, and for every such position, +/// scan all the way to the beginning of the haystack to find the starting +/// position. Typical leftmost non-overlapping searches don't suffer from this +/// because, well, matches can't overlap. So subsequent searches after a match +/// is found don't revisit previously scanned parts of the haystack. 
+/// +/// Its semantics can be strange for other reasons too. For example, given +/// the regex '.*' and the haystack 'zz', the full set of overlapping matches +/// is: [0, 0], [1, 1], [0, 1], [2, 2], [1, 2], [0, 2]. The ordering of +/// those matches is quite strange, but makes sense when you think about the +/// implementation: an end offset is found left-to-right, and then one or more +/// starting offsets are found right-to-left. +/// +/// Nevertheless, we provide this routine in our test suite because it's +/// useful to test the low level DFA overlapping search and our test suite +/// is written in a way that requires starting offsets. +fn try_search_overlapping( + re: &Regex, + input: &Input<'_>, +) -> Result { + let mut matches = vec![]; + let mut fwd_state = OverlappingState::start(); + let (fwd_dfa, rev_dfa) = (re.forward(), re.reverse()); + while let Some(end) = { + fwd_dfa.try_search_overlapping_fwd(input, &mut fwd_state)?; + fwd_state.get_match() + } { + let revsearch = input + .clone() + .range(input.start()..end.offset()) + .anchored(Anchored::Pattern(end.pattern())) + .earliest(false); + let mut rev_state = OverlappingState::start(); + while let Some(start) = { + rev_dfa.try_search_overlapping_rev(&revsearch, &mut rev_state)?; + rev_state.get_match() + } { + let span = Span { start: start.offset(), end: end.offset() }; + let mat = Match { id: end.pattern().as_usize(), span }; + matches.push(mat); + } + } + Ok(TestResult::matches(matches)) +} diff --git a/vendor/regex-automata/tests/fuzz/dense.rs b/vendor/regex-automata/tests/fuzz/dense.rs new file mode 100644 index 0000000..213891b --- /dev/null +++ b/vendor/regex-automata/tests/fuzz/dense.rs @@ -0,0 +1,52 @@ +// This test was found by a fuzzer input that crafted a way to provide +// an invalid serialization of ByteClasses that passed our verification. 
+// Specifically, the verification step in the deserialization of ByteClasses +// used an iterator that depends on part of the serialized bytes being correct. +// (Specifically, the encoding of the number of classes.) +#[test] +fn invalid_byte_classes() { + let data = include_bytes!( + "testdata/deserialize_dense_crash-9486fb7c8a93b12c12a62166b43d31640c0208a9", + ); + let _ = fuzz_run(data); +} + +#[test] +fn invalid_byte_classes_min() { + let data = include_bytes!( + "testdata/deserialize_dense_minimized-from-9486fb7c8a93b12c12a62166b43d31640c0208a9", + ); + let _ = fuzz_run(data); +} + +// This is the code from the fuzz target. Kind of sucks to duplicate it here, +// but this is fundamentally how we interpret the data. +fn fuzz_run(given_data: &[u8]) -> Option<()> { + use regex_automata::dfa::Automaton; + + if given_data.len() < 2 { + return None; + } + let haystack_len = usize::from(given_data[0]); + let haystack = given_data.get(1..1 + haystack_len)?; + let given_dfa_bytes = given_data.get(1 + haystack_len..)?; + + // We help the fuzzer along by adding a preamble to the bytes that should + // at least make these first parts valid. The preamble expects a very + // specific sequence of bytes, so it makes sense to just force this. + let label = "rust-regex-automata-dfa-dense\x00\x00\x00"; + assert_eq!(0, label.len() % 4); + let endianness_check = 0xFEFFu32.to_ne_bytes().to_vec(); + let version_check = 2u32.to_ne_bytes().to_vec(); + let mut dfa_bytes: Vec = vec![]; + dfa_bytes.extend(label.as_bytes()); + dfa_bytes.extend(&endianness_check); + dfa_bytes.extend(&version_check); + dfa_bytes.extend(given_dfa_bytes); + // This is the real test: checking that any input we give to + // DFA::from_bytes will never result in a panic. 
+ let (dfa, _) = + regex_automata::dfa::dense::DFA::from_bytes(&dfa_bytes).ok()?; + let _ = dfa.try_search_fwd(&regex_automata::Input::new(haystack)); + Some(()) +} diff --git a/vendor/regex-automata/tests/fuzz/mod.rs b/vendor/regex-automata/tests/fuzz/mod.rs new file mode 100644 index 0000000..960cb42 --- /dev/null +++ b/vendor/regex-automata/tests/fuzz/mod.rs @@ -0,0 +1,2 @@ +mod dense; +mod sparse; diff --git a/vendor/regex-automata/tests/fuzz/sparse.rs b/vendor/regex-automata/tests/fuzz/sparse.rs new file mode 100644 index 0000000..837ad10 --- /dev/null +++ b/vendor/regex-automata/tests/fuzz/sparse.rs @@ -0,0 +1,132 @@ +// This is a regression test for a bug in how special states are handled. The +// fuzzer found a case where a state returned true for 'is_special_state' but +// *didn't* return true for 'is_dead_state', 'is_quit_state', 'is_match_state', +// 'is_start_state' or 'is_accel_state'. This in turn tripped a debug assertion +// in the core matching loop that requires 'is_special_state' being true to +// imply that one of the other routines returns true. +// +// We fixed this by adding some validation to both dense and sparse DFAs that +// checks that this property is true for every state ID in the DFA. +#[test] +fn invalid_special_state() { + let data = include_bytes!( + "testdata/deserialize_sparse_crash-a1b839d899ced76d5d7d0f78f9edb7a421505838", + ); + let _ = fuzz_run(data); +} + +// This is an interesting case where a fuzzer generated a DFA with +// a transition to a state ID that decoded as a valid state, but +// where the ID itself did not point to one of the two existing +// states for this particular DFA. This combined with marking this +// transition's state ID as special but without actually making one of the +// 'is_{dead,quit,match,start,accel}_state' predicates return true ended up +// tripping the 'debug_assert(dfa.is_quit_state(sid))' code in the search +// routine. 
+// +// We fixed this in alloc mode by checking that every transition points to a +// valid state ID. Technically this bug still exists in core-only mode, but +// it's not clear how to fix it. And it's worth pointing out that the search +// routine won't panic in production. It will just provide invalid results. And +// that's acceptable within the contract of DFA::from_bytes. +#[test] +fn transition_to_invalid_but_valid_state() { + let data = include_bytes!( + "testdata/deserialize_sparse_crash-dbb8172d3984e7e7d03f4b5f8bb86ecd1460eff9", + ); + let _ = fuzz_run(data); +} + +// Another one caught by the fuzzer where it generated a DFA that reported a +// start state as a match state. Since matches are always delayed by one byte, +// start states specifically cannot be match states. And indeed, the search +// code relies on this. +#[test] +fn start_state_is_not_match_state() { + let data = include_bytes!( + "testdata/deserialize_sparse_crash-0da59c0434eaf35e5a6b470fa9244bb79c72b000", + ); + let _ = fuzz_run(data); +} + +// This is a variation on 'transition_to_invalid_but_valid_state', but happens +// to a start state. Namely, the fuzz data here builds a DFA with a start +// state ID that is incorrect but points to a sequence of bytes that satisfies +// state decoding validation. This errant state in turn has a non-zero number +// of transitions, and it's those transitions that point to a state that does +// *not* satisfy state decoding validation. But we never checked those. So the +// fix here was to add validation of the transitions off of the start state. +#[test] +fn start_state_has_valid_transitions() { + let data = include_bytes!( + "testdata/deserialize_sparse_crash-61fd8e3003bf9d99f6c1e5a8488727eefd234b98", + ); + let _ = fuzz_run(data); +} + +// This fuzz input generated a DFA with a state whose ID was in the match state +// ID range, but where the state itself was encoded with zero pattern IDs. We +// added validation code to check this case. 
+#[test] +fn match_state_inconsistency() { + let data = include_bytes!( + "testdata/deserialize_sparse_crash-c383ae07ec5e191422eadc492117439011816570", + ); + let _ = fuzz_run(data); +} + +// This fuzz input generated a DFA with a state whose ID was in the accelerator +// range, but which didn't have any accelerators. This violated an invariant that +// assumes that if 'dfa.is_accel_state(sid)' returns true, then the state must +// have some accelerators. +#[test] +fn invalid_accelerators() { + let data = include_bytes!( + "testdata/deserialize_sparse_crash-d07703ceb94b10dcd9e4acb809f2051420449e2b", + ); + let _ = fuzz_run(data); +} + +// This fuzz input generated a DFA with a state whose EOI transition led to +// a quit state, which is generally considered illegal. Why? Because the EOI +// transition is defined over a special sentinel alphabet element and one +// cannot configure a DFA to "quit" on that sentinel. +#[test] +fn eoi_transition_to_quit_state() { + let data = include_bytes!( + "testdata/deserialize_sparse_crash-18cfc246f2ddfc3dfc92b0c7893178c7cf65efa9", + ); + let _ = fuzz_run(data); +} + +// This is the code from the fuzz target. Kind of sucks to duplicate it here, +// but this is fundamentally how we interpret the data. +fn fuzz_run(given_data: &[u8]) -> Option<()> { + use regex_automata::dfa::Automaton; + + if given_data.len() < 2 { + return None; + } + let haystack_len = usize::from(given_data[0]); + let haystack = given_data.get(1..1 + haystack_len)?; + let given_dfa_bytes = given_data.get(1 + haystack_len..)?; + + // We help the fuzzer along by adding a preamble to the bytes that should + // at least make these first parts valid. The preamble expects a very + // specific sequence of bytes, so it makes sense to just force this. 
+ let label = "rust-regex-automata-dfa-sparse\x00\x00"; + assert_eq!(0, label.len() % 4); + let endianness_check = 0xFEFFu32.to_ne_bytes().to_vec(); + let version_check = 2u32.to_ne_bytes().to_vec(); + let mut dfa_bytes: Vec<u8> = vec![]; + dfa_bytes.extend(label.as_bytes()); + dfa_bytes.extend(&endianness_check); + dfa_bytes.extend(&version_check); + dfa_bytes.extend(given_dfa_bytes); + // This is the real test: checking that any input we give to + // DFA::from_bytes will never result in a panic. + let (dfa, _) = + regex_automata::dfa::sparse::DFA::from_bytes(&dfa_bytes).ok()?; + let _ = dfa.try_search_fwd(&regex_automata::Input::new(haystack)); + Some(()) +} diff --git a/vendor/regex-automata/tests/fuzz/testdata/deserialize_dense_crash-9486fb7c8a93b12c12a62166b43d31640c0208a9 b/vendor/regex-automata/tests/fuzz/testdata/deserialize_dense_crash-9486fb7c8a93b12c12a62166b43d31640c0208a9 new file mode 100644 index 0000000..972bfb2 Binary files /dev/null and b/vendor/regex-automata/tests/fuzz/testdata/deserialize_dense_crash-9486fb7c8a93b12c12a62166b43d31640c0208a9 differ diff --git a/vendor/regex-automata/tests/fuzz/testdata/deserialize_dense_minimized-from-9486fb7c8a93b12c12a62166b43d31640c0208a9 b/vendor/regex-automata/tests/fuzz/testdata/deserialize_dense_minimized-from-9486fb7c8a93b12c12a62166b43d31640c0208a9 new file mode 100644 index 0000000..72dbdad Binary files /dev/null and b/vendor/regex-automata/tests/fuzz/testdata/deserialize_dense_minimized-from-9486fb7c8a93b12c12a62166b43d31640c0208a9 differ diff --git a/vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-0da59c0434eaf35e5a6b470fa9244bb79c72b000 b/vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-0da59c0434eaf35e5a6b470fa9244bb79c72b000 new file mode 100644 index 0000000..5ce5088 Binary files /dev/null and b/vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-0da59c0434eaf35e5a6b470fa9244bb79c72b000 differ diff --git 
a/vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-18cfc246f2ddfc3dfc92b0c7893178c7cf65efa9 b/vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-18cfc246f2ddfc3dfc92b0c7893178c7cf65efa9 new file mode 100644 index 0000000..4fa13fb Binary files /dev/null and b/vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-18cfc246f2ddfc3dfc92b0c7893178c7cf65efa9 differ diff --git a/vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-61fd8e3003bf9d99f6c1e5a8488727eefd234b98 b/vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-61fd8e3003bf9d99f6c1e5a8488727eefd234b98 new file mode 100644 index 0000000..0f809f3 Binary files /dev/null and b/vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-61fd8e3003bf9d99f6c1e5a8488727eefd234b98 differ diff --git a/vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-a1b839d899ced76d5d7d0f78f9edb7a421505838 b/vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-a1b839d899ced76d5d7d0f78f9edb7a421505838 new file mode 100644 index 0000000..8b435fd Binary files /dev/null and b/vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-a1b839d899ced76d5d7d0f78f9edb7a421505838 differ diff --git a/vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-c383ae07ec5e191422eadc492117439011816570 b/vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-c383ae07ec5e191422eadc492117439011816570 new file mode 100644 index 0000000..69b6516 Binary files /dev/null and b/vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-c383ae07ec5e191422eadc492117439011816570 differ diff --git a/vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-d07703ceb94b10dcd9e4acb809f2051420449e2b b/vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-d07703ceb94b10dcd9e4acb809f2051420449e2b new file mode 100644 index 0000000..15b43e4 Binary files /dev/null and 
b/vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-d07703ceb94b10dcd9e4acb809f2051420449e2b differ diff --git a/vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-dbb8172d3984e7e7d03f4b5f8bb86ecd1460eff9 b/vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-dbb8172d3984e7e7d03f4b5f8bb86ecd1460eff9 new file mode 100644 index 0000000..aa72eb1 Binary files /dev/null and b/vendor/regex-automata/tests/fuzz/testdata/deserialize_sparse_crash-dbb8172d3984e7e7d03f4b5f8bb86ecd1460eff9 differ diff --git a/vendor/regex-automata/tests/gen/README.md b/vendor/regex-automata/tests/gen/README.md new file mode 100644 index 0000000..59439a1 --- /dev/null +++ b/vendor/regex-automata/tests/gen/README.md @@ -0,0 +1,65 @@ +This directory contains tests for serialized objects from the regex-automata +crate. Currently, there are only two supported such objects: dense and sparse +DFAs. + +The idea behind these tests is to commit some serialized objects and run some +basic tests by deserializing them and running searches and ensuring they are +correct. We also make sure these are run under Miri, since deserialization is +one of the biggest places where undefined behavior might occur in this crate +(at the time of writing). + +The main thing we're testing is that the *current* code can still deserialize +*old* objects correctly. Generally speaking, compatibility extends to semver +compatible releases of this crate. Beyond that, no promises are made, although +in practice callers can at least depend on errors occurring. (The serialized +format always includes a version number, and incompatible changes increment +that version number such that an error will occur if an unsupported version is +detected.) 
+ +To generate the dense DFAs, I used this command: + +``` +$ regex-cli generate serialize dense regex \ + MULTI_PATTERN_V2 \ + tests/gen/dense/ \ + --rustfmt \ + --safe \ + --starts-for-each-pattern \ + --specialize-start-states \ + --start-kind both \ + --unicode-word-boundary \ + --minimize \ + '\b[a-zA-Z]+\b' \ + '(?m)^\S+$' \ + '(?Rm)^\S+$' +``` + +And to generate the sparse DFAs, I used this command, which is the same as +above, but with `s/dense/sparse/g`. + +``` +$ regex-cli generate serialize sparse regex \ + MULTI_PATTERN_V2 \ + tests/gen/sparse/ \ + --rustfmt \ + --safe \ + --starts-for-each-pattern \ + --specialize-start-states \ + --start-kind both \ + --unicode-word-boundary \ + --minimize \ + '\b[a-zA-Z]+\b' \ + '(?m)^\S+$' \ + '(?Rm)^\S+$' +``` + +The idea is to try to enable as many of the DFA's options as possible in order +to test that serialization works for all of them. + +Arguably we should increase test coverage here, but this is a start. Note +that in particular, this does not need to test that serialization and +deserialization correctly roundtrips on its own. Indeed, the normal regex test +suite has a test that does a serialization round trip for every test supported +by DFAs. So that has very good coverage. What we're interested in testing here +is our compatibility promise: do DFAs generated with an older revision of the +code still deserialize correctly? 
diff --git a/vendor/regex-automata/tests/gen/dense/mod.rs b/vendor/regex-automata/tests/gen/dense/mod.rs new file mode 100644 index 0000000..b4365d4 --- /dev/null +++ b/vendor/regex-automata/tests/gen/dense/mod.rs @@ -0,0 +1,22 @@ +use regex_automata::{Input, Match}; + +mod multi_pattern_v2; + +#[test] +fn multi_pattern_v2() { + use multi_pattern_v2::MULTI_PATTERN_V2 as RE; + + assert_eq!(Some(Match::must(0, 0..4)), RE.find("abcd")); + assert_eq!(Some(Match::must(0, 2..6)), RE.find("@ abcd @")); + assert_eq!(Some(Match::must(1, 0..6)), RE.find("@abcd@")); + assert_eq!(Some(Match::must(0, 1..5)), RE.find("\nabcd\n")); + assert_eq!(Some(Match::must(0, 1..5)), RE.find("\nabcd wxyz\n")); + assert_eq!(Some(Match::must(1, 1..7)), RE.find("\n@abcd@\n")); + assert_eq!(Some(Match::must(2, 0..6)), RE.find("@abcd@\r\n")); + assert_eq!(Some(Match::must(1, 2..8)), RE.find("\r\n@abcd@")); + assert_eq!(Some(Match::must(2, 2..8)), RE.find("\r\n@abcd@\r\n")); + + // Fails because we have heuristic support for Unicode word boundaries + // enabled. + assert!(RE.try_search(&Input::new(b"\xFF@abcd@\xFF")).is_err()); +} diff --git a/vendor/regex-automata/tests/gen/dense/multi_pattern_v2.rs b/vendor/regex-automata/tests/gen/dense/multi_pattern_v2.rs new file mode 100644 index 0000000..a95fd20 --- /dev/null +++ b/vendor/regex-automata/tests/gen/dense/multi_pattern_v2.rs @@ -0,0 +1,43 @@ +// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: +// +// regex-cli generate serialize dense regex MULTI_PATTERN_V2 tests/gen/dense/ --rustfmt --safe --starts-for-each-pattern --specialize-start-states --start-kind both --unicode-word-boundary --minimize \b[a-zA-Z]+\b (?m)^\S+$ (?Rm)^\S+$ +// +// regex-cli 0.0.1 is available on crates.io. 
+ +use regex_automata::{ + dfa::{dense::DFA, regex::Regex}, + util::{lazy::Lazy, wire::AlignAs}, +}; + +pub static MULTI_PATTERN_V2: Lazy>> = + Lazy::new(|| { + let dfafwd = { + static ALIGNED: &AlignAs<[u8], u32> = &AlignAs { + _align: [], + #[cfg(target_endian = "big")] + bytes: *include_bytes!("multi_pattern_v2_fwd.bigendian.dfa"), + #[cfg(target_endian = "little")] + bytes: *include_bytes!( + "multi_pattern_v2_fwd.littleendian.dfa" + ), + }; + DFA::from_bytes(&ALIGNED.bytes) + .expect("serialized forward DFA should be valid") + .0 + }; + let dfarev = { + static ALIGNED: &AlignAs<[u8], u32> = &AlignAs { + _align: [], + #[cfg(target_endian = "big")] + bytes: *include_bytes!("multi_pattern_v2_rev.bigendian.dfa"), + #[cfg(target_endian = "little")] + bytes: *include_bytes!( + "multi_pattern_v2_rev.littleendian.dfa" + ), + }; + DFA::from_bytes(&ALIGNED.bytes) + .expect("serialized reverse DFA should be valid") + .0 + }; + Regex::builder().build_from_dfas(dfafwd, dfarev) + }); diff --git a/vendor/regex-automata/tests/gen/dense/multi_pattern_v2_fwd.bigendian.dfa b/vendor/regex-automata/tests/gen/dense/multi_pattern_v2_fwd.bigendian.dfa new file mode 100644 index 0000000..6d6e040 Binary files /dev/null and b/vendor/regex-automata/tests/gen/dense/multi_pattern_v2_fwd.bigendian.dfa differ diff --git a/vendor/regex-automata/tests/gen/dense/multi_pattern_v2_fwd.littleendian.dfa b/vendor/regex-automata/tests/gen/dense/multi_pattern_v2_fwd.littleendian.dfa new file mode 100644 index 0000000..a1f4b3d Binary files /dev/null and b/vendor/regex-automata/tests/gen/dense/multi_pattern_v2_fwd.littleendian.dfa differ diff --git a/vendor/regex-automata/tests/gen/dense/multi_pattern_v2_rev.bigendian.dfa b/vendor/regex-automata/tests/gen/dense/multi_pattern_v2_rev.bigendian.dfa new file mode 100644 index 0000000..74f74ec Binary files /dev/null and b/vendor/regex-automata/tests/gen/dense/multi_pattern_v2_rev.bigendian.dfa differ diff --git 
a/vendor/regex-automata/tests/gen/dense/multi_pattern_v2_rev.littleendian.dfa b/vendor/regex-automata/tests/gen/dense/multi_pattern_v2_rev.littleendian.dfa new file mode 100644 index 0000000..663bdb9 Binary files /dev/null and b/vendor/regex-automata/tests/gen/dense/multi_pattern_v2_rev.littleendian.dfa differ diff --git a/vendor/regex-automata/tests/gen/mod.rs b/vendor/regex-automata/tests/gen/mod.rs new file mode 100644 index 0000000..960cb42 --- /dev/null +++ b/vendor/regex-automata/tests/gen/mod.rs @@ -0,0 +1,2 @@ +mod dense; +mod sparse; diff --git a/vendor/regex-automata/tests/gen/sparse/mod.rs b/vendor/regex-automata/tests/gen/sparse/mod.rs new file mode 100644 index 0000000..b4365d4 --- /dev/null +++ b/vendor/regex-automata/tests/gen/sparse/mod.rs @@ -0,0 +1,22 @@ +use regex_automata::{Input, Match}; + +mod multi_pattern_v2; + +#[test] +fn multi_pattern_v2() { + use multi_pattern_v2::MULTI_PATTERN_V2 as RE; + + assert_eq!(Some(Match::must(0, 0..4)), RE.find("abcd")); + assert_eq!(Some(Match::must(0, 2..6)), RE.find("@ abcd @")); + assert_eq!(Some(Match::must(1, 0..6)), RE.find("@abcd@")); + assert_eq!(Some(Match::must(0, 1..5)), RE.find("\nabcd\n")); + assert_eq!(Some(Match::must(0, 1..5)), RE.find("\nabcd wxyz\n")); + assert_eq!(Some(Match::must(1, 1..7)), RE.find("\n@abcd@\n")); + assert_eq!(Some(Match::must(2, 0..6)), RE.find("@abcd@\r\n")); + assert_eq!(Some(Match::must(1, 2..8)), RE.find("\r\n@abcd@")); + assert_eq!(Some(Match::must(2, 2..8)), RE.find("\r\n@abcd@\r\n")); + + // Fails because we have heuristic support for Unicode word boundaries + // enabled. + assert!(RE.try_search(&Input::new(b"\xFF@abcd@\xFF")).is_err()); +} diff --git a/vendor/regex-automata/tests/gen/sparse/multi_pattern_v2.rs b/vendor/regex-automata/tests/gen/sparse/multi_pattern_v2.rs new file mode 100644 index 0000000..911e3f5 --- /dev/null +++ b/vendor/regex-automata/tests/gen/sparse/multi_pattern_v2.rs @@ -0,0 +1,37 @@ +// DO NOT EDIT THIS FILE. 
IT WAS AUTOMATICALLY GENERATED BY: +// +// regex-cli generate serialize sparse regex MULTI_PATTERN_V2 regex-automata/tests/gen/sparse/ --rustfmt --safe --starts-for-each-pattern --specialize-start-states --start-kind both --unicode-word-boundary --minimize \b[a-zA-Z]+\b (?m)^\S+$ (?Rm)^\S+$ +// +// regex-cli 0.0.1 is available on crates.io. + +use regex_automata::{ + dfa::{regex::Regex, sparse::DFA}, + util::lazy::Lazy, +}; + +pub static MULTI_PATTERN_V2: Lazy>> = + Lazy::new(|| { + let dfafwd = { + #[cfg(target_endian = "big")] + static BYTES: &'static [u8] = + include_bytes!("multi_pattern_v2_fwd.bigendian.dfa"); + #[cfg(target_endian = "little")] + static BYTES: &'static [u8] = + include_bytes!("multi_pattern_v2_fwd.littleendian.dfa"); + DFA::from_bytes(BYTES) + .expect("serialized forward DFA should be valid") + .0 + }; + let dfarev = { + #[cfg(target_endian = "big")] + static BYTES: &'static [u8] = + include_bytes!("multi_pattern_v2_rev.bigendian.dfa"); + #[cfg(target_endian = "little")] + static BYTES: &'static [u8] = + include_bytes!("multi_pattern_v2_rev.littleendian.dfa"); + DFA::from_bytes(BYTES) + .expect("serialized reverse DFA should be valid") + .0 + }; + Regex::builder().build_from_dfas(dfafwd, dfarev) + }); diff --git a/vendor/regex-automata/tests/gen/sparse/multi_pattern_v2_fwd.bigendian.dfa b/vendor/regex-automata/tests/gen/sparse/multi_pattern_v2_fwd.bigendian.dfa new file mode 100644 index 0000000..aa04f63 Binary files /dev/null and b/vendor/regex-automata/tests/gen/sparse/multi_pattern_v2_fwd.bigendian.dfa differ diff --git a/vendor/regex-automata/tests/gen/sparse/multi_pattern_v2_fwd.littleendian.dfa b/vendor/regex-automata/tests/gen/sparse/multi_pattern_v2_fwd.littleendian.dfa new file mode 100644 index 0000000..c27d92a Binary files /dev/null and b/vendor/regex-automata/tests/gen/sparse/multi_pattern_v2_fwd.littleendian.dfa differ diff --git a/vendor/regex-automata/tests/gen/sparse/multi_pattern_v2_rev.bigendian.dfa 
b/vendor/regex-automata/tests/gen/sparse/multi_pattern_v2_rev.bigendian.dfa new file mode 100644 index 0000000..89867d3 Binary files /dev/null and b/vendor/regex-automata/tests/gen/sparse/multi_pattern_v2_rev.bigendian.dfa differ diff --git a/vendor/regex-automata/tests/gen/sparse/multi_pattern_v2_rev.littleendian.dfa b/vendor/regex-automata/tests/gen/sparse/multi_pattern_v2_rev.littleendian.dfa new file mode 100644 index 0000000..c0ca807 Binary files /dev/null and b/vendor/regex-automata/tests/gen/sparse/multi_pattern_v2_rev.littleendian.dfa differ diff --git a/vendor/regex-automata/tests/hybrid/api.rs b/vendor/regex-automata/tests/hybrid/api.rs new file mode 100644 index 0000000..4b04c4f --- /dev/null +++ b/vendor/regex-automata/tests/hybrid/api.rs @@ -0,0 +1,171 @@ +use std::error::Error; + +use regex_automata::{ + hybrid::dfa::{OverlappingState, DFA}, + nfa::thompson, + HalfMatch, Input, MatchError, +}; + +// Tests that too many cache resets cause the lazy DFA to quit. +// +// We only test this on 64-bit because the test is gingerly crafted based on +// implementation details of cache sizes. It's not a great test because of +// that, but it does check some interesting properties around how positions are +// reported when a search "gives up." +// +// NOTE: If you change something in lazy DFA implementation that causes this +// test to fail by reporting different "gave up" positions, then it's generally +// okay to update the positions in the test below as long as you're sure your +// changes are correct. Namely, it is expected that if there are changes in the +// cache size (or changes in how big things are inside the cache), then its +// utilization may change slightly and thus impact where a search gives up. +// Precisely where a search gives up is not an API guarantee, so changing the +// offsets here is OK. 
+#[test] +#[cfg(target_pointer_width = "64")] +#[cfg(not(miri))] +fn too_many_cache_resets_cause_quit() -> Result<(), Box> { + // This is a carefully chosen regex. The idea is to pick one that requires + // some decent number of states (hence the bounded repetition). But we + // specifically choose to create a class with an ASCII letter and a + // non-ASCII letter so that we can check that no new states are created + // once the cache is full. Namely, if we fill up the cache on a haystack + // of 'a's, then in order to match one 'β', a new state will need to be + // created since a 'β' is encoded with multiple bytes. + // + // So we proceed by "filling" up the cache by searching a haystack of just + // 'a's. The cache won't have enough room to add enough states to find the + // match (because of the bounded repetition), which should result in it + // giving up before it finds a match. + // + // Since there's now no more room to create states, we search a haystack + // of 'β' and confirm that it gives up immediately. + let pattern = r"[aβ]{99}"; + let dfa = DFA::builder() + .configure( + // Configure it so that we have the minimum cache capacity + // possible. And that if any resets occur, the search quits. + DFA::config() + .skip_cache_capacity_check(true) + .cache_capacity(0) + .minimum_cache_clear_count(Some(0)), + ) + .thompson(thompson::NFA::config()) + .build(pattern)?; + let mut cache = dfa.create_cache(); + + let haystack = "a".repeat(101).into_bytes(); + let err = MatchError::gave_up(24); + // Notice that we make the same amount of progress in each search! That's + // because the cache is reused and already has states to handle the first + // N bytes. 
+ assert_eq!( + Err(err.clone()), + dfa.try_search_fwd(&mut cache, &Input::new(&haystack)) + ); + assert_eq!( + Err(err.clone()), + dfa.try_search_overlapping_fwd( + &mut cache, + &Input::new(&haystack), + &mut OverlappingState::start() + ), + ); + + let haystack = "β".repeat(101).into_bytes(); + let err = MatchError::gave_up(2); + assert_eq!( + Err(err), + dfa.try_search_fwd(&mut cache, &Input::new(&haystack)) + ); + // no need to test that other find routines quit, since we did that above + + // OK, if we reset the cache, then we should be able to create more states + // and make more progress with searching for betas. + cache.reset(&dfa); + let err = MatchError::gave_up(26); + assert_eq!( + Err(err), + dfa.try_search_fwd(&mut cache, &Input::new(&haystack)) + ); + + // ... switching back to ASCII still makes progress since it just needs to + // set transitions on existing states! + let haystack = "a".repeat(101).into_bytes(); + let err = MatchError::gave_up(13); + assert_eq!( + Err(err), + dfa.try_search_fwd(&mut cache, &Input::new(&haystack)) + ); + + Ok(()) +} + +// Tests that quit bytes in the forward direction work correctly. +#[test] +fn quit_fwd() -> Result<(), Box> { + let dfa = DFA::builder() + .configure(DFA::config().quit(b'x', true)) + .build("[[:word:]]+$")?; + let mut cache = dfa.create_cache(); + + assert_eq!( + dfa.try_search_fwd(&mut cache, &Input::new("abcxyz")), + Err(MatchError::quit(b'x', 3)), + ); + assert_eq!( + dfa.try_search_overlapping_fwd( + &mut cache, + &Input::new(b"abcxyz"), + &mut OverlappingState::start() + ), + Err(MatchError::quit(b'x', 3)), + ); + + Ok(()) +} + +// Tests that quit bytes in the reverse direction work correctly. 
+#[test] +fn quit_rev() -> Result<(), Box<dyn Error>> { + let dfa = DFA::builder() + .configure(DFA::config().quit(b'x', true)) + .thompson(thompson::Config::new().reverse(true)) + .build("^[[:word:]]+")?; + let mut cache = dfa.create_cache(); + + assert_eq!( + dfa.try_search_rev(&mut cache, &Input::new("abcxyz")), + Err(MatchError::quit(b'x', 3)), + ); + + Ok(()) +} + +// Tests that if we heuristically enable Unicode word boundaries but then +// instruct that a non-ASCII byte should NOT be a quit byte, then the builder +// will panic. +#[test] +#[should_panic] +fn quit_panics() { + DFA::config().unicode_word_boundary(true).quit(b'\xFF', false); +} + +// This tests an interesting case where even if the Unicode word boundary option +// is disabled, setting all non-ASCII bytes to be quit bytes will cause Unicode +// word boundaries to be enabled. +#[test] +fn unicode_word_implicitly_works() -> Result<(), Box<dyn Error>> { + let mut config = DFA::config(); + for b in 0x80..=0xFF { + config = config.quit(b, true); + } + let dfa = DFA::builder().configure(config).build(r"\b")?; + let mut cache = dfa.create_cache(); + let expected = HalfMatch::must(0, 1); + assert_eq!( + Ok(Some(expected)), + dfa.try_search_fwd(&mut cache, &Input::new(" a")), + ); + Ok(()) +} diff --git a/vendor/regex-automata/tests/hybrid/mod.rs b/vendor/regex-automata/tests/hybrid/mod.rs new file mode 100644 index 0000000..36667d0 --- /dev/null +++ b/vendor/regex-automata/tests/hybrid/mod.rs @@ -0,0 +1,3 @@ +mod api; +#[cfg(not(miri))] +mod suite; diff --git a/vendor/regex-automata/tests/hybrid/suite.rs b/vendor/regex-automata/tests/hybrid/suite.rs new file mode 100644 index 0000000..4aaca66 --- /dev/null +++ b/vendor/regex-automata/tests/hybrid/suite.rs @@ -0,0 +1,347 @@ +use { + anyhow::Result, + regex_automata::{ + hybrid::{ + dfa::{OverlappingState, DFA}, + regex::{self, Regex}, + }, + nfa::thompson, + util::{prefilter::Prefilter, syntax}, + Anchored, Input, PatternSet, + }, + regex_test::{ + CompiledRegex, Match, 
RegexTest, SearchKind, Span, TestResult, + TestRunner, + }, +}; + +use crate::{create_input, suite, untestify_kind}; + +const EXPANSIONS: &[&str] = &["is_match", "find", "which"]; + +/// Tests the default configuration of the hybrid NFA/DFA. +#[test] +fn default() -> Result<()> { + let builder = Regex::builder(); + TestRunner::new()? + .expand(EXPANSIONS, |t| t.compiles()) + // Without NFA shrinking, this test blows the default cache capacity. + .blacklist("expensive/regression-many-repeat-no-stack-overflow") + .test_iter(suite()?.iter(), compiler(builder)) + .assert(); + Ok(()) +} + +/// Tests the hybrid NFA/DFA with prefilters enabled. +#[test] +fn prefilter() -> Result<()> { + let my_compiler = |test: &RegexTest, regexes: &[String]| { + // Parse regexes as HIRs so we can get literals to build a prefilter. + let mut hirs = vec![]; + for pattern in regexes.iter() { + hirs.push(syntax::parse_with(pattern, &config_syntax(test))?); + } + let kind = match untestify_kind(test.match_kind()) { + None => return Ok(CompiledRegex::skip()), + Some(kind) => kind, + }; + let pre = Prefilter::from_hirs_prefix(kind, &hirs); + let mut builder = Regex::builder(); + builder.dfa(DFA::config().prefilter(pre)); + compiler(builder)(test, regexes) + }; + TestRunner::new()? + .expand(EXPANSIONS, |t| t.compiles()) + // Without NFA shrinking, this test blows the default cache capacity. + .blacklist("expensive/regression-many-repeat-no-stack-overflow") + .test_iter(suite()?.iter(), my_compiler) + .assert(); + Ok(()) +} + +/// Tests the hybrid NFA/DFA with NFA shrinking enabled. +/// +/// This is *usually* not the configuration one wants for a lazy DFA. NFA +/// shrinking is mostly only advantageous when building a full DFA since it +/// can sharply decrease the amount of time determinization takes. But NFA +/// shrinking is itself otherwise fairly expensive currently. 
Since a lazy DFA +/// has no compilation time (other than for building the NFA of course) before +/// executing a search, it's usually worth it to forgo NFA shrinking. +/// +/// Nevertheless, we test to make sure everything is OK with NFA shrinking. As +/// a bonus, there are some tests we don't need to skip because they now fit in +/// the default cache capacity. +#[test] +fn nfa_shrink() -> Result<()> { + let mut builder = Regex::builder(); + builder.thompson(thompson::Config::new().shrink(true)); + TestRunner::new()? + .expand(EXPANSIONS, |t| t.compiles()) + .test_iter(suite()?.iter(), compiler(builder)) + .assert(); + Ok(()) +} + +/// Tests the hybrid NFA/DFA when 'starts_for_each_pattern' is enabled for all +/// tests. +#[test] +fn starts_for_each_pattern() -> Result<()> { + let mut builder = Regex::builder(); + builder.dfa(DFA::config().starts_for_each_pattern(true)); + TestRunner::new()? + .expand(EXPANSIONS, |t| t.compiles()) + // Without NFA shrinking, this test blows the default cache capacity. + .blacklist("expensive/regression-many-repeat-no-stack-overflow") + .test_iter(suite()?.iter(), compiler(builder)) + .assert(); + Ok(()) +} + +/// Tests the hybrid NFA/DFA when 'specialize_start_states' is enabled. +#[test] +fn specialize_start_states() -> Result<()> { + let mut builder = Regex::builder(); + builder.dfa(DFA::config().specialize_start_states(true)); + TestRunner::new()? + .expand(EXPANSIONS, |t| t.compiles()) + // Without NFA shrinking, this test blows the default cache capacity. + .blacklist("expensive/regression-many-repeat-no-stack-overflow") + .test_iter(suite()?.iter(), compiler(builder)) + .assert(); + Ok(()) +} + +/// Tests the hybrid NFA/DFA when byte classes are disabled. +/// +/// N.B. Disabling byte classes doesn't avoid any indirection at search time. +/// All it does is cause every byte value to be its own distinct equivalence +/// class. 
+#[test] +fn no_byte_classes() -> Result<()> { + let mut builder = Regex::builder(); + builder.dfa(DFA::config().byte_classes(false)); + TestRunner::new()? + .expand(EXPANSIONS, |t| t.compiles()) + // Without NFA shrinking, this test blows the default cache capacity. + .blacklist("expensive/regression-many-repeat-no-stack-overflow") + .test_iter(suite()?.iter(), compiler(builder)) + .assert(); + Ok(()) +} + +/// Tests that hybrid NFA/DFA never clears its cache for any test with the +/// default capacity. +/// +/// N.B. If a regex suite test is added that causes the cache to be cleared, +/// then this should just skip that test. (Which can be done by calling the +/// 'blacklist' method on 'TestRunner'.) +#[test] +fn no_cache_clearing() -> Result<()> { + let mut builder = Regex::builder(); + builder.dfa(DFA::config().minimum_cache_clear_count(Some(0))); + TestRunner::new()? + .expand(EXPANSIONS, |t| t.compiles()) + // Without NFA shrinking, this test blows the default cache capacity. + .blacklist("expensive/regression-many-repeat-no-stack-overflow") + .test_iter(suite()?.iter(), compiler(builder)) + .assert(); + Ok(()) +} + +/// Tests the hybrid NFA/DFA when the minimum cache capacity is set. +#[test] +fn min_cache_capacity() -> Result<()> { + let mut builder = Regex::builder(); + builder + .dfa(DFA::config().cache_capacity(0).skip_cache_capacity_check(true)); + TestRunner::new()? + .expand(EXPANSIONS, |t| t.compiles()) + .test_iter(suite()?.iter(), compiler(builder)) + .assert(); + Ok(()) +} + +fn compiler( + mut builder: regex::Builder, +) -> impl FnMut(&RegexTest, &[String]) -> Result { + move |test, regexes| { + // Parse regexes as HIRs for some analysis below. + let mut hirs = vec![]; + for pattern in regexes.iter() { + hirs.push(syntax::parse_with(pattern, &config_syntax(test))?); + } + + // Check if our regex contains things that aren't supported by DFAs. + // That is, Unicode word boundaries when searching non-ASCII text. 
+ if !test.haystack().is_ascii() { + for hir in hirs.iter() { + if hir.properties().look_set().contains_word_unicode() { + return Ok(CompiledRegex::skip()); + } + } + } + if !configure_regex_builder(test, &mut builder) { + return Ok(CompiledRegex::skip()); + } + let re = builder.build_many(®exes)?; + let mut cache = re.create_cache(); + Ok(CompiledRegex::compiled(move |test| -> TestResult { + run_test(&re, &mut cache, test) + })) + } +} + +fn run_test( + re: &Regex, + cache: &mut regex::Cache, + test: &RegexTest, +) -> TestResult { + let input = create_input(test); + match test.additional_name() { + "is_match" => { + TestResult::matched(re.is_match(cache, input.earliest(true))) + } + "find" => match test.search_kind() { + SearchKind::Earliest | SearchKind::Leftmost => { + let input = + input.earliest(test.search_kind() == SearchKind::Earliest); + TestResult::matches( + re.find_iter(cache, input) + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|m| Match { + id: m.pattern().as_usize(), + span: Span { start: m.start(), end: m.end() }, + }), + ) + } + SearchKind::Overlapping => { + try_search_overlapping(re, cache, &input).unwrap() + } + }, + "which" => match test.search_kind() { + SearchKind::Earliest | SearchKind::Leftmost => { + // There are no "which" APIs for standard searches. + TestResult::skip() + } + SearchKind::Overlapping => { + let dfa = re.forward(); + let cache = cache.as_parts_mut().0; + let mut patset = PatternSet::new(dfa.pattern_len()); + dfa.try_which_overlapping_matches(cache, &input, &mut patset) + .unwrap(); + TestResult::which(patset.iter().map(|p| p.as_usize())) + } + }, + name => TestResult::fail(&format!("unrecognized test name: {}", name)), + } +} + +/// Configures the given regex builder with all relevant settings on the given +/// regex test. +/// +/// If the regex test has a setting that is unsupported, then this returns +/// false (implying the test should be skipped). 
+fn configure_regex_builder( + test: &RegexTest, + builder: &mut regex::Builder, +) -> bool { + let match_kind = match untestify_kind(test.match_kind()) { + None => return false, + Some(k) => k, + }; + + let mut dfa_config = + DFA::config().match_kind(match_kind).unicode_word_boundary(true); + // When doing an overlapping search, we might try to find the start of each + // match with a custom search routine. In that case, we need to tell the + // reverse search (for the start offset) which pattern to look for. The + // only way that API works is when anchored starting states are compiled + // for each pattern. This does technically also enable it for the forward + // DFA, but we're okay with that. + if test.search_kind() == SearchKind::Overlapping { + dfa_config = dfa_config.starts_for_each_pattern(true); + } + builder + .syntax(config_syntax(test)) + .thompson(config_thompson(test)) + .dfa(dfa_config); + true +} + +/// Configuration of a Thompson NFA compiler from a regex test. +fn config_thompson(test: &RegexTest) -> thompson::Config { + let mut lookm = regex_automata::util::look::LookMatcher::new(); + lookm.set_line_terminator(test.line_terminator()); + thompson::Config::new().utf8(test.utf8()).look_matcher(lookm) +} + +/// Configuration of the regex parser from a regex test. +fn config_syntax(test: &RegexTest) -> syntax::Config { + syntax::Config::new() + .case_insensitive(test.case_insensitive()) + .unicode(test.unicode()) + .utf8(test.utf8()) + .line_terminator(test.line_terminator()) +} + +/// Execute an overlapping search, and for each match found, also find its +/// overlapping starting positions. +/// +/// N.B. This routine used to be part of the crate API, but 1) it wasn't clear +/// to me how useful it was and 2) it wasn't clear to me what its semantics +/// should be. In particular, a potentially surprising footgun of this routine +/// that it is worst case *quadratic* in the size of the haystack. 
Namely, it's +/// possible to report a match at every position, and for every such position, +/// scan all the way to the beginning of the haystack to find the starting +/// position. Typical leftmost non-overlapping searches don't suffer from this +/// because, well, matches can't overlap. So subsequent searches after a match +/// is found don't revisit previously scanned parts of the haystack. +/// +/// Its semantics can be strange for other reasons too. For example, given +/// the regex '.*' and the haystack 'zz', the full set of overlapping matches +/// is: [0, 0], [1, 1], [0, 1], [2, 2], [1, 2], [0, 2]. The ordering of +/// those matches is quite strange, but makes sense when you think about the +/// implementation: an end offset is found left-to-right, and then one or more +/// starting offsets are found right-to-left. +/// +/// Nevertheless, we provide this routine in our test suite because it's +/// useful to test the low level DFA overlapping search and our test suite +/// is written in a way that requires starting offsets. 
+fn try_search_overlapping( + re: &Regex, + cache: &mut regex::Cache, + input: &Input<'_>, +) -> Result { + let mut matches = vec![]; + let mut fwd_state = OverlappingState::start(); + let (fwd_dfa, rev_dfa) = (re.forward(), re.reverse()); + let (fwd_cache, rev_cache) = cache.as_parts_mut(); + while let Some(end) = { + fwd_dfa.try_search_overlapping_fwd( + fwd_cache, + input, + &mut fwd_state, + )?; + fwd_state.get_match() + } { + let revsearch = input + .clone() + .range(input.start()..end.offset()) + .anchored(Anchored::Pattern(end.pattern())) + .earliest(false); + let mut rev_state = OverlappingState::start(); + while let Some(start) = { + rev_dfa.try_search_overlapping_rev( + rev_cache, + &revsearch, + &mut rev_state, + )?; + rev_state.get_match() + } { + let span = Span { start: start.offset(), end: end.offset() }; + let mat = Match { id: end.pattern().as_usize(), span }; + matches.push(mat); + } + } + Ok(TestResult::matches(matches)) +} diff --git a/vendor/regex-automata/tests/lib.rs b/vendor/regex-automata/tests/lib.rs new file mode 100644 index 0000000..67c979a --- /dev/null +++ b/vendor/regex-automata/tests/lib.rs @@ -0,0 +1,115 @@ +// We have a similar config in the regex-automata crate root. Basically, it is +// just too annoying to deal with dead code when a subset of features is +// enabled. +#![cfg_attr( + not(all( + feature = "std", + feature = "nfa", + feature = "dfa", + feature = "hybrid", + feature = "perf-literal-substring", + feature = "perf-literal-multisubstring", + )), + allow(dead_code, unused_imports, unused_variables) +)] +// Similar deal with Miri. Just let dead code warnings be. 
+#![cfg_attr(miri, allow(dead_code, unused_imports, unused_variables))] + +#[cfg(any(feature = "dfa-search", feature = "dfa-onepass"))] +mod dfa; +#[cfg(feature = "dfa-search")] +mod fuzz; +#[cfg(feature = "dfa-search")] +mod gen; +#[cfg(feature = "hybrid")] +mod hybrid; +#[cfg(feature = "meta")] +mod meta; +#[cfg(any(feature = "nfa-backtrack", feature = "nfa-pikevm"))] +mod nfa; + +fn suite() -> anyhow::Result { + let _ = env_logger::try_init(); + + let mut tests = regex_test::RegexTests::new(); + macro_rules! load { + ($name:expr) => {{ + const DATA: &[u8] = + include_bytes!(concat!("../../testdata/", $name, ".toml")); + tests.load_slice($name, DATA)?; + }}; + } + + load!("anchored"); + load!("bytes"); + load!("crazy"); + load!("crlf"); + load!("earliest"); + load!("empty"); + load!("expensive"); + load!("flags"); + load!("iter"); + load!("leftmost-all"); + load!("line-terminator"); + load!("misc"); + load!("multiline"); + load!("no-unicode"); + load!("overlapping"); + load!("regression"); + load!("set"); + load!("substring"); + load!("unicode"); + load!("utf8"); + load!("word-boundary"); + load!("word-boundary-special"); + load!("fowler/basic"); + load!("fowler/nullsubexpr"); + load!("fowler/repetition"); + + Ok(tests) +} + +/// Configure a regex_automata::Input with the given test configuration. +fn create_input<'h>( + test: &'h regex_test::RegexTest, +) -> regex_automata::Input<'h> { + use regex_automata::Anchored; + + let bounds = test.bounds(); + let anchored = if test.anchored() { Anchored::Yes } else { Anchored::No }; + regex_automata::Input::new(test.haystack()) + .range(bounds.start..bounds.end) + .anchored(anchored) +} + +/// Convert capture matches into the test suite's capture values. +/// +/// The given captures must represent a valid match, where the first capturing +/// group has a non-None span. Otherwise this panics. 
+fn testify_captures( + caps: ®ex_automata::util::captures::Captures, +) -> regex_test::Captures { + assert!(caps.is_match(), "expected captures to represent a match"); + let spans = caps.iter().map(|group| { + group.map(|m| regex_test::Span { start: m.start, end: m.end }) + }); + // These unwraps are OK because we assume our 'caps' represents a match, + // and a match always gives a non-zero number of groups with the first + // group being non-None. + regex_test::Captures::new(caps.pattern().unwrap().as_usize(), spans) + .unwrap() +} + +/// Convert a test harness match kind to a regex-automata match kind. If +/// regex-automata doesn't support the harness kind, then `None` is returned. +fn untestify_kind( + kind: regex_test::MatchKind, +) -> Option { + match kind { + regex_test::MatchKind::All => Some(regex_automata::MatchKind::All), + regex_test::MatchKind::LeftmostFirst => { + Some(regex_automata::MatchKind::LeftmostFirst) + } + regex_test::MatchKind::LeftmostLongest => None, + } +} diff --git a/vendor/regex-automata/tests/meta/mod.rs b/vendor/regex-automata/tests/meta/mod.rs new file mode 100644 index 0000000..9d6ab47 --- /dev/null +++ b/vendor/regex-automata/tests/meta/mod.rs @@ -0,0 +1,2 @@ +#[cfg(not(miri))] +mod suite; diff --git a/vendor/regex-automata/tests/meta/suite.rs b/vendor/regex-automata/tests/meta/suite.rs new file mode 100644 index 0000000..20f97b4 --- /dev/null +++ b/vendor/regex-automata/tests/meta/suite.rs @@ -0,0 +1,200 @@ +use { + anyhow::Result, + regex_automata::{ + meta::{self, Regex}, + util::syntax, + MatchKind, PatternSet, + }, + regex_test::{ + CompiledRegex, Match, RegexTest, SearchKind, Span, TestResult, + TestRunner, + }, +}; + +use crate::{create_input, suite, testify_captures}; + +const BLACKLIST: &[&str] = &[ + // These 'earliest' tests are blacklisted because the meta searcher doesn't + // give the same offsets that the test expects. 
This is legal because the + // 'earliest' routines don't guarantee a particular match offset other + // than "the earliest the regex engine can report a match." Some regex + // engines will quit earlier than others. The backtracker, for example, + // can't really quit before finding the full leftmost-first match. Many of + // the literal searchers also don't have the ability to quit fully or it's + // otherwise not worth doing. (A literal searcher not quitting as early as + // possible usually means looking at a few more bytes. That's no biggie.) + "earliest/", +]; + +/// Tests the default configuration of the meta regex engine. +#[test] +fn default() -> Result<()> { + let builder = Regex::builder(); + let mut runner = TestRunner::new()?; + runner + .expand(&["is_match", "find", "captures"], |test| test.compiles()) + .blacklist_iter(BLACKLIST) + .test_iter(suite()?.iter(), compiler(builder)) + .assert(); + Ok(()) +} + +/// Tests the default configuration minus the full DFA. +#[test] +fn no_dfa() -> Result<()> { + let mut builder = Regex::builder(); + builder.configure(Regex::config().dfa(false)); + let mut runner = TestRunner::new()?; + runner + .expand(&["is_match", "find", "captures"], |test| test.compiles()) + .blacklist_iter(BLACKLIST) + .test_iter(suite()?.iter(), compiler(builder)) + .assert(); + Ok(()) +} + +/// Tests the default configuration minus the full DFA and lazy DFA. +#[test] +fn no_dfa_hybrid() -> Result<()> { + let mut builder = Regex::builder(); + builder.configure(Regex::config().dfa(false).hybrid(false)); + let mut runner = TestRunner::new()?; + runner + .expand(&["is_match", "find", "captures"], |test| test.compiles()) + .blacklist_iter(BLACKLIST) + .test_iter(suite()?.iter(), compiler(builder)) + .assert(); + Ok(()) +} + +/// Tests the default configuration minus the full DFA, lazy DFA and one-pass +/// DFA. 
+#[test] +fn no_dfa_hybrid_onepass() -> Result<()> { + let mut builder = Regex::builder(); + builder.configure(Regex::config().dfa(false).hybrid(false).onepass(false)); + let mut runner = TestRunner::new()?; + runner + .expand(&["is_match", "find", "captures"], |test| test.compiles()) + .blacklist_iter(BLACKLIST) + .test_iter(suite()?.iter(), compiler(builder)) + .assert(); + Ok(()) +} + +/// Tests the default configuration minus the full DFA, lazy DFA, one-pass +/// DFA and backtracker. +#[test] +fn no_dfa_hybrid_onepass_backtrack() -> Result<()> { + let mut builder = Regex::builder(); + builder.configure( + Regex::config() + .dfa(false) + .hybrid(false) + .onepass(false) + .backtrack(false), + ); + let mut runner = TestRunner::new()?; + runner + .expand(&["is_match", "find", "captures"], |test| test.compiles()) + .blacklist_iter(BLACKLIST) + .test_iter(suite()?.iter(), compiler(builder)) + .assert(); + Ok(()) +} + +fn compiler( + mut builder: meta::Builder, +) -> impl FnMut(&RegexTest, &[String]) -> Result { + move |test, regexes| { + if !configure_meta_builder(test, &mut builder) { + return Ok(CompiledRegex::skip()); + } + let re = builder.build_many(®exes)?; + Ok(CompiledRegex::compiled(move |test| -> TestResult { + run_test(&re, test) + })) + } +} + +fn run_test(re: &Regex, test: &RegexTest) -> TestResult { + let input = create_input(test); + match test.additional_name() { + "is_match" => TestResult::matched(re.is_match(input)), + "find" => match test.search_kind() { + SearchKind::Earliest => TestResult::matches( + re.find_iter(input.earliest(true)) + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|m| Match { + id: m.pattern().as_usize(), + span: Span { start: m.start(), end: m.end() }, + }), + ), + SearchKind::Leftmost => TestResult::matches( + re.find_iter(input) + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|m| Match { + id: m.pattern().as_usize(), + span: Span { start: m.start(), end: m.end() }, + }), + ), + 
SearchKind::Overlapping => { + let mut patset = PatternSet::new(re.pattern_len()); + re.which_overlapping_matches(&input, &mut patset); + TestResult::which(patset.iter().map(|p| p.as_usize())) + } + }, + "captures" => match test.search_kind() { + SearchKind::Earliest => { + let it = re + .captures_iter(input.earliest(true)) + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|caps| testify_captures(&caps)); + TestResult::captures(it) + } + SearchKind::Leftmost => { + let it = re + .captures_iter(input) + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|caps| testify_captures(&caps)); + TestResult::captures(it) + } + SearchKind::Overlapping => { + // There is no overlapping regex API that supports captures. + TestResult::skip() + } + }, + name => TestResult::fail(&format!("unrecognized test name: {}", name)), + } +} + +/// Configures the given regex builder with all relevant settings on the given +/// regex test. +/// +/// If the regex test has a setting that is unsupported, then this returns +/// false (implying the test should be skipped). +fn configure_meta_builder( + test: &RegexTest, + builder: &mut meta::Builder, +) -> bool { + let match_kind = match test.match_kind() { + regex_test::MatchKind::All => MatchKind::All, + regex_test::MatchKind::LeftmostFirst => MatchKind::LeftmostFirst, + regex_test::MatchKind::LeftmostLongest => return false, + }; + let meta_config = Regex::config() + .match_kind(match_kind) + .utf8_empty(test.utf8()) + .line_terminator(test.line_terminator()); + builder.configure(meta_config).syntax(config_syntax(test)); + true +} + +/// Configuration of the regex parser from a regex test. 
+fn config_syntax(test: &RegexTest) -> syntax::Config { + syntax::Config::new() + .case_insensitive(test.case_insensitive()) + .unicode(test.unicode()) + .utf8(test.utf8()) + .line_terminator(test.line_terminator()) +} diff --git a/vendor/regex-automata/tests/nfa/mod.rs b/vendor/regex-automata/tests/nfa/mod.rs new file mode 100644 index 0000000..3268621 --- /dev/null +++ b/vendor/regex-automata/tests/nfa/mod.rs @@ -0,0 +1 @@ +mod thompson; diff --git a/vendor/regex-automata/tests/nfa/thompson/backtrack/mod.rs b/vendor/regex-automata/tests/nfa/thompson/backtrack/mod.rs new file mode 100644 index 0000000..9d6ab47 --- /dev/null +++ b/vendor/regex-automata/tests/nfa/thompson/backtrack/mod.rs @@ -0,0 +1,2 @@ +#[cfg(not(miri))] +mod suite; diff --git a/vendor/regex-automata/tests/nfa/thompson/backtrack/suite.rs b/vendor/regex-automata/tests/nfa/thompson/backtrack/suite.rs new file mode 100644 index 0000000..bce0eef --- /dev/null +++ b/vendor/regex-automata/tests/nfa/thompson/backtrack/suite.rs @@ -0,0 +1,213 @@ +use { + anyhow::Result, + regex_automata::{ + nfa::thompson::{ + self, + backtrack::{self, BoundedBacktracker}, + NFA, + }, + util::{prefilter::Prefilter, syntax}, + Input, + }, + regex_test::{ + CompiledRegex, Match, MatchKind, RegexTest, SearchKind, Span, + TestResult, TestRunner, + }, +}; + +use crate::{create_input, suite, testify_captures}; + +/// Tests the default configuration of the bounded backtracker. +#[test] +fn default() -> Result<()> { + let builder = BoundedBacktracker::builder(); + let mut runner = TestRunner::new()?; + runner.expand(&["is_match", "find", "captures"], |test| test.compiles()); + // At the time of writing, every regex search in the test suite fits + // into the backtracker's default visited capacity (except for the + // blacklisted tests below). If regexes are added that blow that capacity, + // then they should be blacklisted here. 
A tempting alternative is to + // automatically skip them by checking the haystack length against + // BoundedBacktracker::max_haystack_len, but that could wind up hiding + // interesting failure modes. e.g., If the visited capacity is somehow + // wrong or smaller than it should be. + runner.blacklist("expensive/backtrack-blow-visited-capacity"); + runner.test_iter(suite()?.iter(), compiler(builder)).assert(); + Ok(()) +} + +/// Tests the backtracker with prefilters enabled. +#[test] +fn prefilter() -> Result<()> { + let my_compiler = |test: &RegexTest, regexes: &[String]| { + // Parse regexes as HIRs so we can get literals to build a prefilter. + let mut hirs = vec![]; + for pattern in regexes.iter() { + hirs.push(syntax::parse_with(pattern, &config_syntax(test))?); + } + // We can always select leftmost-first here because the backtracker + // only supports leftmost-first matching. + let pre = Prefilter::from_hirs_prefix( + regex_automata::MatchKind::LeftmostFirst, + &hirs, + ); + let mut builder = BoundedBacktracker::builder(); + builder.configure(BoundedBacktracker::config().prefilter(pre)); + compiler(builder)(test, regexes) + }; + let mut runner = TestRunner::new()?; + runner.expand(&["is_match", "find", "captures"], |test| test.compiles()); + runner.blacklist("expensive/backtrack-blow-visited-capacity"); + runner.test_iter(suite()?.iter(), my_compiler).assert(); + Ok(()) +} + +/// Tests the bounded backtracker when its visited capacity is set to its +/// minimum amount. 
+#[test] +fn min_visited_capacity() -> Result<()> { + let mut runner = TestRunner::new()?; + runner.expand(&["is_match", "find", "captures"], |test| test.compiles()); + runner + .test_iter(suite()?.iter(), move |test, regexes| { + let nfa = NFA::compiler() + .configure(config_thompson(test)) + .syntax(config_syntax(test)) + .build_many(®exes)?; + let mut builder = BoundedBacktracker::builder(); + if !configure_backtrack_builder(test, &mut builder) { + return Ok(CompiledRegex::skip()); + } + // Setup the bounded backtracker so that its visited capacity is + // the absolute minimum required for the test's haystack. + builder.configure(BoundedBacktracker::config().visited_capacity( + backtrack::min_visited_capacity( + &nfa, + &Input::new(test.haystack()), + ), + )); + + let re = builder.build_from_nfa(nfa)?; + let mut cache = re.create_cache(); + Ok(CompiledRegex::compiled(move |test| -> TestResult { + run_test(&re, &mut cache, test) + })) + }) + .assert(); + Ok(()) +} + +fn compiler( + mut builder: backtrack::Builder, +) -> impl FnMut(&RegexTest, &[String]) -> Result { + move |test, regexes| { + if !configure_backtrack_builder(test, &mut builder) { + return Ok(CompiledRegex::skip()); + } + let re = builder.build_many(®exes)?; + let mut cache = re.create_cache(); + Ok(CompiledRegex::compiled(move |test| -> TestResult { + run_test(&re, &mut cache, test) + })) + } +} + +fn run_test( + re: &BoundedBacktracker, + cache: &mut backtrack::Cache, + test: &RegexTest, +) -> TestResult { + let input = create_input(test); + match test.additional_name() { + "is_match" => match test.search_kind() { + SearchKind::Earliest | SearchKind::Overlapping => { + TestResult::skip() + } + SearchKind::Leftmost => { + let input = input.earliest(true); + TestResult::matched(re.try_is_match(cache, input).unwrap()) + } + }, + "find" => match test.search_kind() { + SearchKind::Earliest | SearchKind::Overlapping => { + TestResult::skip() + } + SearchKind::Leftmost => TestResult::matches( + 
re.try_find_iter(cache, input) + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|result| result.unwrap()) + .map(|m| Match { + id: m.pattern().as_usize(), + span: Span { start: m.start(), end: m.end() }, + }), + ), + }, + "captures" => match test.search_kind() { + SearchKind::Earliest | SearchKind::Overlapping => { + TestResult::skip() + } + SearchKind::Leftmost => TestResult::captures( + re.try_captures_iter(cache, input) + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|result| result.unwrap()) + .map(|caps| testify_captures(&caps)), + ), + }, + name => TestResult::fail(&format!("unrecognized test name: {}", name)), + } +} + +/// Configures the given regex builder with all relevant settings on the given +/// regex test. +/// +/// If the regex test has a setting that is unsupported, then this returns +/// false (implying the test should be skipped). +fn configure_backtrack_builder( + test: &RegexTest, + builder: &mut backtrack::Builder, +) -> bool { + match (test.search_kind(), test.match_kind()) { + // For testing the standard search APIs. This is the only supported + // configuration for the backtracker. + (SearchKind::Leftmost, MatchKind::LeftmostFirst) => {} + // Overlapping APIs not supported at all for backtracker. + (SearchKind::Overlapping, _) => return false, + // Backtracking doesn't really support the notion of 'earliest'. + // Namely, backtracking already works by returning as soon as it knows + // it has found a match. It just so happens that this corresponds to + // the standard 'leftmost' formulation. + // + // The 'earliest' definition in this crate does indeed permit this + // behavior, so this is "fine," but our test suite specifically looks + // for the earliest position at which a match is known, which our + // finite automata based regex engines have no problem providing. So + // for backtracking, we just skip these tests. 
+ (SearchKind::Earliest, _) => return false, + // For backtracking, 'all' semantics don't really make sense. + (_, MatchKind::All) => return false, + // Not supported at all in regex-automata. + (_, MatchKind::LeftmostLongest) => return false, + }; + let backtrack_config = BoundedBacktracker::config(); + builder + .configure(backtrack_config) + .syntax(config_syntax(test)) + .thompson(config_thompson(test)); + true +} + +/// Configuration of a Thompson NFA compiler from a regex test. +fn config_thompson(test: &RegexTest) -> thompson::Config { + let mut lookm = regex_automata::util::look::LookMatcher::new(); + lookm.set_line_terminator(test.line_terminator()); + thompson::Config::new().utf8(test.utf8()).look_matcher(lookm) +} + +/// Configuration of the regex parser from a regex test. +fn config_syntax(test: &RegexTest) -> syntax::Config { + syntax::Config::new() + .case_insensitive(test.case_insensitive()) + .unicode(test.unicode()) + .utf8(test.utf8()) + .line_terminator(test.line_terminator()) +} diff --git a/vendor/regex-automata/tests/nfa/thompson/mod.rs b/vendor/regex-automata/tests/nfa/thompson/mod.rs new file mode 100644 index 0000000..b2558f7 --- /dev/null +++ b/vendor/regex-automata/tests/nfa/thompson/mod.rs @@ -0,0 +1,4 @@ +#[cfg(feature = "nfa-backtrack")] +mod backtrack; +#[cfg(feature = "nfa-pikevm")] +mod pikevm; diff --git a/vendor/regex-automata/tests/nfa/thompson/pikevm/mod.rs b/vendor/regex-automata/tests/nfa/thompson/pikevm/mod.rs new file mode 100644 index 0000000..9d6ab47 --- /dev/null +++ b/vendor/regex-automata/tests/nfa/thompson/pikevm/mod.rs @@ -0,0 +1,2 @@ +#[cfg(not(miri))] +mod suite; diff --git a/vendor/regex-automata/tests/nfa/thompson/pikevm/suite.rs b/vendor/regex-automata/tests/nfa/thompson/pikevm/suite.rs new file mode 100644 index 0000000..d32842a --- /dev/null +++ b/vendor/regex-automata/tests/nfa/thompson/pikevm/suite.rs @@ -0,0 +1,162 @@ +use { + anyhow::Result, + regex_automata::{ + nfa::thompson::{ + self, + pikevm::{self, 
PikeVM}, + }, + util::{prefilter::Prefilter, syntax}, + PatternSet, + }, + regex_test::{ + CompiledRegex, Match, RegexTest, SearchKind, Span, TestResult, + TestRunner, + }, +}; + +use crate::{create_input, suite, testify_captures, untestify_kind}; + +/// Tests the default configuration of the hybrid NFA/DFA. +#[test] +fn default() -> Result<()> { + let builder = PikeVM::builder(); + let mut runner = TestRunner::new()?; + runner.expand(&["is_match", "find", "captures"], |test| test.compiles()); + runner.test_iter(suite()?.iter(), compiler(builder)).assert(); + Ok(()) +} + +/// Tests the PikeVM with prefilters enabled. +#[test] +fn prefilter() -> Result<()> { + let my_compiler = |test: &RegexTest, regexes: &[String]| { + // Parse regexes as HIRs so we can get literals to build a prefilter. + let mut hirs = vec![]; + for pattern in regexes.iter() { + hirs.push(syntax::parse_with(pattern, &config_syntax(test))?); + } + let kind = match untestify_kind(test.match_kind()) { + None => return Ok(CompiledRegex::skip()), + Some(kind) => kind, + }; + let pre = Prefilter::from_hirs_prefix(kind, &hirs); + let mut builder = PikeVM::builder(); + builder.configure(PikeVM::config().prefilter(pre)); + compiler(builder)(test, regexes) + }; + let mut runner = TestRunner::new()?; + runner.expand(&["is_match", "find", "captures"], |test| test.compiles()); + runner.test_iter(suite()?.iter(), my_compiler).assert(); + Ok(()) +} + +fn compiler( + mut builder: pikevm::Builder, +) -> impl FnMut(&RegexTest, &[String]) -> Result { + move |test, regexes| { + if !configure_pikevm_builder(test, &mut builder) { + return Ok(CompiledRegex::skip()); + } + let re = builder.build_many(®exes)?; + let mut cache = re.create_cache(); + Ok(CompiledRegex::compiled(move |test| -> TestResult { + run_test(&re, &mut cache, test) + })) + } +} + +fn run_test( + re: &PikeVM, + cache: &mut pikevm::Cache, + test: &RegexTest, +) -> TestResult { + let input = create_input(test); + match test.additional_name() { + 
"is_match" => TestResult::matched(re.is_match(cache, input)), + "find" => match test.search_kind() { + SearchKind::Earliest => { + let it = re + .find_iter(cache, input.earliest(true)) + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|m| Match { + id: m.pattern().as_usize(), + span: Span { start: m.start(), end: m.end() }, + }); + TestResult::matches(it) + } + SearchKind::Leftmost => { + let it = re + .find_iter(cache, input) + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|m| Match { + id: m.pattern().as_usize(), + span: Span { start: m.start(), end: m.end() }, + }); + TestResult::matches(it) + } + SearchKind::Overlapping => { + let mut patset = PatternSet::new(re.get_nfa().pattern_len()); + re.which_overlapping_matches(cache, &input, &mut patset); + TestResult::which(patset.iter().map(|p| p.as_usize())) + } + }, + "captures" => match test.search_kind() { + SearchKind::Earliest => { + let it = re + .captures_iter(cache, input.earliest(true)) + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|caps| testify_captures(&caps)); + TestResult::captures(it) + } + SearchKind::Leftmost => { + let it = re + .captures_iter(cache, input) + .take(test.match_limit().unwrap_or(std::usize::MAX)) + .map(|caps| testify_captures(&caps)); + TestResult::captures(it) + } + SearchKind::Overlapping => { + // There is no overlapping PikeVM API that supports captures. + TestResult::skip() + } + }, + name => TestResult::fail(&format!("unrecognized test name: {}", name)), + } +} + +/// Configures the given regex builder with all relevant settings on the given +/// regex test. +/// +/// If the regex test has a setting that is unsupported, then this returns +/// false (implying the test should be skipped). 
+fn configure_pikevm_builder( + test: &RegexTest, + builder: &mut pikevm::Builder, +) -> bool { + let match_kind = match untestify_kind(test.match_kind()) { + None => return false, + Some(k) => k, + }; + let pikevm_config = PikeVM::config().match_kind(match_kind); + builder + .configure(pikevm_config) + .syntax(config_syntax(test)) + .thompson(config_thompson(test)); + true +} + +/// Configuration of a Thompson NFA compiler from a regex test. +fn config_thompson(test: &RegexTest) -> thompson::Config { + let mut lookm = regex_automata::util::look::LookMatcher::new(); + lookm.set_line_terminator(test.line_terminator()); + thompson::Config::new().utf8(test.utf8()).look_matcher(lookm) +} + +/// Configuration of the regex parser from a regex test. +fn config_syntax(test: &RegexTest) -> syntax::Config { + syntax::Config::new() + .case_insensitive(test.case_insensitive()) + .unicode(test.unicode()) + .utf8(test.utf8()) + .line_terminator(test.line_terminator()) +} diff --git a/vendor/regex-syntax/.cargo-checksum.json b/vendor/regex-syntax/.cargo-checksum.json index 8d42d1c..8152441 100644 --- a/vendor/regex-syntax/.cargo-checksum.json +++ b/vendor/regex-syntax/.cargo-checksum.json @@ -1 +1 @@ 
-{"files":{"Cargo.toml":"c34256ce2fab9869300f358f0df64b8be9824c6cf22d479063dcb5880f2f9149","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"6485b8ed310d3f0340bf1ad1f47645069ce4069dcc6bb46c7d5c6faf41de1fdb","README.md":"1f5f6c3e0f7e452236eb13a0a8627dceb35e7bd9e18798916b74b724ba8161fe","benches/bench.rs":"d2b6ae5b939abd6093064f144b981b7739d7f474ec0698a1268052fc92406635","src/ast/mod.rs":"f18ba2a3a082fa342512feed20da3bf5fb8b6d92d2e246809732860b446f75c9","src/ast/parse.rs":"49478a4ae5b557dc46aa7071c91c7a6905a0ce62910f8c8fefce464e5779e934","src/ast/print.rs":"62d319cd0b7e6f437dc8dcaf798046a44afa03e9aeb6a384d5cffa448383af53","src/ast/visitor.rs":"a58170758852b2a59c9232f3a027a91f0603b26dd0d9acbde73ac1f575ca600b","src/debug.rs":"7a16cca02be9715fdc8c26a32279465774623cd12fab1ec59ac25a6e3047817f","src/either.rs":"1758e3edd056884eccadd995708d1e374ba9aa65846bd0e13b1aae852607c560","src/error.rs":"01a67e3407b0d0d869119363e47a94d92158834bfe5936366c2e3f6f4ed13f36","src/hir/interval.rs":"2358e74b4d4aabfa62f79df855fd5d183779b86c4e14aae4ee42d8695bb3d010","src/hir/literal.rs":"a57f77b49998f4e4be9f9e4512f9934bc61640786dd7ac07c708825ba2b83429","src/hir/mod.rs":"e98bf0989acda1fdd0fd2c7cbf2f8acdf03eec21ac8455ac8ae718ff8c2e2970","src/hir/print.rs":"1f1fb454af939a53ea2799f55b67c2a2615c47c24dbf9f48a7c2a2b402d36e1f","src/hir/translate.rs":"1fbba4c456891ead0298ab6457ac5e9649431e52e75acc85a293d2a17886ac84","src/hir/visitor.rs":"e98aab188f92a92baee7b490d8558383373398697ae97335ae2635b6a5aa45ca","src/lib.rs":"75ac5ef1e37abbad5f1a81788aade5adef9c4ab8d679f1f1e682f6aed40197f0","src/parser.rs":"cac139ed552a63ac4f81d60610cf0c2084421e34729475707681ef9392e1e9ae","src/rank.rs":"ff3d58b0cc5ffa69e2e8c56fc7d9ef41dd399d59a639a253a51551b858cb5bbd","src/unicode.rs":"9829458ef321b3bc22c21eae4b22805b33f8b5e67022928ffd9a9e0287bc7c31","src/unicode_tables/LICENSE-UNICODE":"74db5baf44a41b1000312c673544b3374e4198af5605c7f9080a402cec42cfa3","src/unicode_tables/age.rs":"2a
2599a4e406fbbd0efd16aa6ce385c3f97b87c34820d6686a9f9113a5231c67","src/unicode_tables/case_folding_simple.rs":"9583803d4a10486da372b76979dbd26349b40766229467238eff972c1d78e47b","src/unicode_tables/general_category.rs":"36a93ba1cdeed96a00ff29a5ab5afd2c578a89541bf4dd8b18478146cebda0aa","src/unicode_tables/grapheme_cluster_break.rs":"39c388e9805a8391d3d3e69d74d831ce4fb99aa7e13e52c64dd2bd16d4765301","src/unicode_tables/mod.rs":"26c837099cd934c8062e24bc9a0aaecf15fe1de03f9c6da3f3e1e5ac3ca24bee","src/unicode_tables/perl_decimal.rs":"a98ea4afe71c2947023ae12bd25c46bf4c7de48eeb40979eca5c96ba62cee02e","src/unicode_tables/perl_space.rs":"ea2b3b84b4a48334082dadc6c37d9fcc9c9ded84b40e8f5c9c9314898638967e","src/unicode_tables/perl_word.rs":"6f1156bd6af32151ecffea4abe07a38fa04b1fc1b227ec1a8dac5d5f08d9d74b","src/unicode_tables/property_bool.rs":"0bd64f6e3228eaecf47824e238bdf1f8a9eef113ace6e790a57f045a8106701c","src/unicode_tables/property_names.rs":"5ca25437927eb70c62adf7d038e99a601cfb8a718677fd6de832589664d3c481","src/unicode_tables/property_values.rs":"5b4cc02392d382cf7af60455fc87b9980e97409b62a4b8d6c5843190d2e2d21d","src/unicode_tables/script.rs":"ea1d771b6d0a4b12d143f9bad2ea9342a0887878cbbe3c11262b6eabedaf2dd4","src/unicode_tables/script_extension.rs":"beeb8349703d903ff861beb8401bfd2599e457dc25df872e69d6ad1615f8b5e9","src/unicode_tables/sentence_break.rs":"2befe2a27cc4e8aecb624e310ef9f371462470dd3b2f572cec1f5873a5e30aa9","src/unicode_tables/word_break.rs":"94679177731b515f0c360eff394286a1f99b59527bdbc826cbf51d32f9666187","src/utf8.rs":"e9a13623a94295b81969c5483de17219ff74bb20768be13c527010351245acbd","test":"01d6f6e9a689fb794173288a52f40f53b4f782176d0fcd648c7c6d3a2df05c63"},"package":"a5996294f19bd3aae0453a862ad728f60e6600695733dd5df01da90c54363a3c"} \ No newline at end of file 
+{"files":{"Cargo.toml":"33c96af38ed9f42d1ccbf85ecfeea1d46202943d01c595b8ee4dddef760e6bd5","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"6485b8ed310d3f0340bf1ad1f47645069ce4069dcc6bb46c7d5c6faf41de1fdb","README.md":"b2484aa7e66fb92d1378e9a7ce7605af18f77cb12c179866eaf92ba28cfec1d9","benches/bench.rs":"d2b6ae5b939abd6093064f144b981b7739d7f474ec0698a1268052fc92406635","src/ast/mod.rs":"700c2f779fccb529db7b444819d53c38f916b065d3d05a74282f929af581e8b1","src/ast/parse.rs":"fcd45146eaf747d15a2a519d34754638d451ab83e88b5962841cf7a0dd32e988","src/ast/print.rs":"99cb69ece252ef31e0be177fb3364797eb30b785f936532b8dcd8106e7be0738","src/ast/visitor.rs":"f0fdf758801fe70e6b299b73ab63196e814af95ef6eccad7ef4f72075743fcf6","src/debug.rs":"7a16cca02be9715fdc8c26a32279465774623cd12fab1ec59ac25a6e3047817f","src/either.rs":"1758e3edd056884eccadd995708d1e374ba9aa65846bd0e13b1aae852607c560","src/error.rs":"01a67e3407b0d0d869119363e47a94d92158834bfe5936366c2e3f6f4ed13f36","src/hir/interval.rs":"2358e74b4d4aabfa62f79df855fd5d183779b86c4e14aae4ee42d8695bb3d010","src/hir/literal.rs":"6a8108b8919fbfd9ab93072846124c51d2998489810fcd6e7a89fdccc45833e0","src/hir/mod.rs":"eca183b8e173f486c1a11a5fa10895c96067162c8ec936871f937ca7fca5f710","src/hir/print.rs":"ad51c515c933bfd67d307ba3d7e6ac59c9c5903b4f393a9f9a4785c92b88348d","src/hir/translate.rs":"5fbff527c53f217ba2bac9b0948d7de74164625d08674b91a479ced271159ebd","src/hir/visitor.rs":"71ca9c93aa48a5ed445399659fa6455093a1bbd9ef44b66bc7095c1b08b2ec1f","src/lib.rs":"5ae457d402e49443bdb23b71353693dd3b0d263b57a6eeb9eb5b5dae5c901bdd","src/parser.rs":"6b2f4f27e3331a01a25b87c89368dd2e54396bd425dac57941f9c1ebfd238ac8","src/rank.rs":"ff3d58b0cc5ffa69e2e8c56fc7d9ef41dd399d59a639a253a51551b858cb5bbd","src/unicode.rs":"9829458ef321b3bc22c21eae4b22805b33f8b5e67022928ffd9a9e0287bc7c31","src/unicode_tables/LICENSE-UNICODE":"74db5baf44a41b1000312c673544b3374e4198af5605c7f9080a402cec42cfa3","src/unicode_tables/age.rs":"2a
2599a4e406fbbd0efd16aa6ce385c3f97b87c34820d6686a9f9113a5231c67","src/unicode_tables/case_folding_simple.rs":"9583803d4a10486da372b76979dbd26349b40766229467238eff972c1d78e47b","src/unicode_tables/general_category.rs":"36a93ba1cdeed96a00ff29a5ab5afd2c578a89541bf4dd8b18478146cebda0aa","src/unicode_tables/grapheme_cluster_break.rs":"39c388e9805a8391d3d3e69d74d831ce4fb99aa7e13e52c64dd2bd16d4765301","src/unicode_tables/mod.rs":"26c837099cd934c8062e24bc9a0aaecf15fe1de03f9c6da3f3e1e5ac3ca24bee","src/unicode_tables/perl_decimal.rs":"a98ea4afe71c2947023ae12bd25c46bf4c7de48eeb40979eca5c96ba62cee02e","src/unicode_tables/perl_space.rs":"ea2b3b84b4a48334082dadc6c37d9fcc9c9ded84b40e8f5c9c9314898638967e","src/unicode_tables/perl_word.rs":"6f1156bd6af32151ecffea4abe07a38fa04b1fc1b227ec1a8dac5d5f08d9d74b","src/unicode_tables/property_bool.rs":"0bd64f6e3228eaecf47824e238bdf1f8a9eef113ace6e790a57f045a8106701c","src/unicode_tables/property_names.rs":"5ca25437927eb70c62adf7d038e99a601cfb8a718677fd6de832589664d3c481","src/unicode_tables/property_values.rs":"5b4cc02392d382cf7af60455fc87b9980e97409b62a4b8d6c5843190d2e2d21d","src/unicode_tables/script.rs":"ea1d771b6d0a4b12d143f9bad2ea9342a0887878cbbe3c11262b6eabedaf2dd4","src/unicode_tables/script_extension.rs":"beeb8349703d903ff861beb8401bfd2599e457dc25df872e69d6ad1615f8b5e9","src/unicode_tables/sentence_break.rs":"2befe2a27cc4e8aecb624e310ef9f371462470dd3b2f572cec1f5873a5e30aa9","src/unicode_tables/word_break.rs":"94679177731b515f0c360eff394286a1f99b59527bdbc826cbf51d32f9666187","src/utf8.rs":"e9a13623a94295b81969c5483de17219ff74bb20768be13c527010351245acbd","test":"c7de5fbc0010d9b5b758cd49956375a64b88601c068167fd366808950257f108"},"package":"c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f"} \ No newline at end of file diff --git a/vendor/regex-syntax/Cargo.toml b/vendor/regex-syntax/Cargo.toml index 55c35ac..3602ab3 100644 --- a/vendor/regex-syntax/Cargo.toml +++ b/vendor/regex-syntax/Cargo.toml @@ -11,16 +11,18 @@ 
[package] edition = "2021" -rust-version = "1.60.0" +rust-version = "1.65" name = "regex-syntax" -version = "0.7.1" -authors = ["The Rust Project Developers"] +version = "0.8.2" +authors = [ + "The Rust Project Developers", + "Andrew Gallant ", +] description = "A regular expression parser." -homepage = "https://github.com/rust-lang/regex" documentation = "https://docs.rs/regex-syntax" readme = "README.md" license = "MIT OR Apache-2.0" -repository = "https://github.com/rust-lang/regex" +repository = "https://github.com/rust-lang/regex/tree/master/regex-syntax" [package.metadata.docs.rs] all-features = true @@ -29,7 +31,13 @@ rustdoc-args = [ "docsrs", ] +[dependencies.arbitrary] +version = "1.3.0" +features = ["derive"] +optional = true + [features] +arbitrary = ["dep:arbitrary"] default = [ "std", "unicode", diff --git a/vendor/regex-syntax/README.md b/vendor/regex-syntax/README.md index ff4fe09..529513b 100644 --- a/vendor/regex-syntax/README.md +++ b/vendor/regex-syntax/README.md @@ -4,7 +4,6 @@ This crate provides a robust regular expression parser. [![Build status](https://github.com/rust-lang/regex/workflows/ci/badge.svg)](https://github.com/rust-lang/regex/actions) [![Crates.io](https://img.shields.io/crates/v/regex-syntax.svg)](https://crates.io/crates/regex-syntax) -[![Rust](https://img.shields.io/badge/rust-1.28.0%2B-blue.svg?maxAge=3600)](https://github.com/rust-lang/regex) ### Documentation diff --git a/vendor/regex-syntax/src/ast/mod.rs b/vendor/regex-syntax/src/ast/mod.rs index a95b1c8..6a77ee1 100644 --- a/vendor/regex-syntax/src/ast/mod.rs +++ b/vendor/regex-syntax/src/ast/mod.rs @@ -20,6 +20,7 @@ mod visitor; /// valid Unicode property name. That particular error is reported when /// translating an AST to the high-level intermediate representation (`HIR`). #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub struct Error { /// The kind of error. 
kind: ErrorKind, @@ -70,6 +71,7 @@ impl Error { /// new variant is not considered a breaking change. #[non_exhaustive] #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub enum ErrorKind { /// The capturing group limit was exceeded. /// @@ -160,6 +162,18 @@ pub enum ErrorKind { /// `(?i)*`. It is, however, possible to create a repetition operating on /// an empty sub-expression. For example, `()*` is still considered valid. RepetitionMissing, + /// The special word boundary syntax, `\b{something}`, was used, but + /// either EOF without `}` was seen, or an invalid character in the + /// braces was seen. + SpecialWordBoundaryUnclosed, + /// The special word boundary syntax, `\b{something}`, was used, but + /// `something` was not recognized as a valid word boundary kind. + SpecialWordBoundaryUnrecognized, + /// The syntax `\b{` was observed, but afterwards the end of the pattern + /// was observed without being able to tell whether it was meant to be a + /// bounded repetition on the `\b` or the beginning of a special word + /// boundary assertion. + SpecialWordOrRepetitionUnexpectedEof, /// The Unicode class is not valid. This typically occurs when a `\p` is /// followed by something other than a `{`. 
UnicodeClassInvalid, @@ -258,6 +272,29 @@ impl core::fmt::Display for ErrorKind { RepetitionMissing => { write!(f, "repetition operator missing expression") } + SpecialWordBoundaryUnclosed => { + write!( + f, + "special word boundary assertion is either \ + unclosed or contains an invalid character", + ) + } + SpecialWordBoundaryUnrecognized => { + write!( + f, + "unrecognized special word boundary assertion, \ + valid choices are: start, end, start-half \ + or end-half", + ) + } + SpecialWordOrRepetitionUnexpectedEof => { + write!( + f, + "found either the beginning of a special word \ + boundary or a bounded repetition on a \\b with \ + an opening brace, but no closing brace", + ) + } UnicodeClassInvalid => { write!(f, "invalid Unicode character class") } @@ -278,6 +315,7 @@ impl core::fmt::Display for ErrorKind { /// All span positions are absolute byte offsets that can be used on the /// original regular expression that was parsed. #[derive(Clone, Copy, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub struct Span { /// The start byte offset. pub start: Position, @@ -308,6 +346,7 @@ impl PartialOrd for Span { /// A position encodes one half of a span, and include the byte offset, line /// number and column number. #[derive(Clone, Copy, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub struct Position { /// The absolute offset of this position, starting at `0` from the /// beginning of the regular expression pattern string. @@ -396,6 +435,7 @@ impl Position { /// comment contains a span of precisely where it occurred in the original /// regular expression. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub struct WithComments { /// The actual ast. pub ast: Ast, @@ -408,6 +448,7 @@ pub struct WithComments { /// A regular expression can only contain comments when the `x` flag is /// enabled. 
#[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub struct Comment { /// The span of this comment, including the beginning `#` and ending `\n`. pub span: Span, @@ -424,31 +465,97 @@ pub struct Comment { /// This type defines its own destructor that uses constant stack space and /// heap space proportional to the size of the `Ast`. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub enum Ast { /// An empty regex that matches everything. - Empty(Span), + Empty(Box), /// A set of flags, e.g., `(?is)`. - Flags(SetFlags), + Flags(Box), /// A single character literal, which includes escape sequences. - Literal(Literal), + Literal(Box), /// The "any character" class. - Dot(Span), + Dot(Box), /// A single zero-width assertion. - Assertion(Assertion), - /// A single character class. This includes all forms of character classes - /// except for `.`. e.g., `\d`, `\pN`, `[a-z]` and `[[:alpha:]]`. - Class(Class), + Assertion(Box), + /// A single Unicode character class, e.g., `\pL` or `\p{Greek}`. + ClassUnicode(Box), + /// A single perl character class, e.g., `\d` or `\W`. + ClassPerl(Box), + /// A single bracketed character class set, which may contain zero or more + /// character ranges and/or zero or more nested classes. e.g., + /// `[a-zA-Z\pL]`. + ClassBracketed(Box), /// A repetition operator applied to an arbitrary regular expression. - Repetition(Repetition), + Repetition(Box), /// A grouped regular expression. - Group(Group), + Group(Box), /// An alternation of regular expressions. - Alternation(Alternation), + Alternation(Box), /// A concatenation of regular expressions. - Concat(Concat), + Concat(Box), } impl Ast { + /// Create an "empty" AST item. + pub fn empty(span: Span) -> Ast { + Ast::Empty(Box::new(span)) + } + + /// Create a "flags" AST item. + pub fn flags(e: SetFlags) -> Ast { + Ast::Flags(Box::new(e)) + } + + /// Create a "literal" AST item. 
+ pub fn literal(e: Literal) -> Ast { + Ast::Literal(Box::new(e)) + } + + /// Create a "dot" AST item. + pub fn dot(span: Span) -> Ast { + Ast::Dot(Box::new(span)) + } + + /// Create a "assertion" AST item. + pub fn assertion(e: Assertion) -> Ast { + Ast::Assertion(Box::new(e)) + } + + /// Create a "Unicode class" AST item. + pub fn class_unicode(e: ClassUnicode) -> Ast { + Ast::ClassUnicode(Box::new(e)) + } + + /// Create a "Perl class" AST item. + pub fn class_perl(e: ClassPerl) -> Ast { + Ast::ClassPerl(Box::new(e)) + } + + /// Create a "bracketed class" AST item. + pub fn class_bracketed(e: ClassBracketed) -> Ast { + Ast::ClassBracketed(Box::new(e)) + } + + /// Create a "repetition" AST item. + pub fn repetition(e: Repetition) -> Ast { + Ast::Repetition(Box::new(e)) + } + + /// Create a "group" AST item. + pub fn group(e: Group) -> Ast { + Ast::Group(Box::new(e)) + } + + /// Create a "alternation" AST item. + pub fn alternation(e: Alternation) -> Ast { + Ast::Alternation(Box::new(e)) + } + + /// Create a "concat" AST item. + pub fn concat(e: Concat) -> Ast { + Ast::Concat(Box::new(e)) + } + /// Return the span of this abstract syntax tree. pub fn span(&self) -> &Span { match *self { @@ -457,7 +564,9 @@ impl Ast { Ast::Literal(ref x) => &x.span, Ast::Dot(ref span) => span, Ast::Assertion(ref x) => &x.span, - Ast::Class(ref x) => x.span(), + Ast::ClassUnicode(ref x) => &x.span, + Ast::ClassPerl(ref x) => &x.span, + Ast::ClassBracketed(ref x) => &x.span, Ast::Repetition(ref x) => &x.span, Ast::Group(ref x) => &x.span, Ast::Alternation(ref x) => &x.span, @@ -481,8 +590,10 @@ impl Ast { | Ast::Flags(_) | Ast::Literal(_) | Ast::Dot(_) - | Ast::Assertion(_) => false, - Ast::Class(_) + | Ast::Assertion(_) + | Ast::ClassUnicode(_) + | Ast::ClassPerl(_) => false, + Ast::ClassBracketed(_) | Ast::Repetition(_) | Ast::Group(_) | Ast::Alternation(_) @@ -508,6 +619,7 @@ impl core::fmt::Display for Ast { /// An alternation of regular expressions. 
#[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub struct Alternation { /// The span of this alternation. pub span: Span, @@ -518,20 +630,21 @@ pub struct Alternation { impl Alternation { /// Return this alternation as an AST. /// - /// If this alternation contains zero ASTs, then Ast::Empty is - /// returned. If this alternation contains exactly 1 AST, then the - /// corresponding AST is returned. Otherwise, Ast::Alternation is returned. + /// If this alternation contains zero ASTs, then `Ast::empty` is returned. + /// If this alternation contains exactly 1 AST, then the corresponding AST + /// is returned. Otherwise, `Ast::alternation` is returned. pub fn into_ast(mut self) -> Ast { match self.asts.len() { - 0 => Ast::Empty(self.span), + 0 => Ast::empty(self.span), 1 => self.asts.pop().unwrap(), - _ => Ast::Alternation(self), + _ => Ast::alternation(self), } } } /// A concatenation of regular expressions. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub struct Concat { /// The span of this concatenation. pub span: Span, @@ -542,14 +655,14 @@ pub struct Concat { impl Concat { /// Return this concatenation as an AST. /// - /// If this concatenation contains zero ASTs, then Ast::Empty is - /// returned. If this concatenation contains exactly 1 AST, then the - /// corresponding AST is returned. Otherwise, Ast::Concat is returned. + /// If this alternation contains zero ASTs, then `Ast::empty` is returned. + /// If this alternation contains exactly 1 AST, then the corresponding AST + /// is returned. Otherwise, `Ast::concat` is returned. 
pub fn into_ast(mut self) -> Ast { match self.asts.len() { - 0 => Ast::Empty(self.span), + 0 => Ast::empty(self.span), 1 => self.asts.pop().unwrap(), - _ => Ast::Concat(self), + _ => Ast::concat(self), } } } @@ -560,6 +673,7 @@ impl Concat { /// represented in their literal form, e.g., `a` or in their escaped form, /// e.g., `\x61`. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub struct Literal { /// The span of this literal. pub span: Span, @@ -584,6 +698,7 @@ impl Literal { /// The kind of a single literal expression. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub enum LiteralKind { /// The literal is written verbatim, e.g., `a` or `☃`. Verbatim, @@ -613,6 +728,7 @@ pub enum LiteralKind { /// A special literal is a special escape sequence recognized by the regex /// parser, e.g., `\f` or `\n`. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub enum SpecialLiteralKind { /// Bell, spelled `\a` (`\x07`). Bell, @@ -637,6 +753,7 @@ pub enum SpecialLiteralKind { /// differ when used without brackets in the number of hex digits that must /// follow. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub enum HexLiteralKind { /// A `\x` prefix. When used without brackets, this form is limited to /// two digits. @@ -662,32 +779,9 @@ impl HexLiteralKind { } } -/// A single character class expression. -#[derive(Clone, Debug, Eq, PartialEq)] -pub enum Class { - /// A Unicode character class, e.g., `\pL` or `\p{Greek}`. - Unicode(ClassUnicode), - /// A perl character class, e.g., `\d` or `\W`. - Perl(ClassPerl), - /// A bracketed character class set, which may contain zero or more - /// character ranges and/or zero or more nested classes. e.g., - /// `[a-zA-Z\pL]`. 
- Bracketed(ClassBracketed), -} - -impl Class { - /// Return the span of this character class. - pub fn span(&self) -> &Span { - match *self { - Class::Perl(ref x) => &x.span, - Class::Unicode(ref x) => &x.span, - Class::Bracketed(ref x) => &x.span, - } - } -} - /// A Perl character class. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub struct ClassPerl { /// The span of this class. pub span: Span, @@ -700,6 +794,7 @@ pub struct ClassPerl { /// The available Perl character classes. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub enum ClassPerlKind { /// Decimal numbers. Digit, @@ -711,6 +806,7 @@ pub enum ClassPerlKind { /// An ASCII character class. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub struct ClassAscii { /// The span of this class. pub span: Span, @@ -723,6 +819,7 @@ pub struct ClassAscii { /// The available ASCII character classes. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub enum ClassAsciiKind { /// `[0-9A-Za-z]` Alnum, @@ -786,6 +883,7 @@ impl ClassAsciiKind { /// A Unicode character class. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub struct ClassUnicode { /// The span of this class. 
pub span: Span, @@ -838,8 +936,156 @@ pub enum ClassUnicodeKind { }, } +#[cfg(feature = "arbitrary")] +impl arbitrary::Arbitrary<'_> for ClassUnicodeKind { + fn arbitrary( + u: &mut arbitrary::Unstructured, + ) -> arbitrary::Result { + #[cfg(any( + feature = "unicode-age", + feature = "unicode-bool", + feature = "unicode-gencat", + feature = "unicode-perl", + feature = "unicode-script", + feature = "unicode-segment", + ))] + { + use alloc::string::ToString; + + use super::unicode_tables::{ + property_names::PROPERTY_NAMES, + property_values::PROPERTY_VALUES, + }; + + match u.choose_index(3)? { + 0 => { + let all = PROPERTY_VALUES + .iter() + .flat_map(|e| e.1.iter()) + .filter(|(name, _)| name.len() == 1) + .count(); + let idx = u.choose_index(all)?; + let value = PROPERTY_VALUES + .iter() + .flat_map(|e| e.1.iter()) + .take(idx + 1) + .last() + .unwrap() + .0 + .chars() + .next() + .unwrap(); + Ok(ClassUnicodeKind::OneLetter(value)) + } + 1 => { + let all = PROPERTY_VALUES + .iter() + .map(|e| e.1.len()) + .sum::() + + PROPERTY_NAMES.len(); + let idx = u.choose_index(all)?; + let name = PROPERTY_VALUES + .iter() + .flat_map(|e| e.1.iter()) + .chain(PROPERTY_NAMES) + .map(|(_, e)| e) + .take(idx + 1) + .last() + .unwrap(); + Ok(ClassUnicodeKind::Named(name.to_string())) + } + 2 => { + let all = PROPERTY_VALUES + .iter() + .map(|e| e.1.len()) + .sum::(); + let idx = u.choose_index(all)?; + let (prop, value) = PROPERTY_VALUES + .iter() + .flat_map(|e| { + e.1.iter().map(|(_, value)| (e.0, value)) + }) + .take(idx + 1) + .last() + .unwrap(); + Ok(ClassUnicodeKind::NamedValue { + op: u.arbitrary()?, + name: prop.to_string(), + value: value.to_string(), + }) + } + _ => unreachable!("index chosen is impossible"), + } + } + #[cfg(not(any( + feature = "unicode-age", + feature = "unicode-bool", + feature = "unicode-gencat", + feature = "unicode-perl", + feature = "unicode-script", + feature = "unicode-segment", + )))] + { + match u.choose_index(3)? 
{ + 0 => Ok(ClassUnicodeKind::OneLetter(u.arbitrary()?)), + 1 => Ok(ClassUnicodeKind::Named(u.arbitrary()?)), + 2 => Ok(ClassUnicodeKind::NamedValue { + op: u.arbitrary()?, + name: u.arbitrary()?, + value: u.arbitrary()?, + }), + _ => unreachable!("index chosen is impossible"), + } + } + } + + fn size_hint(depth: usize) -> (usize, Option) { + #[cfg(any( + feature = "unicode-age", + feature = "unicode-bool", + feature = "unicode-gencat", + feature = "unicode-perl", + feature = "unicode-script", + feature = "unicode-segment", + ))] + { + arbitrary::size_hint::and_all(&[ + usize::size_hint(depth), + usize::size_hint(depth), + arbitrary::size_hint::or( + (0, Some(0)), + ClassUnicodeOpKind::size_hint(depth), + ), + ]) + } + #[cfg(not(any( + feature = "unicode-age", + feature = "unicode-bool", + feature = "unicode-gencat", + feature = "unicode-perl", + feature = "unicode-script", + feature = "unicode-segment", + )))] + { + arbitrary::size_hint::and( + usize::size_hint(depth), + arbitrary::size_hint::or_all(&[ + char::size_hint(depth), + String::size_hint(depth), + arbitrary::size_hint::and_all(&[ + String::size_hint(depth), + String::size_hint(depth), + ClassUnicodeOpKind::size_hint(depth), + ]), + ]), + ) + } + } +} + /// The type of op used in a Unicode character class. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub enum ClassUnicodeOpKind { /// A property set to a specific value, e.g., `\p{scx=Katakana}`. Equal, @@ -862,6 +1108,7 @@ impl ClassUnicodeOpKind { /// A bracketed character class, e.g., `[a-z0-9]`. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub struct ClassBracketed { /// The span of this class. pub span: Span, @@ -880,6 +1127,7 @@ pub struct ClassBracketed { /// items (literals, ranges, other bracketed classes) or a tree of binary set /// operations. 
#[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub enum ClassSet { /// An item, which can be a single literal, range, nested character class /// or a union of items. @@ -913,6 +1161,7 @@ impl ClassSet { /// A single component of a character class set. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub enum ClassSetItem { /// An empty item. /// @@ -956,6 +1205,7 @@ impl ClassSetItem { /// A single character class range in a set. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub struct ClassSetRange { /// The span of this range. pub span: Span, @@ -977,6 +1227,7 @@ impl ClassSetRange { /// A union of items inside a character class set. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub struct ClassSetUnion { /// The span of the items in this operation. e.g., the `a-z0-9` in /// `[^a-z0-9]` @@ -1021,6 +1272,7 @@ impl ClassSetUnion { /// A Unicode character class set operation. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub struct ClassSetBinaryOp { /// The span of this operation. e.g., the `a-z--[h-p]` in `[a-z--h-p]`. pub span: Span, @@ -1038,6 +1290,7 @@ pub struct ClassSetBinaryOp { /// explicit union operator. Concatenation inside a character class corresponds /// to the union operation. #[derive(Clone, Copy, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub enum ClassSetBinaryOpKind { /// The intersection of two sets, e.g., `\pN&&[a-z]`. Intersection, @@ -1051,6 +1304,7 @@ pub enum ClassSetBinaryOpKind { /// A single zero-width assertion. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub struct Assertion { /// The span of this assertion. 
pub span: Span, @@ -1060,6 +1314,7 @@ pub struct Assertion { /// An assertion kind. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub enum AssertionKind { /// `^` StartLine, @@ -1073,10 +1328,23 @@ pub enum AssertionKind { WordBoundary, /// `\B` NotWordBoundary, + /// `\b{start}` + WordBoundaryStart, + /// `\b{end}` + WordBoundaryEnd, + /// `\<` (alias for `\b{start}`) + WordBoundaryStartAngle, + /// `\>` (alias for `\b{end}`) + WordBoundaryEndAngle, + /// `\b{start-half}` + WordBoundaryStartHalf, + /// `\b{end-half}` + WordBoundaryEndHalf, } /// A repetition operation applied to a regular expression. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub struct Repetition { /// The span of this operation. pub span: Span, @@ -1090,6 +1358,7 @@ pub struct Repetition { /// The repetition operator itself. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub struct RepetitionOp { /// The span of this operator. This includes things like `+`, `*?` and /// `{m,n}`. @@ -1100,6 +1369,7 @@ pub struct RepetitionOp { /// The kind of a repetition operator. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub enum RepetitionKind { /// `?` ZeroOrOne, @@ -1113,6 +1383,7 @@ pub enum RepetitionKind { /// A range repetition operator. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub enum RepetitionRange { /// `{m}` Exactly(u32), @@ -1142,6 +1413,7 @@ impl RepetitionRange { /// contains a sub-expression, e.g., `(a)`, `(?Pa)`, `(?:a)` and /// `(?is:a)`. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub struct Group { /// The span of this group. pub span: Span, @@ -1183,6 +1455,7 @@ impl Group { /// The kind of a group. 
#[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub enum GroupKind { /// `(a)` CaptureIndex(u32), @@ -1211,8 +1484,38 @@ pub struct CaptureName { pub index: u32, } +#[cfg(feature = "arbitrary")] +impl arbitrary::Arbitrary<'_> for CaptureName { + fn arbitrary( + u: &mut arbitrary::Unstructured, + ) -> arbitrary::Result { + let len = u.arbitrary_len::()?; + if len == 0 { + return Err(arbitrary::Error::NotEnoughData); + } + let mut name: String = String::new(); + for _ in 0..len { + let ch: char = u.arbitrary()?; + let cp = u32::from(ch); + let ascii_letter_offset = u8::try_from(cp % 26).unwrap(); + let ascii_letter = b'a' + ascii_letter_offset; + name.push(char::from(ascii_letter)); + } + Ok(CaptureName { span: u.arbitrary()?, name, index: u.arbitrary()? }) + } + + fn size_hint(depth: usize) -> (usize, Option) { + arbitrary::size_hint::and_all(&[ + Span::size_hint(depth), + usize::size_hint(depth), + u32::size_hint(depth), + ]) + } +} + /// A group of flags that is not applied to a particular regular expression. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub struct SetFlags { /// The span of these flags, including the grouping parentheses. pub span: Span, @@ -1224,6 +1527,7 @@ pub struct SetFlags { /// /// This corresponds only to the sequence of flags themselves, e.g., `is-u`. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub struct Flags { /// The span of this group of flags. pub span: Span, @@ -1276,6 +1580,7 @@ impl Flags { /// A single item in a group of flags. #[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub struct FlagsItem { /// The span of this item. pub span: Span, @@ -1285,6 +1590,7 @@ pub struct FlagsItem { /// The kind of an item in a group of flags. 
#[derive(Clone, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub enum FlagsItemKind { /// A negation operator applied to all subsequent flags in the enclosing /// group. @@ -1305,6 +1611,7 @@ impl FlagsItemKind { /// A single flag. #[derive(Clone, Copy, Debug, Eq, PartialEq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub enum Flag { /// `i` CaseInsensitive, @@ -1334,8 +1641,10 @@ impl Drop for Ast { | Ast::Literal(_) | Ast::Dot(_) | Ast::Assertion(_) - // Classes are recursive, so they get their own Drop impl. - | Ast::Class(_) => return, + | Ast::ClassUnicode(_) + | Ast::ClassPerl(_) + // Bracketed classes are recursive, they get their own Drop impl. + | Ast::ClassBracketed(_) => return, Ast::Repetition(ref x) if !x.ast.has_subexprs() => return, Ast::Group(ref x) if !x.ast.has_subexprs() => return, Ast::Alternation(ref x) if x.asts.is_empty() => return, @@ -1344,7 +1653,7 @@ impl Drop for Ast { } let empty_span = || Span::splat(Position::new(0, 0, 0)); - let empty_ast = || Ast::Empty(empty_span()); + let empty_ast = || Ast::empty(empty_span()); let mut stack = vec![mem::replace(self, empty_ast())]; while let Some(mut ast) = stack.pop() { match ast { @@ -1353,8 +1662,11 @@ impl Drop for Ast { | Ast::Literal(_) | Ast::Dot(_) | Ast::Assertion(_) - // Classes are recursive, so they get their own Drop impl. - | Ast::Class(_) => {} + | Ast::ClassUnicode(_) + | Ast::ClassPerl(_) + // Bracketed classes are recursive, so they get their own Drop + // impl. 
+ | Ast::ClassBracketed(_) => {} Ast::Repetition(ref mut x) => { stack.push(mem::replace(&mut x.ast, empty_ast())); } @@ -1447,9 +1759,9 @@ mod tests { let run = || { let span = || Span::splat(Position::new(0, 0, 0)); - let mut ast = Ast::Empty(span()); + let mut ast = Ast::empty(span()); for i in 0..200 { - ast = Ast::Group(Group { + ast = Ast::group(Group { span: span(), kind: GroupKind::CaptureIndex(i), ast: Box::new(ast), @@ -1478,4 +1790,20 @@ mod tests { .join() .unwrap(); } + + // This tests that our `Ast` has a reasonable size. This isn't a hard rule + // and it can be increased if given a good enough reason. But this test + // exists because the size of `Ast` was at one point over 200 bytes on a + // 64-bit target. Wow. + #[test] + fn ast_size() { + let max = 2 * core::mem::size_of::(); + let size = core::mem::size_of::(); + assert!( + size <= max, + "Ast size of {} bytes is bigger than suggested max {}", + size, + max + ); + } } diff --git a/vendor/regex-syntax/src/ast/parse.rs b/vendor/regex-syntax/src/ast/parse.rs index 9cf64e9..593b14f 100644 --- a/vendor/regex-syntax/src/ast/parse.rs +++ b/vendor/regex-syntax/src/ast/parse.rs @@ -53,11 +53,11 @@ impl Primitive { /// Convert this primitive into a proper AST. fn into_ast(self) -> Ast { match self { - Primitive::Literal(lit) => Ast::Literal(lit), - Primitive::Assertion(assert) => Ast::Assertion(assert), - Primitive::Dot(span) => Ast::Dot(span), - Primitive::Perl(cls) => Ast::Class(ast::Class::Perl(cls)), - Primitive::Unicode(cls) => Ast::Class(ast::Class::Unicode(cls)), + Primitive::Literal(lit) => Ast::literal(lit), + Primitive::Assertion(assert) => Ast::assertion(assert), + Primitive::Dot(span) => Ast::dot(span), + Primitive::Perl(cls) => Ast::class_perl(cls), + Primitive::Unicode(cls) => Ast::class_unicode(cls), } } @@ -383,7 +383,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { /// Return a reference to the pattern being parsed. 
fn pattern(&self) -> &str { - self.pattern.borrow() + self.pattern } /// Create a new error with the given span and error type. @@ -691,7 +691,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { self.parser().ignore_whitespace.set(v); } - concat.asts.push(Ast::Flags(set)); + concat.asts.push(Ast::flags(set)); Ok(concat) } Either::Right(group) => { @@ -764,7 +764,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { group.ast = Box::new(group_concat.into_ast()); } } - prior_concat.asts.push(Ast::Group(group)); + prior_concat.asts.push(Ast::group(group)); Ok(prior_concat) } @@ -783,7 +783,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { Some(GroupState::Alternation(mut alt)) => { alt.span.end = self.pos(); alt.asts.push(concat.into_ast()); - Ok(Ast::Alternation(alt)) + Ok(Ast::alternation(alt)) } Some(GroupState::Group { group, .. }) => { return Err( @@ -850,7 +850,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { fn pop_class( &self, nested_union: ast::ClassSetUnion, - ) -> Result> { + ) -> Result> { assert_eq!(self.char(), ']'); let item = ast::ClassSet::Item(nested_union.into_item()); @@ -882,7 +882,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { set.span.end = self.pos(); set.kind = prevset; if stack.is_empty() { - Ok(Either::Right(ast::Class::Bracketed(set))) + Ok(Either::Right(set)) } else { union.push(ast::ClassSetItem::Bracketed(Box::new(set))); Ok(Either::Left(union)) @@ -976,7 +976,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { '|' => concat = self.push_alternate(concat)?, '[' => { let class = self.parse_set_class()?; - concat.asts.push(Ast::Class(class)); + concat.asts.push(Ast::class_bracketed(class)); } '?' 
=> { concat = self.parse_uncounted_repetition( @@ -1057,7 +1057,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { greedy = false; self.bump(); } - concat.asts.push(Ast::Repetition(ast::Repetition { + concat.asts.push(Ast::repetition(ast::Repetition { span: ast.span().with_end(self.pos()), op: ast::RepetitionOp { span: Span::new(op_start, self.pos()), @@ -1159,7 +1159,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { self.error(op_span, ast::ErrorKind::RepetitionCountInvalid) ); } - concat.asts.push(Ast::Repetition(ast::Repetition { + concat.asts.push(Ast::repetition(ast::Repetition { span: ast.span().with_end(self.pos()), op: ast::RepetitionOp { span: op_span, @@ -1212,7 +1212,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { Ok(Either::Right(ast::Group { span: open_span, kind: ast::GroupKind::CaptureName { starts_with_p, name }, - ast: Box::new(Ast::Empty(self.span())), + ast: Box::new(Ast::empty(self.span())), })) } else if self.bump_if("?") { if self.is_eof() { @@ -1241,7 +1241,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { Ok(Either::Right(ast::Group { span: open_span, kind: ast::GroupKind::NonCapturing(flags), - ast: Box::new(Ast::Empty(self.span())), + ast: Box::new(Ast::empty(self.span())), })) } } else { @@ -1249,7 +1249,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { Ok(Either::Right(ast::Group { span: open_span, kind: ast::GroupKind::CaptureIndex(capture_index), - ast: Box::new(Ast::Empty(self.span())), + ast: Box::new(Ast::empty(self.span())), })) } } @@ -1528,18 +1528,115 @@ impl<'s, P: Borrow> ParserI<'s, P> { span, kind: ast::AssertionKind::EndText, })), - 'b' => Ok(Primitive::Assertion(ast::Assertion { - span, - kind: ast::AssertionKind::WordBoundary, - })), + 'b' => { + let mut wb = ast::Assertion { + span, + kind: ast::AssertionKind::WordBoundary, + }; + // After a \b, we "try" to parse things like \b{start} for + // special word boundary assertions. + if !self.is_eof() && self.char() == '{' { + if let Some(kind) = + self.maybe_parse_special_word_boundary(start)? 
+ { + wb.kind = kind; + wb.span.end = self.pos(); + } + } + Ok(Primitive::Assertion(wb)) + } 'B' => Ok(Primitive::Assertion(ast::Assertion { span, kind: ast::AssertionKind::NotWordBoundary, })), + '<' => Ok(Primitive::Assertion(ast::Assertion { + span, + kind: ast::AssertionKind::WordBoundaryStartAngle, + })), + '>' => Ok(Primitive::Assertion(ast::Assertion { + span, + kind: ast::AssertionKind::WordBoundaryEndAngle, + })), _ => Err(self.error(span, ast::ErrorKind::EscapeUnrecognized)), } } + /// Attempt to parse a specialty word boundary. That is, `\b{start}`, + /// `\b{end}`, `\b{start-half}` or `\b{end-half}`. + /// + /// This is similar to `maybe_parse_ascii_class` in that, in most cases, + /// if it fails it will just return `None` with no error. This is done + /// because `\b{5}` is a valid expression and we want to let that be parsed + /// by the existing counted repetition parsing code. (I thought about just + /// invoking the counted repetition code from here, but it seemed a little + /// ham-fisted.) + /// + /// Unlike `maybe_parse_ascii_class` though, this can return an error. + /// Namely, if we definitely know it isn't a counted repetition, then we + /// return an error specific to the specialty word boundaries. + /// + /// This assumes the parser is positioned at a `{` immediately following + /// a `\b`. When `None` is returned, the parser is returned to the position + /// at which it started: pointing at a `{`. + /// + /// The position given should correspond to the start of the `\b`. 
+ fn maybe_parse_special_word_boundary( + &self, + wb_start: Position, + ) -> Result> { + assert_eq!(self.char(), '{'); + + let is_valid_char = |c| match c { + 'A'..='Z' | 'a'..='z' | '-' => true, + _ => false, + }; + let start = self.pos(); + if !self.bump_and_bump_space() { + return Err(self.error( + Span::new(wb_start, self.pos()), + ast::ErrorKind::SpecialWordOrRepetitionUnexpectedEof, + )); + } + let start_contents = self.pos(); + // This is one of the critical bits: if the first non-whitespace + // character isn't in [-A-Za-z] (i.e., this can't be a special word + // boundary), then we bail and let the counted repetition parser deal + // with this. + if !is_valid_char(self.char()) { + self.parser().pos.set(start); + return Ok(None); + } + + // Now collect up our chars until we see a '}'. + let mut scratch = self.parser().scratch.borrow_mut(); + scratch.clear(); + while !self.is_eof() && is_valid_char(self.char()) { + scratch.push(self.char()); + self.bump_and_bump_space(); + } + if self.is_eof() || self.char() != '}' { + return Err(self.error( + Span::new(start, self.pos()), + ast::ErrorKind::SpecialWordBoundaryUnclosed, + )); + } + let end = self.pos(); + self.bump(); + let kind = match scratch.as_str() { + "start" => ast::AssertionKind::WordBoundaryStart, + "end" => ast::AssertionKind::WordBoundaryEnd, + "start-half" => ast::AssertionKind::WordBoundaryStartHalf, + "end-half" => ast::AssertionKind::WordBoundaryEndHalf, + _ => { + return Err(self.error( + Span::new(start_contents, end), + ast::ErrorKind::SpecialWordBoundaryUnrecognized, + )) + } + }; + Ok(Some(kind)) + } + /// Parse an octal representation of a Unicode codepoint up to 3 digits /// long. This expects the parser to be positioned at the first octal /// digit and advances the parser to the first character immediately @@ -1743,7 +1840,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { /// is successful, then the parser is advanced to the position immediately /// following the closing `]`. 
#[inline(never)] - fn parse_set_class(&self) -> Result { + fn parse_set_class(&self) -> Result { assert_eq!(self.char(), '['); let mut union = @@ -1967,9 +2064,9 @@ impl<'s, P: Borrow> ParserI<'s, P> { // because parsing cannot fail with any interesting error. For example, // in order to use an ASCII character class, it must be enclosed in // double brackets, e.g., `[[:alnum:]]`. Alternatively, you might think - // of it as "ASCII character characters have the syntax `[:NAME:]` - // which can only appear within character brackets." This means that - // things like `[[:lower:]A]` are legal constructs. + // of it as "ASCII character classes have the syntax `[:NAME:]` which + // can only appear within character brackets." This means that things + // like `[[:lower:]A]` are legal constructs. // // However, if one types an incorrect ASCII character class, e.g., // `[[:loower:]]`, then we treat that as a normal nested character @@ -2189,12 +2286,12 @@ impl<'p, 's, P: Borrow> ast::Visitor for NestLimiter<'p, 's, P> { | Ast::Literal(_) | Ast::Dot(_) | Ast::Assertion(_) - | Ast::Class(ast::Class::Unicode(_)) - | Ast::Class(ast::Class::Perl(_)) => { + | Ast::ClassUnicode(_) + | Ast::ClassPerl(_) => { // These are all base cases, so we don't increment depth. return Ok(()); } - Ast::Class(ast::Class::Bracketed(ref x)) => &x.span, + Ast::ClassBracketed(ref x) => &x.span, Ast::Repetition(ref x) => &x.span, Ast::Group(ref x) => &x.span, Ast::Alternation(ref x) => &x.span, @@ -2210,12 +2307,12 @@ impl<'p, 's, P: Borrow> ast::Visitor for NestLimiter<'p, 's, P> { | Ast::Literal(_) | Ast::Dot(_) | Ast::Assertion(_) - | Ast::Class(ast::Class::Unicode(_)) - | Ast::Class(ast::Class::Perl(_)) => { + | Ast::ClassUnicode(_) + | Ast::ClassPerl(_) => { // These are all base cases, so we don't decrement depth. 
Ok(()) } - Ast::Class(ast::Class::Bracketed(_)) + Ast::ClassBracketed(_) | Ast::Repetition(_) | Ast::Group(_) | Ast::Alternation(_) @@ -2426,12 +2523,12 @@ mod tests { /// Create a meta literal starting at the given position. fn meta_lit(c: char, span: Span) -> Ast { - Ast::Literal(ast::Literal { span, kind: ast::LiteralKind::Meta, c }) + Ast::literal(ast::Literal { span, kind: ast::LiteralKind::Meta, c }) } /// Create a verbatim literal with the given span. fn lit_with(c: char, span: Span) -> Ast { - Ast::Literal(ast::Literal { + Ast::literal(ast::Literal { span, kind: ast::LiteralKind::Verbatim, c, @@ -2445,17 +2542,17 @@ mod tests { /// Create a concatenation with the given span. fn concat_with(span: Span, asts: Vec) -> Ast { - Ast::Concat(ast::Concat { span, asts }) + Ast::concat(ast::Concat { span, asts }) } /// Create an alternation with the given span. fn alt(range: Range, asts: Vec) -> Ast { - Ast::Alternation(ast::Alternation { span: span(range), asts }) + Ast::alternation(ast::Alternation { span: span(range), asts }) } /// Create a capturing group with the given span. fn group(range: Range, index: u32, ast: Ast) -> Ast { - Ast::Group(ast::Group { + Ast::group(ast::Group { span: span(range), kind: ast::GroupKind::CaptureIndex(index), ast: Box::new(ast), @@ -2488,7 +2585,7 @@ mod tests { }, ); } - Ast::Flags(ast::SetFlags { + Ast::flags(ast::SetFlags { span: span_range(pat, range.clone()), flags: ast::Flags { span: span_range(pat, (range.start + 2)..(range.end - 1)), @@ -2502,7 +2599,7 @@ mod tests { // A nest limit of 0 still allows some types of regexes. 
assert_eq!( parser_nest_limit("", 0).parse(), - Ok(Ast::Empty(span(0..0))) + Ok(Ast::empty(span(0..0))) ); assert_eq!(parser_nest_limit("a", 0).parse(), Ok(lit('a', 0))); @@ -2516,7 +2613,7 @@ mod tests { ); assert_eq!( parser_nest_limit("a+", 1).parse(), - Ok(Ast::Repetition(ast::Repetition { + Ok(Ast::repetition(ast::Repetition { span: span(0..2), op: ast::RepetitionOp { span: span(1..2), @@ -2542,14 +2639,14 @@ mod tests { ); assert_eq!( parser_nest_limit("a+*", 2).parse(), - Ok(Ast::Repetition(ast::Repetition { + Ok(Ast::repetition(ast::Repetition { span: span(0..3), op: ast::RepetitionOp { span: span(2..3), kind: ast::RepetitionKind::ZeroOrMore, }, greedy: true, - ast: Box::new(Ast::Repetition(ast::Repetition { + ast: Box::new(Ast::repetition(ast::Repetition { span: span(0..2), op: ast::RepetitionOp { span: span(1..2), @@ -2606,7 +2703,7 @@ mod tests { ); assert_eq!( parser_nest_limit("[a]", 1).parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..3), negated: false, kind: ast::ClassSet::Item(ast::ClassSetItem::Literal( @@ -2616,7 +2713,7 @@ mod tests { c: 'a', } )), - }))) + })) ); assert_eq!( parser_nest_limit("[ab]", 1).parse().unwrap_err(), @@ -2776,7 +2873,7 @@ bar vec![ lit_with('a', span_range(pat, 0..1)), lit_with(' ', span_range(pat, 1..2)), - Ast::Group(ast::Group { + Ast::group(ast::Group { span: span_range(pat, 2..9), kind: ast::GroupKind::NonCapturing(ast::Flags { span: span_range(pat, 4..5), @@ -2803,7 +2900,7 @@ bar span_range(pat, 0..pat.len()), vec![ flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false), - Ast::Group(ast::Group { + Ast::group(ast::Group { span: span_range(pat, 4..pat.len()), kind: ast::GroupKind::CaptureName { starts_with_p: true, @@ -2825,7 +2922,7 @@ bar span_range(pat, 0..pat.len()), vec![ flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false), - Ast::Group(ast::Group { + Ast::group(ast::Group { span: span_range(pat, 4..pat.len()), kind: 
ast::GroupKind::CaptureIndex(1), ast: Box::new(lit_with('a', span_range(pat, 7..8))), @@ -2840,7 +2937,7 @@ bar span_range(pat, 0..pat.len()), vec![ flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false), - Ast::Group(ast::Group { + Ast::group(ast::Group { span: span_range(pat, 4..pat.len()), kind: ast::GroupKind::NonCapturing(ast::Flags { span: span_range(pat, 8..8), @@ -2858,7 +2955,7 @@ bar span_range(pat, 0..pat.len()), vec![ flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false), - Ast::Literal(ast::Literal { + Ast::literal(ast::Literal { span: span(4..13), kind: ast::LiteralKind::HexBrace( ast::HexLiteralKind::X @@ -2877,7 +2974,7 @@ bar span_range(pat, 0..pat.len()), vec![ flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false), - Ast::Literal(ast::Literal { + Ast::literal(ast::Literal { span: span_range(pat, 4..6), kind: ast::LiteralKind::Superfluous, c: ' ', @@ -2895,9 +2992,9 @@ bar Ok(concat_with( span_range(pat, 0..3), vec![ - Ast::Dot(span_range(pat, 0..1)), + Ast::dot(span_range(pat, 0..1)), lit_with('\n', span_range(pat, 1..2)), - Ast::Dot(span_range(pat, 2..3)), + Ast::dot(span_range(pat, 2..3)), ] )) ); @@ -2933,7 +3030,7 @@ bar fn parse_uncounted_repetition() { assert_eq!( parser(r"a*").parse(), - Ok(Ast::Repetition(ast::Repetition { + Ok(Ast::repetition(ast::Repetition { span: span(0..2), op: ast::RepetitionOp { span: span(1..2), @@ -2945,7 +3042,7 @@ bar ); assert_eq!( parser(r"a+").parse(), - Ok(Ast::Repetition(ast::Repetition { + Ok(Ast::repetition(ast::Repetition { span: span(0..2), op: ast::RepetitionOp { span: span(1..2), @@ -2958,7 +3055,7 @@ bar assert_eq!( parser(r"a?").parse(), - Ok(Ast::Repetition(ast::Repetition { + Ok(Ast::repetition(ast::Repetition { span: span(0..2), op: ast::RepetitionOp { span: span(1..2), @@ -2970,7 +3067,7 @@ bar ); assert_eq!( parser(r"a??").parse(), - Ok(Ast::Repetition(ast::Repetition { + Ok(Ast::repetition(ast::Repetition { span: span(0..3), op: ast::RepetitionOp { span: span(1..3), @@ -2982,7 +3079,7 @@ 
bar ); assert_eq!( parser(r"a?").parse(), - Ok(Ast::Repetition(ast::Repetition { + Ok(Ast::repetition(ast::Repetition { span: span(0..2), op: ast::RepetitionOp { span: span(1..2), @@ -2997,7 +3094,7 @@ bar Ok(concat( 0..3, vec![ - Ast::Repetition(ast::Repetition { + Ast::repetition(ast::Repetition { span: span(0..2), op: ast::RepetitionOp { span: span(1..2), @@ -3015,7 +3112,7 @@ bar Ok(concat( 0..4, vec![ - Ast::Repetition(ast::Repetition { + Ast::repetition(ast::Repetition { span: span(0..3), op: ast::RepetitionOp { span: span(1..3), @@ -3034,7 +3131,7 @@ bar 0..3, vec![ lit('a', 0), - Ast::Repetition(ast::Repetition { + Ast::repetition(ast::Repetition { span: span(1..3), op: ast::RepetitionOp { span: span(2..3), @@ -3048,7 +3145,7 @@ bar ); assert_eq!( parser(r"(ab)?").parse(), - Ok(Ast::Repetition(ast::Repetition { + Ok(Ast::repetition(ast::Repetition { span: span(0..5), op: ast::RepetitionOp { span: span(4..5), @@ -3067,8 +3164,8 @@ bar Ok(alt( 0..3, vec![ - Ast::Empty(span(0..0)), - Ast::Repetition(ast::Repetition { + Ast::empty(span(0..0)), + Ast::repetition(ast::Repetition { span: span(1..3), op: ast::RepetitionOp { span: span(2..3), @@ -3157,7 +3254,7 @@ bar fn parse_counted_repetition() { assert_eq!( parser(r"a{5}").parse(), - Ok(Ast::Repetition(ast::Repetition { + Ok(Ast::repetition(ast::Repetition { span: span(0..4), op: ast::RepetitionOp { span: span(1..4), @@ -3171,7 +3268,7 @@ bar ); assert_eq!( parser(r"a{5,}").parse(), - Ok(Ast::Repetition(ast::Repetition { + Ok(Ast::repetition(ast::Repetition { span: span(0..5), op: ast::RepetitionOp { span: span(1..5), @@ -3185,7 +3282,7 @@ bar ); assert_eq!( parser(r"a{5,9}").parse(), - Ok(Ast::Repetition(ast::Repetition { + Ok(Ast::repetition(ast::Repetition { span: span(0..6), op: ast::RepetitionOp { span: span(1..6), @@ -3199,7 +3296,7 @@ bar ); assert_eq!( parser(r"a{5}?").parse(), - Ok(Ast::Repetition(ast::Repetition { + Ok(Ast::repetition(ast::Repetition { span: span(0..5), op: ast::RepetitionOp { span: 
span(1..5), @@ -3217,7 +3314,7 @@ bar 0..5, vec![ lit('a', 0), - Ast::Repetition(ast::Repetition { + Ast::repetition(ast::Repetition { span: span(1..5), op: ast::RepetitionOp { span: span(2..5), @@ -3237,7 +3334,7 @@ bar 0..6, vec![ lit('a', 0), - Ast::Repetition(ast::Repetition { + Ast::repetition(ast::Repetition { span: span(1..5), op: ast::RepetitionOp { span: span(2..5), @@ -3255,7 +3352,7 @@ bar assert_eq!( parser(r"a{ 5 }").parse(), - Ok(Ast::Repetition(ast::Repetition { + Ok(Ast::repetition(ast::Repetition { span: span(0..6), op: ast::RepetitionOp { span: span(1..6), @@ -3269,7 +3366,7 @@ bar ); assert_eq!( parser(r"a{ 5 , 9 }").parse(), - Ok(Ast::Repetition(ast::Repetition { + Ok(Ast::repetition(ast::Repetition { span: span(0..10), op: ast::RepetitionOp { span: span(1..10), @@ -3283,7 +3380,7 @@ bar ); assert_eq!( parser_ignore_whitespace(r"a{5,9} ?").parse(), - Ok(Ast::Repetition(ast::Repetition { + Ok(Ast::repetition(ast::Repetition { span: span(0..8), op: ast::RepetitionOp { span: span(1..8), @@ -3295,6 +3392,23 @@ bar ast: Box::new(lit('a', 0)), })) ); + assert_eq!( + parser(r"\b{5,9}").parse(), + Ok(Ast::repetition(ast::Repetition { + span: span(0..7), + op: ast::RepetitionOp { + span: span(2..7), + kind: ast::RepetitionKind::Range( + ast::RepetitionRange::Bounded(5, 9) + ), + }, + greedy: true, + ast: Box::new(Ast::assertion(ast::Assertion { + span: span(0..2), + kind: ast::AssertionKind::WordBoundary, + })), + })) + ); assert_eq!( parser(r"(?i){0}").parse().unwrap_err(), @@ -3414,7 +3528,7 @@ bar fn parse_alternate() { assert_eq!( parser(r"a|b").parse(), - Ok(Ast::Alternation(ast::Alternation { + Ok(Ast::alternation(ast::Alternation { span: span(0..3), asts: vec![lit('a', 0), lit('b', 2)], })) @@ -3424,7 +3538,7 @@ bar Ok(group( 0..5, 1, - Ast::Alternation(ast::Alternation { + Ast::alternation(ast::Alternation { span: span(1..4), asts: vec![lit('a', 1), lit('b', 3)], }) @@ -3433,14 +3547,14 @@ bar assert_eq!( parser(r"a|b|c").parse(), - 
Ok(Ast::Alternation(ast::Alternation { + Ok(Ast::alternation(ast::Alternation { span: span(0..5), asts: vec![lit('a', 0), lit('b', 2), lit('c', 4)], })) ); assert_eq!( parser(r"ax|by|cz").parse(), - Ok(Ast::Alternation(ast::Alternation { + Ok(Ast::alternation(ast::Alternation { span: span(0..8), asts: vec![ concat(0..2, vec![lit('a', 0), lit('x', 1)]), @@ -3454,7 +3568,7 @@ bar Ok(group( 0..10, 1, - Ast::Alternation(ast::Alternation { + Ast::alternation(ast::Alternation { span: span(1..9), asts: vec![ concat(1..3, vec![lit('a', 1), lit('x', 2)]), @@ -3503,7 +3617,7 @@ bar parser(r"|").parse(), Ok(alt( 0..1, - vec![Ast::Empty(span(0..0)), Ast::Empty(span(1..1)),] + vec![Ast::empty(span(0..0)), Ast::empty(span(1..1)),] )) ); assert_eq!( @@ -3511,19 +3625,19 @@ bar Ok(alt( 0..2, vec![ - Ast::Empty(span(0..0)), - Ast::Empty(span(1..1)), - Ast::Empty(span(2..2)), + Ast::empty(span(0..0)), + Ast::empty(span(1..1)), + Ast::empty(span(2..2)), ] )) ); assert_eq!( parser(r"a|").parse(), - Ok(alt(0..2, vec![lit('a', 0), Ast::Empty(span(2..2)),])) + Ok(alt(0..2, vec![lit('a', 0), Ast::empty(span(2..2)),])) ); assert_eq!( parser(r"|a").parse(), - Ok(alt(0..2, vec![Ast::Empty(span(0..0)), lit('a', 1),])) + Ok(alt(0..2, vec![Ast::empty(span(0..0)), lit('a', 1),])) ); assert_eq!( @@ -3533,7 +3647,7 @@ bar 1, alt( 1..2, - vec![Ast::Empty(span(1..1)), Ast::Empty(span(2..2)),] + vec![Ast::empty(span(1..1)), Ast::empty(span(2..2)),] ) )) ); @@ -3542,7 +3656,7 @@ bar Ok(group( 0..4, 1, - alt(1..3, vec![lit('a', 1), Ast::Empty(span(3..3)),]) + alt(1..3, vec![lit('a', 1), Ast::empty(span(3..3)),]) )) ); assert_eq!( @@ -3550,7 +3664,7 @@ bar Ok(group( 0..4, 1, - alt(1..3, vec![Ast::Empty(span(1..1)), lit('a', 2),]) + alt(1..3, vec![Ast::empty(span(1..1)), lit('a', 2),]) )) ); @@ -3606,7 +3720,7 @@ bar fn parse_group() { assert_eq!( parser("(?i)").parse(), - Ok(Ast::Flags(ast::SetFlags { + Ok(Ast::flags(ast::SetFlags { span: span(0..4), flags: ast::Flags { span: span(2..3), @@ -3621,7 
+3735,7 @@ bar ); assert_eq!( parser("(?iU)").parse(), - Ok(Ast::Flags(ast::SetFlags { + Ok(Ast::flags(ast::SetFlags { span: span(0..5), flags: ast::Flags { span: span(2..4), @@ -3644,7 +3758,7 @@ bar ); assert_eq!( parser("(?i-U)").parse(), - Ok(Ast::Flags(ast::SetFlags { + Ok(Ast::flags(ast::SetFlags { span: span(0..6), flags: ast::Flags { span: span(2..5), @@ -3672,15 +3786,15 @@ bar assert_eq!( parser("()").parse(), - Ok(Ast::Group(ast::Group { + Ok(Ast::group(ast::Group { span: span(0..2), kind: ast::GroupKind::CaptureIndex(1), - ast: Box::new(Ast::Empty(span(1..1))), + ast: Box::new(Ast::empty(span(1..1))), })) ); assert_eq!( parser("(a)").parse(), - Ok(Ast::Group(ast::Group { + Ok(Ast::group(ast::Group { span: span(0..3), kind: ast::GroupKind::CaptureIndex(1), ast: Box::new(lit('a', 1)), @@ -3688,20 +3802,20 @@ bar ); assert_eq!( parser("(())").parse(), - Ok(Ast::Group(ast::Group { + Ok(Ast::group(ast::Group { span: span(0..4), kind: ast::GroupKind::CaptureIndex(1), - ast: Box::new(Ast::Group(ast::Group { + ast: Box::new(Ast::group(ast::Group { span: span(1..3), kind: ast::GroupKind::CaptureIndex(2), - ast: Box::new(Ast::Empty(span(2..2))), + ast: Box::new(Ast::empty(span(2..2))), })), })) ); assert_eq!( parser("(?:a)").parse(), - Ok(Ast::Group(ast::Group { + Ok(Ast::group(ast::Group { span: span(0..5), kind: ast::GroupKind::NonCapturing(ast::Flags { span: span(2..2), @@ -3713,7 +3827,7 @@ bar assert_eq!( parser("(?i:a)").parse(), - Ok(Ast::Group(ast::Group { + Ok(Ast::group(ast::Group { span: span(0..6), kind: ast::GroupKind::NonCapturing(ast::Flags { span: span(2..3), @@ -3729,7 +3843,7 @@ bar ); assert_eq!( parser("(?i-U:a)").parse(), - Ok(Ast::Group(ast::Group { + Ok(Ast::group(ast::Group { span: span(0..8), kind: ast::GroupKind::NonCapturing(ast::Flags { span: span(2..5), @@ -3818,7 +3932,7 @@ bar fn parse_capture_name() { assert_eq!( parser("(?z)").parse(), - Ok(Ast::Group(ast::Group { + Ok(Ast::group(ast::Group { span: span(0..7), kind: 
ast::GroupKind::CaptureName { starts_with_p: false, @@ -3833,7 +3947,7 @@ bar ); assert_eq!( parser("(?Pz)").parse(), - Ok(Ast::Group(ast::Group { + Ok(Ast::group(ast::Group { span: span(0..8), kind: ast::GroupKind::CaptureName { starts_with_p: true, @@ -3848,7 +3962,7 @@ bar ); assert_eq!( parser("(?Pz)").parse(), - Ok(Ast::Group(ast::Group { + Ok(Ast::group(ast::Group { span: span(0..10), kind: ast::GroupKind::CaptureName { starts_with_p: true, @@ -3864,7 +3978,7 @@ bar assert_eq!( parser("(?Pz)").parse(), - Ok(Ast::Group(ast::Group { + Ok(Ast::group(ast::Group { span: span(0..10), kind: ast::GroupKind::CaptureName { starts_with_p: true, @@ -3880,7 +3994,7 @@ bar assert_eq!( parser("(?Pz)").parse(), - Ok(Ast::Group(ast::Group { + Ok(Ast::group(ast::Group { span: span(0..10), kind: ast::GroupKind::CaptureName { starts_with_p: true, @@ -3896,7 +4010,7 @@ bar assert_eq!( parser("(?Pz)").parse(), - Ok(Ast::Group(ast::Group { + Ok(Ast::group(ast::Group { span: span(0..11), kind: ast::GroupKind::CaptureName { starts_with_p: true, @@ -3912,7 +4026,7 @@ bar assert_eq!( parser("(?P)").parse(), - Ok(Ast::Group(ast::Group { + Ok(Ast::group(ast::Group { span: Span::new( Position::new(0, 1, 1), Position::new(9, 1, 9), @@ -3928,7 +4042,7 @@ bar index: 1, } }, - ast: Box::new(Ast::Empty(Span::new( + ast: Box::new(Ast::empty(Span::new( Position::new(8, 1, 8), Position::new(8, 1, 8), ))), @@ -3936,7 +4050,7 @@ bar ); assert_eq!( parser("(?P<名字>)").parse(), - Ok(Ast::Group(ast::Group { + Ok(Ast::group(ast::Group { span: Span::new( Position::new(0, 1, 1), Position::new(12, 1, 9), @@ -3952,7 +4066,7 @@ bar index: 1, } }, - ast: Box::new(Ast::Empty(Span::new( + ast: Box::new(Ast::empty(Span::new( Position::new(11, 1, 8), Position::new(11, 1, 8), ))), @@ -4381,6 +4495,48 @@ bar kind: ast::AssertionKind::WordBoundary, })) ); + assert_eq!( + parser(r"\b{start}").parse_primitive(), + Ok(Primitive::Assertion(ast::Assertion { + span: span(0..9), + kind: 
ast::AssertionKind::WordBoundaryStart, + })) + ); + assert_eq!( + parser(r"\b{end}").parse_primitive(), + Ok(Primitive::Assertion(ast::Assertion { + span: span(0..7), + kind: ast::AssertionKind::WordBoundaryEnd, + })) + ); + assert_eq!( + parser(r"\b{start-half}").parse_primitive(), + Ok(Primitive::Assertion(ast::Assertion { + span: span(0..14), + kind: ast::AssertionKind::WordBoundaryStartHalf, + })) + ); + assert_eq!( + parser(r"\b{end-half}").parse_primitive(), + Ok(Primitive::Assertion(ast::Assertion { + span: span(0..12), + kind: ast::AssertionKind::WordBoundaryEndHalf, + })) + ); + assert_eq!( + parser(r"\<").parse_primitive(), + Ok(Primitive::Assertion(ast::Assertion { + span: span(0..2), + kind: ast::AssertionKind::WordBoundaryStartAngle, + })) + ); + assert_eq!( + parser(r"\>").parse_primitive(), + Ok(Primitive::Assertion(ast::Assertion { + span: span(0..2), + kind: ast::AssertionKind::WordBoundaryEndAngle, + })) + ); assert_eq!( parser(r"\B").parse_primitive(), Ok(Primitive::Assertion(ast::Assertion { @@ -4418,20 +4574,60 @@ bar kind: ast::ErrorKind::EscapeUnrecognized, } ); - // But also, < and > are banned, so that we may evolve them into - // start/end word boundary assertions. (Not sure if we will...) + + // Starting a special word boundary without any non-whitespace chars + // after the brace makes it ambiguous whether the user meant to write + // a counted repetition (probably not?) or an actual special word + // boundary assertion. 
assert_eq!( - parser(r"\<").parse_escape().unwrap_err(), + parser(r"\b{").parse_escape().unwrap_err(), TestError { - span: span(0..2), - kind: ast::ErrorKind::EscapeUnrecognized, + span: span(0..3), + kind: ast::ErrorKind::SpecialWordOrRepetitionUnexpectedEof, } ); assert_eq!( - parser(r"\>").parse_escape().unwrap_err(), + parser_ignore_whitespace(r"\b{ ").parse_escape().unwrap_err(), TestError { - span: span(0..2), - kind: ast::ErrorKind::EscapeUnrecognized, + span: span(0..4), + kind: ast::ErrorKind::SpecialWordOrRepetitionUnexpectedEof, + } + ); + // When 'x' is not enabled, the space is seen as a non-[-A-Za-z] char, + // and thus causes the parser to treat it as a counted repetition. + assert_eq!( + parser(r"\b{ ").parse().unwrap_err(), + TestError { + span: span(4..4), + kind: ast::ErrorKind::RepetitionCountDecimalEmpty, + } + ); + // In this case, we got some valid chars that makes it look like the + // user is writing one of the special word boundary assertions, but + // we forget to close the brace. + assert_eq!( + parser(r"\b{foo").parse_escape().unwrap_err(), + TestError { + span: span(2..6), + kind: ast::ErrorKind::SpecialWordBoundaryUnclosed, + } + ); + // We get the same error as above, except it is provoked by seeing a + // char that we know is invalid before seeing a closing brace. + assert_eq!( + parser(r"\b{foo!}").parse_escape().unwrap_err(), + TestError { + span: span(2..6), + kind: ast::ErrorKind::SpecialWordBoundaryUnclosed, + } + ); + // And this one occurs when, syntactically, everything looks okay, but + // we don't use a valid spelling of a word boundary assertion. 
+ assert_eq!( + parser(r"\b{foo}").parse_escape().unwrap_err(), + TestError { + span: span(3..6), + kind: ast::ErrorKind::SpecialWordBoundaryUnrecognized, } ); @@ -4494,15 +4690,15 @@ bar ); assert_eq!( parser_octal(r"\778").parse(), - Ok(Ast::Concat(ast::Concat { + Ok(Ast::concat(ast::Concat { span: span(0..4), asts: vec![ - Ast::Literal(ast::Literal { + Ast::literal(ast::Literal { span: span(0..3), kind: ast::LiteralKind::Octal, c: '?', }), - Ast::Literal(ast::Literal { + Ast::literal(ast::Literal { span: span(3..4), kind: ast::LiteralKind::Verbatim, c: '8', @@ -4512,15 +4708,15 @@ bar ); assert_eq!( parser_octal(r"\7777").parse(), - Ok(Ast::Concat(ast::Concat { + Ok(Ast::concat(ast::Concat { span: span(0..5), asts: vec![ - Ast::Literal(ast::Literal { + Ast::literal(ast::Literal { span: span(0..4), kind: ast::LiteralKind::Octal, c: '\u{01FF}', }), - Ast::Literal(ast::Literal { + Ast::literal(ast::Literal { span: span(4..5), kind: ast::LiteralKind::Verbatim, c: '7', @@ -4965,15 +5161,15 @@ bar assert_eq!( parser("[[:alnum:]]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..11), negated: false, kind: itemset(item_ascii(alnum(span(1..10), false))), - }))) + })) ); assert_eq!( parser("[[[:alnum:]]]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..13), negated: false, kind: itemset(item_bracket(ast::ClassBracketed { @@ -4981,11 +5177,11 @@ bar negated: false, kind: itemset(item_ascii(alnum(span(2..11), false))), })), - }))) + })) ); assert_eq!( parser("[[:alnum:]&&[:lower:]]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..22), negated: false, kind: intersection( @@ -4993,11 +5189,11 @@ bar itemset(item_ascii(alnum(span(1..10), false))), itemset(item_ascii(lower(span(12..21), false))), ), - }))) + })) ); assert_eq!( 
parser("[[:alnum:]--[:lower:]]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..22), negated: false, kind: difference( @@ -5005,11 +5201,11 @@ bar itemset(item_ascii(alnum(span(1..10), false))), itemset(item_ascii(lower(span(12..21), false))), ), - }))) + })) ); assert_eq!( parser("[[:alnum:]~~[:lower:]]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..22), negated: false, kind: symdifference( @@ -5017,20 +5213,20 @@ bar itemset(item_ascii(alnum(span(1..10), false))), itemset(item_ascii(lower(span(12..21), false))), ), - }))) + })) ); assert_eq!( parser("[a]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..3), negated: false, kind: itemset(lit(span(1..2), 'a')), - }))) + })) ); assert_eq!( parser(r"[a\]]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..5), negated: false, kind: union( @@ -5044,11 +5240,11 @@ bar }), ] ), - }))) + })) ); assert_eq!( parser(r"[a\-z]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..6), negated: false, kind: union( @@ -5063,44 +5259,44 @@ bar lit(span(4..5), 'z'), ] ), - }))) + })) ); assert_eq!( parser("[ab]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..4), negated: false, kind: union( span(1..3), vec![lit(span(1..2), 'a'), lit(span(2..3), 'b'),] ), - }))) + })) ); assert_eq!( parser("[a-]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..4), negated: false, kind: union( span(1..3), vec![lit(span(1..2), 'a'), lit(span(2..3), '-'),] ), - }))) + })) ); 
assert_eq!( parser("[-a]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..4), negated: false, kind: union( span(1..3), vec![lit(span(1..2), '-'), lit(span(2..3), 'a'),] ), - }))) + })) ); assert_eq!( parser(r"[\pL]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..5), negated: false, kind: itemset(item_unicode(ast::ClassUnicode { @@ -5108,11 +5304,11 @@ bar negated: false, kind: ast::ClassUnicodeKind::OneLetter('L'), })), - }))) + })) ); assert_eq!( parser(r"[\w]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..4), negated: false, kind: itemset(item_perl(ast::ClassPerl { @@ -5120,11 +5316,11 @@ bar kind: ast::ClassPerlKind::Word, negated: false, })), - }))) + })) ); assert_eq!( parser(r"[a\wz]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..6), negated: false, kind: union( @@ -5139,20 +5335,20 @@ bar lit(span(4..5), 'z'), ] ), - }))) + })) ); assert_eq!( parser("[a-z]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..5), negated: false, kind: itemset(range(span(1..4), 'a', 'z')), - }))) + })) ); assert_eq!( parser("[a-cx-z]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..8), negated: false, kind: union( @@ -5162,11 +5358,11 @@ bar range(span(4..7), 'x', 'z'), ] ), - }))) + })) ); assert_eq!( parser(r"[\w&&a-cx-z]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..12), negated: false, kind: intersection( @@ -5184,11 +5380,11 @@ bar ] ), ), - }))) + })) ); assert_eq!( 
parser(r"[a-cx-z&&\w]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..12), negated: false, kind: intersection( @@ -5206,11 +5402,11 @@ bar negated: false, })), ), - }))) + })) ); assert_eq!( parser(r"[a--b--c]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..9), negated: false, kind: difference( @@ -5222,11 +5418,11 @@ bar ), itemset(lit(span(7..8), 'c')), ), - }))) + })) ); assert_eq!( parser(r"[a~~b~~c]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..9), negated: false, kind: symdifference( @@ -5238,11 +5434,11 @@ bar ), itemset(lit(span(7..8), 'c')), ), - }))) + })) ); assert_eq!( parser(r"[\^&&^]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..7), negated: false, kind: intersection( @@ -5254,11 +5450,11 @@ bar })), itemset(lit(span(5..6), '^')), ), - }))) + })) ); assert_eq!( parser(r"[\&&&&]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..7), negated: false, kind: intersection( @@ -5270,11 +5466,11 @@ bar })), itemset(lit(span(5..6), '&')), ), - }))) + })) ); assert_eq!( parser(r"[&&&&]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..6), negated: false, kind: intersection( @@ -5286,13 +5482,13 @@ bar ), itemset(empty(span(5..5))), ), - }))) + })) ); let pat = "[☃-⛄]"; assert_eq!( parser(pat).parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span_range(pat, 0..9), negated: false, kind: itemset(ast::ClassSetItem::Range(ast::ClassSetRange { @@ -5308,20 +5504,20 @@ bar c: '⛄', }, })), - }))) + 
})) ); assert_eq!( parser(r"[]]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..3), negated: false, kind: itemset(lit(span(1..2), ']')), - }))) + })) ); assert_eq!( parser(r"[]\[]").parse(), - Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ok(Ast::class_bracketed(ast::ClassBracketed { span: span(0..5), negated: false, kind: union( @@ -5335,14 +5531,14 @@ bar }), ] ), - }))) + })) ); assert_eq!( parser(r"[\[]]").parse(), Ok(concat( 0..5, vec![ - Ast::Class(ast::Class::Bracketed(ast::ClassBracketed { + Ast::class_bracketed(ast::ClassBracketed { span: span(0..4), negated: false, kind: itemset(ast::ClassSetItem::Literal( @@ -5352,8 +5548,8 @@ bar c: '[', } )), - })), - Ast::Literal(ast::Literal { + }), + Ast::literal(ast::Literal { span: span(4..5), kind: ast::LiteralKind::Verbatim, c: ']', @@ -5914,15 +6110,15 @@ bar assert_eq!( parser(r"\pNz").parse(), - Ok(Ast::Concat(ast::Concat { + Ok(Ast::concat(ast::Concat { span: span(0..4), asts: vec![ - Ast::Class(ast::Class::Unicode(ast::ClassUnicode { + Ast::class_unicode(ast::ClassUnicode { span: span(0..3), negated: false, kind: ast::ClassUnicodeKind::OneLetter('N'), - })), - Ast::Literal(ast::Literal { + }), + Ast::literal(ast::Literal { span: span(3..4), kind: ast::LiteralKind::Verbatim, c: 'z', @@ -5932,15 +6128,15 @@ bar ); assert_eq!( parser(r"\p{Greek}z").parse(), - Ok(Ast::Concat(ast::Concat { + Ok(Ast::concat(ast::Concat { span: span(0..10), asts: vec![ - Ast::Class(ast::Class::Unicode(ast::ClassUnicode { + Ast::class_unicode(ast::ClassUnicode { span: span(0..9), negated: false, kind: ast::ClassUnicodeKind::Named(s("Greek")), - })), - Ast::Literal(ast::Literal { + }), + Ast::literal(ast::Literal { span: span(9..10), kind: ast::LiteralKind::Verbatim, c: 'z', @@ -6017,23 +6213,23 @@ bar assert_eq!( parser(r"\d").parse(), - Ok(Ast::Class(ast::Class::Perl(ast::ClassPerl { + Ok(Ast::class_perl(ast::ClassPerl { span: 
span(0..2), kind: ast::ClassPerlKind::Digit, negated: false, - }))) + })) ); assert_eq!( parser(r"\dz").parse(), - Ok(Ast::Concat(ast::Concat { + Ok(Ast::concat(ast::Concat { span: span(0..3), asts: vec![ - Ast::Class(ast::Class::Perl(ast::ClassPerl { + Ast::class_perl(ast::ClassPerl { span: span(0..2), kind: ast::ClassPerlKind::Digit, negated: false, - })), - Ast::Literal(ast::Literal { + }), + Ast::literal(ast::Literal { span: span(2..3), kind: ast::LiteralKind::Verbatim, c: 'z', diff --git a/vendor/regex-syntax/src/ast/print.rs b/vendor/regex-syntax/src/ast/print.rs index 86a87e1..1ceb3c7 100644 --- a/vendor/regex-syntax/src/ast/print.rs +++ b/vendor/regex-syntax/src/ast/print.rs @@ -80,27 +80,21 @@ impl Visitor for Writer { fn visit_pre(&mut self, ast: &Ast) -> fmt::Result { match *ast { Ast::Group(ref x) => self.fmt_group_pre(x), - Ast::Class(ast::Class::Bracketed(ref x)) => { - self.fmt_class_bracketed_pre(x) - } + Ast::ClassBracketed(ref x) => self.fmt_class_bracketed_pre(x), _ => Ok(()), } } fn visit_post(&mut self, ast: &Ast) -> fmt::Result { - use crate::ast::Class; - match *ast { Ast::Empty(_) => Ok(()), Ast::Flags(ref x) => self.fmt_set_flags(x), Ast::Literal(ref x) => self.fmt_literal(x), Ast::Dot(_) => self.wtr.write_str("."), Ast::Assertion(ref x) => self.fmt_assertion(x), - Ast::Class(Class::Perl(ref x)) => self.fmt_class_perl(x), - Ast::Class(Class::Unicode(ref x)) => self.fmt_class_unicode(x), - Ast::Class(Class::Bracketed(ref x)) => { - self.fmt_class_bracketed_post(x) - } + Ast::ClassPerl(ref x) => self.fmt_class_perl(x), + Ast::ClassUnicode(ref x) => self.fmt_class_unicode(x), + Ast::ClassBracketed(ref x) => self.fmt_class_bracketed_post(x), Ast::Repetition(ref x) => self.fmt_repetition(x), Ast::Group(ref x) => self.fmt_group_post(x), Ast::Alternation(_) => Ok(()), @@ -267,6 +261,12 @@ impl Writer { EndText => self.wtr.write_str(r"\z"), WordBoundary => self.wtr.write_str(r"\b"), NotWordBoundary => self.wtr.write_str(r"\B"), + WordBoundaryStart 
=> self.wtr.write_str(r"\b{start}"), + WordBoundaryEnd => self.wtr.write_str(r"\b{end}"), + WordBoundaryStartAngle => self.wtr.write_str(r"\<"), + WordBoundaryEndAngle => self.wtr.write_str(r"\>"), + WordBoundaryStartHalf => self.wtr.write_str(r"\b{start-half}"), + WordBoundaryEndHalf => self.wtr.write_str(r"\b{end-half}"), } } diff --git a/vendor/regex-syntax/src/ast/visitor.rs b/vendor/regex-syntax/src/ast/visitor.rs index ab13673..c1bb24d 100644 --- a/vendor/regex-syntax/src/ast/visitor.rs +++ b/vendor/regex-syntax/src/ast/visitor.rs @@ -48,6 +48,11 @@ pub trait Visitor { Ok(()) } + /// This method is called between child nodes of a concatenation. + fn visit_concat_in(&mut self) -> Result<(), Self::Err> { + Ok(()) + } + /// This method is called on every [`ClassSetItem`](ast::ClassSetItem) /// before descending into child nodes. fn visit_class_set_item_pre( @@ -228,8 +233,14 @@ impl<'a> HeapVisitor<'a> { // If this is a concat/alternate, then we might have additional // inductive steps to process. if let Some(x) = self.pop(frame) { - if let Frame::Alternation { .. } = x { - visitor.visit_alternation_in()?; + match x { + Frame::Alternation { .. } => { + visitor.visit_alternation_in()?; + } + Frame::Concat { .. 
} => { + visitor.visit_concat_in()?; + } + _ => {} } ast = x.child(); self.stack.push((post_ast, x)); @@ -253,7 +264,7 @@ impl<'a> HeapVisitor<'a> { visitor: &mut V, ) -> Result>, V::Err> { Ok(match *ast { - Ast::Class(ast::Class::Bracketed(ref x)) => { + Ast::ClassBracketed(ref x) => { self.visit_class(x, visitor)?; None } diff --git a/vendor/regex-syntax/src/hir/literal.rs b/vendor/regex-syntax/src/hir/literal.rs index bd3a2d1..a5a3737 100644 --- a/vendor/regex-syntax/src/hir/literal.rs +++ b/vendor/regex-syntax/src/hir/literal.rs @@ -23,7 +23,7 @@ effective literal optimizations: to lead to substring search that is only a little faster than a regex search, and thus the overhead of using literal optimizations in the first place might make things slower overall. -* The literals in your [`Seq`] shoudn't be too short. In general, longer is +* The literals in your [`Seq`] shouldn't be too short. In general, longer is better. A sequence corresponding to single bytes that occur frequently in the haystack, for example, is probably a bad literal optimization because it's likely to produce many false positive candidates. Longer literals are less @@ -51,7 +51,7 @@ the "trickier" parts are how to combine literal sequences, and that is all implemented on [`Seq`]. */ -use core::{cmp, mem}; +use core::{cmp, mem, num::NonZeroUsize}; use alloc::{vec, vec::Vec}; @@ -477,7 +477,7 @@ impl Extractor { } seq } - hir::Repetition { min, max: Some(max), .. } if min < max => { + hir::Repetition { min, .. } => { assert!(min > 0); // handled above let limit = u32::try_from(self.limit_repeat).unwrap_or(u32::MAX); @@ -491,10 +491,6 @@ impl Extractor { seq.make_inexact(); seq } - hir::Repetition { .. } => { - subseq.make_inexact(); - subseq - } } } @@ -692,7 +688,7 @@ impl Default for ExtractKind { /// from making assumptions about what literals are required in order to match /// a particular [`Hir`] expression. 
Generally speaking, when a set is in this /// state, literal optimizations are inhibited. A good example of a regex that -/// will cause this sort of set to apppear is `[A-Za-z]`. The character class +/// will cause this sort of set to appear is `[A-Za-z]`. The character class /// is just too big (and also too narrow) to be usefully expanded into 52 /// different literals. (Note that the decision for when a seq should become /// infinite is determined by the caller. A seq itself has no hard-coded @@ -1571,7 +1567,7 @@ impl Seq { /// unioning `self` with `other`. If either set is infinite, then this /// returns `None`. #[inline] - fn max_union_len(&self, other: &Seq) -> Option { + pub fn max_union_len(&self, other: &Seq) -> Option { let len1 = self.len()?; let len2 = other.len()?; Some(len1.saturating_add(len2)) @@ -1581,7 +1577,7 @@ impl Seq { /// cross product of `self` with `other`. If either set is infinite, then /// this returns `None`. #[inline] - fn max_cross_len(&self, other: &Seq) -> Option { + pub fn max_cross_len(&self, other: &Seq) -> Option { let len1 = self.len()?; let len2 = other.len()?; Some(len1.saturating_mul(len2)) @@ -1841,6 +1837,14 @@ impl Seq { None => return, Some(len) => len, }; + // Just give up now if our sequence contains an empty string. + if self.min_literal_len().map_or(false, |len| len == 0) { + // We squash the sequence so that nobody else gets any bright + // ideas to try and use it. An empty string implies a match at + // every position. A prefilter cannot help you here. + self.make_infinite(); + return; + } // Make sure we start with the smallest sequence possible. We use a // special version of preference minimization that retains exactness. // This is legal because optimization is only expected to occur once @@ -1910,34 +1914,41 @@ impl Seq { // longest common prefix to be subject to the poison check. 
} } - // Everything below this check is more-or-less about trying to - // heuristically reduce the false positive rate of a prefilter. But - // if our sequence is completely exact, then it's possible the regex - // engine can be skipped entirely. In this case, the false positive - // rate is zero because every literal match corresponds to a regex - // match. + // If we have an exact sequence, we *probably* just want to keep it + // as-is. But there are some cases where we don't. So we save a copy of + // the exact sequence now, and then try to do some more optimizations + // below. If those don't work out, we go back to this exact sequence. // - // This is OK even if the sequence contains a poison literal. Remember, - // a literal is only poisononous because of what we assume about its - // impact on the false positive rate. However, we do still check for - // an empty string. Empty strings are weird and it's best to let the - // regex engine handle those. + // The specific motivation for this is that we sometimes wind up with + // an exact sequence with a hefty number of literals. Say, 100. If we + // stuck with that, it would be too big for Teddy and would result in + // using Aho-Corasick. Which is fine... but the lazy DFA is plenty + // suitable in such cases. The real issue is that we will wind up not + // using a fast prefilter at all. So in cases like this, even though + // we have an exact sequence, it would be better to try and shrink the + // sequence (which we do below) and use it as a prefilter that can + // produce false positive matches. // - // We do currently do this check after the longest common prefix (or - // suffix) check, under the theory that single-substring search is so - // fast that we want that even if we'd end up turning an exact sequence - // into an inexact one. But this might be wrong... 
- if self.is_exact() - && self.min_literal_len().map_or(false, |len| len > 0) - { - return; - } + // But if the shrinking below results in a sequence that "sucks," then + // we don't want to use that because we already have an exact sequence + // in hand. + let exact: Option = + if self.is_exact() { Some(self.clone()) } else { None }; // Now we attempt to shorten the sequence. The idea here is that we // don't want to look for too many literals, but we want to shorten // our sequence enough to improve our odds of using better algorithms // downstream (such as Teddy). + // + // The pair of numbers in this list corresponds to the maximal prefix + // (in bytes) to keep for all literals and the length of the sequence + // at which to do it. + // + // So for example, the pair (3, 500) would mean, "if we have more than + // 500 literals in our sequence, then truncate all of our literals + // such that they are at most 3 bytes in length and the minimize the + // sequence." const ATTEMPTS: [(usize, usize); 5] = - [(5, 64), (4, 64), (3, 64), (2, 64), (1, 10)]; + [(5, 10), (4, 10), (3, 64), (2, 64), (1, 10)]; for (keep, limit) in ATTEMPTS { let len = match self.len() { None => break, @@ -1951,7 +1962,11 @@ impl Seq { } else { self.keep_last_bytes(keep); } - self.minimize_by_preference(); + if prefix { + if let Some(ref mut lits) = self.literals { + PreferenceTrie::minimize(lits, true); + } + } } // Check for a poison literal. A poison literal is one that is short // and is believed to have a very high match count. These poisons @@ -1968,6 +1983,30 @@ impl Seq { self.make_infinite(); } } + // OK, if we had an exact sequence before attempting more optimizations + // above and our post-optimized sequence sucks for some reason or + // another, then we go back to the exact sequence. + if let Some(exact) = exact { + // If optimizing resulted in dropping our literals, then certainly + // backup and use the exact sequence that we had. 
+ if !self.is_finite() { + *self = exact; + return; + } + // If our optimized sequence contains a short literal, then it's + // *probably* not so great. So throw it away and revert to the + // exact sequence. + if self.min_literal_len().map_or(true, |len| len <= 2) { + *self = exact; + return; + } + // Finally, if our optimized sequence is "big" (i.e., can't use + // Teddy), then also don't use it and rely on the exact sequence. + if self.len().map_or(true, |len| len > 64) { + *self = exact; + return; + } + } } } @@ -1977,7 +2016,7 @@ impl core::fmt::Debug for Seq { if let Some(lits) = self.literals() { f.debug_list().entries(lits.iter()).finish() } else { - write!(f, "[∅]") + write!(f, "[∞]") } } } @@ -2160,12 +2199,19 @@ impl core::fmt::Debug for Literal { /// never seen this show up on a profile. Because of the heuristic limits /// imposed on literal extractions, the size of the inputs here is usually /// very small.) -#[derive(Debug, Default)] +#[derive(Debug)] struct PreferenceTrie { /// The states in this trie. The index of a state in this vector is its ID. states: Vec, + /// This vec indicates which states are match states. It always has + /// the same length as `states` and is indexed by the same state ID. + /// A state with identifier `sid` is a match state if and only if + /// `matches[sid].is_some()`. The option contains the index of the literal + /// corresponding to the match. The index is offset by 1 so that it fits in + /// a NonZeroUsize. + matches: Vec>, /// The index to allocate to the next literal added to this trie. Starts at - /// 0 and increments by 1 for every literal successfully added to the trie. + /// 1 and increments by 1 for every literal successfully added to the trie. next_literal_index: usize, } @@ -2176,9 +2222,6 @@ struct State { /// are sorted by byte. There is at most one such transition for any /// particular byte. trans: Vec<(u8, usize)>, - /// Whether this is a matching state or not. 
If it is, then it contains the - /// index to the matching literal. - literal_index: Option, } impl PreferenceTrie { @@ -2192,20 +2235,19 @@ impl PreferenceTrie { /// after them and because any removed literals are guaranteed to never /// match. fn minimize(literals: &mut Vec, keep_exact: bool) { - use core::cell::RefCell; - - // MSRV(1.61): Use retain_mut here to avoid interior mutability. - let trie = RefCell::new(PreferenceTrie::default()); + let mut trie = PreferenceTrie { + states: vec![], + matches: vec![], + next_literal_index: 1, + }; let mut make_inexact = vec![]; - literals.retain(|lit| { - match trie.borrow_mut().insert(lit.as_bytes()) { - Ok(_) => true, - Err(i) => { - if !keep_exact { - make_inexact.push(i); - } - false + literals.retain_mut(|lit| match trie.insert(lit.as_bytes()) { + Ok(_) => true, + Err(i) => { + if !keep_exact { + make_inexact.push(i.checked_sub(1).unwrap()); } + false } }); for i in make_inexact { @@ -2225,15 +2267,15 @@ impl PreferenceTrie { /// search. 
fn insert(&mut self, bytes: &[u8]) -> Result { let mut prev = self.root(); - if let Some(idx) = self.states[prev].literal_index { - return Err(idx); + if let Some(idx) = self.matches[prev] { + return Err(idx.get()); } for &b in bytes.iter() { match self.states[prev].trans.binary_search_by_key(&b, |t| t.0) { Ok(i) => { prev = self.states[prev].trans[i].1; - if let Some(idx) = self.states[prev].literal_index { - return Err(idx); + if let Some(idx) = self.matches[prev] { + return Err(idx.get()); } } Err(i) => { @@ -2245,7 +2287,7 @@ impl PreferenceTrie { } let idx = self.next_literal_index; self.next_literal_index += 1; - self.states[prev].literal_index = Some(idx); + self.matches[prev] = NonZeroUsize::new(idx); Ok(idx) } @@ -2262,6 +2304,7 @@ impl PreferenceTrie { fn create_state(&mut self) -> usize { let id = self.states.len(); self.states.push(State::default()); + self.matches.push(None); id } } @@ -2603,6 +2646,12 @@ mod tests { ]), e(r"(ab|cd)(ef|gh)(ij|kl)") ); + + assert_eq!(inexact([E("abab")], [E("abab")]), e(r"(ab){2}")); + + assert_eq!(inexact([I("abab")], [I("abab")]), e(r"(ab){2,3}")); + + assert_eq!(inexact([I("abab")], [I("abab")]), e(r"(ab){2,}")); } #[test] @@ -2815,13 +2864,13 @@ mod tests { // repeats. 
#[test] fn crazy_repeats() { - assert_eq!(inexact([I("")], [I("")]), e(r"(?:){4294967295}")); + assert_eq!(inexact([E("")], [E("")]), e(r"(?:){4294967295}")); assert_eq!( - inexact([I("")], [I("")]), + inexact([E("")], [E("")]), e(r"(?:){64}{64}{64}{64}{64}{64}") ); - assert_eq!(inexact([I("")], [I("")]), e(r"x{0}{4294967295}")); - assert_eq!(inexact([I("")], [I("")]), e(r"(?:|){4294967295}")); + assert_eq!(inexact([E("")], [E("")]), e(r"x{0}{4294967295}")); + assert_eq!(inexact([E("")], [E("")]), e(r"(?:|){4294967295}")); assert_eq!( inexact([E("")], [E("")]), diff --git a/vendor/regex-syntax/src/hir/mod.rs b/vendor/regex-syntax/src/hir/mod.rs index e5ea370..ce38ead 100644 --- a/vendor/regex-syntax/src/hir/mod.rs +++ b/vendor/regex-syntax/src/hir/mod.rs @@ -88,6 +88,9 @@ pub enum ErrorKind { /// This error occurs when translating a pattern that could match a byte /// sequence that isn't UTF-8 and `utf8` was enabled. InvalidUtf8, + /// This error occurs when one uses a non-ASCII byte for a line terminator, + /// but where Unicode mode is enabled and UTF-8 mode is disabled. + InvalidLineTerminator, /// This occurs when an unrecognized Unicode property name could not /// be found. UnicodePropertyNotFound, @@ -120,6 +123,7 @@ impl core::fmt::Display for ErrorKind { let msg = match *self { UnicodeNotAllowed => "Unicode not allowed here", InvalidUtf8 => "pattern can match invalid UTF-8", + InvalidLineTerminator => "invalid line terminator, must be ASCII", UnicodePropertyNotFound => "Unicode property not found", UnicodePropertyValueNotFound => "Unicode property value not found", UnicodePerlClassNotFound => { @@ -180,7 +184,7 @@ impl core::fmt::Display for ErrorKind { /// matches. /// /// For empty matches, those can occur at any position. It is the -/// repsonsibility of the regex engine to determine whether empty matches are +/// responsibility of the regex engine to determine whether empty matches are /// permitted between the code units of a single codepoint. 
/// /// # Stack space @@ -355,7 +359,13 @@ impl Hir { /// Creates a repetition HIR expression. #[inline] - pub fn repetition(rep: Repetition) -> Hir { + pub fn repetition(mut rep: Repetition) -> Hir { + // If the sub-expression of a repetition can only match the empty + // string, then we force its maximum to be at most 1. + if rep.sub.properties().maximum_len() == Some(0) { + rep.min = cmp::min(rep.min, 1); + rep.max = rep.max.map(|n| cmp::min(n, 1)).or(Some(1)); + } // The regex 'a{0}' is always equivalent to the empty regex. This is // true even when 'a' is an expression that never matches anything // (like '\P{any}'). @@ -547,7 +557,7 @@ impl Hir { // We rebuild the alternation by simplifying it. We proceed similarly // as the concatenation case. But in this case, there's no literal // simplification happening. We're just flattening alternations. - let mut new = vec![]; + let mut new = Vec::with_capacity(subs.len()); for sub in subs { let (kind, props) = sub.into_parts(); match kind { @@ -642,6 +652,12 @@ impl Hir { cls.push(ClassBytesRange::new(b'\0', b'\xFF')); Hir::class(Class::Bytes(cls)) } + Dot::AnyCharExcept(ch) => { + let mut cls = + ClassUnicode::new([ClassUnicodeRange::new(ch, ch)]); + cls.negate(); + Hir::class(Class::Unicode(cls)) + } Dot::AnyCharExceptLF => { let mut cls = ClassUnicode::empty(); cls.push(ClassUnicodeRange::new('\0', '\x09')); @@ -655,6 +671,12 @@ impl Hir { cls.push(ClassUnicodeRange::new('\x0E', '\u{10FFFF}')); Hir::class(Class::Unicode(cls)) } + Dot::AnyByteExcept(byte) => { + let mut cls = + ClassBytes::new([ClassBytesRange::new(byte, byte)]); + cls.negate(); + Hir::class(Class::Bytes(cls)) + } Dot::AnyByteExceptLF => { let mut cls = ClassBytes::empty(); cls.push(ClassBytesRange::new(b'\0', b'\x09')); @@ -775,13 +797,18 @@ impl core::fmt::Debug for Literal { /// The high-level intermediate representation of a character class. /// /// A character class corresponds to a set of characters. 
A character is either -/// defined by a Unicode scalar value or a byte. Unicode characters are used -/// by default, while bytes are used when Unicode mode (via the `u` flag) is -/// disabled. +/// defined by a Unicode scalar value or a byte. /// /// A character class, regardless of its character type, is represented by a /// sequence of non-overlapping non-adjacent ranges of characters. /// +/// There are no guarantees about which class variant is used. Generally +/// speaking, the Unicode variat is used whenever a class needs to contain +/// non-ASCII Unicode scalar values. But the Unicode variant can be used even +/// when Unicode mode is disabled. For example, at the time of writing, the +/// regex `(?-u:a|\xc2\xa0)` will compile down to HIR for the Unicode class +/// `[a\u00A0]` due to optimizations. +/// /// Note that `Bytes` variant may be produced even when it exclusively matches /// valid UTF-8. This is because a `Bytes` variant represents an intention by /// the author of the regular expression to disable Unicode mode, which in turn @@ -1304,8 +1331,9 @@ impl ClassUnicodeRange { } } -/// A set of characters represented by arbitrary bytes (where one byte -/// corresponds to one character). +/// A set of characters represented by arbitrary bytes. +/// +/// Each byte corresponds to one character. #[derive(Clone, Debug, Eq, PartialEq)] pub struct ClassBytes { set: IntervalSet, @@ -1607,6 +1635,42 @@ pub enum Look { WordUnicode = 1 << 8, /// Match a Unicode-aware negation of a word boundary. WordUnicodeNegate = 1 << 9, + /// Match the start of an ASCII-only word boundary. That is, this matches a + /// position at either the beginning of the haystack or where the previous + /// character is not a word character and the following character is a word + /// character. + WordStartAscii = 1 << 10, + /// Match the end of an ASCII-only word boundary. 
That is, this matches + /// a position at either the end of the haystack or where the previous + /// character is a word character and the following character is not a word + /// character. + WordEndAscii = 1 << 11, + /// Match the start of a Unicode word boundary. That is, this matches a + /// position at either the beginning of the haystack or where the previous + /// character is not a word character and the following character is a word + /// character. + WordStartUnicode = 1 << 12, + /// Match the end of a Unicode word boundary. That is, this matches a + /// position at either the end of the haystack or where the previous + /// character is a word character and the following character is not a word + /// character. + WordEndUnicode = 1 << 13, + /// Match the start half of an ASCII-only word boundary. That is, this + /// matches a position at either the beginning of the haystack or where the + /// previous character is not a word character. + WordStartHalfAscii = 1 << 14, + /// Match the end half of an ASCII-only word boundary. That is, this + /// matches a position at either the end of the haystack or where the + /// following character is not a word character. + WordEndHalfAscii = 1 << 15, + /// Match the start half of a Unicode word boundary. That is, this matches + /// a position at either the beginning of the haystack or where the + /// previous character is not a word character. + WordStartHalfUnicode = 1 << 16, + /// Match the end half of a Unicode word boundary. That is, this matches + /// a position at either the end of the haystack or where the following + /// character is not a word character. 
+ WordEndHalfUnicode = 1 << 17, } impl Look { @@ -1628,6 +1692,14 @@ impl Look { Look::WordAsciiNegate => Look::WordAsciiNegate, Look::WordUnicode => Look::WordUnicode, Look::WordUnicodeNegate => Look::WordUnicodeNegate, + Look::WordStartAscii => Look::WordEndAscii, + Look::WordEndAscii => Look::WordStartAscii, + Look::WordStartUnicode => Look::WordEndUnicode, + Look::WordEndUnicode => Look::WordStartUnicode, + Look::WordStartHalfAscii => Look::WordEndHalfAscii, + Look::WordEndHalfAscii => Look::WordStartHalfAscii, + Look::WordStartHalfUnicode => Look::WordEndHalfUnicode, + Look::WordEndHalfUnicode => Look::WordStartHalfUnicode, } } @@ -1636,28 +1708,36 @@ impl Look { /// constructor is guaranteed to return the same look-around variant that /// one started with within a semver compatible release of this crate. #[inline] - pub const fn as_repr(self) -> u16 { + pub const fn as_repr(self) -> u32 { // AFAIK, 'as' is the only way to zero-cost convert an int enum to an // actual int. - self as u16 + self as u32 } /// Given the underlying representation of a `Look` value, return the /// corresponding `Look` value if the representation is valid. Otherwise /// `None` is returned. 
#[inline] - pub const fn from_repr(repr: u16) -> Option { + pub const fn from_repr(repr: u32) -> Option { match repr { - 0b00_0000_0001 => Some(Look::Start), - 0b00_0000_0010 => Some(Look::End), - 0b00_0000_0100 => Some(Look::StartLF), - 0b00_0000_1000 => Some(Look::EndLF), - 0b00_0001_0000 => Some(Look::StartCRLF), - 0b00_0010_0000 => Some(Look::EndCRLF), - 0b00_0100_0000 => Some(Look::WordAscii), - 0b00_1000_0000 => Some(Look::WordAsciiNegate), - 0b01_0000_0000 => Some(Look::WordUnicode), - 0b10_0000_0000 => Some(Look::WordUnicodeNegate), + 0b00_0000_0000_0000_0001 => Some(Look::Start), + 0b00_0000_0000_0000_0010 => Some(Look::End), + 0b00_0000_0000_0000_0100 => Some(Look::StartLF), + 0b00_0000_0000_0000_1000 => Some(Look::EndLF), + 0b00_0000_0000_0001_0000 => Some(Look::StartCRLF), + 0b00_0000_0000_0010_0000 => Some(Look::EndCRLF), + 0b00_0000_0000_0100_0000 => Some(Look::WordAscii), + 0b00_0000_0000_1000_0000 => Some(Look::WordAsciiNegate), + 0b00_0000_0001_0000_0000 => Some(Look::WordUnicode), + 0b00_0000_0010_0000_0000 => Some(Look::WordUnicodeNegate), + 0b00_0000_0100_0000_0000 => Some(Look::WordStartAscii), + 0b00_0000_1000_0000_0000 => Some(Look::WordEndAscii), + 0b00_0001_0000_0000_0000 => Some(Look::WordStartUnicode), + 0b00_0010_0000_0000_0000 => Some(Look::WordEndUnicode), + 0b00_0100_0000_0000_0000 => Some(Look::WordStartHalfAscii), + 0b00_1000_0000_0000_0000 => Some(Look::WordEndHalfAscii), + 0b01_0000_0000_0000_0000 => Some(Look::WordStartHalfUnicode), + 0b10_0000_0000_0000_0000 => Some(Look::WordEndHalfUnicode), _ => None, } } @@ -1682,6 +1762,14 @@ impl Look { Look::WordAsciiNegate => 'B', Look::WordUnicode => '𝛃', Look::WordUnicodeNegate => '𝚩', + Look::WordStartAscii => '<', + Look::WordEndAscii => '>', + Look::WordStartUnicode => '〈', + Look::WordEndUnicode => '〉', + Look::WordStartHalfAscii => '◁', + Look::WordEndHalfAscii => '▷', + Look::WordStartHalfUnicode => '◀', + Look::WordEndHalfUnicode => '▶', } } } @@ -1766,6 +1854,18 @@ pub enum Dot 
{ /// /// This is equivalent to `(?s-u:.)` and also `(?-u:[\x00-\xFF])`. AnyByte, + /// Matches the UTF-8 encoding of any Unicode scalar value except for the + /// `char` given. + /// + /// This is equivalent to using `(?u-s:.)` with the line terminator set + /// to a particular ASCII byte. (Because of peculiarities in the regex + /// engines, a line terminator must be a single byte. It follows that when + /// UTF-8 mode is enabled, this single byte must also be a Unicode scalar + /// value. That is, ti must be ASCII.) + /// + /// (This and `AnyCharExceptLF` both exist because of legacy reasons. + /// `AnyCharExceptLF` will be dropped in the next breaking change release.) + AnyCharExcept(char), /// Matches the UTF-8 encoding of any Unicode scalar value except for `\n`. /// /// This is equivalent to `(?u-s:.)` and also `[\p{any}--\n]`. @@ -1775,6 +1875,17 @@ pub enum Dot { /// /// This is equivalent to `(?uR-s:.)` and also `[\p{any}--\r\n]`. AnyCharExceptCRLF, + /// Matches any byte value except for the `u8` given. + /// + /// This is equivalent to using `(?-us:.)` with the line terminator set + /// to a particular ASCII byte. (Because of peculiarities in the regex + /// engines, a line terminator must be a single byte. It follows that when + /// UTF-8 mode is enabled, this single byte must also be a Unicode scalar + /// value. That is, ti must be ASCII.) + /// + /// (This and `AnyByteExceptLF` both exist because of legacy reasons. + /// `AnyByteExceptLF` will be dropped in the next breaking change release.) + AnyByteExcept(u8), /// Matches any byte value except for `\n`. /// /// This is equivalent to `(?-su:.)` and also `(?-u:[[\x00-\xFF]--\n])`. @@ -2410,10 +2521,10 @@ impl Properties { inner.look_set_prefix = p.look_set_prefix(); inner.look_set_suffix = p.look_set_suffix(); } - // If the static captures len of the sub-expression is not known or is - // zero, then it automatically propagates to the repetition, regardless - // of the repetition. 
Otherwise, it might change, but only when the - // repetition can match 0 times. + // If the static captures len of the sub-expression is not known or + // is greater than zero, then it automatically propagates to the + // repetition, regardless of the repetition. Otherwise, it might + // change, but only when the repetition can match 0 times. if rep.min == 0 && inner.static_explicit_captures_len.map_or(false, |len| len > 0) { @@ -2481,16 +2592,24 @@ impl Properties { props.literal = props.literal && p.is_literal(); props.alternation_literal = props.alternation_literal && p.is_alternation_literal(); - if let Some(ref mut minimum_len) = props.minimum_len { + if let Some(minimum_len) = props.minimum_len { match p.minimum_len() { None => props.minimum_len = None, - Some(len) => *minimum_len += len, + Some(len) => { + // We use saturating arithmetic here because the + // minimum is just a lower bound. We can't go any + // higher than what our number types permit. + props.minimum_len = + Some(minimum_len.saturating_add(len)); + } } } - if let Some(ref mut maximum_len) = props.maximum_len { + if let Some(maximum_len) = props.maximum_len { match p.maximum_len() { None => props.maximum_len = None, - Some(len) => *maximum_len += len, + Some(len) => { + props.maximum_len = maximum_len.checked_add(len) + } } } } @@ -2541,7 +2660,7 @@ pub struct LookSet { /// range of `u16` values to be represented. For example, even if the /// current implementation only makes use of the 10 least significant bits, /// it may use more bits in a future semver compatible release. 
- pub bits: u16, + pub bits: u32, } impl LookSet { @@ -2644,13 +2763,22 @@ impl LookSet { pub fn contains_word_unicode(self) -> bool { self.contains(Look::WordUnicode) || self.contains(Look::WordUnicodeNegate) + || self.contains(Look::WordStartUnicode) + || self.contains(Look::WordEndUnicode) + || self.contains(Look::WordStartHalfUnicode) + || self.contains(Look::WordEndHalfUnicode) } /// Returns true if and only if this set contains any ASCII word boundary /// or negated ASCII word boundary assertions. #[inline] pub fn contains_word_ascii(self) -> bool { - self.contains(Look::WordAscii) || self.contains(Look::WordAsciiNegate) + self.contains(Look::WordAscii) + || self.contains(Look::WordAsciiNegate) + || self.contains(Look::WordStartAscii) + || self.contains(Look::WordEndAscii) + || self.contains(Look::WordStartHalfAscii) + || self.contains(Look::WordEndHalfAscii) } /// Returns an iterator over all of the look-around assertions in this set. @@ -2729,29 +2857,31 @@ impl LookSet { *self = self.intersect(other); } - /// Return a `LookSet` from the slice given as a native endian 16-bit + /// Return a `LookSet` from the slice given as a native endian 32-bit /// integer. /// /// # Panics /// - /// This panics if `slice.len() < 2`. + /// This panics if `slice.len() < 4`. #[inline] pub fn read_repr(slice: &[u8]) -> LookSet { - let bits = u16::from_ne_bytes(slice[..2].try_into().unwrap()); + let bits = u32::from_ne_bytes(slice[..4].try_into().unwrap()); LookSet { bits } } - /// Write a `LookSet` as a native endian 16-bit integer to the beginning + /// Write a `LookSet` as a native endian 32-bit integer to the beginning /// of the slice given. /// /// # Panics /// - /// This panics if `slice.len() < 2`. + /// This panics if `slice.len() < 4`. 
#[inline] pub fn write_repr(self, slice: &mut [u8]) { let raw = self.bits.to_ne_bytes(); slice[0] = raw[0]; slice[1] = raw[1]; + slice[2] = raw[2]; + slice[3] = raw[3]; } } @@ -2784,9 +2914,9 @@ impl Iterator for LookSetIter { return None; } // We'll never have more than u8::MAX distinct look-around assertions, - // so 'repr' will always fit into a u16. - let repr = u16::try_from(self.set.bits.trailing_zeros()).unwrap(); - let look = Look::from_repr(1 << repr)?; + // so 'bit' will always fit into a u16. + let bit = u16::try_from(self.set.bits.trailing_zeros()).unwrap(); + let look = Look::from_repr(1 << bit)?; self.set = self.set.remove(look); Some(look) } @@ -3708,7 +3838,7 @@ mod tests { assert_eq!(0, set.iter().count()); let set = LookSet::full(); - assert_eq!(10, set.iter().count()); + assert_eq!(18, set.iter().count()); let set = LookSet::empty().insert(Look::StartLF).insert(Look::WordUnicode); @@ -3726,6 +3856,6 @@ mod tests { let res = format!("{:?}", LookSet::empty()); assert_eq!("∅", res); let res = format!("{:?}", LookSet::full()); - assert_eq!("Az^$rRbB𝛃𝚩", res); + assert_eq!("Az^$rRbB𝛃𝚩<>〈〉◁▷◀▶", res); } } diff --git a/vendor/regex-syntax/src/hir/print.rs b/vendor/regex-syntax/src/hir/print.rs index fcb7cd2..dfa6d40 100644 --- a/vendor/regex-syntax/src/hir/print.rs +++ b/vendor/regex-syntax/src/hir/print.rs @@ -89,9 +89,16 @@ impl Visitor for Writer { fn visit_pre(&mut self, hir: &Hir) -> fmt::Result { match *hir.kind() { - // Empty is represented by nothing in the concrete syntax, and - // repetition operators are strictly suffix oriented. - HirKind::Empty | HirKind::Repetition(_) => {} + HirKind::Empty => { + // Technically an empty sub-expression could be "printed" by + // just ignoring it, but in practice, you could have a + // repetition operator attached to an empty expression, and you + // really need something in the concrete syntax to make that + // work as you'd expect. 
+ self.wtr.write_str(r"(?:)")?; + } + // Repetition operators are strictly suffix oriented. + HirKind::Repetition(_) => {} HirKind::Literal(hir::Literal(ref bytes)) => { // See the comment on the 'Concat' and 'Alternation' case below // for why we put parens here. Literals are, conceptually, @@ -195,6 +202,30 @@ impl Visitor for Writer { hir::Look::WordUnicodeNegate => { self.wtr.write_str(r"\B")?; } + hir::Look::WordStartAscii => { + self.wtr.write_str(r"(?-u:\b{start})")?; + } + hir::Look::WordEndAscii => { + self.wtr.write_str(r"(?-u:\b{end})")?; + } + hir::Look::WordStartUnicode => { + self.wtr.write_str(r"\b{start}")?; + } + hir::Look::WordEndUnicode => { + self.wtr.write_str(r"\b{end}")?; + } + hir::Look::WordStartHalfAscii => { + self.wtr.write_str(r"(?-u:\b{start-half})")?; + } + hir::Look::WordEndHalfAscii => { + self.wtr.write_str(r"(?-u:\b{end-half})")?; + } + hir::Look::WordStartHalfUnicode => { + self.wtr.write_str(r"\b{start-half}")?; + } + hir::Look::WordEndHalfUnicode => { + self.wtr.write_str(r"\b{end-half}")?; + } }, HirKind::Capture(hir::Capture { ref name, .. }) => { self.wtr.write_str("(")?; @@ -424,20 +455,20 @@ mod tests { // Test that various zero-length repetitions always translate to an // empty regex. This is more a property of HIR's smart constructors // than the printer though. 
- roundtrip("a{0}", ""); - roundtrip("(?:ab){0}", ""); + roundtrip("a{0}", "(?:)"); + roundtrip("(?:ab){0}", "(?:)"); #[cfg(feature = "unicode-gencat")] { - roundtrip(r"\p{any}{0}", ""); - roundtrip(r"\P{any}{0}", ""); + roundtrip(r"\p{any}{0}", "(?:)"); + roundtrip(r"\P{any}{0}", "(?:)"); } } #[test] fn print_group() { - roundtrip("()", "()"); - roundtrip("(?P)", "(?P)"); - roundtrip("(?:)", ""); + roundtrip("()", "((?:))"); + roundtrip("(?P)", "(?P(?:))"); + roundtrip("(?:)", "(?:)"); roundtrip("(a)", "(a)"); roundtrip("(?Pa)", "(?Pa)"); @@ -448,8 +479,8 @@ mod tests { #[test] fn print_alternation() { - roundtrip("|", "(?:|)"); - roundtrip("||", "(?:||)"); + roundtrip("|", "(?:(?:)|(?:))"); + roundtrip("||", "(?:(?:)|(?:)|(?:))"); roundtrip("a|b", "[ab]"); roundtrip("ab|cd", "(?:(?:ab)|(?:cd))"); @@ -503,7 +534,7 @@ mod tests { }), Hir::look(hir::Look::End), ]); - assert_eq!(r"(?:\A(?:\A\z)+\z)", expr.to_string()); + assert_eq!(r"(?:\A\A\z\z)", expr.to_string()); } // Just like regression_repetition_concat, but with the repetition using @@ -540,7 +571,7 @@ mod tests { }), Hir::look(hir::Look::End), ]); - assert_eq!(r"(?:\A(?:\A|\z)+\z)", expr.to_string()); + assert_eq!(r"(?:\A(?:\A|\z)\z)", expr.to_string()); } // This regression test is very similar in flavor to diff --git a/vendor/regex-syntax/src/hir/translate.rs b/vendor/regex-syntax/src/hir/translate.rs index ff9c5ee..313a1e9 100644 --- a/vendor/regex-syntax/src/hir/translate.rs +++ b/vendor/regex-syntax/src/hir/translate.rs @@ -19,6 +19,7 @@ type Result = core::result::Result; #[derive(Clone, Debug)] pub struct TranslatorBuilder { utf8: bool, + line_terminator: u8, flags: Flags, } @@ -31,7 +32,11 @@ impl Default for TranslatorBuilder { impl TranslatorBuilder { /// Create a new translator builder with a default c onfiguration. 
pub fn new() -> TranslatorBuilder { - TranslatorBuilder { utf8: true, flags: Flags::default() } + TranslatorBuilder { + utf8: true, + line_terminator: b'\n', + flags: Flags::default(), + } } /// Build a translator using the current configuration. @@ -40,6 +45,7 @@ impl TranslatorBuilder { stack: RefCell::new(vec![]), flags: Cell::new(self.flags), utf8: self.utf8, + line_terminator: self.line_terminator, } } @@ -63,6 +69,31 @@ impl TranslatorBuilder { self } + /// Sets the line terminator for use with `(?u-s:.)` and `(?-us:.)`. + /// + /// Namely, instead of `.` (by default) matching everything except for `\n`, + /// this will cause `.` to match everything except for the byte given. + /// + /// If `.` is used in a context where Unicode mode is enabled and this byte + /// isn't ASCII, then an error will be returned. When Unicode mode is + /// disabled, then any byte is permitted, but will return an error if UTF-8 + /// mode is enabled and it is a non-ASCII byte. + /// + /// In short, any ASCII value for a line terminator is always okay. But a + /// non-ASCII byte might result in an error depending on whether Unicode + /// mode or UTF-8 mode are enabled. + /// + /// Note that if `R` mode is enabled then it always takes precedence and + /// the line terminator will be treated as `\r` and `\n` simultaneously. + /// + /// Note also that this *doesn't* impact the look-around assertions + /// `(?m:^)` and `(?m:$)`. That's usually controlled by additional + /// configuration in the regex engine itself. + pub fn line_terminator(&mut self, byte: u8) -> &mut TranslatorBuilder { + self.line_terminator = byte; + self + } + /// Enable or disable the case insensitive flag (`i`) by default. pub fn case_insensitive(&mut self, yes: bool) -> &mut TranslatorBuilder { self.flags.case_insensitive = if yes { Some(true) } else { None }; @@ -120,6 +151,8 @@ pub struct Translator { flags: Cell, /// Whether we're allowed to produce HIR that can match arbitrary bytes. 
utf8: bool, + /// The line terminator to use for `.`. + line_terminator: u8, } impl Translator { @@ -304,7 +337,7 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> { fn visit_pre(&mut self, ast: &Ast) -> Result<()> { match *ast { - Ast::Class(ast::Class::Bracketed(_)) => { + Ast::ClassBracketed(_) => { if self.flags().unicode() { let cls = hir::ClassUnicode::empty(); self.push(HirFrame::ClassUnicode(cls)); @@ -321,14 +354,14 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> { .unwrap_or_else(|| self.flags()); self.push(HirFrame::Group { old_flags }); } - Ast::Concat(ref x) if x.asts.is_empty() => {} Ast::Concat(_) => { self.push(HirFrame::Concat); } - Ast::Alternation(ref x) if x.asts.is_empty() => {} - Ast::Alternation(_) => { + Ast::Alternation(ref x) => { self.push(HirFrame::Alternation); - self.push(HirFrame::AlternationBranch); + if !x.asts.is_empty() { + self.push(HirFrame::AlternationBranch); + } } _ => {} } @@ -353,29 +386,20 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> { // consistency sake. self.push(HirFrame::Expr(Hir::empty())); } - Ast::Literal(ref x) => { - match self.ast_literal_to_scalar(x)? { - Either::Right(byte) => self.push_byte(byte), - Either::Left(ch) => { - if !self.flags().unicode() && ch.len_utf8() > 1 { - return Err(self - .error(x.span, ErrorKind::UnicodeNotAllowed)); - } - match self.case_fold_char(x.span, ch)? { - None => self.push_char(ch), - Some(expr) => self.push(HirFrame::Expr(expr)), - } - } - } - // self.push(HirFrame::Expr(self.hir_literal(x)?)); - } - Ast::Dot(span) => { - self.push(HirFrame::Expr(self.hir_dot(span)?)); + Ast::Literal(ref x) => match self.ast_literal_to_scalar(x)? { + Either::Right(byte) => self.push_byte(byte), + Either::Left(ch) => match self.case_fold_char(x.span, ch)? 
{ + None => self.push_char(ch), + Some(expr) => self.push(HirFrame::Expr(expr)), + }, + }, + Ast::Dot(ref span) => { + self.push(HirFrame::Expr(self.hir_dot(**span)?)); } Ast::Assertion(ref x) => { self.push(HirFrame::Expr(self.hir_assertion(x)?)); } - Ast::Class(ast::Class::Perl(ref x)) => { + Ast::ClassPerl(ref x) => { if self.flags().unicode() { let cls = self.hir_perl_unicode_class(x)?; let hcls = hir::Class::Unicode(cls); @@ -386,11 +410,11 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> { self.push(HirFrame::Expr(Hir::class(hcls))); } } - Ast::Class(ast::Class::Unicode(ref x)) => { + Ast::ClassUnicode(ref x) => { let cls = hir::Class::Unicode(self.hir_unicode_class(x)?); self.push(HirFrame::Expr(Hir::class(cls))); } - Ast::Class(ast::Class::Bracketed(ref ast)) => { + Ast::ClassBracketed(ref ast) => { if self.flags().unicode() { let mut cls = self.pop().unwrap().unwrap_class_unicode(); self.unicode_fold_and_negate( @@ -841,8 +865,8 @@ impl<'t, 'p> TranslatorI<'t, 'p> { })?; Ok(Some(Hir::class(hir::Class::Unicode(cls)))) } else { - if c.len_utf8() > 1 { - return Err(self.error(span, ErrorKind::UnicodeNotAllowed)); + if !c.is_ascii() { + return Ok(None); } // If case folding won't do anything, then don't bother trying. 
match c { @@ -862,10 +886,38 @@ impl<'t, 'p> TranslatorI<'t, 'p> { } fn hir_dot(&self, span: Span) -> Result { - if !self.flags().unicode() && self.trans().utf8 { + let (utf8, lineterm, flags) = + (self.trans().utf8, self.trans().line_terminator, self.flags()); + if utf8 && (!flags.unicode() || !lineterm.is_ascii()) { return Err(self.error(span, ErrorKind::InvalidUtf8)); } - Ok(Hir::dot(self.flags().dot())) + let dot = if flags.dot_matches_new_line() { + if flags.unicode() { + hir::Dot::AnyChar + } else { + hir::Dot::AnyByte + } + } else { + if flags.unicode() { + if flags.crlf() { + hir::Dot::AnyCharExceptCRLF + } else { + if !lineterm.is_ascii() { + return Err( + self.error(span, ErrorKind::InvalidLineTerminator) + ); + } + hir::Dot::AnyCharExcept(char::from(lineterm)) + } + } else { + if flags.crlf() { + hir::Dot::AnyByteExceptCRLF + } else { + hir::Dot::AnyByteExcept(lineterm) + } + } + }; + Ok(Hir::dot(dot)) } fn hir_assertion(&self, asst: &ast::Assertion) -> Result { @@ -903,6 +955,34 @@ impl<'t, 'p> TranslatorI<'t, 'p> { } else { hir::Look::WordAsciiNegate }), + ast::AssertionKind::WordBoundaryStart + | ast::AssertionKind::WordBoundaryStartAngle => { + Hir::look(if unicode { + hir::Look::WordStartUnicode + } else { + hir::Look::WordStartAscii + }) + } + ast::AssertionKind::WordBoundaryEnd + | ast::AssertionKind::WordBoundaryEndAngle => { + Hir::look(if unicode { + hir::Look::WordEndUnicode + } else { + hir::Look::WordEndAscii + }) + } + ast::AssertionKind::WordBoundaryStartHalf => { + Hir::look(if unicode { + hir::Look::WordStartHalfUnicode + } else { + hir::Look::WordStartHalfAscii + }) + } + ast::AssertionKind::WordBoundaryEndHalf => Hir::look(if unicode { + hir::Look::WordEndHalfUnicode + } else { + hir::Look::WordEndHalfAscii + }), }) } @@ -1124,9 +1204,8 @@ impl<'t, 'p> TranslatorI<'t, 'p> { match self.ast_literal_to_scalar(ast)? 
{ Either::Right(byte) => Ok(byte), Either::Left(ch) => { - let cp = u32::from(ch); - if cp <= 0x7F { - Ok(u8::try_from(cp).unwrap()) + if ch.is_ascii() { + Ok(u8::try_from(ch).unwrap()) } else { // We can't feasibly support Unicode in // byte oriented classes. Byte classes don't @@ -1209,30 +1288,6 @@ impl Flags { } } - fn dot(&self) -> hir::Dot { - if self.dot_matches_new_line() { - if self.unicode() { - hir::Dot::AnyChar - } else { - hir::Dot::AnyByte - } - } else { - if self.unicode() { - if self.crlf() { - hir::Dot::AnyCharExceptCRLF - } else { - hir::Dot::AnyCharExceptLF - } - } else { - if self.crlf() { - hir::Dot::AnyByteExceptCRLF - } else { - hir::Dot::AnyByteExceptLF - } - } - } - } - fn case_insensitive(&self) -> bool { self.case_insensitive.unwrap_or(false) } @@ -1598,16 +1653,7 @@ mod tests { assert_eq!(t_bytes(r"(?-u)\x61"), hir_lit("a")); assert_eq!(t_bytes(r"(?-u)\xFF"), hir_blit(b"\xFF")); - assert_eq!( - t_err("(?-u)☃"), - TestError { - kind: hir::ErrorKind::UnicodeNotAllowed, - span: Span::new( - Position::new(5, 1, 6), - Position::new(8, 1, 7) - ), - } - ); + assert_eq!(t("(?-u)☃"), hir_lit("☃")); assert_eq!( t_err(r"(?-u)\xFF"), TestError { @@ -1685,16 +1731,7 @@ mod tests { ); assert_eq!(t_bytes(r"(?i-u)\xFF"), hir_blit(b"\xFF")); - assert_eq!( - t_err("(?i-u)β"), - TestError { - kind: hir::ErrorKind::UnicodeNotAllowed, - span: Span::new( - Position::new(6, 1, 7), - Position::new(8, 1, 8), - ), - } - ); + assert_eq!(t("(?i-u)β"), hir_lit("β"),); } #[test] @@ -3489,6 +3526,15 @@ mod tests { assert!(!props(r"(?:z|xx)@|xx").is_alternation_literal()); } + // This tests that the smart Hir::repetition constructors does some basic + // simplifications. + #[test] + fn smart_repetition() { + assert_eq!(t(r"a{0}"), Hir::empty()); + assert_eq!(t(r"a{1}"), hir_lit("a")); + assert_eq!(t(r"\B{32111}"), hir_look(hir::Look::WordUnicodeNegate)); + } + // This tests that the smart Hir::concat constructor simplifies the given // exprs in a way we expect. 
#[test] @@ -3580,4 +3626,99 @@ mod tests { ]), ); } + + #[test] + fn regression_alt_empty_concat() { + use crate::ast::{self, Ast}; + + let span = Span::splat(Position::new(0, 0, 0)); + let ast = Ast::alternation(ast::Alternation { + span, + asts: vec![Ast::concat(ast::Concat { span, asts: vec![] })], + }); + + let mut t = Translator::new(); + assert_eq!(Ok(Hir::empty()), t.translate("", &ast)); + } + + #[test] + fn regression_empty_alt() { + use crate::ast::{self, Ast}; + + let span = Span::splat(Position::new(0, 0, 0)); + let ast = Ast::concat(ast::Concat { + span, + asts: vec![Ast::alternation(ast::Alternation { + span, + asts: vec![], + })], + }); + + let mut t = Translator::new(); + assert_eq!(Ok(Hir::fail()), t.translate("", &ast)); + } + + #[test] + fn regression_singleton_alt() { + use crate::{ + ast::{self, Ast}, + hir::Dot, + }; + + let span = Span::splat(Position::new(0, 0, 0)); + let ast = Ast::concat(ast::Concat { + span, + asts: vec![Ast::alternation(ast::Alternation { + span, + asts: vec![Ast::dot(span)], + })], + }); + + let mut t = Translator::new(); + assert_eq!(Ok(Hir::dot(Dot::AnyCharExceptLF)), t.translate("", &ast)); + } + + // See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=63168 + #[test] + fn regression_fuzz_match() { + let pat = "[(\u{6} \0-\u{afdf5}] \0 "; + let ast = ParserBuilder::new() + .octal(false) + .ignore_whitespace(true) + .build() + .parse(pat) + .unwrap(); + let hir = TranslatorBuilder::new() + .utf8(true) + .case_insensitive(false) + .multi_line(false) + .dot_matches_new_line(false) + .swap_greed(true) + .unicode(true) + .build() + .translate(pat, &ast) + .unwrap(); + assert_eq!( + hir, + Hir::concat(vec![ + hir_uclass(&[('\0', '\u{afdf5}')]), + hir_lit("\0"), + ]) + ); + } + + // See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=63155 + #[cfg(feature = "unicode")] + #[test] + fn regression_fuzz_difference1() { + let pat = 
r"\W\W|\W[^\v--\W\W\P{Script_Extensions:Pau_Cin_Hau}\u10A1A1-\U{3E3E3}--~~~~--~~~~~~~~------~~~~~~--~~~~~~]*"; + let _ = t(pat); // shouldn't panic + } + + // See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=63153 + #[test] + fn regression_fuzz_char_decrement1() { + let pat = "w[w[^w?\rw\rw[^w?\rw[^w?\rw[^w?\rw[^w?\rw[^w?\rw[^w?\r\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0w?\rw[^w?\rw[^w?\rw[^w\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\u{1}\0]\0\0\0\0\0\0\0\0\0*\0\0\u{1}\0]\0\0-*\0][^w?\rw[^w?\rw[^w?\rw[^w?\rw[^w?\rw[^w?\rw[^w\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\u{1}\0]\0\0\0\0\0\0\0\0\0x\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\0\0\0\0*??\0\u{7f}{2}\u{10}??\0\0\0\0\0\0\0\0\0\u{3}\0\0\0}\0-*\0]\0\0\0\0\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\u{1}\0]\0\0-*\0]\0\0\0\0\0\0\0\u{1}\0]\0\u{1}\u{1}H-i]-]\0\0\0\0\u{1}\0]\0\0\0\u{1}\0]\0\0-*\0\0\0\0\u{1}9-\u{7f}]\0'|-\u{7f}]\0'|(?i-ux)[-\u{7f}]\0'\u{3}\0\0\0}\0-*\0] Result<(), Self::Err> { Ok(()) } + + /// This method is called between child nodes of a concatenation. + fn visit_concat_in(&mut self) -> Result<(), Self::Err> { + Ok(()) + } } /// Executes an implementation of `Visitor` in constant stack space. @@ -131,8 +136,14 @@ impl<'a> HeapVisitor<'a> { // If this is a concat/alternate, then we might have additional // inductive steps to process. if let Some(x) = self.pop(frame) { - if let Frame::Alternation { .. } = x { - visitor.visit_alternation_in()?; + match x { + Frame::Alternation { .. } => { + visitor.visit_alternation_in()?; + } + Frame::Concat { .. 
} => { + visitor.visit_concat_in()?; + } + _ => {} } hir = x.child(); self.stack.push((post_hir, x)); diff --git a/vendor/regex-syntax/src/lib.rs b/vendor/regex-syntax/src/lib.rs index 4953641..20f25db 100644 --- a/vendor/regex-syntax/src/lib.rs +++ b/vendor/regex-syntax/src/lib.rs @@ -138,7 +138,7 @@ The following features are available: [Unicode's "simple loose matches" specification](https://www.unicode.org/reports/tr18/#Simple_Loose_Matches). * **unicode-gencat** - Provide the data for - [Uncode general categories](https://www.unicode.org/reports/tr44/tr44-24.html#General_Category_Values). + [Unicode general categories](https://www.unicode.org/reports/tr44/tr44-24.html#General_Category_Values). This includes, but is not limited to, `Decimal_Number`, `Letter`, `Math_Symbol`, `Number` and `Punctuation`. * **unicode-perl** - @@ -157,6 +157,11 @@ The following features are available: [Unicode text segmentation algorithms](https://www.unicode.org/reports/tr29/). This enables using classes like `\p{gcb=Extend}`, `\p{wb=Katakana}` and `\p{sb=ATerm}`. +* **arbitrary** - + Enabling this feature introduces a public dependency on the + [`arbitrary`](https://crates.io/crates/arbitrary) + crate. Namely, it implements the `Arbitrary` trait from that crate for the + [`Ast`](crate::ast::Ast) type. This feature is disabled by default. */ #![no_std] @@ -317,6 +322,9 @@ pub fn is_escapeable_character(c: char) -> bool { // escapeable, \< and \> will result in a parse error. Thus, we can // turn them into something else in the future without it being a // backwards incompatible change. + // + // OK, now we support \< and \>, and we need to retain them as *not* + // escapeable here since the escape sequence is significant. '<' | '>' => false, _ => true, } @@ -364,7 +372,7 @@ pub fn try_is_word_character( /// Returns true if and only if the given character is an ASCII word character. /// /// An ASCII word character is defined by the following character class: -/// `[_0-9a-zA-Z]'. 
+/// `[_0-9a-zA-Z]`. pub fn is_word_byte(c: u8) -> bool { match c { b'_' | b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' => true, diff --git a/vendor/regex-syntax/src/parser.rs b/vendor/regex-syntax/src/parser.rs index 2e7a2bb..f482b84 100644 --- a/vendor/regex-syntax/src/parser.rs +++ b/vendor/regex-syntax/src/parser.rs @@ -165,6 +165,31 @@ impl ParserBuilder { self } + /// Sets the line terminator for use with `(?u-s:.)` and `(?-us:.)`. + /// + /// Namely, instead of `.` (by default) matching everything except for `\n`, + /// this will cause `.` to match everything except for the byte given. + /// + /// If `.` is used in a context where Unicode mode is enabled and this byte + /// isn't ASCII, then an error will be returned. When Unicode mode is + /// disabled, then any byte is permitted, but will return an error if UTF-8 + /// mode is enabled and it is a non-ASCII byte. + /// + /// In short, any ASCII value for a line terminator is always okay. But a + /// non-ASCII byte might result in an error depending on whether Unicode + /// mode or UTF-8 mode are enabled. + /// + /// Note that if `R` mode is enabled then it always takes precedence and + /// the line terminator will be treated as `\r` and `\n` simultaneously. + /// + /// Note also that this *doesn't* impact the look-around assertions + /// `(?m:^)` and `(?m:$)`. That's usually controlled by additional + /// configuration in the regex engine itself. + pub fn line_terminator(&mut self, byte: u8) -> &mut ParserBuilder { + self.hir.line_terminator(byte); + self + } + /// Enable or disable the "swap greed" flag by default. /// /// By default this is disabled. It may alternatively be selectively diff --git a/vendor/regex-syntax/test b/vendor/regex-syntax/test index a4d6cfa..8626c3b 100755 --- a/vendor/regex-syntax/test +++ b/vendor/regex-syntax/test @@ -2,6 +2,10 @@ set -e +# cd to the directory containing this crate's Cargo.toml so that we don't need +# to pass --manifest-path to every `cargo` command. 
+cd "$(dirname "$0")" + # This is a convenience script for running a broad swath of the syntax tests. echo "===== DEFAULT FEATURES ===" cargo test diff --git a/vendor/regex/.cargo-checksum.json b/vendor/regex/.cargo-checksum.json index 559d852..c8bd4de 100644 --- a/vendor/regex/.cargo-checksum.json +++ b/vendor/regex/.cargo-checksum.json @@ -1 +1 @@ -{"files":{"CHANGELOG.md":"c65d6169c94b114559dc113d7e4e8c54a5070f6f76e92e714cbeb7aba279e229","Cargo.lock":"7ef8dcdf3d4c41feec286e4686c0ab465c0ce908eb61d20ec441489ea59c767c","Cargo.toml":"769829b75abcff0149863481a903656e7f71e72e303ce7ca99661c31b2ddd282","HACKING.md":"17818f7a17723608f6bdbe6388ad0a913d4f96f76a16649aaf4e274b1fa0ea97","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"6485b8ed310d3f0340bf1ad1f47645069ce4069dcc6bb46c7d5c6faf41de1fdb","PERFORMANCE.md":"0d5ef3866386918dfdefb1aa9a28cfe33cb3c8ceeb79f3f8ba5b88253dd95991","README.md":"cdcafba78fda99c94f9ea3290ad521fbdbb12043ca6925b10cef801eb4e5e223","UNICODE.md":"a8a8399540eed000d19420135a527f400247a04572e44d124c786b870f518776","examples/regexdna-input.txt":"156a49710bb3e1ed4bc2bbb0af0f383b747b3d0281453cfff39c296124c598f8","examples/regexdna-output.txt":"35e85b19b70a893d752fd43e54e1e9da08bac43559191cea85b33387c24c4cc1","examples/shootout-regex-dna-bytes.rs":"fa2daedb4e0a05f64f33f4af62fbb0176db998e3676f8637ab684b725367a7b4","examples/shootout-regex-dna-cheat.rs":"1f871a6eaaf8372299fa3c762051112fa89a14235b03f734fc50ebd51ecaee72","examples/shootout-regex-dna-replace.rs":"32ffdf13ac6c4ce3fc32116a048e9cc682aa34cdb8e5beaf565a22addbdcd9ab","examples/shootout-regex-dna-single-cheat.rs":"809f75bf1e1917a53623eb6f1a3ce3b7d2ed98a6a1dbc0bd4853bec49a0c6f94","examples/shootout-regex-dna-single.rs":"1ab14f5703cd4be2e75a2e792e0ba1d322b9e4b14535d396805a4316d577f5bb","examples/shootout-regex-dna.rs":"20ea46ab63f91e3ac6a64e997eadd436a9cbc2f1bdade28e4512052f0e25bc34","record/README.md":"02e6f85f8a43f18540e4a52a75d1001494df7aceac3873e9a13e
3ceba190206d","record/compile-test/2023-04-19_1.7.3.csv":"460059ba2f10456175ff92bd75d4a365b14a1843e2b46e7b285d58da59e6d3ca","record/compile-test/2023-04-20_master.csv":"6b94df278e4ed82a3fd0d4bfe92a4614714e00435e983c7649ee9f54925f906e","record/compile-test/README.md":"ba2b606993edd8d705ad1677ec954862614e52b028407e1908bb5dfb07767f2d","rustfmt.toml":"1ca600239a27401c4a43f363cf3f38183a212affc1f31bff3ae93234bbaec228","src/backtrack.rs":"52987d80448f3d7f5d4e3545ddfc09f1f30de7602d9b5489961db4b215a377fd","src/compile.rs":"b982356f394218366d8684b133a064abea6b6475bf1633aea65dcb6ff7df8b5e","src/dfa.rs":"405f24adbf775b0902fd830cc5a5446de80da1a2a5eb950fca357aff5b19163f","src/error.rs":"476a86da4bb115cb85e9327aee6f423c1dade524517178186c747a3baa9be71d","src/exec.rs":"530a24654ef96e57400706dc33b4f623956a4eb16f52cd006992ce1b9ede10c3","src/expand.rs":"59e459a9bbd0ae60478a6cbe48203091245e39bbd064e04b50d848d75f6de920","src/find_byte.rs":"b387247b77e3269f057c3399aefe5a815032c3af918c876f80eb4b282e4eb95e","src/freqs.rs":"255555f3d95b08a5bb3bc2f38d5a06cc100a39c0f0127fe4f50c33afa1cadc65","src/input.rs":"13f49c1bce2fadd04a45b421d374cd0f8b72bef83f7e8fda958962aaccbe799a","src/lib.rs":"7cb5ea7fbb41b71d6a9d0692442b8bdfccd10199dd1340e534202f988cfad493","src/literal/imp.rs":"26239f37d7c79a88f154ffb864be282598486d9df9363c918ac3106537119b3d","src/literal/mod.rs":"59fd8bc37784906d729167b69bd14f91094c4c82749984ee5ffd41ae62c38af2","src/pattern.rs":"993d8b6b4bcea5e02bee3c76e17c356a5a47f8fc53c5555edfd1ebb71c0878bf","src/pikevm.rs":"6c0eaa7e878c945ac4c3c545c98f5706ad04846fc432a5086c8ee78eb030dfa7","src/pool.rs":"942e991ae31ef349bd76efd78b2a712c01166dec965bf93742977ed0870d5a10","src/prog.rs":"8ab44101bb2aaf51f00872798f3d926ef150744898538b4ceb5f3d38fbf861f0","src/re_builder.rs":"943344bf6e2fc90902ee04b11b741c32418ac6814b21b7982cc0a3a817713f3e","src/re_bytes.rs":"15a53cccd7d573f668ac38158b140c0e0e51a31ac16de800f24e72c8d364561e","src/re_set.rs":"8b9b9b78fc1dbd8731298269f410c67689baedf4116fb617afd309fd4bfe116c
","src/re_trait.rs":"df29beedc00933e34e7f89b4db645cba18db7f7e4cf3f1d48328bddada5191d5","src/re_unicode.rs":"940be2629a8176065f821b419693135fdfdb016b573e8e00a10d963712bf1fa8","src/sparse.rs":"0da3ddb7972109869248a764dbb10254555f4bb51c375e89fb3fab9cafa47320","src/testdata/LICENSE":"58cf078acc03da3e280a938c2bd9943f554fc9b6ced89ad93ba35ca436872899","src/testdata/README":"45f869e37f798905c773bfbe0ef19a5fb7e585cbf0b7c21b5b5a784e8cec3c14","src/testdata/basic.dat":"b5b33aa89d48a61cd67cb1fbfd8f70e62c83e30b86256f9f915a5190dd38ff06","src/testdata/nullsubexpr.dat":"496ac0278eec3b6d9170faace14554569032dd3d909618364d9326156de39ecf","src/testdata/repetition.dat":"1f7959063015b284b18a4a2c1c8b416d438a2d6c4b1a362da43406b865f50e69","src/utf8.rs":"f85a356ff5d5b19e417b73ce1dd84581b21d283f6dddd195547c30af9c60bd1a","test":"0d62fdca7da12fc19ea5306b5de1d83e68d9365a029c043d524334da138b0304","tests/api.rs":"7b2a0ef75e99b9776094967bd66e9cdeaa8e11359f5f0a12bd08ef0e8d0c11fc","tests/api_str.rs":"2ae38c04e7e8fac008b609a820d0b1561ba75f39b0edc0987d6d3d06132da77f","tests/bytes.rs":"edc50f526c5fee43df89d639ef18b237e4eb91e9d533bfc43f3cbab7417d38ba","tests/consistent.rs":"d69435154c09478076497216e43081a835ac65147181a4fbddad7bff469605b2","tests/crates_regex.rs":"91a59d470e0700b4bcb3ff735d06799f3107b8ef4875a2e9904607b164be0326","tests/crazy.rs":"c0d56380dff19bdd5d7a3eb731d0e2dc564e169a1b73c81e1879b1e87f5f5f77","tests/flags.rs":"05caace2c81a99d2168037f3a38035d4dffe9f85ef3ebd7ef18b1bc6612f1ea8","tests/fowler.rs":"d78cf914de40b1e125cc92b65ccb444d462586bd07b5e05de4e4a1b5de16aa76","tests/macros.rs":"6db70c16fc90df13e6b30d2b606f8b6dd4dc976697967f6ee001b15aab6d0b19","tests/macros_bytes.rs":"a049f528a93173a1bb176cd46932dce1880679f4a1752e099be920f0e4546fd0","tests/macros_str.rs":"e585b1461374c45a2eca44ca045bc3c1fe984b2b4212e432b0c695b420e708b7","tests/misc.rs":"395f52793fa022e4cdda78675b6a6fba1a3106b4b99c834c39f7801574054bd1","tests/multiline.rs":"1b1a3326ed976437c1357f01d81833ece7ea244f38826246eab55cacd5d0862a","
tests/noparse.rs":"12b6be0eff3d80779d33c6459396c74c0f6ebf4ddc9f1d33c3e747ea9e3bf268","tests/regression.rs":"4b403cbc88ce81388f6659ef031b26a8f8666727d5f7586f60072aa9313e590c","tests/regression_fuzz.rs":"3c99498af578044159336c63c8ac81d65bfc611a0aa80217400544d5caa66827","tests/replace.rs":"5f1bbf3f89de8cd021406a4affd0d07484ba194ac791ac307efd66f2792a2366","tests/searcher.rs":"ce35e47b0a276a7e8c9060c6a0b225ffba163aebc61fbc15555a6897fa0e552c","tests/set.rs":"a69fab05adabdbf27e788d51d7cea06acfd9017182e2f201d592b45c4fec5618","tests/shortest_match.rs":"a2c94390c0d61bc24796b4c1288c924e90c8c9c6156fdebb858175177a194a42","tests/suffix_reverse.rs":"b95f89397404871227d9efe6df23b9ded147f183db81597e608f693955c668b5","tests/test_backtrack.rs":"b70c5e5f1241efd76dd9f9dd4a4df8a7b38113bd407d1f5f56867f1176177a59","tests/test_backtrack_bytes.rs":"b8a111d4b4109c8bba7e2afb650572c495a14d357fb1f743c1076fb001f704b5","tests/test_backtrack_utf8bytes.rs":"c0c279785d18beac2b4e178e7bf6c14ed235d65f00ca467cfd9c333d79487649","tests/test_crates_regex.rs":"fd9525c2eef0e2f8cb7f787bc2b721bcd0b5d84f3bca49adfe48d657a99c721a","tests/test_default.rs":"b32c11a43da4379a3717dd7a5f152c811257c7d6595c9d3c51f2de102e320c87","tests/test_default_bytes.rs":"831d3e6bfb882feb15f700e30304bd34328f888fb4c15c7169371e25024ce9a7","tests/test_nfa.rs":"f119fc43a018249c39c813d57096b0654ff69f337345f2bbd9b0e61cc9137285","tests/test_nfa_bytes.rs":"89eae3bef6a1d0bcea6b5de5be35ad72f613f2ceb8b58fe82a6c6ef2ccdc07d0","tests/test_nfa_utf8bytes.rs":"7d830b4aa401887d7cf098b62fed4cd8017ef8b61f625c7c9a2159a6b4cfeb71","tests/unicode.rs":"d0a2fec28cb28910a5ec1a51849dcf7923673a2c3bc0ffc24025f7c37667add2","tests/word_boundary.rs":"7081317ddcec1e82dd4a2090a571c6abf2ff4bbfa8cd10395e1eb3f386157fae","tests/word_boundary_ascii.rs":"cd0be5b5b485de0ba7994b42e2864585556c3d2d8bf5eab05b58931d9aaf4b87","tests/word_boundary_unicode.rs":"75dbcc35d3abc0f9795c2ea99e216dc227b0a5b58e9ca5eef767815ff0513921"},"package":"af83e617f331cc6ae2da5443c602dfa5af81e517212d9d6
11a5b3ba1777b5370"} \ No newline at end of file +{"files":{"CHANGELOG.md":"83951c87d85883eca89c05b8295fda12164b4496af8a03afed8604eb3489074e","Cargo.toml":"6a753f0cb2d88fd6fbccaadb3517d3b418a2416de2bf3036eb38b2f8648f3b37","Cross.toml":"4a11d6c63ecc919016b59fa0fe23674eb05682fb91ffbe677a4a7077e9e684ff","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"6485b8ed310d3f0340bf1ad1f47645069ce4069dcc6bb46c7d5c6faf41de1fdb","README.md":"984b116bf94decdcb0fcdeb14641b332c9fe40da63e5a256dd39eb91a2e13387","UNICODE.md":"845fca1982e82e190109a784952579fce54faea120b702b7efd61164a12f601f","bench/README.md":"0aee42206b0e5edcb400a11faa2c536f512bcc6086e5ffdda001b9bfe4d19808","record/README.md":"02e6f85f8a43f18540e4a52a75d1001494df7aceac3873e9a13e3ceba190206d","record/compile-test/2023-04-19_1.7.3.csv":"460059ba2f10456175ff92bd75d4a365b14a1843e2b46e7b285d58da59e6d3ca","record/compile-test/2023-04-20_master.csv":"6b94df278e4ed82a3fd0d4bfe92a4614714e00435e983c7649ee9f54925f906e","record/compile-test/2023-07-05.csv":"cf00b4981b8c12980113810dba40e2063a8400354ad4dab16f7c212ff0b5db74","record/compile-test/README.md":"ba2b606993edd8d705ad1677ec954862614e52b028407e1908bb5dfb07767f2d","record/old-bench-log/01-lazy-dfa/dynamic":"dec9f74b8835403c71edc0c2d93bbdde0f5a0e37d46585e416c80496d5b14497","record/old-bench-log/01-lazy-dfa/dynamic-no-lazy-dfa":"c0ce02bef9ada8cd55672f0a9c3c5fc64f71e08bfb2b45978082a140b4fc111f","record/old-bench-log/01-lazy-dfa/native":"9de61ff787e36f5c6f1eaec68b8bb0583e57b0aad23712afe8c0048988c761b8","record/old-bench-log/01-lazy-dfa/nfa":"38c0be44a00b2caef17101bc425410fec2958e4df6da25d2ba5b6664f8bccad9","record/old-bench-log/01-lazy-dfa/pcre":"3b38026c24e4ca487ff62de83cc093ccb46b918f4875663249ff84ce27636942","record/old-bench-log/02-set/dynamic":"8ef5c00f0ac42e5f008e4b6337669527b48fba38df94c50d3e683c6aac66a48c","record/old-bench-log/03-bytes/onig":"f32347a6e0f25f46ad1b0aa736c29eca47c25f90d32c8823ea0d14204859a35b","record/old-bench
-log/03-bytes/pcre":"b90982575c0ad55617b2ce50c2e9853d090502bf07e1eb19edf9009d3c9f2987","record/old-bench-log/03-bytes/rust":"b1e70e5ae48a9c726d8cd8a98019c0efe5a1095563c61cf0ac75e24de32461b4","record/old-bench-log/03-bytes/rust-bytes":"fbf0e6cb8102c7ca8e59bd459bb0ae7f1feaf8103def70b8d4793c59e68e8736","record/old-bench-log/04/onig":"4e34e2ede0a806b8ee540e63e4babee38049e5a8ab3be99c4f5d8b02bbc653fd","record/old-bench-log/04/pcre1-jit":"736c4941e991ef94f76379cf2187d0ea2a41b052cf80c94d0dd0c9ea758a6491","record/old-bench-log/04/pcre2-jit":"00e7bbf7749904fca8dff9b441d15bbe670f37b427e385ddf740f7a49de3b1fb","record/old-bench-log/04/re2":"b8b8595f6b68da127b56dc7c61a9fd15548251fda1be9d2c50c2d48382e887b6","record/old-bench-log/04/rust":"c5a6b918e815294f0c4e3d37267c444d49692ff131c5a08f7462c24d0721fcec","record/old-bench-log/04/tcl":"c4d8d12b8cf48ff2017549e95e49dc95a90ea15483834cd70d2d7d7c237bbd32","record/old-bench-log/05/onig":"70a4da9aafaefa6493cd09d3a529dd5d2d9eacf390bb093681bc7be28a1f926c","record/old-bench-log/05/onig-vs-rust":"b942a79735b7330241437776c15b18f4db3eff01d3e6c35494f4a8732e74a23a","record/old-bench-log/05/pcre1":"b29b7efbe79b55ce0aaf24bbbecc376a865fa219a68d96124e3d95951cdb47f4","record/old-bench-log/05/pcre1-vs-rust":"a458e5c62f0500898e08757753c10981551649656432ec096f0c82b414ef8d82","record/old-bench-log/05/pcre2":"faa93937c3490cfdff88c32dc04e57f2ae881923b87781e5fe876535fd690770","record/old-bench-log/05/pcre2-vs-rust":"bf9faa6a679dd98e9452e52c0941d2eb84dcf0b6632c15507f8334ed7bc309da","record/old-bench-log/05/re2":"692866b28e1bc368c7a59f519b8dfe1da50a135946ce153298a0ab228a5ee59d","record/old-bench-log/05/re2-vs-rust":"55e4cb14c397574751aebe38068c429a4580a5e309857b2715047944903dca58","record/old-bench-log/05/rust":"aac6acda9f63e51613712d0a33bb7fb46dfc7adc425f76b9b71195be8c8a42e7","record/old-bench-log/05/tcl":"f03e39eccd3252162cc6099bb0426014df669d299ba0ef79e89b8401886a5172","record/old-bench-log/05/tcl-vs-rust":"ae6ac4668573bf5488cc235c5da16ad9358d07b7644207d9bce
a88ba6f5514a6","record/old-bench-log/06/dphobos-dmd":"473328306be335a0320c690d9c2dbefdf7f2f5a80e4ca69443c7ed2e81bb093f","record/old-bench-log/06/dphobos-dmd-ct":"60341c736382a6db21d9889369ea4617c521acbf30d4b3bf38bcd17f4f85b9b1","record/old-bench-log/06/dphobos-ldc":"ae60c2bed84afb89ae43615f26de4cc5d0042e179089b639507378518eed3252","record/old-bench-log/06/dphobos-ldc-ct":"a157ef450793b73de3a816fab1d93a6d11e90a817082bae5e3da02a66fcc833f","record/old-bench-log/06/pcre1":"ad10fd7db732e8670dd3d4eedb05f48f547b4782495aaadff8ec25a6ea1992a0","record/old-bench-log/06/pcre2":"f789a73bd41a0bc401bdebe7f10a03a8aa587de48643d88507d16181a8fa39d3","record/old-bench-log/06/re2":"203c273a110d71f5edf722630202a6142c39d6b7a9951686adf8b9c20c5db278","record/old-bench-log/06/rust":"6a642a16cd279c99ef5a580a25fb3a63ac6239cd73df9261c02912fa08145753","record/old-bench-log/07/boost":"255bc652c4d9e9d20aa9b22d8d86e952e7ec6c8b9fcde0c3d6e38c967e04d40e","record/old-bench-log/07/dphobos-dmd":"fb3ac60037050858611145ca3e71412164688dcdec52c022787d33304e022260","record/old-bench-log/07/dphobos-dmd-ct":"40a5088441f8ffe3dae0abaf31c105cedfbe3b56c06772f075947d504976d2ed","record/old-bench-log/07/oniguruma":"ae0cd60adb15845eb9ef706111d4ee0e6ad5a58f0276b787d68bd7d637f8f7c6","record/old-bench-log/07/pcre1":"a812d065ec248249f9bb3d6d970f15c18d342f6b443265ad4b07fa91b73575cc","record/old-bench-log/07/pcre2":"88230663eccd0b382cf5be81ce1ae6cfa3fa835a65a31c1eba4369d2e8de5d27","record/old-bench-log/07/re2":"e330ef21ce44351afc3c43821d862e9c625877606569f3af0ddbadcd7b21c602","record/old-bench-log/07/rust":"d8c6bd5c46f5df9d0ac222f7be7793527a8137d273c8826b3715c67e16209aac","record/old-bench-log/07/rust-bytes":"e21d02fa2ef1e5ed7204920b33ed24c9fb620e068ed47ed6879b72e76369a27e","record/old-bench-log/07/stdcpp":"9df02d89dc8232c700b8cf8bc6f1ece3ca7af84ab52e67a660039d6c9168aed4","record/old-bench-log/07/stdcpp-libcxx":"f90849a0b5dc11dc0280ad97886e92e1d91c080403ad7a4ecd638a26fe5e8c5e","record/old-bench-log/07/tcl":"7f6e347bb507f9c0
0ff664d3e627c0a9cf842b416eeb2af9f3b6cccd041c58e4","record/old-bench-log/08-new-memmem/rust-after-01":"646c7d50aea9c560a35eb60116f301f4fb8d4b03fd5814d8b24adffd070332e3","record/old-bench-log/08-new-memmem/rust-after-02":"14e7fb6c6faa85a8f90617528cef79ae382aeba07c2e5c253c68445902b060ba","record/old-bench-log/08-new-memmem/rust-before-01":"7e3b58de0f502c1a1bf6d27e0e85c654b1189716f7374cec4ed4dd365b13101f","record/old-bench-log/08-new-memmem/rust-before-02":"ab6d09529eeeca7ff0da945d59701dbbcfdec5e05581bb9bf154779d12a35e53","record/old-bench-log/09-new-baseline/pcre2":"28df8e2762f267d1ea628906a6e4bbc21f99e6a445bd322c86d0ca483b21b5b3","record/old-bench-log/09-new-baseline/re2":"421437193cc3f159c178479f98bde8dbe27883ec7757b1ddd8d745862f5899ff","record/old-bench-log/09-new-baseline/rust":"6f932a769171b6cdb717c9d01e44a70762ef660c4045b9d2bb3797a9bdf65405","record/old-bench-log/09-new-baseline/rust-bytes":"9c5acd5c1eeac9acfe76d03588041f9b6d65b4351085c3510888ceeb83e8a7b5","record/old-bench-log/10-last-frontier/rust-after-literal.log":"02baef9b3b49acbbff43e81f48ea5a9287e30ff4fc298a3f3b48991d8374aabf","record/old-bench-log/10-last-frontier/rust-before-literal.log":"e5a3bcc2b9e93cf3cb27bc9e6305b3bc03215751bbeef2a70fb25577d6b42874","record/old-bench-log/10-last-frontier/rust-bytes-after-literal.log":"29834c7a5396ac61acedd07c0b7ca60716865ec3e70f35fbaa7826a2309a79d9","record/old-bench-log/10-last-frontier/rust-bytes-before-literal.log":"4e7468a3e8629814bd4af91e2a8eb42d0899d352b5dff3058b801aa637046be2","record/old-bench-log/11-regex-1.7.3/rust":"d7cc18a62070ea7a999e1ba2458f26cf94595f1af276c2b3e96cee638eccf3f0","record/old-bench-log/11-regex-1.7.3/rust-bytes":"64c7458020139bd7a03d1cb0927b741e6972377b686626563acb86fbc66414ca","record/old-bench-log/12-regex-1.8.1/rust":"a538c42e77e20956e81fb5a4e2e1e7d3fdf60da019d7e3df52d93f57367a3fbd","record/old-bench-log/12-regex-1.8.1/rust-bytes":"fbb00fdf8f039ce312f5346a67dddaa5e129280a93a90d7aaf6b5a9a71d2f212","record/old-bench-log/13-regex-1.9.0/rus
t":"0ef62700ba3fc24887af74b7942490c90b4cd2814b8fda200f7376e43391bfce","record/old-bench-log/13-regex-1.9.0/rust-bytes":"676d501d4667f0a945c88ebb56839176dd3a5a6b45af7708b1e870bf26d12603","record/old-bench-log/README.md":"d359f536fb4b8c1af9af3465a027c3522f62c3871aad44645a955b650d7deec0","record/old-bench-log/old/01-before":"c2ea2750fca8ac1742003fe2106e9422d49e92967d3fe0267f24b7ec830b07e3","record/old-bench-log/old/02-new-syntax-crate":"27fd8b3d35cf08d434035ff7d9f2e9e3c94a167e45ba655567c73ae96830f1d8","record/old-bench-log/old/03-new-syntax-crate":"d942a2b95c3a2d8f85f3f17934f258bdc84baa33e91986e8a6810ca8d6e9cc50","record/old-bench-log/old/04-fixed-benchmark":"0da29ef39ac07ece411c151ab479a76944946aba992547b15d90ec2d5484e85c","record/old-bench-log/old/05-thread-caching":"e364d87131e43187d6757426839789d1b6b47b3f3af21280daa9193d5ab19f64","record/old-bench-log/old/06-major-dynamic":"3bc2b8fd2714ae9f19b2e4f4219654982522daf01b5d3055b4aec0458afeaf13","record/old-bench-log/old/06-major-macro":"d5617ed23e71d5298ed4d629eee257e401c352fd1c91a2048dfeb1677527d4e7","record/old-bench-log/old/07-prefix-improvements":"9277d1392c85a38db215a9b69e3b0cd4a9901f8f1c72c706ca262e5f099b8819","record/old-bench-log/old/08-case-fixes":"f97cd3675cf5c967e4ca8841f2368e9eadf538b542bfe3035d31492afc5934bf","record/old-bench-log/old/09-before-compiler-rewrite":"b928686819dbd9aeaa6639b01b63a48428653f2f676a4e15d61cddec421e0389","record/old-bench-log/old/10-compiler-rewrite":"697b295ee377a5cb287d403593bfb8c078270b4e19e8d61d0b95b06ee7c903ab","record/old-bench-log/old/11-compiler-rewrite":"3f0ba494a0d82e7419285a9686474dc7763d4da0dd3faaa3bec3f624bbede481","record/old-bench-log/old/12-executor":"962e182f9a1cfddb8c0cd2d8c4681febef1430082c9a38e5373c9117b853e65e","record/old-bench-log/old/12-executor-bytes":"e01a1e878b44c80724e9bf09bb11210eeb8f01518ac7f0e3e7f2ee241281e500","record/old-bench-log/old/13-cache-byte-range-suffixes":"1d67d58a39f9177a79c26f3c6c2a1caaf51f085ce137711ab9ba74071c14680c","rustfmt.toml":"1ca600
239a27401c4a43f363cf3f38183a212affc1f31bff3ae93234bbaec228","src/builders.rs":"6dbff8c7ff7febe031dbef3eafe1f02a15112ff1ffd889761a21c10b0dd84f03","src/bytes.rs":"cce2b7012f5896cf82fc3086bf8128dc9efe2b69bf6917d041c1a171eabacdc0","src/error.rs":"4ac8361e900627a87a2ac78e5d475be17c455fe6850d1515bf5b874837c4ae25","src/find_byte.rs":"e17cd3b765467685946707840b92ea4e37d3c11081fbf316174a15858cd4bd99","src/lib.rs":"1be6ce04d4fce06a7e46bc006bbf536f5a7f82d97dc71e7f7489a6d2610f790b","src/pattern.rs":"5f37755a7c16902d861377645f57a20314961f86298d4b35ae6e1058ca4e9801","src/regex/bytes.rs":"b97bae6e559948573eaccc93bc316885266302fd70057cf2fae5827e97825b07","src/regex/mod.rs":"c220b6dd7a5e1945f8e743d1dcd796c5f782c91b0c34eb9915c588174a517fe8","src/regex/string.rs":"edc0613e6cd6da6204549598dce089a66175b3e29baa7629466db85b51031137","src/regexset/bytes.rs":"6290dd81f47fb6cdbaa358be2340398a2a640320373d334e4c977bf30b5a9220","src/regexset/mod.rs":"c220b6dd7a5e1945f8e743d1dcd796c5f782c91b0c34eb9915c588174a517fe8","src/regexset/string.rs":"977bc167c48c7c99187599c5071ca197e61a56359d32a26b9dbc1b58a5ef1c4d","test":"c0122c20a2c9b7ba6e9a8aaeb2b7d9910315ef31063539949f28d9501ef3193c","testdata/README.md":"c0514501526560d7f6171eb6d982ad61b4527760cb38a4bfbe8e28036ff37b95","testdata/anchored.toml":"7a1b5cd81deed2099796a451bf764a3f9bd21f0d60c0fa46accd3a35666866f2","testdata/bytes.toml":"1d84179165fd25f3b94bd2bfbeb43fc8a162041f7bf98b717e0f85cef7fb652b","testdata/crazy.toml":"a146e2d2e23f1a57168979d9b1fc193c2ba38dca66294b61140d6d2a2958ec86","testdata/crlf.toml":"d19cf22756434d145dd20946c00af01c102a556a252070405c3c8294129d9ece","testdata/earliest.toml":"d561e643623ee1889b5b049fdcf3c7cb71b0c746d7eb822ddbd09d0acda2620b","testdata/empty.toml":"738dbe92fbd8971385a1cf3affb0e956e5b692c858b9b48439d718f10801c08e","testdata/expensive.toml":"5ce2f60209c99cdd2cdcb9d3069d1d5ca13d5e08a85e913efe57267b2f5f0e9d","testdata/flags.toml":"9a7e001808195c84f2a7d3e18bc0a82c7386e60f03a616e99af00c3f7f2c3fd4","testdata/fowler/basic.
toml":"a82c7e233451cd7cfe0c3d817f3a1ab44478bb81ae62432efdd515fa8370275e","testdata/fowler/dat/README":"e53d6c37b5931cb26dc9ae4c40358eea63f7a469c4db6ca816c072a8ced6a61a","testdata/fowler/dat/basic.dat":"b1126dda59075c08f574987090273c9977790115f1e1941d0708c0b82b256905","testdata/fowler/dat/nullsubexpr.dat":"e5cd4145dffa8bc66f2d39079950b2bb7bae21a521514b83b557b92f4a871a9e","testdata/fowler/dat/repetition.dat":"2b8b2b191229a804fba49e6b888d8194bf488f7744057b550da9d95a2aa6617a","testdata/fowler/nullsubexpr.toml":"cd812e7e8fa0469253b34f0db93b5883c9d8b9740fc4f7825a38e7df880a4eed","testdata/fowler/repetition.toml":"8c09164f064b3db81309c53483863bdcec493781644de162416e9f485e772615","testdata/iter.toml":"6875460302974a5b3073a7304a865c45aba9653c54afea2c4d26e1ea248a81f7","testdata/leftmost-all.toml":"903bfbeff888b7664296f4d5aa367ce53d1dafe249ab0a3359223ae94d596396","testdata/line-terminator.toml":"02148068137b69d95587966917bdf0697bf7eb41ad6d47387f2eb30f67d04fd9","testdata/misc.toml":"32c9591655c6fb118dfefcb4de49a04820a63cb960533dfc2538cdaabf4f4047","testdata/multiline.toml":"eb07cf5427e6ddbcf61f4cc64c2d74ff41b5ef75ef857959651b20196f3cd157","testdata/no-unicode.toml":"d209da04506900fd5f69e48170cddaad0702355ac6176c3a75ab3ff96974457c","testdata/overlapping.toml":"5d96497a7233566d40b05ba22047e483fa8662e45515a9be86da45cf6c28703a","testdata/regex-lite.toml":"fecca7cc8c9cea2e1f84f846a89fd9b3ca7011c83698211a2eeda8924deb900c","testdata/regression.toml":"6006ef4fcfbfd7155ce5ce8b8427904f7261c5549396f20cb065c0294733686d","testdata/set.toml":"dfd265dc1aee80026e881616840df0236ae9abf12467d7ec0e141a52c236128c","testdata/substring.toml":"48122d9f3477ed81f95e3ad42c06e9bb25f849b66994601a75ceae0693b81866","testdata/unicode.toml":"7e4b013039b0cdd85fa73f32d15d096182fe901643d4e40c0910087a736cd46d","testdata/utf8.toml":"2eabce0582bcacb2073e08bbe7ca413f096d14d06e917b107949691e24f84b20","testdata/word-boundary-special.toml":"7d0ea2f796478d1ca2a6954430cb1cfbd04031a182f8611cb50a7c73e443ce33","testdata/word-
boundary.toml":"51bc1c498ab825420340a2dd3e6623de4054937ba6d5020ff8cd14b1c1e45271","tests/fuzz/mod.rs":"7b01a803e1c0b5a45c062d493723553f263c57e269eade1475eb789694635d5c","tests/fuzz/testdata/crash-7eb3351f0965e5d6c1cb98aa8585949ef96531ff":"be4f42497ac9358eb020bf17cd8fdb9743691824e01d744504613ea2bfb2f663","tests/fuzz/testdata/crash-8760b19b25d74e3603d4c643e9c7404fdd3631f9":"19df9a1e1b5a3c0f31cc038b9f2991b161d8577b4a0c8b2fc391cdfecdb6dd85","tests/fuzz/testdata/crash-cd33b13df59ea9d74503986f9d32a270dd43cc04":"2fde1668e9e3e60943c28d97c01c90dd3d3882f48475f060ccaf961c228069e8","tests/fuzz/testdata/minimized-from-8760b19b25d74e3603d4c643e9c7404fdd3631f9":"c9e00f7a31453708560900aa51e358dd5551df494439860594be97bb1fb933ba","tests/fuzz/testdata/slow-unit-3ab758ea520027fefd3f00e1384d9aeef155739e":"4433011f7af46e855e843635cf24a49713bd5705f67176ed928f04d24eda1857","tests/fuzz/testdata/slow-unit-5345fccadf3812c53c3ccc7af5aa2741b7b2106c":"95782a847fc64e9cccdf76e9540b0d16ce80db5d05157a88b958b763f9b8479b","tests/fuzz/testdata/slow-unit-6bd643eec330166e4ada91da2d3f284268481085":"8ddff12288f6f20cc9d65db76bd8187834f64f844aad48a340d082555ad5cb56","tests/fuzz/testdata/slow-unit-93c73a43581f205f9aaffd9c17e52b34b17becd0":"eea6919a75fde163634b890e2253a0918cf0ba092357fa617f368bbfa131ba30","tests/fuzz/testdata/slow-unit-9ca9cc9929fee1fcbb847a78384effb8b98ea18a":"a806f73b900046977267acceb83b105bac7ee21ede2edc2927afe1e1f0149f00","tests/fuzz/testdata/slow-unit-b8a052f4254802edbe5f569b6ce6e9b6c927e9d6":"9540cf58241cde3bc0db8364e0ccff67ff1ff9721c85b0d2ca27354c0cbf2650","tests/lib.rs":"9bffc95568c09ac95b6a3e7ca64b6e858a0552d0c0b0fca2c447da3b9c0a45a2","tests/misc.rs":"5ac5858325451e1d70f308ca0bcead5a354d095a7473800c11065231c319c456","tests/regression.rs":"3490aac99fdbf3f0949ba1f338d5184a84b505ebd96d0b6d6145c610587aa60b","tests/regression_fuzz.rs":"57e0bcba0fdfa7797865e35ae547cd7fe1c6132b80a7bfdfb06eb053a568b00d","tests/replace.rs":"78ff9bf7f78783ad83a78041bb7ee0705c7efc85b4d12301581d0ce5b2a59325","tes
ts/searcher.rs":"04152e5c86431deec0c196d2564a11bc4ec36f14c77e8c16a2f9d1cbc9fc574e","tests/suite_bytes.rs":"7697b04e5b181aa78b3654bd2dbe1c792d9626197573ed8c649f1da8b481817d","tests/suite_bytes_set.rs":"d970168fab57a9edc60ff26a2bb7d0cc714d4298e4ee9eadba9da44a6569f2bb","tests/suite_string.rs":"1be0cf8922171f8323f99e8ecedbbf1846d339620d0dc2fd490901cbbbd2622e","tests/suite_string_set.rs":"22743107206d913521f9adb728482aed3a9625ff7b15a83df057bbf1f7050e03"},"package":"b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15"} \ No newline at end of file diff --git a/vendor/regex/CHANGELOG.md b/vendor/regex/CHANGELOG.md index 1fdf493..3ffd961 100644 --- a/vendor/regex/CHANGELOG.md +++ b/vendor/regex/CHANGELOG.md @@ -1,3 +1,357 @@ +1.10.3 (2024-01-21) +=================== +This is a new patch release that fixes the feature configuration of optional +dependencies, and fixes an unsound use of bounds check elision. + +Bug fixes: + +* [BUG #1147](https://github.com/rust-lang/regex/issues/1147): +Set `default-features=false` for the `memchr` and `aho-corasick` dependencies. +* [BUG #1154](https://github.com/rust-lang/regex/pull/1154): +Fix unsound bounds check elision. + + +1.10.2 (2023-10-16) +=================== +This is a new patch release that fixes a search regression where incorrect +matches could be reported. + +Bug fixes: + +* [BUG #1110](https://github.com/rust-lang/regex/issues/1110): +Revert broadening of reverse suffix literal optimization introduced in 1.10.1. + + +1.10.1 (2023-10-14) +=================== +This is a new patch release with a minor increase in the number of valid +patterns and a broadening of some literal optimizations. + +New features: + +* [FEATURE 04f5d7be](https://github.com/rust-lang/regex/commit/04f5d7be4efc542864cc400f5d43fbea4eb9bab6): +Loosen ASCII-compatible rules such that regexes like `(?-u:☃)` are now allowed. 
+ +Performance improvements: + +* [PERF 8a8d599f](https://github.com/rust-lang/regex/commit/8a8d599f9d2f2d78e9ad84e4084788c2d563afa5): +Broader the reverse suffix optimization to apply in more cases. + + +1.10.0 (2023-10-09) +=================== +This is a new minor release of `regex` that adds support for start and end +word boundary assertions. That is, `\<` and `\>`. The minimum supported Rust +version has also been raised to 1.65, which was released about one year ago. + +The new word boundary assertions are: + +* `\<` or `\b{start}`: a Unicode start-of-word boundary (`\W|\A` on the left, +`\w` on the right). +* `\>` or `\b{end}`: a Unicode end-of-word boundary (`\w` on the left, `\W|\z` +on the right)). +* `\b{start-half}`: half of a Unicode start-of-word boundary (`\W|\A` on the +left). +* `\b{end-half}`: half of a Unicode end-of-word boundary (`\W|\z` on the +right). + +The `\<` and `\>` are GNU extensions to POSIX regexes. They have been added +to the `regex` crate because they enjoy somewhat broad support in other regex +engines as well (for example, vim). The `\b{start}` and `\b{end}` assertions +are aliases for `\<` and `\>`, respectively. + +The `\b{start-half}` and `\b{end-half}` assertions are not found in any +other regex engine (although regex engines with general look-around support +can certainly express them). They were added principally to support the +implementation of word matching in grep programs, where one generally wants to +be a bit more flexible in what is considered a word boundary. + +New features: + +* [FEATURE #469](https://github.com/rust-lang/regex/issues/469): +Add support for `\<` and `\>` word boundary assertions. +* [FEATURE(regex-automata) #1031](https://github.com/rust-lang/regex/pull/1031): +DFAs now have a `start_state` method that doesn't use an `Input`. + +Performance improvements: + +* [PERF #1051](https://github.com/rust-lang/regex/pull/1051): +Unicode character class operations have been optimized in `regex-syntax`. 
+* [PERF #1090](https://github.com/rust-lang/regex/issues/1090): +Make patterns containing lots of literal characters use less memory. + +Bug fixes: + +* [BUG #1046](https://github.com/rust-lang/regex/issues/1046): +Fix a bug that could result in incorrect match spans when using a Unicode word +boundary and searching non-ASCII strings. +* [BUG(regex-syntax) #1047](https://github.com/rust-lang/regex/issues/1047): +Fix panics that can occur in `Ast->Hir` translation (not reachable from `regex` +crate). +* [BUG(regex-syntax) #1088](https://github.com/rust-lang/regex/issues/1088): +Remove guarantees in the API that connect the `u` flag with a specific HIR +representation. + +`regex-automata` breaking change release: + +This release includes a `regex-automata 0.4.0` breaking change release, which +was necessary in order to support the new word boundary assertions. For +example, the `Look` enum has new variants and the `LookSet` type now uses `u32` +instead of `u16` to represent a bitset of look-around assertions. These are +overall very minor changes, and most users of `regex-automata` should be able +to move to `0.4` from `0.3` without any changes at all. + +`regex-syntax` breaking change release: + +This release also includes a `regex-syntax 0.8.0` breaking change release, +which, like `regex-automata`, was necessary in order to support the new word +boundary assertions. This release also includes some changes to the `Ast` +type to reduce heap usage in some cases. If you are using the `Ast` type +directly, your code may require some minor modifications. Otherwise, users of +`regex-syntax 0.7` should be able to migrate to `0.8` without any code changes. + +`regex-lite` release: + +The `regex-lite 0.1.1` release contains support for the new word boundary +assertions. There are no breaking changes. + + +1.9.6 (2023-09-30) +================== +This is a patch release that fixes a panic that can occur when the default +regex size limit is increased to a large number. 
+ +* [BUG aa4e4c71](https://github.com/rust-lang/regex/commit/aa4e4c7120b0090ce0624e3c42a2ed06dd8b918a): +Fix a bug where computing the maximum haystack length for the bounded +backtracker could result underflow and thus provoke a panic later in a search +due to a broken invariant. + + +1.9.5 (2023-09-02) +================== +This is a patch release that hopefully mostly fixes a performance bug that +occurs when sharing a regex across multiple threads. + +Issue [#934](https://github.com/rust-lang/regex/issues/934) +explains this in more detail. It is [also noted in the crate +documentation](https://docs.rs/regex/latest/regex/#sharing-a-regex-across-threads-can-result-in-contention). +The bug can appear when sharing a regex across multiple threads simultaneously, +as might be the case when using a regex from a `OnceLock`, `lazy_static` or +similar primitive. Usually high contention only results when using many threads +to execute searches on small haystacks. + +One can avoid the contention problem entirely through one of two methods. +The first is to use lower level APIs from `regex-automata` that require passing +state explicitly, such as [`meta::Regex::search_with`](https://docs.rs/regex-automata/latest/regex_automata/meta/struct.Regex.html#method.search_with). +The second is to clone a regex and send it to other threads explicitly. This +will not use any additional memory usage compared to sharing the regex. The +only downside of this approach is that it may be less convenient, for example, +it won't work with things like `OnceLock` or `lazy_static` or `once_cell`. + +With that said, as of this release, the contention performance problems have +been greatly reduced. This was achieved by changing the free-list so that it +was sharded across threads, and that ensuring each sharded mutex occupies a +single cache line to mitigate false sharing. So while contention may still +impact performance in some cases, it should be a lot better now. 
+ +Because of the changes to how the free-list works, please report any issues you +find with this release. That not only includes search time regressions but also +significant regressions in memory usage. Reporting improvements is also welcome +as well! If possible, provide a reproduction. + +Bug fixes: + +* [BUG #934](https://github.com/rust-lang/regex/issues/934): +Fix a performance bug where high contention on a single regex led to massive +slow downs. + + +1.9.4 (2023-08-26) +================== +This is a patch release that fixes a bug where `RegexSet::is_match(..)` could +incorrectly return false (even when `RegexSet::matches(..).matched_any()` +returns true). + +Bug fixes: + +* [BUG #1070](https://github.com/rust-lang/regex/issues/1070): +Fix a bug where a prefilter was incorrectly configured for a `RegexSet`. + + +1.9.3 (2023-08-05) +================== +This is a patch release that fixes a bug where some searches could result in +incorrect match offsets being reported. It is difficult to characterize the +types of regexes susceptible to this bug. They generally involve patterns +that contain no prefix or suffix literals, but have an inner literal along with +a regex prefix that can conditionally match. + +Bug fixes: + +* [BUG #1060](https://github.com/rust-lang/regex/issues/1060): +Fix a bug with the reverse inner literal optimization reporting incorrect match +offsets. + + +1.9.2 (2023-08-05) +================== +This is a patch release that fixes another memory usage regression. This +particular regression occurred only when using a `RegexSet`. In some cases, +much more heap memory (by one or two orders of magnitude) was allocated than in +versions prior to 1.9.0. + +Bug fixes: + +* [BUG #1059](https://github.com/rust-lang/regex/issues/1059): +Fix a memory usage regression when using a `RegexSet`. + + +1.9.1 (2023-07-07) +================== +This is a patch release which fixes a memory usage regression. 
In the regex +1.9 release, one of the internal engines used a more aggressive allocation +strategy than what was done previously. This patch release reverts to the +prior on-demand strategy. + +Bug fixes: + +* [BUG #1027](https://github.com/rust-lang/regex/issues/1027): +Change the allocation strategy for the backtracker to be less aggressive. + + +1.9.0 (2023-07-05) +================== +This release marks the end of a [years long rewrite of the regex crate +internals](https://github.com/rust-lang/regex/issues/656). Since this is +such a big release, please report any issues or regressions you find. We would +also love to hear about improvements as well. + +In addition to many internal improvements that should hopefully result in +"my regex searches are faster," there have also been a few API additions: + +* A new `Captures::extract` method for quickly accessing the substrings +that match each capture group in a regex. +* A new inline flag, `R`, which enables CRLF mode. This makes `.` match any +Unicode scalar value except for `\r` and `\n`, and also makes `(?m:^)` and +`(?m:$)` match after and before both `\r` and `\n`, respectively, but never +between a `\r` and `\n`. +* `RegexBuilder::line_terminator` was added to further customize the line +terminator used by `(?m:^)` and `(?m:$)` to be any arbitrary byte. +* The `std` Cargo feature is now actually optional. That is, the `regex` crate +can be used without the standard library. +* Because `regex 1.9` may make binary size and compile times even worse, a +new experimental crate called `regex-lite` has been published. It prioritizes +binary size and compile times over functionality (like Unicode) and +performance. It shares no code with the `regex` crate. + +New features: + +* [FEATURE #244](https://github.com/rust-lang/regex/issues/244): +One can opt into CRLF mode via the `R` flag. +e.g., `(?mR:$)` matches just before `\r\n`. 
+* [FEATURE #259](https://github.com/rust-lang/regex/issues/259): +Multi-pattern searches with offsets can be done with `regex-automata 0.3`. +* [FEATURE #476](https://github.com/rust-lang/regex/issues/476): +`std` is now an optional feature. `regex` may be used with only `alloc`. +* [FEATURE #644](https://github.com/rust-lang/regex/issues/644): +`RegexBuilder::line_terminator` configures how `(?m:^)` and `(?m:$)` behave. +* [FEATURE #675](https://github.com/rust-lang/regex/issues/675): +Anchored search APIs are now available in `regex-automata 0.3`. +* [FEATURE #824](https://github.com/rust-lang/regex/issues/824): +Add new `Captures::extract` method for easier capture group access. +* [FEATURE #961](https://github.com/rust-lang/regex/issues/961): +Add `regex-lite` crate with smaller binary sizes and faster compile times. +* [FEATURE #1022](https://github.com/rust-lang/regex/pull/1022): +Add `TryFrom` implementations for the `Regex` type. + +Performance improvements: + +* [PERF #68](https://github.com/rust-lang/regex/issues/68): +Added a one-pass DFA engine for faster capture group matching. +* [PERF #510](https://github.com/rust-lang/regex/issues/510): +Inner literals are now used to accelerate searches, e.g., `\w+@\w+` will scan +for `@`. +* [PERF #787](https://github.com/rust-lang/regex/issues/787), +[PERF #891](https://github.com/rust-lang/regex/issues/891): +Makes literal optimizations apply to regexes of the form `\b(foo|bar|quux)\b`. + +(There are many more performance improvements as well, but not all of them have +specific issues devoted to them.) + +Bug fixes: + +* [BUG #429](https://github.com/rust-lang/regex/issues/429): +Fix matching bugs related to `\B` and inconsistencies across internal engines. +* [BUG #517](https://github.com/rust-lang/regex/issues/517): +Fix matching bug with capture groups. +* [BUG #579](https://github.com/rust-lang/regex/issues/579): +Fix matching bug with word boundaries. 
+* [BUG #779](https://github.com/rust-lang/regex/issues/779): +Fix bug where some regexes like `(re)+` were not equivalent to `(re)(re)*`. +* [BUG #850](https://github.com/rust-lang/regex/issues/850): +Fix matching bug inconsistency between NFA and DFA engines. +* [BUG #921](https://github.com/rust-lang/regex/issues/921): +Fix matching bug where literal extraction got confused by `$`. +* [BUG #976](https://github.com/rust-lang/regex/issues/976): +Add documentation to replacement routines about dealing with fallibility. +* [BUG #1002](https://github.com/rust-lang/regex/issues/1002): +Use corpus rejection in fuzz testing. + + +1.8.4 (2023-06-05) +================== +This is a patch release that fixes a bug where `(?-u:\B)` was allowed in +Unicode regexes, despite the fact that the current matching engines can report +match offsets between the code units of a single UTF-8 encoded codepoint. That +in turn means that match offsets that split a codepoint could be reported, +which in turn results in panicking when one uses them to slice a `&str`. + +This bug occurred in the transition to `regex 1.8` because the underlying +syntactical error that prevented this regex from compiling was intentionally +removed. That's because `(?-u:\B)` will be permitted in Unicode regexes in +`regex 1.9`, but the matching engines will guarantee to never report match +offsets that split a codepoint. When the underlying syntactical error was +removed, no code was added to ensure that `(?-u:\B)` didn't compile in the +`regex 1.8` transition release. This release, `regex 1.8.4`, adds that code +such that `Regex::new(r"(?-u:\B)")` returns to the `regex <1.8` behavior of +not compiling. (A `bytes::Regex` can still of course compile it.) + +Bug fixes: + +* [BUG #1006](https://github.com/rust-lang/regex/issues/1006): +Fix a bug where `(?-u:\B)` was allowed in Unicode regexes, and in turn could +lead to match offsets that split a codepoint in `&str`. 
+ + +1.8.3 (2023-05-25) +================== +This is a patch release that fixes a bug where the regex would report a +match at every position even when it shouldn't. This could occur in a very +small subset of regexes, usually an alternation of simple literals that +have particular properties. (See the issue linked below for a more precise +description.) + +Bug fixes: + +* [BUG #999](https://github.com/rust-lang/regex/issues/999): +Fix a bug where a match at every position is erroneously reported. + + +1.8.2 (2023-05-22) +================== +This is a patch release that fixes a bug where regex compilation could panic +in debug mode for regexes with large counted repetitions. For example, +`a{2147483516}{2147483416}{5}` resulted in an integer overflow that wrapped +in release mode but panicking in debug mode. Despite the unintended wrapping +arithmetic in release mode, it didn't cause any other logical bugs since the +errant code was for new analysis that wasn't used yet. + +Bug fixes: + +* [BUG #995](https://github.com/rust-lang/regex/issues/995): +Fix a bug where regex compilation with large counted repetitions could panic. + + 1.8.1 (2023-04-21) ================== This is a patch release that fixes a bug where a regex match could be reported @@ -17,11 +371,11 @@ optimizations and lead to a false positive match. This is a sizeable release that will be soon followed by another sizeable release. Both of them will combined close over 40 existing issues and PRs. -This first release, despite its size, essentially represent preparatory work +This first release, despite its size, essentially represents preparatory work for the second release, which will be even bigger. Namely, this release: * Increases the MSRV to Rust 1.60.0, which was released about 1 year ago. -* Upgrades its dependency on `aho-corasick` to the recently release 1.0 +* Upgrades its dependency on `aho-corasick` to the recently released 1.0 version. 
* Upgrades its dependency on `regex-syntax` to the simultaneously released `0.7` version. The changes to `regex-syntax` principally revolve around a @@ -54,7 +408,7 @@ More specifically, any ASCII character except for `[0-9A-Za-z<>]` can now be escaped. Also, a new routine, `is_escapeable_character`, has been added to `regex-syntax` to query whether a character is escapeable or not. * [FEATURE #547](https://github.com/rust-lang/regex/issues/547): -Add `Regex::captures_at`. This filles a hole in the API, but doesn't otherwise +Add `Regex::captures_at`. This fills a hole in the API, but doesn't otherwise introduce any new expressive power. * [FEATURE #595](https://github.com/rust-lang/regex/issues/595): Capture group names are now Unicode-aware. They can now begin with either a `_` diff --git a/vendor/regex/Cargo.toml b/vendor/regex/Cargo.toml index 79dbf23..dc035bc 100644 --- a/vendor/regex/Cargo.toml +++ b/vendor/regex/Cargo.toml @@ -11,10 +11,13 @@ [package] edition = "2021" -rust-version = "1.60.0" +rust-version = "1.65" name = "regex" -version = "1.8.1" -authors = ["The Rust Project Developers"] +version = "1.10.3" +authors = [ + "The Rust Project Developers", + "Andrew Gallant ", +] exclude = [ "/scripts/*", "/.github/*", @@ -31,82 +34,80 @@ categories = ["text-processing"] license = "MIT OR Apache-2.0" repository = "https://github.com/rust-lang/regex" +[package.metadata.docs.rs] +all-features = true +rustdoc-args = [ + "--cfg", + "docsrs", +] + [profile.bench] -debug = true +debug = 2 + +[profile.dev] +opt-level = 3 +debug = 2 [profile.release] -debug = true +debug = 2 [profile.test] -debug = true - -[lib] -doctest = false -bench = false - -[[test]] -name = "default" -path = "tests/test_default.rs" - -[[test]] -name = "default-bytes" -path = "tests/test_default_bytes.rs" +opt-level = 3 +debug = 2 [[test]] -name = "nfa" -path = "tests/test_nfa.rs" - -[[test]] -name = "nfa-utf8bytes" -path = "tests/test_nfa_utf8bytes.rs" - -[[test]] -name = "nfa-bytes" -path = 
"tests/test_nfa_bytes.rs" - -[[test]] -name = "backtrack" -path = "tests/test_backtrack.rs" - -[[test]] -name = "backtrack-utf8bytes" -path = "tests/test_backtrack_utf8bytes.rs" - -[[test]] -name = "backtrack-bytes" -path = "tests/test_backtrack_bytes.rs" - -[[test]] -name = "crates-regex" -path = "tests/test_crates_regex.rs" +name = "integration" +path = "tests/lib.rs" [dependencies.aho-corasick] version = "1.0.0" optional = true +default-features = false [dependencies.memchr] -version = "2.5.0" +version = "2.6.0" optional = true +default-features = false + +[dependencies.regex-automata] +version = "0.4.4" +features = [ + "alloc", + "syntax", + "meta", + "nfa-pikevm", +] +default-features = false [dependencies.regex-syntax] -version = "0.7.1" +version = "0.8.2" default-features = false -[dev-dependencies.lazy_static] -version = "1" +[dev-dependencies.anyhow] +version = "1.0.69" -[dev-dependencies.quickcheck] -version = "1.0.3" -default-features = false +[dev-dependencies.doc-comment] +version = "0.3" -[dev-dependencies.rand] -version = "0.8.3" +[dev-dependencies.env_logger] +version = "0.9.3" features = [ - "getrandom", - "small_rng", + "atty", + "humantime", + "termcolor", ] default-features = false +[dev-dependencies.once_cell] +version = "1.17.1" + +[dev-dependencies.quickcheck] +version = "1.0.3" +default-features = false + +[dev-dependencies.regex-test] +version = "0.1.0" + [features] default = [ "std", @@ -114,21 +115,40 @@ default = [ "unicode", "regex-syntax/default", ] +logging = [ + "aho-corasick?/logging", + "memchr?/logging", + "regex-automata/logging", +] pattern = [] perf = [ "perf-cache", "perf-dfa", + "perf-onepass", + "perf-backtrack", "perf-inline", "perf-literal", ] +perf-backtrack = ["regex-automata/nfa-backtrack"] perf-cache = [] -perf-dfa = [] -perf-inline = [] +perf-dfa = ["regex-automata/hybrid"] +perf-dfa-full = [ + "regex-automata/dfa-build", + "regex-automata/dfa-search", +] +perf-inline = ["regex-automata/perf-inline"] perf-literal = [ 
- "aho-corasick", - "memchr", + "dep:aho-corasick", + "dep:memchr", + "regex-automata/perf-literal", +] +perf-onepass = ["regex-automata/dfa-onepass"] +std = [ + "aho-corasick?/std", + "memchr?/std", + "regex-automata/std", + "regex-syntax/std", ] -std = [] unicode = [ "unicode-age", "unicode-bool", @@ -137,14 +157,37 @@ unicode = [ "unicode-perl", "unicode-script", "unicode-segment", + "regex-automata/unicode", "regex-syntax/unicode", ] -unicode-age = ["regex-syntax/unicode-age"] -unicode-bool = ["regex-syntax/unicode-bool"] -unicode-case = ["regex-syntax/unicode-case"] -unicode-gencat = ["regex-syntax/unicode-gencat"] -unicode-perl = ["regex-syntax/unicode-perl"] -unicode-script = ["regex-syntax/unicode-script"] -unicode-segment = ["regex-syntax/unicode-segment"] +unicode-age = [ + "regex-automata/unicode-age", + "regex-syntax/unicode-age", +] +unicode-bool = [ + "regex-automata/unicode-bool", + "regex-syntax/unicode-bool", +] +unicode-case = [ + "regex-automata/unicode-case", + "regex-syntax/unicode-case", +] +unicode-gencat = [ + "regex-automata/unicode-gencat", + "regex-syntax/unicode-gencat", +] +unicode-perl = [ + "regex-automata/unicode-perl", + "regex-automata/unicode-word-boundary", + "regex-syntax/unicode-perl", +] +unicode-script = [ + "regex-automata/unicode-script", + "regex-syntax/unicode-script", +] +unicode-segment = [ + "regex-automata/unicode-segment", + "regex-syntax/unicode-segment", +] unstable = ["pattern"] use_std = ["std"] diff --git a/vendor/regex/Cross.toml b/vendor/regex/Cross.toml new file mode 100644 index 0000000..5415e7a --- /dev/null +++ b/vendor/regex/Cross.toml @@ -0,0 +1,7 @@ +[build.env] +passthrough = [ + "RUST_BACKTRACE", + "RUST_LOG", + "REGEX_TEST", + "REGEX_TEST_VERBOSE", +] diff --git a/vendor/regex/HACKING.md b/vendor/regex/HACKING.md deleted file mode 100644 index 34af5b5..0000000 --- a/vendor/regex/HACKING.md +++ /dev/null @@ -1,341 +0,0 @@ -Your friendly guide to hacking and navigating the regex library. 
- -This guide assumes familiarity with Rust and Cargo, and at least a perusal of -the user facing documentation for this crate. - -If you're looking for background on the implementation in this library, then -you can do no better than Russ Cox's article series on implementing regular -expressions using finite automata: https://swtch.com/~rsc/regexp/ - - -## Architecture overview - -As you probably already know, this library executes regular expressions using -finite automata. In particular, a design goal is to make searching linear -with respect to both the regular expression and the text being searched. -Meeting that design goal on its own is not so hard and can be done with an -implementation of the Pike VM (similar to Thompson's construction, but supports -capturing groups), as described in: https://swtch.com/~rsc/regexp/regexp2.html ---- This library contains such an implementation in src/pikevm.rs. - -Making it fast is harder. One of the key problems with the Pike VM is that it -can be in more than one state at any point in time, and must shuffle capture -positions between them. The Pike VM also spends a lot of time following the -same epsilon transitions over and over again. We can employ one trick to -speed up the Pike VM: extract one or more literal prefixes from the regular -expression and execute specialized code to quickly find matches of those -prefixes in the search text. The Pike VM can then be avoided for most the -search, and instead only executed when a prefix is found. The code to find -prefixes is in the regex-syntax crate (in this repository). The code to search -for literals is in src/literals.rs. When more than one literal prefix is found, -we fall back to an Aho-Corasick DFA using the aho-corasick crate. For one -literal, we use a variant of the Boyer-Moore algorithm. Both Aho-Corasick and -Boyer-Moore use `memchr` when appropriate. 
The Boyer-Moore variant in this -library also uses elementary frequency analysis to choose the right byte to run -`memchr` with. - -Of course, detecting prefix literals can only take us so far. Not all regular -expressions have literal prefixes. To remedy this, we try another approach -to executing the Pike VM: backtracking, whose implementation can be found in -src/backtrack.rs. One reason why backtracking can be faster is that it avoids -excessive shuffling of capture groups. Of course, backtracking is susceptible -to exponential runtimes, so we keep track of every state we've visited to make -sure we never visit it again. This guarantees linear time execution, but we -pay for it with the memory required to track visited states. Because of the -memory requirement, we only use this engine on small search strings *and* small -regular expressions. - -Lastly, the real workhorse of this library is the "lazy" DFA in src/dfa.rs. -It is distinct from the Pike VM in that the DFA is explicitly represented in -memory and is only ever in one state at a time. It is said to be "lazy" because -the DFA is computed as text is searched, where each byte in the search text -results in at most one new DFA state. It is made fast by caching states. DFAs -are susceptible to exponential state blow up (where the worst case is computing -a new state for every input byte, regardless of what's in the state cache). To -avoid using a lot of memory, the lazy DFA uses a bounded cache. Once the cache -is full, it is wiped and state computation starts over again. If the cache is -wiped too frequently, then the DFA gives up and searching falls back to one of -the aforementioned algorithms. - -All of the above matching engines expose precisely the same matching semantics. -This is indeed tested. (See the section below about testing.) - -The following sub-sections describe the rest of the library and how each of the -matching engines are actually used. 
- -### Parsing - -Regular expressions are parsed using the regex-syntax crate, which is -maintained in this repository. The regex-syntax crate defines an abstract -syntax and provides very detailed error messages when a parse error is -encountered. Parsing is done in a separate crate so that others may benefit -from its existence, and because it is relatively divorced from the rest of the -regex library. - -The regex-syntax crate also provides sophisticated support for extracting -prefix and suffix literals from regular expressions. - -### Compilation - -The compiler is in src/compile.rs. The input to the compiler is some abstract -syntax for a regular expression and the output is a sequence of opcodes that -matching engines use to execute a search. (One can think of matching engines as -mini virtual machines.) The sequence of opcodes is a particular encoding of a -non-deterministic finite automaton. In particular, the opcodes explicitly rely -on epsilon transitions. - -Consider a simple regular expression like `a|b`. Its compiled form looks like -this: - - 000 Save(0) - 001 Split(2, 3) - 002 'a' (goto: 4) - 003 'b' - 004 Save(1) - 005 Match - -The first column is the instruction pointer and the second column is the -instruction. Save instructions indicate that the current position in the input -should be stored in a captured location. Split instructions represent a binary -branch in the program (i.e., epsilon transitions). The instructions `'a'` and -`'b'` indicate that the literal bytes `'a'` or `'b'` should match. - -In older versions of this library, the compilation looked like this: - - 000 Save(0) - 001 Split(2, 3) - 002 'a' - 003 Jump(5) - 004 'b' - 005 Save(1) - 006 Match - -In particular, empty instructions that merely served to move execution from one -point in the program to another were removed. Instead, every instruction has a -`goto` pointer embedded into it. 
This resulted in a small performance boost for -the Pike VM, because it was one fewer epsilon transition that it had to follow. - -There exist more instructions and they are defined and documented in -src/prog.rs. - -Compilation has several knobs and a few unfortunately complicated invariants. -Namely, the output of compilation can be one of two types of programs: a -program that executes on Unicode scalar values or a program that executes -on raw bytes. In the former case, the matching engine is responsible for -performing UTF-8 decoding and executing instructions using Unicode codepoints. -In the latter case, the program handles UTF-8 decoding implicitly, so that the -matching engine can execute on raw bytes. All matching engines can execute -either Unicode or byte based programs except for the lazy DFA, which requires -byte based programs. In general, both representations were kept because (1) the -lazy DFA requires byte based programs so that states can be encoded in a memory -efficient manner and (2) the Pike VM benefits greatly from inlining Unicode -character classes into fewer instructions as it results in fewer epsilon -transitions. - -N.B. UTF-8 decoding is built into the compiled program by making use of the -utf8-ranges crate. The compiler in this library factors out common suffixes to -reduce the size of huge character classes (e.g., `\pL`). - -A regrettable consequence of this split in instruction sets is we generally -need to compile two programs; one for NFA execution and one for the lazy DFA. - -In fact, it is worse than that: the lazy DFA is not capable of finding the -starting location of a match in a single scan, and must instead execute a -backwards search after finding the end location. To execute a backwards search, -we must have compiled the regular expression *in reverse*. - -This means that every compilation of a regular expression generally results in -three distinct programs. 
It would be possible to lazily compile the Unicode -program, since it is never needed if (1) the regular expression uses no word -boundary assertions and (2) the caller never asks for sub-capture locations. - -### Execution - -At the time of writing, there are four matching engines in this library: - -1. The Pike VM (supports captures). -2. Bounded backtracking (supports captures). -3. Literal substring or multi-substring search. -4. Lazy DFA (no support for Unicode word boundary assertions). - -Only the first two matching engines are capable of executing every regular -expression program. They also happen to be the slowest, which means we need -some logic that (1) knows various facts about the regular expression and (2) -knows what the caller wants. Using this information, we can determine which -engine (or engines) to use. - -The logic for choosing which engine to execute is in src/exec.rs and is -documented on the Exec type. Exec values contain regular expression Programs -(defined in src/prog.rs), which contain all the necessary tidbits for actually -executing a regular expression on search text. - -For the most part, the execution logic is straight-forward and follows the -limitations of each engine described above pretty faithfully. The hairiest -part of src/exec.rs by far is the execution of the lazy DFA, since it requires -a forwards and backwards search, and then falls back to either the Pike VM or -backtracking if the caller requested capture locations. - -The Exec type also contains mutable scratch space for each type of matching -engine. This scratch space is used during search (for example, for the lazy -DFA, it contains compiled states that are reused on subsequent searches). - -### Programs - -A regular expression program is essentially a sequence of opcodes produced by -the compiler plus various facts about the regular expression (such as whether -it is anchored, its capture names, etc.). - -### The regex! 
macro - -The `regex!` macro no longer exists. It was developed in a bygone era as a -compiler plugin during the infancy of the regex crate. Back then, then only -matching engine in the crate was the Pike VM. The `regex!` macro was, itself, -also a Pike VM. The only advantages it offered over the dynamic Pike VM that -was built at runtime were the following: - - 1. Syntax checking was done at compile time. Your Rust program wouldn't - compile if your regex didn't compile. - 2. Reduction of overhead that was proportional to the size of the regex. - For the most part, this overhead consisted of heap allocation, which - was nearly eliminated in the compiler plugin. - -The main takeaway here is that the compiler plugin was a marginally faster -version of a slow regex engine. As the regex crate evolved, it grew other regex -engines (DFA, bounded backtracker) and sophisticated literal optimizations. -The regex macro didn't keep pace, and it therefore became (dramatically) slower -than the dynamic engines. The only reason left to use it was for the compile -time guarantee that your regex is correct. Fortunately, Clippy (the Rust lint -tool) has a lint that checks your regular expression validity, which mostly -replaces that use case. - -Additionally, the regex compiler plugin stopped receiving maintenance. Nobody -complained. At that point, it seemed prudent to just remove it. - -Will a compiler plugin be brought back? The future is murky, but there is -definitely an opportunity there to build something that is faster than the -dynamic engines in some cases. But it will be challenging! As of now, there -are no plans to work on this. - - -## Testing - -A key aspect of any mature regex library is its test suite. A subset of the -tests in this library come from Glenn Fowler's AT&T test suite (its online -presence seems gone at the time of writing). The source of the test suite is -located in src/testdata. 
The scripts/regex-match-tests.py takes the test suite -in src/testdata and generates tests/matches.rs. - -There are also many other manually crafted tests and regression tests in -tests/tests.rs. Some of these tests were taken from RE2. - -The biggest source of complexity in the tests is related to answering this -question: how can we reuse the tests to check all of our matching engines? One -approach would have been to encode every test into some kind of format (like -the AT&T test suite) and code generate tests for each matching engine. The -approach we use in this library is to create a Cargo.toml entry point for each -matching engine we want to test. The entry points are: - -* `tests/test_default.rs` - tests `Regex::new` -* `tests/test_default_bytes.rs` - tests `bytes::Regex::new` -* `tests/test_nfa.rs` - tests `Regex::new`, forced to use the NFA - algorithm on every regex. -* `tests/test_nfa_bytes.rs` - tests `Regex::new`, forced to use the NFA - algorithm on every regex and use *arbitrary* byte based programs. -* `tests/test_nfa_utf8bytes.rs` - tests `Regex::new`, forced to use the NFA - algorithm on every regex and use *UTF-8* byte based programs. -* `tests/test_backtrack.rs` - tests `Regex::new`, forced to use - backtracking on every regex. -* `tests/test_backtrack_bytes.rs` - tests `Regex::new`, forced to use - backtracking on every regex and use *arbitrary* byte based programs. -* `tests/test_backtrack_utf8bytes.rs` - tests `Regex::new`, forced to use - backtracking on every regex and use *UTF-8* byte based programs. -* `tests/test_crates_regex.rs` - tests to make sure that all of the - backends behave in the same way against a number of quickcheck - generated random inputs. These tests need to be enabled through - the `RUST_REGEX_RANDOM_TEST` environment variable (see - below). - -The lazy DFA and pure literal engines are absent from this list because -they cannot be used on every regular expression. 
Instead, we rely on -`tests/test_dynamic.rs` to test the lazy DFA and literal engines when possible. - -Since the tests are repeated several times, and because `cargo test` runs all -entry points, it can take a while to compile everything. To reduce compile -times slightly, try using `cargo test --test default`, which will only use the -`tests/test_default.rs` entry point. - -The random testing takes quite a while, so it is not enabled by default. -In order to run the random testing you can set the -`RUST_REGEX_RANDOM_TEST` environment variable to anything before -invoking `cargo test`. Note that this variable is inspected at compile -time, so if the tests don't seem to be running, you may need to run -`cargo clean`. - -## Benchmarking - -The benchmarking in this crate is made up of many micro-benchmarks. Currently, -there are two primary sets of benchmarks: the benchmarks that were adopted -at this library's inception (in `bench/src/misc.rs`) and a newer set of -benchmarks meant to test various optimizations. Specifically, the latter set -contain some analysis and are in `bench/src/sherlock.rs`. Also, the latter -set are all executed on the same lengthy input whereas the former benchmarks -are executed on strings of varying length. - -There is also a smattering of benchmarks for parsing and compilation. - -Benchmarks are in a separate crate so that its dependencies can be managed -separately from the main regex crate. - -Benchmarking follows a similarly wonky setup as tests. There are multiple entry -points: - -* `bench_rust.rs` - benchmarks `Regex::new` -* `bench_rust_bytes.rs` benchmarks `bytes::Regex::new` -* `bench_pcre.rs` - benchmarks PCRE -* `bench_onig.rs` - benchmarks Oniguruma - -The PCRE and Oniguruma benchmarks exist as a comparison point to a mature -regular expression library. In general, this regex library compares favorably -(there are even a few benchmarks that PCRE simply runs too slowly on or -outright can't execute at all). 
I would love to add other regular expression -library benchmarks (especially RE2). - -If you're hacking on one of the matching engines and just want to see -benchmarks, then all you need to run is: - - $ (cd bench && ./run rust) - -If you want to compare your results with older benchmarks, then try: - - $ (cd bench && ./run rust | tee old) - $ ... make it faster - $ (cd bench && ./run rust | tee new) - $ cargo benchcmp old new --improvements - -The `cargo-benchcmp` utility is available here: -https://github.com/BurntSushi/cargo-benchcmp - -The `./bench/run` utility can run benchmarks for PCRE and Oniguruma too. See -`./bench/bench --help`. - -## Dev Docs - -When digging your teeth into the codebase for the first time, the -crate documentation can be a great resource. By default `rustdoc` -will strip out all documentation of private crate members in an -effort to help consumers of the crate focus on the *interface* -without having to concern themselves with the *implementation*. -Normally this is a great thing, but if you want to start hacking -on regex internals it is not what you want. Many of the private members -of this crate are well documented with rustdoc style comments, and -it would be a shame to miss out on the opportunity that presents. -You can generate the private docs with: - -``` -$ rustdoc --crate-name docs src/lib.rs -o target/doc -L target/debug/deps --no-defaults --passes collapse-docs --passes unindent-comments -``` - -Then just point your browser at `target/doc/regex/index.html`. - -See https://github.com/rust-lang/rust/issues/15347 for more info -about generating developer docs for internal use. diff --git a/vendor/regex/PERFORMANCE.md b/vendor/regex/PERFORMANCE.md deleted file mode 100644 index 8cd0d9c..0000000 --- a/vendor/regex/PERFORMANCE.md +++ /dev/null @@ -1,277 +0,0 @@ -Your friendly guide to understanding the performance characteristics of this -crate. 
- -This guide assumes some familiarity with the public API of this crate, which -can be found here: https://docs.rs/regex - -## Theory vs. Practice - -One of the design goals of this crate is to provide worst case linear time -behavior with respect to the text searched using finite state automata. This -means that, *in theory*, the performance of this crate is much better than most -regex implementations, which typically use backtracking which has worst case -exponential time. - -For example, try opening a Python interpreter and typing this: - - >>> import re - >>> re.search('(a*)*c', 'a' * 30).span() - -I'll wait. - -At some point, you'll figure out that it won't terminate any time soon. ^C it. - -The promise of this crate is that *this pathological behavior can't happen*. - -With that said, just because we have protected ourselves against worst case -exponential behavior doesn't mean we are immune from large constant factors -or places where the current regex engine isn't quite optimal. This guide will -detail those cases and provide guidance on how to avoid them, among other -bits of general advice. - -## Thou Shalt Not Compile Regular Expressions In A Loop - -**Advice**: Use `lazy_static` to amortize the cost of `Regex` compilation. - -Don't do it unless you really don't mind paying for it. Compiling a regular -expression in this crate is quite expensive. It is conceivable that it may get -faster some day, but I wouldn't hold out hope for, say, an order of magnitude -improvement. In particular, compilation can take any where from a few dozen -microseconds to a few dozen milliseconds. Yes, milliseconds. Unicode character -classes, in particular, have the largest impact on compilation performance. At -the time of writing, for example, `\pL{100}` takes around 44ms to compile. This -is because `\pL` corresponds to every letter in Unicode and compilation must -turn it into a proper automaton that decodes a subset of UTF-8 which -corresponds to those letters. 
Compilation also spends some cycles shrinking the -size of the automaton. - -This means that in order to realize efficient regex matching, one must -*amortize the cost of compilation*. Trivially, if a call to `is_match` is -inside a loop, then make sure your call to `Regex::new` is *outside* that loop. - -In many programming languages, regular expressions can be conveniently defined -and compiled in a global scope, and code can reach out and use them as if -they were global static variables. In Rust, there is really no concept of -life-before-main, and therefore, one cannot utter this: - - static MY_REGEX: Regex = Regex::new("...").unwrap(); - -Unfortunately, this would seem to imply that one must pass `Regex` objects -around to everywhere they are used, which can be especially painful depending -on how your program is structured. Thankfully, the -[`lazy_static`](https://crates.io/crates/lazy_static) -crate provides an answer that works well: - - use lazy_static::lazy_static; - use regex::Regex; - - fn some_helper_function(text: &str) -> bool { - lazy_static! { - static ref MY_REGEX: Regex = Regex::new("...").unwrap(); - } - MY_REGEX.is_match(text) - } - -In other words, the `lazy_static!` macro enables us to define a `Regex` *as if* -it were a global static value. What is actually happening under the covers is -that the code inside the macro (i.e., `Regex::new(...)`) is run on *first use* -of `MY_REGEX` via a `Deref` impl. The implementation is admittedly magical, but -it's self contained and everything works exactly as you expect. In particular, -`MY_REGEX` can be used from multiple threads without wrapping it in an `Arc` or -a `Mutex`. On that note... - -## Using a regex from multiple threads - -**Advice**: The performance impact from using a `Regex` from multiple threads -is likely negligible. If necessary, clone the `Regex` so that each thread gets -its own copy. 
Cloning a regex does not incur any additional memory overhead -than what would be used by using a `Regex` from multiple threads -simultaneously. *Its only cost is ergonomics.* - -It is supported and encouraged to define your regexes using `lazy_static!` as -if they were global static values, and then use them to search text from -multiple threads simultaneously. - -One might imagine that this is possible because a `Regex` represents a -*compiled* program, so that any allocation or mutation is already done, and is -therefore read-only. Unfortunately, this is not true. Each type of search -strategy in this crate requires some kind of mutable scratch space to use -*during search*. For example, when executing a DFA, its states are computed -lazily and reused on subsequent searches. Those states go into that mutable -scratch space. - -The mutable scratch space is an implementation detail, and in general, its -mutation should not be observable from users of this crate. Therefore, it uses -interior mutability. This implies that `Regex` can either only be used from one -thread, or it must do some sort of synchronization. Either choice is -reasonable, but this crate chooses the latter, in particular because it is -ergonomic and makes use with `lazy_static!` straight forward. - -Synchronization implies *some* amount of overhead. When a `Regex` is used from -a single thread, this overhead is negligible. When a `Regex` is used from -multiple threads simultaneously, it is possible for the overhead of -synchronization from contention to impact performance. The specific cases where -contention may happen is if you are calling any of these methods repeatedly -from multiple threads simultaneously: - -* shortest_match -* is_match -* find -* captures - -In particular, every invocation of one of these methods must synchronize with -other threads to retrieve its mutable scratch space before searching can start. 
-If, however, you are using one of these methods: - -* find_iter -* captures_iter - -Then you may not suffer from contention since the cost of synchronization is -amortized on *construction of the iterator*. That is, the mutable scratch space -is obtained when the iterator is created and retained throughout its lifetime. - -## Only ask for what you need - -**Advice**: Prefer in this order: `is_match`, `find`, `captures`. - -There are three primary search methods on a `Regex`: - -* is_match -* find -* captures - -In general, these are ordered from fastest to slowest. - -`is_match` is fastest because it doesn't actually need to find the start or the -end of the leftmost-first match. It can quit immediately after it knows there -is a match. For example, given the regex `a+` and the haystack, `aaaaa`, the -search will quit after examining the first byte. - -In contrast, `find` must return both the start and end location of the -leftmost-first match. It can use the DFA matcher for this, but must run it -forwards once to find the end of the match *and then run it backwards* to find -the start of the match. The two scans and the cost of finding the real end of -the leftmost-first match make this more expensive than `is_match`. - -`captures` is the most expensive of them all because it must do what `find` -does, and then run either the bounded backtracker or the Pike VM to fill in the -capture group locations. Both of these are simulations of an NFA, which must -spend a lot of time shuffling states around. The DFA limits the performance hit -somewhat by restricting the amount of text that must be searched via an NFA -simulation. - -One other method not mentioned is `shortest_match`. This method has precisely -the same performance characteristics as `is_match`, except it will return the -end location of when it discovered a match. 
For example, given the regex `a+` -and the haystack `aaaaa`, `shortest_match` may return `1` as opposed to `5`, -the latter of which being the correct end location of the leftmost-first match. - -## Literals in your regex may make it faster - -**Advice**: Literals can reduce the work that the regex engine needs to do. Use -them if you can, especially as prefixes. - -In particular, if your regex starts with a prefix literal, the prefix is -quickly searched before entering the (much slower) regex engine. For example, -given the regex `foo\w+`, the literal `foo` will be searched for using -Boyer-Moore. If there's no match, then no regex engine is ever used. Only when -there's a match is the regex engine invoked at the location of the match, which -effectively permits the regex engine to skip large portions of a haystack. -If a regex is comprised entirely of literals (possibly more than one), then -it's possible that the regex engine can be avoided entirely even when there's a -match. - -When one literal is found, Boyer-Moore is used. When multiple literals are -found, then an optimized version of Aho-Corasick is used. - -This optimization is in particular extended quite a bit in this crate. Here are -a few examples of regexes that get literal prefixes detected: - -* `(foo|bar)` detects `foo` and `bar` -* `(a|b)c` detects `ac` and `bc` -* `[ab]foo[yz]` detects `afooy`, `afooz`, `bfooy` and `bfooz` -* `a?b` detects `a` and `b` -* `a*b` detects `a` and `b` -* `(ab){3,6}` detects `ababab` - -Literals in anchored regexes can also be used for detecting non-matches very -quickly. For example, `^foo\w+` and `\w+foo$` may be able to detect a non-match -just by examining the first (or last) three bytes of the haystack. - -## Unicode word boundaries may prevent the DFA from being used - -**Advice**: In most cases, `\b` should work well. If not, use `(?-u:\b)` -instead of `\b` if you care about consistent performance more than correctness. 
- -It's a sad state of the current implementation. At the moment, the DFA will try -to interpret Unicode word boundaries as if they were ASCII word boundaries. -If the DFA comes across any non-ASCII byte, it will quit and fall back to an -alternative matching engine that can handle Unicode word boundaries correctly. -The alternate matching engine is generally quite a bit slower (perhaps by an -order of magnitude). If necessary, this can be ameliorated in two ways. - -The first way is to add some number of literal prefixes to your regular -expression. Even though the DFA may not be used, specialized routines will -still kick in to find prefix literals quickly, which limits how much work the -NFA simulation will need to do. - -The second way is to give up on Unicode and use an ASCII word boundary instead. -One can use an ASCII word boundary by disabling Unicode support. That is, -instead of using `\b`, use `(?-u:\b)`. Namely, given the regex `\b.+\b`, it -can be transformed into a regex that uses the DFA with `(?-u:\b).+(?-u:\b)`. It -is important to limit the scope of disabling the `u` flag, since it might lead -to a syntax error if the regex could match arbitrary bytes. For example, if one -wrote `(?-u)\b.+\b`, then a syntax error would be returned because `.` matches -any *byte* when the Unicode flag is disabled. - -The second way isn't appreciably different than just using a Unicode word -boundary in the first place, since the DFA will speculatively interpret it as -an ASCII word boundary anyway. The key difference is that if an ASCII word -boundary is used explicitly, then the DFA won't quit in the presence of -non-ASCII UTF-8 bytes. This results in giving up correctness in exchange for -more consistent performance. - -N.B. When using `bytes::Regex`, Unicode support is disabled by default, so one -can simply write `\b` to get an ASCII word boundary. 
- -## Excessive counting can lead to exponential state blow up in the DFA - -**Advice**: Don't write regexes that cause DFA state blow up if you care about -match performance. - -Wait, didn't I say that this crate guards against exponential worst cases? -Well, it turns out that the process of converting an NFA to a DFA can lead to -an exponential blow up in the number of states. This crate specifically guards -against exponential blow up by doing two things: - -1. The DFA is computed lazily. That is, a state in the DFA only exists in - memory if it is visited. In particular, the lazy DFA guarantees that *at - most* one state is created for every byte of input. This, on its own, - guarantees linear time complexity. -2. Of course, creating a new state for *every* byte of input means that search - will go incredibly slow because of very large constant factors. On top of - that, creating a state for every byte in a large haystack could result in - exorbitant memory usage. To ameliorate this, the DFA bounds the number of - states it can store. Once it reaches its limit, it flushes its cache. This - prevents reuse of states that it already computed. If the cache is flushed - too frequently, then the DFA will give up and execution will fall back to - one of the NFA simulations. - -In effect, this crate will detect exponential state blow up and fall back to -a search routine with fixed memory requirements. This does, however, mean that -searching will be much slower than one might expect. Regexes that rely on -counting in particular are strong aggravators of this behavior. For example, -matching `[01]*1[01]{20}$` against a random sequence of `0`s and `1`s. - -In the future, it may be possible to increase the bound that the DFA uses, -which would allow the caller to choose how much memory they're willing to -spend. - -## Resist the temptation to "optimize" regexes - -**Advice**: This ain't a backtracking engine. 
- -An entire book was written on how to optimize Perl-style regular expressions. -Most of those techniques are not applicable for this library. For example, -there is no problem with using non-greedy matching or having lots of -alternations in your regex. diff --git a/vendor/regex/README.md b/vendor/regex/README.md index 020b353..f1e4c40 100644 --- a/vendor/regex/README.md +++ b/vendor/regex/README.md @@ -1,15 +1,17 @@ regex ===== -A Rust library for parsing, compiling, and executing regular expressions. Its -syntax is similar to Perl-style regular expressions, but lacks a few features -like look around and backreferences. In exchange, all searches execute in -linear time with respect to the size of the regular expression and search text. -Much of the syntax and implementation is inspired -by [RE2](https://github.com/google/re2). +This crate provides routines for searching strings for matches of a [regular +expression] (aka "regex"). The regex syntax supported by this crate is similar +to other regex engines, but it lacks several features that are not known how to +implement efficiently. This includes, but is not limited to, look-around and +backreferences. In exchange, all regex searches in this crate have worst case +`O(m * n)` time complexity, where `m` is proportional to the size of the regex +and `n` is proportional to the size of the string being searched. 
+ +[regular expression]: https://en.wikipedia.org/wiki/Regular_expression [![Build status](https://github.com/rust-lang/regex/workflows/ci/badge.svg)](https://github.com/rust-lang/regex/actions) [![Crates.io](https://img.shields.io/crates/v/regex.svg)](https://crates.io/crates/regex) -[![Rust](https://img.shields.io/badge/rust-1.60.0%2B-blue.svg?maxAge=3600)](https://github.com/rust-lang/regex) ### Documentation @@ -40,8 +42,8 @@ fn main() { - (?P\d{2}) # the day ").unwrap(); - let caps = re.captures("2010-03-14").unwrap(); + let caps = re.captures("2010-03-14").unwrap(); assert_eq!("2010", &caps["year"]); assert_eq!("03", &caps["month"]); assert_eq!("14", &caps["day"]); @@ -54,32 +56,21 @@ easy to adapt the above example with an iterator: ```rust use regex::Regex; -const TO_SEARCH: &'static str = " -On 2010-03-14, foo happened. On 2014-10-14, bar happened. -"; - fn main() { let re = Regex::new(r"(\d{4})-(\d{2})-(\d{2})").unwrap(); + let hay = "On 2010-03-14, foo happened. On 2014-10-14, bar happened."; - for caps in re.captures_iter(TO_SEARCH) { - // Note that all of the unwraps are actually OK for this regex - // because the only way for the regex to match is if all of the - // capture groups match. This is not true in general though! - println!("year: {}, month: {}, day: {}", - caps.get(1).unwrap().as_str(), - caps.get(2).unwrap().as_str(), - caps.get(3).unwrap().as_str()); + let mut dates = vec![]; + for (_, [year, month, day]) in re.captures_iter(hay).map(|c| c.extract()) { + dates.push((year, month, day)); } + assert_eq!(dates, vec![ + ("2010", "03", "14"), + ("2014", "10", "14"), + ]); } ``` -This example outputs: - -```text -year: 2010, month: 03, day: 14 -year: 2014, month: 10, day: 14 -``` - ### Usage: Avoid compiling the same regex in a loop It is an anti-pattern to compile the same regular expression in a loop since @@ -90,19 +81,23 @@ allocations internally to the matching engines. 
In Rust, it can sometimes be a pain to pass regular expressions around if they're used from inside a helper function. Instead, we recommend using the -[`lazy_static`](https://crates.io/crates/lazy_static) crate to ensure that -regular expressions are compiled exactly once. - -For example: +[`once_cell`](https://crates.io/crates/once_cell) crate to ensure that +regular expressions are compiled exactly once. For example: -```rust,ignore -use regex::Regex; +```rust +use { + once_cell::sync::Lazy, + regex::Regex, +}; + +fn some_helper_function(haystack: &str) -> bool { + static RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"...").unwrap()); + RE.is_match(haystack) +} -fn some_helper_function(text: &str) -> bool { - lazy_static! { - static ref RE: Regex = Regex::new("...").unwrap(); - } - RE.is_match(text) +fn main() { + assert!(some_helper_function("abc")); + assert!(!some_helper_function("ac")); } ``` @@ -115,19 +110,21 @@ The main API of this crate (`regex::Regex`) requires the caller to pass a `&str` for searching. In Rust, an `&str` is required to be valid UTF-8, which means the main API can't be used for searching arbitrary bytes. -To match on arbitrary bytes, use the `regex::bytes::Regex` API. The API -is identical to the main API, except that it takes an `&[u8]` to search -on instead of an `&str`. By default, `.` will match any *byte* using -`regex::bytes::Regex`, while `.` will match any *UTF-8 encoded Unicode scalar -value* using the main API. +To match on arbitrary bytes, use the `regex::bytes::Regex` API. The API is +identical to the main API, except that it takes an `&[u8]` to search on instead +of an `&str`. The `&[u8]` APIs also permit disabling Unicode mode in the regex +even when the pattern would match invalid UTF-8. For example, `(?-u:.)` is +not allowed in `regex::Regex` but is allowed in `regex::bytes::Regex` since +`(?-u:.)` matches any byte except for `\n`. Conversely, `.` will match the +UTF-8 encoding of any Unicode scalar value except for `\n`. 
This example shows how to find all null-terminated strings in a slice of bytes: ```rust use regex::bytes::Regex; -let re = Regex::new(r"(?P[^\x00]+)\x00").unwrap(); -let text = b"foo\x00bar\x00baz\x00"; +let re = Regex::new(r"(?-u)(?[^\x00]+)\x00").unwrap(); +let text = b"foo\xFFbar\x00baz\x00"; // Extract all of the strings without the null terminator from each match. // The unwrap is OK here since a match requires the `cstr` capture to match. @@ -135,12 +132,12 @@ let cstrs: Vec<&[u8]> = re.captures_iter(text) .map(|c| c.name("cstr").unwrap().as_bytes()) .collect(); -assert_eq!(vec![&b"foo"[..], &b"bar"[..], &b"baz"[..]], cstrs); +assert_eq!(vec![&b"foo\xFFbar"[..], &b"baz"[..]], cstrs); ``` -Notice here that the `[^\x00]+` will match any *byte* except for `NUL`. When -using the main API, `[^\x00]+` would instead match any valid UTF-8 sequence -except for `NUL`. +Notice here that the `[^\x00]+` will match any *byte* except for `NUL`, +including bytes like `\xFF` which are not valid UTF-8. When using the main API, +`[^\x00]+` would instead match any valid UTF-8 sequence except for `NUL`. ### Usage: match multiple regular expressions simultaneously @@ -170,11 +167,15 @@ assert!(!matches.matched(5)); assert!(matches.matched(6)); ``` -### Usage: enable SIMD optimizations -SIMD optimizations are enabled automatically on Rust stable 1.27 and newer. -For nightly versions of Rust, this requires a recent version with the SIMD -features stabilized. +### Usage: regex internals as a library + +The [`regex-automata` directory](./regex-automata/) contains a crate that +exposes all of the internal matching engines used by the `regex` crate. The +idea is that the `regex` crate exposes a simple API for 99% of use cases, but +`regex-automata` exposes oodles of customizable behaviors. 
+ +[Documentation for `regex-automata`.](https://docs.rs/regex-automata) ### Usage: a regular expression parser @@ -186,7 +187,7 @@ This may be useful if you're implementing your own regex engine or otherwise need to do analysis on the syntax of a regular expression. It is otherwise not recommended for general use. -[Documentation `regex-syntax`.](https://docs.rs/regex-syntax) +[Documentation for `regex-syntax`.](https://docs.rs/regex-syntax) ### Crate features @@ -205,29 +206,114 @@ all such features, use the following `Cargo.toml` dependency configuration: [dependencies.regex] version = "1.3" default-features = false -# regex currently requires the standard library, you must re-enable it. +# Unless you have a specific reason not to, it's good sense to enable standard +# library support. It enables several optimizations and avoids spin locks. It +# also shouldn't meaningfully impact compile times or binary size. features = ["std"] ``` -This will reduce the dependency tree of `regex` down to a single crate -(`regex-syntax`). +This will reduce the dependency tree of `regex` down to two crates: +`regex-syntax` and `regex-automata`. The full set of features one can disable are -[in the "Crate features" section of the documentation](https://docs.rs/regex/*/#crate-features). +[in the "Crate features" section of the documentation](https://docs.rs/regex/1.*/#crate-features). + + +### Performance + +One of the goals of this crate is for the regex engine to be "fast." While that +is a somewhat nebulous goal, it is usually interpreted in one of two ways. +First, it means that all searches take worst case `O(m * n)` time, where +`m` is proportional to `len(regex)` and `n` is proportional to `len(haystack)`. +Second, it means that even aside from the time complexity constraint, regex +searches are "fast" in practice. + +While the first interpretation is pretty unambiguous, the second one remains +nebulous. 
While nebulous, it guides this crate's architecture and the sorts of +trade offs it makes. For example, here are some general architectural +statements that follow as a result of the goal to be "fast": + +* When given the choice between faster regex searches and faster _Rust compile +times_, this crate will generally choose faster regex searches. +* When given the choice between faster regex searches and faster _regex compile +times_, this crate will generally choose faster regex searches. That is, it is +generally acceptable for `Regex::new` to get a little slower if it means that +searches get faster. (This is a somewhat delicate balance to strike, because +the speed of `Regex::new` needs to remain somewhat reasonable. But this is why +one should avoid re-compiling the same regex over and over again.) +* When given the choice between faster regex searches and simpler API +design, this crate will generally choose faster regex searches. For example, +if one didn't care about performance, we could likely get rid of both of +the `Regex::is_match` and `Regex::find` APIs and instead just rely on +`Regex::captures`. + +There are perhaps more ways that being "fast" influences things. + +While this repository used to provide its own benchmark suite, it has since +been moved to [rebar](https://github.com/BurntSushi/rebar). The benchmarks are +quite extensive, and there are many more than what is shown in rebar's README +(which is just limited to a "curated" set meant to compare performance between +regex engines). To run all of this crate's benchmarks, first start by cloning +and installing `rebar`: +```text +$ git clone https://github.com/BurntSushi/rebar +$ cd rebar +$ cargo install --path ./ +``` -### Minimum Rust version policy +Then build the benchmark harness for just this crate: -This crate's minimum supported `rustc` version is `1.60.0`. 
+```text +$ rebar build -e '^rust/regex$' +``` + +Run all benchmarks for this crate as tests (each benchmark is executed once to +ensure it works): + +```text +$ rebar measure -e '^rust/regex$' -t +``` + +Record measurements for all benchmarks and save them to a CSV file: + +```text +$ rebar measure -e '^rust/regex$' | tee results.csv +``` + +Explore benchmark timings: + +```text +$ rebar cmp results.csv +``` + +See the `rebar` documentation for more details on how it works and how to +compare results with other regex engines. + + +### Hacking + +The `regex` crate is, for the most part, a pretty thin wrapper around the +[`meta::Regex`](https://docs.rs/regex-automata/latest/regex_automata/meta/struct.Regex.html) +from the +[`regex-automata` crate](https://docs.rs/regex-automata/latest/regex_automata/). +Therefore, if you're looking to work on the internals of this crate, you'll +likely either want to look in `regex-syntax` (for parsing) or `regex-automata` +(for construction of finite automata and the search routines). + +My [blog on regex internals](https://blog.burntsushi.net/regex-internals/) +goes into more depth. + + +### Minimum Rust version policy -The current **tentative** policy is that the minimum Rust version required -to use this crate can be increased in minor version updates. For example, if -regex 1.0 requires Rust 1.20.0, then regex 1.0.z for all values of `z` will -also require Rust 1.20.0 or newer. However, regex 1.y for `y > 0` may require a -newer minimum version of Rust. +This crate's minimum supported `rustc` version is `1.65.0`. -In general, this crate will be conservative with respect to the minimum -supported version of Rust. +The policy is that the minimum Rust version required to use this crate can be +increased in minor version updates. For example, if regex 1.0 requires Rust +1.20.0, then regex 1.0.z for all values of `z` will also require Rust 1.20.0 or +newer. However, regex 1.y for `y > 0` may require a newer minimum version of +Rust. 
### License diff --git a/vendor/regex/UNICODE.md b/vendor/regex/UNICODE.md index df7d21e..60db0aa 100644 --- a/vendor/regex/UNICODE.md +++ b/vendor/regex/UNICODE.md @@ -8,7 +8,8 @@ Full support for Level 1 ("Basic Unicode Support") is provided with two exceptions: 1. Line boundaries are not Unicode aware. Namely, only the `\n` - (`END OF LINE`) character is recognized as a line boundary. + (`END OF LINE`) character is recognized as a line boundary by default. + One can opt into `\r\n|\r|\n` being a line boundary via CRLF mode. 2. The compatibility properties specified by [RL1.2a](https://unicode.org/reports/tr18/#RL1.2a) are ASCII-only definitions. @@ -229,12 +230,10 @@ then all characters classes are case folded as well. [UTS#18 RL1.6](https://unicode.org/reports/tr18/#Line_Boundaries) The regex crate only provides support for recognizing the `\n` (`END OF LINE`) -character as a line boundary. This choice was made mostly for implementation -convenience, and to avoid performance cliffs that Unicode word boundaries are -subject to. - -Ideally, it would be nice to at least support `\r\n` as a line boundary as -well, and in theory, this could be done efficiently. +character as a line boundary by default. One can also opt into treating +`\r\n|\r|\n` as a line boundary via CRLF mode. This choice was made mostly for +implementation convenience, and to avoid performance cliffs that Unicode word +boundaries are subject to. 
## RL1.7 Code Points diff --git a/vendor/regex/bench/README.md b/vendor/regex/bench/README.md new file mode 100644 index 0000000..3cc6a1a --- /dev/null +++ b/vendor/regex/bench/README.md @@ -0,0 +1,2 @@ +Benchmarks for this crate have been moved into the rebar project: +https://github.com/BurntSushi/rebar diff --git a/vendor/regex/examples/regexdna-input.txt b/vendor/regex/examples/regexdna-input.txt deleted file mode 100644 index fb23263..0000000 --- a/vendor/regex/examples/regexdna-input.txt +++ /dev/null @@ -1,1671 +0,0 @@ ->ONE Homo sapiens alu -GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGA -TCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACT -AAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAG -GCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCG -CCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGT -GGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCA -GGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAA -TTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAG -AATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCA -GCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGT -AATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACC -AGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTG -GTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACC -CGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAG -AGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTT -TGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACA -TGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCT -GTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGG -TTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGT -CTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGG -CGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCG -TCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTA -CTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCG 
-AGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCG -GGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACC -TGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAA -TACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGA -GGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACT -GCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTC -ACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGT -TCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGC -CGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCG -CTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTG -GGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCC -CAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCT -GGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGC -GCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGA -GGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGA -GACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGA -GGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTG -AAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAAT -CCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCA -GTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAA -AAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGC -GGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCT -ACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGG -GAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATC -GCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGC -GGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGG -TCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAA -AAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAG -GAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACT -CCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCC -TGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAG -ACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGC 
-GTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGA -ACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGA -CAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCA -CTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCA -ACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCG -CCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGG -AGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTC -CGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCG -AGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACC -CCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAG -CTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAG -CCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGG -CCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATC -ACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAA -AAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGC -TGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCC -ACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGG -CTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGG -AGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATT -AGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAA -TCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGC -CTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAA -TCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAG -CCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGT -GGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCG -GGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAG -CGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTG -GGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATG -GTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGT -AATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTT -GCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCT -CAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCG 
-GGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTC -TCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACT -CGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAG -ATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGG -CGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTG -AGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATA -CAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGG -CAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGC -ACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCAC -GCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTC -GAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCG -GGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCT -TGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGG -CGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCA -GCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGG -CCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGC -GCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGG -CGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGA -CTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGG -CCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAA -ACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCC -CAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGT -GAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAA -AGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGG -ATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTAC -TAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGA -GGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGC -GCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGG -TGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTC -AGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAA -ATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGA -GAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC 
-AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTG -TAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGAC -CAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGT -GGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAAC -CCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACA -GAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACT -TTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAAC -ATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCC -TGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAG -GTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCG -TCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAG -GCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCC -GTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCT -ACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCC -GAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCC -GGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCAC -CTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAA -ATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTG -AGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCAC -TGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCT -CACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAG -TTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAG -CCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATC -GCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCT -GGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATC -CCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCC -TGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGG -CGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG -AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCG -AGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGG -AGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGT -GAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAA 
-TCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGC -AGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCA -AAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGG -CGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTC -TACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCG -GGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGAT -CGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCG -CGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAG -GTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACA -AAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCA -GGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCAC -TCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGC -CTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGA -GACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGG -CGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTG -AACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCG -ACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGC -ACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCC -AACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGC -GCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCG -GAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACT -CCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCC -GAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAAC -CCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA -GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGA -GCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAG -GCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGAT -CACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTA -AAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGG -CTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGC -CACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTG -GCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAG 
-GAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAAT -TAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGA -ATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAG -CCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTA -ATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCA -GCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGG -TGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCC -GGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGA -GCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTT -GGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACAT -GGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTG -TAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGT -TGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTC -TCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGC -GGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGT -CTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTAC -TCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGA -GATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGG -GCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCT -GAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT -ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAG -GCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTG -CACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCA -CGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTT -CGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCC -GGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGC -TTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGG -GCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCC -AGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTG -GCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCG -CGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAG -GCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAG 
-ACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAG -GCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGA -AACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATC -CCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAG -TGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAA -AAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCG -GATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTA -CTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGG -AGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCG -CGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCG -GTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGT -CAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAA -AATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGG -AGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTC -CAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCT -GTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA -CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCG -TGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAA -CCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGAC -AGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCAC -TTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAA -CATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGC -CTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGA -GGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCC -GTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGA -GGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCC -CGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGC -TACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGC -CGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGC -CGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCA -CCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAA -AATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCT 
-GAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCA -CTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGC -TCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGA -GTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTA -GCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAAT -CGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCC -TGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAAT -CCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGC -CTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTG -GCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGG -GAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGC -GAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG -GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGG -TGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTA -ATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTG -CAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTC -AAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGG -GCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCT -CTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTC -GGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGA -TCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGC -GCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGA -GGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATAC -AAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGC -AGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCA -CTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACG -CCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCG -AGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGG -GCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTT -GAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGC -GACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAG -CACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGC 
-CAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCG -CGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGC -GGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGAC -TCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGC -CGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAA -CCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCC -AGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTG -AGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA -GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGA -TCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACT -AAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAG -GCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCG -CCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGT -GGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCA -GGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAA -TTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAG -AATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCA -GCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGT -AATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACC -AGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTG -GTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACC -CGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAG -AGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTT -TGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACA -TGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCT -GTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGG -TTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGT -CTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGG -CGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCG -TCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTA -CTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCG -AGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCG 
-GGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACC -TGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAA -TACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGA -GGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACT -GCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTC -ACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGT -TCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGC -CGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCG -CTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTG -GGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCC -CAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCT -GGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGC -GCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGA -GGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGA -GACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGA -GGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTG -AAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAAT -CCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCA -GTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAA -AAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGC -GGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCT -ACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGG -GAGGCTGAGGCAGGAGAATC ->TWO IUB ambiguity codes -cttBtatcatatgctaKggNcataaaSatgtaaaDcDRtBggDtctttataattcBgtcg -tactDtDagcctatttSVHtHttKtgtHMaSattgWaHKHttttagacatWatgtRgaaa -NtactMcSMtYtcMgRtacttctWBacgaaatatagScDtttgaagacacatagtVgYgt -cattHWtMMWcStgttaggKtSgaYaaccWStcgBttgcgaMttBYatcWtgacaYcaga -gtaBDtRacttttcWatMttDBcatWtatcttactaBgaYtcttgttttttttYaaScYa -HgtgttNtSatcMtcVaaaStccRcctDaataataStcYtRDSaMtDttgttSagtRRca -tttHatSttMtWgtcgtatSSagactYaaattcaMtWatttaSgYttaRgKaRtccactt -tattRggaMcDaWaWagttttgacatgttctacaaaRaatataataaMttcgDacgaSSt -acaStYRctVaNMtMgtaggcKatcttttattaaaaagVWaHKYagtttttatttaacct 
-tacgtVtcVaattVMBcttaMtttaStgacttagattWWacVtgWYagWVRctDattBYt -gtttaagaagattattgacVatMaacattVctgtBSgaVtgWWggaKHaatKWcBScSWa -accRVacacaaactaccScattRatatKVtactatatttHttaagtttSKtRtacaaagt -RDttcaaaaWgcacatWaDgtDKacgaacaattacaRNWaatHtttStgttattaaMtgt -tgDcgtMgcatBtgcttcgcgaDWgagctgcgaggggVtaaScNatttacttaatgacag -cccccacatYScaMgtaggtYaNgttctgaMaacNaMRaacaaacaKctacatagYWctg -ttWaaataaaataRattagHacacaagcgKatacBttRttaagtatttccgatctHSaat -actcNttMaagtattMtgRtgaMgcataatHcMtaBSaRattagttgatHtMttaaKagg -YtaaBataSaVatactWtataVWgKgttaaaacagtgcgRatatacatVtHRtVYataSa -KtWaStVcNKHKttactatccctcatgWHatWaRcttactaggatctataDtDHBttata -aaaHgtacVtagaYttYaKcctattcttcttaataNDaaggaaaDYgcggctaaWSctBa -aNtgctggMBaKctaMVKagBaactaWaDaMaccYVtNtaHtVWtKgRtcaaNtYaNacg -gtttNattgVtttctgtBaWgtaattcaagtcaVWtactNggattctttaYtaaagccgc -tcttagHVggaYtgtNcDaVagctctctKgacgtatagYcctRYHDtgBattDaaDgccK -tcHaaStttMcctagtattgcRgWBaVatHaaaataYtgtttagMDMRtaataaggatMt -ttctWgtNtgtgaaaaMaatatRtttMtDgHHtgtcattttcWattRSHcVagaagtacg -ggtaKVattKYagactNaatgtttgKMMgYNtcccgSKttctaStatatNVataYHgtNa -BKRgNacaactgatttcctttaNcgatttctctataScaHtataRagtcRVttacDSDtt -aRtSatacHgtSKacYagttMHtWataggatgactNtatSaNctataVtttRNKtgRacc -tttYtatgttactttttcctttaaacatacaHactMacacggtWataMtBVacRaSaatc -cgtaBVttccagccBcttaRKtgtgcctttttRtgtcagcRttKtaaacKtaaatctcac -aattgcaNtSBaaccgggttattaaBcKatDagttactcttcattVtttHaaggctKKga -tacatcBggScagtVcacattttgaHaDSgHatRMaHWggtatatRgccDttcgtatcga -aacaHtaagttaRatgaVacttagattVKtaaYttaaatcaNatccRttRRaMScNaaaD -gttVHWgtcHaaHgacVaWtgttScactaagSgttatcttagggDtaccagWattWtRtg -ttHWHacgattBtgVcaYatcggttgagKcWtKKcaVtgaYgWctgYggVctgtHgaNcV -taBtWaaYatcDRaaRtSctgaHaYRttagatMatgcatttNattaDttaattgttctaa -ccctcccctagaWBtttHtBccttagaVaatMcBHagaVcWcagBVttcBtaYMccagat -gaaaaHctctaacgttagNWRtcggattNatcRaNHttcagtKttttgWatWttcSaNgg -gaWtactKKMaacatKatacNattgctWtatctaVgagctatgtRaHtYcWcttagccaa -tYttWttaWSSttaHcaaaaagVacVgtaVaRMgattaVcDactttcHHggHRtgNcctt -tYatcatKgctcctctatVcaaaaKaaaagtatatctgMtWtaaaacaStttMtcgactt 
-taSatcgDataaactaaacaagtaaVctaggaSccaatMVtaaSKNVattttgHccatca -cBVctgcaVatVttRtactgtVcaattHgtaaattaaattttYtatattaaRSgYtgBag -aHSBDgtagcacRHtYcBgtcacttacactaYcgctWtattgSHtSatcataaatataHt -cgtYaaMNgBaatttaRgaMaatatttBtttaaaHHKaatctgatWatYaacttMctctt -ttVctagctDaaagtaVaKaKRtaacBgtatccaaccactHHaagaagaaggaNaaatBW -attccgStaMSaMatBttgcatgRSacgttVVtaaDMtcSgVatWcaSatcttttVatag -ttactttacgatcaccNtaDVgSRcgVcgtgaacgaNtaNatatagtHtMgtHcMtagaa -attBgtataRaaaacaYKgtRccYtatgaagtaataKgtaaMttgaaRVatgcagaKStc -tHNaaatctBBtcttaYaBWHgtVtgacagcaRcataWctcaBcYacYgatDgtDHccta -aagacYRcaggattHaYgtKtaatgcVcaataMYacccatatcacgWDBtgaatcBaata -cKcttRaRtgatgaBDacggtaattaaYtataStgVHDtDctgactcaaatKtacaatgc -gYatBtRaDatHaactgtttatatDttttaaaKVccYcaaccNcBcgHaaVcattHctcg -attaaatBtatgcaaaaatYMctSactHatacgaWacattacMBgHttcgaatVaaaaca -BatatVtctgaaaaWtctRacgBMaatSgRgtgtcgactatcRtattaScctaStagKga -DcWgtYtDDWKRgRtHatRtggtcgaHgggcgtattaMgtcagccaBggWVcWctVaaat -tcgNaatcKWagcNaHtgaaaSaaagctcYctttRVtaaaatNtataaccKtaRgtttaM -tgtKaBtRtNaggaSattHatatWactcagtgtactaKctatttgRYYatKatgtccgtR -tttttatttaatatVgKtttgtatgtNtataRatWYNgtRtHggtaaKaYtKSDcatcKg -taaYatcSRctaVtSMWtVtRWHatttagataDtVggacagVcgKWagBgatBtaaagNc -aRtagcataBggactaacacRctKgttaatcctHgDgttKHHagttgttaatgHBtatHc -DaagtVaBaRccctVgtgDtacRHSctaagagcggWYaBtSaKtHBtaaactYacgNKBa -VYgtaacttagtVttcttaatgtBtatMtMtttaattaatBWccatRtttcatagVgMMt -agctStKctaMactacDNYgKYHgaWcgaHgagattacVgtttgtRaSttaWaVgataat -gtgtYtaStattattMtNgWtgttKaccaatagNYttattcgtatHcWtctaaaNVYKKt -tWtggcDtcgaagtNcagatacgcattaagaccWctgcagcttggNSgaNcHggatgtVt -catNtRaaBNcHVagagaaBtaaSggDaatWaatRccaVgggStctDaacataKttKatt -tggacYtattcSatcttagcaatgaVBMcttDattctYaaRgatgcattttNgVHtKcYR -aatRKctgtaaacRatVSagctgtWacBtKVatctgttttKcgtctaaDcaagtatcSat -aWVgcKKataWaYttcccSaatgaaaacccWgcRctWatNcWtBRttYaattataaNgac -acaatagtttVNtataNaYtaatRaVWKtBatKagtaatataDaNaaaaataMtaagaaS -tccBcaatNgaataWtHaNactgtcDtRcYaaVaaaaaDgtttRatctatgHtgttKtga -aNSgatactttcgagWaaatctKaaDaRttgtggKKagcDgataaattgSaacWaVtaNM 
-acKtcaDaaatttctRaaVcagNacaScRBatatctRatcctaNatWgRtcDcSaWSgtt -RtKaRtMtKaatgttBHcYaaBtgatSgaSWaScMgatNtctcctatttctYtatMatMt -RRtSaattaMtagaaaaStcgVgRttSVaScagtgDtttatcatcatacRcatatDctta -tcatVRtttataaHtattcYtcaaaatactttgVctagtaaYttagatagtSYacKaaac -gaaKtaaatagataatSatatgaaatSgKtaatVtttatcctgKHaatHattagaaccgt -YaaHactRcggSBNgtgctaaBagBttgtRttaaattYtVRaaaattgtaatVatttctc -ttcatgBcVgtgKgaHaaatattYatagWacNctgaaMcgaattStagWaSgtaaKagtt -ttaagaDgatKcctgtaHtcatggKttVDatcaaggtYcgccagNgtgcVttttagagat -gctaccacggggtNttttaSHaNtatNcctcatSaaVgtactgBHtagcaYggYVKNgta -KBcRttgaWatgaatVtagtcgattYgatgtaatttacDacSctgctaaaStttaWMagD -aaatcaVYctccgggcgaVtaaWtStaKMgDtttcaaMtVgBaatccagNaaatcYRMBg -gttWtaaScKttMWtYataRaDBMaDataatHBcacDaaKDactaMgagttDattaHatH -taYatDtattDcRNStgaatattSDttggtattaaNSYacttcDMgYgBatWtaMagact -VWttctttgYMaYaacRgHWaattgRtaagcattctMKVStatactacHVtatgatcBtV -NataaBttYtSttacKgggWgYDtgaVtYgatDaacattYgatggtRDaVDttNactaSa -MtgNttaacaaSaBStcDctaccacagacgcaHatMataWKYtaYattMcaMtgSttDag -cHacgatcaHttYaKHggagttccgatYcaatgatRaVRcaagatcagtatggScctata -ttaNtagcgacgtgKaaWaactSgagtMYtcttccaKtStaacggMtaagNttattatcg -tctaRcactctctDtaacWYtgaYaSaagaWtNtatttRacatgNaatgttattgWDDcN -aHcctgaaHacSgaataaRaataMHttatMtgaSDSKatatHHaNtacagtccaYatWtc -actaactatKDacSaStcggataHgYatagKtaatKagStaNgtatactatggRHacttg -tattatgtDVagDVaRctacMYattDgtttYgtctatggtKaRSttRccRtaaccttaga -gRatagSaaMaacgcaNtatgaaatcaRaagataatagatactcHaaYKBctccaagaRa -BaStNagataggcgaatgaMtagaatgtcaKttaaatgtaWcaBttaatRcggtgNcaca -aKtttScRtWtgcatagtttWYaagBttDKgcctttatMggNttattBtctagVtacata -aaYttacacaaRttcYtWttgHcaYYtaMgBaBatctNgcDtNttacgacDcgataaSat -YaSttWtcctatKaatgcagHaVaacgctgcatDtgttaSataaaaYSNttatagtaNYt -aDaaaNtggggacttaBggcHgcgtNtaaMcctggtVtaKcgNacNtatVaSWctWtgaW -cggNaBagctctgaYataMgaagatBSttctatacttgtgtKtaattttRagtDtacata -tatatgatNHVgBMtKtaKaNttDHaagatactHaccHtcatttaaagttVaMcNgHata -tKtaNtgYMccttatcaaNagctggacStttcNtggcaVtattactHaSttatgNMVatt -MMDtMactattattgWMSgtHBttStStgatatRaDaagattttctatMtaaaaaggtac 
-taaVttaSacNaatactgMttgacHaHRttgMacaaaatagttaatatWKRgacDgaRta -tatttattatcYttaWtgtBRtWatgHaaattHataagtVaDtWaVaWtgStcgtMSgaS -RgMKtaaataVacataatgtaSaatttagtcgaaHtaKaatgcacatcggRaggSKctDc -agtcSttcccStYtccRtctctYtcaaKcgagtaMttttcRaYDttgttatctaatcata -NctctgctatcaMatactataggDaHaaSttMtaDtcNatataattctMcStaaBYtaNa -gatgtaatHagagSttgWHVcttatKaYgDctcttggtgttMcRaVgSgggtagacaata -aDtaattSaDaNaHaBctattgNtaccaaRgaVtKNtaaYggHtaKKgHcatctWtctDt -ttctttggSDtNtaStagttataaacaattgcaBaBWggHgcaaaBtYgctaatgaaatW -cDcttHtcMtWWattBHatcatcaaatctKMagtDNatttWaBtHaaaNgMttaaStagt -tctctaatDtcRVaYttgttMtRtgtcaSaaYVgSWDRtaatagctcagDgcWWaaaBaa -RaBctgVgggNgDWStNaNBKcBctaaKtttDcttBaaggBttgaccatgaaaNgttttt -tttatctatgttataccaaDRaaSagtaVtDtcaWatBtacattaWacttaSgtattggD -gKaaatScaattacgWcagKHaaccaYcRcaRttaDttRtttHgaHVggcttBaRgtccc -tDatKaVtKtcRgYtaKttacgtatBtStaagcaattaagaRgBagSaattccSWYttta -ttVaataNctgHgttaaNBgcVYgtRtcccagWNaaaacaDNaBcaaaaRVtcWMgBagM -tttattacgDacttBtactatcattggaaatVccggttRttcatagttVYcatYaSHaHc -ttaaagcNWaHataaaRWtctVtRYtagHtaaaYMataHYtNBctNtKaatattStgaMc -BtRgctaKtgcScSttDgYatcVtggaaKtaagatWccHccgKYctaNNctacaWctttt -gcRtgtVcgaKttcMRHgctaHtVaataaDtatgKDcttatBtDttggNtacttttMtga -acRattaaNagaactcaaaBBVtcDtcgaStaDctgaaaSgttMaDtcgttcaccaaaag -gWtcKcgSMtcDtatgtttStaaBtatagDcatYatWtaaaBacaKgcaDatgRggaaYc -taRtccagattDaWtttggacBaVcHtHtaacDacYgtaatataMagaatgHMatcttat -acgtatttttatattacHactgttataMgStYaattYaccaattgagtcaaattaYtgta -tcatgMcaDcgggtcttDtKgcatgWRtataatatRacacNRBttcHtBgcRttgtgcgt -catacMtttBctatctBaatcattMttMYgattaaVYatgDaatVagtattDacaacDMa -tcMtHcccataagatgBggaccattVWtRtSacatgctcaaggggYtttDtaaNgNtaaB -atggaatgtctRtaBgBtcNYatatNRtagaacMgagSaSDDSaDcctRagtVWSHtVSR -ggaacaBVaccgtttaStagaacaMtactccagtttVctaaRaaHttNcttagcaattta -ttaatRtaaaatctaacDaBttggSagagctacHtaaRWgattcaaBtctRtSHaNtgta -cattVcaHaNaagtataccacaWtaRtaaVKgMYaWgttaKggKMtKcgWatcaDatYtK -SttgtacgaccNctSaattcDcatcttcaaaDKttacHtggttHggRRaRcaWacaMtBW -VHSHgaaMcKattgtaRWttScNattBBatYtaNRgcggaagacHSaattRtttcYgacc 
-BRccMacccKgatgaacttcgDgHcaaaaaRtatatDtatYVtttttHgSHaSaatagct -NYtaHYaVYttattNtttgaaaYtaKttWtctaNtgagaaaNctNDctaaHgttagDcRt -tatagccBaacgcaRBtRctRtggtaMYYttWtgataatcgaataattattataVaaaaa -ttacNRVYcaaMacNatRttcKatMctgaagactaattataaYgcKcaSYaatMNctcaa -cgtgatttttBacNtgatDccaattattKWWcattttatatatgatBcDtaaaagttgaa -VtaHtaHHtBtataRBgtgDtaataMttRtDgDcttattNtggtctatctaaBcatctaR -atgNacWtaatgaagtcMNaacNgHttatactaWgcNtaStaRgttaaHacccgaYStac -aaaatWggaYaWgaattattcMaactcBKaaaRVNcaNRDcYcgaBctKaacaaaaaSgc -tccYBBHYaVagaatagaaaacagYtctVccaMtcgtttVatcaatttDRtgWctagtac -RttMctgtDctttcKtWttttataaatgVttgBKtgtKWDaWagMtaaagaaattDVtag -gttacatcatttatgtcgMHaVcttaBtVRtcgtaYgBRHatttHgaBcKaYWaatcNSc -tagtaaaaatttacaatcactSWacgtaatgKttWattagttttNaggtctcaagtcact -attcttctaagKggaataMgtttcataagataaaaatagattatDgcBVHWgaBKttDgc -atRHaagcaYcRaattattatgtMatatattgHDtcaDtcaaaHctStattaatHaccga -cNattgatatattttgtgtDtRatagSacaMtcRtcattcccgacacSattgttKaWatt -NHcaacttccgtttSRtgtctgDcgctcaaMagVtBctBMcMcWtgtaacgactctcttR -ggRKSttgYtYatDccagttDgaKccacgVatWcataVaaagaataMgtgataaKYaaat -cHDaacgataYctRtcYatcgcaMgtNttaBttttgatttaRtStgcaacaaaataccVg -aaDgtVgDcStctatatttattaaaaRKDatagaaagaKaaYYcaYSgKStctccSttac -agtcNactttDVttagaaagMHttRaNcSaRaMgBttattggtttaRMggatggcKDgWR -tNaataataWKKacttcKWaaagNaBttaBatMHtccattaacttccccYtcBcYRtaga -ttaagctaaYBDttaNtgaaaccHcaRMtKtaaHMcNBttaNaNcVcgVttWNtDaBatg -ataaVtcWKcttRggWatcattgaRagHgaattNtatttctctattaattaatgaDaaMa -tacgttgggcHaYVaaNaDDttHtcaaHtcVVDgBVagcMacgtgttaaBRNtatRtcag -taagaggtttaagacaVaaggttaWatctccgtVtaDtcDatttccVatgtacNtttccg -tHttatKgScBatgtVgHtYcWagcaKtaMYaaHgtaattaSaHcgcagtWNaatNccNN -YcacgVaagaRacttctcattcccRtgtgtaattagcSttaaStWaMtctNNcSMacatt -ataaactaDgtatWgtagtttaagaaaattgtagtNagtcaataaatttgatMMYactaa -tatcggBWDtVcYttcDHtVttatacYaRgaMaacaStaatcRttttVtagaDtcacWat -ttWtgaaaagaaagNRacDtttStVatBaDNtaactatatcBSMcccaSttccggaMatg -attaaWatKMaBaBatttgataNctgttKtVaagtcagScgaaaDggaWgtgttttKtWt -atttHaatgtagttcactaaKMagttSYBtKtaYgaactcagagRtatagtVtatcaaaW 
-YagcgNtaDagtacNSaaYDgatBgtcgataacYDtaaactacagWDcYKaagtttatta -gcatcgagttKcatDaattgattatDtcagRtWSKtcgNtMaaaaacaMttKcaWcaaSV -MaaaccagMVtaMaDtMaHaBgaacataBBVtaatVYaNSWcSgNtDNaaKacacBttta -tKtgtttcaaHaMctcagtaacgtcgYtactDcgcctaNgagagcYgatattttaaattt -ccattttacatttDaaRctattttWctttacgtDatYtttcagacgcaaVttagtaaKaa -aRtgVtccataBggacttatttgtttaWNtgttVWtaWNVDaattgtatttBaagcBtaa -BttaaVatcHcaVgacattccNggtcgacKttaaaRtagRtctWagaYggtgMtataatM -tgaaRttattttgWcttNtDRRgMDKacagaaaaggaaaRStcccagtYccVattaNaaK -StNWtgacaVtagaagcttSaaDtcacaacgDYacWDYtgtttKatcVtgcMaDaSKStV -cgtagaaWaKaagtttcHaHgMgMtctataagBtKaaaKKcactggagRRttaagaBaaN -atVVcgRcKSttDaactagtSttSattgttgaaRYatggttVttaataaHttccaagDtg -atNWtaagHtgcYtaactRgcaatgMgtgtRaatRaNaacHKtagactactggaatttcg -ccataacgMctRgatgttaccctaHgtgWaYcactcacYaattcttaBtgacttaaacct -gYgaWatgBttcttVttcgttWttMcNYgtaaaatctYgMgaaattacNgaHgaacDVVM -tttggtHtctaaRgtacagacgHtVtaBMNBgattagcttaRcttacaHcRctgttcaaD -BggttKaacatgKtttYataVaNattccgMcgcgtagtRaVVaattaKaatggttRgaMc -agtatcWBttNtHagctaatctagaaNaaacaYBctatcgcVctBtgcaaagDgttVtga -HtactSNYtaaNccatgtgDacgaVtDcgKaRtacDcttgctaagggcagMDagggtBWR -tttSgccttttttaacgtcHctaVtVDtagatcaNMaVtcVacatHctDWNaataRgcgt -aVHaggtaaaaSgtttMtattDgBtctgatSgtRagagYtctSaKWaataMgattRKtaa -catttYcgtaacacattRWtBtcggtaaatMtaaacBatttctKagtcDtttgcBtKYYB -aKttctVttgttaDtgattttcttccacttgSaaacggaaaNDaattcYNNaWcgaaYat -tttMgcBtcatRtgtaaagatgaWtgaccaYBHgaatagataVVtHtttVgYBtMctaMt -cctgaDcYttgtccaaaRNtacagcMctKaaaggatttacatgtttaaWSaYaKttBtag -DacactagctMtttNaKtctttcNcSattNacttggaacaatDagtattRtgSHaataat -gccVgacccgatactatccctgtRctttgagaSgatcatatcgDcagWaaHSgctYYWta -tHttggttctttatVattatcgactaagtgtagcatVgtgHMtttgtttcgttaKattcM -atttgtttWcaaStNatgtHcaaaDtaagBaKBtRgaBgDtSagtatMtaacYaatYtVc -KatgtgcaacVaaaatactKcRgtaYtgtNgBBNcKtcttaccttKgaRaYcaNKtactt -tgagSBtgtRagaNgcaaaNcacagtVtttHWatgttaNatBgtttaatNgVtctgaata -tcaRtattcttttttttRaaKcRStctcggDgKagattaMaaaKtcaHacttaataataK -taRgDtKVBttttcgtKaggHHcatgttagHggttNctcgtatKKagVagRaaaggaaBt 
-NatttVKcRttaHctaHtcaaatgtaggHccaBataNaNaggttgcWaatctgatYcaaa -HaatWtaVgaaBttagtaagaKKtaaaKtRHatMaDBtBctagcatWtatttgWttVaaa -ScMNattRactttgtYtttaaaagtaagtMtaMaSttMBtatgaBtttaKtgaatgagYg -tNNacMtcNRacMMHcttWtgtRtctttaacaacattattcYaMagBaacYttMatcttK -cRMtgMNccattaRttNatHaHNaSaaHMacacaVaatacaKaSttHatattMtVatWga -ttttttaYctttKttHgScWaacgHtttcaVaaMgaacagNatcgttaacaaaaagtaca -HBNaattgttKtcttVttaaBtctgctacgBgcWtttcaggacacatMgacatcccagcg -gMgaVKaBattgacttaatgacacacaaaaaatRKaaBctacgtRaDcgtagcVBaacDS -BHaaaaSacatatacagacRNatcttNaaVtaaaataHattagtaaaaSWccgtatWatg -gDttaactattgcccatcttHaSgYataBttBaactattBtcHtgatcaataSttaBtat -KSHYttWggtcYtttBttaataccRgVatStaHaKagaatNtagRMNgtcttYaaSaact -cagDSgagaaYtMttDtMRVgWKWtgMaKtKaDttttgactatacataatcNtatNaHat -tVagacgYgatatatttttgtStWaaatctWaMgagaRttRatacgStgattcttaagaD -taWccaaatRcagcagaaNKagtaaDggcgccBtYtagSBMtactaaataMataBSacRM -gDgattMMgtcHtcaYDtRaDaacggttDaggcMtttatgttaNctaattaVacgaaMMt -aatDccSgtattgaRtWWaccaccgagtactMcgVNgctDctaMScatagcgtcaactat -acRacgHRttgctatttaatgaattataYKttgtaagWgtYttgcHgMtaMattWaWVta -RgcttgYgttBHtYataSccStBtgtagMgtDtggcVaaSBaatagDttgBgtctttctc -attttaNagtHKtaMWcYactVcgcgtatMVtttRacVagDaatcttgctBBcRDgcaac -KttgatSKtYtagBMagaRtcgBattHcBWcaactgatttaatttWDccatttatcgagS -KaWttataHactaHMttaatHtggaHtHagaatgtKtaaRactgtttMatacgatcaagD -gatKaDctataMggtHDtggHacctttRtatcttYattttgacttgaaSaataaatYcgB -aaaaccgNatVBttMacHaKaataagtatKgtcaagactcttaHttcggaattgttDtct -aaccHttttWaaatgaaatataaaWattccYDtKtaaaacggtgaggWVtctattagtga -ctattaagtMgtttaagcatttgSgaaatatccHaaggMaaaattttcWtatKctagDtY -tMcctagagHcactttactatacaaacattaacttaHatcVMYattYgVgtMttaaRtga -aataaDatcaHgtHHatKcDYaatcttMtNcgatYatgSaMaNtcttKcWataScKggta -tcttacgcttWaaagNatgMgHtctttNtaacVtgttcMaaRatccggggactcMtttaY -MtcWRgNctgNccKatcttgYDcMgattNYaRagatHaaHgKctcataRDttacatBatc -cattgDWttatttaWgtcggagaaaaatacaatacSNtgggtttccttacSMaagBatta -caMaNcactMttatgaRBacYcYtcaaaWtagctSaacttWgDMHgaggatgBVgcHaDt -ggaactttggtcNatNgtaKaBcccaNtaagttBaacagtatacDYttcctNgWgcgSMc 
-acatStctHatgRcNcgtacacaatRttMggaNKKggataaaSaYcMVcMgtaMaHtgat -tYMatYcggtcttcctHtcDccgtgRatcattgcgccgatatMaaYaataaYSggatagc -gcBtNtaaaScaKgttBgagVagttaKagagtatVaactaSacWactSaKatWccaKaaa -atBKgaaKtDMattttgtaaatcRctMatcaaMagMttDgVatggMaaWgttcgaWatga -aatttgRtYtattaWHKcRgctacatKttctaccaaHttRatctaYattaaWatVNccat -NgagtcKttKataStRaatatattcctRWatDctVagttYDgSBaatYgttttgtVaatt -taatagcagMatRaacttBctattgtMagagattaaactaMatVtHtaaatctRgaaaaa -aaatttWacaacaYccYDSaattMatgaccKtaBKWBattgtcaagcHKaagttMMtaat -ttcKcMagNaaKagattggMagaggtaatttYacatcWaaDgatMgKHacMacgcVaaca -DtaDatatYggttBcgtatgWgaSatttgtagaHYRVacaRtctHaaRtatgaactaata -tctSSBgggaaHMWtcaagatKgagtDaSatagttgattVRatNtctMtcSaagaSHaat -aNataataRaaRgattctttaataaagWaRHcYgcatgtWRcttgaaggaMcaataBRaa -ccagStaaacNtttcaatataYtaatatgHaDgcStcWttaacctaRgtYaRtataKtgM -ttttatgactaaaatttacYatcccRWtttHRtattaaatgtttatatttgttYaatMca -RcSVaaDatcgtaYMcatgtagacatgaaattgRtcaaYaaYtRBatKacttataccaNa -aattVaBtctggacaagKaaYaaatatWtMtatcYaaVNtcgHaactBaagKcHgtctac -aatWtaDtSgtaHcataHtactgataNctRgttMtDcDttatHtcgtacatcccaggStt -aBgtcacacWtccNMcNatMVaVgtccDYStatMaccDatggYaRKaaagataRatttHK -tSaaatDgataaacttaHgttgVBtcttVttHgDacgaKatgtatatNYataactctSat -atatattgcHRRYttStggaactHgttttYtttaWtatMcttttctatctDtagVHYgMR -BgtHttcctaatYRttKtaagatggaVRataKDctaMtKBNtMtHNtWtttYcVtattMc -gRaacMcctNSctcatttaaagDcaHtYccSgatgcaatYaaaaDcttcgtaWtaattct -cgttttScttggtaatctttYgtctaactKataHacctMctcttacHtKataacacagcN -RatgKatttttSaaatRYcgDttaMRcgaaattactMtgcgtaagcgttatBtttttaat -taagtNacatHgttcRgacKcBBtVgatKttcgaBaatactDRgtRtgaNacWtcacYtt -aaKcgttctHaKttaNaMgWgWaggtctRgaKgWttSttBtDcNtgtttacaaatYcDRt -gVtgcctattcNtctaaaDMNttttNtggctgagaVctDaacVtWccaagtaacacaNct -gaScattccDHcVBatcgatgtMtaatBgHaatDctMYgagaatgYWKcctaatNaStHa -aaKccgHgcgtYaaYtattgtStgtgcaaRtattaKatattagaWVtcaMtBagttatta -gNaWHcVgcaattttDcMtgtaRHVYtHtctgtaaaaHVtMKacatcgNaatttMatatg -ttgttactagWYtaRacgataKagYNKcattataNaRtgaacKaYgcaaYYacaNccHat -MatDcNgtHttRaWttagaaDcaaaaaatagggtKDtStaDaRtaVtHWKNtgtattVct 
-SVgRgataDaRaWataBgaagaaKtaataaYgDcaStaNgtaDaaggtattHaRaWMYaY -aWtggttHYgagVtgtgcttttcaaDKcagVcgttagacNaaWtagtaataDttctggtt -VcatcataaagtgKaaaNaMtaBBaattaatWaattgctHaVKaSgDaaVKaHtatatat -HatcatSBagNgHtatcHYMHgttDgtaHtBttWatcgtttaRaattgStKgSKNWKatc -agDtctcagatttctRtYtBatBgHHtKaWtgYBgacVVWaKtacKcDttKMaKaVcggt -gttataagaataaHaatattagtataatMHgttYgaRttagtaRtcaaVatacggtcMcg -agtaaRttacWgactKRYataaaagSattYaWgagatYagKagatgSaagKgttaatMgg -tataatgttWYttatgagaaacctNVataatHcccKtDctcctaatactggctHggaSag -gRtKHaWaattcgSatMatttagaggcYtctaMcgctcataSatatgRagacNaaDagga -VBagaYttKtacNaKgtSYtagttggaWcatcWttaatctatgaVtcgtgtMtatcaYcg -tRccaaYgDctgcMgtgtWgacWtgataacacgcgctBtgttaKtYDtatDcatcagKaV -MctaatcttgVcaaRgcRMtDcgattaHttcaNatgaatMtactacVgtRgatggaWttt -actaaKatgagSaaKggtaNtactVaYtaaKRagaacccacaMtaaMtKtatBcttgtaa -WBtMctaataaVcDaaYtcRHBtcgttNtaaHatttBNgRStVDattBatVtaagttaYa -tVattaagaBcacggtSgtVtatttaRattgatgtaHDKgcaatattKtggcctatgaWD -KRYcggattgRctatNgatacaatMNttctgtcRBYRaaaHctNYattcHtaWcaattct -BtMKtVgYataatMgYtcagcttMDataVtggRtKtgaatgccNcRttcaMtRgattaac -attRcagcctHtWMtgtDRagaKaBtgDttYaaaaKatKgatctVaaYaacWcgcatagB -VtaNtRtYRaggBaaBtgKgttacataagagcatgtRattccacttaccatRaaatgWgD -aMHaYVgVtaSctatcgKaatatattaDgacccYagtgtaYNaaatKcagtBRgagtcca -tgKgaaaccBgaagBtgSttWtacgatWHaYatcgatttRaaNRgcaNaKVacaNtDgat -tgHVaatcDaagcgtatgcNttaDataatcSataaKcaataaHWataBtttatBtcaKtK -tatagttaDgSaYctacaRatNtaWctSaatatttYaKaKtaccWtatcRagacttaYtt -VcKgSDcgagaagatccHtaattctSttatggtKYgtMaHagVaBRatttctgtRgtcta -tgggtaHKgtHacHtSYacgtacacHatacKaaBaVaccaDtatcSaataaHaagagaat -ScagactataaRttagcaaVcaHataKgDacatWccccaagcaBgagWatctaYttgaaa -tctVNcYtttWagHcgcgcDcVaaatgttKcHtNtcaatagtgtNRaactttttcaatgg -WgBcgDtgVgtttctacMtaaataaaRggaaacWaHttaRtNtgctaaRRtVBctYtVta -tDcattDtgaccYatagatYRKatNYKttNgcctagtaWtgaactaMVaacctgaStttc -tgaKVtaaVaRKDttVtVctaDNtataaaDtccccaagtWtcgatcactDgYaBcatcct -MtVtacDaaBtYtMaKNatNtcaNacgDatYcatcgcaRatWBgaacWttKttagYtaat -tcggttgSWttttDWctttacYtatatWtcatDtMgtBttgRtVDggttaacYtacgtac 
-atgaattgaaWcttMStaDgtatattgaDtcRBcattSgaaVBRgagccaaKtttcDgcg -aSMtatgWattaKttWtgDBMaggBBttBaatWttRtgcNtHcgttttHtKtcWtagHSt -aacagttgatatBtaWSaWggtaataaMttaKacDaatactcBttcaatatHttcBaaSa -aatYggtaRtatNtHcaatcaHtagVtgtattataNggaMtcttHtNagctaaaggtaga -YctMattNaMVNtcKtactBKcaHHcBttaSagaKacataYgctaKaYgttYcgacWVtt -WtSagcaacatcccHaccKtcttaacgaKttcacKtNtacHtatatRtaaatacactaBt -ttgaHaRttggttWtatYagcatYDatcggagagcWBataagRtacctataRKgtBgatg -aDatataSttagBaHtaatNtaDWcWtgtaattacagKttcNtMagtattaNgtctcgtc -ctcttBaHaKcKccgtRcaaYagSattaagtKataDatatatagtcDtaacaWHcaKttD -gaaRcgtgYttgtcatatNtatttttatggccHtgDtYHtWgttatYaacaattcaWtat -NgctcaaaSttRgctaatcaaatNatcgtttaBtNNVtgttataagcaaagattBacgtD -atttNatttaaaDcBgtaSKgacgtagataatttcHMVNttgttBtDtgtaWKaaRMcKM -tHtaVtagataWctccNNaSWtVaHatctcMgggDgtNHtDaDttatatVWttgttattt -aacctttcacaaggaSaDcggttttttatatVtctgVtaacaStDVaKactaMtttaSNa -gtgaaattaNacttSKctattcctctaSagKcaVttaagNaVcttaVaaRNaHaaHttat -gtHttgtgatMccaggtaDcgaccgtWgtWMtttaHcRtattgScctatttKtaaccaag -tYagaHgtWcHaatgccKNRtttagtMYSgaDatctgtgaWDtccMNcgHgcaaacNDaa -aRaStDWtcaaaaHKtaNBctagBtgtattaactaattttVctagaatggcWSatMaccc -ttHttaSgSgtgMRcatRVKtatctgaaaccDNatYgaaVHNgatMgHRtacttaaaRta -tStRtDtatDttYatattHggaBcttHgcgattgaKcKtttcRataMtcgaVttWacatN -catacctRataDDatVaWNcggttgaHtgtMacVtttaBHtgagVttMaataattatgtt -cttagtttgtgcDtSatttgBtcaacHattaaBagVWcgcaSYttMgcttacYKtVtatc -aYaKctgBatgcgggcYcaaaaacgNtctagKBtattatctttKtaVttatagtaYtRag -NtaYataaVtgaatatcHgcaaRataHtacacatgtaNtgtcgYatWMatttgaactacR -ctaWtWtatacaatctBatatgYtaagtatgtgtatSttactVatcttYtaBcKgRaSgg -RaaaaatgcagtaaaWgtaRgcgataatcBaataccgtatttttccatcNHtatWYgatH -SaaaDHttgctgtccHtggggcctaataatttttctatattYWtcattBtgBRcVttaVM -RSgctaatMagtYtttaaaaatBRtcBttcaaVtaacagctccSaaSttKNtHtKYcagc -agaaaccccRtttttaaDcDtaStatccaagcgctHtatcttaDRYgatDHtWcaaaBcW -gKWHttHataagHacgMNKttMKHccaYcatMVaacgttaKgYcaVaaBtacgcaacttt -MctaaHaatgtBatgagaSatgtatgSRgHgWaVWgataaatatttccKagVgataattW -aHNcYggaaatgctHtKtaDtctaaagtMaatVDVactWtSaaWaaMtaHtaSKtcBRaN 
-cttStggtBttacNagcatagRgtKtgcgaacaacBcgKaatgataagatgaaaattgta -ctgcgggtccHHWHaaNacaBttNKtKtcaaBatatgctaHNgtKcDWgtttatNgVDHg -accaacWctKaaggHttgaRgYaatHcaBacaatgagcaaattactgtaVaaYaDtagat -tgagNKggtggtgKtWKaatacagDRtatRaMRtgattDggtcaaYRtatttNtagaDtc -acaaSDctDtataatcgtactaHttatacaatYaacaaHttHatHtgcgatRRttNgcat -SVtacWWgaaggagtatVMaVaaattScDDKNcaYBYaDatHgtctatBagcaacaagaa -tgagaaRcataaKNaRtBDatcaaacgcattttttaaBtcSgtacaRggatgtMNaattg -gatatWtgagtattaaaVctgcaYMtatgatttttYgaHtgtcttaagWBttHttgtctt -attDtcgtatWtataataSgctaHagcDVcNtaatcaagtaBDaWaDgtttagYctaNcc -DtaKtaHcttaataacccaRKtacaVaatNgcWRaMgaattatgaBaaagattVYaHMDc -aDHtcRcgYtcttaaaWaaaVKgatacRtttRRKYgaatacaWVacVcRtatMacaBtac -tggMataaattttHggNagSctacHgtBagcgtcgtgattNtttgatSaaggMttctttc -ttNtYNagBtaaacaaatttMgaccttacataattgYtcgacBtVMctgStgMDtagtaR -ctHtatgttcatatVRNWataDKatWcgaaaaagttaaaagcacgHNacgtaatctttMR -tgacttttDacctataaacgaaatatgattagaactccSYtaBctttaataacWgaaaYa -tagatgWttcatKtNgatttttcaagHtaYgaaRaDaagtaggagcttatVtagtctttc -attaaaatcgKtattaRttacagVaDatgcatVgattgggtctttHVtagKaaRBtaHta -aggccccaaaaKatggtttaMWgtBtaaacttcactttKHtcgatctccctaYaBacMgt -cttBaBaNgcgaaacaatctagtHccHtKttcRtRVttccVctttcatacYagMVtMcag -aMaaacaataBctgYtaatRaaagattaaccatVRatHtaRagcgcaBcgDttStttttc -VtttaDtKgcaaWaaaaatSccMcVatgtKgtaKgcgatatgtagtSaaaDttatacaaa -catYaRRcVRHctKtcgacKttaaVctaDaatgttMggRcWaacttttHaDaKaDaBctg -taggcgtttaHBccatccattcNHtDaYtaataMttacggctNVaacDattgatatttta -cVttSaattacaaRtataNDgacVtgaacataVRttttaDtcaaacataYDBtttaatBa -DtttYDaDaMccMttNBttatatgagaaMgaNtattHccNataattcaHagtgaaggDga -tgtatatatgYatgaStcataaBStWacgtcccataRMaaDattggttaaattcMKtctM -acaBSactcggaatDDgatDgcWctaacaccgggaVcacWKVacggtaNatatacctMta -tgatagtgcaKagggVaDtgtaacttggagtcKatatcgMcttRaMagcattaBRaStct -YSggaHYtacaactMBaagDcaBDRaaacMYacaHaattagcattaaaHgcgctaaggSc -cKtgaaKtNaBtatDDcKBSaVtgatVYaagVtctSgMctacgttaacWaaattctSgtD -actaaStaaattgcagBBRVctaatatacctNttMcRggctttMttagacRaHcaBaacV -KgaataHttttMgYgattcYaNRgttMgcVaaacaVVcDHaatttgKtMYgtatBtVVct 
-WgVtatHtacaaHttcacgatagcagtaaNattBatatatttcVgaDagcggttMaagtc -ScHagaaatgcYNggcgtttttMtStggtRatctacttaaatVVtBacttHNttttaRca -aatcacagHgagagtMgatcSWaNRacagDtatactaaDKaSRtgattctccatSaaRtt -aaYctacacNtaRtaactggatgaccYtacactttaattaattgattYgttcagDtNKtt -agDttaaaaaaaBtttaaNaYWKMBaaaacVcBMtatWtgBatatgaacVtattMtYatM -NYDKNcKgDttDaVtaaaatgggatttctgtaaatWtctcWgtVVagtcgRgacttcccc -taDcacagcRcagagtgtWSatgtacatgttaaSttgtaaHcgatgggMagtgaacttat -RtttaVcaccaWaMgtactaatSSaHtcMgaaYtatcgaaggYgggcgtgaNDtgttMNg -aNDMtaattcgVttttaacatgVatgtWVMatatcaKgaaattcaBcctccWcttgaaWH -tWgHtcgNWgaRgctcBgSgaattgcaaHtgattgtgNagtDttHHgBttaaWcaaWagc -aSaHHtaaaVctRaaMagtaDaatHtDMtcVaWMtagSagcttHSattaacaaagtRacM -tRtctgttagcMtcaBatVKtKtKacgagaSNatSactgtatatcBctgagVtYactgta -aattaaaggcYgDHgtaacatSRDatMMccHatKgttaacgactKtgKagtcttcaaHRV -tccttKgtSataatttacaactggatDNgaacttcaRtVaagDcaWatcBctctHYatHa -DaaatttagYatSatccaWtttagaaatVaacBatHcatcgtacaatatcgcNYRcaata -YaRaYtgattVttgaatgaVaactcRcaNStgtgtattMtgaggtNttBaDRcgaaaagc -tNgBcWaWgtSaDcVtgVaatMKBtttcgtttctaaHctaaagYactgMtatBDtcStga -ccgtSDattYaataHctgggaYYttcggttaWaatctggtRagWMaDagtaacBccacta -cgHWMKaatgatWatcctgHcaBaSctVtcMtgtDttacctaVgatYcWaDRaaaaRtag -atcgaMagtggaRaWctctgMgcWttaagKBRtaaDaaWtctgtaagYMttactaHtaat -cttcataacggcacBtSgcgttNHtgtHccatgttttaaagtatcgaKtMttVcataYBB -aKtaMVaVgtattNDSataHcagtWMtaggtaSaaKgttgBtVtttgttatcatKcgHac -acRtctHatNVagSBgatgHtgaRaSgttRcctaacaaattDNttgacctaaYtBgaaaa -tagttattactcttttgatgtNNtVtgtatMgtcttRttcatttgatgacacttcHSaaa -ccaWWDtWagtaRDDVNacVaRatgttBccttaatHtgtaaacStcVNtcacaSRttcYa -gacagaMMttttgMcNttBcgWBtactgVtaRttctccaaYHBtaaagaBattaYacgat -ttacatctgtaaMKaRYtttttactaaVatWgctBtttDVttctggcDaHaggDaagtcg -aWcaagtagtWttHtgKtVataStccaMcWcaagataagatcactctHatgtcYgaKcat -cagatactaagNSStHcctRRNtattgtccttagttagMVgtatagactaactctVcaat -MctgtttgtgttgccttatWgtaBVtttctggMcaaKgDWtcgtaaYStgSactatttHg -atctgKagtagBtVacRaagRtMctatgggcaaaKaaaatacttcHctaRtgtDcttDat -taggaaatttcYHaRaaBttaatggcacKtgctHVcaDcaaaVDaaaVcgMttgtNagcg 
-taDWgtcgttaatDgKgagcSatatcSHtagtagttggtgtHaWtaHKtatagctgtVga -ttaBVaatgaataagtaatVatSttaHctttKtttgtagttaccttaatcgtagtcctgB -cgactatttVcMacHaaaggaatgDatggKtaHtgStatattaaSagctWcctccRtata -BaDYcgttgcNaagaggatRaaaYtaWgNtSMcaatttactaacatttaaWttHtatBat -tgtcgacaatNgattgcNgtMaaaKaBDattHacttggtRtttaYaacgVactBtaBaKt -gBttatgVttgtVttcaatcWcNctDBaaBgaDHacBttattNtgtDtatttVSaaacag -gatgcRatSgtaSaNtgBatagttcHBgcBBaaattaHgtDattatDaKaatBaaYaaMa -ataaataKtttYtagtBgMatNcatgtttgaNagtgttgtgKaNaSagtttgaSMaYBca -aaacDStagttVacaaaaactaaWttBaagtctgtgcgtMgtaattctcctacctcaNtt -taaccaaaaVtBcacataacaccccBcWMtatVtggaatgaWtcaaWaaaaaaaaWtDta -atatRcctDWtcctaccMtVVatKttaWaaKaaatataaagScHBagaggBaSMtaWaVt -atattactSaaaKNaactatNatccttgaYctattcaaaVgatttYHcRagattttaSat -aggttattcVtaaagaKgtattattKtRttNcggcRgtgtgtWYtaacHgKatKgatYta -cYagDtWcHBDctctgRaYKaYagcactKcacSaRtBttttBHKcMtNtcBatttatttt -tgSatVgaaagaWtcDtagDatatgMacaacRgatatatgtttgtKtNRaatatNatgYc -aHtgHataacKtgagtagtaacYttaNccaaatHcacaacaVDtagtaYtccagcattNt -acKtBtactaaagaBatVtKaaHBctgStgtBgtatgaSNtgDataaccctgtagcaBgt -gatcttaDataStgaMaccaSBBgWagtacKcgattgaDgNNaaaacacagtSatBacKD -gcgtataBKcatacactaSaatYtYcDaactHttcatRtttaatcaattataRtttgtaa -gMcgNttcatcBtYBagtNWNMtSHcattcRctttttRWgaKacKttgggagBcgttcgc -MaWHtaatactgtctctatttataVgtttaBScttttaBMaNaatMacactYtBMggtHa -cMagtaRtctgcatttaHtcaaaatttgagKtgNtactBacaHtcgtatttctMaSRagc -agttaatgtNtaaattgagagWcKtaNttagVtacgatttgaatttcgRtgtWcVatcgt -taaDVctgtttBWgaccagaaagtcSgtVtatagaBccttttcctaaattgHtatcggRa -ttttcaaggcYSKaagWaWtRactaaaacccBatMtttBaatYtaagaactSttcgaaSc -aatagtattgaccaagtgttttctaacatgtttNVaatcaaagagaaaNattaaRtttta -VaaaccgcaggNMtatattVctcaagaggaacgBgtttaacaagttcKcYaatatactaa -ccBaaaSggttcNtattctagttRtBacgScVctcaatttaatYtaaaaaaatgSaatga -tagaMBRatgRcMcgttgaWHtcaVYgaatYtaatctttYttatRaWtctgBtDcgatNa -tcKaBaDgatgtaNatWKctccgatattaacattNaaacDatgBgttctgtDtaaaMggt -gaBaSHataacgccSctaBtttaRBtcNHcDatcDcctagagtcRtaBgWttDRVHagat -tYatgtatcWtaHtttYcattWtaaagtctNgtStggRNcgcggagSSaaagaaaatYcH 
-DtcgctttaatgYcKBVSgtattRaYBaDaaatBgtatgaHtaaRaRgcaSWNtagatHa -acttNctBtcaccatctMcatattccaSatttgcgaDagDgtatYtaaaVDtaagtttWV -aagtagYatRttaagDcNgacKBcScagHtattatcDaDactaaaaaYgHttBcgaDttg -gataaaKSRcBMaBcgaBSttcWtgNBatRaccgattcatttataacggHVtaattcaca -agagVttaaRaatVVRKcgWtVgacctgDgYaaHaWtctttcacMagggatVgactagMa -aataKaaNWagKatagNaaWtaaaatttgaattttatttgctaaVgaHatBatcaaBWcB -gttcMatcgBaaNgttcgSNaggSaRtttgHtRtattaNttcDcatSaVttttcgaaaaa -ttgHatctaRaggSaNatMDaaatDcacgattttagaHgHaWtYgattaatHNSttatMS -gggNtcKtYatRggtttgtMWVtttaYtagcagBagHaYagttatatggtBacYcattaR -SataBatMtttaaatctHcaaaSaaaagttNSaaWcWRccRtKaagtBWtcaaattSttM -tattggaaaccttaacgttBtWatttatatWcDaatagattcctScacctaagggRaaYt -aNaatgVtBcttaaBaacaMVaaattatStYgRcctgtactatcMcVKatttcgSgatRH -MaaaHtagtaaHtVgcaaataatatcgKKtgccaatBNgaaWcVttgagttaKatagttc -aggKDatDtattgaKaVcaKtaataDataataHSaHcattagttaatRVYcNaHtaRcaa -ggtNHcgtcaaccaBaaagYtHWaaaRcKgaYaaDttgcWYtataRgaatatgtYtgcKt -aNttWacatYHctRaDtYtattcBttttatcSataYaYgttWaRagcacHMgtttHtYtt -YaatcggtatStttcgtRSattaaDaKMaatatactaNBaWgctacacYtgaYVgtgHta -aaRaaRgHtagtWattataaaSDaaWtgMattatcgaaaagtaYRSaWtSgNtBgagcRY -aMDtactaacttaWgtatctagacaagNtattHggataatYttYatcataDcgHgttBtt -ctttVttgccgaaWtaaaacgKgtatctaaaaaNtccDtaDatBMaMggaatNKtatBaa -atVtccRaHtaSacataHattgtttKVYattcataVaattWtcgtgMttcttKtgtctaa -cVtatctatatBRataactcgKatStatattcatHHRttKtccaacgtgggtgRgtgaMt -attattggctatcgtgacMtRcBDtcttgtactaatRHttttaagatcgVMDStattatY -BtttDttgtBtNttgRcMtYtgBacHaWaBaatDKctaagtgaaactaatgRaaKgatcc -aagNaaaatattaggWNtaagtatacttttKcgtcggSYtcttgRctataYcttatataa -agtatattaatttataVaacacaDHatctatttttKYVatHRactttaBHccaWagtact -BtcacgaVgcgttRtttttttSVgtSagtBaaattctgaHgactcttgMcattttagVta -agaattHctHtcaDaaNtaacRggWatagttcgtSttgaDatcNgNagctagDgatcNtt -KgttgtaDtctttRaaYStRatDtgMggactSttaDtagSaVtBDttgtDgccatcacaM -attaaaMtNacaVcgSWcVaaDatcaHaatgaattaMtatccVtctBtaattgtWattat -BRcWcaatgNNtactWYtDaKttaaatcactcagtRaaRgatggtKgcgccaaHgaggat -StattYcaNMtcaBttacttatgagDaNtaMgaaWtgtttcttctaHtMNgttatctaWW 
-atMtBtaaatagDVatgtBYtatcggcttaagacMRtaHScgatatYgRDtcattatSDa -HggaaataNgaWSRRaaaBaatagBattaDctttgHWNttacaataaaaaaatacggttt -gHgVtaHtWMttNtBtctagtMcgKMgHgYtataHaNagWtcaacYattaataYRgtaWK -gaBctataaccgatttaHaNBRaRaMtccggtNgacMtctcatttgcaattcWgMactta -caaDaaNtactWatVtttagccttMaatcagVaagtctVaaDaBtattaattaYtNaYtg -gattaKtaKctYaMtattYgatattataatKtVgDcttatatNBtcgttgtStttttMag -aggttaHYSttcKgtcKtDNtataagttataagSgttatDtRttattgttttSNggRtca -aKMNatgaatattgtBWtaMacctgggYgaSgaagYataagattacgagaatBtggtRcV -HtgYggaDgaYaKagWagctatagacgaaHgtWaNgacttHRatVaWacKYtgRVNgVcS -gRWctacatcKSactctgWYtBggtataagcttNRttVtgRcaWaaatDMatYattaact -ttcgaagRatSctgccttgcRKaccHtttSNVagtagHagBagttagaccaRtataBcca -taatSHatRtcHagacBWatagcaMtacaRtgtgaaBatctKRtScttccaNaatcNgta -atatWtcaMgactctBtWtaaNactHaaaaRctcgcatggctMcaaNtcagaaaaacaca -gtggggWttRttagtaagaVctVMtcgaatcttcMaaaHcaHBttcgattatgtcaDagc -YRtBtYcgacMgtDcagcgaNgttaataatagcagKYYtcgtaBtYctMaRtaRtDagaa -aacacatgYaBttgattattcgaaNttBctSataaMataWRgaHtttccgtDgaYtatgg -tDgHKgMtatttVtMtVagttaRatMattRagataaccctKctMtSttgaHagtcStcta -tttccSagatgttccacgaggYNttHRacgattcDatatDcataaaatBBttatcgaHtN -HaaatatDNaggctgaNcaaggagttBttMgRagVatBcRtaWgatgBtSgaKtcgHttt -gaatcaaDaHttcSBgHcagtVaaSttDcagccgttNBtgttHagYtattctttRWaaVt -SttcatatKaaRaaaNacaVtVctMtSDtDtRHRcgtaatgctcttaaatSacacaatcg -HattcaWcttaaaatHaaatcNctWttaNMcMtaKctVtcctaagYgatgatcYaaaRac -tctaRDaYagtaacgtDgaggaaatctcaaacatcaScttcKttNtaccatNtaNataca -tttHaaDHgcaDatMWaaBttcRggctMaagctVYcacgatcaDttatYtaatcKatWat -caatVYtNagatttgattgaYttttYgacttVtcKaRagaaaHVgDtaMatKYagagttN -atWttaccNtYtcDWgSatgaRgtMatgKtcgacaagWtacttaagtcgKtgatccttNc -ttatagMatHVggtagcgHctatagccctYttggtaattKNaacgaaYatatVctaataM -aaaYtgVtcKaYtaataacagaatHcacVagatYWHttagaaSMaatWtYtgtaaagNaa -acaVgaWtcacNWgataNttcaSagctMDaRttgNactaccgataMaaatgtttattDtc -aagacgctDHYYatggttcaagccNctccttcMctttagacBtaaWtaWVHggaaaaNat -ttaDtDtgctaaHHtMtatNtMtagtcatttgcaaaRatacagRHtatDNtgtDgaatVg -tVNtcaaatYBMaaaagcaKgtgatgatMgWWMaHttttMgMagatDtataaattaacca 
-actMtacataaattgRataatacgBtKtaataattRgtatDagDtcRDacctatRcagag -cSHatNtcaScNtttggacNtaaggaccgtgKNttgttNcttgaaRgYgRtNtcagttBc -ttttcHtKtgcttYaaNgYagtaaatgaatggWaMattBHtatctatSgtcYtgcHtaat -tHgaaMtHcagaaSatggtatgccaHBtYtcNattWtgtNgctttaggtttgtWatNtgH -tgcDttactttttttgcNtactKtWRaVcttcatagtgSNKaNccgaataaBttataata -YtSagctttaaatSttggctaaKSaatRccgWHgagDttaaatcatgagMtcgagtVtaD -ggaBtatttgDacataaacgtagYRagBWtgDStKDgatgaagttcattatttaKWcata -aatWRgatataRgttRacaaNKttNtKagaaYaStaactScattattaacgatttaaatg -DtaattagatHgaYataaactatggggatVHtgccgtNgatNYcaStRtagaccacWcaM -tatRagHgVactYtWHtcttcatgatWgagaKggagtatgaWtDtVtNaNtcgYYgtaaa -ctttaDtBactagtaDctatagtaatatttatatataacgHaaaRagKattSagttYtSt ->THREE Homo sapiens frequency -agagagacgatgaaaattaatcgtcaatacgctggcgaacactgagggggacccaatgct -cttctcggtctaaaaaggaatgtgtcagaaattggtcagttcaaaagtagaccggatctt -tgcggagaacaattcacggaacgtagcgttgggaaatatcctttctaccacacatcggat -tttcgccctctcccattatttattgtgttctcacatagaattattgtttagacatccctc -gttgtatggagagttgcccgagcgtaaaggcataatccatataccgccgggtgagtgacc -tgaaattgtttttagttgggatttcgctatggattagcttacacgaagagattctaatgg -tactataggataattataatgctgcgtggcgcagtacaccgttacaaacgtcgttcgcat -atgtggctaacacggtgaaaatacctacatcgtatttgcaatttcggtcgtttcatagag -cgcattgaattactcaaaaattatatatgttgattatttgattagactgcgtggaaagaa -ggggtactcaagccatttgtaaaagctgcatctcgcttaagtttgagagcttacattagt -ctatttcagtcttctaggaaatgtctgtgtgagtggttgtcgtccataggtcactggcat -atgcgattcatgacatgctaaactaagaaagtagattactattaccggcatgcctaatgc -gattgcactgctatgaaggtgcggacgtcgcgcccatgtagccctgataataccaatact -tacatttggtcagcaattctgacattatacctagcacccataaatttactcagacttgag -gacaggctcttggagtcgatcttctgtttgtatgcatgtgatcatatagatgaataagcg -atgcgactagttagggcatagtatagatctgtgtatacagttcagctgaacgtccgcgag -tggaagtacagctgagatctatcctaaaatgcaaccatatcgttcacacatgatatgaac -ccagggggaaacattgagttcagttaaattggcagcgaatcccccaagaagaaggcggag -tgacgttgaacgggcttatggtttttcagtacttcctccgtataagttgagcgaaatgta -aacagaataatcgttgtgttaacaacattaaaatcgcggaatatgatgagaatacacagt 
-gtgagcatttcacttgtaaaatatctttggtagaacttactttgctttaaatatgttaaa -ccgatctaataatctacaaaacggtagattttgcctagcacattgcgtccttctctattc -agatagaggcaatactcagaaggttttatccaaagcactgtgttgactaacctaagtttt -agtctaataatcatgattgattataggtgccgtggactacatgactcgtccacaaataat -acttagcagatcagcaattggccaagcacccgacttttatttaatggttgtgcaatagtc -cagattcgtattcgggactctttcaaataatagtttcctggcatctaagtaagaaaagct -cataaggaagcgatattatgacacgctcttccgccgctgttttgaaacttgagtattgct -cgtccgaaattgagggtcacttcaaaatttactgagaagacgaagatcgactaaagttaa -aatgctagtccacagttggtcaagttgaattcatccacgagttatatagctattttaatt -tatagtcgagtgtacaaaaaacatccacaataagatttatcttagaataacaacccccgt -atcatcgaaatcctccgttatggcctgactcctcgagcttatagcatttgtgctggcgct -cttgccaggaacttgctcgcgaggtggtgacgagtgagatgatcagtttcattatgatga -tacgattttatcgcgactagttaatcatcatagcaagtaaaatttgaattatgtcattat -catgctccattaacaggttatttaattgatactgacgaaattttttcacaatgggttttc -tagaatttaatatcagtaattgaagccttcataggggtcctactagtatcctacacgacg -caggtccgcagtatcctggagggacgtgttactgattaaaagggtcaaaggaatgaaggc -tcacaatgttacctgcttcaccatagtgagccgatgagttttacattagtactaaatccc -aaatcatactttacgatgaggcttgctagcgctaaagagaatacatacaccaccacatag -aattgttagcgatgatatcaaatagactcctggaagtgtcagggggaaactgttcaatat -ttcgtccacaggactgaccaggcatggaaaagactgacgttggaaactataccatctcac -gcccgacgcttcactaattgatgatccaaaaaatatagcccggattcctgattagcaaag -ggttcacagagaaagatattatcgacgtatatcccaaaaaacagacgtaatgtgcatctt -cgaatcgggatgaatacttgtatcataaaaatgtgacctctagtatacaggttaatgtta -gtgatacacaatactcgtgggccatgggttctcaaataaaatgtaatattgcgtcgatca -ctcacccacgtatttggtctaattatgttttatttagtgacaatccaatagataaccggt -cctattaagggctatatttttagcgaccacgcgtttaaacaaaggattgtatgtagatgg -taccagtttaattgccagtgggcaatcctaagcaaaatgagattctatcctaaagtttgg -gcttgatataagatttcggatgtatgggttttataatcgttggagagctcaatcatgagc -taatacatggatttcgctacctcaccgagagaccttgcatgaagaattctaaccaaaagt -ttaataggccggattggattgagttaattaagaccttgttcagtcatagtaaaaaccctt -aaattttaccgattgacaaagtgagcagtcgcaataccctatgcgaaacgcctcgatagt -gactaggtatacaaggtttttgagttcctttgaaatagttaactaatttaaaattaatta 
-acgacatggaaatcacagaacctaatgctttgtaggagttatttatgctgtttactgcct -ctacaaccctaataaagcagtcctaagaatgaaacgcatcttttagttcagaaagtggta -tccagggtggtcaatttaataaattcaacatcgggtctcaggatattcggtcatataatt -tattaagggctcttcgagtcttactctgagtgaaattggaaacagtcatccttttcgttg -tgaggcatcttacaccgctatcgatatacaatgcattccaccgcggtgtcccgtacacaa -ggaaacttgttaccttggggatataagaaaactcacacgtctcattattaaactgagtac -aatttttgcacgagaaagtaatgcaatacaatatgatgaaagccagctaatgaaaaggga -tggaacgcacctcggatctgttgcactggattaaaatccgattatttttaaaaatattca -gtgctagagcatatcaggtctacttttttatctggtatgtaaagcccacggagcgatagt -gagatccttacgactcaacgaaaagttataacataactcccgttagccaaagcccaatcc -cgattactgccctaccctaacgtctgccatctaaatatcgaacttgttatgatcaatgtg -actacctcccaccctttccccttcatttgttccactggggataagctagcgttttcagaa -tcaatgcaataagaatagccaattgtctcacttcatcagagctcttggcaattccaggcg -ctacgtggttctggaatatattcatttttcaaatagtaatacgtttagtgttgctattgt -ctacacgtttggatattacgttatgtgagcggacatcaatagttgtctaactctttagta -agccagagatagcactcttagcgaatggataccatcttccataagtttagttaatagtcc -gaaacaactgcttcgagcatatttgaacctccttgtaggcaaatagcctcttcaaagcaa -tcttactaatagatagagtttgttttaagggactactagaaatgggacaatcttaatagt -atgacctaaactgacatttaaagatatatccaggtggcaagcataaagatcattgcgcca -cctccaccgtgggattacttatcagtcgatatcctatatgctaagtttgcgacggcagaa -tacaaactaagctgagttgatgctaaccttacctatgataccccattggaccggttaaca -gccctacttattccaaataaaagaacttttatgctgtagaagctattatagtgatgcctg -gtaacttcagtatattaaaatgacacacatacgccatatagagctcctggaactttgaat -aatgagcgaacttcgaagttgaagagcaagaaaccatatgtcacggttgcctaaagcccg -gtaaccagacatgtgctatcattgatcattatcgaggttttcataaccttgacccattat -cggctgtgcgcggacaagtacttaaatcactagtttcttcacctgcttatcggtaagaaa -taaggttggcaaagaatcgcataagacggacgtagagccgcagcgttgtgcgagtccagg -tgcatgcgcagcaataggattttaaattttgttccatttttaatttagccgtaaggatgt -ccgtaaatgattgaaaattggattcaatctttgggcctatgctactggaacctgatcgac -aaaatttcaaacatacgttaactccgaaagaccgtatttttgcggctagaatagtcagtc -gcttggagccatataccttaccacttaaacgacgtgctcctgtagttgaaatataaacag -aacacaaagactaccgatcatatcaactgaagatctttgtaactttgaggcgaagcaccc 
-tcttcgagacaactaagagtaaagtaccgggcgccgcaaggagtcgattgggaccctaaa -tcttgacgaattgctaagaggctcagagctaccactgtaatttctctagagcccataata -aatgaacgatacatccgtaggtagcacctaagggattataatggaagccaaatgcagtta -ataatattatatactggcgtacacgattcgacggatctctcacatagtgattcacgaccc -ccccctttgattgacacagcgtcagcattttgcaagaacgatcttctgcatagggtgcgc -caccgtaaggatgacgtcgaagctacaactgggtataatttaccatgcttccctgatgct -gagtgcaatacactaagaatgagtttttaccccatatcaccagtatttgttctgttattg -cgaagaaatggctatgctgagttggcgactaaagtcacccatcctttttattaggtaacc -ccctcccttaaactaactgatttgctggagctgccctgcatacatatactttatcattta -tggacgtccgtgacgcttattatccaccatagtcgatatgctacacggattcattaatgg -atcgtaggagtttaagttatatttactaagatcggtctcggctactatcccgccttaccc -ggcgctatttacggccatttttaatatattgacggtaattattcctatggtttcgaccgc -acgtccttggacaagaaagaatggcaaaaaaaatgtaaaagaaaaaaaatattgagtccc -taccatcatataaaaaatatgtgatgagtaacttgacgaaatgttagtggttattaaaga -ctatctattacaccttttgttttctgtcgtagtatattaaagtctagaagccttacagga -aaatcagggttatacagccgatactccgcagcatgaatcatcgaggaggtgtcctaccat -cgcgccttgtaatcttgtctgtgtatactgtatttagaccttttatacaaagtaaatatc -tcggctttatgtgattgggaggggcctactcaaacatgatgacttgacctaataatcact -gtgcgggcgtcttatgactagctattccttgaaatccaccaccaaatggttaatatgtaa -aaactttgacgatgaaacaaggtgaatgtgtagttactttgtgtaattagctgcgtcgag -cattgcttgtaaaaccgtcaatcgcacacgttacttccataaaatttctacgaatacacc -cttcttaaaaaaaacgtaggaattcacgagtttaacaaacgataactgtataaagtggaa -gtccgaagaaagcagatgcccgaactactcgaagatgtttcgttttcttaaccatagggg -cttcttaatggcccactacgcacattttgttcaagcccgagagggacatccccattacgg -gagtattactaaaactgttccgtaatacgttcagcaagggatgaaaaaggccactgctca -agttattgacgtgggagtattacatcggaagcctgaatcccacactatgatggtctgtac -aggcctagggactgcgtctagacggtattaccggcttctaatcatacgatcgtgagtctt -aacgggaagtaaggctcacacctaccccaaaccatttatctatgtaagtataaaattgtg -cgtaagtgttcaaagtggacaataaagacgtggcaaaaacccccgcacataagccgcttt -agatttcacaaataccaatgcggttaaaaacatccttgagtcgtacatacaccatactcg -cgttaaacggatataacagaagataataaatccggatgtggagtcggtgtaactatagaa -agccaagtgaaataatgcttaccagtcatttagctatacggctttcatttcatgtcaaga 
-gggtggagtttgacctgtacagttgatatatcaccgatacttagaactcacctaaagcta -aaattgctcgcagcgtgtaatccgcatattacaaacaatagatgggattcattatacata -agacacgatgatctgctttttcaggttgcgagatgttgcctatcgtcaatcgagtcctgc -cttacaccacttaaacaaaagtattgacagggaacctattttcgaggtattatatagtcc -agcttgaatatcaatttgacagttaacctagtgaaaatcagtaagaggaaatacgccaca -ttctccagtgaaattctacgggttatcgtctagtccaactatcaattataactcacgaga -tataagtaaattctcgtacttggcctgatttttattatactttggatccttagtaaacag -gaagggagaaaccttcaacgaaaaacactggattttgttttactctcaaagctcttatat -gacggaaataccctgtcaagtcttaactttattactagactaatgaaatgggcttggggt -ggccagaatcatagtacaatttagcggatacactattcggactttcctatcggctgtctg -gttggataagtatggggactaataggctagacatacctatacttaaactatacaggcgtc -atctatctctgcaactttggagttccctgatgttctcccgccctttgggttcacatcttc -tataccgacacccctaataacgattagtttgtgggttagagtaaattaatacggttaata -ttaatgtatcgttgaaaagctggtgtcgccaataaggtaaccggctaggcagagtatatg -tcacgaagtataactaccctaatgataagctgtaggaataaaattaatgctgtctctaag -cgaagagatatttccgactctgttttaatgacgaatctcattacttctgacttgcaaatg -ttcaatatggcacggtttcacggcacctttgtgacgcatataatgaacttagaagattat -aacgacggaactttatatgataatccgttacgattaaagaatctgttaaatatcataatg -gcattcagttctagaccgtgcatcatggtaaacttactttctctgcatggcgacatacat -ttcgctattcaaattcgcgtgtggttacacccactcgcacctttggaatattaagagaag -atgatcagaaaatccattcgctcaatttttctgacgtacgtctaatttatcctaggagac -aaatcgttttatgtctctcacatttttgaagaaaggttcgagagacaatactcaggtcct -gaactgctagaagatactcggtggagcgtggcaacaatgaaaaactcgtgacataaatga -atgatacttttccaagttcagttaagtgaatatgtttaacatacccggcttttcgatctt -aagctgacgctggacgtgcgagtaatgtcagtctcttacatacactagtgactccaagtt -tcgtcaaaaacgccccctcccttctcgagcccactcacgctatgtattgacgcgaacttg -ttcgggatcagacttttcaggagttcggtcgcgtgtccctatgtgctaatatataagtta -gatcgcattagatgctaatctgaatacttatagacgaccttcaacgagaacgggtaccac -cttgaggctagagttaggtgtgaaacgacaggtagggacatataaaatttgagtgcggct -ttagttaagggtttaattacctactcaaacatcacgctcgcgcccttcgtacgtaatcga -ccatctagaggctaaggggactgtactaggtagtgattaatgatatcctagacgcacgtg -ccttagatcttcagactctgatggtccgcgatcaccgtaattgtagtcctccaactcgat 
-cactttgttggcgtcaaagaaattacgatatctaaatacttataatacaataaccaagga -tgagaatgactcatcgcgttggagttatattgcttgaagttctatggaatgaaagcacgt -tatctgccgtcccaatatctccagtgagctaattcattggacggtccactttgatcaatc -cccgaggagatgttcggacactttagtctgtaacacttagcgttgagaccacgaacaatt -gattactcagtcttgaaggtgttttccaaagttcattttaaataagactacgataggcct -ttcctattgatataaactacccggctctgttgttcgtgtgagtcgtacttctctgtgttt -ttctgattatagcaagattcgattcttagtgtaaacagcgatttttatttgacccgtcaa -tgagaagcgcataggatctaagcaaaattatcaagttgtgccacaaggtaagatctttcc -agttattgcaggtaggatgtatcccacgttgatagtatgaggtctgacgtcaactgtcta -ggagagttgaccgcgtgcgggtacaccggatttgcatcgatgttgagaacgcagaactcc -cactgtcgtggcggcgttcctgatatttagcaagaggcgttgataaagccctcatcatct -agatctcgacctcatctgccctcttgctccatcattttctacacagactactttcctatc -tacgttagtataattgctttctatcttagtatcatttagagcttctccgtcaacaggttc -gtgctattaaagttagtacgaaagggacaacttgtagcaacgcatttaatcggttttcga -ctacttcgcacaaaatcagataaagaagtttgtcattctattagacattgaattgcgcaa -ttgacttgtaccacttatgatcgaacactgaatcaagactgtgattaactaaaatagaca -agccactatatcaactaataaaaacgcccctggtggtcgaacatagttgactacaggata -attaattggactggagccattacattctctacaatcgtatcacttcccaagtagacaact -ttgaccttgtagtttcatgtacaaaaaaatgctttcgcaggagcacattggtagttcaat -agtttcatgggaacctcttgagccgtcttctgtgggtgtgttcggatagtaggtactgat -aaagtcgtgtcgctttcgatgagagggaattcaccggaaaacaccttggttaacaggata -gtctatgtaaacttcgagacatgtttaagagttaccagcttaatccacggtgctctacta -gtatcatcagctgtcttgcctcgcctagaaatatgcattctatcgttatcctatcaacgg -ttgccgtactgagcagccttattgtggaagagtaatatataaatgtagtcttgtctttac -gaagcagacgtaagtaataatgacttggaataccaaaactaaacatagtggattatcata -ctcaagaactctccagataaataacagtttttacgatacgtcaccaatgagcttaaagat -taggatcctcaaaactgatacaaacgctaattcatttgttattggatccagtatcagtta -aactgaatggagtgaagattgtagaatgttgttctggcctcgcatggggtctaggtgata -tacaatttctcatacttacacggtagtggaaatctgattctagcttcgtagctgactata -ctcaaggaaccactgctcaaggtaggagactagttccgaccctacagtcaaagtggccga -agcttaaactatagactagttgttaaatgctgatttcaagatatcatctatatacagttt -ggacaattatgtgtgcgaaactaaaattcatgctattcagatggatttcacttatgcctt 
-agaaacagatattgcccgagctcaatcaacagttttagccggaaacaatcgaagcatagg -gacaatgtatcttttcctaaattgccatgtgcagatttctgagtgtcacgaagcgcataa -tagaatcttgtgttgcctcaactcgttgaaaagtttaaaacaatcgcagcagtctttttg -gggtctactgtgtgtttgcaaaataactgaaagaaacgcttgaacaactctgaagtagct -cgagtactcattaaagtgtaacacattagtgaatatcggccaatgaaccaaacgcttccc -ggtacgctatctctctcatcgggaggcgatgtgcaggttatctacgaaagcatcccttta -cgttgagagtgtcgatgcatgaacctcattgtaacaatagcccagcaaattctcatacgt -gcctcagggtccgggcgtactcctccatggaagggcgcgcatctagtgttataccaactc -gctttttaactactatgctgtagttctacaggcatagtggccagtattttctaacttctc -tggatagatgctctcactcctcatccatcacggcttcagtttacgtcttacttgcttgtt -cagcaacggatggaggcattaagtatcttcactgttccctaaaattgctgttcaatatca -aagtaaggacgatacagggaaagctcaagcacactcattgaatactgccccagttgcaac -ctcacttaatctgacaaaaataatgactactctaagtgttgcggaagcagtctcttccac -gagcttgtctgtatcacttcgtataggcatgtaactcgatagacacgaacaccgagtgag -aaactatattcttgcttccgtgtgtgtgacaccaggtaattgatgcggatataagctgga -gatcactcacgcccacacaaggcgctgctacctctttattccaatgtgtaagaatttgct -aacttcatttctagaccgcagctttgcggtcataatttcacggtacggacccttgggtta -gagacttgataacacacttcgcagtttccaccgcgcacatgttttagtggcttctaacat -agaatttttgttgtgacataaagagtgcgtgggagacttgcccgaccgttaagccataat -caattgaaagccccgtgagtcacatctaattggttgtactgcgcatttagctatccttta -gctgactcgaagagattcgattcctaatataggttaattagatggctgccgcgcgaagta -aaacgtgaaaaacgtagtgcgcagatctgcataactcgcgcttaattacttatgagtagt -tccaagttcgctacgttatgagagagattggaattaagcaaatatgttttatggtgattt -tgggatgagaaggactgctaagtacggctactaaacaaatttctaaaaccgccatctacc -ttatcttggagacatttaagttgtatatgtcactagtctagcttttgtctgtgggacgcg -ttctcggaatgagggaaatgcaagagccgattcatcaaatgcttatctaagaaagtagtg -gactattacaccaagcacgaatgccagggaactgctttcttgctcaggacctcgcgacaa -ggtaccccgcataagtcctagaattacatttggtcagcaatgctgacatttgaccgtgaa -aacataattttaatcagaaggcagctcacccgcttgctctagatcttatctttgtatgaa -tgtcagaatttactgcaatatccgttccgaatagtgagggcttagtatagttctctgtat -acaggtcacatcaaactccccctgtcctagtacagctctgagctttaattaattgcatac -atttccttcaatcatcagatgaaaacaccgcgaatcatgctcttctcgtatagggcaaga 
-gaagcaacaaacaactagcccgactcacgttcatccgccgtatccttgttcagttcttac -tccgtattaggtcagcgaaatctaatcagaataatcggtcgcgtatcaaaattaaaatcc -cgcttgaggttgacaattaaaacgctgagcagttatcggctattagatagtggggtgaaa -gtaattggctggaattatgttaaaacgtgatattaagctaaaatacgctacttgttgccg -acctaattcagtcattcgatattcagttagagccaagaataacaagcttgtataaattga -acggggtgcactaaacgatgtgttactctaatattcagcttggagtatacctgaaggcga -attcatgtatcggccaataataagacgttgaagatcacaatttggactagcaaaagaagg -tgatttatgcgtggggattgagtccactgtacgagtacggtctctggaaaattataggtt -cagggaatataaggaagtaaagataattaccaagagatttttggtatcgctatgacccag -aggtgttctaacgtctgttttgatccgcagaatttctgcctcaatgcatatttgacggac -ttgaactagagcctctaaagttaaatggcgacgcaactgttcctaaacttcaattattac -tactctttttttcctagggtattgtagaggccagtggacaaaataaatcaaatttaagat -gtttcggacattaacatcccccgtagcatagaaatcatcagttatccaatctctcatcga -gcttttacaatttctgctggcgctatggacagcatatgccgcgagacctccgcaagactc -acttgatcactgtaagtatcttcattagaggttagagcctatagttaagctgctgaccta -gtaaaattggtattttctaattttattgctcaagttaaaggttagtgaagggataatgac -gttatttttgaacaatgggttgtattcaattttatatcacgaatggaacccttcattccc -ggcataatactagacgacacgaacaagctccgatctatcagccaggcacgtgttaaggtt -taattccggcaaaccaatgaagcatcaaaaggtgacctgatgcaacttagggtcacgatg -agtttttcaggactacttattacctattaataagttaacatgagccttcataccccgtaa -gacaatacatactccaccaattagaattctgagccatcttatctttttgtatcatcgaag -ggtatggccgaataggttaattagttactcctaacgtctctacaggcatgcatttgacgc -accttcgaaaatagtcaatctctcgccacacgcgtctagtatgcagcatcaaaaatatag -tccacggtttccggattaccaaacgcggcaaagagaaacattgtatcgacggagataact -taatacagaaggaaggggcatcttcgaatacggatgaataattctatctgtttattctga -catcttgttttcaggttaatcttacgcattcaaatgacgcctgccccatgcgtgcgcaat -tattttctaatattgacgagagcaatctcactccttttgggtctatttatgttttattga -ggcacaagcctatacagaacaggtactattaaggccgtgagtgtgagactcaaaccgtgg -aaacaaaggatgggttgttcttggtacaagttttagtgcatgtgggcaatccttaccaaa -atcagatgctatccttaactttgggctgcatttaagatggcggttggaggcctgtgagaa -tcctgcgtgtcatctttaatgaccgaattcatccatgtagattcagatcacacactcatt -ccttgatgttgtctaaacaaaagttgttgtggacgcattggagggagttaagtaacaact 
-tgggatcgcatacttataaaaattatatgttaaactttcacaaacgctgaagtccaaagt -aactagcccaaacgcctcgagagtcactaggtattaatggtgtttgagttcctgtgaaat -agtgttcgaaggtaaaatttatgtaccaaatcgaaagaacacttaataaggcttgcttgc -acggaggtatgatgtttactgactctacaaccctaattttccagtacgtacattcattcc -aataggttagttctcaaagtgctatacaggctcctcaattgatgatatgcttcagccgct -ctatggatattagctcattttatttaggaagcccgcttagaggcttactatgagggaaat -gccaaaatgtcatacttttcggtgtgtcccatatgacaccgctttacatagaatttgaat -taaaacgcgctctcccgttcactaccatacttggtaccgtgcgcatattacatatagata -taggatcattttttaaagctgtactaggtttgatcgacaatcttatgctatactatatga -tgtaaccctcataatcaataccgatcgtacgatcctagcataggtggcaagcgattttat -gccgattattgtgttaaatagtctgtgagtgtgattatcagggctacgttggtagagggg -ttgtatagacctcgcacacattgtgacatacttaacaatatacgaaaactgatataataa -atccccttacccaaacaccaatcccgttgaatcaactaccataacgtctcccatataaat -tgcctacttgtttgcataaatctgaatacataacaccattgcaccttcttgtgttccaat -cccgttaagattgccttgtcagatgatatgcaagaacaatagcatttgctagcaattatt -aacagctcttcgaattgcctccacataacgcgggagggtatattttaatttggcaaatac -taagtactgttggcgtcatatgctattaacggttggatattaagttatgtcagccgtaag -caagagtgggcgaaatattttgttacccagtgagagcactcttagagtttggatacaata -ggccatatgttgacttaagaggacgtaactacgccgtacaccattgttcaaccgacttct -tggcaaatagaatcgtattagcaatcttaagaatagagacacgttcgtgttagggtatac -tacaaatccgaaaatcttaagaggatcacctaaactgaaatttatacatatttcaacgtg -gatagatttaacataattcagccacctccaacctgggagtaattttcagtagatttacta -gatgattagtggcccaacgcacttgactatataagatctggggatcctaacctgacctat -gagacaaaattggaaacgttaacagcccttatgtgtacaaagaaaagtaagttgttgctg -ttcaacagatgatagtcatgacgcgtaacttcactatagtaaattgaaacaaatacgcaa -tttagacagaatggtacggtcatgaatgacagtaattcgaagtgctagaccaacttaaaa -taggtaaacgtgcccgaaaccccccttaacagaaagctgctatcatggtgcagtatcgac -gtgttcagaaacttgtaacttttgagcaggtccgagcacatggaagtatatcacgtgttt -ctgaaccggcttatccctaagatatatccgtcgcaaactttcgatttagtcccacgtaga -gcccaagcgttgtgcgactccacgtgcatgcccagaaatacgagtttaaatttggttaca -tggttaattttgaccgaagcatcgcactttatgattgataattggattcaatatgtcgcc -ctatgcgaatgcaacatgatccacaatttggctataagacgtttaatccgtatcacactt 
-tgtttgcggctagtatagtaacgcccgtgcaccaagagtcagtaacaattataagtactc -cgcaggtacttcaaatataaaaactaatcaaacacgacccatatgatcatctgaagatat -ttggaactttctcgacaaccaccctcgtactcaatacttacactaatcgacaggcacacg -caacgtgtacagtcgcaccatattgagtcaagatttgcttagtggcgatgagcgtacacg -cttatttctctagtcacaattagttatctacgagacatcacgagggagcaaataagcgat -gttatggctacacataggcacgtatgaatatgatataagccagttaaacagtcgaaccat -cgagcaaattctcatgcaccaacccacacgttgaggcacaaagagtaagctgtttgaatg -taacttcttctgctgagcgggccccaacgtaaggatcaactagaagagaaaactcggtat -tagtttaaatgcgtcacggagcatgagtgcatttcactaagaatgtctgtgtaaccaata -taacatctatttgttatctgattgcctacttatggctttgcggtcgtggcgactaatgtc -tccaatccttttgaggtcggtaccaactccctttaaattacgctgtgcaggctcatgcac -tgcatacatatacggtagcaggtagggacctcacgcacccttattataatcaatagtagt -tatcagtcaacgaggcaggaatgctgaggtcgaggtgttggtatattttctatgtgccgt -ctaggcgactatcacgcattaccaggcgagatttaagccaattttgaatatagtcaacgt -aatttttactatgggttccaccgaaacgccttgcacaactaagaatcccataaaatatcg -atatcaaataaaagattgtgtcaataccttcatatatattttttcggttgactaacgtga -actaaggttaggggttttgtatgtctatataggaaacagtttcttttctgtcctacttta -gtaaagtcttcaagccttactccaaaatcacggtgattaagccgttactcagcagcatga -ttctgcctgctcgggtcctaaaatccagccttgtaagagtcgctgtgtattagctaggga -gacctttgttaaaaaggatatatcgcggcgggatgtgagtgcgtggcgcatactcaatct -tcagctcgtgtcattataatatctctcccccacgcttttcactagatatgccgtgtaagc -aaacaccttatgcttaatttcgaaaatattggtacttgaaaaaagctgtaggggtactta -atgtctggtaggagatcaggagagaattgagtgtaaaaccgtaaagccctcacctgactt -catgtaaatggcttagaagactccatgatttaataaatactacgaaggaaagactggatc -taaagataactctagtaaggccaactcccttcaatgctgttgccagttataatccaagag -ctgtccttttctgaaccatagcggcttctgaagcgaactagaagcaaagttggttctagc -cagacagccacataccctgtacgggtgtattactaaaactggtccggtattagttcacca -agggaggaattaggcaaaggatctaggtatgcaagtcggagtattacatccctaccctga -atccatcaataggttcctctgtactggccttcgcaatgagtattcaaggttgtacagccg -tataataataagatagtgactatgaacgggaagtaacccgctcaccttccccaaaacatt -gttatatctaagtattaaagtctgccgtagtgttaatactcgaaaataaacaactggcaa -attacaccgcacttaagccgcttttgatttatatttttccaatgcgcttttaaaaataat 
-tcagtcctacatactaattaagacccttaaacggagatatcacaagttaagttttaacca -tctcgactaggtggaactatagatacccaactcaatttatcattacctgtaatgttccta -gaaggattgcatttcatgtcaagacggtggagtttcacagcgaaacttcagtgtgaacag -attctgagaaatcacctaaacctattagtcagagcacccggttagaaccagttgtcaaaa -aatagagcggttgcatgagacagaagtaacgatgagatccgttgtaacgttgagacatct -ggcctatcgtcaatacagtcctcccttaaaaatatttttaaatactaggcaaacccaaca -taggttagtcctatgtgatacgccacatggtatatcattttgtaacgttacctagggata -atcaggaagtggaattacgcaaaagtagacagtgaaatgcttagggttatagtctagtcc -aaagataaaggataaagcacgtcagagaactatattagccgaatgggaatcattgttagg -agactgtggatcatgtctaaaaagcaacgcagaaacagtcatcgaaaaaatctcgttttt -gtttgaatctaaaagagctttgatgaccgatagtacctgtatactagttactgtattacg -tgtctaatgatttcggattggggtccccagaatcagacgtcattgtagacgattcaagtt -taccaatttaatttcccagctctccttggagaactatcgccaataattgcagtcactttc -cttttctgaaacgataaagccgtcagagttctctgcaacgttggacttacctgaggttct -aacccactttcggttctaatagtagttaacgacacaacgaataacctttactgtggggct -ttcacgatattttttcgcttattattaatggttacgtcataagctggtgtccaaattaag -gttaccggcttcgcagagtagttgtatccaagtataacttccctaatcataagatcgagg -tagaaaattaatgctgtctctaaccgaacagatatgtcccactatgtggtatggacgttg -ctaattacttctgaagggaaattggtcattatggatacgtgtctaccatcaggtcggacg -cagatatggttctgtcttcagttgatccaccgttctttataggataataactgacgatta -aagattatggtaaatagattaagccaattctcttcttgtcagtgaagcatccttaactga -cttgctctgcagcccctcatacatttagctattcaaagtaccggctcgtttcaaactctc -ccacctttggaagaggttgtcaacttgataagtatatcatttacagcattttttcggacg -tacctctaatgtttcattgcagaaaattagttttttctatcgcacattttgcaagtaacg -ttagagacacaattatctgcgaatgaactgctagatctgacgaccgggagcctcgcaaat -atcaaaaaagactgacatatatcaaggagtcgttgacaagtgctggtaagtcaattggtt -tatctgtcccggcgtttcgatcttaagctgaccatgcacggcagagtaatgtcactctcg -ttcttacaagtctgtctccaagggtcggcaaaaaagacccctccattctcgagcccactc -acgatatgtagggacgacaacttgtgcggcttatgaattgtctggactgcgggcgagggt -ccatatctccgaagttagaagggacatacctttagatgataagatcaattcttattgacg -aaattcatccacaacggggaacaacttcaccctagacttacgtctgaaaagacacctagc -gtcttataaaaggtcagtgccccgtttcgtaaggctggaattacctacgcaaacttaaac 
-ctcgcgcccttccttacgtatcgacaagatagaggctatcgcgaatgtactacggaggca -tgaatcatatactagaaccaagtgcctgtgatattaacaagatgatccgacgcgagcacc -gtaattctaggcataaaactccagcaatttgggggccgaaaacaaatgacgttagctaat -taattatatgacatgatcaaaggaggtcaatcacgcatcgagttcgacgtatattcattg -aacttcgtgcgtttgaaagaaacttttatgaaggcaaaattgatcctgtctcctatttca -tgcgtacctcctagttgataattccccgagcagtggttaggacacttttgtcggtatcaa -gttccggtctcaaaacgtaaaattctgtaatctgtatggatggtctgtgaattagttaat -ttttatgaagtcgtcgagacgcagttcctattgatttattctaaacggagatgtgcttcg -tgggactcggaagtagatctgtgtttatgattattgctactttagatgctgactgttaac -tccgtgttgtttttcaaccgtatatcacaaccgaattggatagaacctatagtttcaagt -tctgccacaaggtatcatatttacagttagtgctggttgcttctttcaaacgtggtgagt -ttgtgctatcacgtcaacggtagagctcagtggaccgagtgcgcgttcaaccctgttcca -gagagggtgtgatagcacatataccacgctcgtcgaggcgttcatgatagtttgcaagag -ccggtgttaaacacatattattattgttatccaactaatcggacctatgcataaagcatt -gtctaaacagaataattgcctatatacggtagttttagtgatttatatcttagtatcagt -tagagcttcgaactcttcaggttcctcatatttaacgttcttcgaaagcgaaaacttcta -caaacgaatgtaagcggttttccaagtagtacctataaatcacagaaagatctgtctcag -tatagttgaaatggtattcagctagtgacgtgtaccaattatcatagttcactcaagcaa -gacgctcattaacgaatatagacaagacactatatcatataataaaaaagaacatggtgc -tcgaacatagttgaattcaccatattgaaggggaatgctgacatgtaattcgctactaga -cgatcaattccctacttgtcaaagttgaactggtacgttcttggaattaaatatgattgc -gctggaccaaattgcgacttcttgagtttcagggcaaacgattgagccggaggatgtccg -tctcttacctttcttgcttatgataaacgacggtccctgtacatcactgggaattctcag -caaaaataattgggtaaatcgagactcgatgtattcggccacaaaggtgttagacgttaa -agattattcaacggggcgataataggatcataaccggtatgcaagcgcattgaaagagcc -atgagatccttatccgataaacgctgcacggtatgtgcagccttattgtcgatcacgaat -ttataaatgtagtctgggctgtaagttgaagacctaagttataatgaagtgcaataccaa -atcgattcatagtggattatcagactcaagatatctcctgataaattacagttgttaaga -tacggataaaatgagatttaagattagcagcctctaatctgtttcaatcccgttggaatg -tggtatgcgatcaaggttaagttaaaatcaagcctgtcttcagtcttgattcttgttctg -ccatcgcatgcggtctacgtgagttaatatgtagcttacgttctagcttgtgctaatctg -agtatagattcgtagaggaatattatcaagcttccacgcctcaacgtacgtgtattggtc 
-acacaagacactaaaagtggaagtagcgtaaactatagtctagttgttaaatgctcagtt -cttgttatattcgatatactcttggctaatttatgtctgagtatataaaattaatgatat -taacttgcatttcacggatcccttagaaaaagattttgaccgagcgcattataaacggtt -acaccgaatcaatagaagcatacccaatagctttctttgaatttattgcctgcgcaactt -ggctgactctctagatccgaataattctatatggtcgtgacgaaactagttcattactgt -ttaaaatgccaacatgtcttttgggccgataatggctctttgcaaaattactcaatgata -cgattgatcaaagcggtagttgctagtggtagcatgtaagtctatcaaatgtctgattat -ccgaaaatcttccaaaagagtccacgtaccatatctatctcatagcgacgcgaggggaac -cttatctaactatcattccatttaccgggtgactctcgatgcaggatccgattgggataa -attgcccagaaatggctcattcctgactaagggtaaggccgttctcagcaagggaacccc -gcgaatctaggcttataccatctagattgttaactacttgcctgtagttctacagccata -ctggacagttgtttctaaatgatcgggattcatgctagcactcctctgaatgcaccgcgt -aagtttaactattacgtccgtgggcagataaggatggaggctgtatgtatcttaactgtt -acctaatatggctggtaattatcaaagtaaggaccttaatgccatagcgctagcaatcgc -tttgtatactgaccatgtgccaacctctcttaatctgtaaaatataatgtcttagctaac -tgtggacgatcatgtctctgcctagagcttcgctgtatcaattcctatagccagcgtact -agtgacacaacaacaccgtgtgagaaaagatattagtccttacgtctgtctctctacagc -ttattgatgaggattgaacatggacatatagctccccctcaaaagcagatgctacctctt -tattccattctcgaacatttgccgaacttaatttcgacaaacctgaggtcacgtcttaat -ttatcggtaacgtcacgtccctttgagactggataaatatattaccaggggccaacgagc -aattgttggaggcgcttctataatacaaggtgtcttgtcaaagaaagacggcgtgcgtct -cgtgcaactcacttaaccaatattaatgtgaaacccccctctctcacatcttatgcggtg -tactgccctggtacatttcctgtacaggactccaacagtgtagattcctaagatagctgt -tggagttgcctcacgccagatcgaaaaactgaataaactagtgagctgagctgcagaaat -accgcttaattacttatgactagttcaaagggacctacgtgatgtcagacattgcaagga -agaaattaggtttgtgcgtcattttggctggactagcactccttacttcccctactattc -aaatgtcgtaaacagcatgagacaggatcgtgctgacatttaaggtctattgggaacgag -gctacctttggtcgcgcgctcgcgttctccgaatgaccgaaatgcatgagcacagtatgc -aattgcttatagatctaaggtctggtcgttgaaaccaagcacgtaggcctgggaaatcag -ttcttcctcagcaactacacaaaagcgtccaagcattagtacttgtagtaaatgtccgaa -cctatgcgctcatttgaaagtcaaaaaatatttttaagcagtaggcacctaacccgattc -ctctacttagtagctttctttgattctcagaattgactgcaatatcactgcacaattctg 
-tgccattactagacttctctgtattaacgtctcatcttactaacactcgcctaggacaca -tctgagagtgaagtatttcaatacatttactgaaatcttcagttctaaaatccccgaata -aggctcttatcggtttggccaacacaagaaaaaaacttcttgcaccactcaccttcatac -gcaggagcctggggaacttagtaataactatttcggcagacaaagcttataacaagttgc -cggcgcgtataatatttaaaagaccccttgagctgctcaattaaaacgctcacctggtat -aggctattagatagtgccgtcttagtaaggggcgggaattatcggataaactgatatttt -gataaaataaccgacttgttcacgacataagtcactaaggagattttatctttctccaaa -gtatatcttccttggataatttcaaagcgctgcaatttaagttctgttactagtttatgc -tgctgggaggtgaccggaaggcgtagtaatctagaggcaaattataagaagttcatcata -tcattttcgactacaaaaacaaggtgttgtatgccggcgcattgtgtaaactggacgagt -accctagatggaaaattatacgttaagccaagatttcgatgtaatgataattacctacac -atttttgctatccataggaacaagagctgttctataggctcgtggcatacgaacatttgc -tgccgctatgaatattggaagctcttcaactacagactctattcttaattgccgtcgaaa -atgggccgaatcggctattattaatactcggtttttccgaggggattgttgtcgacagtc -gtaattattattaatattgatgttggtgaggtcatttaaatacaaccttgcagacaatga -ataagggatccaatctctcatactccttttacaattgctcatgcccctatgcaaacctta -tgccgccacacctccgcaactctctcttctgaactgtaagtagcttcattactggtttga -gactatactgaagctgatgacattctaaaatggctattttcgaatgtgattcataatgtt -tatcgtttgggatggcagaatcacgttatttttgatatagcccgggtattctattgtata -gaacgtatgctacaagtcattccccgaagaagactagaagtaaacaacatgcgaccatcg -ttaagccacgcaaggctgtagctttatttcccgataacctatcttccataaatagcggac -agcaggatactgacgctcaacatcagtggttatggtctaatttttaacttttaataaggt -aacttcagcaggcatacacagtaactctttaatttataatcaaattagaagtctgacact -tcttatatttttctatcatccaacgcgatcgcccattagcttattgtgttactaataacg -tatctaaaccaatccttttcaagctactgcctatattgtcaatatatacaaacaacagga -tagtaggctgcttaaaaaatattgtcaaccgtgtacgctttacaatacccggaaatcaca -aactttgtagacaacgagtgaaatttatacactacgaagggccagcgtacaagacccatg -aattaggcgatatgtttattctgacatattggtttatccttaatctgtcgctgtaaaatg -aagccgcccccatccctgcgaattttttttcgaagattcacgactgaaatataaatacgt -ttggctatatttatgttggagggaggcaatagcctttactgttaaccgaagatttagcca -gtgagtgtgacactaaaacactggaataaatgcaggcgttcttctgggtaaaaggtttag -tcaatctcgcctataagttcatatagctctggatataattatctggcccatgcatttatc 
-atggcgcttggtgccctgtgtgaagccggcctctcatattgaaggtccgaagtattccat -gtacattaagatcactctctcattcatgcatcttggcttaacaaatctggttgtccaagc -tttccaggcacgtatggtacaaattcggatcgaatacttataaaaatgatatgttaaact -gtctaaaacgctcatctacaaagtaaagtgcactaaccaatagagtctcaagaccgtgta -atgctggtgcactgaatgtgtaatacggttagaagggattagttatgttacaaatccatt -gaaaacttaagaagcattgcgtgctcggagggtgcatcttttatcaagagactaacatta -ttttcaacgacgtacatgctttacaatagggtacttatcaaacgccgagaaacgcgccta -tagtgatgttatgattatgacccgatatccattggaccgaattttatgtaggttcccagc -gtactcgcgtaatatctcggtattgccataatgtaatacttgtcggtctctcccagatga -aaaagcgttacagagtatttcaatgaaaaacagcgcgcaacgtcaatacctttaggggta -acggccgctgatttcatatagatatacgataagttggtatagctctactaggtggcatcc -acaatcgttgcatttactatagctggttacaatcataatctataccgttccttacatact -accatagcgggatagcgtttttttgccgttgattgggtttaagaggatgtcagtctcatt -atatccgattcggtgggagagccgttgttttcaaatcgcacactttgtgacataatgtac -aagataacaaaactgatataagatataaactgtcaatatcaccttgacacttgaatcaaa -gtaaattaactcgcaaatataatttgactaattgggtgcagatttctcaattaataaaaa -aatggcaccggatgggcttacaagccccttatcattcacttgtatcatgatttccaagaa -caatagaatttgctagcaagtatgaacagagattcgaattgcatccacagtacgccggag -cgtttattttaatgtggatatgacgatgtactgttggcggcatttgctagtaaccggtcc -ttatttacgtagcgcacacgtaagcatgtctgggagaaatatggtggtacaatctcagag -aaagattacagtttggtttaaataggacttatcgggtcggaagtggaacttaataagcag -tacacaattgggcaacagacgtcttgcctattacaataggattacaatgcgttagatttc -agacacgttcgtgtttggctattcgtcaattccctaaatagttagacgatcaactattat -caaagtgattctttgttcatcctccattcatgtaacagatggcacactacgcataacgcc -gaggaattttaacgagatttaagagagcagttcgggcacaacccacttgactttataaca -gctcggcagcataaacggtaatatgtgacaaatttccaaacgttataagaacgtatgtgt -acttagaaaactaagtggttcatgttcaacagatgtgacgcagcaagcctaacttatcta -ttggttttgctataaaagaacaaagttacacagaatcctaagggcttgtttcacacttat -gcctagtgcttcaccatcttaaaatagcgaaaccggcacgaatcaaaccttaaaacaatg -cgcagatattggtgatggtgactccgggtatgataatggtaactgttgaccagcgcccac -ctcatcgaagtatagaaagtggttaggataaggatgagaccgaacttatttccggccata -actttagattttctacctagtacacaacatcagggcggacacgaaaccgccatcacatca 
-tataccaggtttaatttgcttaatgggggaagtgtcaacgaaccttcgaactttagcagg -catatggccattatatatggccccagagcagaatgctacagcagacaaaatttggattta -tgtagtttaatacctatcaaacttggtgtgaccatacttgtctaacgacagtgcacaaag -tgtaagttacaattattactactcagcagcttctgcaatgataaaatcttatcatacacg -tcacatatgataatatctacttagggggaacgggctccacaacctacatagtactcaata -cttacactattcgacaggcacaccaaacctgtacagtcccaaaagattgagtcaactttg -cagtactgcagatcacagtaatagcttagttagcgagtcaaaattagttttctacgagac -tgcacgaccgtgcaaatttccgatgtgttggctacaaatagcaacgtatgaatttgtttg -aagccacgtaaactgtacaaccttagagataagtctcaggctactaaaaacacgttgtgg -cactaacaggatcatggttgattcttacttattcggctgaccggcccaataagtaacctt -caactagaacagaataatcgggagtagtttaattcagtcaaggtgcaggtctcattgtaa -ctaacaagctctgtgtaaccaagttaaaatcgttttcttagcggattccctacttatgga -tttgagctcgtccacaatattcgatacaagaagtttgtggtccgtaacaacgaaatttta -attacgctgtgcagcctcatccaaggaattaatagaaggttgatggtaggctccgaacgc -tccatgattataatcaagtggactgtgcagtaaacgaggaaggtatcctgacgtcgtggt -gttcgtttttgttatttgtgccctatacgagtagataaaccatgaacagcacagtgtgaa -cccatggttgattttaggctaccttatttttaatttccgttacacagaaacgaattccac -aactaacatgccattaatttttcgatatcttataaaagatggtcgaaattcattcattta -ttttttttcggttctcgaaagtcaactaagctgtcgcgttttgtttctctttagaggtaa -aagtggctttgatctcctacgtttggatactagtcaaccattactccatttgatccgtga -gtatcacctgtctaacatccagcattatgactcctcggcgaagaaaagacacacttctta -gagtcgatgtgtattagctagggacacagttgtttaatacgatagtgagcccagggaggg -cagtgcgtcccccagtagatttattcagctagtgtaagtataagatatctcacccacgag -gttcaagtgatatgcagtcttagaataatacttatcctgaatttcgatattatgggtact -tcaataatccgctagcgctactttatgtctcgttggacagcaggacacatggcagtctta -aacactaaagacatcacctgaatgaatgtaatgggattacaagaatcaatgaggtattat -atacgacgtaggaaactctggatatatacagtaatctagttacgccatcgcacttcattc -ctctggaaacttagaagacatcagctgtacgtggaggaaccagacccccgtatgtagcca -aatagaaccaaagttgcttatacaaacacacccaatgacaatggaccgctggagttcgta -aactcggaacgtagtactgcacaaacccagcatttagcaataggagctacgtatgcaact -cccacgtggtaataccttcaagctatcaatatataggtgcctagctaatcgcattcgcaa -gcagtattcaagcttgtaaaccagtataataattacagaggctctatgaaacccaacttt 
-ccagctaaaagtcccaattaaatggttatttcgtacttttaaagtcgcccgttctgttat -tacgcgaattgattctactccaaaattaaacacaaattatcaaccgtttcatttatattt -gtcaatgcagctgtttaaaataaggctctactaaattataattaagacacttattaccag -atttctctagttaagtttgaaccagctcgactaccgcgaaagatacattcccttctctat -ttttcagttcatctatgggtcagagaagcattgaatttattctattcaccctcgtcgttc -acagcgaatcgtcagtgtgatcagtgtatgagaaatatcctaaaccgtttagtcagacca -cacgcttagaacaagtggtctaaaaagactgccctggaaggagtaagaagtatacagctg -atccggtgtatccttcagtcatctgccctatactaattacacgacgcaaggaaaaatagg -tttattttctaggcaaacccttcataggtgactccgatgtgttacgaatcatgcttgaga -atgtgctatcgttaccgacggataataacgatctccaatgaaccaaatgtagaatgtcta -ttgattacccttttactattcgacttagagataggagatagaacctcagtgtactttttt -agccgaatgggaatctttgggaggtgaatggccataaggtcgtaaatccaaccctcttaa -agtcttccatattatatcgttgttcgtggaatcgataacagatttgttgacccatagtaa -atgtatactagtttatgttgtaagtgtagattgttttccgattgccgtccaaactttatg -tcgtaattgtagaccagtaaagttgaccaaggtaagtgcccagcgatcctgcgagatcga -tcgccaatttttccagtcactgtaagtgtaggtttagataaagccgtatgagttatatca -taagggcctcggaaagcagcttcgaaccaaagttcccttataatagtagtttaactataa -aagtatatactggtctgtcgccctttcacgatttgttttaccggtttatgaagcgttacg -tcattagagcggctccaatttaaggttaacggcttccatgtgtagttgtatacaaggata -acttaaagtatctgttcagcgagctagttaagttatcctcgatagaacacaactcagagg -tcccaagatcgggtttgcaacttgctaatttattctcaaggcaaattgggaattatcgat -acctgtataccataaggtcgctcgatgtgatgcttatgtcttctggtgatcctaccttag -ttagtgctgattaacggaacattaatgtttatcgttttgagatttagccaattctctgat -tctaactcaagatgccttatctgacgtgctatgcagcccctaagtattttacattgtaat -aggacacgctcctttaaaactcgccaaaaggtcgttgtggttctctactggttaactata -taatttacagctttgttgagctagttcctctttggtttaagtcctcaatattagttggtt -cgagcgataagttggctagttaccttagtcactatattagatccgaatgttatgcttcat -ctgaagaccgccaccctccaaaatttcttttaagactcacttattgcaaggtgtaggtga -attcggctcgtttctcaagtggtgtatctgtacacgagtttccatattttcatcaacagc -caccgcacacttatgtcactctaggtattaaaagtcgctctacaaggggacgcaattaag -aaacagacatgctagtcaaaaataaacatagcgaggcaccactaattcggccgcttatca -atgggatgctctgcgcgagacgcgccagagctcagtagttagttcggacatacatttact 
-tcagatgatcaattagttttctacaaatgcttactctaccccgaaaaaagtcaccagact -cttacgtctctttagtatccttccgtcttatataaggtcagtcccccgtttcggtaccct -ggaatttactaagaataatgaaacagcccccaaggacgtacgtttacaaatgatagacca -gatcgcctagcttattccgacgcatgttgcatagaattgaaccaacggaatgtgagagta -actagatgagccgaccacagcacccgtttgcgtcgcagaatacgcctgatagttcggcca -cgaaatcatatgtcctttgagtattaagtatttgtaatgatcaatcgagctcaagcaagc -ttacacttcctcggatattcagggaacttagtgcctttgaaagatacgttgatcaacgaa -aaattgataatggctcatatggaatgcctacctcatagtgctgaattaacacagcactgc -ggacctaacttttcgaggtttcaagttcacgtctcaaaacctaataggctggaatatgta -gggatcctcggtgaatttgtgattgggtttgttgtagtactgaccaagtgaatattcttt -ttttctaaaagcagatctgctgccgggcactacgaaggagatctctgtgtatcattattg -cttcttgacatgatgactcttaaatcactgtgggtgtgcaaaacgatagcacaacccaat -tcgatagtacatattgttgatacttcgcactaaaccgttcatatttaaaggttgtgctcc -ttccttcgttaaatactggtgacttggtcctatctactattagctagacctctggggaac -cacgcccccgtaaaacctgtgcaagagagggggtcatacatcttagacatcgcgcctcca -ccagggaagcattgggtgattgaccaggtgtgtaacaaatatgattattcttatactaat -attagcaaagatgcataatgatttgtattaaatgtataattgaattgataagggtctttt -agtcagtgatagagtagtataaggtagacattagaactcttaaccggacgcagatttttc -ggtcttagtaagccaattagtcgacaaaacaaggtaagagcggttactagtagtacctat -aatgcactgaatcttcggtcgaagtatagttctaatgctatgcagattgtgacggcgaca -aatgttcagacttatatcatgaaacaagctcttgtaagtattgacaaatgaaaagattga -atatttttaaatacaaaatgcgcctacttattaggggaattaaccagattgaaggccaat -cctcacatgtaatgagataatagacgataaatgaaattcttgtaatagttgaactgctac -gtgatgggtattatatatgattgagatcctccaattgccgacgtcttgtcttgatgccca -aaagattgtcaacgaggagctccctcgcgtacctgtcgtccgtatcataaacgacgcgac -atgtacagcactccgaagtataagcaataataatgcgggtaatccagactagatcttttc -ggactcaatgcggtttcacggtaaacatgattaataccggagagtagtcgagcttatcag -cgatgcaagcgaattcattgtgccaggagatacgttgcagataaaaccggcaacgtatgt -caacaagttttggcgatctcgttgtttgtattcgacgaggcgcgggaacttcaagaacta -tcgtatattcaagtccattaccttttagtttcagactggtggagctgactaaagttatat -catcattttgtacactggtttagttaacgataatttcagatttaacatgaccagacgata -atcgctgtatatccagttggaatgtggtttgccagaaaggttaacttataatcaagcctc 
-tcttcagtcttgattcgtcgtatcccatccattgcgctatacctcagtgtatttggagct -gtagttataccgtgtgctaagatcagtagacatgacgagagcaatattatctaccttaca -agcatcaacggacgtctagtcggaacaaaagactctaaaactcgaacttcaggttaatat -actatagttctgtattcagcagttattcttatattcgatattatcttgcctattggatgt -ctgactttagtatattaatcatagtatctgccatgtaaaggtgccagtactaaatctgtt -tcacagtgcgaattataaacggttacaaccattaaagacaacaagaccctatagctttat -ttgaattttgtcaatgcgcaacttggagctcgcgatacatcccaattagtctatagggtc -gggacgattctacggcatttctggttataatgacaacatggattgtggcccgagaatcgc -tctttcattaattaagcaatcattacagtcttataagcgctacttccgagtggtagcagg -taactcgatataaggtcgcatgagccgaatagcttaaaaaacaggccaccgaacattgat -agagaataccgaccacagcgcaacctttgattactttcattaaattgtacggctcactcg -acatcaagcttaagattgcgataatgtgaactcaaatggatcagtactgaagaaccgtaa -cccacttcgcagaaagcgtacccagagaagatacgctgttacaatatacagggtgaaatt -attgcctgttcttcgtaaccatttcgccaaacttggttagaaatgatagccattcatgat -agaaataagctgaatgataccagtatctttaactatgtagtcagggggaagataacgatg -gtccatgtatgtttctgatatgtgacagtattggccgcgtaatttgctaacgaagctact -taatgcctttgagcttcatatagatttctttaatcaaaatcggcaaaaagatagtatgag -ctataatatatgctagtagagaactctggaccatcatctatatgaatactgattcgagcg -tgcaattactttagcctgcgtactactgactctacaaaacactctgagataagtttgtag -tcagtaagtcgctctctataaaccttttggatgaccattgtacagccacttatagatccc -aataaatagcacaggagacagagtttttcaatgctcgatcatttgccgatagtattttcg -tctaacctcagggcacctattatttgatacctaacctaacggccctttcacaatggagaa -atatatgacatcgggacaaacacaaatggtgggtggccaggagatatgacatggtggcgt -ctctaagaaacacggactccctctaggcaaactcacgtaaccaattttaatgtcaaacaa -aacgctcgaaaagattttgccgtgtaatgacctggtacattgactggtcaggaatacatc -actgtagttgccgtagtgtcctgttggtgttccatcaagacacatcgtataacgcaattt -acgacggacatcagatcaagttatacagattatttaagtatcacgtgtgcattgggacat -aagggatctcacacatgccttggaacatttttgctttgtgccgctttttcgctgcactac -caatccttacttaccagtatattcaaaggtcgttaacagaatgagaaaggttagggctct -aagttatcgtcgattgggatagacgagacatttgcgagcgccctccacggatacgaatct -cccatatcaatgtgaactggatgctatgcagtttagttcttacgtctcctagtggtaaaa -atcaaagtagcactcgcatagcagttattcagaacctaatacacaaaaccgtcaaacatt 
-ttctaattctaggtatgggccgatcataggagctaaggtgaaactcataaatgttttgtt -agatctagcatcctaaaaagatgcatatactgagtagctggcgtgcattctctcaattgt -atcctttttaactgaactagtcggtcccatttcgtgactgagatctattaaccgataaga -ttaataacactcgcattcgtatcagctcagagtgaagtttttcaataatttgactgatat -attaacttctaaaataaccctttaagcctcggatccgtttcccaatcacatcaaaaattc -ttattccaactatctacggattaacaacgtgcatggggatcgtagtaagaacttgttccg -atcactttgagtatatcaagttgacggcccggttattattgaatagaaacattcacctgc -taaattaaataccgcacatcggatacccgatttcagagggccgtcttactaagggcaggc -tttgttcggtttaactgagatgttcattattttacagtatgcttcaactaatatgtaacg -aaggacagtggatctgtctccatagtagatcttcagtcgtgaatttcataccgctcctat -ttaagttcgcgttcgagttgttgatcatggcacgtgaaagcaacccctagtattctagac -gaaaattttttctagttcatctgataatttgccaattcaaaaacaaccgctggtttcccg -gcgcattctctaaaatggaagtcgaacctagagccattatttgtcggtaacccatgagtt -ccttcttttcagaagttaatacactgtggtcctatacagaggaaaaacagcggttatata -cgatcgtggcataacaacattggatcaagatagcaatttggctacctattctaattctca -ctagattcggtattccactacaatatcggcagattaggattggatgaataatcggtgttt -aagtccggttgcgtctccaatctcctaatttttattaatattgatcttggtgacctattg -taaataaaaacttcaagactttgaataacggtgaaaagatagaagactcatttgaaaatg -gatcatccacagatccaaacattagcaagacactaatccccaactagctattctgatcgc -gatcgtgctgcagtactcctgtcacaatagtctgttcatgatctaattctttttgggctt -tgttcgatggtgattcagaatctttatccggtcgcttccctgtagctactttgtggggat -attgcccggggattatagggttgagatcgtttcctaaaagtatttaaaccaagtagactt -caactaaactacatcagaacatcgtgaagacaccatacgcggtacctttatttaccgata -acatttcttcaagaaataccggtaagcagcataatgaccctaaacagctcggggtatcgt -cgtagttttaaattttatttaggttactgctcaaggaataaaaactaactatttaattta -taataatattacaaggctcacactgattagatttgtctataagacttcgcgatcccccat -taccggattgtcttaagaataaactagataaaccatgcattttctagataaggcctttag -tctaattagatacaaaaaacacgatagttgcatccttaatttattgtgtcaaacctggaa -ccttttaattacccgcaaatcactttatgtcgagactacctctgaaatttattatctacc -taccgcatgaggacttgaaccatcttgtaggagttatgtttattagctaagattcgttta -tcctgtagcggtccatgtatattcaacaagcaaaaagcactcagaattgtttttagttga -gtcaagactgatatataaataagtttccctagttttttcgtggtgggacgatattgaatt 
-gaatcttaaccgaagagtttcccactctgtcgcacaataatacacgccaatatttccagc -cctgcttatgccttaatcggttactcaatctcccattgaagttcattttgatctgcatag -aagtttcgggcccagccttttttctgccaccttcctccaagctctgtagacgcactctaa -gattgatgctcacatgtattaattctacattaacataaatatataagtcatgcatcttcg -agtaaaatatctggttctccaacatgtcctggcacgtatcgttataatgcccatacatgt -agtattaaaatgattgggttaactggatattaagatcatcgaaattgtaaagtcaaatta -acaatactgtctcaagaccgtgtattcctcgtgctcggaagggctattacgcttacttcc -gttttggtatcttaatatgactttcaaaaattaagttgcagtgagtcctacctgcgtgca -tcggttagcaagagtataaaagttgtttaaacgaactacttgctttacaataccggtcgt -atatatcgccgtgaatccagaagattgtcttctttggattatcaaccgagatcctgtgga -ccgatgttttgggaccttcacagaggactccaggtagagctcgcttttgcattaatctaa -gaattgtacctctctaaaagatctaaaacagtgaatgtgtatttcatggaaaaacacaga -gaaacgtaaattactttaggccgaaaggcacatgagttattatacatatacgagatggtg -gtatacatcgaattcggggcatacactatagttgcattgtatttagctgctttaaataat -atgatattaccttccttacataagacattaccggcataccctggttttcaacttgtgggg -ctttttgacgatcgcactctcatttgatccgagtagggcggtgacccctgcttttcaaat -acaaaaatttcgctatgaaggtaatagattacttttcgctgttatgatagaaacggtaaa -tttaaaattgaaacttctagaaaagtaaagtaacgagaaatgattttgtgaataatgcgg -tcatgattgcgcaagtaagaaaaaaaggcaaaaggatgcgcggaatagaaacttatcagt -cacgggtatcttgatttcattcttcttgtcaattgccgacataggatgaaatcagattcc -aatgcaatacacagtaacccccacccttgattgtaatgtcgatttgaagttgtacgcgtc -gacgaagtggatagtatacgggccttttgtacggtgcgatcaactatgaatctcggcgag -ttagatggtcgtacaatctcacacatagaggtcacttgcctgtaatgacgaattttcggc -taggtactcgaactttattagaagtaaaaatgtgggcaaaagaaggattccattttacaa -gacgattacaatgagttacatgtctctcaacgtagtctttccctagtagtctttgaacta -tttaggtactccagaaaattttagcaaagggtttctgtgtgaatccgccattcatgttta -tgatggaacaataagaataacgccctcgtatgttatcgacagtgaagtcagcagttcggc -caaaaacatattcaatttagtacagatccccagaagttaagctaagtgctctaaaatggc -ctaaacggttatcaaagtaggtctaattactatactaacgggtgcatcgtaataactgct -gtcgatgcaacactatatgatagtgtcgttttgctatatatgtacaatgtgacaaagaag -ccttagcgattcttgcaaacttaggacttcggattctcaatcttaaatgtccgaaaacgc -aaagattcaaaaatttaatctatgagcagatatgcctgatggtgactacgcgtatgttaa 
-ggctaaatgttgacaaccgcacacataatcgaactattgatagtcgggagcataaccagg -tgaacgtactttgttcacgacatttattgacatgttctaaatacgtctcaaaatcacggc -gcactagaaaacgcaatcaaatcattgtcctggtttaagggccgtaatgccggtagtgtc -aaacttcatgagaactttagctggcttttggccagtatttagggaccaagagcactagcc -ttaagctgaatattttgccatttatctactgttataactttaaaacttggtggcaccaga -cttgtcgatacacacgcatcaatctgtaacgtaaaaggtttactaagaacaagcgtagga -attgagtttatattatatttaaactaaaagatgatattagcttctgagggcgatagggct -ccaaatcataaagaggaatatattattacacgattagaaacccacaacatacctcgaatc -gcccaaaagtttgacgaaacttggcagtactccacatctcagtaatacagttgggagagt -ctcaaatgttgttttattactcaatgaaccaccctcataatttcactgctgttccattaa -atttgcaaacgatcatttgctttgaagaaacgtaaaatcgacaaaattacagataagtag -atgcataataaaaaaaactgctcgctataacacgatcatcgtgcattcttacttaggagc -atcacccgcacaataacgtaccttaaactacaacactattagaccgagtactgtaattca -cgaaagctcaagctcgcattgtaaagaacttgctctctcgtaaaatgtgataatagtttg -cggagaggattcaattattttccattgcacctactccactagattcgataaaagaaggtg -gtcctcccttaaaaagaaatgttaagtaacatcggaaccataagcaaagcatgtaagtga -accgtcatccttccctaagaaacataaaggtttttaataatgtcgactgtgaactataac -tgcatcctttcctgacctactccggttccttgttgttatttctgaacgagaccagtagat -aaacaatgtaaaccacagtgggtaccaatggtgcatgtgacgctaccgttgttttaagtg -cccgtacaaacataagaagtcataatcttacttgaaattaattttgccttttattttttt -tcaggctcgaaattaatgatttgttttttttgaccttctagttacgctaatatgcggtcg -cctgtggtttctattgagtcctataacgggatgggatctaatacgtttggttactagtaa -acaaggtataaatttgataccggagtatcaactgtataacatcaagctttatgactcata -cgcgaagtaatgacacaaggctttcaggagatcgcgagtacagagccactaaggggtgta -ttacgatagtgacaccaccgagcgcactcactccccaagtagatttatgatcctacgcta -agtattagatatataaccaaagaggttctagtcagtgcaactcttagaataataattagc -cggttttgcctttttaggcctaatgcaatattcagctagcccttatgtatctcgcgttcc -acagcaccactcatggcacgcgtttaaactaatcaaatataatctatgaatgttatgcca -gtacttgaataaatcaggttttttataagtccttgcatactctcgttatatactgttaga -gtcttaccccatagaaattctttcatctgcaaacttagaagaattctcagctacggggag -cataaagtccccaggatgttgacaaatacaacaaatgtggcttatacaaacactccatat -gaaaatcgaaccctcgtggtagttttagccgaaccttgtacggataaatccctccatttt 
-ccaatagcagatacctatcctactacctcgtggtattaaattaaagcttgaaatatagag -ctgcatagcttatccaattcccaagcacgagtctaccgtcgtaaccacgatttgatttac -agacgctagagcaaacccatctttaaacatataagtaaaaattaaagggtgagtgcgtac -gtgtttactagcaacttcgcttattaagacaattgtttataagccataattaaaaacata -tgttcaacaggttcattgatatttgtaattgcacaggtttttaataaggatctacgtaag -tataatgaacaaactttttaccagagttatattctgtactttgaaaatgctcctctaccg -ccttagagactttcaattagattttttgcagttaatctatgcgtaagtgaaccatgcaag -ggatgcgattcaaccgcctcgtgctaaccctatcgtctgtctcataactgtaggtctaat -ataattttcagttttcgaacacataaccctttgaaaatctgctatttaatgtctcacctg -catgcactatcttctatactgctcagaacggctatacgtcactatgctccaagtgacgat -ttaaacgaagcaaggaataataggtttattttagtgcaaaacaattaagtgcggactacg -tgctctttacaataagccttgtgattgggctataggttaagtcccatattaacgatctcc -aatgtacaaaatcgacaatcgctttgcattacccggttactagtcgaattacagatagct -gttagatactcactctaattttggacaacaatcccaatcttggggtcgtctatcgcctga -agctcgtaaatccttccatcttaaacgattacatattatagacttgttcggggtagagat -atcacagttgtgcaaacattgtaaatcgatactagtttatgttggtagtctagttgcttt -taccattccccgaaaaacttgatctactatttcgacaacagtaaacttgaactaggtaag -tgaaaacagagaatgcctcatagtgccactatttgtccactatatgtaagtgtagcttta -cataatccactatgactgagatcattacggcctaggaaagcagcgtagaaaaaaagggcc -cggatattacgactgtaactataaaactagttactggtagcgcgccatgtatagatttgt -tttaccggttgtggttgcgttaacgaatttcagccgcgaaaattgatccgttaaccagtc -catctcgacttctataaaacgataaagtaaagttgatgttcagcctccttcttatggttg -catcgagagtacactactcagtgggaaatagatcggggttcctacttcagattgtattat -ctaggcaattgccgattgtgccatacctggataaaataagctacctacatgtgatgctta -tctattatcgtcatactaccttagggtgtcctgttgaacgctacattaatctttagccgt -ttgagatgttccaatggataggagtctaacgcatgatgaagtttaggaaggcagagcatc -ccactaagtatgtgacagtgtatttcgaaacgagacgttataaatagaaaaaaggtcctt -ctggttctattctgctgaactattgaatggaaagattggttgacctacgtactatttgct -tgaagtcatcaatttgacggggtgagagacatatggtgcatactttacggactctatatt -ttagatcagaagcttagcagtcttctctacaccccctcacgacataattgcttttaagaa -tctatgtttgattcctctacgggaattcggatccgttcgcatgtgcggtttatctaaacc -aggggacatatgttcagctaaagcatacgaacactttgctaactagacgtatgtatagta 
-gctataaatcccgacgatatttacaaaaagaaatgagactcaaatatatacatagcgacc -ctacacttattcgcaccctgatctaggcgatcctagcacccacacccgaaagtgagcact -agtgtcttccgtattaaatttactgcagttgagattttagttgtctactaaggattactc -taacccgtaataaggatcaagactcggtactagctttactatcattccctatgtgttttc -ctaactcacaagggtacgtaccagcctatgtaattacaataatgataaagacacaaagga -agtaactttacaaatgagtctccagttacactagcttagtccctcccatcttgctttgaa -gtctaaatacgcaatctctgaggatatacagcagaagaacactcataacgttggagtcca -agaattagactcatagggcccccaacatttaatatgtactgtgagtttgaaggtgttcta -ttgttaattcctgctcttgatacatgacacgtactccgtgtttaaggcttcggactgact -ttctttcataagttgagcaacgaaaatttcagaatcgataagttggattcactaactaat -acggctgattgaaaactccactccggacctatatggtcgacctttatacgtaaccgatat -aaaacttataggctggtatatcgagccttcctagcgcaatttcggatggggtttcttcta -ctactcaacaacggaatagtctttgtttagtaaaccagagctcaggacgcccaatacgta -ggagagcgctgtggagcatgtgtcattatggactggagcactcttaaatcactctgcgtg -tgctaaacgatagatcataacatgtcctgagtaaattttcttgatacgtcgcaatatacc -gttattagttaaacgttctcatccgtcatgcgtgaaatacggctgtcgtgctcagatata -ctattagcgactcatctcgcctaacacgcacacgtataaactcggaatgactgccgctct -tacatattagaaatacagactacaccacggaagcattgggtcattctcaaccgctgtata -aaagatgattagtcttataataagattaccaaagaggcagaatcatgggtagtaaatcta -ttattcaagtgattaccgtcgtgtaggcagggagtgaggacgagatggtactcaggacaa -atattaaccggacgaagtggtttacgtcgtactttcactattagtagtaaatacaaggta -acaccggggaatagtactaaatataatgatatctatcttcgggagaacgagtcgtctatt -gctttgaacattctcaaggcgtaaaatgtgctgacttatagcatgatacaaccgattgtt -acttttgtctattcaaaagattgaatagttttttatacaaaagccgcatacttatgacgg -ctagtatacagtttcatcccctagcatcaatgctatggacagtattgaacttataggaaa -ttcttctaatagggcaaatccgtcgtgatgcctattttttttcagtcacatcctcaaatg -gcactagtattgtcgggatcccattaacaggctcaaccacgagctcacgcgaggacatgt -agtccgtatctttaacgaagcgacagcgacagaactcccatggataaccaattataaggc -ccgtaatcctctagacatcgtttaccaataaatccgctttctccgtaatcatgttgaata -ccccagagtagtccagatgataaccgatgaaacacaagtctttctcaatgcacttacggt -gaacttattaccgccaacgtagctcatcaaggttgcgacatctagttgtgtgtttgcgac -gagcccagcgaacttcatcaactttcgtatattcaacgccttgtaattttactttaagac 
-gcctggtgatgtagattcttagataatcagtttgttatcggctgtactttaccataattt -cacaggtttcaggtcaagaagattatagctgtatatacagttccatgctcggtgcacaga -aacgtgatcggataataatcaatcgcttatgtcgtctttaggcgtatccaatacatgccc -cgataccgcagtgtatttcgacatgtaggtataccgtcgcatttgagctcgagtcaggac -gtcagctagattagattccttaatagaatataccgacctctagtccgaactaaactatag -ataacgccaacttcaggttaattgtctagtcgtctgtttgcagatgggattcttagatga -gtgagtatcggccatattggttcgagcactttagtttttgatgcataggatatgcaatgt -atagctgaaagtactttatctgtttcaaactcacattgattaaaccggtaaacctttaaa -gactacaagaaaatattcagtgagggcaattttgtcaatcacaatcttccagctagagat -acttcacaatttgtcttgaggctacgcaacattagacggattttcgcgttttattgaaat -aatcgaggggcccaagagtatccatagttcattttgtaagatttctttacaggcttatta -cagcttcttcagactcctacatgcttacgagttatatgctagcatgtgaacaatagatta -atatacaggaaaacgtacattgagagagatgaccctacacagcgcaaccgttgagtactt -tcattaaagggtaacgctctcgagacagcatccttaagatggccttattgtcaaatcatt -tgcagaagtacgcaagatccctaaccaacgtagaagaatccctacaaacacatgagacgc -ggtgaaaatagacagggtgttagtattcaatcttcggagtatcaatttcgccaatcttgg -tgagaaagcataccctttcttcagagaaagaagatcaatcataacactatctttaacgag -gtacgcacgcgcatcattacctgcctccatggatctttaggatagcggaaagtattggca -gcgtattgtgatttcgttcctactttatcaatttcacattcatatacatgtcttttatca -aaatcgccaataagataggatgagctatattagatgctagtagagttcgcgccaacatca -tcgataggaatactcaggacagcgtgataggacttttcaatccctaatactctctataat -tataactctctcttaagtttggaggcagtaacgcgctctatataatcagtttgctgcacc -attcttcagcctctgatacatacaaataaattccacagcagtaagagggtttaattgaga -catcttgggaacttaggattttactctaacatcaccgaaacgattattggataccgtacc -taaacgaactttctcaaggcagtaatataggacatccgcaataacacaaatgctgcctcc -ccaggagttatgtcttcctggaggctatatcttacacccactcactataggcaaactaaa -gtttaaatgttgattgtctaaaaaaaagatagataagagttggccggcgtagcacatgcg -aaagtgaatcgtaagctataattctctggacttgaagttctgtcctgttcctctgcaaga -aacaaacttcctttaaagctatttacgacgcacatctcagcaagttataaacatgttgga -agtttctagtcggaattcccaaagaacggatctatctaatgcattcctacatttttcctg -tctgccgatggtgccatcctattcaaagaatttcttaaaagtagattaaatgggactttt -aacaatgagtaaccttacgcctctaagggttcctcgagtgccatacaccagtcaggtccg 
-agccacatacacggagaacattctaacatagcattctcaactcgatcatttgcaggttac -ttctttcctatcctagtgctaaaaatcatacttgcaatcccatagcacggattaagaacc -taagaaacaattcagtaaaacatgttcgaattcttggtatgggaacatcattgcagctat -ggtctaacgcattaatgtttgggtacatcttccatcatataaacaggaagagtctgacga -cagggagtgcttgcgatcatgtctatcattgtgaaatcaaattgtagctcacatgtcgtc -tatgagagcgtgtatccgataagatttagaaaaatagaagtcgtataagatctcactgaa -cttttgaatgaatgtgaagcatatatgatctgctttaataaaactttatccataggatac -gtttccaaatcaattcaataattattagtcaaaatagataaggatgaacaacctgaaggc -cgatcggacgtagaaagtggtcccatcactttgagttgatattgttgaaccacacgttat -tatggttttcaaacagtctcaggatattgtatatacagataatccgataccagttgtctg -acgcccctcttacgtaccccaccctttgtgacgtttaaagcagttgttcagtattttaaa -ctaggcggcaactaatttggaaagaagcacagtggatatgtctaaattcttgttattcag -gcctgaatttaatacaccgcatagttaacttcgcggtagagttgttcatcatgcctcctc -taagctaccacttctatgatacaccaatagttgttctacggaatctgataattggccaag -tcataaacttccgctgcgttcaacccccttgctcgaatatccaactcgaaaagacagcct -tttggtgtccggaacaaatcagttacttcttttctgatgttaattctctgtggtcagata -cagaccaaaaactccgcggatttaccatcctccaagaacaaatttgcatcaacatagcat -tttggctacatattctaagtctcaatagtttaggttttcaactacattatcccaacatta -ggattggaggaataatagctgggtaagtccccttgcgtctacaatcgactattttttatg -aatatgcttctgccgcacctatggttattaaaaaagtcatgactttgaagaaccctgaaa -agatagatgaatcaggtgtaatggcagcagccaaagagcatataattagcaacactctaa -gaacattatagatatgatgatagcgatcgtcatgatgttatccggtcacaatagtagctt -catcagctaattcgttttgccagtggtgacttgcgctggaagaatcgttatacggtccct -tccctcttgatacggtgggggcttattcaaccgcgtggattgggttgtcatacttgcatt -aaacgatgtaaaccatctagtagtcaactatactaaatcacaaaatagtgatcaatacat -acccgcttcatggttttaaccatttaattgattaaagatattccgctaagaaccattatc -tacctaaactgatcgccgtatcctagtagtttgaaatttgatgtaccgtaatgatcaacg -aagtaaaacgttatattgtatgtagaataataggtcttggagctaaatgatgtgattggt -agtgaagacttacccttacaactttaccggtttctcggaagaatatactagagaatcaat -gcatgggctacataagcactttagtctaatgagataaaaaatacacgagtcttccatcat -gaattttttgtcgaaaaactcgaacctggtaatttaaaccatatatctttatgtcgtcaa -taactctcatatgttttatataacttcccaatcacgacttgtaactgcttgttcgactga 
-gctgtttgagctatgaggccgggatccggttgagctacatctatttgctacaagaaaaat -gaaagcacatttgttgggagttctggctacactcatagagaaataagtggcccgagtggg -tgcggcctgcctccatattcaagtgtatcttaaaccaagtggttccaacgctcgcgctaa -agaattaaagcctttatttcctccacggagtagcccgtaatccggttcgaaagagaccat -tgaagttaattttcatatccagtgaagtttaggcacaagcatgtgttctgccacatgcct -caaagcgctcttcaaccaagatatgattcatcctaacttcgatgaatgcgtctgtaacat -aaatatagaaggaatgattcggcgagttaattttcgccttctccaacatggcatccctac -gttcgttataaggaccatacatgtaggttttaaaggtttgcggttaatcgatatttacat -catagaaattctatagtcaaatttacaagactctagatactcactcgttgcagccggcta -ggaagcgctttgtaccttacttcccttttcgttgcgtaatatgaatttcatatagtaagt -tcaaggcactcatacctccgtgaagagggtagatagactattaaagttgtttaatagtac -gtattgatggaaatgacccgtaggagatttaccactcaatccacaagattcgctgctgtg -cattatcaaaacagtgcatgtcgaaacatgggttgggtccttcaaacacgaatccaggta -gagatacctttgcaattttt diff --git a/vendor/regex/examples/regexdna-output.txt b/vendor/regex/examples/regexdna-output.txt deleted file mode 100644 index d36baa5..0000000 --- a/vendor/regex/examples/regexdna-output.txt +++ /dev/null @@ -1,13 +0,0 @@ -agggtaaa|tttaccct 0 -[cgt]gggtaaa|tttaccc[acg] 3 -a[act]ggtaaa|tttacc[agt]t 9 -ag[act]gtaaa|tttac[agt]ct 8 -agg[act]taaa|ttta[agt]cct 10 -aggg[acg]aaa|ttt[cgt]ccct 3 -agggt[cgt]aa|tt[acg]accct 4 -agggta[cgt]a|t[acg]taccct 3 -agggtaa[cgt]|[acg]ttaccct 5 - -101745 -100000 -133640 diff --git a/vendor/regex/examples/shootout-regex-dna-bytes.rs b/vendor/regex/examples/shootout-regex-dna-bytes.rs deleted file mode 100644 index 773fd9b..0000000 --- a/vendor/regex/examples/shootout-regex-dna-bytes.rs +++ /dev/null @@ -1,68 +0,0 @@ -// The Computer Language Benchmarks Game -// https://benchmarksgame-team.pages.debian.net/benchmarksgame/ -// -// contributed by the Rust Project Developers -// contributed by TeXitoi -// contributed by BurntSushi - -use std::io::{self, Read}; -use std::sync::Arc; -use std::thread; - -macro_rules! 
regex { - ($re:expr) => { - ::regex::bytes::Regex::new($re).unwrap() - }; -} - -fn main() { - let mut seq = Vec::with_capacity(51 * (1 << 20)); - io::stdin().read_to_end(&mut seq).unwrap(); - let ilen = seq.len(); - - seq = regex!(">[^\n]*\n|\n").replace_all(&seq, &b""[..]).into_owned(); - let clen = seq.len(); - let seq_arc = Arc::new(seq.clone()); - - let variants = vec![ - regex!("agggtaaa|tttaccct"), - regex!("[cgt]gggtaaa|tttaccc[acg]"), - regex!("a[act]ggtaaa|tttacc[agt]t"), - regex!("ag[act]gtaaa|tttac[agt]ct"), - regex!("agg[act]taaa|ttta[agt]cct"), - regex!("aggg[acg]aaa|ttt[cgt]ccct"), - regex!("agggt[cgt]aa|tt[acg]accct"), - regex!("agggta[cgt]a|t[acg]taccct"), - regex!("agggtaa[cgt]|[acg]ttaccct"), - ]; - let mut counts = vec![]; - for variant in variants { - let seq = seq_arc.clone(); - let restr = variant.to_string(); - let future = thread::spawn(move || variant.find_iter(&seq).count()); - counts.push((restr, future)); - } - - let substs = vec![ - (regex!("B"), &b"(c|g|t)"[..]), - (regex!("D"), &b"(a|g|t)"[..]), - (regex!("H"), &b"(a|c|t)"[..]), - (regex!("K"), &b"(g|t)"[..]), - (regex!("M"), &b"(a|c)"[..]), - (regex!("N"), &b"(a|c|g|t)"[..]), - (regex!("R"), &b"(a|g)"[..]), - (regex!("S"), &b"(c|g)"[..]), - (regex!("V"), &b"(a|c|g)"[..]), - (regex!("W"), &b"(a|t)"[..]), - (regex!("Y"), &b"(c|t)"[..]), - ]; - let mut seq = seq; - for (re, replacement) in substs { - seq = re.replace_all(&seq, replacement).into_owned(); - } - - for (variant, count) in counts { - println!("{} {}", variant, count.join().unwrap()); - } - println!("\n{}\n{}\n{}", ilen, clen, seq.len()); -} diff --git a/vendor/regex/examples/shootout-regex-dna-cheat.rs b/vendor/regex/examples/shootout-regex-dna-cheat.rs deleted file mode 100644 index 1bde7ab..0000000 --- a/vendor/regex/examples/shootout-regex-dna-cheat.rs +++ /dev/null @@ -1,90 +0,0 @@ -// The Computer Language Benchmarks Game -// https://benchmarksgame-team.pages.debian.net/benchmarksgame/ -// -// contributed by the Rust 
Project Developers -// contributed by TeXitoi -// contributed by BurntSushi - -// This technically solves the problem posed in the `regex-dna` benchmark, but -// it cheats by combining all of the replacements into a single regex and -// replacing them with a single linear scan. i.e., it re-implements -// `replace_all`. As a result, this is around 25% faster. ---AG - -use std::io::{self, Read}; -use std::sync::Arc; -use std::thread; - -macro_rules! regex { - ($re:expr) => { - ::regex::Regex::new($re).unwrap() - }; -} - -fn main() { - let mut seq = String::with_capacity(50 * (1 << 20)); - io::stdin().read_to_string(&mut seq).unwrap(); - let ilen = seq.len(); - - seq = regex!(">[^\n]*\n|\n").replace_all(&seq, "").into_owned(); - let clen = seq.len(); - let seq_arc = Arc::new(seq.clone()); - - let variants = vec![ - regex!("agggtaaa|tttaccct"), - regex!("[cgt]gggtaaa|tttaccc[acg]"), - regex!("a[act]ggtaaa|tttacc[agt]t"), - regex!("ag[act]gtaaa|tttac[agt]ct"), - regex!("agg[act]taaa|ttta[agt]cct"), - regex!("aggg[acg]aaa|ttt[cgt]ccct"), - regex!("agggt[cgt]aa|tt[acg]accct"), - regex!("agggta[cgt]a|t[acg]taccct"), - regex!("agggtaa[cgt]|[acg]ttaccct"), - ]; - let mut counts = vec![]; - for variant in variants { - let seq = seq_arc.clone(); - let restr = variant.to_string(); - let future = thread::spawn(move || variant.find_iter(&seq).count()); - counts.push((restr, future)); - } - - let substs = vec![ - (b'B', "(c|g|t)"), - (b'D', "(a|g|t)"), - (b'H', "(a|c|t)"), - (b'K', "(g|t)"), - (b'M', "(a|c)"), - (b'N', "(a|c|g|t)"), - (b'R', "(a|g)"), - (b'S', "(c|g)"), - (b'V', "(a|c|g)"), - (b'W', "(a|t)"), - (b'Y', "(c|t)"), - ]; // combined into one regex in `replace_all` - let seq = replace_all(&seq, substs); - - for (variant, count) in counts { - println!("{} {}", variant, count.join().unwrap()); - } - println!("\n{}\n{}\n{}", ilen, clen, seq.len()); -} - -fn replace_all(text: &str, substs: Vec<(u8, &str)>) -> String { - let mut replacements = vec![""; 256]; - let mut 
alternates = vec![]; - for (re, replacement) in substs { - replacements[re as usize] = replacement; - alternates.push((re as char).to_string()); - } - - let re = regex!(&alternates.join("|")); - let mut new = String::with_capacity(text.len()); - let mut last_match = 0; - for m in re.find_iter(text) { - new.push_str(&text[last_match..m.start()]); - new.push_str(replacements[text.as_bytes()[m.start()] as usize]); - last_match = m.end(); - } - new.push_str(&text[last_match..]); - new -} diff --git a/vendor/regex/examples/shootout-regex-dna-replace.rs b/vendor/regex/examples/shootout-regex-dna-replace.rs deleted file mode 100644 index 20694e0..0000000 --- a/vendor/regex/examples/shootout-regex-dna-replace.rs +++ /dev/null @@ -1,17 +0,0 @@ -use std::io::{self, Read}; - -macro_rules! regex { - ($re:expr) => {{ - use regex::internal::ExecBuilder; - ExecBuilder::new($re).build().unwrap().into_regex() - }}; -} - -fn main() { - let mut seq = String::with_capacity(50 * (1 << 20)); - io::stdin().read_to_string(&mut seq).unwrap(); - let ilen = seq.len(); - - seq = regex!(">[^\n]*\n|\n").replace_all(&seq, "").into_owned(); - println!("original: {}, replaced: {}", ilen, seq.len()); -} diff --git a/vendor/regex/examples/shootout-regex-dna-single-cheat.rs b/vendor/regex/examples/shootout-regex-dna-single-cheat.rs deleted file mode 100644 index 70a979c..0000000 --- a/vendor/regex/examples/shootout-regex-dna-single-cheat.rs +++ /dev/null @@ -1,75 +0,0 @@ -// The Computer Language Benchmarks Game -// https://benchmarksgame-team.pages.debian.net/benchmarksgame/ -// -// contributed by the Rust Project Developers -// contributed by TeXitoi -// contributed by BurntSushi - -use std::io::{self, Read}; - -macro_rules! 
regex { - ($re:expr) => { - ::regex::Regex::new($re).unwrap() - }; -} - -fn main() { - let mut seq = String::with_capacity(50 * (1 << 20)); - io::stdin().read_to_string(&mut seq).unwrap(); - let ilen = seq.len(); - - seq = regex!(">[^\n]*\n|\n").replace_all(&seq, "").into_owned(); - let clen = seq.len(); - - let variants = vec![ - regex!("agggtaaa|tttaccct"), - regex!("[cgt]gggtaaa|tttaccc[acg]"), - regex!("a[act]ggtaaa|tttacc[agt]t"), - regex!("ag[act]gtaaa|tttac[agt]ct"), - regex!("agg[act]taaa|ttta[agt]cct"), - regex!("aggg[acg]aaa|ttt[cgt]ccct"), - regex!("agggt[cgt]aa|tt[acg]accct"), - regex!("agggta[cgt]a|t[acg]taccct"), - regex!("agggtaa[cgt]|[acg]ttaccct"), - ]; - for re in variants { - println!("{} {}", re.to_string(), re.find_iter(&seq).count()); - } - - let substs = vec![ - (b'B', "(c|g|t)"), - (b'D', "(a|g|t)"), - (b'H', "(a|c|t)"), - (b'K', "(g|t)"), - (b'M', "(a|c)"), - (b'N', "(a|c|g|t)"), - (b'R', "(a|g)"), - (b'S', "(c|g)"), - (b'V', "(a|c|g)"), - (b'W', "(a|t)"), - (b'Y', "(c|t)"), - ]; // combined into one regex in `replace_all` - let seq = replace_all(&seq, substs); - - println!("\n{}\n{}\n{}", ilen, clen, seq.len()); -} - -fn replace_all(text: &str, substs: Vec<(u8, &str)>) -> String { - let mut replacements = vec![""; 256]; - let mut alternates = vec![]; - for (re, replacement) in substs { - replacements[re as usize] = replacement; - alternates.push((re as char).to_string()); - } - - let re = regex!(&alternates.join("|")); - let mut new = String::with_capacity(text.len()); - let mut last_match = 0; - for m in re.find_iter(text) { - new.push_str(&text[last_match..m.start()]); - new.push_str(replacements[text.as_bytes()[m.start()] as usize]); - last_match = m.end(); - } - new.push_str(&text[last_match..]); - new -} diff --git a/vendor/regex/examples/shootout-regex-dna-single.rs b/vendor/regex/examples/shootout-regex-dna-single.rs deleted file mode 100644 index b474059..0000000 --- a/vendor/regex/examples/shootout-regex-dna-single.rs +++ 
/dev/null @@ -1,57 +0,0 @@ -// The Computer Language Benchmarks Game -// https://benchmarksgame-team.pages.debian.net/benchmarksgame/ -// -// contributed by the Rust Project Developers -// contributed by TeXitoi -// contributed by BurntSushi - -use std::io::{self, Read}; - -macro_rules! regex { - ($re:expr) => { - ::regex::Regex::new($re).unwrap() - }; -} - -fn main() { - let mut seq = String::with_capacity(50 * (1 << 20)); - io::stdin().read_to_string(&mut seq).unwrap(); - let ilen = seq.len(); - - seq = regex!(">[^\n]*\n|\n").replace_all(&seq, "").into_owned(); - let clen = seq.len(); - - let variants = vec![ - regex!("agggtaaa|tttaccct"), - regex!("[cgt]gggtaaa|tttaccc[acg]"), - regex!("a[act]ggtaaa|tttacc[agt]t"), - regex!("ag[act]gtaaa|tttac[agt]ct"), - regex!("agg[act]taaa|ttta[agt]cct"), - regex!("aggg[acg]aaa|ttt[cgt]ccct"), - regex!("agggt[cgt]aa|tt[acg]accct"), - regex!("agggta[cgt]a|t[acg]taccct"), - regex!("agggtaa[cgt]|[acg]ttaccct"), - ]; - for re in variants { - println!("{} {}", re.to_string(), re.find_iter(&seq).count()); - } - - let substs = vec![ - (regex!("B"), "(c|g|t)"), - (regex!("D"), "(a|g|t)"), - (regex!("H"), "(a|c|t)"), - (regex!("K"), "(g|t)"), - (regex!("M"), "(a|c)"), - (regex!("N"), "(a|c|g|t)"), - (regex!("R"), "(a|g)"), - (regex!("S"), "(c|g)"), - (regex!("V"), "(a|c|g)"), - (regex!("W"), "(a|t)"), - (regex!("Y"), "(c|t)"), - ]; - let mut seq = seq; - for (re, replacement) in substs { - seq = re.replace_all(&seq, replacement).into_owned(); - } - println!("\n{}\n{}\n{}", ilen, clen, seq.len()); -} diff --git a/vendor/regex/examples/shootout-regex-dna.rs b/vendor/regex/examples/shootout-regex-dna.rs deleted file mode 100644 index b96518e..0000000 --- a/vendor/regex/examples/shootout-regex-dna.rs +++ /dev/null @@ -1,68 +0,0 @@ -// The Computer Language Benchmarks Game -// https://benchmarksgame-team.pages.debian.net/benchmarksgame/ -// -// contributed by the Rust Project Developers -// contributed by TeXitoi -// contributed by 
BurntSushi - -use std::io::{self, Read}; -use std::sync::Arc; -use std::thread; - -macro_rules! regex { - ($re:expr) => { - ::regex::Regex::new($re).unwrap() - }; -} - -fn main() { - let mut seq = String::with_capacity(51 * (1 << 20)); - io::stdin().read_to_string(&mut seq).unwrap(); - let ilen = seq.len(); - - seq = regex!(">[^\n]*\n|\n").replace_all(&seq, "").into_owned(); - let clen = seq.len(); - let seq_arc = Arc::new(seq.clone()); - - let variants = vec![ - regex!("agggtaaa|tttaccct"), - regex!("[cgt]gggtaaa|tttaccc[acg]"), - regex!("a[act]ggtaaa|tttacc[agt]t"), - regex!("ag[act]gtaaa|tttac[agt]ct"), - regex!("agg[act]taaa|ttta[agt]cct"), - regex!("aggg[acg]aaa|ttt[cgt]ccct"), - regex!("agggt[cgt]aa|tt[acg]accct"), - regex!("agggta[cgt]a|t[acg]taccct"), - regex!("agggtaa[cgt]|[acg]ttaccct"), - ]; - let mut counts = vec![]; - for variant in variants { - let seq = seq_arc.clone(); - let restr = variant.to_string(); - let future = thread::spawn(move || variant.find_iter(&seq).count()); - counts.push((restr, future)); - } - - let substs = vec![ - (regex!("B"), "(c|g|t)"), - (regex!("D"), "(a|g|t)"), - (regex!("H"), "(a|c|t)"), - (regex!("K"), "(g|t)"), - (regex!("M"), "(a|c)"), - (regex!("N"), "(a|c|g|t)"), - (regex!("R"), "(a|g)"), - (regex!("S"), "(c|g)"), - (regex!("V"), "(a|c|g)"), - (regex!("W"), "(a|t)"), - (regex!("Y"), "(c|t)"), - ]; - let mut seq = seq; - for (re, replacement) in substs { - seq = re.replace_all(&seq, replacement).into_owned(); - } - - for (variant, count) in counts { - println!("{} {}", variant, count.join().unwrap()); - } - println!("\n{}\n{}\n{}", ilen, clen, seq.len()); -} diff --git a/vendor/regex/record/compile-test/2023-07-05.csv b/vendor/regex/record/compile-test/2023-07-05.csv new file mode 100644 index 0000000..6ec81f5 --- /dev/null +++ b/vendor/regex/record/compile-test/2023-07-05.csv @@ -0,0 +1,37 @@ +name,crate,revision,profile,duration,size,relative-size 
+regex__dev__std_perf_unicode,regex,53786ce797,dev,2.414172223s,4143600,3764328 +regex__dev__std_perf_unicode_perf-dfa-full,regex,53786ce797,dev,2.900927164s,4815368,4436096 +regex__dev__std,regex,53786ce797,dev,1.662626059s,2062808,1683536 +regex__dev__std_perf,regex,53786ce797,dev,2.136755026s,3574256,3194984 +regex__dev__std_unicode,regex,53786ce797,dev,1.943953132s,2623960,2244688 +regex__dev__std_unicode-case_unicode-perl,regex,53786ce797,dev,1.753222606s,2374104,1994832 +regex-lite__dev__std_string,regex,53786ce797,dev,498.158769ms,727504,348232 +regex-automata__dev__std_syntax_perf_unicode_meta_nfa_dfa_hybrid,regex-automata,53786ce797,dev,2.900832296s,4872712,4493440 +regex-automata__dev__std_syntax_nfa-pikevm,regex-automata,53786ce797,dev,1.413429089s,1501648,1122376 +regex-automata__dev__std_syntax_nfa-backtrack,regex-automata,53786ce797,dev,1.412429191s,1505744,1126472 +regex-automata__dev__std_syntax_hybrid,regex-automata,53786ce797,dev,1.678331978s,1632720,1253448 +regex-automata__dev__std_syntax_dfa-onepass,regex-automata,53786ce797,dev,1.594526299s,1526224,1146952 +regex-automata__dev__std_syntax_unicode_meta_nfa_dfa_hybrid,regex-automata,53786ce797,dev,2.992024402s,3500504,3121232 +regex-automata__dev__std_syntax_perf_unicode_meta_nfa_hybrid_dfa-onepass,regex-automata,53786ce797,dev,2.378489598s,4119024,3739752 +regex-automata__dev__std_syntax_perf_meta_nfa_dfa_hybrid,regex-automata,53786ce797,dev,2.695475914s,4299272,3920000 +regex-automata__dev__std_syntax_perf_meta_nfa_hybrid_dfa-onepass,regex-automata,53786ce797,dev,2.120929251s,3549680,3170408 +regex-automata__dev__std_unicode_meta,regex-automata,53786ce797,dev,1.89728585s,2492888,2113616 +regex-automata__dev__std_meta,regex-automata,53786ce797,dev,1.604628942s,1927640,1548368 +regex__release__std_perf_unicode,regex,53786ce797,release,3.333636908s,2025816,1650720 +regex__release__std_perf_unicode_perf-dfa-full,regex,53786ce797,release,3.805434309s,2210160,1835064 
+regex__release__std,regex,53786ce797,release,1.789749444s,932160,557064 +regex__release__std_perf,regex,53786ce797,release,2.734249431s,1505624,1130528 +regex__release__std_unicode,regex,53786ce797,release,2.04945845s,1431872,1056776 +regex__release__std_unicode-case_unicode-perl,regex,53786ce797,release,1.893829903s,1173824,798728 +regex-lite__release__std_string,regex,53786ce797,release,648.517079ms,473400,98304 +regex-automata__release__std_syntax_perf_unicode_meta_nfa_dfa_hybrid,regex-automata,53786ce797,release,3.893237683s,2242928,1867832 +regex-automata__release__std_syntax_nfa-pikevm,regex-automata,53786ce797,release,1.556952008s,780600,405504 +regex-automata__release__std_syntax_nfa-backtrack,regex-automata,53786ce797,release,1.576471926s,768312,393216 +regex-automata__release__std_syntax_hybrid,regex-automata,53786ce797,release,1.819539266s,813368,438272 +regex-automata__release__std_syntax_dfa-onepass,regex-automata,53786ce797,release,1.672511482s,776504,401408 +regex-automata__release__std_syntax_unicode_meta_nfa_dfa_hybrid,regex-automata,53786ce797,release,3.227157436s,1767744,1392648 +regex-automata__release__std_syntax_perf_unicode_meta_nfa_hybrid_dfa-onepass,regex-automata,53786ce797,release,3.340235296s,2005336,1630240 +regex-automata__release__std_syntax_perf_meta_nfa_dfa_hybrid,regex-automata,53786ce797,release,3.640335773s,1718640,1343544 +regex-automata__release__std_syntax_perf_meta_nfa_hybrid_dfa-onepass,regex-automata,53786ce797,release,2.876306297s,1489240,1114144 +regex-automata__release__std_unicode_meta,regex-automata,53786ce797,release,1.945654415s,1362240,987144 +regex-automata__release__std_meta,regex-automata,53786ce797,release,1.740500411s,862528,487432 diff --git a/vendor/regex/record/old-bench-log/01-lazy-dfa/dynamic b/vendor/regex/record/old-bench-log/01-lazy-dfa/dynamic new file mode 100644 index 0000000..9ef2173 --- /dev/null +++ b/vendor/regex/record/old-bench-log/01-lazy-dfa/dynamic @@ -0,0 +1,73 @@ + Running 
target/release/dynamic-e87a67d7ea67f0eb + +running 67 tests +test bench::anchored_literal_long_match ... bench: 75 ns/iter (+/- 3) = 5200 MB/s +test bench::anchored_literal_long_non_match ... bench: 61 ns/iter (+/- 2) = 6393 MB/s +test bench::anchored_literal_short_match ... bench: 75 ns/iter (+/- 3) = 346 MB/s +test bench::anchored_literal_short_non_match ... bench: 61 ns/iter (+/- 1) = 426 MB/s +test bench::easy0_1K ... bench: 196 ns/iter (+/- 8) = 5224 MB/s +test bench::easy0_1MB ... bench: 255,138 ns/iter (+/- 4,820) = 4109 MB/s +test bench::easy0_32 ... bench: 71 ns/iter (+/- 2) = 450 MB/s +test bench::easy0_32K ... bench: 5,392 ns/iter (+/- 108) = 6077 MB/s +test bench::easy1_1K ... bench: 241 ns/iter (+/- 37) = 4248 MB/s +test bench::easy1_1MB ... bench: 334,872 ns/iter (+/- 3,433) = 3131 MB/s +test bench::easy1_32 ... bench: 65 ns/iter (+/- 2) = 492 MB/s +test bench::easy1_32K ... bench: 6,139 ns/iter (+/- 703) = 5337 MB/s +test bench::hard_1K ... bench: 4,654 ns/iter (+/- 63) = 220 MB/s +test bench::hard_1MB ... bench: 4,719,487 ns/iter (+/- 71,818) = 222 MB/s +test bench::hard_32 ... bench: 199 ns/iter (+/- 8) = 160 MB/s +test bench::hard_32K ... bench: 147,389 ns/iter (+/- 4,391) = 222 MB/s +test bench::literal ... bench: 20 ns/iter (+/- 4) = 2550 MB/s +test bench::match_class ... bench: 85 ns/iter (+/- 4) = 952 MB/s +test bench::match_class_in_range ... bench: 32 ns/iter (+/- 3) = 2531 MB/s +test bench::match_class_unicode ... bench: 783 ns/iter (+/- 13) = 205 MB/s +test bench::medium_1K ... bench: 1,334 ns/iter (+/- 154) = 767 MB/s +test bench::medium_1MB ... bench: 2,044,757 ns/iter (+/- 72,936) = 512 MB/s +test bench::medium_32 ... bench: 99 ns/iter (+/- 18) = 323 MB/s +test bench::medium_32K ... bench: 59,603 ns/iter (+/- 13,750) = 549 MB/s +test bench::no_exponential ... bench: 553 ns/iter (+/- 150) = 180 MB/s +test bench::not_literal ... bench: 293 ns/iter (+/- 59) = 174 MB/s +test bench::one_pass_long_prefix ... 
bench: 177 ns/iter (+/- 35) = 146 MB/s +test bench::one_pass_long_prefix_not ... bench: 175 ns/iter (+/- 47) = 148 MB/s +test bench::one_pass_short ... bench: 134 ns/iter (+/- 34) = 126 MB/s +test bench::one_pass_short_not ... bench: 136 ns/iter (+/- 39) = 125 MB/s +test bench::replace_all ... bench: 153 ns/iter (+/- 17) +test bench_dynamic_compile::compile_huge ... bench: 165,209 ns/iter (+/- 4,396) +test bench_dynamic_compile::compile_huge_bytes ... bench: 18,795,770 ns/iter (+/- 2,674,909) +test bench_dynamic_compile::compile_simple ... bench: 6,883 ns/iter (+/- 391) +test bench_dynamic_compile::compile_simple_bytes ... bench: 7,281 ns/iter (+/- 751) +test bench_dynamic_compile::compile_small ... bench: 9,091 ns/iter (+/- 1,125) +test bench_dynamic_compile::compile_small_bytes ... bench: 182,815 ns/iter (+/- 3,814) +test bench_dynamic_parse::parse_huge ... bench: 1,233 ns/iter (+/- 123) +test bench_dynamic_parse::parse_simple ... bench: 2,015 ns/iter (+/- 108) +test bench_dynamic_parse::parse_small ... bench: 2,500 ns/iter (+/- 76) +test bench_sherlock::before_holmes ... bench: 2,741,811 ns/iter (+/- 58,389) = 216 MB/s +test bench_sherlock::everything_greedy ... bench: 7,807,696 ns/iter (+/- 328,585) = 76 MB/s +test bench_sherlock::everything_greedy_nl ... bench: 5,424,922 ns/iter (+/- 78,937) = 109 MB/s +test bench_sherlock::holmes_cochar_watson ... bench: 266,557 ns/iter (+/- 3,832) = 2231 MB/s +test bench_sherlock::holmes_coword_watson ... bench: 1,327,967 ns/iter (+/- 12,773) = 448 MB/s +test bench_sherlock::line_boundary_sherlock_holmes ... bench: 2,690,485 ns/iter (+/- 17,393) = 221 MB/s +test bench_sherlock::name_alt1 ... bench: 77,206 ns/iter (+/- 951) = 7705 MB/s +test bench_sherlock::name_alt2 ... bench: 303,775 ns/iter (+/- 5,030) = 1958 MB/s +test bench_sherlock::name_alt3 ... bench: 1,385,153 ns/iter (+/- 15,871) = 429 MB/s +test bench_sherlock::name_alt3_nocase ... bench: 1,473,833 ns/iter (+/- 9,825) = 403 MB/s +test bench_sherlock::name_alt4 ... 
bench: 300,912 ns/iter (+/- 3,896) = 1977 MB/s +test bench_sherlock::name_alt4_nocase ... bench: 1,421,519 ns/iter (+/- 16,246) = 418 MB/s +test bench_sherlock::name_holmes ... bench: 52,027 ns/iter (+/- 785) = 11435 MB/s +test bench_sherlock::name_holmes_nocase ... bench: 1,241,204 ns/iter (+/- 16,862) = 479 MB/s +test bench_sherlock::name_sherlock ... bench: 34,378 ns/iter (+/- 677) = 17305 MB/s +test bench_sherlock::name_sherlock_holmes ... bench: 34,463 ns/iter (+/- 580) = 17262 MB/s +test bench_sherlock::name_sherlock_holmes_nocase ... bench: 1,281,540 ns/iter (+/- 11,054) = 464 MB/s +test bench_sherlock::name_sherlock_nocase ... bench: 1,281,293 ns/iter (+/- 13,129) = 464 MB/s +test bench_sherlock::name_whitespace ... bench: 60,463 ns/iter (+/- 815) = 9839 MB/s +test bench_sherlock::no_match_common ... bench: 568,357 ns/iter (+/- 11,237) = 1046 MB/s +test bench_sherlock::no_match_uncommon ... bench: 23,656 ns/iter (+/- 340) = 25149 MB/s +test bench_sherlock::quotes ... bench: 977,907 ns/iter (+/- 13,926) = 608 MB/s +test bench_sherlock::the_lower ... bench: 794,285 ns/iter (+/- 8,513) = 749 MB/s +test bench_sherlock::the_nocase ... bench: 1,837,240 ns/iter (+/- 22,738) = 323 MB/s +test bench_sherlock::the_upper ... bench: 54,083 ns/iter (+/- 1,153) = 11000 MB/s +test bench_sherlock::the_whitespace ... bench: 1,986,579 ns/iter (+/- 9,292) = 299 MB/s +test bench_sherlock::word_ending_n ... bench: 55,205,101 ns/iter (+/- 93,542) = 10 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 67 measured + diff --git a/vendor/regex/record/old-bench-log/01-lazy-dfa/dynamic-no-lazy-dfa b/vendor/regex/record/old-bench-log/01-lazy-dfa/dynamic-no-lazy-dfa new file mode 100644 index 0000000..50d3a13 --- /dev/null +++ b/vendor/regex/record/old-bench-log/01-lazy-dfa/dynamic-no-lazy-dfa @@ -0,0 +1,85 @@ + Compiling regex v0.1.48 (file:///home/andrew/data/projects/rust/regex) +src/dfa.rs:73:1: 94:2 warning: function is never used: `can_exec`, #[warn(dead_code)] on by default +src/dfa.rs:73 pub fn can_exec(insts: &Insts) -> bool { +src/dfa.rs:74 use inst::EmptyLook::*; +src/dfa.rs:75 // If for some reason we manage to allocate a regex program with more +src/dfa.rs:76 // than 2^32-1 instructions, then we can't execute the DFA because we +src/dfa.rs:77 // use 32 bit pointers. +src/dfa.rs:78 if insts.len() > ::std::u32::MAX as usize { + ... +src/exec.rs:12:11: 12:15 warning: unused import, #[warn(unused_imports)] on by default +src/exec.rs:12 use dfa::{self, Dfa, DfaResult}; + ^~~~ + Running target/release/dynamic-e87a67d7ea67f0eb + +running 67 tests +test bench::anchored_literal_long_match ... bench: 169 ns/iter (+/- 1) = 2307 MB/s +test bench::anchored_literal_long_non_match ... bench: 85 ns/iter (+/- 0) = 4588 MB/s +test bench::anchored_literal_short_match ... bench: 158 ns/iter (+/- 3) = 164 MB/s +test bench::anchored_literal_short_non_match ... bench: 84 ns/iter (+/- 2) = 309 MB/s +test bench::easy0_1K ... bench: 318 ns/iter (+/- 2) = 3220 MB/s +test bench::easy0_1MB ... bench: 257,205 ns/iter (+/- 2,448) = 4076 MB/s +test bench::easy0_32 ... bench: 82 ns/iter (+/- 1) = 390 MB/s +test bench::easy0_32K ... bench: 8,666 ns/iter (+/- 104) = 3781 MB/s +test bench::easy1_1K ... bench: 293 ns/iter (+/- 2) = 3494 MB/s +test bench::easy1_1MB ... bench: 329,774 ns/iter (+/- 6,296) = 3179 MB/s +test bench::easy1_32 ... bench: 77 ns/iter (+/- 0) = 415 MB/s +test bench::easy1_32K ... 
bench: 8,856 ns/iter (+/- 93) = 3700 MB/s +test bench::hard_1K ... bench: 31,888 ns/iter (+/- 83) = 32 MB/s +test bench::hard_1MB ... bench: 58,435,108 ns/iter (+/- 64,537) = 17 MB/s +test bench::hard_32 ... bench: 1,048 ns/iter (+/- 12) = 30 MB/s +test bench::hard_32K ... bench: 1,033,930 ns/iter (+/- 4,224) = 31 MB/s +test bench::literal ... bench: 20 ns/iter (+/- 0) = 2550 MB/s +test bench::match_class ... bench: 84 ns/iter (+/- 0) = 964 MB/s +test bench::match_class_in_range ... bench: 33 ns/iter (+/- 0) = 2454 MB/s +test bench::match_class_unicode ... bench: 2,218 ns/iter (+/- 8) = 72 MB/s +test bench::medium_1K ... bench: 1,368 ns/iter (+/- 9) = 748 MB/s +test bench::medium_1MB ... bench: 2,034,481 ns/iter (+/- 3,608) = 515 MB/s +test bench::medium_32 ... bench: 141 ns/iter (+/- 0) = 226 MB/s +test bench::medium_32K ... bench: 59,949 ns/iter (+/- 421) = 546 MB/s +test bench::no_exponential ... bench: 336,653 ns/iter (+/- 1,757) +test bench::not_literal ... bench: 1,247 ns/iter (+/- 5) = 40 MB/s +test bench::one_pass_long_prefix ... bench: 264 ns/iter (+/- 2) = 98 MB/s +test bench::one_pass_long_prefix_not ... bench: 267 ns/iter (+/- 1) = 97 MB/s +test bench::one_pass_short ... bench: 768 ns/iter (+/- 5) = 22 MB/s +test bench::one_pass_short_not ... bench: 797 ns/iter (+/- 20) = 21 MB/s +test bench::replace_all ... bench: 149 ns/iter (+/- 0) +test bench_dynamic_compile::compile_huge ... bench: 161,349 ns/iter (+/- 1,462) +test bench_dynamic_compile::compile_huge_bytes ... bench: 18,050,519 ns/iter (+/- 105,846) +test bench_dynamic_compile::compile_simple ... bench: 6,664 ns/iter (+/- 390) +test bench_dynamic_compile::compile_simple_bytes ... bench: 7,035 ns/iter (+/- 370) +test bench_dynamic_compile::compile_small ... bench: 8,914 ns/iter (+/- 347) +test bench_dynamic_compile::compile_small_bytes ... bench: 186,970 ns/iter (+/- 2,134) +test bench_dynamic_parse::parse_huge ... bench: 1,238 ns/iter (+/- 11) +test bench_dynamic_parse::parse_simple ... 
bench: 2,005 ns/iter (+/- 19) +test bench_dynamic_parse::parse_small ... bench: 2,494 ns/iter (+/- 11) +test bench_sherlock::before_holmes ... bench: 42,005,594 ns/iter (+/- 57,752) = 14 MB/s +test bench_sherlock::everything_greedy ... bench: 38,431,063 ns/iter (+/- 28,840) = 15 MB/s +test bench_sherlock::everything_greedy_nl ... bench: 32,003,966 ns/iter (+/- 50,270) = 18 MB/s +test bench_sherlock::holmes_cochar_watson ... bench: 1,457,068 ns/iter (+/- 3,202) = 408 MB/s +test bench_sherlock::holmes_coword_watson ... bench: 136,035,549 ns/iter (+/- 75,381) = 4 MB/s +test bench_sherlock::line_boundary_sherlock_holmes ... bench: 33,024,291 ns/iter (+/- 67,902) = 18 MB/s +test bench_sherlock::name_alt1 ... bench: 157,989 ns/iter (+/- 917) = 3765 MB/s +test bench_sherlock::name_alt2 ... bench: 545,254 ns/iter (+/- 1,908) = 1091 MB/s +test bench_sherlock::name_alt3 ... bench: 2,245,964 ns/iter (+/- 2,478) = 264 MB/s +test bench_sherlock::name_alt3_nocase ... bench: 4,792,290 ns/iter (+/- 31,760) = 124 MB/s +test bench_sherlock::name_alt4 ... bench: 584,204 ns/iter (+/- 2,084) = 1018 MB/s +test bench_sherlock::name_alt4_nocase ... bench: 2,318,020 ns/iter (+/- 8,493) = 256 MB/s +test bench_sherlock::name_holmes ... bench: 51,880 ns/iter (+/- 299) = 11467 MB/s +test bench_sherlock::name_holmes_nocase ... bench: 1,414,500 ns/iter (+/- 2,497) = 420 MB/s +test bench_sherlock::name_sherlock ... bench: 34,294 ns/iter (+/- 349) = 17348 MB/s +test bench_sherlock::name_sherlock_holmes ... bench: 34,531 ns/iter (+/- 199) = 17228 MB/s +test bench_sherlock::name_sherlock_holmes_nocase ... bench: 1,692,651 ns/iter (+/- 8,846) = 351 MB/s +test bench_sherlock::name_sherlock_nocase ... bench: 1,657,413 ns/iter (+/- 5,534) = 358 MB/s +test bench_sherlock::name_whitespace ... bench: 131,372 ns/iter (+/- 605) = 4528 MB/s +test bench_sherlock::no_match_common ... bench: 567,065 ns/iter (+/- 2,763) = 1049 MB/s +test bench_sherlock::no_match_uncommon ... 
bench: 23,782 ns/iter (+/- 85) = 25016 MB/s +test bench_sherlock::quotes ... bench: 11,251,366 ns/iter (+/- 24,960) = 52 MB/s +test bench_sherlock::the_lower ... bench: 789,781 ns/iter (+/- 2,072) = 753 MB/s +test bench_sherlock::the_nocase ... bench: 1,807,509 ns/iter (+/- 4,685) = 329 MB/s +test bench_sherlock::the_upper ... bench: 53,542 ns/iter (+/- 198) = 11111 MB/s +test bench_sherlock::the_whitespace ... bench: 5,410,444 ns/iter (+/- 14,766) = 109 MB/s +test bench_sherlock::word_ending_n ... bench: 56,017,874 ns/iter (+/- 60,047) = 10 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 67 measured + diff --git a/vendor/regex/record/old-bench-log/01-lazy-dfa/native b/vendor/regex/record/old-bench-log/01-lazy-dfa/native new file mode 100644 index 0000000..61fc08d --- /dev/null +++ b/vendor/regex/record/old-bench-log/01-lazy-dfa/native @@ -0,0 +1,65 @@ + Compiling regex_macros v0.1.28 (file:///home/andrew/data/projects/rust/regex/regex_macros) + Running regex_macros/target/release/native-f2ffefeeda527264 + +running 58 tests +test bench::anchored_literal_long_match ... bench: 189 ns/iter (+/- 16) = 2063 MB/s +test bench::anchored_literal_long_non_match ... bench: 47 ns/iter (+/- 1) = 8297 MB/s +test bench::anchored_literal_short_match ... bench: 177 ns/iter (+/- 5) = 146 MB/s +test bench::anchored_literal_short_non_match ... bench: 46 ns/iter (+/- 1) = 565 MB/s +test bench::easy0_1K ... bench: 26,578 ns/iter (+/- 1,140) = 38 MB/s +test bench::easy0_1MB ... bench: 27,229,730 ns/iter (+/- 261,126) = 38 MB/s +test bench::easy0_32 ... bench: 867 ns/iter (+/- 45) = 36 MB/s +test bench::easy0_32K ... bench: 847,113 ns/iter (+/- 276,910) = 38 MB/s +test bench::easy1_1K ... bench: 23,525 ns/iter (+/- 278) = 43 MB/s +test bench::easy1_1MB ... bench: 24,075,047 ns/iter (+/- 40,396) = 43 MB/s +test bench::easy1_32 ... bench: 767 ns/iter (+/- 14) = 41 MB/s +test bench::easy1_32K ... bench: 752,730 ns/iter (+/- 9,284) = 43 MB/s +test bench::hard_1K ... 
bench: 44,053 ns/iter (+/- 513) = 23 MB/s +test bench::hard_1MB ... bench: 44,982,170 ns/iter (+/- 76,683) = 23 MB/s +test bench::hard_32 ... bench: 1,418 ns/iter (+/- 26) = 22 MB/s +test bench::hard_32K ... bench: 1,407,013 ns/iter (+/- 13,426) = 23 MB/s +test bench::literal ... bench: 1,202 ns/iter (+/- 16) = 42 MB/s +test bench::match_class ... bench: 2,057 ns/iter (+/- 29) = 39 MB/s +test bench::match_class_in_range ... bench: 2,060 ns/iter (+/- 34) = 39 MB/s +test bench::match_class_unicode ... bench: 12,945 ns/iter (+/- 156) = 12 MB/s +test bench::medium_1K ... bench: 27,874 ns/iter (+/- 315) = 36 MB/s +test bench::medium_1MB ... bench: 28,614,500 ns/iter (+/- 544,256) = 36 MB/s +test bench::medium_32 ... bench: 896 ns/iter (+/- 85) = 35 MB/s +test bench::medium_32K ... bench: 892,349 ns/iter (+/- 35,511) = 36 MB/s +test bench::no_exponential ... bench: 319,270 ns/iter (+/- 19,837) +test bench::not_literal ... bench: 1,477 ns/iter (+/- 104) = 34 MB/s +test bench::one_pass_long_prefix ... bench: 653 ns/iter (+/- 10) = 39 MB/s +test bench::one_pass_long_prefix_not ... bench: 651 ns/iter (+/- 6) = 39 MB/s +test bench::one_pass_short ... bench: 1,016 ns/iter (+/- 24) = 16 MB/s +test bench::one_pass_short_not ... bench: 1,588 ns/iter (+/- 28) = 10 MB/s +test bench::replace_all ... bench: 1,078 ns/iter (+/- 55) +test bench_sherlock::before_holmes ... bench: 54,264,124 ns/iter (+/- 564,692) = 10 MB/s +test bench_sherlock::everything_greedy ... bench: 22,724,158 ns/iter (+/- 44,361) = 26 MB/s +test bench_sherlock::everything_greedy_nl ... bench: 22,168,804 ns/iter (+/- 66,296) = 26 MB/s +test bench_sherlock::holmes_cochar_watson ... bench: 24,791,824 ns/iter (+/- 37,522) = 23 MB/s +test bench_sherlock::holmes_coword_watson ... bench: 885,999,793 ns/iter (+/- 39,704,278) +test bench_sherlock::line_boundary_sherlock_holmes ... bench: 25,113,805 ns/iter (+/- 672,050) = 23 MB/s +test bench_sherlock::name_alt1 ... 
bench: 23,382,716 ns/iter (+/- 3,696,517) = 25 MB/s +test bench_sherlock::name_alt2 ... bench: 23,585,220 ns/iter (+/- 3,724,922) = 25 MB/s +test bench_sherlock::name_alt3 ... bench: 80,283,635 ns/iter (+/- 3,165,029) = 7 MB/s +test bench_sherlock::name_alt3_nocase ... bench: 77,357,394 ns/iter (+/- 268,133) = 7 MB/s +test bench_sherlock::name_alt4 ... bench: 22,736,520 ns/iter (+/- 43,231) = 26 MB/s +test bench_sherlock::name_alt4_nocase ... bench: 26,921,524 ns/iter (+/- 140,162) = 22 MB/s +test bench_sherlock::name_holmes ... bench: 15,145,735 ns/iter (+/- 65,980) = 39 MB/s +test bench_sherlock::name_holmes_nocase ... bench: 16,285,042 ns/iter (+/- 71,956) = 36 MB/s +test bench_sherlock::name_sherlock ... bench: 16,189,653 ns/iter (+/- 99,929) = 36 MB/s +test bench_sherlock::name_sherlock_holmes ... bench: 14,975,742 ns/iter (+/- 118,052) = 39 MB/s +test bench_sherlock::name_sherlock_holmes_nocase ... bench: 16,904,928 ns/iter (+/- 201,104) = 35 MB/s +test bench_sherlock::name_sherlock_nocase ... bench: 16,335,907 ns/iter (+/- 118,725) = 36 MB/s +test bench_sherlock::name_whitespace ... bench: 14,837,905 ns/iter (+/- 52,201) = 40 MB/s +test bench_sherlock::no_match_common ... bench: 16,036,625 ns/iter (+/- 108,268) = 37 MB/s +test bench_sherlock::no_match_uncommon ... bench: 15,278,356 ns/iter (+/- 81,123) = 38 MB/s +test bench_sherlock::quotes ... bench: 21,580,801 ns/iter (+/- 198,772) = 27 MB/s +test bench_sherlock::the_lower ... bench: 16,059,120 ns/iter (+/- 160,640) = 37 MB/s +test bench_sherlock::the_nocase ... bench: 17,376,836 ns/iter (+/- 103,371) = 34 MB/s +test bench_sherlock::the_upper ... bench: 15,259,087 ns/iter (+/- 93,807) = 38 MB/s +test bench_sherlock::the_whitespace ... bench: 18,835,951 ns/iter (+/- 160,674) = 31 MB/s +test bench_sherlock::word_ending_n ... bench: 59,832,390 ns/iter (+/- 4,478,911) = 9 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 58 measured + diff --git a/vendor/regex/record/old-bench-log/01-lazy-dfa/nfa b/vendor/regex/record/old-bench-log/01-lazy-dfa/nfa new file mode 100644 index 0000000..994137b --- /dev/null +++ b/vendor/regex/record/old-bench-log/01-lazy-dfa/nfa @@ -0,0 +1,74 @@ + Compiling regex v0.1.48 (file:///home/andrew/data/projects/rust/regex) + Running target/release/dynamic_nfa-1e40ce11bcb7c666 + +running 67 tests +test bench::anchored_literal_long_match ... bench: 306 ns/iter (+/- 6) = 1274 MB/s +test bench::anchored_literal_long_non_match ... bench: 95 ns/iter (+/- 1) = 4105 MB/s +test bench::anchored_literal_short_match ... bench: 315 ns/iter (+/- 2) = 82 MB/s +test bench::anchored_literal_short_non_match ... bench: 96 ns/iter (+/- 2) = 270 MB/s +test bench::easy0_1K ... bench: 206 ns/iter (+/- 1) = 4970 MB/s +test bench::easy0_1MB ... bench: 255,834 ns/iter (+/- 1,273) = 4098 MB/s +test bench::easy0_32 ... bench: 72 ns/iter (+/- 2) = 444 MB/s +test bench::easy0_32K ... bench: 5,315 ns/iter (+/- 25) = 6165 MB/s +test bench::easy1_1K ... bench: 274 ns/iter (+/- 0) = 3737 MB/s +test bench::easy1_1MB ... bench: 337,047 ns/iter (+/- 1,972) = 3111 MB/s +test bench::easy1_32 ... bench: 76 ns/iter (+/- 2) = 421 MB/s +test bench::easy1_32K ... bench: 6,111 ns/iter (+/- 39) = 5362 MB/s +test bench::hard_1K ... bench: 59,596 ns/iter (+/- 264) = 17 MB/s +test bench::hard_1MB ... bench: 58,947,188 ns/iter (+/- 205,874) = 17 MB/s +test bench::hard_32 ... bench: 1,978 ns/iter (+/- 22) = 16 MB/s +test bench::hard_32K ... bench: 1,846,347 ns/iter (+/- 14,253) = 17 MB/s +test bench::literal ... bench: 172 ns/iter (+/- 1) = 296 MB/s +test bench::match_class ... bench: 240 ns/iter (+/- 1) = 337 MB/s +test bench::match_class_in_range ... bench: 190 ns/iter (+/- 2) = 426 MB/s +test bench::match_class_unicode ... bench: 4,145 ns/iter (+/- 24) = 38 MB/s +test bench::medium_1K ... bench: 1,195 ns/iter (+/- 8) = 856 MB/s +test bench::medium_1MB ... 
bench: 2,028,649 ns/iter (+/- 11,235) = 516 MB/s +test bench::medium_32 ... bench: 84 ns/iter (+/- 0) = 380 MB/s +test bench::medium_32K ... bench: 56,134 ns/iter (+/- 369) = 583 MB/s +test bench::no_exponential ... bench: 536 ns/iter (+/- 4) = 186 MB/s +test bench::not_literal ... bench: 2,428 ns/iter (+/- 31) = 21 MB/s +test bench::one_pass_long_prefix ... bench: 756 ns/iter (+/- 2) = 34 MB/s +test bench::one_pass_long_prefix_not ... bench: 756 ns/iter (+/- 12) = 34 MB/s +test bench::one_pass_short ... bench: 1,813 ns/iter (+/- 5) = 9 MB/s +test bench::one_pass_short_not ... bench: 2,588 ns/iter (+/- 8) = 6 MB/s +test bench::replace_all ... bench: 905 ns/iter (+/- 7) +test bench_dynamic_compile::compile_huge ... bench: 161,517 ns/iter (+/- 1,287) +test bench_dynamic_compile::compile_huge_bytes ... bench: 18,395,715 ns/iter (+/- 98,986) +test bench_dynamic_compile::compile_simple ... bench: 6,623 ns/iter (+/- 296) +test bench_dynamic_compile::compile_simple_bytes ... bench: 7,047 ns/iter (+/- 232) +test bench_dynamic_compile::compile_small ... bench: 8,948 ns/iter (+/- 526) +test bench_dynamic_compile::compile_small_bytes ... bench: 186,796 ns/iter (+/- 817) +test bench_dynamic_parse::parse_huge ... bench: 1,238 ns/iter (+/- 6) +test bench_dynamic_parse::parse_simple ... bench: 1,977 ns/iter (+/- 12) +test bench_dynamic_parse::parse_small ... bench: 2,502 ns/iter (+/- 18) +test bench_sherlock::before_holmes ... bench: 45,045,123 ns/iter (+/- 261,188) = 13 MB/s +test bench_sherlock::everything_greedy ... bench: 38,685,654 ns/iter (+/- 107,136) = 15 MB/s +test bench_sherlock::everything_greedy_nl ... bench: 36,407,787 ns/iter (+/- 160,253) = 16 MB/s +test bench_sherlock::holmes_cochar_watson ... bench: 1,417,371 ns/iter (+/- 6,533) = 419 MB/s +test bench_sherlock::holmes_coword_watson ... bench: 139,298,695 ns/iter (+/- 154,012) = 4 MB/s +test bench_sherlock::line_boundary_sherlock_holmes ... 
bench: 32,734,005 ns/iter (+/- 98,729) = 18 MB/s +test bench_sherlock::name_alt1 ... bench: 153,016 ns/iter (+/- 739) = 3888 MB/s +test bench_sherlock::name_alt2 ... bench: 534,038 ns/iter (+/- 1,909) = 1114 MB/s +test bench_sherlock::name_alt3 ... bench: 2,220,778 ns/iter (+/- 6,374) = 267 MB/s +test bench_sherlock::name_alt3_nocase ... bench: 4,744,134 ns/iter (+/- 11,703) = 125 MB/s +test bench_sherlock::name_alt4 ... bench: 569,971 ns/iter (+/- 2,256) = 1043 MB/s +test bench_sherlock::name_alt4_nocase ... bench: 2,324,966 ns/iter (+/- 3,082) = 255 MB/s +test bench_sherlock::name_holmes ... bench: 268,146 ns/iter (+/- 1,238) = 2218 MB/s +test bench_sherlock::name_holmes_nocase ... bench: 1,409,583 ns/iter (+/- 2,808) = 422 MB/s +test bench_sherlock::name_sherlock ... bench: 95,280 ns/iter (+/- 316) = 6244 MB/s +test bench_sherlock::name_sherlock_holmes ... bench: 116,097 ns/iter (+/- 461) = 5124 MB/s +test bench_sherlock::name_sherlock_holmes_nocase ... bench: 1,691,210 ns/iter (+/- 3,712) = 351 MB/s +test bench_sherlock::name_sherlock_nocase ... bench: 1,651,722 ns/iter (+/- 7,070) = 360 MB/s +test bench_sherlock::name_whitespace ... bench: 130,960 ns/iter (+/- 923) = 4542 MB/s +test bench_sherlock::no_match_common ... bench: 568,008 ns/iter (+/- 1,723) = 1047 MB/s +test bench_sherlock::no_match_uncommon ... bench: 23,669 ns/iter (+/- 84) = 25135 MB/s +test bench_sherlock::quotes ... bench: 11,055,260 ns/iter (+/- 24,883) = 53 MB/s +test bench_sherlock::the_lower ... bench: 2,934,498 ns/iter (+/- 4,553) = 202 MB/s +test bench_sherlock::the_nocase ... bench: 4,268,193 ns/iter (+/- 8,164) = 139 MB/s +test bench_sherlock::the_upper ... bench: 272,832 ns/iter (+/- 1,436) = 2180 MB/s +test bench_sherlock::the_whitespace ... bench: 5,409,934 ns/iter (+/- 7,678) = 109 MB/s +test bench_sherlock::word_ending_n ... bench: 55,252,656 ns/iter (+/- 68,442) = 10 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 67 measured + diff --git a/vendor/regex/record/old-bench-log/01-lazy-dfa/pcre b/vendor/regex/record/old-bench-log/01-lazy-dfa/pcre new file mode 100644 index 0000000..22a66e6 --- /dev/null +++ b/vendor/regex/record/old-bench-log/01-lazy-dfa/pcre @@ -0,0 +1,60 @@ + Compiling regex v0.1.48 (file:///home/andrew/data/projects/rust/regex) + Running target/release/pcre-781840b9a3e9c199 + +running 53 tests +test anchored_literal_long_match ... bench: 90 ns/iter (+/- 7) = 4333 MB/s +test anchored_literal_long_non_match ... bench: 60 ns/iter (+/- 2) = 6500 MB/s +test anchored_literal_short_match ... bench: 87 ns/iter (+/- 6) = 298 MB/s +test anchored_literal_short_non_match ... bench: 58 ns/iter (+/- 4) = 448 MB/s +test easy0_1K ... bench: 258 ns/iter (+/- 14) = 3968 MB/s +test easy0_1MB ... bench: 226,139 ns/iter (+/- 1,637) = 4636 MB/s +test easy0_32 ... bench: 60 ns/iter (+/- 7) = 533 MB/s +test easy0_32K ... bench: 7,028 ns/iter (+/- 120) = 4662 MB/s +test easy1_1K ... bench: 794 ns/iter (+/- 20) = 1289 MB/s +test easy1_1MB ... bench: 751,438 ns/iter (+/- 11,372) = 1395 MB/s +test easy1_32 ... bench: 71 ns/iter (+/- 3) = 450 MB/s +test easy1_32K ... bench: 23,042 ns/iter (+/- 1,453) = 1422 MB/s +test hard_1K ... bench: 30,841 ns/iter (+/- 1,287) = 33 MB/s +test hard_1MB ... bench: 35,239,100 ns/iter (+/- 632,179) = 29 MB/s +test hard_32 ... bench: 86 ns/iter (+/- 11) = 372 MB/s +test hard_32K ... bench: 993,011 ns/iter (+/- 63,648) = 32 MB/s +test literal ... bench: 130 ns/iter (+/- 11) = 392 MB/s +test match_class ... bench: 183 ns/iter (+/- 33) = 442 MB/s +test match_class_in_range ... bench: 175 ns/iter (+/- 18) = 462 MB/s +test match_class_unicode ... bench: 513 ns/iter (+/- 8) = 313 MB/s +test medium_1K ... bench: 278 ns/iter (+/- 6) = 3683 MB/s +test medium_1MB ... bench: 240,699 ns/iter (+/- 17,344) = 4356 MB/s +test medium_32 ... bench: 61 ns/iter (+/- 13) = 524 MB/s +test medium_32K ... 
bench: 7,369 ns/iter (+/- 105) = 4446 MB/s +test not_literal ... bench: 274 ns/iter (+/- 17) = 186 MB/s +test one_pass_long_prefix ... bench: 87 ns/iter (+/- 19) = 298 MB/s +test one_pass_long_prefix_not ... bench: 86 ns/iter (+/- 13) = 302 MB/s +test one_pass_short ... bench: 117 ns/iter (+/- 44) = 145 MB/s +test one_pass_short_not ... bench: 122 ns/iter (+/- 6) = 139 MB/s +test sherlock::before_holmes ... bench: 14,450,308 ns/iter (+/- 617,786) = 41 MB/s +test sherlock::holmes_cochar_watson ... bench: 546,919 ns/iter (+/- 4,880) = 1087 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 194,524 ns/iter (+/- 6,230) = 3058 MB/s +test sherlock::name_alt1 ... bench: 457,899 ns/iter (+/- 7,781) = 1299 MB/s +test sherlock::name_alt2 ... bench: 496,659 ns/iter (+/- 6,529) = 1197 MB/s +test sherlock::name_alt3 ... bench: 983,620 ns/iter (+/- 45,359) = 604 MB/s +test sherlock::name_alt3_nocase ... bench: 3,500,367 ns/iter (+/- 79,807) = 169 MB/s +test sherlock::name_alt4 ... bench: 972,128 ns/iter (+/- 22,195) = 611 MB/s +test sherlock::name_alt4_nocase ... bench: 1,877,017 ns/iter (+/- 39,079) = 316 MB/s +test sherlock::name_holmes ... bench: 398,258 ns/iter (+/- 4,338) = 1493 MB/s +test sherlock::name_holmes_nocase ... bench: 492,292 ns/iter (+/- 4,667) = 1208 MB/s +test sherlock::name_sherlock ... bench: 268,891 ns/iter (+/- 18,063) = 2212 MB/s +test sherlock::name_sherlock_holmes ... bench: 197,067 ns/iter (+/- 8,027) = 3018 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 1,112,501 ns/iter (+/- 44,457) = 534 MB/s +test sherlock::name_sherlock_nocase ... bench: 1,332,423 ns/iter (+/- 39,227) = 446 MB/s +test sherlock::name_whitespace ... bench: 267,257 ns/iter (+/- 964) = 2226 MB/s +test sherlock::no_match_common ... bench: 595,211 ns/iter (+/- 3,739) = 999 MB/s +test sherlock::no_match_uncommon ... bench: 584,057 ns/iter (+/- 6,825) = 1018 MB/s +test sherlock::quotes ... 
bench: 1,208,235 ns/iter (+/- 37,629) = 492 MB/s +test sherlock::the_lower ... bench: 1,210,851 ns/iter (+/- 35,900) = 491 MB/s +test sherlock::the_nocase ... bench: 1,286,611 ns/iter (+/- 35,689) = 462 MB/s +test sherlock::the_upper ... bench: 776,113 ns/iter (+/- 6,236) = 766 MB/s +test sherlock::the_whitespace ... bench: 1,368,468 ns/iter (+/- 135,282) = 434 MB/s +test sherlock::word_ending_n ... bench: 12,018,618 ns/iter (+/- 266,497) = 49 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 53 measured + diff --git a/vendor/regex/record/old-bench-log/02-set/dynamic b/vendor/regex/record/old-bench-log/02-set/dynamic new file mode 100644 index 0000000..69c9f71 --- /dev/null +++ b/vendor/regex/record/old-bench-log/02-set/dynamic @@ -0,0 +1,78 @@ + Compiling regex v0.1.52 (file:///home/andrew/data/projects/rust/regex) + Running target/release/dynamic-a76738dddf3bdc6b + +running 71 tests +test misc::anchored_literal_long_match ... bench: 74 ns/iter (+/- 8) = 5270 MB/s +test misc::anchored_literal_long_non_match ... bench: 58 ns/iter (+/- 0) = 6724 MB/s +test misc::anchored_literal_short_match ... bench: 73 ns/iter (+/- 0) = 356 MB/s +test misc::anchored_literal_short_non_match ... bench: 58 ns/iter (+/- 0) = 448 MB/s +test misc::easy0_1K ... bench: 214 ns/iter (+/- 2) = 4785 MB/s +test misc::easy0_1MB ... bench: 247,056 ns/iter (+/- 1,777) = 4244 MB/s +test misc::easy0_32 ... bench: 64 ns/iter (+/- 0) = 500 MB/s +test misc::easy0_32K ... bench: 5,281 ns/iter (+/- 29) = 6204 MB/s +test misc::easy1_1K ... bench: 278 ns/iter (+/- 5) = 3683 MB/s +test misc::easy1_1MB ... bench: 320,041 ns/iter (+/- 4,243) = 3276 MB/s +test misc::easy1_32 ... bench: 65 ns/iter (+/- 0) = 492 MB/s +test misc::easy1_32K ... bench: 5,885 ns/iter (+/- 83) = 5568 MB/s +test misc::hard_1K ... bench: 4,685 ns/iter (+/- 20) = 218 MB/s +test misc::hard_1MB ... bench: 4,745,020 ns/iter (+/- 19,440) = 220 MB/s +test misc::hard_32 ... bench: 197 ns/iter (+/- 1) = 162 MB/s +test misc::hard_32K ... 
bench: 147,409 ns/iter (+/- 656) = 222 MB/s +test misc::literal ... bench: 20 ns/iter (+/- 1) = 2550 MB/s +test misc::match_class ... bench: 86 ns/iter (+/- 3) = 941 MB/s +test misc::match_class_in_range ... bench: 32 ns/iter (+/- 2) = 2531 MB/s +test misc::match_class_unicode ... bench: 801 ns/iter (+/- 36) = 200 MB/s +test misc::medium_1K ... bench: 1,213 ns/iter (+/- 237) = 844 MB/s +test misc::medium_1MB ... bench: 1,991,418 ns/iter (+/- 239,612) = 526 MB/s +test misc::medium_32 ... bench: 100 ns/iter (+/- 8) = 320 MB/s +test misc::medium_32K ... bench: 57,080 ns/iter (+/- 709) = 574 MB/s +test misc::no_exponential ... bench: 522 ns/iter (+/- 17) = 191 MB/s +test misc::not_literal ... bench: 290 ns/iter (+/- 6) = 175 MB/s +test misc::one_pass_long_prefix ... bench: 176 ns/iter (+/- 15) = 147 MB/s +test misc::one_pass_long_prefix_not ... bench: 183 ns/iter (+/- 28) = 142 MB/s +test misc::one_pass_short ... bench: 136 ns/iter (+/- 8) = 125 MB/s +test misc::one_pass_short_not ... bench: 135 ns/iter (+/- 14) = 125 MB/s +test misc::replace_all ... bench: 149 ns/iter (+/- 34) +test rust_compile::compile_huge ... bench: 158,759 ns/iter (+/- 4,546) +test rust_compile::compile_huge_bytes ... bench: 17,538,290 ns/iter (+/- 1,735,383) +test rust_compile::compile_simple ... bench: 5,935 ns/iter (+/- 429) +test rust_compile::compile_simple_bytes ... bench: 6,682 ns/iter (+/- 293) +test rust_compile::compile_small ... bench: 7,664 ns/iter (+/- 473) +test rust_compile::compile_small_bytes ... bench: 175,272 ns/iter (+/- 4,492) +test rust_parse::parse_huge ... bench: 1,199 ns/iter (+/- 38) +test rust_parse::parse_simple ... bench: 1,849 ns/iter (+/- 28) +test rust_parse::parse_small ... bench: 2,470 ns/iter (+/- 35) +test sherlock::before_holmes ... bench: 2,750,028 ns/iter (+/- 21,847) = 216 MB/s +test sherlock::everything_greedy ... bench: 7,896,337 ns/iter (+/- 68,883) = 75 MB/s +test sherlock::everything_greedy_nl ... 
bench: 5,498,247 ns/iter (+/- 65,952) = 108 MB/s +test sherlock::holmes_cochar_watson ... bench: 260,499 ns/iter (+/- 4,984) = 2283 MB/s +test sherlock::holmes_coword_watson ... bench: 1,331,443 ns/iter (+/- 34,716) = 446 MB/s +test sherlock::letters ... bench: 60,985,848 ns/iter (+/- 592,838) = 9 MB/s +test sherlock::letters_lower ... bench: 59,041,695 ns/iter (+/- 186,034) = 10 MB/s +test sherlock::letters_upper ... bench: 4,714,214 ns/iter (+/- 35,672) = 126 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 2,730,524 ns/iter (+/- 69,565) = 217 MB/s +test sherlock::name_alt1 ... bench: 41,866 ns/iter (+/- 682) = 14210 MB/s +test sherlock::name_alt2 ... bench: 194,322 ns/iter (+/- 6,628) = 3061 MB/s +test sherlock::name_alt3 ... bench: 1,252,965 ns/iter (+/- 18,828) = 474 MB/s +test sherlock::name_alt3_nocase ... bench: 1,476,169 ns/iter (+/- 14,557) = 403 MB/s +test sherlock::name_alt4 ... bench: 298,639 ns/iter (+/- 3,905) = 1992 MB/s +test sherlock::name_alt4_nocase ... bench: 1,426,191 ns/iter (+/- 23,584) = 417 MB/s +test sherlock::name_holmes ... bench: 49,719 ns/iter (+/- 811) = 11965 MB/s +test sherlock::name_holmes_nocase ... bench: 1,191,400 ns/iter (+/- 19,175) = 499 MB/s +test sherlock::name_sherlock ... bench: 34,091 ns/iter (+/- 877) = 17451 MB/s +test sherlock::name_sherlock_holmes ... bench: 33,785 ns/iter (+/- 1,207) = 17609 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 1,235,442 ns/iter (+/- 18,023) = 481 MB/s +test sherlock::name_sherlock_nocase ... bench: 1,236,252 ns/iter (+/- 26,934) = 481 MB/s +test sherlock::name_whitespace ... bench: 60,200 ns/iter (+/- 1,873) = 9882 MB/s +test sherlock::no_match_common ... bench: 559,886 ns/iter (+/- 20,306) = 1062 MB/s +test sherlock::no_match_uncommon ... bench: 23,631 ns/iter (+/- 497) = 25175 MB/s +test sherlock::quotes ... bench: 967,379 ns/iter (+/- 12,856) = 614 MB/s +test sherlock::the_lower ... bench: 766,950 ns/iter (+/- 21,944) = 775 MB/s +test sherlock::the_nocase ... 
bench: 1,706,539 ns/iter (+/- 26,003) = 348 MB/s +test sherlock::the_upper ... bench: 52,529 ns/iter (+/- 1,208) = 11325 MB/s +test sherlock::the_whitespace ... bench: 2,012,952 ns/iter (+/- 26,968) = 295 MB/s +test sherlock::word_ending_n ... bench: 55,578,841 ns/iter (+/- 537,463) = 10 MB/s +test sherlock::words ... bench: 19,103,327 ns/iter (+/- 102,828) = 31 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 71 measured + diff --git a/vendor/regex/record/old-bench-log/03-bytes/onig b/vendor/regex/record/old-bench-log/03-bytes/onig new file mode 100644 index 0000000..aaf666b --- /dev/null +++ b/vendor/regex/record/old-bench-log/03-bytes/onig @@ -0,0 +1,68 @@ + Compiling regex-benchmark v0.1.0 (file:///home/andrew/data/projects/rust/regex/benches) + Running benches/target/release/onig-e3bc363aa56fb408 + +running 61 tests +test misc::anchored_literal_long_match ... bench: 70 ns/iter (+/- 1) = 5571 MB/s +test misc::anchored_literal_long_non_match ... bench: 424 ns/iter (+/- 4) = 919 MB/s +test misc::anchored_literal_short_match ... bench: 70 ns/iter (+/- 1) = 371 MB/s +test misc::anchored_literal_short_non_match ... bench: 38 ns/iter (+/- 0) = 684 MB/s +test misc::easy0_1K ... bench: 176 ns/iter (+/- 2) = 5818 MB/s +test misc::easy0_1MB ... bench: 163,547 ns/iter (+/- 1,451) = 6411 MB/s +test misc::easy0_32 ... bench: 20 ns/iter (+/- 1) = 1600 MB/s +test misc::easy0_32K ... bench: 5,056 ns/iter (+/- 64) = 6481 MB/s +test misc::easy1_1K ... bench: 4,103 ns/iter (+/- 11) = 249 MB/s +test misc::easy1_1MB ... bench: 4,198,406 ns/iter (+/- 62,171) = 249 MB/s +test misc::easy1_32 ... bench: 139 ns/iter (+/- 1) = 230 MB/s +test misc::easy1_32K ... bench: 131,083 ns/iter (+/- 1,310) = 249 MB/s +test misc::hard_1K ... bench: 163 ns/iter (+/- 3) = 6282 MB/s +test misc::hard_1MB ... bench: 163,910 ns/iter (+/- 2,368) = 6397 MB/s +test misc::hard_32 ... bench: 20 ns/iter (+/- 1) = 1600 MB/s +test misc::hard_32K ... 
bench: 5,002 ns/iter (+/- 306) = 6550 MB/s +test misc::literal ... bench: 226 ns/iter (+/- 0) = 225 MB/s +test misc::match_class ... bench: 337 ns/iter (+/- 2) = 240 MB/s +test misc::match_class_in_range ... bench: 337 ns/iter (+/- 1) = 240 MB/s +test misc::match_class_unicode ... bench: 2,004 ns/iter (+/- 26) = 80 MB/s +test misc::medium_1K ... bench: 191 ns/iter (+/- 2) = 5361 MB/s +test misc::medium_1MB ... bench: 164,027 ns/iter (+/- 2,494) = 6392 MB/s +test misc::medium_32 ... bench: 22 ns/iter (+/- 1) = 1454 MB/s +test misc::medium_32K ... bench: 4,962 ns/iter (+/- 60) = 6603 MB/s +test misc::not_literal ... bench: 359 ns/iter (+/- 5) = 142 MB/s +test misc::one_pass_long_prefix ... bench: 94 ns/iter (+/- 3) = 276 MB/s +test misc::one_pass_long_prefix_not ... bench: 101 ns/iter (+/- 1) = 257 MB/s +test misc::one_pass_short ... bench: 332 ns/iter (+/- 6) = 51 MB/s +test misc::one_pass_short_not ... bench: 318 ns/iter (+/- 4) = 53 MB/s +test sherlock::before_holmes ... bench: 70,859,542 ns/iter (+/- 594,306) = 8 MB/s +test sherlock::everything_greedy ... bench: 5,129,894 ns/iter (+/- 33,792) = 115 MB/s +test sherlock::holmes_cochar_watson ... bench: 2,388,047 ns/iter (+/- 19,666) = 249 MB/s +test sherlock::ing_suffix ... bench: 28,413,935 ns/iter (+/- 800,513) = 20 MB/s +test sherlock::ing_suffix_limited_space ... bench: 2,636,327 ns/iter (+/- 66,410) = 225 MB/s +test sherlock::letters ... bench: 26,471,724 ns/iter (+/- 872,994) = 22 MB/s +test sherlock::letters_lower ... bench: 26,124,489 ns/iter (+/- 556,750) = 22 MB/s +test sherlock::letters_upper ... bench: 11,268,144 ns/iter (+/- 338,510) = 52 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 195,797 ns/iter (+/- 1,621) = 3038 MB/s +test sherlock::name_alt1 ... bench: 2,100,763 ns/iter (+/- 16,823) = 283 MB/s +test sherlock::name_alt2 ... bench: 2,212,816 ns/iter (+/- 17,997) = 268 MB/s +test sherlock::name_alt3 ... 
bench: 3,031,567 ns/iter (+/- 35,631) = 196 MB/s +test sherlock::name_alt3_nocase ... bench: 39,737,911 ns/iter (+/- 166,863) = 14 MB/s +test sherlock::name_alt4 ... bench: 2,230,681 ns/iter (+/- 18,856) = 266 MB/s +test sherlock::name_alt4_nocase ... bench: 8,294,698 ns/iter (+/- 36,887) = 71 MB/s +test sherlock::name_holmes ... bench: 402,600 ns/iter (+/- 6,232) = 1477 MB/s +test sherlock::name_holmes_nocase ... bench: 4,074,155 ns/iter (+/- 23,317) = 146 MB/s +test sherlock::name_sherlock ... bench: 270,225 ns/iter (+/- 2,815) = 2201 MB/s +test sherlock::name_sherlock_holmes ... bench: 196,502 ns/iter (+/- 2,168) = 3027 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 4,397,347 ns/iter (+/- 28,567) = 135 MB/s +test sherlock::name_sherlock_nocase ... bench: 4,400,574 ns/iter (+/- 25,127) = 135 MB/s +test sherlock::name_whitespace ... bench: 274,462 ns/iter (+/- 3,180) = 2167 MB/s +test sherlock::no_match_common ... bench: 596,601 ns/iter (+/- 9,285) = 997 MB/s +test sherlock::no_match_uncommon ... bench: 586,258 ns/iter (+/- 7,702) = 1014 MB/s +test sherlock::quotes ... bench: 4,069,570 ns/iter (+/- 20,372) = 146 MB/s +test sherlock::repeated_class_negation ... bench: 44,936,445 ns/iter (+/- 103,467) = 13 MB/s +test sherlock::the_lower ... bench: 1,300,513 ns/iter (+/- 12,884) = 457 MB/s +test sherlock::the_nocase ... bench: 5,141,237 ns/iter (+/- 25,487) = 115 MB/s +test sherlock::the_upper ... bench: 821,454 ns/iter (+/- 13,420) = 724 MB/s +test sherlock::the_whitespace ... bench: 2,009,530 ns/iter (+/- 14,082) = 296 MB/s +test sherlock::word_ending_n ... bench: 27,847,316 ns/iter (+/- 47,618) = 21 MB/s +test sherlock::words ... bench: 21,105,627 ns/iter (+/- 33,436) = 28 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 61 measured + diff --git a/vendor/regex/record/old-bench-log/03-bytes/pcre b/vendor/regex/record/old-bench-log/03-bytes/pcre new file mode 100644 index 0000000..236613a --- /dev/null +++ b/vendor/regex/record/old-bench-log/03-bytes/pcre @@ -0,0 +1,66 @@ + Running benches/target/release/pcre-855c18fb35cdf072 + +running 60 tests +test misc::anchored_literal_long_match ... bench: 88 ns/iter (+/- 12) = 4431 MB/s +test misc::anchored_literal_long_non_match ... bench: 58 ns/iter (+/- 1) = 6724 MB/s +test misc::anchored_literal_short_match ... bench: 88 ns/iter (+/- 1) = 295 MB/s +test misc::anchored_literal_short_non_match ... bench: 60 ns/iter (+/- 3) = 433 MB/s +test misc::easy0_1K ... bench: 266 ns/iter (+/- 1) = 3849 MB/s +test misc::easy0_1MB ... bench: 227,366 ns/iter (+/- 794) = 4611 MB/s +test misc::easy0_32 ... bench: 62 ns/iter (+/- 2) = 516 MB/s +test misc::easy0_32K ... bench: 7,061 ns/iter (+/- 109) = 4640 MB/s +test misc::easy1_1K ... bench: 805 ns/iter (+/- 10) = 1272 MB/s +test misc::easy1_1MB ... bench: 751,948 ns/iter (+/- 6,995) = 1394 MB/s +test misc::easy1_32 ... bench: 71 ns/iter (+/- 1) = 450 MB/s +test misc::easy1_32K ... bench: 23,635 ns/iter (+/- 213) = 1386 MB/s +test misc::hard_1K ... bench: 31,008 ns/iter (+/- 299) = 33 MB/s +test misc::hard_1MB ... bench: 35,078,241 ns/iter (+/- 94,197) = 29 MB/s +test misc::hard_32 ... bench: 313 ns/iter (+/- 1) = 102 MB/s +test misc::hard_32K ... bench: 995,958 ns/iter (+/- 10,945) = 32 MB/s +test misc::literal ... bench: 130 ns/iter (+/- 1) = 392 MB/s +test misc::match_class ... bench: 176 ns/iter (+/- 2) = 460 MB/s +test misc::match_class_in_range ... bench: 178 ns/iter (+/- 1) = 455 MB/s +test misc::match_class_unicode ... bench: 511 ns/iter (+/- 6) = 315 MB/s +test misc::medium_1K ... bench: 275 ns/iter (+/- 4) = 3723 MB/s +test misc::medium_1MB ... bench: 239,603 ns/iter (+/- 1,808) = 4376 MB/s +test misc::medium_32 ... 
bench: 62 ns/iter (+/- 1) = 516 MB/s +test misc::medium_32K ... bench: 7,385 ns/iter (+/- 43) = 4437 MB/s +test misc::not_literal ... bench: 274 ns/iter (+/- 3) = 186 MB/s +test misc::one_pass_long_prefix ... bench: 87 ns/iter (+/- 1) = 298 MB/s +test misc::one_pass_long_prefix_not ... bench: 88 ns/iter (+/- 0) = 295 MB/s +test misc::one_pass_short ... bench: 115 ns/iter (+/- 0) = 147 MB/s +test misc::one_pass_short_not ... bench: 118 ns/iter (+/- 0) = 144 MB/s +test sherlock::before_holmes ... bench: 14,338,348 ns/iter (+/- 23,734) = 41 MB/s +test sherlock::holmes_cochar_watson ... bench: 547,196 ns/iter (+/- 4,100) = 1087 MB/s +test sherlock::ing_suffix ... bench: 6,012,620 ns/iter (+/- 51,777) = 98 MB/s +test sherlock::ing_suffix_limited_space ... bench: 6,374,577 ns/iter (+/- 46,486) = 93 MB/s +test sherlock::letters ... bench: 28,575,184 ns/iter (+/- 65,051) = 20 MB/s +test sherlock::letters_lower ... bench: 25,819,606 ns/iter (+/- 180,823) = 23 MB/s +test sherlock::letters_upper ... bench: 3,227,381 ns/iter (+/- 11,443) = 184 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 193,512 ns/iter (+/- 1,316) = 3074 MB/s +test sherlock::name_alt1 ... bench: 454,510 ns/iter (+/- 2,721) = 1308 MB/s +test sherlock::name_alt2 ... bench: 499,453 ns/iter (+/- 4,692) = 1191 MB/s +test sherlock::name_alt3 ... bench: 1,085,732 ns/iter (+/- 6,841) = 547 MB/s +test sherlock::name_alt3_nocase ... bench: 3,194,995 ns/iter (+/- 12,655) = 186 MB/s +test sherlock::name_alt4 ... bench: 944,353 ns/iter (+/- 12,661) = 629 MB/s +test sherlock::name_alt4_nocase ... bench: 1,646,368 ns/iter (+/- 12,376) = 361 MB/s +test sherlock::name_holmes ... bench: 395,019 ns/iter (+/- 3,929) = 1506 MB/s +test sherlock::name_holmes_nocase ... bench: 493,327 ns/iter (+/- 7,213) = 1205 MB/s +test sherlock::name_sherlock ... bench: 266,400 ns/iter (+/- 1,591) = 2233 MB/s +test sherlock::name_sherlock_holmes ... 
bench: 196,357 ns/iter (+/- 1,770) = 3029 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 1,259,747 ns/iter (+/- 4,939) = 472 MB/s +test sherlock::name_sherlock_nocase ... bench: 1,128,970 ns/iter (+/- 6,730) = 526 MB/s +test sherlock::name_whitespace ... bench: 267,323 ns/iter (+/- 1,296) = 2225 MB/s +test sherlock::no_match_common ... bench: 595,372 ns/iter (+/- 5,690) = 999 MB/s +test sherlock::no_match_uncommon ... bench: 585,406 ns/iter (+/- 5,719) = 1016 MB/s +test sherlock::quotes ... bench: 1,223,528 ns/iter (+/- 6,579) = 486 MB/s +test sherlock::repeated_class_negation ... bench: 6,440,584 ns/iter (+/- 20,444) = 92 MB/s +test sherlock::the_lower ... bench: 1,220,999 ns/iter (+/- 7,595) = 487 MB/s +test sherlock::the_nocase ... bench: 1,263,078 ns/iter (+/- 15,321) = 471 MB/s +test sherlock::the_upper ... bench: 781,141 ns/iter (+/- 15,408) = 761 MB/s +test sherlock::the_whitespace ... bench: 1,383,414 ns/iter (+/- 548,289) = 430 MB/s +test sherlock::word_ending_n ... bench: 12,709,045 ns/iter (+/- 51,420) = 46 MB/s +test sherlock::words ... bench: 10,798,918 ns/iter (+/- 40,027) = 55 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 60 measured + diff --git a/vendor/regex/record/old-bench-log/03-bytes/rust b/vendor/regex/record/old-bench-log/03-bytes/rust new file mode 100644 index 0000000..6dec097 --- /dev/null +++ b/vendor/regex/record/old-bench-log/03-bytes/rust @@ -0,0 +1,83 @@ + Compiling regex-syntax v0.2.5 (file:///home/andrew/data/projects/rust/regex/benches) + Compiling regex v0.1.55 (file:///home/andrew/data/projects/rust/regex/benches) + Compiling regex-benchmark v0.1.0 (file:///home/andrew/data/projects/rust/regex/benches) + Running benches/target/release/rust-50db306d093e5666 + +running 74 tests +test misc::anchored_literal_long_match ... bench: 75 ns/iter (+/- 5) = 5200 MB/s +test misc::anchored_literal_long_non_match ... bench: 56 ns/iter (+/- 0) = 6964 MB/s +test misc::anchored_literal_short_match ... 
bench: 79 ns/iter (+/- 0) = 329 MB/s +test misc::anchored_literal_short_non_match ... bench: 56 ns/iter (+/- 1) = 464 MB/s +test misc::easy0_1K ... bench: 138 ns/iter (+/- 0) = 7420 MB/s +test misc::easy0_1MB ... bench: 247,159 ns/iter (+/- 724) = 4242 MB/s +test misc::easy0_32 ... bench: 71 ns/iter (+/- 0) = 450 MB/s +test misc::easy0_32K ... bench: 5,474 ns/iter (+/- 34) = 5986 MB/s +test misc::easy1_1K ... bench: 273 ns/iter (+/- 1) = 3750 MB/s +test misc::easy1_1MB ... bench: 317,946 ns/iter (+/- 2,512) = 3297 MB/s +test misc::easy1_32 ... bench: 67 ns/iter (+/- 0) = 477 MB/s +test misc::easy1_32K ... bench: 5,882 ns/iter (+/- 32) = 5570 MB/s +test misc::hard_1K ... bench: 4,713 ns/iter (+/- 13) = 217 MB/s +test misc::hard_1MB ... bench: 4,732,901 ns/iter (+/- 6,948) = 221 MB/s +test misc::hard_32 ... bench: 201 ns/iter (+/- 0) = 159 MB/s +test misc::hard_32K ... bench: 147,994 ns/iter (+/- 900) = 221 MB/s +test misc::literal ... bench: 19 ns/iter (+/- 0) = 2684 MB/s +test misc::match_class ... bench: 85 ns/iter (+/- 0) = 952 MB/s +test misc::match_class_in_range ... bench: 30 ns/iter (+/- 1) = 2700 MB/s +test misc::match_class_unicode ... bench: 806 ns/iter (+/- 2) = 199 MB/s +test misc::medium_1K ... bench: 1,384 ns/iter (+/- 10) = 739 MB/s +test misc::medium_1MB ... bench: 1,974,381 ns/iter (+/- 7,383) = 531 MB/s +test misc::medium_32 ... bench: 130 ns/iter (+/- 0) = 246 MB/s +test misc::medium_32K ... bench: 52,783 ns/iter (+/- 465) = 620 MB/s +test misc::no_exponential ... bench: 536 ns/iter (+/- 13) = 186 MB/s +test misc::not_literal ... bench: 293 ns/iter (+/- 1) = 174 MB/s +test misc::one_pass_long_prefix ... bench: 179 ns/iter (+/- 1) = 145 MB/s +test misc::one_pass_long_prefix_not ... bench: 180 ns/iter (+/- 2) = 144 MB/s +test misc::one_pass_short ... bench: 139 ns/iter (+/- 1) = 122 MB/s +test misc::one_pass_short_not ... bench: 142 ns/iter (+/- 1) = 119 MB/s +test misc::replace_all ... bench: 171 ns/iter (+/- 1) +test rust_compile::compile_huge ... 
bench: 126,158 ns/iter (+/- 1,790) +test rust_compile::compile_huge_bytes ... bench: 18,088,719 ns/iter (+/- 518,980) +test rust_compile::compile_simple ... bench: 6,141 ns/iter (+/- 394) +test rust_compile::compile_simple_bytes ... bench: 6,669 ns/iter (+/- 306) +test rust_compile::compile_small ... bench: 7,431 ns/iter (+/- 275) +test rust_compile::compile_small_bytes ... bench: 191,002 ns/iter (+/- 1,297) +test rust_parse::parse_huge ... bench: 1,204 ns/iter (+/- 9) +test rust_parse::parse_simple ... bench: 1,905 ns/iter (+/- 16) +test rust_parse::parse_small ... bench: 2,454 ns/iter (+/- 24) +test sherlock::before_holmes ... bench: 2,748,082 ns/iter (+/- 11,406) = 216 MB/s +test sherlock::everything_greedy ... bench: 7,833,414 ns/iter (+/- 42,538) = 75 MB/s +test sherlock::everything_greedy_nl ... bench: 5,426,141 ns/iter (+/- 31,378) = 109 MB/s +test sherlock::holmes_cochar_watson ... bench: 262,322 ns/iter (+/- 5,243) = 2267 MB/s +test sherlock::holmes_coword_watson ... bench: 1,324,677 ns/iter (+/- 21,666) = 449 MB/s +test sherlock::ing_suffix ... bench: 3,179,928 ns/iter (+/- 40,246) = 187 MB/s +test sherlock::ing_suffix_limited_space ... bench: 3,525,004 ns/iter (+/- 37,262) = 168 MB/s +test sherlock::letters ... bench: 60,268,445 ns/iter (+/- 1,958,610) = 9 MB/s +test sherlock::letters_lower ... bench: 57,743,679 ns/iter (+/- 84,675) = 10 MB/s +test sherlock::letters_upper ... bench: 4,549,709 ns/iter (+/- 9,312) = 130 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 2,690,794 ns/iter (+/- 2,796) = 221 MB/s +test sherlock::name_alt1 ... bench: 42,476 ns/iter (+/- 346) = 14006 MB/s +test sherlock::name_alt2 ... bench: 199,058 ns/iter (+/- 1,498) = 2988 MB/s +test sherlock::name_alt3 ... bench: 1,248,439 ns/iter (+/- 3,051) = 476 MB/s +test sherlock::name_alt3_nocase ... bench: 1,463,628 ns/iter (+/- 2,799) = 406 MB/s +test sherlock::name_alt4 ... bench: 296,390 ns/iter (+/- 798) = 2007 MB/s +test sherlock::name_alt4_nocase ... 
bench: 1,415,770 ns/iter (+/- 3,400) = 420 MB/s +test sherlock::name_holmes ... bench: 49,713 ns/iter (+/- 317) = 11967 MB/s +test sherlock::name_holmes_nocase ... bench: 1,181,147 ns/iter (+/- 2,842) = 503 MB/s +test sherlock::name_sherlock ... bench: 34,263 ns/iter (+/- 136) = 17363 MB/s +test sherlock::name_sherlock_holmes ... bench: 34,179 ns/iter (+/- 188) = 17406 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 1,236,384 ns/iter (+/- 5,012) = 481 MB/s +test sherlock::name_sherlock_nocase ... bench: 1,232,613 ns/iter (+/- 5,009) = 482 MB/s +test sherlock::name_whitespace ... bench: 60,024 ns/iter (+/- 187) = 9911 MB/s +test sherlock::no_match_common ... bench: 558,607 ns/iter (+/- 2,595) = 1065 MB/s +test sherlock::no_match_uncommon ... bench: 24,049 ns/iter (+/- 54) = 24738 MB/s +test sherlock::quotes ... bench: 966,792 ns/iter (+/- 2,982) = 615 MB/s +test sherlock::repeated_class_negation ... bench: 84,186,484 ns/iter (+/- 66,800) = 7 MB/s +test sherlock::the_lower ... bench: 773,759 ns/iter (+/- 2,759) = 768 MB/s +test sherlock::the_nocase ... bench: 1,705,648 ns/iter (+/- 4,604) = 348 MB/s +test sherlock::the_upper ... bench: 52,729 ns/iter (+/- 209) = 11282 MB/s +test sherlock::the_whitespace ... bench: 1,981,215 ns/iter (+/- 8,080) = 300 MB/s +test sherlock::word_ending_n ... bench: 53,482,650 ns/iter (+/- 73,844) = 11 MB/s +test sherlock::words ... bench: 18,961,987 ns/iter (+/- 27,794) = 31 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 74 measured + diff --git a/vendor/regex/record/old-bench-log/03-bytes/rust-bytes b/vendor/regex/record/old-bench-log/03-bytes/rust-bytes new file mode 100644 index 0000000..735d259 --- /dev/null +++ b/vendor/regex/record/old-bench-log/03-bytes/rust-bytes @@ -0,0 +1,66 @@ + Compiling regex-benchmark v0.1.0 (file:///home/andrew/data/projects/rust/regex/benches) + Running benches/target/release/rust_bytes-9f3b188bc741e04b + +running 59 tests +test misc::anchored_literal_long_match ... 
bench: 75 ns/iter (+/- 6) = 5200 MB/s +test misc::anchored_literal_long_non_match ... bench: 55 ns/iter (+/- 0) = 7090 MB/s +test misc::anchored_literal_short_match ... bench: 75 ns/iter (+/- 0) = 346 MB/s +test misc::anchored_literal_short_non_match ... bench: 55 ns/iter (+/- 0) = 472 MB/s +test misc::easy0_1K ... bench: 245 ns/iter (+/- 0) = 4179 MB/s +test misc::easy0_1MB ... bench: 251,614 ns/iter (+/- 1,143) = 4167 MB/s +test misc::easy0_32 ... bench: 62 ns/iter (+/- 1) = 516 MB/s +test misc::easy0_32K ... bench: 5,281 ns/iter (+/- 66) = 6204 MB/s +test misc::easy1_1K ... bench: 266 ns/iter (+/- 1) = 3849 MB/s +test misc::easy1_1MB ... bench: 325,060 ns/iter (+/- 2,011) = 3225 MB/s +test misc::easy1_32 ... bench: 73 ns/iter (+/- 0) = 438 MB/s +test misc::easy1_32K ... bench: 5,609 ns/iter (+/- 41) = 5842 MB/s +test misc::hard_1K ... bench: 4,678 ns/iter (+/- 38) = 218 MB/s +test misc::hard_1MB ... bench: 4,736,631 ns/iter (+/- 26,227) = 221 MB/s +test misc::hard_32 ... bench: 199 ns/iter (+/- 0) = 160 MB/s +test misc::hard_32K ... bench: 148,282 ns/iter (+/- 1,353) = 220 MB/s +test misc::literal ... bench: 18 ns/iter (+/- 0) = 2833 MB/s +test misc::match_class ... bench: 83 ns/iter (+/- 0) = 975 MB/s +test misc::match_class_in_range ... bench: 30 ns/iter (+/- 0) = 2700 MB/s +test misc::medium_1K ... bench: 1,147 ns/iter (+/- 10) = 892 MB/s +test misc::medium_1MB ... bench: 1,953,230 ns/iter (+/- 10,530) = 536 MB/s +test misc::medium_32 ... bench: 99 ns/iter (+/- 0) = 323 MB/s +test misc::medium_32K ... bench: 54,705 ns/iter (+/- 349) = 598 MB/s +test misc::no_exponential ... bench: 534 ns/iter (+/- 4) = 187 MB/s +test misc::not_literal ... bench: 292 ns/iter (+/- 3) = 174 MB/s +test misc::one_pass_long_prefix ... bench: 179 ns/iter (+/- 1) = 145 MB/s +test misc::one_pass_long_prefix_not ... bench: 180 ns/iter (+/- 2) = 144 MB/s +test misc::one_pass_short ... bench: 139 ns/iter (+/- 0) = 122 MB/s +test misc::one_pass_short_not ... 
bench: 139 ns/iter (+/- 0) = 122 MB/s +test sherlock::before_holmes ... bench: 2,778,686 ns/iter (+/- 8,735) = 214 MB/s +test sherlock::everything_greedy ... bench: 7,884,691 ns/iter (+/- 37,268) = 75 MB/s +test sherlock::everything_greedy_nl ... bench: 5,406,627 ns/iter (+/- 24,707) = 110 MB/s +test sherlock::holmes_cochar_watson ... bench: 262,175 ns/iter (+/- 1,995) = 2269 MB/s +test sherlock::holmes_coword_watson ... bench: 1,299,904 ns/iter (+/- 5,090) = 457 MB/s +test sherlock::ing_suffix ... bench: 3,202,899 ns/iter (+/- 20,810) = 185 MB/s +test sherlock::ing_suffix_limited_space ... bench: 3,367,381 ns/iter (+/- 14,143) = 176 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 2,725,593 ns/iter (+/- 10,736) = 218 MB/s +test sherlock::name_alt1 ... bench: 42,161 ns/iter (+/- 355) = 14110 MB/s +test sherlock::name_alt2 ... bench: 195,390 ns/iter (+/- 1,112) = 3044 MB/s +test sherlock::name_alt3 ... bench: 1,248,432 ns/iter (+/- 3,244) = 476 MB/s +test sherlock::name_alt3_nocase ... bench: 3,371,906 ns/iter (+/- 42,421) = 176 MB/s +test sherlock::name_alt4 ... bench: 296,423 ns/iter (+/- 1,812) = 2007 MB/s +test sherlock::name_alt4_nocase ... bench: 1,753,178 ns/iter (+/- 23,269) = 339 MB/s +test sherlock::name_holmes ... bench: 49,554 ns/iter (+/- 261) = 12005 MB/s +test sherlock::name_holmes_nocase ... bench: 1,347,682 ns/iter (+/- 5,678) = 441 MB/s +test sherlock::name_sherlock ... bench: 33,937 ns/iter (+/- 208) = 17530 MB/s +test sherlock::name_sherlock_holmes ... bench: 33,870 ns/iter (+/- 225) = 17565 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 1,212,233 ns/iter (+/- 5,452) = 490 MB/s +test sherlock::name_sherlock_nocase ... bench: 1,190,590 ns/iter (+/- 3,248) = 499 MB/s +test sherlock::name_whitespace ... bench: 59,434 ns/iter (+/- 253) = 10009 MB/s +test sherlock::no_match_common ... bench: 565,962 ns/iter (+/- 4,601) = 1051 MB/s +test sherlock::no_match_uncommon ... 
bench: 23,729 ns/iter (+/- 218) = 25071 MB/s +test sherlock::quotes ... bench: 966,904 ns/iter (+/- 7,115) = 615 MB/s +test sherlock::repeated_class_negation ... bench: 121,271,073 ns/iter (+/- 242,789) = 4 MB/s +test sherlock::the_lower ... bench: 778,850 ns/iter (+/- 6,781) = 763 MB/s +test sherlock::the_nocase ... bench: 2,876,190 ns/iter (+/- 8,611) = 206 MB/s +test sherlock::the_upper ... bench: 52,617 ns/iter (+/- 315) = 11306 MB/s +test sherlock::the_whitespace ... bench: 1,982,270 ns/iter (+/- 11,079) = 300 MB/s +test sherlock::word_ending_n ... bench: 76,442,330 ns/iter (+/- 236,690) = 7 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 59 measured + diff --git a/vendor/regex/record/old-bench-log/04/onig b/vendor/regex/record/old-bench-log/04/onig new file mode 100644 index 0000000..81b4098 --- /dev/null +++ b/vendor/regex/record/old-bench-log/04/onig @@ -0,0 +1,78 @@ + Compiling regex-benchmark v0.1.0 (file:///home/andrew/data/projects/rust/regex/bench) + Running target/release/bench-0d58c0af2e68ae0d + +running 71 tests +test misc::anchored_literal_long_match ... bench: 66 ns/iter (+/- 1) = 5909 MB/s +test misc::anchored_literal_long_non_match ... bench: 414 ns/iter (+/- 2) = 942 MB/s +test misc::anchored_literal_short_match ... bench: 66 ns/iter (+/- 1) = 393 MB/s +test misc::anchored_literal_short_non_match ... bench: 36 ns/iter (+/- 0) = 722 MB/s +test misc::easy0_1K ... bench: 217 ns/iter (+/- 2) = 4843 MB/s +test misc::easy0_1MB ... bench: 130,657 ns/iter (+/- 365) = 8025 MB/s +test misc::easy0_32 ... bench: 84 ns/iter (+/- 1) = 702 MB/s +test misc::easy0_32K ... bench: 4,092 ns/iter (+/- 25) = 8014 MB/s +test misc::easy1_1K ... bench: 3,682 ns/iter (+/- 25) = 283 MB/s +test misc::easy1_1MB ... bench: 3,613,381 ns/iter (+/- 5,960) = 290 MB/s +test misc::easy1_32 ... bench: 237 ns/iter (+/- 2) = 219 MB/s +test misc::easy1_32K ... bench: 113,040 ns/iter (+/- 303) = 290 MB/s +test misc::hard_1K ... 
bench: 184,299 ns/iter (+/- 2,508) = 5 MB/s +test misc::hard_1MB ... bench: 198,378,531 ns/iter (+/- 150,404) = 5 MB/s +test misc::hard_32 ... bench: 5,765 ns/iter (+/- 26) = 10 MB/s +test misc::hard_32K ... bench: 6,177,362 ns/iter (+/- 21,959) = 5 MB/s +test misc::literal ... bench: 219 ns/iter (+/- 1) = 232 MB/s +test misc::long_needle1 ... bench: 6,978,321 ns/iter (+/- 120,792) = 14 MB/s +test misc::long_needle2 ... bench: 6,981,122 ns/iter (+/- 120,371) = 14 MB/s +test misc::match_class ... bench: 329 ns/iter (+/- 5) = 246 MB/s +test misc::match_class_in_range ... bench: 332 ns/iter (+/- 1) = 243 MB/s +test misc::match_class_unicode ... bench: 1,980 ns/iter (+/- 23) = 81 MB/s +test misc::medium_1K ... bench: 232 ns/iter (+/- 0) = 4534 MB/s +test misc::medium_1MB ... bench: 130,702 ns/iter (+/- 997) = 8022 MB/s +test misc::medium_32 ... bench: 95 ns/iter (+/- 1) = 631 MB/s +test misc::medium_32K ... bench: 4,103 ns/iter (+/- 13) = 7993 MB/s +test misc::not_literal ... bench: 353 ns/iter (+/- 2) = 144 MB/s +test misc::one_pass_long_prefix ... bench: 89 ns/iter (+/- 1) = 292 MB/s +test misc::one_pass_long_prefix_not ... bench: 97 ns/iter (+/- 0) = 268 MB/s +test misc::one_pass_short ... bench: 329 ns/iter (+/- 4) = 51 MB/s +test misc::one_pass_short_not ... bench: 324 ns/iter (+/- 4) = 52 MB/s +test misc::reallyhard2_1K ... bench: 563,552 ns/iter (+/- 2,559) = 1 MB/s +test misc::reallyhard_1K ... bench: 184,200 ns/iter (+/- 553) = 5 MB/s +test misc::reallyhard_1MB ... bench: 198,336,145 ns/iter (+/- 149,796) = 5 MB/s +test misc::reallyhard_32 ... bench: 5,766 ns/iter (+/- 16) = 10 MB/s +test misc::reallyhard_32K ... bench: 6,174,904 ns/iter (+/- 5,491) = 5 MB/s +test sherlock::before_holmes ... bench: 70,476,093 ns/iter (+/- 271,168) = 8 MB/s +test sherlock::everything_greedy ... bench: 5,175,140 ns/iter (+/- 19,413) = 114 MB/s +test sherlock::holmes_cochar_watson ... bench: 2,379,427 ns/iter (+/- 5,816) = 250 MB/s +test sherlock::ing_suffix ... 
bench: 28,275,131 ns/iter (+/- 49,569) = 21 MB/s +test sherlock::ing_suffix_limited_space ... bench: 2,648,838 ns/iter (+/- 9,247) = 224 MB/s +test sherlock::letters ... bench: 25,940,039 ns/iter (+/- 57,724) = 22 MB/s +test sherlock::letters_lower ... bench: 25,680,050 ns/iter (+/- 48,209) = 23 MB/s +test sherlock::letters_upper ... bench: 11,122,063 ns/iter (+/- 28,302) = 53 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 195,903 ns/iter (+/- 1,162) = 3036 MB/s +test sherlock::name_alt1 ... bench: 2,100,175 ns/iter (+/- 4,251) = 283 MB/s +test sherlock::name_alt2 ... bench: 2,210,122 ns/iter (+/- 7,514) = 269 MB/s +test sherlock::name_alt3 ... bench: 3,025,653 ns/iter (+/- 9,375) = 196 MB/s +test sherlock::name_alt3_nocase ... bench: 39,475,102 ns/iter (+/- 51,488) = 15 MB/s +test sherlock::name_alt4 ... bench: 2,225,952 ns/iter (+/- 7,340) = 267 MB/s +test sherlock::name_alt4_nocase ... bench: 8,227,413 ns/iter (+/- 18,088) = 72 MB/s +test sherlock::name_alt5 ... bench: 2,300,803 ns/iter (+/- 6,325) = 258 MB/s +test sherlock::name_alt5_nocase ... bench: 11,488,783 ns/iter (+/- 28,880) = 51 MB/s +test sherlock::name_holmes ... bench: 400,760 ns/iter (+/- 907) = 1484 MB/s +test sherlock::name_holmes_nocase ... bench: 4,044,850 ns/iter (+/- 11,665) = 147 MB/s +test sherlock::name_sherlock ... bench: 269,021 ns/iter (+/- 791) = 2211 MB/s +test sherlock::name_sherlock_holmes ... bench: 196,161 ns/iter (+/- 899) = 3032 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 4,363,621 ns/iter (+/- 5,339) = 136 MB/s +test sherlock::name_sherlock_nocase ... bench: 4,389,375 ns/iter (+/- 11,077) = 135 MB/s +test sherlock::name_whitespace ... bench: 273,691 ns/iter (+/- 957) = 2173 MB/s +test sherlock::no_match_common ... bench: 588,744 ns/iter (+/- 1,732) = 1010 MB/s +test sherlock::no_match_really_common ... bench: 673,335 ns/iter (+/- 1,407) = 883 MB/s +test sherlock::no_match_uncommon ... 
bench: 578,009 ns/iter (+/- 5,111) = 1029 MB/s +test sherlock::quotes ... bench: 4,066,005 ns/iter (+/- 10,116) = 146 MB/s +test sherlock::repeated_class_negation ... bench: 43,374,733 ns/iter (+/- 48,409) = 13 MB/s +test sherlock::the_lower ... bench: 1,275,300 ns/iter (+/- 5,351) = 466 MB/s +test sherlock::the_nocase ... bench: 5,100,832 ns/iter (+/- 11,024) = 116 MB/s +test sherlock::the_upper ... bench: 816,606 ns/iter (+/- 3,370) = 728 MB/s +test sherlock::the_whitespace ... bench: 2,079,544 ns/iter (+/- 4,585) = 286 MB/s +test sherlock::word_ending_n ... bench: 27,699,175 ns/iter (+/- 58,998) = 21 MB/s +test sherlock::words ... bench: 19,460,356 ns/iter (+/- 29,406) = 30 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 71 measured + diff --git a/vendor/regex/record/old-bench-log/04/pcre1-jit b/vendor/regex/record/old-bench-log/04/pcre1-jit new file mode 100644 index 0000000..2118d1f --- /dev/null +++ b/vendor/regex/record/old-bench-log/04/pcre1-jit @@ -0,0 +1,77 @@ + Compiling regex-benchmark v0.1.0 (file:///home/andrew/data/projects/rust/regex/bench) + Running target/release/bench-0d58c0af2e68ae0d + +running 70 tests +test misc::anchored_literal_long_match ... bench: 32 ns/iter (+/- 0) = 12187 MB/s +test misc::anchored_literal_long_non_match ... bench: 27 ns/iter (+/- 1) = 14444 MB/s +test misc::anchored_literal_short_match ... bench: 31 ns/iter (+/- 0) = 838 MB/s +test misc::anchored_literal_short_non_match ... bench: 27 ns/iter (+/- 2) = 962 MB/s +test misc::easy0_1K ... bench: 247 ns/iter (+/- 1) = 4255 MB/s +test misc::easy0_1MB ... bench: 193,485 ns/iter (+/- 906) = 5419 MB/s +test misc::easy0_32 ... bench: 55 ns/iter (+/- 1) = 1072 MB/s +test misc::easy0_32K ... bench: 6,057 ns/iter (+/- 19) = 5414 MB/s +test misc::easy1_1K ... bench: 604 ns/iter (+/- 3) = 1728 MB/s +test misc::easy1_1MB ... bench: 553,893 ns/iter (+/- 1,299) = 1893 MB/s +test misc::easy1_32 ... bench: 81 ns/iter (+/- 1) = 641 MB/s +test misc::easy1_32K ... 
bench: 17,335 ns/iter (+/- 33) = 1891 MB/s +test misc::hard_1K ... bench: 56,956 ns/iter (+/- 148) = 18 MB/s +test misc::hard_1MB ... bench: 63,576,485 ns/iter (+/- 93,278) = 16 MB/s +test misc::hard_32 ... bench: 1,744 ns/iter (+/- 10) = 33 MB/s +test misc::hard_32K ... bench: 1,931,799 ns/iter (+/- 7,752) = 16 MB/s +test misc::literal ... bench: 73 ns/iter (+/- 1) = 698 MB/s +test misc::long_needle1 ... bench: 532,256 ns/iter (+/- 4,633) = 187 MB/s +test misc::long_needle2 ... bench: 532,131 ns/iter (+/- 3,771) = 187 MB/s +test misc::match_class ... bench: 120 ns/iter (+/- 0) = 675 MB/s +test misc::match_class_in_range ... bench: 119 ns/iter (+/- 0) = 680 MB/s +test misc::match_class_unicode ... bench: 456 ns/iter (+/- 2) = 353 MB/s +test misc::medium_1K ... bench: 260 ns/iter (+/- 1) = 4046 MB/s +test misc::medium_1MB ... bench: 206,175 ns/iter (+/- 983) = 5085 MB/s +test misc::medium_32 ... bench: 58 ns/iter (+/- 0) = 1034 MB/s +test misc::medium_32K ... bench: 6,443 ns/iter (+/- 26) = 5090 MB/s +test misc::not_literal ... bench: 216 ns/iter (+/- 0) = 236 MB/s +test misc::one_pass_long_prefix ... bench: 31 ns/iter (+/- 0) = 838 MB/s +test misc::one_pass_long_prefix_not ... bench: 31 ns/iter (+/- 0) = 838 MB/s +test misc::one_pass_short ... bench: 59 ns/iter (+/- 0) = 288 MB/s +test misc::one_pass_short_not ... bench: 63 ns/iter (+/- 2) = 269 MB/s +test misc::reallyhard2_1K ... bench: 96,070 ns/iter (+/- 238) = 10 MB/s +test misc::reallyhard_1K ... bench: 60,783 ns/iter (+/- 170) = 17 MB/s +test misc::reallyhard_1MB ... bench: 60,899,076 ns/iter (+/- 483,661) = 17 MB/s +test misc::reallyhard_32 ... bench: 1,822 ns/iter (+/- 58) = 32 MB/s +test misc::reallyhard_32K ... bench: 1,809,770 ns/iter (+/- 45,348) = 18 MB/s +test sherlock::before_holmes ... bench: 14,513,309 ns/iter (+/- 146,332) = 40 MB/s +test sherlock::holmes_cochar_watson ... bench: 543,738 ns/iter (+/- 4,549) = 1094 MB/s +test sherlock::ing_suffix ... 
bench: 5,561,653 ns/iter (+/- 44,720) = 106 MB/s +test sherlock::ing_suffix_limited_space ... bench: 6,116,112 ns/iter (+/- 91,799) = 97 MB/s +test sherlock::letters ... bench: 15,633,185 ns/iter (+/- 313,036) = 38 MB/s +test sherlock::letters_lower ... bench: 15,228,423 ns/iter (+/- 290,879) = 39 MB/s +test sherlock::letters_upper ... bench: 3,279,472 ns/iter (+/- 48,073) = 181 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 192,935 ns/iter (+/- 1,405) = 3083 MB/s +test sherlock::name_alt1 ... bench: 452,708 ns/iter (+/- 4,728) = 1314 MB/s +test sherlock::name_alt2 ... bench: 477,092 ns/iter (+/- 6,192) = 1246 MB/s +test sherlock::name_alt3 ... bench: 959,514 ns/iter (+/- 25,214) = 620 MB/s +test sherlock::name_alt3_nocase ... bench: 3,478,546 ns/iter (+/- 52,300) = 171 MB/s +test sherlock::name_alt4 ... bench: 947,187 ns/iter (+/- 9,985) = 628 MB/s +test sherlock::name_alt4_nocase ... bench: 1,852,289 ns/iter (+/- 30,616) = 321 MB/s +test sherlock::name_alt5 ... bench: 655,616 ns/iter (+/- 9,327) = 907 MB/s +test sherlock::name_alt5_nocase ... bench: 1,957,627 ns/iter (+/- 47,271) = 303 MB/s +test sherlock::name_holmes ... bench: 383,813 ns/iter (+/- 1,185) = 1550 MB/s +test sherlock::name_holmes_nocase ... bench: 478,335 ns/iter (+/- 4,851) = 1243 MB/s +test sherlock::name_sherlock ... bench: 263,611 ns/iter (+/- 875) = 2256 MB/s +test sherlock::name_sherlock_holmes ... bench: 193,687 ns/iter (+/- 1,070) = 3071 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 1,258,447 ns/iter (+/- 32,369) = 472 MB/s +test sherlock::name_sherlock_nocase ... bench: 1,330,069 ns/iter (+/- 36,657) = 447 MB/s +test sherlock::name_whitespace ... bench: 264,340 ns/iter (+/- 2,723) = 2250 MB/s +test sherlock::no_match_common ... bench: 589,309 ns/iter (+/- 5,038) = 1009 MB/s +test sherlock::no_match_really_common ... bench: 683,909 ns/iter (+/- 4,987) = 869 MB/s +test sherlock::no_match_uncommon ... 
bench: 578,309 ns/iter (+/- 2,831) = 1028 MB/s +test sherlock::quotes ... bench: 1,184,492 ns/iter (+/- 27,247) = 502 MB/s +test sherlock::repeated_class_negation ... bench: 7,208,342 ns/iter (+/- 17,978) = 82 MB/s +test sherlock::the_lower ... bench: 1,001,754 ns/iter (+/- 6,215) = 593 MB/s +test sherlock::the_nocase ... bench: 1,043,260 ns/iter (+/- 10,217) = 570 MB/s +test sherlock::the_upper ... bench: 753,058 ns/iter (+/- 1,640) = 790 MB/s +test sherlock::the_whitespace ... bench: 1,195,227 ns/iter (+/- 9,524) = 497 MB/s +test sherlock::word_ending_n ... bench: 11,767,448 ns/iter (+/- 15,460) = 50 MB/s +test sherlock::words ... bench: 7,551,361 ns/iter (+/- 25,566) = 78 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 70 measured + diff --git a/vendor/regex/record/old-bench-log/04/pcre2-jit b/vendor/regex/record/old-bench-log/04/pcre2-jit new file mode 100644 index 0000000..9a110b5 --- /dev/null +++ b/vendor/regex/record/old-bench-log/04/pcre2-jit @@ -0,0 +1,77 @@ + Compiling regex-benchmark v0.1.0 (file:///home/andrew/data/projects/rust/regex/bench) + Running target/release/bench-0d58c0af2e68ae0d + +running 70 tests +test misc::anchored_literal_long_match ... bench: 22 ns/iter (+/- 0) = 17727 MB/s +test misc::anchored_literal_long_non_match ... bench: 14 ns/iter (+/- 0) = 27857 MB/s +test misc::anchored_literal_short_match ... bench: 21 ns/iter (+/- 0) = 1238 MB/s +test misc::anchored_literal_short_non_match ... bench: 14 ns/iter (+/- 1) = 1857 MB/s +test misc::easy0_1K ... bench: 235 ns/iter (+/- 2) = 4472 MB/s +test misc::easy0_1MB ... bench: 193,652 ns/iter (+/- 524) = 5414 MB/s +test misc::easy0_32 ... bench: 43 ns/iter (+/- 0) = 1372 MB/s +test misc::easy0_32K ... bench: 6,024 ns/iter (+/- 12) = 5444 MB/s +test misc::easy1_1K ... bench: 235 ns/iter (+/- 4) = 4442 MB/s +test misc::easy1_1MB ... bench: 193,685 ns/iter (+/- 617) = 5413 MB/s +test misc::easy1_32 ... bench: 45 ns/iter (+/- 0) = 1155 MB/s +test misc::easy1_32K ... 
bench: 6,018 ns/iter (+/- 9) = 5448 MB/s +test misc::hard_1K ... bench: 1,880 ns/iter (+/- 7) = 559 MB/s +test misc::hard_1MB ... bench: 1,283,101 ns/iter (+/- 4,420) = 817 MB/s +test misc::hard_32 ... bench: 119 ns/iter (+/- 2) = 495 MB/s +test misc::hard_32K ... bench: 39,919 ns/iter (+/- 95) = 821 MB/s +test misc::literal ... bench: 18 ns/iter (+/- 1) = 2833 MB/s +test misc::long_needle1 ... bench: 513,050 ns/iter (+/- 2,267) = 194 MB/s +test misc::long_needle2 ... bench: 518,009 ns/iter (+/- 3,066) = 193 MB/s +test misc::match_class ... bench: 106 ns/iter (+/- 1) = 764 MB/s +test misc::match_class_in_range ... bench: 24 ns/iter (+/- 1) = 3375 MB/s +test misc::match_class_unicode ... bench: 370 ns/iter (+/- 2) = 435 MB/s +test misc::medium_1K ... bench: 237 ns/iter (+/- 0) = 4438 MB/s +test misc::medium_1MB ... bench: 193,478 ns/iter (+/- 540) = 5419 MB/s +test misc::medium_32 ... bench: 46 ns/iter (+/- 0) = 1304 MB/s +test misc::medium_32K ... bench: 6,024 ns/iter (+/- 15) = 5444 MB/s +test misc::not_literal ... bench: 274 ns/iter (+/- 1) = 186 MB/s +test misc::one_pass_long_prefix ... bench: 19 ns/iter (+/- 1) = 1368 MB/s +test misc::one_pass_long_prefix_not ... bench: 19 ns/iter (+/- 0) = 1368 MB/s +test misc::one_pass_short ... bench: 47 ns/iter (+/- 0) = 361 MB/s +test misc::one_pass_short_not ... bench: 50 ns/iter (+/- 2) = 340 MB/s +test misc::reallyhard2_1K ... bench: 4,959 ns/iter (+/- 34) = 209 MB/s +test misc::reallyhard_1K ... bench: 2,145 ns/iter (+/- 17) = 489 MB/s +test misc::reallyhard_1MB ... bench: 1,292,683 ns/iter (+/- 3,342) = 811 MB/s +test misc::reallyhard_32 ... bench: 124 ns/iter (+/- 4) = 475 MB/s +test misc::reallyhard_32K ... bench: 47,263 ns/iter (+/- 173) = 693 MB/s +test sherlock::before_holmes ... bench: 4,706,445 ns/iter (+/- 23,483) = 126 MB/s +test sherlock::holmes_cochar_watson ... bench: 488,613 ns/iter (+/- 2,921) = 1217 MB/s +test sherlock::ing_suffix ... 
bench: 1,886,092 ns/iter (+/- 9,951) = 315 MB/s +test sherlock::ing_suffix_limited_space ... bench: 5,091,401 ns/iter (+/- 21,315) = 116 MB/s +test sherlock::letters ... bench: 10,082,811 ns/iter (+/- 41,989) = 59 MB/s +test sherlock::letters_lower ... bench: 9,640,481 ns/iter (+/- 46,499) = 61 MB/s +test sherlock::letters_upper ... bench: 1,772,105 ns/iter (+/- 8,833) = 335 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 192,075 ns/iter (+/- 1,043) = 3097 MB/s +test sherlock::name_alt1 ... bench: 447,382 ns/iter (+/- 2,142) = 1329 MB/s +test sherlock::name_alt2 ... bench: 447,421 ns/iter (+/- 2,077) = 1329 MB/s +test sherlock::name_alt3 ... bench: 963,775 ns/iter (+/- 1,684) = 617 MB/s +test sherlock::name_alt3_nocase ... bench: 3,152,920 ns/iter (+/- 5,757) = 188 MB/s +test sherlock::name_alt4 ... bench: 80,204 ns/iter (+/- 379) = 7417 MB/s +test sherlock::name_alt4_nocase ... bench: 1,665,405 ns/iter (+/- 7,134) = 357 MB/s +test sherlock::name_alt5 ... bench: 649,701 ns/iter (+/- 1,722) = 915 MB/s +test sherlock::name_alt5_nocase ... bench: 1,773,323 ns/iter (+/- 9,648) = 335 MB/s +test sherlock::name_holmes ... bench: 377,003 ns/iter (+/- 3,390) = 1578 MB/s +test sherlock::name_holmes_nocase ... bench: 472,947 ns/iter (+/- 1,011) = 1257 MB/s +test sherlock::name_sherlock ... bench: 262,237 ns/iter (+/- 1,268) = 2268 MB/s +test sherlock::name_sherlock_holmes ... bench: 192,306 ns/iter (+/- 520) = 3093 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 1,318,573 ns/iter (+/- 1,462) = 451 MB/s +test sherlock::name_sherlock_nocase ... bench: 1,110,629 ns/iter (+/- 13,163) = 535 MB/s +test sherlock::name_whitespace ... bench: 262,889 ns/iter (+/- 637) = 2263 MB/s +test sherlock::no_match_common ... bench: 388,869 ns/iter (+/- 1,512) = 1529 MB/s +test sherlock::no_match_really_common ... bench: 422,058 ns/iter (+/- 1,788) = 1409 MB/s +test sherlock::no_match_uncommon ... bench: 30,594 ns/iter (+/- 166) = 19446 MB/s +test sherlock::quotes ... 
bench: 569,628 ns/iter (+/- 2,052) = 1044 MB/s +test sherlock::repeated_class_negation ... bench: 6,410,128 ns/iter (+/- 19,866) = 92 MB/s +test sherlock::the_lower ... bench: 648,366 ns/iter (+/- 5,142) = 917 MB/s +test sherlock::the_nocase ... bench: 694,035 ns/iter (+/- 4,844) = 857 MB/s +test sherlock::the_upper ... bench: 54,007 ns/iter (+/- 486) = 11015 MB/s +test sherlock::the_whitespace ... bench: 850,430 ns/iter (+/- 9,641) = 699 MB/s +test sherlock::word_ending_n ... bench: 5,768,961 ns/iter (+/- 20,924) = 103 MB/s +test sherlock::words ... bench: 5,866,550 ns/iter (+/- 34,451) = 101 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 70 measured + diff --git a/vendor/regex/record/old-bench-log/04/re2 b/vendor/regex/record/old-bench-log/04/re2 new file mode 100644 index 0000000..31a6e6d --- /dev/null +++ b/vendor/regex/record/old-bench-log/04/re2 @@ -0,0 +1,79 @@ + Compiling regex-benchmark v0.1.0 (file:///home/andrew/data/projects/rust/regex/bench) + Running target/release/bench-0d58c0af2e68ae0d + +running 72 tests +test misc::anchored_literal_long_match ... bench: 119 ns/iter (+/- 2) = 3277 MB/s +test misc::anchored_literal_long_non_match ... bench: 45 ns/iter (+/- 0) = 8666 MB/s +test misc::anchored_literal_short_match ... bench: 120 ns/iter (+/- 1) = 216 MB/s +test misc::anchored_literal_short_non_match ... bench: 45 ns/iter (+/- 0) = 577 MB/s +test misc::easy0_1K ... bench: 187 ns/iter (+/- 0) = 5620 MB/s +test misc::easy0_1MB ... bench: 39,573 ns/iter (+/- 600) = 26497 MB/s +test misc::easy0_32 ... bench: 165 ns/iter (+/- 1) = 357 MB/s +test misc::easy0_32K ... bench: 971 ns/iter (+/- 20) = 33774 MB/s +test misc::easy1_1K ... bench: 175 ns/iter (+/- 1) = 5965 MB/s +test misc::easy1_1MB ... bench: 39,451 ns/iter (+/- 183) = 26579 MB/s +test misc::easy1_32 ... bench: 153 ns/iter (+/- 1) = 339 MB/s +test misc::easy1_32K ... bench: 942 ns/iter (+/- 24) = 34806 MB/s +test misc::hard_1K ... 
bench: 2,362 ns/iter (+/- 11) = 444 MB/s +test misc::hard_1MB ... bench: 2,386,627 ns/iter (+/- 12,925) = 439 MB/s +test misc::hard_32 ... bench: 228 ns/iter (+/- 1) = 258 MB/s +test misc::hard_32K ... bench: 74,482 ns/iter (+/- 190) = 440 MB/s +test misc::literal ... bench: 120 ns/iter (+/- 0) = 425 MB/s +test misc::long_needle1 ... bench: 184,777 ns/iter (+/- 1,644) = 541 MB/s +test misc::long_needle2 ... bench: 184,685 ns/iter (+/- 289) = 541 MB/s +test misc::match_class ... bench: 267 ns/iter (+/- 1) = 303 MB/s +test misc::match_class_in_range ... bench: 267 ns/iter (+/- 1) = 303 MB/s +test misc::match_class_unicode ... bench: 491 ns/iter (+/- 3) = 327 MB/s +test misc::medium_1K ... bench: 2,065 ns/iter (+/- 4) = 509 MB/s +test misc::medium_1MB ... bench: 1,938,951 ns/iter (+/- 11,278) = 540 MB/s +test misc::medium_32 ... bench: 302 ns/iter (+/- 149) = 198 MB/s +test misc::medium_32K ... bench: 60,766 ns/iter (+/- 1,018) = 539 MB/s +test misc::not_literal ... bench: 203 ns/iter (+/- 2) = 251 MB/s +test misc::one_pass_long_prefix ... bench: 119 ns/iter (+/- 1) = 218 MB/s +test misc::one_pass_long_prefix_not ... bench: 161 ns/iter (+/- 0) = 161 MB/s +test misc::one_pass_short ... bench: 143 ns/iter (+/- 0) = 118 MB/s +test misc::one_pass_short_not ... bench: 145 ns/iter (+/- 1) = 117 MB/s +test misc::reallyhard2_1K ... bench: 2,030 ns/iter (+/- 22) = 512 MB/s +test misc::reallyhard_1K ... bench: 2,362 ns/iter (+/- 18) = 444 MB/s +test misc::reallyhard_1MB ... bench: 2,386,760 ns/iter (+/- 22,075) = 439 MB/s +test misc::reallyhard_32 ... bench: 230 ns/iter (+/- 2) = 256 MB/s +test misc::reallyhard_32K ... bench: 74,506 ns/iter (+/- 740) = 440 MB/s +test sherlock::before_holmes ... bench: 1,446,270 ns/iter (+/- 5,771) = 411 MB/s +test sherlock::everything_greedy ... bench: 9,111,570 ns/iter (+/- 54,091) = 65 MB/s +test sherlock::everything_greedy_nl ... bench: 2,489,649 ns/iter (+/- 23,310) = 238 MB/s +test sherlock::holmes_cochar_watson ... 
bench: 1,176,642 ns/iter (+/- 2,181) = 505 MB/s +test sherlock::holmes_coword_watson ... bench: 1,389,000 ns/iter (+/- 258,245) = 428 MB/s +test sherlock::ing_suffix ... bench: 3,050,918 ns/iter (+/- 16,854) = 195 MB/s +test sherlock::ing_suffix_limited_space ... bench: 1,954,264 ns/iter (+/- 4,656) = 304 MB/s +test sherlock::letters ... bench: 111,162,180 ns/iter (+/- 108,719) = 5 MB/s +test sherlock::letters_lower ... bench: 106,751,460 ns/iter (+/- 414,985) = 5 MB/s +test sherlock::letters_upper ... bench: 4,705,474 ns/iter (+/- 10,913) = 126 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 2,539,425 ns/iter (+/- 5,440) = 234 MB/s +test sherlock::name_alt1 ... bench: 77,719 ns/iter (+/- 275) = 7654 MB/s +test sherlock::name_alt2 ... bench: 1,319,600 ns/iter (+/- 2,771) = 450 MB/s +test sherlock::name_alt3 ... bench: 1,433,629 ns/iter (+/- 2,943) = 414 MB/s +test sherlock::name_alt3_nocase ... bench: 2,748,137 ns/iter (+/- 4,343) = 216 MB/s +test sherlock::name_alt4 ... bench: 1,354,024 ns/iter (+/- 2,312) = 439 MB/s +test sherlock::name_alt4_nocase ... bench: 2,018,381 ns/iter (+/- 2,442) = 294 MB/s +test sherlock::name_alt5 ... bench: 1,348,150 ns/iter (+/- 3,870) = 441 MB/s +test sherlock::name_alt5_nocase ... bench: 2,114,276 ns/iter (+/- 3,365) = 281 MB/s +test sherlock::name_holmes ... bench: 168,436 ns/iter (+/- 1,503) = 3532 MB/s +test sherlock::name_holmes_nocase ... bench: 1,645,658 ns/iter (+/- 3,816) = 361 MB/s +test sherlock::name_sherlock ... bench: 59,010 ns/iter (+/- 380) = 10081 MB/s +test sherlock::name_sherlock_holmes ... bench: 60,467 ns/iter (+/- 179) = 9838 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 1,539,137 ns/iter (+/- 5,506) = 386 MB/s +test sherlock::name_sherlock_nocase ... bench: 1,535,058 ns/iter (+/- 3,352) = 387 MB/s +test sherlock::name_whitespace ... bench: 62,700 ns/iter (+/- 440) = 9488 MB/s +test sherlock::no_match_common ... 
bench: 439,560 ns/iter (+/- 1,545) = 1353 MB/s +test sherlock::no_match_really_common ... bench: 439,333 ns/iter (+/- 1,020) = 1354 MB/s +test sherlock::no_match_uncommon ... bench: 23,882 ns/iter (+/- 134) = 24911 MB/s +test sherlock::quotes ... bench: 1,396,564 ns/iter (+/- 2,785) = 425 MB/s +test sherlock::the_lower ... bench: 2,478,251 ns/iter (+/- 5,859) = 240 MB/s +test sherlock::the_nocase ... bench: 3,708,713 ns/iter (+/- 6,919) = 160 MB/s +test sherlock::the_upper ... bench: 232,490 ns/iter (+/- 4,478) = 2558 MB/s +test sherlock::the_whitespace ... bench: 2,286,399 ns/iter (+/- 5,006) = 260 MB/s +test sherlock::word_ending_n ... bench: 3,295,919 ns/iter (+/- 27,810) = 180 MB/s +test sherlock::words ... bench: 30,375,810 ns/iter (+/- 37,415) = 19 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 72 measured + diff --git a/vendor/regex/record/old-bench-log/04/rust b/vendor/regex/record/old-bench-log/04/rust new file mode 100644 index 0000000..01e6f44 --- /dev/null +++ b/vendor/regex/record/old-bench-log/04/rust @@ -0,0 +1,81 @@ + Compiling regex-benchmark v0.1.0 (file:///home/andrew/data/projects/rust/regex/bench) + Running target/release/bench-0d58c0af2e68ae0d + +running 74 tests +test misc::anchored_literal_long_match ... bench: 24 ns/iter (+/- 1) = 16250 MB/s +test misc::anchored_literal_long_non_match ... bench: 21 ns/iter (+/- 0) = 18571 MB/s +test misc::anchored_literal_short_match ... bench: 22 ns/iter (+/- 1) = 1181 MB/s +test misc::anchored_literal_short_non_match ... bench: 21 ns/iter (+/- 0) = 1238 MB/s +test misc::easy0_1K ... bench: 18 ns/iter (+/- 6) = 58388 MB/s +test misc::easy0_1MB ... bench: 21 ns/iter (+/- 4) = 49933476 MB/s +test misc::easy0_32 ... bench: 17 ns/iter (+/- 0) = 3470 MB/s +test misc::easy0_32K ... bench: 18 ns/iter (+/- 9) = 1821944 MB/s +test misc::easy1_1K ... bench: 52 ns/iter (+/- 0) = 20076 MB/s +test misc::easy1_1MB ... bench: 55 ns/iter (+/- 0) = 19065381 MB/s +test misc::easy1_32 ... 
bench: 50 ns/iter (+/- 0) = 1040 MB/s +test misc::easy1_32K ... bench: 50 ns/iter (+/- 0) = 655760 MB/s +test misc::hard_1K ... bench: 66 ns/iter (+/- 0) = 15924 MB/s +test misc::hard_1MB ... bench: 70 ns/iter (+/- 1) = 14980042 MB/s +test misc::hard_32 ... bench: 62 ns/iter (+/- 1) = 951 MB/s +test misc::hard_32K ... bench: 62 ns/iter (+/- 1) = 528951 MB/s +test misc::literal ... bench: 17 ns/iter (+/- 0) = 3000 MB/s +test misc::long_needle1 ... bench: 2,359 ns/iter (+/- 37) = 42391 MB/s +test misc::long_needle2 ... bench: 634,783 ns/iter (+/- 4,313) = 157 MB/s +test misc::match_class ... bench: 82 ns/iter (+/- 1) = 987 MB/s +test misc::match_class_in_range ... bench: 30 ns/iter (+/- 15) = 2700 MB/s +test misc::match_class_unicode ... bench: 317 ns/iter (+/- 2) = 507 MB/s +test misc::medium_1K ... bench: 18 ns/iter (+/- 0) = 58444 MB/s +test misc::medium_1MB ... bench: 22 ns/iter (+/- 0) = 47663818 MB/s +test misc::medium_32 ... bench: 18 ns/iter (+/- 0) = 3333 MB/s +test misc::medium_32K ... bench: 18 ns/iter (+/- 0) = 1822000 MB/s +test misc::not_literal ... bench: 115 ns/iter (+/- 0) = 443 MB/s +test misc::one_pass_long_prefix ... bench: 69 ns/iter (+/- 1) = 376 MB/s +test misc::one_pass_long_prefix_not ... bench: 68 ns/iter (+/- 0) = 382 MB/s +test misc::one_pass_short ... bench: 50 ns/iter (+/- 0) = 340 MB/s +test misc::one_pass_short_not ... bench: 52 ns/iter (+/- 0) = 326 MB/s +test misc::reallyhard2_1K ... bench: 1,939 ns/iter (+/- 12) = 536 MB/s +test misc::reallyhard_1K ... bench: 1,964 ns/iter (+/- 7) = 535 MB/s +test misc::reallyhard_1MB ... bench: 1,934,967 ns/iter (+/- 4,626) = 541 MB/s +test misc::reallyhard_32 ... bench: 130 ns/iter (+/- 0) = 453 MB/s +test misc::reallyhard_32K ... bench: 60,581 ns/iter (+/- 176) = 541 MB/s +test misc::replace_all ... bench: 142 ns/iter (+/- 1) +test sherlock::before_holmes ... bench: 1,127,747 ns/iter (+/- 2,052) = 527 MB/s +test sherlock::everything_greedy ... 
bench: 2,598,664 ns/iter (+/- 6,137) = 228 MB/s +test sherlock::everything_greedy_nl ... bench: 1,202,183 ns/iter (+/- 1,965) = 494 MB/s +test sherlock::holmes_cochar_watson ... bench: 220,378 ns/iter (+/- 1,229) = 2699 MB/s +test sherlock::holmes_coword_watson ... bench: 631,731 ns/iter (+/- 2,071) = 941 MB/s +test sherlock::ing_suffix ... bench: 1,344,980 ns/iter (+/- 1,799) = 442 MB/s +test sherlock::ing_suffix_limited_space ... bench: 1,286,425 ns/iter (+/- 2,965) = 462 MB/s +test sherlock::letters ... bench: 24,356,951 ns/iter (+/- 47,224) = 24 MB/s +test sherlock::letters_lower ... bench: 23,816,732 ns/iter (+/- 44,203) = 24 MB/s +test sherlock::letters_upper ... bench: 2,051,873 ns/iter (+/- 8,712) = 289 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 1,102,534 ns/iter (+/- 6,071) = 539 MB/s +test sherlock::name_alt1 ... bench: 36,474 ns/iter (+/- 308) = 16311 MB/s +test sherlock::name_alt2 ... bench: 185,668 ns/iter (+/- 1,023) = 3204 MB/s +test sherlock::name_alt3 ... bench: 1,152,554 ns/iter (+/- 1,991) = 516 MB/s +test sherlock::name_alt3_nocase ... bench: 1,254,885 ns/iter (+/- 5,387) = 474 MB/s +test sherlock::name_alt4 ... bench: 228,721 ns/iter (+/- 854) = 2601 MB/s +test sherlock::name_alt4_nocase ... bench: 1,223,457 ns/iter (+/- 2,307) = 486 MB/s +test sherlock::name_alt5 ... bench: 317,372 ns/iter (+/- 951) = 1874 MB/s +test sherlock::name_alt5_nocase ... bench: 1,224,434 ns/iter (+/- 3,886) = 485 MB/s +test sherlock::name_holmes ... bench: 42,905 ns/iter (+/- 217) = 13866 MB/s +test sherlock::name_holmes_nocase ... bench: 1,080,290 ns/iter (+/- 5,686) = 550 MB/s +test sherlock::name_sherlock ... bench: 70,041 ns/iter (+/- 444) = 8494 MB/s +test sherlock::name_sherlock_holmes ... bench: 36,092 ns/iter (+/- 189) = 16483 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 1,156,696 ns/iter (+/- 3,922) = 514 MB/s +test sherlock::name_sherlock_nocase ... 
bench: 1,156,088 ns/iter (+/- 2,453) = 514 MB/s +test sherlock::name_whitespace ... bench: 79,560 ns/iter (+/- 426) = 7477 MB/s +test sherlock::no_match_common ... bench: 25,940 ns/iter (+/- 119) = 22934 MB/s +test sherlock::no_match_really_common ... bench: 364,911 ns/iter (+/- 1,302) = 1630 MB/s +test sherlock::no_match_uncommon ... bench: 25,851 ns/iter (+/- 112) = 23013 MB/s +test sherlock::quotes ... bench: 561,575 ns/iter (+/- 2,083) = 1059 MB/s +test sherlock::repeated_class_negation ... bench: 88,961,089 ns/iter (+/- 132,661) = 6 MB/s +test sherlock::the_lower ... bench: 609,891 ns/iter (+/- 1,451) = 975 MB/s +test sherlock::the_nocase ... bench: 1,622,541 ns/iter (+/- 6,851) = 366 MB/s +test sherlock::the_upper ... bench: 48,810 ns/iter (+/- 245) = 12188 MB/s +test sherlock::the_whitespace ... bench: 1,192,755 ns/iter (+/- 4,168) = 498 MB/s +test sherlock::word_ending_n ... bench: 1,991,440 ns/iter (+/- 7,313) = 298 MB/s +test sherlock::words ... bench: 9,688,357 ns/iter (+/- 17,267) = 61 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 74 measured + diff --git a/vendor/regex/record/old-bench-log/04/tcl b/vendor/regex/record/old-bench-log/04/tcl new file mode 100644 index 0000000..934bf6e --- /dev/null +++ b/vendor/regex/record/old-bench-log/04/tcl @@ -0,0 +1,72 @@ + Compiling regex-benchmark v0.1.0 (file:///home/andrew/data/projects/rust/regex/bench) + Running target/release/bench-0d58c0af2e68ae0d + +running 65 tests +test misc::anchored_literal_long_match ... bench: 925 ns/iter (+/- 16) = 421 MB/s +test misc::anchored_literal_long_non_match ... bench: 144 ns/iter (+/- 2) = 2708 MB/s +test misc::anchored_literal_short_match ... bench: 920 ns/iter (+/- 11) = 28 MB/s +test misc::anchored_literal_short_non_match ... bench: 144 ns/iter (+/- 1) = 180 MB/s +test misc::easy0_1K ... bench: 14,228 ns/iter (+/- 204) = 73 MB/s +test misc::easy0_1MB ... bench: 3,728,677 ns/iter (+/- 4,564) = 281 MB/s +test misc::easy0_32 ... 
bench: 10,023 ns/iter (+/- 156) = 5 MB/s +test misc::easy0_32K ... bench: 125,851 ns/iter (+/- 287) = 260 MB/s +test misc::easy1_1K ... bench: 8,797 ns/iter (+/- 90) = 118 MB/s +test misc::easy1_1MB ... bench: 3,722,675 ns/iter (+/- 4,912) = 281 MB/s +test misc::easy1_32 ... bench: 5,189 ns/iter (+/- 77) = 10 MB/s +test misc::easy1_32K ... bench: 121,106 ns/iter (+/- 694) = 270 MB/s +test misc::hard_1K ... bench: 17,111 ns/iter (+/- 251) = 61 MB/s +test misc::hard_1MB ... bench: 3,743,313 ns/iter (+/- 7,634) = 280 MB/s +test misc::hard_32 ... bench: 13,489 ns/iter (+/- 220) = 4 MB/s +test misc::hard_32K ... bench: 129,358 ns/iter (+/- 257) = 253 MB/s +test misc::literal ... bench: 629 ns/iter (+/- 5) = 81 MB/s +test misc::long_needle1 ... bench: 21,495,182 ns/iter (+/- 41,993) = 4 MB/s +test misc::long_needle2 ... bench: 21,501,034 ns/iter (+/- 34,033) = 4 MB/s +test misc::match_class ... bench: 732 ns/iter (+/- 3) = 110 MB/s +test misc::match_class_in_range ... bench: 736 ns/iter (+/- 6) = 110 MB/s +test misc::medium_1K ... bench: 14,433 ns/iter (+/- 49) = 72 MB/s +test misc::medium_1MB ... bench: 3,729,861 ns/iter (+/- 4,198) = 281 MB/s +test misc::medium_32 ... bench: 10,756 ns/iter (+/- 75) = 5 MB/s +test misc::medium_32K ... bench: 126,593 ns/iter (+/- 169) = 259 MB/s +test misc::not_literal ... bench: 2,350 ns/iter (+/- 13) = 21 MB/s +test misc::one_pass_long_prefix ... bench: 9,183 ns/iter (+/- 198) = 2 MB/s +test misc::one_pass_long_prefix_not ... bench: 8,470 ns/iter (+/- 110) = 3 MB/s +test misc::one_pass_short ... bench: 956 ns/iter (+/- 4) = 17 MB/s +test misc::one_pass_short_not ... bench: 1,042 ns/iter (+/- 13) = 16 MB/s +test misc::reallyhard2_1K ... bench: 129,563 ns/iter (+/- 336) = 8 MB/s +test misc::reallyhard_1K ... bench: 16,656 ns/iter (+/- 152) = 63 MB/s +test misc::reallyhard_1MB ... bench: 3,744,123 ns/iter (+/- 4,556) = 280 MB/s +test misc::reallyhard_32 ... bench: 12,910 ns/iter (+/- 112) = 4 MB/s +test misc::reallyhard_32K ... 
bench: 129,293 ns/iter (+/- 301) = 253 MB/s +test sherlock::before_holmes ... bench: 3,593,560 ns/iter (+/- 8,574) = 165 MB/s +test sherlock::holmes_cochar_watson ... bench: 2,906,271 ns/iter (+/- 5,153) = 204 MB/s +test sherlock::ing_suffix ... bench: 7,016,213 ns/iter (+/- 30,321) = 84 MB/s +test sherlock::ing_suffix_limited_space ... bench: 24,592,817 ns/iter (+/- 78,720) = 24 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 2,457,984 ns/iter (+/- 3,932) = 242 MB/s +test sherlock::name_alt1 ... bench: 2,569,156 ns/iter (+/- 5,789) = 231 MB/s +test sherlock::name_alt2 ... bench: 3,686,183 ns/iter (+/- 13,550) = 161 MB/s +test sherlock::name_alt3 ... bench: 6,715,311 ns/iter (+/- 15,208) = 88 MB/s +test sherlock::name_alt3_nocase ... bench: 9,702,060 ns/iter (+/- 32,628) = 61 MB/s +test sherlock::name_alt4 ... bench: 3,834,029 ns/iter (+/- 3,955) = 155 MB/s +test sherlock::name_alt4_nocase ... bench: 4,762,730 ns/iter (+/- 751,201) = 124 MB/s +test sherlock::name_alt5 ... bench: 4,582,303 ns/iter (+/- 8,073) = 129 MB/s +test sherlock::name_alt5_nocase ... bench: 5,583,652 ns/iter (+/- 14,573) = 106 MB/s +test sherlock::name_holmes ... bench: 2,968,764 ns/iter (+/- 6,198) = 200 MB/s +test sherlock::name_holmes_nocase ... bench: 3,066,080 ns/iter (+/- 8,986) = 194 MB/s +test sherlock::name_sherlock ... bench: 2,372,708 ns/iter (+/- 3,272) = 250 MB/s +test sherlock::name_sherlock_holmes ... bench: 2,607,914 ns/iter (+/- 3,361) = 228 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 2,641,260 ns/iter (+/- 9,409) = 225 MB/s +test sherlock::name_sherlock_nocase ... bench: 2,420,591 ns/iter (+/- 11,053) = 245 MB/s +test sherlock::name_whitespace ... bench: 2,592,553 ns/iter (+/- 3,476) = 229 MB/s +test sherlock::no_match_common ... bench: 2,114,367 ns/iter (+/- 1,665) = 281 MB/s +test sherlock::no_match_really_common ... bench: 2,114,835 ns/iter (+/- 2,491) = 281 MB/s +test sherlock::no_match_uncommon ... 
bench: 2,105,274 ns/iter (+/- 1,657) = 282 MB/s +test sherlock::quotes ... bench: 10,978,890 ns/iter (+/- 30,645) = 54 MB/s +test sherlock::repeated_class_negation ... bench: 69,836,043 ns/iter (+/- 117,415) = 8 MB/s +test sherlock::the_lower ... bench: 9,343,518 ns/iter (+/- 29,387) = 63 MB/s +test sherlock::the_nocase ... bench: 9,690,676 ns/iter (+/- 42,585) = 61 MB/s +test sherlock::the_upper ... bench: 2,780,398 ns/iter (+/- 6,949) = 213 MB/s +test sherlock::the_whitespace ... bench: 11,562,612 ns/iter (+/- 78,789) = 51 MB/s +test sherlock::words ... bench: 64,139,234 ns/iter (+/- 491,422) = 9 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 65 measured + diff --git a/vendor/regex/record/old-bench-log/05/onig b/vendor/regex/record/old-bench-log/05/onig new file mode 100644 index 0000000..373b149 --- /dev/null +++ b/vendor/regex/record/old-bench-log/05/onig @@ -0,0 +1,99 @@ + +running 94 tests +test misc::anchored_literal_long_match ... bench: 158 ns/iter (+/- 4) = 2468 MB/s +test misc::anchored_literal_long_non_match ... bench: 495 ns/iter (+/- 7) = 787 MB/s +test misc::anchored_literal_short_match ... bench: 160 ns/iter (+/- 3) = 162 MB/s +test misc::anchored_literal_short_non_match ... bench: 44 ns/iter (+/- 4) = 590 MB/s +test misc::easy0_1K ... bench: 315 ns/iter (+/- 15) = 3336 MB/s +test misc::easy0_1MB ... bench: 136,864 ns/iter (+/- 5,984) = 7661 MB/s +test misc::easy0_32 ... bench: 163 ns/iter (+/- 11) = 361 MB/s +test misc::easy0_32K ... bench: 4,562 ns/iter (+/- 255) = 7188 MB/s +test misc::easy1_1K ... bench: 3,947 ns/iter (+/- 199) = 264 MB/s +test misc::easy1_1MB ... bench: 3,920,564 ns/iter (+/- 122,902) = 267 MB/s +test misc::easy1_32 ... bench: 321 ns/iter (+/- 20) = 161 MB/s +test misc::easy1_32K ... bench: 121,449 ns/iter (+/- 4,899) = 269 MB/s +test misc::hard_1K ... bench: 125,960 ns/iter (+/- 7,255) = 8 MB/s +test misc::hard_1MB ... bench: 134,129,947 ns/iter (+/- 4,797,942) = 7 MB/s +test misc::hard_32 ... 
bench: 4,044 ns/iter (+/- 227) = 14 MB/s +test misc::hard_32K ... bench: 4,183,228 ns/iter (+/- 127,808) = 7 MB/s +test misc::literal ... bench: 331 ns/iter (+/- 21) = 154 MB/s +test misc::long_needle1 ... bench: 5,715,563 ns/iter (+/- 250,535) = 17 MB/s +test misc::long_needle2 ... bench: 5,779,968 ns/iter (+/- 195,784) = 17 MB/s +test misc::match_class ... bench: 431 ns/iter (+/- 5) = 187 MB/s +test misc::match_class_in_range ... bench: 427 ns/iter (+/- 27) = 189 MB/s +test misc::match_class_unicode ... bench: 1,946 ns/iter (+/- 88) = 82 MB/s +test misc::medium_1K ... bench: 325 ns/iter (+/- 23) = 3236 MB/s +test misc::medium_1MB ... bench: 138,022 ns/iter (+/- 5,142) = 7597 MB/s +test misc::medium_32 ... bench: 182 ns/iter (+/- 7) = 329 MB/s +test misc::medium_32K ... bench: 4,511 ns/iter (+/- 190) = 7270 MB/s +test misc::not_literal ... bench: 436 ns/iter (+/- 25) = 116 MB/s +test misc::one_pass_long_prefix ... bench: 168 ns/iter (+/- 6) = 154 MB/s +test misc::one_pass_long_prefix_not ... bench: 176 ns/iter (+/- 7) = 147 MB/s +test misc::one_pass_short ... bench: 325 ns/iter (+/- 16) = 52 MB/s +test misc::one_pass_short_not ... bench: 322 ns/iter (+/- 21) = 52 MB/s +test misc::reallyhard2_1K ... bench: 289,956 ns/iter (+/- 16,350) = 3 MB/s +test misc::reallyhard_1K ... bench: 126,089 ns/iter (+/- 5,350) = 8 MB/s +test misc::reallyhard_1MB ... bench: 133,197,312 ns/iter (+/- 3,057,491) = 7 MB/s +test misc::reallyhard_32 ... bench: 4,060 ns/iter (+/- 11) = 14 MB/s +test misc::reallyhard_32K ... bench: 4,215,469 ns/iter (+/- 200,526) = 7 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 27,622 ns/iter (+/- 778) = 289 MB/s +test regexdna::find_new_lines ... bench: 30,503,604 ns/iter (+/- 1,120,697) = 166 MB/s +test regexdna::subst1 ... bench: 23,276,552 ns/iter (+/- 1,019,308) = 218 MB/s +test regexdna::subst10 ... bench: 23,199,415 ns/iter (+/- 790,938) = 219 MB/s +test regexdna::subst11 ... 
bench: 23,138,469 ns/iter (+/- 884,700) = 219 MB/s +test regexdna::subst2 ... bench: 23,076,376 ns/iter (+/- 644,391) = 220 MB/s +test regexdna::subst3 ... bench: 23,115,770 ns/iter (+/- 737,666) = 219 MB/s +test regexdna::subst4 ... bench: 23,093,288 ns/iter (+/- 1,003,519) = 220 MB/s +test regexdna::subst5 ... bench: 23,618,534 ns/iter (+/- 773,260) = 215 MB/s +test regexdna::subst6 ... bench: 23,301,581 ns/iter (+/- 679,681) = 218 MB/s +test regexdna::subst7 ... bench: 23,371,339 ns/iter (+/- 714,433) = 217 MB/s +test regexdna::subst8 ... bench: 23,187,513 ns/iter (+/- 863,031) = 219 MB/s +test regexdna::subst9 ... bench: 23,143,027 ns/iter (+/- 890,422) = 219 MB/s +test regexdna::variant1 ... bench: 104,906,982 ns/iter (+/- 3,391,942) = 48 MB/s +test regexdna::variant2 ... bench: 118,326,728 ns/iter (+/- 3,378,748) = 42 MB/s +test regexdna::variant3 ... bench: 109,348,596 ns/iter (+/- 3,647,056) = 46 MB/s +test regexdna::variant4 ... bench: 104,574,675 ns/iter (+/- 3,236,753) = 48 MB/s +test regexdna::variant5 ... bench: 102,968,132 ns/iter (+/- 2,792,754) = 49 MB/s +test regexdna::variant6 ... bench: 103,783,112 ns/iter (+/- 2,851,581) = 48 MB/s +test regexdna::variant7 ... bench: 103,939,805 ns/iter (+/- 3,118,277) = 48 MB/s +test regexdna::variant8 ... bench: 109,722,594 ns/iter (+/- 3,739,958) = 46 MB/s +test regexdna::variant9 ... bench: 128,702,724 ns/iter (+/- 3,739,103) = 39 MB/s +test sherlock::before_after_holmes ... bench: 39,219,739 ns/iter (+/- 1,622,425) = 15 MB/s +test sherlock::before_holmes ... bench: 37,454,934 ns/iter (+/- 1,055,140) = 15 MB/s +test sherlock::everything_greedy ... bench: 7,341,629 ns/iter (+/- 241,072) = 81 MB/s +test sherlock::holmes_cochar_watson ... bench: 2,298,534 ns/iter (+/- 94,224) = 258 MB/s +test sherlock::ing_suffix ... bench: 18,533,670 ns/iter (+/- 505,855) = 32 MB/s +test sherlock::ing_suffix_limited_space ... bench: 2,419,034 ns/iter (+/- 124,616) = 245 MB/s +test sherlock::letters ... 
bench: 61,910,045 ns/iter (+/- 2,122,755) = 9 MB/s +test sherlock::letters_lower ... bench: 60,831,022 ns/iter (+/- 2,559,720) = 9 MB/s +test sherlock::letters_upper ... bench: 10,747,265 ns/iter (+/- 761,147) = 55 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 215,376 ns/iter (+/- 13,622) = 2762 MB/s +test sherlock::name_alt1 ... bench: 2,282,320 ns/iter (+/- 154,104) = 260 MB/s +test sherlock::name_alt2 ... bench: 2,206,087 ns/iter (+/- 158,376) = 269 MB/s +test sherlock::name_alt3 ... bench: 2,771,932 ns/iter (+/- 181,216) = 214 MB/s +test sherlock::name_alt3_nocase ... bench: 19,198,056 ns/iter (+/- 816,668) = 30 MB/s +test sherlock::name_alt4 ... bench: 2,254,798 ns/iter (+/- 135,379) = 263 MB/s +test sherlock::name_alt4_nocase ... bench: 5,734,254 ns/iter (+/- 411,596) = 103 MB/s +test sherlock::name_alt5 ... bench: 2,276,779 ns/iter (+/- 172,557) = 261 MB/s +test sherlock::name_alt5_nocase ... bench: 7,314,318 ns/iter (+/- 377,963) = 81 MB/s +test sherlock::name_holmes ... bench: 477,888 ns/iter (+/- 37,472) = 1244 MB/s +test sherlock::name_holmes_nocase ... bench: 3,487,005 ns/iter (+/- 278,896) = 170 MB/s +test sherlock::name_sherlock ... bench: 295,313 ns/iter (+/- 16,739) = 2014 MB/s +test sherlock::name_sherlock_holmes ... bench: 216,522 ns/iter (+/- 15,594) = 2747 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 3,480,703 ns/iter (+/- 272,332) = 170 MB/s +test sherlock::name_sherlock_nocase ... bench: 3,511,444 ns/iter (+/- 283,013) = 169 MB/s +test sherlock::name_whitespace ... bench: 304,043 ns/iter (+/- 19,186) = 1956 MB/s +test sherlock::no_match_common ... bench: 632,615 ns/iter (+/- 44,676) = 940 MB/s +test sherlock::no_match_really_common ... bench: 727,565 ns/iter (+/- 54,169) = 817 MB/s +test sherlock::no_match_uncommon ... bench: 624,061 ns/iter (+/- 37,791) = 953 MB/s +test sherlock::quotes ... bench: 3,776,688 ns/iter (+/- 186,393) = 157 MB/s +test sherlock::repeated_class_negation ... 
bench: 34,354,179 ns/iter (+/- 1,534,267) = 17 MB/s +test sherlock::the_lower ... bench: 1,965,787 ns/iter (+/- 137,099) = 302 MB/s +test sherlock::the_nocase ... bench: 4,853,843 ns/iter (+/- 259,890) = 122 MB/s +test sherlock::the_upper ... bench: 949,071 ns/iter (+/- 66,016) = 626 MB/s +test sherlock::the_whitespace ... bench: 2,173,683 ns/iter (+/- 142,384) = 273 MB/s +test sherlock::word_ending_n ... bench: 19,711,057 ns/iter (+/- 942,152) = 30 MB/s +test sherlock::words ... bench: 21,979,387 ns/iter (+/- 1,250,588) = 27 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 94 measured + diff --git a/vendor/regex/record/old-bench-log/05/onig-vs-rust b/vendor/regex/record/old-bench-log/05/onig-vs-rust new file mode 100644 index 0000000..4625e8f --- /dev/null +++ b/vendor/regex/record/old-bench-log/05/onig-vs-rust @@ -0,0 +1,95 @@ + name onig ns/iter rust ns/iter diff ns/iter diff % + misc::anchored_literal_long_match 158 (2468 MB/s) 24 (16250 MB/s) -134 -84.81% + misc::anchored_literal_long_non_match 495 (787 MB/s) 27 (14444 MB/s) -468 -94.55% + misc::anchored_literal_short_match 160 (162 MB/s) 22 (1181 MB/s) -138 -86.25% + misc::anchored_literal_short_non_match 44 (590 MB/s) 24 (1083 MB/s) -20 -45.45% + misc::easy0_1K 315 (3336 MB/s) 16 (65687 MB/s) -299 -94.92% + misc::easy0_1MB 136,864 (7661 MB/s) 20 (52430150 MB/s) -136,844 -99.99% + misc::easy0_32 163 (361 MB/s) 16 (3687 MB/s) -147 -90.18% + misc::easy0_32K 4,562 (7188 MB/s) 16 (2049687 MB/s) -4,546 -99.65% + misc::easy1_1K 3,947 (264 MB/s) 48 (21750 MB/s) -3,899 -98.78% + misc::easy1_1MB 3,920,564 (267 MB/s) 48 (21845750 MB/s) -3,920,516 -100.00% + misc::easy1_32 321 (161 MB/s) 46 (1130 MB/s) -275 -85.67% + misc::easy1_32K 121,449 (269 MB/s) 47 (697617 MB/s) -121,402 -99.96% + misc::hard_1K 125,960 (8 MB/s) 58 (18120 MB/s) -125,902 -99.95% + misc::hard_1MB 134,129,947 (7 MB/s) 61 (17190213 MB/s) -134,129,886 -100.00% + misc::hard_32 4,044 (14 MB/s) 58 (1017 MB/s) -3,986 -98.57% + misc::hard_32K 
4,183,228 (7 MB/s) 56 (585625 MB/s) -4,183,172 -100.00% + misc::literal 331 (154 MB/s) 16 (3187 MB/s) -315 -95.17% + misc::long_needle1 5,715,563 (17 MB/s) 2,226 (44924 MB/s) -5,713,337 -99.96% + misc::long_needle2 5,779,968 (17 MB/s) 576,997 (173 MB/s) -5,202,971 -90.02% + misc::match_class 431 (187 MB/s) 65 (1246 MB/s) -366 -84.92% + misc::match_class_in_range 427 (189 MB/s) 27 (3000 MB/s) -400 -93.68% + misc::match_class_unicode 1,946 (82 MB/s) 283 (568 MB/s) -1,663 -85.46% + misc::medium_1K 325 (3236 MB/s) 16 (65750 MB/s) -309 -95.08% + misc::medium_1MB 138,022 (7597 MB/s) 21 (49933523 MB/s) -138,001 -99.98% + misc::medium_32 182 (329 MB/s) 17 (3529 MB/s) -165 -90.66% + misc::medium_32K 4,511 (7270 MB/s) 17 (1929176 MB/s) -4,494 -99.62% + misc::not_literal 436 (116 MB/s) 105 (485 MB/s) -331 -75.92% + misc::one_pass_long_prefix 168 (154 MB/s) 68 (382 MB/s) -100 -59.52% + misc::one_pass_long_prefix_not 176 (147 MB/s) 58 (448 MB/s) -118 -67.05% + misc::one_pass_short 325 (52 MB/s) 45 (377 MB/s) -280 -86.15% + misc::one_pass_short_not 322 (52 MB/s) 50 (340 MB/s) -272 -84.47% + misc::reallyhard2_1K 289,956 (3 MB/s) 83 (12530 MB/s) -289,873 -99.97% + misc::reallyhard_1K 126,089 (8 MB/s) 1,822 (576 MB/s) -124,267 -98.55% + misc::reallyhard_1MB 133,197,312 (7 MB/s) 1,768,327 (592 MB/s) -131,428,985 -98.67% + misc::reallyhard_32 4,060 (14 MB/s) 121 (487 MB/s) -3,939 -97.02% + misc::reallyhard_32K 4,215,469 (7 MB/s) 56,375 (581 MB/s) -4,159,094 -98.66% + misc::reverse_suffix_no_quadratic 27,622 (289 MB/s) 5,803 (1378 MB/s) -21,819 -78.99% + regexdna::find_new_lines 30,503,604 (166 MB/s) 14,818,233 (343 MB/s) -15,685,371 -51.42% + regexdna::subst1 23,276,552 (218 MB/s) 896,790 (5668 MB/s) -22,379,762 -96.15% + regexdna::subst10 23,199,415 (219 MB/s) 957,325 (5310 MB/s) -22,242,090 -95.87% + regexdna::subst11 23,138,469 (219 MB/s) 917,248 (5542 MB/s) -22,221,221 -96.04% + regexdna::subst2 23,076,376 (220 MB/s) 892,129 (5698 MB/s) -22,184,247 -96.13% + regexdna::subst3 
23,115,770 (219 MB/s) 929,250 (5470 MB/s) -22,186,520 -95.98% + regexdna::subst4 23,093,288 (220 MB/s) 872,581 (5825 MB/s) -22,220,707 -96.22% + regexdna::subst5 23,618,534 (215 MB/s) 875,804 (5804 MB/s) -22,742,730 -96.29% + regexdna::subst6 23,301,581 (218 MB/s) 884,639 (5746 MB/s) -22,416,942 -96.20% + regexdna::subst7 23,371,339 (217 MB/s) 872,791 (5824 MB/s) -22,498,548 -96.27% + regexdna::subst8 23,187,513 (219 MB/s) 873,833 (5817 MB/s) -22,313,680 -96.23% + regexdna::subst9 23,143,027 (219 MB/s) 886,744 (5732 MB/s) -22,256,283 -96.17% + regexdna::variant1 104,906,982 (48 MB/s) 3,699,267 (1374 MB/s) -101,207,715 -96.47% + regexdna::variant2 118,326,728 (42 MB/s) 6,760,952 (751 MB/s) -111,565,776 -94.29% + regexdna::variant3 109,348,596 (46 MB/s) 8,030,646 (633 MB/s) -101,317,950 -92.66% + regexdna::variant4 104,574,675 (48 MB/s) 8,077,290 (629 MB/s) -96,497,385 -92.28% + regexdna::variant5 102,968,132 (49 MB/s) 6,787,242 (748 MB/s) -96,180,890 -93.41% + regexdna::variant6 103,783,112 (48 MB/s) 6,577,777 (772 MB/s) -97,205,335 -93.66% + regexdna::variant7 103,939,805 (48 MB/s) 6,705,580 (758 MB/s) -97,234,225 -93.55% + regexdna::variant8 109,722,594 (46 MB/s) 6,818,785 (745 MB/s) -102,903,809 -93.79% + regexdna::variant9 128,702,724 (39 MB/s) 6,821,453 (745 MB/s) -121,881,271 -94.70% + sherlock::before_after_holmes 39,219,739 (15 MB/s) 1,029,866 (577 MB/s) -38,189,873 -97.37% + sherlock::before_holmes 37,454,934 (15 MB/s) 76,633 (7763 MB/s) -37,378,301 -99.80% + sherlock::everything_greedy 7,341,629 (81 MB/s) 2,375,079 (250 MB/s) -4,966,550 -67.65% + sherlock::holmes_cochar_watson 2,298,534 (258 MB/s) 144,725 (4110 MB/s) -2,153,809 -93.70% + sherlock::ing_suffix 18,533,670 (32 MB/s) 436,202 (1363 MB/s) -18,097,468 -97.65% + sherlock::ing_suffix_limited_space 2,419,034 (245 MB/s) 1,182,943 (502 MB/s) -1,236,091 -51.10% + sherlock::letters 61,910,045 (9 MB/s) 24,390,452 (24 MB/s) -37,519,593 -60.60% + sherlock::letters_lower 60,831,022 (9 MB/s) 23,784,108 (25 
MB/s) -37,046,914 -60.90% + sherlock::letters_upper 10,747,265 (55 MB/s) 1,993,838 (298 MB/s) -8,753,427 -81.45% + sherlock::line_boundary_sherlock_holmes 215,376 (2762 MB/s) 999,414 (595 MB/s) 784,038 364.03% + sherlock::name_alt1 2,282,320 (260 MB/s) 34,298 (17345 MB/s) -2,248,022 -98.50% + sherlock::name_alt2 2,206,087 (269 MB/s) 124,226 (4789 MB/s) -2,081,861 -94.37% + sherlock::name_alt3 2,771,932 (214 MB/s) 137,742 (4319 MB/s) -2,634,190 -95.03% + sherlock::name_alt3_nocase 19,198,056 (30 MB/s) 1,293,763 (459 MB/s) -17,904,293 -93.26% + sherlock::name_alt4 2,254,798 (263 MB/s) 164,900 (3607 MB/s) -2,089,898 -92.69% + sherlock::name_alt4_nocase 5,734,254 (103 MB/s) 235,023 (2531 MB/s) -5,499,231 -95.90% + sherlock::name_alt5 2,276,779 (261 MB/s) 127,928 (4650 MB/s) -2,148,851 -94.38% + sherlock::name_alt5_nocase 7,314,318 (81 MB/s) 659,591 (901 MB/s) -6,654,727 -90.98% + sherlock::name_holmes 477,888 (1244 MB/s) 40,902 (14545 MB/s) -436,986 -91.44% + sherlock::name_holmes_nocase 3,487,005 (170 MB/s) 198,658 (2994 MB/s) -3,288,347 -94.30% + sherlock::name_sherlock 295,313 (2014 MB/s) 68,924 (8631 MB/s) -226,389 -76.66% + sherlock::name_sherlock_holmes 216,522 (2747 MB/s) 31,640 (18803 MB/s) -184,882 -85.39% + sherlock::name_sherlock_holmes_nocase 3,480,703 (170 MB/s) 173,522 (3428 MB/s) -3,307,181 -95.01% + sherlock::name_sherlock_nocase 3,511,444 (169 MB/s) 170,888 (3481 MB/s) -3,340,556 -95.13% + sherlock::name_whitespace 304,043 (1956 MB/s) 84,314 (7056 MB/s) -219,729 -72.27% + sherlock::no_match_common 632,615 (940 MB/s) 20,727 (28703 MB/s) -611,888 -96.72% + sherlock::no_match_really_common 727,565 (817 MB/s) 381,476 (1559 MB/s) -346,089 -47.57% + sherlock::no_match_uncommon 624,061 (953 MB/s) 20,786 (28621 MB/s) -603,275 -96.67% + sherlock::quotes 3,776,688 (157 MB/s) 531,487 (1119 MB/s) -3,245,201 -85.93% + sherlock::repeated_class_negation 34,354,179 (17 MB/s) 85,881,944 (6 MB/s) 51,527,765 149.99% + sherlock::the_lower 1,965,787 (302 MB/s) 654,110 (909 
MB/s) -1,311,677 -66.73% + sherlock::the_nocase 4,853,843 (122 MB/s) 474,456 (1253 MB/s) -4,379,387 -90.23% + sherlock::the_upper 949,071 (626 MB/s) 43,746 (13599 MB/s) -905,325 -95.39% + sherlock::the_whitespace 2,173,683 (273 MB/s) 1,181,974 (503 MB/s) -991,709 -45.62% + sherlock::word_ending_n 19,711,057 (30 MB/s) 1,925,578 (308 MB/s) -17,785,479 -90.23% + sherlock::words 21,979,387 (27 MB/s) 9,697,201 (61 MB/s) -12,282,186 -55.88% diff --git a/vendor/regex/record/old-bench-log/05/pcre1 b/vendor/regex/record/old-bench-log/05/pcre1 new file mode 100644 index 0000000..51af361 --- /dev/null +++ b/vendor/regex/record/old-bench-log/05/pcre1 @@ -0,0 +1,98 @@ + +running 93 tests +test misc::anchored_literal_long_match ... bench: 30 ns/iter (+/- 0) = 13000 MB/s +test misc::anchored_literal_long_non_match ... bench: 24 ns/iter (+/- 1) = 16250 MB/s +test misc::anchored_literal_short_match ... bench: 29 ns/iter (+/- 1) = 896 MB/s +test misc::anchored_literal_short_non_match ... bench: 24 ns/iter (+/- 1) = 1083 MB/s +test misc::easy0_1K ... bench: 260 ns/iter (+/- 15) = 4042 MB/s +test misc::easy0_1MB ... bench: 202,849 ns/iter (+/- 7,973) = 5169 MB/s +test misc::easy0_32 ... bench: 47 ns/iter (+/- 3) = 1255 MB/s +test misc::easy0_32K ... bench: 6,378 ns/iter (+/- 236) = 5141 MB/s +test misc::easy1_1K ... bench: 248 ns/iter (+/- 15) = 4209 MB/s +test misc::easy1_1MB ... bench: 203,105 ns/iter (+/- 7,590) = 5162 MB/s +test misc::easy1_32 ... bench: 51 ns/iter (+/- 1) = 1019 MB/s +test misc::easy1_32K ... bench: 6,508 ns/iter (+/- 160) = 5038 MB/s +test misc::hard_1K ... bench: 1,324 ns/iter (+/- 46) = 793 MB/s +test misc::hard_1MB ... bench: 1,134,691 ns/iter (+/- 41,296) = 924 MB/s +test misc::hard_32 ... bench: 113 ns/iter (+/- 13) = 522 MB/s +test misc::hard_32K ... bench: 42,269 ns/iter (+/- 2,298) = 775 MB/s +test misc::literal ... bench: 28 ns/iter (+/- 0) = 1821 MB/s +test misc::long_needle1 ... 
bench: 547,122 ns/iter (+/- 34,029) = 182 MB/s +test misc::long_needle2 ... bench: 546,018 ns/iter (+/- 24,721) = 183 MB/s +test misc::match_class ... bench: 97 ns/iter (+/- 5) = 835 MB/s +test misc::match_class_in_range ... bench: 30 ns/iter (+/- 1) = 2700 MB/s +test misc::match_class_unicode ... bench: 343 ns/iter (+/- 2) = 469 MB/s +test misc::medium_1K ... bench: 253 ns/iter (+/- 15) = 4158 MB/s +test misc::medium_1MB ... bench: 202,025 ns/iter (+/- 11,252) = 5190 MB/s +test misc::medium_32 ... bench: 51 ns/iter (+/- 2) = 1176 MB/s +test misc::medium_32K ... bench: 6,406 ns/iter (+/- 318) = 5119 MB/s +test misc::not_literal ... bench: 169 ns/iter (+/- 6) = 301 MB/s +test misc::one_pass_long_prefix ... bench: 28 ns/iter (+/- 1) = 928 MB/s +test misc::one_pass_long_prefix_not ... bench: 28 ns/iter (+/- 0) = 928 MB/s +test misc::one_pass_short ... bench: 54 ns/iter (+/- 0) = 314 MB/s +test misc::one_pass_short_not ... bench: 55 ns/iter (+/- 3) = 309 MB/s +test misc::reallyhard2_1K ... bench: 4,664 ns/iter (+/- 123) = 222 MB/s +test misc::reallyhard_1K ... bench: 1,595 ns/iter (+/- 34) = 658 MB/s +test misc::reallyhard_1MB ... bench: 1,377,542 ns/iter (+/- 2,203) = 761 MB/s +test misc::reallyhard_32 ... bench: 106 ns/iter (+/- 2) = 556 MB/s +test misc::reallyhard_32K ... bench: 43,256 ns/iter (+/- 1,230) = 758 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 4,607 ns/iter (+/- 68) = 1736 MB/s +test regexdna::find_new_lines ... bench: 2,840,298 ns/iter (+/- 128,040) = 1789 MB/s +test regexdna::subst1 ... bench: 1,284,283 ns/iter (+/- 39,986) = 3958 MB/s +test regexdna::subst10 ... bench: 1,269,531 ns/iter (+/- 63,116) = 4004 MB/s +test regexdna::subst11 ... bench: 1,286,171 ns/iter (+/- 49,256) = 3952 MB/s +test regexdna::subst2 ... bench: 1,303,022 ns/iter (+/- 1,553) = 3901 MB/s +test regexdna::subst3 ... bench: 1,295,961 ns/iter (+/- 57,880) = 3922 MB/s +test regexdna::subst4 ... bench: 1,313,706 ns/iter (+/- 2,115) = 3869 MB/s +test regexdna::subst5 ... 
bench: 1,286,339 ns/iter (+/- 2,093) = 3951 MB/s +test regexdna::subst6 ... bench: 1,385,644 ns/iter (+/- 3,387) = 3668 MB/s +test regexdna::subst7 ... bench: 1,286,743 ns/iter (+/- 2,339) = 3950 MB/s +test regexdna::subst8 ... bench: 1,306,406 ns/iter (+/- 1,686) = 3891 MB/s +test regexdna::subst9 ... bench: 1,280,365 ns/iter (+/- 52,649) = 3970 MB/s +test regexdna::variant1 ... bench: 15,271,875 ns/iter (+/- 510,399) = 332 MB/s +test regexdna::variant2 ... bench: 16,704,090 ns/iter (+/- 446,145) = 304 MB/s +test regexdna::variant3 ... bench: 20,745,546 ns/iter (+/- 500,573) = 245 MB/s +test regexdna::variant4 ... bench: 19,285,154 ns/iter (+/- 543,793) = 263 MB/s +test regexdna::variant5 ... bench: 17,234,130 ns/iter (+/- 291,232) = 294 MB/s +test regexdna::variant6 ... bench: 17,462,350 ns/iter (+/- 510,036) = 291 MB/s +test regexdna::variant7 ... bench: 19,671,680 ns/iter (+/- 562,610) = 258 MB/s +test regexdna::variant8 ... bench: 24,515,319 ns/iter (+/- 725,298) = 207 MB/s +test regexdna::variant9 ... bench: 22,623,755 ns/iter (+/- 637,538) = 224 MB/s +test sherlock::before_after_holmes ... bench: 4,510,830 ns/iter (+/- 170,864) = 131 MB/s +test sherlock::before_holmes ... bench: 4,706,836 ns/iter (+/- 186,202) = 126 MB/s +test sherlock::holmes_cochar_watson ... bench: 523,122 ns/iter (+/- 988) = 1137 MB/s +test sherlock::ing_suffix ... bench: 2,030,438 ns/iter (+/- 9,228) = 293 MB/s +test sherlock::ing_suffix_limited_space ... bench: 4,996,956 ns/iter (+/- 197,705) = 119 MB/s +test sherlock::letters ... bench: 13,529,105 ns/iter (+/- 496,645) = 43 MB/s +test sherlock::letters_lower ... bench: 13,681,607 ns/iter (+/- 448,932) = 43 MB/s +test sherlock::letters_upper ... bench: 1,904,757 ns/iter (+/- 94,484) = 312 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 207,695 ns/iter (+/- 8,892) = 2864 MB/s +test sherlock::name_alt1 ... bench: 486,857 ns/iter (+/- 21,004) = 1221 MB/s +test sherlock::name_alt2 ... 
bench: 483,926 ns/iter (+/- 26,860) = 1229 MB/s +test sherlock::name_alt3 ... bench: 978,827 ns/iter (+/- 43,851) = 607 MB/s +test sherlock::name_alt3_nocase ... bench: 2,986,143 ns/iter (+/- 78,155) = 199 MB/s +test sherlock::name_alt4 ... bench: 78,104 ns/iter (+/- 4,056) = 7617 MB/s +test sherlock::name_alt4_nocase ... bench: 1,638,351 ns/iter (+/- 62,542) = 363 MB/s +test sherlock::name_alt5 ... bench: 685,723 ns/iter (+/- 26,092) = 867 MB/s +test sherlock::name_alt5_nocase ... bench: 1,817,760 ns/iter (+/- 80,781) = 327 MB/s +test sherlock::name_holmes ... bench: 411,102 ns/iter (+/- 1,887) = 1447 MB/s +test sherlock::name_holmes_nocase ... bench: 516,003 ns/iter (+/- 2,295) = 1152 MB/s +test sherlock::name_sherlock ... bench: 284,300 ns/iter (+/- 1,117) = 2092 MB/s +test sherlock::name_sherlock_holmes ... bench: 209,139 ns/iter (+/- 380) = 2844 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 1,118,324 ns/iter (+/- 1,654) = 531 MB/s +test sherlock::name_sherlock_nocase ... bench: 1,126,992 ns/iter (+/- 1,180) = 527 MB/s +test sherlock::name_whitespace ... bench: 284,672 ns/iter (+/- 510) = 2089 MB/s +test sherlock::no_match_common ... bench: 439,955 ns/iter (+/- 939) = 1352 MB/s +test sherlock::no_match_really_common ... bench: 439,266 ns/iter (+/- 3,751) = 1354 MB/s +test sherlock::no_match_uncommon ... bench: 28,872 ns/iter (+/- 31) = 20605 MB/s +test sherlock::quotes ... bench: 522,877 ns/iter (+/- 32,723) = 1137 MB/s +test sherlock::repeated_class_negation ... bench: 5,997,745 ns/iter (+/- 209,544) = 99 MB/s +test sherlock::the_lower ... bench: 747,234 ns/iter (+/- 43,110) = 796 MB/s +test sherlock::the_nocase ... bench: 802,320 ns/iter (+/- 27,715) = 741 MB/s +test sherlock::the_upper ... bench: 58,163 ns/iter (+/- 2,202) = 10228 MB/s +test sherlock::the_whitespace ... bench: 920,781 ns/iter (+/- 30,609) = 646 MB/s +test sherlock::word_ending_n ... bench: 5,703,864 ns/iter (+/- 191,007) = 104 MB/s +test sherlock::words ... 
bench: 6,786,318 ns/iter (+/- 168,049) = 87 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 93 measured + diff --git a/vendor/regex/record/old-bench-log/05/pcre1-vs-rust b/vendor/regex/record/old-bench-log/05/pcre1-vs-rust new file mode 100644 index 0000000..1d8c0d6 --- /dev/null +++ b/vendor/regex/record/old-bench-log/05/pcre1-vs-rust @@ -0,0 +1,94 @@ + name pcre1 ns/iter rust ns/iter diff ns/iter diff % + misc::anchored_literal_long_match 30 (13000 MB/s) 24 (16250 MB/s) -6 -20.00% + misc::anchored_literal_long_non_match 24 (16250 MB/s) 27 (14444 MB/s) 3 12.50% + misc::anchored_literal_short_match 29 (896 MB/s) 22 (1181 MB/s) -7 -24.14% + misc::anchored_literal_short_non_match 24 (1083 MB/s) 24 (1083 MB/s) 0 0.00% + misc::easy0_1K 260 (4042 MB/s) 16 (65687 MB/s) -244 -93.85% + misc::easy0_1MB 202,849 (5169 MB/s) 20 (52430150 MB/s) -202,829 -99.99% + misc::easy0_32 47 (1255 MB/s) 16 (3687 MB/s) -31 -65.96% + misc::easy0_32K 6,378 (5141 MB/s) 16 (2049687 MB/s) -6,362 -99.75% + misc::easy1_1K 248 (4209 MB/s) 48 (21750 MB/s) -200 -80.65% + misc::easy1_1MB 203,105 (5162 MB/s) 48 (21845750 MB/s) -203,057 -99.98% + misc::easy1_32 51 (1019 MB/s) 46 (1130 MB/s) -5 -9.80% + misc::easy1_32K 6,508 (5038 MB/s) 47 (697617 MB/s) -6,461 -99.28% + misc::hard_1K 1,324 (793 MB/s) 58 (18120 MB/s) -1,266 -95.62% + misc::hard_1MB 1,134,691 (924 MB/s) 61 (17190213 MB/s) -1,134,630 -99.99% + misc::hard_32 113 (522 MB/s) 58 (1017 MB/s) -55 -48.67% + misc::hard_32K 42,269 (775 MB/s) 56 (585625 MB/s) -42,213 -99.87% + misc::literal 28 (1821 MB/s) 16 (3187 MB/s) -12 -42.86% + misc::long_needle1 547,122 (182 MB/s) 2,226 (44924 MB/s) -544,896 -99.59% + misc::long_needle2 546,018 (183 MB/s) 576,997 (173 MB/s) 30,979 5.67% + misc::match_class 97 (835 MB/s) 65 (1246 MB/s) -32 -32.99% + misc::match_class_in_range 30 (2700 MB/s) 27 (3000 MB/s) -3 -10.00% + misc::match_class_unicode 343 (469 MB/s) 283 (568 MB/s) -60 -17.49% + misc::medium_1K 253 (4158 MB/s) 16 (65750 MB/s) -237 -93.68% + 
misc::medium_1MB 202,025 (5190 MB/s) 21 (49933523 MB/s) -202,004 -99.99% + misc::medium_32 51 (1176 MB/s) 17 (3529 MB/s) -34 -66.67% + misc::medium_32K 6,406 (5119 MB/s) 17 (1929176 MB/s) -6,389 -99.73% + misc::not_literal 169 (301 MB/s) 105 (485 MB/s) -64 -37.87% + misc::one_pass_long_prefix 28 (928 MB/s) 68 (382 MB/s) 40 142.86% + misc::one_pass_long_prefix_not 28 (928 MB/s) 58 (448 MB/s) 30 107.14% + misc::one_pass_short 54 (314 MB/s) 45 (377 MB/s) -9 -16.67% + misc::one_pass_short_not 55 (309 MB/s) 50 (340 MB/s) -5 -9.09% + misc::reallyhard2_1K 4,664 (222 MB/s) 83 (12530 MB/s) -4,581 -98.22% + misc::reallyhard_1K 1,595 (658 MB/s) 1,822 (576 MB/s) 227 14.23% + misc::reallyhard_1MB 1,377,542 (761 MB/s) 1,768,327 (592 MB/s) 390,785 28.37% + misc::reallyhard_32 106 (556 MB/s) 121 (487 MB/s) 15 14.15% + misc::reallyhard_32K 43,256 (758 MB/s) 56,375 (581 MB/s) 13,119 30.33% + misc::reverse_suffix_no_quadratic 4,607 (1736 MB/s) 5,803 (1378 MB/s) 1,196 25.96% + regexdna::find_new_lines 2,840,298 (1789 MB/s) 14,818,233 (343 MB/s) 11,977,935 421.71% + regexdna::subst1 1,284,283 (3958 MB/s) 896,790 (5668 MB/s) -387,493 -30.17% + regexdna::subst10 1,269,531 (4004 MB/s) 957,325 (5310 MB/s) -312,206 -24.59% + regexdna::subst11 1,286,171 (3952 MB/s) 917,248 (5542 MB/s) -368,923 -28.68% + regexdna::subst2 1,303,022 (3901 MB/s) 892,129 (5698 MB/s) -410,893 -31.53% + regexdna::subst3 1,295,961 (3922 MB/s) 929,250 (5470 MB/s) -366,711 -28.30% + regexdna::subst4 1,313,706 (3869 MB/s) 872,581 (5825 MB/s) -441,125 -33.58% + regexdna::subst5 1,286,339 (3951 MB/s) 875,804 (5804 MB/s) -410,535 -31.91% + regexdna::subst6 1,385,644 (3668 MB/s) 884,639 (5746 MB/s) -501,005 -36.16% + regexdna::subst7 1,286,743 (3950 MB/s) 872,791 (5824 MB/s) -413,952 -32.17% + regexdna::subst8 1,306,406 (3891 MB/s) 873,833 (5817 MB/s) -432,573 -33.11% + regexdna::subst9 1,280,365 (3970 MB/s) 886,744 (5732 MB/s) -393,621 -30.74% + regexdna::variant1 15,271,875 (332 MB/s) 3,699,267 (1374 MB/s) -11,572,608 
-75.78% + regexdna::variant2 16,704,090 (304 MB/s) 6,760,952 (751 MB/s) -9,943,138 -59.53% + regexdna::variant3 20,745,546 (245 MB/s) 8,030,646 (633 MB/s) -12,714,900 -61.29% + regexdna::variant4 19,285,154 (263 MB/s) 8,077,290 (629 MB/s) -11,207,864 -58.12% + regexdna::variant5 17,234,130 (294 MB/s) 6,787,242 (748 MB/s) -10,446,888 -60.62% + regexdna::variant6 17,462,350 (291 MB/s) 6,577,777 (772 MB/s) -10,884,573 -62.33% + regexdna::variant7 19,671,680 (258 MB/s) 6,705,580 (758 MB/s) -12,966,100 -65.91% + regexdna::variant8 24,515,319 (207 MB/s) 6,818,785 (745 MB/s) -17,696,534 -72.19% + regexdna::variant9 22,623,755 (224 MB/s) 6,821,453 (745 MB/s) -15,802,302 -69.85% + sherlock::before_after_holmes 4,510,830 (131 MB/s) 1,029,866 (577 MB/s) -3,480,964 -77.17% + sherlock::before_holmes 4,706,836 (126 MB/s) 76,633 (7763 MB/s) -4,630,203 -98.37% + sherlock::holmes_cochar_watson 523,122 (1137 MB/s) 144,725 (4110 MB/s) -378,397 -72.33% + sherlock::ing_suffix 2,030,438 (293 MB/s) 436,202 (1363 MB/s) -1,594,236 -78.52% + sherlock::ing_suffix_limited_space 4,996,956 (119 MB/s) 1,182,943 (502 MB/s) -3,814,013 -76.33% + sherlock::letters 13,529,105 (43 MB/s) 24,390,452 (24 MB/s) 10,861,347 80.28% + sherlock::letters_lower 13,681,607 (43 MB/s) 23,784,108 (25 MB/s) 10,102,501 73.84% + sherlock::letters_upper 1,904,757 (312 MB/s) 1,993,838 (298 MB/s) 89,081 4.68% + sherlock::line_boundary_sherlock_holmes 207,695 (2864 MB/s) 999,414 (595 MB/s) 791,719 381.19% + sherlock::name_alt1 486,857 (1221 MB/s) 34,298 (17345 MB/s) -452,559 -92.96% + sherlock::name_alt2 483,926 (1229 MB/s) 124,226 (4789 MB/s) -359,700 -74.33% + sherlock::name_alt3 978,827 (607 MB/s) 137,742 (4319 MB/s) -841,085 -85.93% + sherlock::name_alt3_nocase 2,986,143 (199 MB/s) 1,293,763 (459 MB/s) -1,692,380 -56.67% + sherlock::name_alt4 78,104 (7617 MB/s) 164,900 (3607 MB/s) 86,796 111.13% + sherlock::name_alt4_nocase 1,638,351 (363 MB/s) 235,023 (2531 MB/s) -1,403,328 -85.65% + sherlock::name_alt5 685,723 (867 
MB/s) 127,928 (4650 MB/s) -557,795 -81.34% + sherlock::name_alt5_nocase 1,817,760 (327 MB/s) 659,591 (901 MB/s) -1,158,169 -63.71% + sherlock::name_holmes 411,102 (1447 MB/s) 40,902 (14545 MB/s) -370,200 -90.05% + sherlock::name_holmes_nocase 516,003 (1152 MB/s) 198,658 (2994 MB/s) -317,345 -61.50% + sherlock::name_sherlock 284,300 (2092 MB/s) 68,924 (8631 MB/s) -215,376 -75.76% + sherlock::name_sherlock_holmes 209,139 (2844 MB/s) 31,640 (18803 MB/s) -177,499 -84.87% + sherlock::name_sherlock_holmes_nocase 1,118,324 (531 MB/s) 173,522 (3428 MB/s) -944,802 -84.48% + sherlock::name_sherlock_nocase 1,126,992 (527 MB/s) 170,888 (3481 MB/s) -956,104 -84.84% + sherlock::name_whitespace 284,672 (2089 MB/s) 84,314 (7056 MB/s) -200,358 -70.38% + sherlock::no_match_common 439,955 (1352 MB/s) 20,727 (28703 MB/s) -419,228 -95.29% + sherlock::no_match_really_common 439,266 (1354 MB/s) 381,476 (1559 MB/s) -57,790 -13.16% + sherlock::no_match_uncommon 28,872 (20605 MB/s) 20,786 (28621 MB/s) -8,086 -28.01% + sherlock::quotes 522,877 (1137 MB/s) 531,487 (1119 MB/s) 8,610 1.65% + sherlock::repeated_class_negation 5,997,745 (99 MB/s) 85,881,944 (6 MB/s) 79,884,199 1331.90% + sherlock::the_lower 747,234 (796 MB/s) 654,110 (909 MB/s) -93,124 -12.46% + sherlock::the_nocase 802,320 (741 MB/s) 474,456 (1253 MB/s) -327,864 -40.86% + sherlock::the_upper 58,163 (10228 MB/s) 43,746 (13599 MB/s) -14,417 -24.79% + sherlock::the_whitespace 920,781 (646 MB/s) 1,181,974 (503 MB/s) 261,193 28.37% + sherlock::word_ending_n 5,703,864 (104 MB/s) 1,925,578 (308 MB/s) -3,778,286 -66.24% + sherlock::words 6,786,318 (87 MB/s) 9,697,201 (61 MB/s) 2,910,883 42.89% diff --git a/vendor/regex/record/old-bench-log/05/pcre2 b/vendor/regex/record/old-bench-log/05/pcre2 new file mode 100644 index 0000000..76b3242 --- /dev/null +++ b/vendor/regex/record/old-bench-log/05/pcre2 @@ -0,0 +1,98 @@ + +running 93 tests +test misc::anchored_literal_long_match ... 
bench: 20 ns/iter (+/- 0) = 19500 MB/s +test misc::anchored_literal_long_non_match ... bench: 15 ns/iter (+/- 0) = 26000 MB/s +test misc::anchored_literal_short_match ... bench: 19 ns/iter (+/- 1) = 1368 MB/s +test misc::anchored_literal_short_non_match ... bench: 13 ns/iter (+/- 0) = 2000 MB/s +test misc::easy0_1K ... bench: 241 ns/iter (+/- 9) = 4360 MB/s +test misc::easy0_1MB ... bench: 207,103 ns/iter (+/- 8,557) = 5063 MB/s +test misc::easy0_32 ... bench: 39 ns/iter (+/- 0) = 1512 MB/s +test misc::easy0_32K ... bench: 6,522 ns/iter (+/- 20) = 5028 MB/s +test misc::easy1_1K ... bench: 247 ns/iter (+/- 3) = 4226 MB/s +test misc::easy1_1MB ... bench: 206,893 ns/iter (+/- 9,489) = 5068 MB/s +test misc::easy1_32 ... bench: 41 ns/iter (+/- 0) = 1268 MB/s +test misc::easy1_32K ... bench: 6,516 ns/iter (+/- 301) = 5031 MB/s +test misc::hard_1K ... bench: 1,566 ns/iter (+/- 79) = 671 MB/s +test misc::hard_1MB ... bench: 1,119,234 ns/iter (+/- 38,605) = 936 MB/s +test misc::hard_32 ... bench: 95 ns/iter (+/- 4) = 621 MB/s +test misc::hard_32K ... bench: 34,411 ns/iter (+/- 1,542) = 953 MB/s +test misc::literal ... bench: 18 ns/iter (+/- 0) = 2833 MB/s +test misc::long_needle1 ... bench: 550,340 ns/iter (+/- 30,668) = 181 MB/s +test misc::long_needle2 ... bench: 553,056 ns/iter (+/- 25,618) = 180 MB/s +test misc::match_class ... bench: 82 ns/iter (+/- 1) = 987 MB/s +test misc::match_class_in_range ... bench: 20 ns/iter (+/- 1) = 4050 MB/s +test misc::match_class_unicode ... bench: 351 ns/iter (+/- 14) = 458 MB/s +test misc::medium_1K ... bench: 242 ns/iter (+/- 13) = 4347 MB/s +test misc::medium_1MB ... bench: 207,290 ns/iter (+/- 1,458) = 5058 MB/s +test misc::medium_32 ... bench: 41 ns/iter (+/- 0) = 1463 MB/s +test misc::medium_32K ... bench: 6,529 ns/iter (+/- 293) = 5023 MB/s +test misc::not_literal ... bench: 161 ns/iter (+/- 7) = 316 MB/s +test misc::one_pass_long_prefix ... bench: 17 ns/iter (+/- 1) = 1529 MB/s +test misc::one_pass_long_prefix_not ... 
bench: 18 ns/iter (+/- 1) = 1444 MB/s +test misc::one_pass_short ... bench: 45 ns/iter (+/- 2) = 377 MB/s +test misc::one_pass_short_not ... bench: 49 ns/iter (+/- 2) = 346 MB/s +test misc::reallyhard2_1K ... bench: 4,487 ns/iter (+/- 190) = 231 MB/s +test misc::reallyhard_1K ... bench: 1,260 ns/iter (+/- 46) = 834 MB/s +test misc::reallyhard_1MB ... bench: 1,361,796 ns/iter (+/- 46,490) = 770 MB/s +test misc::reallyhard_32 ... bench: 93 ns/iter (+/- 8) = 634 MB/s +test misc::reallyhard_32K ... bench: 42,503 ns/iter (+/- 1,721) = 771 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 4,559 ns/iter (+/- 171) = 1754 MB/s +test regexdna::find_new_lines ... bench: 1,887,325 ns/iter (+/- 74,026) = 2693 MB/s +test regexdna::subst1 ... bench: 963,089 ns/iter (+/- 3,478) = 5278 MB/s +test regexdna::subst10 ... bench: 968,221 ns/iter (+/- 5,406) = 5250 MB/s +test regexdna::subst11 ... bench: 961,661 ns/iter (+/- 45,597) = 5286 MB/s +test regexdna::subst2 ... bench: 956,430 ns/iter (+/- 32,654) = 5314 MB/s +test regexdna::subst3 ... bench: 961,204 ns/iter (+/- 48,799) = 5288 MB/s +test regexdna::subst4 ... bench: 961,897 ns/iter (+/- 50,762) = 5284 MB/s +test regexdna::subst5 ... bench: 953,412 ns/iter (+/- 69,554) = 5331 MB/s +test regexdna::subst6 ... bench: 962,362 ns/iter (+/- 42,136) = 5282 MB/s +test regexdna::subst7 ... bench: 961,694 ns/iter (+/- 100,348) = 5285 MB/s +test regexdna::subst8 ... bench: 963,230 ns/iter (+/- 10,882) = 5277 MB/s +test regexdna::subst9 ... bench: 960,246 ns/iter (+/- 27,407) = 5293 MB/s +test regexdna::variant1 ... bench: 15,553,281 ns/iter (+/- 566,810) = 326 MB/s +test regexdna::variant2 ... bench: 16,563,452 ns/iter (+/- 546,097) = 306 MB/s +test regexdna::variant3 ... bench: 20,405,916 ns/iter (+/- 809,236) = 249 MB/s +test regexdna::variant4 ... bench: 19,489,291 ns/iter (+/- 710,721) = 260 MB/s +test regexdna::variant5 ... bench: 17,406,769 ns/iter (+/- 656,024) = 292 MB/s +test regexdna::variant6 ... 
bench: 17,412,027 ns/iter (+/- 730,347) = 291 MB/s +test regexdna::variant7 ... bench: 19,509,193 ns/iter (+/- 783,850) = 260 MB/s +test regexdna::variant8 ... bench: 24,295,734 ns/iter (+/- 816,832) = 209 MB/s +test regexdna::variant9 ... bench: 22,541,558 ns/iter (+/- 783,104) = 225 MB/s +test sherlock::before_after_holmes ... bench: 4,583,804 ns/iter (+/- 124,057) = 129 MB/s +test sherlock::before_holmes ... bench: 4,640,546 ns/iter (+/- 241,311) = 128 MB/s +test sherlock::holmes_cochar_watson ... bench: 509,088 ns/iter (+/- 25,069) = 1168 MB/s +test sherlock::ing_suffix ... bench: 1,865,631 ns/iter (+/- 68,625) = 318 MB/s +test sherlock::ing_suffix_limited_space ... bench: 4,922,883 ns/iter (+/- 232,825) = 120 MB/s +test sherlock::letters ... bench: 9,848,144 ns/iter (+/- 206,915) = 60 MB/s +test sherlock::letters_lower ... bench: 9,723,642 ns/iter (+/- 370,000) = 61 MB/s +test sherlock::letters_upper ... bench: 1,762,773 ns/iter (+/- 86,671) = 337 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 206,367 ns/iter (+/- 8,874) = 2882 MB/s +test sherlock::name_alt1 ... bench: 485,953 ns/iter (+/- 15,036) = 1224 MB/s +test sherlock::name_alt2 ... bench: 483,813 ns/iter (+/- 17,822) = 1229 MB/s +test sherlock::name_alt3 ... bench: 903,013 ns/iter (+/- 38,445) = 658 MB/s +test sherlock::name_alt3_nocase ... bench: 2,993,633 ns/iter (+/- 131,218) = 198 MB/s +test sherlock::name_alt4 ... bench: 78,831 ns/iter (+/- 2,012) = 7546 MB/s +test sherlock::name_alt4_nocase ... bench: 1,647,202 ns/iter (+/- 5,838) = 361 MB/s +test sherlock::name_alt5 ... bench: 678,798 ns/iter (+/- 1,146) = 876 MB/s +test sherlock::name_alt5_nocase ... bench: 1,792,461 ns/iter (+/- 3,532) = 331 MB/s +test sherlock::name_holmes ... bench: 406,138 ns/iter (+/- 1,157) = 1464 MB/s +test sherlock::name_holmes_nocase ... bench: 517,884 ns/iter (+/- 8,548) = 1148 MB/s +test sherlock::name_sherlock ... bench: 282,357 ns/iter (+/- 13,583) = 2107 MB/s +test sherlock::name_sherlock_holmes ... 
bench: 207,894 ns/iter (+/- 1,847) = 2861 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 1,122,583 ns/iter (+/- 52,189) = 529 MB/s +test sherlock::name_sherlock_nocase ... bench: 1,092,252 ns/iter (+/- 43,130) = 544 MB/s +test sherlock::name_whitespace ... bench: 280,360 ns/iter (+/- 12,136) = 2122 MB/s +test sherlock::no_match_common ... bench: 436,303 ns/iter (+/- 19,423) = 1363 MB/s +test sherlock::no_match_really_common ... bench: 417,686 ns/iter (+/- 15,258) = 1424 MB/s +test sherlock::no_match_uncommon ... bench: 28,504 ns/iter (+/- 1,032) = 20871 MB/s +test sherlock::quotes ... bench: 541,513 ns/iter (+/- 21,121) = 1098 MB/s +test sherlock::repeated_class_negation ... bench: 5,489,721 ns/iter (+/- 185,165) = 108 MB/s +test sherlock::the_lower ... bench: 680,710 ns/iter (+/- 29,403) = 873 MB/s +test sherlock::the_nocase ... bench: 737,040 ns/iter (+/- 4,391) = 807 MB/s +test sherlock::the_upper ... bench: 50,026 ns/iter (+/- 205) = 11892 MB/s +test sherlock::the_whitespace ... bench: 885,922 ns/iter (+/- 9,145) = 671 MB/s +test sherlock::word_ending_n ... bench: 5,424,773 ns/iter (+/- 154,353) = 109 MB/s +test sherlock::words ... bench: 5,753,231 ns/iter (+/- 177,890) = 103 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 93 measured + diff --git a/vendor/regex/record/old-bench-log/05/pcre2-vs-rust b/vendor/regex/record/old-bench-log/05/pcre2-vs-rust new file mode 100644 index 0000000..3d89e19 --- /dev/null +++ b/vendor/regex/record/old-bench-log/05/pcre2-vs-rust @@ -0,0 +1,94 @@ + name pcre2 ns/iter rust ns/iter diff ns/iter diff % + misc::anchored_literal_long_match 20 (19500 MB/s) 24 (16250 MB/s) 4 20.00% + misc::anchored_literal_long_non_match 15 (26000 MB/s) 27 (14444 MB/s) 12 80.00% + misc::anchored_literal_short_match 19 (1368 MB/s) 22 (1181 MB/s) 3 15.79% + misc::anchored_literal_short_non_match 13 (2000 MB/s) 24 (1083 MB/s) 11 84.62% + misc::easy0_1K 241 (4360 MB/s) 16 (65687 MB/s) -225 -93.36% + misc::easy0_1MB 207,103 (5063 MB/s) 20 (52430150 MB/s) -207,083 -99.99% + misc::easy0_32 39 (1512 MB/s) 16 (3687 MB/s) -23 -58.97% + misc::easy0_32K 6,522 (5028 MB/s) 16 (2049687 MB/s) -6,506 -99.75% + misc::easy1_1K 247 (4226 MB/s) 48 (21750 MB/s) -199 -80.57% + misc::easy1_1MB 206,893 (5068 MB/s) 48 (21845750 MB/s) -206,845 -99.98% + misc::easy1_32 41 (1268 MB/s) 46 (1130 MB/s) 5 12.20% + misc::easy1_32K 6,516 (5031 MB/s) 47 (697617 MB/s) -6,469 -99.28% + misc::hard_1K 1,566 (671 MB/s) 58 (18120 MB/s) -1,508 -96.30% + misc::hard_1MB 1,119,234 (936 MB/s) 61 (17190213 MB/s) -1,119,173 -99.99% + misc::hard_32 95 (621 MB/s) 58 (1017 MB/s) -37 -38.95% + misc::hard_32K 34,411 (953 MB/s) 56 (585625 MB/s) -34,355 -99.84% + misc::literal 18 (2833 MB/s) 16 (3187 MB/s) -2 -11.11% + misc::long_needle1 550,340 (181 MB/s) 2,226 (44924 MB/s) -548,114 -99.60% + misc::long_needle2 553,056 (180 MB/s) 576,997 (173 MB/s) 23,941 4.33% + misc::match_class 82 (987 MB/s) 65 (1246 MB/s) -17 -20.73% + misc::match_class_in_range 20 (4050 MB/s) 27 (3000 MB/s) 7 35.00% + misc::match_class_unicode 351 (458 MB/s) 283 (568 MB/s) -68 -19.37% + misc::medium_1K 242 (4347 MB/s) 16 (65750 MB/s) -226 -93.39% + misc::medium_1MB 207,290 (5058 MB/s) 21 (49933523 MB/s) -207,269 -99.99% + 
misc::medium_32 41 (1463 MB/s) 17 (3529 MB/s) -24 -58.54% + misc::medium_32K 6,529 (5023 MB/s) 17 (1929176 MB/s) -6,512 -99.74% + misc::not_literal 161 (316 MB/s) 105 (485 MB/s) -56 -34.78% + misc::one_pass_long_prefix 17 (1529 MB/s) 68 (382 MB/s) 51 300.00% + misc::one_pass_long_prefix_not 18 (1444 MB/s) 58 (448 MB/s) 40 222.22% + misc::one_pass_short 45 (377 MB/s) 45 (377 MB/s) 0 0.00% + misc::one_pass_short_not 49 (346 MB/s) 50 (340 MB/s) 1 2.04% + misc::reallyhard2_1K 4,487 (231 MB/s) 83 (12530 MB/s) -4,404 -98.15% + misc::reallyhard_1K 1,260 (834 MB/s) 1,822 (576 MB/s) 562 44.60% + misc::reallyhard_1MB 1,361,796 (770 MB/s) 1,768,327 (592 MB/s) 406,531 29.85% + misc::reallyhard_32 93 (634 MB/s) 121 (487 MB/s) 28 30.11% + misc::reallyhard_32K 42,503 (771 MB/s) 56,375 (581 MB/s) 13,872 32.64% + misc::reverse_suffix_no_quadratic 4,559 (1754 MB/s) 5,803 (1378 MB/s) 1,244 27.29% + regexdna::find_new_lines 1,887,325 (2693 MB/s) 14,818,233 (343 MB/s) 12,930,908 685.14% + regexdna::subst1 963,089 (5278 MB/s) 896,790 (5668 MB/s) -66,299 -6.88% + regexdna::subst10 968,221 (5250 MB/s) 957,325 (5310 MB/s) -10,896 -1.13% + regexdna::subst11 961,661 (5286 MB/s) 917,248 (5542 MB/s) -44,413 -4.62% + regexdna::subst2 956,430 (5314 MB/s) 892,129 (5698 MB/s) -64,301 -6.72% + regexdna::subst3 961,204 (5288 MB/s) 929,250 (5470 MB/s) -31,954 -3.32% + regexdna::subst4 961,897 (5284 MB/s) 872,581 (5825 MB/s) -89,316 -9.29% + regexdna::subst5 953,412 (5331 MB/s) 875,804 (5804 MB/s) -77,608 -8.14% + regexdna::subst6 962,362 (5282 MB/s) 884,639 (5746 MB/s) -77,723 -8.08% + regexdna::subst7 961,694 (5285 MB/s) 872,791 (5824 MB/s) -88,903 -9.24% + regexdna::subst8 963,230 (5277 MB/s) 873,833 (5817 MB/s) -89,397 -9.28% + regexdna::subst9 960,246 (5293 MB/s) 886,744 (5732 MB/s) -73,502 -7.65% + regexdna::variant1 15,553,281 (326 MB/s) 3,699,267 (1374 MB/s) -11,854,014 -76.22% + regexdna::variant2 16,563,452 (306 MB/s) 6,760,952 (751 MB/s) -9,802,500 -59.18% + regexdna::variant3 20,405,916 
(249 MB/s) 8,030,646 (633 MB/s) -12,375,270 -60.65% + regexdna::variant4 19,489,291 (260 MB/s) 8,077,290 (629 MB/s) -11,412,001 -58.56% + regexdna::variant5 17,406,769 (292 MB/s) 6,787,242 (748 MB/s) -10,619,527 -61.01% + regexdna::variant6 17,412,027 (291 MB/s) 6,577,777 (772 MB/s) -10,834,250 -62.22% + regexdna::variant7 19,509,193 (260 MB/s) 6,705,580 (758 MB/s) -12,803,613 -65.63% + regexdna::variant8 24,295,734 (209 MB/s) 6,818,785 (745 MB/s) -17,476,949 -71.93% + regexdna::variant9 22,541,558 (225 MB/s) 6,821,453 (745 MB/s) -15,720,105 -69.74% + sherlock::before_after_holmes 4,583,804 (129 MB/s) 1,029,866 (577 MB/s) -3,553,938 -77.53% + sherlock::before_holmes 4,640,546 (128 MB/s) 76,633 (7763 MB/s) -4,563,913 -98.35% + sherlock::holmes_cochar_watson 509,088 (1168 MB/s) 144,725 (4110 MB/s) -364,363 -71.57% + sherlock::ing_suffix 1,865,631 (318 MB/s) 436,202 (1363 MB/s) -1,429,429 -76.62% + sherlock::ing_suffix_limited_space 4,922,883 (120 MB/s) 1,182,943 (502 MB/s) -3,739,940 -75.97% + sherlock::letters 9,848,144 (60 MB/s) 24,390,452 (24 MB/s) 14,542,308 147.67% + sherlock::letters_lower 9,723,642 (61 MB/s) 23,784,108 (25 MB/s) 14,060,466 144.60% + sherlock::letters_upper 1,762,773 (337 MB/s) 1,993,838 (298 MB/s) 231,065 13.11% + sherlock::line_boundary_sherlock_holmes 206,367 (2882 MB/s) 999,414 (595 MB/s) 793,047 384.29% + sherlock::name_alt1 485,953 (1224 MB/s) 34,298 (17345 MB/s) -451,655 -92.94% + sherlock::name_alt2 483,813 (1229 MB/s) 124,226 (4789 MB/s) -359,587 -74.32% + sherlock::name_alt3 903,013 (658 MB/s) 137,742 (4319 MB/s) -765,271 -84.75% + sherlock::name_alt3_nocase 2,993,633 (198 MB/s) 1,293,763 (459 MB/s) -1,699,870 -56.78% + sherlock::name_alt4 78,831 (7546 MB/s) 164,900 (3607 MB/s) 86,069 109.18% + sherlock::name_alt4_nocase 1,647,202 (361 MB/s) 235,023 (2531 MB/s) -1,412,179 -85.73% + sherlock::name_alt5 678,798 (876 MB/s) 127,928 (4650 MB/s) -550,870 -81.15% + sherlock::name_alt5_nocase 1,792,461 (331 MB/s) 659,591 (901 MB/s) -1,132,870 
-63.20% + sherlock::name_holmes 406,138 (1464 MB/s) 40,902 (14545 MB/s) -365,236 -89.93% + sherlock::name_holmes_nocase 517,884 (1148 MB/s) 198,658 (2994 MB/s) -319,226 -61.64% + sherlock::name_sherlock 282,357 (2107 MB/s) 68,924 (8631 MB/s) -213,433 -75.59% + sherlock::name_sherlock_holmes 207,894 (2861 MB/s) 31,640 (18803 MB/s) -176,254 -84.78% + sherlock::name_sherlock_holmes_nocase 1,122,583 (529 MB/s) 173,522 (3428 MB/s) -949,061 -84.54% + sherlock::name_sherlock_nocase 1,092,252 (544 MB/s) 170,888 (3481 MB/s) -921,364 -84.35% + sherlock::name_whitespace 280,360 (2122 MB/s) 84,314 (7056 MB/s) -196,046 -69.93% + sherlock::no_match_common 436,303 (1363 MB/s) 20,727 (28703 MB/s) -415,576 -95.25% + sherlock::no_match_really_common 417,686 (1424 MB/s) 381,476 (1559 MB/s) -36,210 -8.67% + sherlock::no_match_uncommon 28,504 (20871 MB/s) 20,786 (28621 MB/s) -7,718 -27.08% + sherlock::quotes 541,513 (1098 MB/s) 531,487 (1119 MB/s) -10,026 -1.85% + sherlock::repeated_class_negation 5,489,721 (108 MB/s) 85,881,944 (6 MB/s) 80,392,223 1464.41% + sherlock::the_lower 680,710 (873 MB/s) 654,110 (909 MB/s) -26,600 -3.91% + sherlock::the_nocase 737,040 (807 MB/s) 474,456 (1253 MB/s) -262,584 -35.63% + sherlock::the_upper 50,026 (11892 MB/s) 43,746 (13599 MB/s) -6,280 -12.55% + sherlock::the_whitespace 885,922 (671 MB/s) 1,181,974 (503 MB/s) 296,052 33.42% + sherlock::word_ending_n 5,424,773 (109 MB/s) 1,925,578 (308 MB/s) -3,499,195 -64.50% + sherlock::words 5,753,231 (103 MB/s) 9,697,201 (61 MB/s) 3,943,970 68.55% diff --git a/vendor/regex/record/old-bench-log/05/re2 b/vendor/regex/record/old-bench-log/05/re2 new file mode 100644 index 0000000..d1f0bea --- /dev/null +++ b/vendor/regex/record/old-bench-log/05/re2 @@ -0,0 +1,101 @@ + +running 96 tests +test misc::anchored_literal_long_match ... bench: 102 ns/iter (+/- 3) = 3823 MB/s +test misc::anchored_literal_long_non_match ... bench: 20 ns/iter (+/- 0) = 19500 MB/s +test misc::anchored_literal_short_match ... 
bench: 95 ns/iter (+/- 8) = 273 MB/s +test misc::anchored_literal_short_non_match ... bench: 17 ns/iter (+/- 0) = 1529 MB/s +test misc::easy0_1K ... bench: 149 ns/iter (+/- 10) = 7053 MB/s +test misc::easy0_1MB ... bench: 29,234 ns/iter (+/- 886) = 35869 MB/s +test misc::easy0_32 ... bench: 126 ns/iter (+/- 4) = 468 MB/s +test misc::easy0_32K ... bench: 1,266 ns/iter (+/- 42) = 25904 MB/s +test misc::easy1_1K ... bench: 130 ns/iter (+/- 4) = 8030 MB/s +test misc::easy1_1MB ... bench: 29,218 ns/iter (+/- 791) = 35888 MB/s +test misc::easy1_32 ... bench: 112 ns/iter (+/- 7) = 464 MB/s +test misc::easy1_32K ... bench: 1,251 ns/iter (+/- 45) = 26209 MB/s +test misc::hard_1K ... bench: 2,357 ns/iter (+/- 33) = 445 MB/s +test misc::hard_1MB ... bench: 2,149,909 ns/iter (+/- 151,258) = 487 MB/s +test misc::hard_32 ... bench: 195 ns/iter (+/- 16) = 302 MB/s +test misc::hard_32K ... bench: 105,137 ns/iter (+/- 6,252) = 311 MB/s +test misc::literal ... bench: 89 ns/iter (+/- 3) = 573 MB/s +test misc::long_needle1 ... bench: 170,090 ns/iter (+/- 5,891) = 587 MB/s +test misc::long_needle2 ... bench: 174,341 ns/iter (+/- 7,949) = 573 MB/s +test misc::match_class ... bench: 220 ns/iter (+/- 16) = 368 MB/s +test misc::match_class_in_range ... bench: 215 ns/iter (+/- 16) = 376 MB/s +test misc::match_class_unicode ... bench: 382 ns/iter (+/- 27) = 421 MB/s +test misc::medium_1K ... bench: 1,939 ns/iter (+/- 153) = 542 MB/s +test misc::medium_1MB ... bench: 1,775,335 ns/iter (+/- 91,241) = 590 MB/s +test misc::medium_32 ... bench: 190 ns/iter (+/- 12) = 315 MB/s +test misc::medium_32K ... bench: 83,245 ns/iter (+/- 5,385) = 393 MB/s +test misc::no_exponential ... bench: 269 ns/iter (+/- 22) = 371 MB/s +test misc::not_literal ... bench: 167 ns/iter (+/- 13) = 305 MB/s +test misc::one_pass_long_prefix ... bench: 84 ns/iter (+/- 7) = 309 MB/s +test misc::one_pass_long_prefix_not ... bench: 137 ns/iter (+/- 12) = 189 MB/s +test misc::one_pass_short ... 
bench: 108 ns/iter (+/- 3) = 157 MB/s +test misc::one_pass_short_not ... bench: 105 ns/iter (+/- 6) = 161 MB/s +test misc::reallyhard2_1K ... bench: 1,811 ns/iter (+/- 44) = 574 MB/s +test misc::reallyhard_1K ... bench: 2,324 ns/iter (+/- 223) = 452 MB/s +test misc::reallyhard_1MB ... bench: 2,033,298 ns/iter (+/- 148,939) = 515 MB/s +test misc::reallyhard_32 ... bench: 185 ns/iter (+/- 8) = 318 MB/s +test misc::reallyhard_32K ... bench: 83,263 ns/iter (+/- 4,231) = 393 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 13,501 ns/iter (+/- 1,380) = 592 MB/s +test regexdna::find_new_lines ... bench: 31,464,067 ns/iter (+/- 2,248,457) = 161 MB/s +test regexdna::subst1 ... bench: 5,257,629 ns/iter (+/- 142,910) = 966 MB/s +test regexdna::subst10 ... bench: 5,189,384 ns/iter (+/- 130,525) = 979 MB/s +test regexdna::subst11 ... bench: 5,261,936 ns/iter (+/- 309,355) = 966 MB/s +test regexdna::subst2 ... bench: 5,268,281 ns/iter (+/- 348,592) = 964 MB/s +test regexdna::subst3 ... bench: 5,245,664 ns/iter (+/- 403,198) = 969 MB/s +test regexdna::subst4 ... bench: 5,264,833 ns/iter (+/- 312,063) = 965 MB/s +test regexdna::subst5 ... bench: 5,181,850 ns/iter (+/- 117,306) = 981 MB/s +test regexdna::subst6 ... bench: 5,200,226 ns/iter (+/- 124,723) = 977 MB/s +test regexdna::subst7 ... bench: 5,233,678 ns/iter (+/- 367,749) = 971 MB/s +test regexdna::subst8 ... bench: 5,242,400 ns/iter (+/- 317,859) = 969 MB/s +test regexdna::subst9 ... bench: 5,325,464 ns/iter (+/- 395,485) = 954 MB/s +test regexdna::variant1 ... bench: 24,377,246 ns/iter (+/- 733,355) = 208 MB/s +test regexdna::variant2 ... bench: 26,405,686 ns/iter (+/- 771,755) = 192 MB/s +test regexdna::variant3 ... bench: 25,130,419 ns/iter (+/- 1,245,527) = 202 MB/s +test regexdna::variant4 ... bench: 32,527,780 ns/iter (+/- 5,073,721) = 156 MB/s +test regexdna::variant5 ... bench: 31,081,800 ns/iter (+/- 1,256,796) = 163 MB/s +test regexdna::variant6 ... 
bench: 28,744,478 ns/iter (+/- 1,243,565) = 176 MB/s +test regexdna::variant7 ... bench: 26,693,756 ns/iter (+/- 886,566) = 190 MB/s +test regexdna::variant8 ... bench: 21,478,184 ns/iter (+/- 1,374,415) = 236 MB/s +test regexdna::variant9 ... bench: 18,639,814 ns/iter (+/- 519,136) = 272 MB/s +test sherlock::before_after_holmes ... bench: 1,552,265 ns/iter (+/- 105,467) = 383 MB/s +test sherlock::before_holmes ... bench: 1,360,446 ns/iter (+/- 111,123) = 437 MB/s +test sherlock::everything_greedy ... bench: 6,356,610 ns/iter (+/- 343,163) = 93 MB/s +test sherlock::everything_greedy_nl ... bench: 2,380,946 ns/iter (+/- 36,936) = 249 MB/s +test sherlock::holmes_cochar_watson ... bench: 1,144,439 ns/iter (+/- 25,948) = 519 MB/s +test sherlock::holmes_coword_watson ... bench: 1,503,311 ns/iter (+/- 99,075) = 395 MB/s +test sherlock::ing_suffix ... bench: 3,003,144 ns/iter (+/- 239,408) = 198 MB/s +test sherlock::ing_suffix_limited_space ... bench: 1,721,656 ns/iter (+/- 129,795) = 345 MB/s +test sherlock::letters ... bench: 73,833,131 ns/iter (+/- 2,542,107) = 8 MB/s +test sherlock::letters_lower ... bench: 72,250,289 ns/iter (+/- 1,280,826) = 8 MB/s +test sherlock::letters_upper ... bench: 3,397,481 ns/iter (+/- 160,294) = 175 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 3,694,486 ns/iter (+/- 403,679) = 161 MB/s +test sherlock::name_alt1 ... bench: 70,121 ns/iter (+/- 3,926) = 8484 MB/s +test sherlock::name_alt2 ... bench: 1,120,245 ns/iter (+/- 36,040) = 531 MB/s +test sherlock::name_alt3 ... bench: 1,247,630 ns/iter (+/- 127,226) = 476 MB/s +test sherlock::name_alt3_nocase ... bench: 2,894,586 ns/iter (+/- 201,023) = 205 MB/s +test sherlock::name_alt4 ... bench: 1,142,872 ns/iter (+/- 82,896) = 520 MB/s +test sherlock::name_alt4_nocase ... bench: 1,785,266 ns/iter (+/- 166,100) = 333 MB/s +test sherlock::name_alt5 ... bench: 1,167,553 ns/iter (+/- 91,672) = 509 MB/s +test sherlock::name_alt5_nocase ... 
bench: 2,023,732 ns/iter (+/- 74,558) = 293 MB/s +test sherlock::name_holmes ... bench: 126,480 ns/iter (+/- 6,959) = 4703 MB/s +test sherlock::name_holmes_nocase ... bench: 1,420,548 ns/iter (+/- 75,407) = 418 MB/s +test sherlock::name_sherlock ... bench: 57,090 ns/iter (+/- 1,392) = 10420 MB/s +test sherlock::name_sherlock_holmes ... bench: 57,965 ns/iter (+/- 2,996) = 10263 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 1,837,721 ns/iter (+/- 66,965) = 323 MB/s +test sherlock::name_sherlock_nocase ... bench: 1,672,451 ns/iter (+/- 204,590) = 355 MB/s +test sherlock::name_whitespace ... bench: 60,342 ns/iter (+/- 3,290) = 9859 MB/s +test sherlock::no_match_common ... bench: 434,496 ns/iter (+/- 35,617) = 1369 MB/s +test sherlock::no_match_really_common ... bench: 431,778 ns/iter (+/- 11,799) = 1377 MB/s +test sherlock::no_match_uncommon ... bench: 19,313 ns/iter (+/- 1,167) = 30804 MB/s +test sherlock::quotes ... bench: 1,301,485 ns/iter (+/- 92,772) = 457 MB/s +test sherlock::the_lower ... bench: 1,846,403 ns/iter (+/- 39,799) = 322 MB/s +test sherlock::the_nocase ... bench: 2,956,115 ns/iter (+/- 136,011) = 201 MB/s +test sherlock::the_upper ... bench: 165,976 ns/iter (+/- 5,838) = 3584 MB/s +test sherlock::the_whitespace ... bench: 1,816,669 ns/iter (+/- 117,437) = 327 MB/s +test sherlock::word_ending_n ... bench: 2,601,847 ns/iter (+/- 166,024) = 228 MB/s +test sherlock::words ... bench: 21,137,049 ns/iter (+/- 750,253) = 28 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 96 measured + diff --git a/vendor/regex/record/old-bench-log/05/re2-vs-rust b/vendor/regex/record/old-bench-log/05/re2-vs-rust new file mode 100644 index 0000000..180e431 --- /dev/null +++ b/vendor/regex/record/old-bench-log/05/re2-vs-rust @@ -0,0 +1,97 @@ + name re2 ns/iter rust ns/iter diff ns/iter diff % + misc::anchored_literal_long_match 102 (3823 MB/s) 24 (16250 MB/s) -78 -76.47% + misc::anchored_literal_long_non_match 20 (19500 MB/s) 27 (14444 MB/s) 7 35.00% + misc::anchored_literal_short_match 95 (273 MB/s) 22 (1181 MB/s) -73 -76.84% + misc::anchored_literal_short_non_match 17 (1529 MB/s) 24 (1083 MB/s) 7 41.18% + misc::easy0_1K 149 (7053 MB/s) 16 (65687 MB/s) -133 -89.26% + misc::easy0_1MB 29,234 (35869 MB/s) 20 (52430150 MB/s) -29,214 -99.93% + misc::easy0_32 126 (468 MB/s) 16 (3687 MB/s) -110 -87.30% + misc::easy0_32K 1,266 (25904 MB/s) 16 (2049687 MB/s) -1,250 -98.74% + misc::easy1_1K 130 (8030 MB/s) 48 (21750 MB/s) -82 -63.08% + misc::easy1_1MB 29,218 (35888 MB/s) 48 (21845750 MB/s) -29,170 -99.84% + misc::easy1_32 112 (464 MB/s) 46 (1130 MB/s) -66 -58.93% + misc::easy1_32K 1,251 (26209 MB/s) 47 (697617 MB/s) -1,204 -96.24% + misc::hard_1K 2,357 (445 MB/s) 58 (18120 MB/s) -2,299 -97.54% + misc::hard_1MB 2,149,909 (487 MB/s) 61 (17190213 MB/s) -2,149,848 -100.00% + misc::hard_32 195 (302 MB/s) 58 (1017 MB/s) -137 -70.26% + misc::hard_32K 105,137 (311 MB/s) 56 (585625 MB/s) -105,081 -99.95% + misc::literal 89 (573 MB/s) 16 (3187 MB/s) -73 -82.02% + misc::long_needle1 170,090 (587 MB/s) 2,226 (44924 MB/s) -167,864 -98.69% + misc::long_needle2 174,341 (573 MB/s) 576,997 (173 MB/s) 402,656 230.96% + misc::match_class 220 (368 MB/s) 65 (1246 MB/s) -155 -70.45% + misc::match_class_in_range 215 (376 MB/s) 27 (3000 MB/s) -188 -87.44% + misc::match_class_unicode 382 (421 MB/s) 283 (568 MB/s) -99 -25.92% + misc::medium_1K 1,939 (542 MB/s) 16 (65750 MB/s) -1,923 -99.17% + misc::medium_1MB 1,775,335 (590 MB/s) 21 (49933523 MB/s) 
-1,775,314 -100.00% + misc::medium_32 190 (315 MB/s) 17 (3529 MB/s) -173 -91.05% + misc::medium_32K 83,245 (393 MB/s) 17 (1929176 MB/s) -83,228 -99.98% + misc::no_exponential 269 (371 MB/s) 394 (253 MB/s) 125 46.47% + misc::not_literal 167 (305 MB/s) 105 (485 MB/s) -62 -37.13% + misc::one_pass_long_prefix 84 (309 MB/s) 68 (382 MB/s) -16 -19.05% + misc::one_pass_long_prefix_not 137 (189 MB/s) 58 (448 MB/s) -79 -57.66% + misc::one_pass_short 108 (157 MB/s) 45 (377 MB/s) -63 -58.33% + misc::one_pass_short_not 105 (161 MB/s) 50 (340 MB/s) -55 -52.38% + misc::reallyhard2_1K 1,811 (574 MB/s) 83 (12530 MB/s) -1,728 -95.42% + misc::reallyhard_1K 2,324 (452 MB/s) 1,822 (576 MB/s) -502 -21.60% + misc::reallyhard_1MB 2,033,298 (515 MB/s) 1,768,327 (592 MB/s) -264,971 -13.03% + misc::reallyhard_32 185 (318 MB/s) 121 (487 MB/s) -64 -34.59% + misc::reallyhard_32K 83,263 (393 MB/s) 56,375 (581 MB/s) -26,888 -32.29% + misc::reverse_suffix_no_quadratic 13,501 (592 MB/s) 5,803 (1378 MB/s) -7,698 -57.02% + regexdna::find_new_lines 31,464,067 (161 MB/s) 14,818,233 (343 MB/s) -16,645,834 -52.90% + regexdna::subst1 5,257,629 (966 MB/s) 896,790 (5668 MB/s) -4,360,839 -82.94% + regexdna::subst10 5,189,384 (979 MB/s) 957,325 (5310 MB/s) -4,232,059 -81.55% + regexdna::subst11 5,261,936 (966 MB/s) 917,248 (5542 MB/s) -4,344,688 -82.57% + regexdna::subst2 5,268,281 (964 MB/s) 892,129 (5698 MB/s) -4,376,152 -83.07% + regexdna::subst3 5,245,664 (969 MB/s) 929,250 (5470 MB/s) -4,316,414 -82.29% + regexdna::subst4 5,264,833 (965 MB/s) 872,581 (5825 MB/s) -4,392,252 -83.43% + regexdna::subst5 5,181,850 (981 MB/s) 875,804 (5804 MB/s) -4,306,046 -83.10% + regexdna::subst6 5,200,226 (977 MB/s) 884,639 (5746 MB/s) -4,315,587 -82.99% + regexdna::subst7 5,233,678 (971 MB/s) 872,791 (5824 MB/s) -4,360,887 -83.32% + regexdna::subst8 5,242,400 (969 MB/s) 873,833 (5817 MB/s) -4,368,567 -83.33% + regexdna::subst9 5,325,464 (954 MB/s) 886,744 (5732 MB/s) -4,438,720 -83.35% + regexdna::variant1 24,377,246 (208 
MB/s) 3,699,267 (1374 MB/s) -20,677,979 -84.82% + regexdna::variant2 26,405,686 (192 MB/s) 6,760,952 (751 MB/s) -19,644,734 -74.40% + regexdna::variant3 25,130,419 (202 MB/s) 8,030,646 (633 MB/s) -17,099,773 -68.04% + regexdna::variant4 32,527,780 (156 MB/s) 8,077,290 (629 MB/s) -24,450,490 -75.17% + regexdna::variant5 31,081,800 (163 MB/s) 6,787,242 (748 MB/s) -24,294,558 -78.16% + regexdna::variant6 28,744,478 (176 MB/s) 6,577,777 (772 MB/s) -22,166,701 -77.12% + regexdna::variant7 26,693,756 (190 MB/s) 6,705,580 (758 MB/s) -19,988,176 -74.88% + regexdna::variant8 21,478,184 (236 MB/s) 6,818,785 (745 MB/s) -14,659,399 -68.25% + regexdna::variant9 18,639,814 (272 MB/s) 6,821,453 (745 MB/s) -11,818,361 -63.40% + sherlock::before_after_holmes 1,552,265 (383 MB/s) 1,029,866 (577 MB/s) -522,399 -33.65% + sherlock::before_holmes 1,360,446 (437 MB/s) 76,633 (7763 MB/s) -1,283,813 -94.37% + sherlock::everything_greedy 6,356,610 (93 MB/s) 2,375,079 (250 MB/s) -3,981,531 -62.64% + sherlock::everything_greedy_nl 2,380,946 (249 MB/s) 916,250 (649 MB/s) -1,464,696 -61.52% + sherlock::holmes_cochar_watson 1,144,439 (519 MB/s) 144,725 (4110 MB/s) -999,714 -87.35% + sherlock::holmes_coword_watson 1,503,311 (395 MB/s) 565,247 (1052 MB/s) -938,064 -62.40% + sherlock::ing_suffix 3,003,144 (198 MB/s) 436,202 (1363 MB/s) -2,566,942 -85.48% + sherlock::ing_suffix_limited_space 1,721,656 (345 MB/s) 1,182,943 (502 MB/s) -538,713 -31.29% + sherlock::letters 73,833,131 (8 MB/s) 24,390,452 (24 MB/s) -49,442,679 -66.97% + sherlock::letters_lower 72,250,289 (8 MB/s) 23,784,108 (25 MB/s) -48,466,181 -67.08% + sherlock::letters_upper 3,397,481 (175 MB/s) 1,993,838 (298 MB/s) -1,403,643 -41.31% + sherlock::line_boundary_sherlock_holmes 3,694,486 (161 MB/s) 999,414 (595 MB/s) -2,695,072 -72.95% + sherlock::name_alt1 70,121 (8484 MB/s) 34,298 (17345 MB/s) -35,823 -51.09% + sherlock::name_alt2 1,120,245 (531 MB/s) 124,226 (4789 MB/s) -996,019 -88.91% + sherlock::name_alt3 1,247,630 (476 MB/s) 
137,742 (4319 MB/s) -1,109,888 -88.96% + sherlock::name_alt3_nocase 2,894,586 (205 MB/s) 1,293,763 (459 MB/s) -1,600,823 -55.30% + sherlock::name_alt4 1,142,872 (520 MB/s) 164,900 (3607 MB/s) -977,972 -85.57% + sherlock::name_alt4_nocase 1,785,266 (333 MB/s) 235,023 (2531 MB/s) -1,550,243 -86.84% + sherlock::name_alt5 1,167,553 (509 MB/s) 127,928 (4650 MB/s) -1,039,625 -89.04% + sherlock::name_alt5_nocase 2,023,732 (293 MB/s) 659,591 (901 MB/s) -1,364,141 -67.41% + sherlock::name_holmes 126,480 (4703 MB/s) 40,902 (14545 MB/s) -85,578 -67.66% + sherlock::name_holmes_nocase 1,420,548 (418 MB/s) 198,658 (2994 MB/s) -1,221,890 -86.02% + sherlock::name_sherlock 57,090 (10420 MB/s) 68,924 (8631 MB/s) 11,834 20.73% + sherlock::name_sherlock_holmes 57,965 (10263 MB/s) 31,640 (18803 MB/s) -26,325 -45.42% + sherlock::name_sherlock_holmes_nocase 1,837,721 (323 MB/s) 173,522 (3428 MB/s) -1,664,199 -90.56% + sherlock::name_sherlock_nocase 1,672,451 (355 MB/s) 170,888 (3481 MB/s) -1,501,563 -89.78% + sherlock::name_whitespace 60,342 (9859 MB/s) 84,314 (7056 MB/s) 23,972 39.73% + sherlock::no_match_common 434,496 (1369 MB/s) 20,727 (28703 MB/s) -413,769 -95.23% + sherlock::no_match_really_common 431,778 (1377 MB/s) 381,476 (1559 MB/s) -50,302 -11.65% + sherlock::no_match_uncommon 19,313 (30804 MB/s) 20,786 (28621 MB/s) 1,473 7.63% + sherlock::quotes 1,301,485 (457 MB/s) 531,487 (1119 MB/s) -769,998 -59.16% + sherlock::the_lower 1,846,403 (322 MB/s) 654,110 (909 MB/s) -1,192,293 -64.57% + sherlock::the_nocase 2,956,115 (201 MB/s) 474,456 (1253 MB/s) -2,481,659 -83.95% + sherlock::the_upper 165,976 (3584 MB/s) 43,746 (13599 MB/s) -122,230 -73.64% + sherlock::the_whitespace 1,816,669 (327 MB/s) 1,181,974 (503 MB/s) -634,695 -34.94% + sherlock::word_ending_n 2,601,847 (228 MB/s) 1,925,578 (308 MB/s) -676,269 -25.99% + sherlock::words 21,137,049 (28 MB/s) 9,697,201 (61 MB/s) -11,439,848 -54.12% diff --git a/vendor/regex/record/old-bench-log/05/rust 
b/vendor/regex/record/old-bench-log/05/rust new file mode 100644 index 0000000..22848cc --- /dev/null +++ b/vendor/regex/record/old-bench-log/05/rust @@ -0,0 +1,103 @@ + +running 98 tests +test misc::anchored_literal_long_match ... bench: 24 ns/iter (+/- 0) = 16250 MB/s +test misc::anchored_literal_long_non_match ... bench: 27 ns/iter (+/- 0) = 14444 MB/s +test misc::anchored_literal_short_match ... bench: 22 ns/iter (+/- 0) = 1181 MB/s +test misc::anchored_literal_short_non_match ... bench: 24 ns/iter (+/- 1) = 1083 MB/s +test misc::easy0_1K ... bench: 16 ns/iter (+/- 0) = 65687 MB/s +test misc::easy0_1MB ... bench: 20 ns/iter (+/- 0) = 52430150 MB/s +test misc::easy0_32 ... bench: 16 ns/iter (+/- 0) = 3687 MB/s +test misc::easy0_32K ... bench: 16 ns/iter (+/- 0) = 2049687 MB/s +test misc::easy1_1K ... bench: 48 ns/iter (+/- 2) = 21750 MB/s +test misc::easy1_1MB ... bench: 48 ns/iter (+/- 2) = 21845750 MB/s +test misc::easy1_32 ... bench: 46 ns/iter (+/- 0) = 1130 MB/s +test misc::easy1_32K ... bench: 47 ns/iter (+/- 0) = 697617 MB/s +test misc::hard_1K ... bench: 58 ns/iter (+/- 0) = 18120 MB/s +test misc::hard_1MB ... bench: 61 ns/iter (+/- 0) = 17190213 MB/s +test misc::hard_32 ... bench: 58 ns/iter (+/- 0) = 1017 MB/s +test misc::hard_32K ... bench: 56 ns/iter (+/- 2) = 585625 MB/s +test misc::literal ... bench: 16 ns/iter (+/- 0) = 3187 MB/s +test misc::long_needle1 ... bench: 2,226 ns/iter (+/- 139) = 44924 MB/s +test misc::long_needle2 ... bench: 576,997 ns/iter (+/- 21,660) = 173 MB/s +test misc::match_class ... bench: 65 ns/iter (+/- 3) = 1246 MB/s +test misc::match_class_in_range ... bench: 27 ns/iter (+/- 0) = 3000 MB/s +test misc::match_class_unicode ... bench: 283 ns/iter (+/- 15) = 568 MB/s +test misc::medium_1K ... bench: 16 ns/iter (+/- 0) = 65750 MB/s +test misc::medium_1MB ... bench: 21 ns/iter (+/- 1) = 49933523 MB/s +test misc::medium_32 ... bench: 17 ns/iter (+/- 0) = 3529 MB/s +test misc::medium_32K ... 
bench: 17 ns/iter (+/- 0) = 1929176 MB/s +test misc::no_exponential ... bench: 394 ns/iter (+/- 0) = 253 MB/s +test misc::not_literal ... bench: 105 ns/iter (+/- 0) = 485 MB/s +test misc::one_pass_long_prefix ... bench: 68 ns/iter (+/- 0) = 382 MB/s +test misc::one_pass_long_prefix_not ... bench: 58 ns/iter (+/- 3) = 448 MB/s +test misc::one_pass_short ... bench: 45 ns/iter (+/- 2) = 377 MB/s +test misc::one_pass_short_not ... bench: 50 ns/iter (+/- 16) = 340 MB/s +test misc::reallyhard2_1K ... bench: 83 ns/iter (+/- 4) = 12530 MB/s +test misc::reallyhard_1K ... bench: 1,822 ns/iter (+/- 72) = 576 MB/s +test misc::reallyhard_1MB ... bench: 1,768,327 ns/iter (+/- 67,421) = 592 MB/s +test misc::reallyhard_32 ... bench: 121 ns/iter (+/- 4) = 487 MB/s +test misc::reallyhard_32K ... bench: 56,375 ns/iter (+/- 1,404) = 581 MB/s +test misc::replace_all ... bench: 142 ns/iter (+/- 0) +test misc::reverse_suffix_no_quadratic ... bench: 5,803 ns/iter (+/- 6) = 1378 MB/s +test regexdna::find_new_lines ... bench: 14,818,233 ns/iter (+/- 430,454) = 343 MB/s +test regexdna::subst1 ... bench: 896,790 ns/iter (+/- 2,273) = 5668 MB/s +test regexdna::subst10 ... bench: 957,325 ns/iter (+/- 7,490) = 5310 MB/s +test regexdna::subst11 ... bench: 917,248 ns/iter (+/- 12,886) = 5542 MB/s +test regexdna::subst2 ... bench: 892,129 ns/iter (+/- 36,230) = 5698 MB/s +test regexdna::subst3 ... bench: 929,250 ns/iter (+/- 38,312) = 5470 MB/s +test regexdna::subst4 ... bench: 872,581 ns/iter (+/- 27,431) = 5825 MB/s +test regexdna::subst5 ... bench: 875,804 ns/iter (+/- 30,611) = 5804 MB/s +test regexdna::subst6 ... bench: 884,639 ns/iter (+/- 44,927) = 5746 MB/s +test regexdna::subst7 ... bench: 872,791 ns/iter (+/- 31,810) = 5824 MB/s +test regexdna::subst8 ... bench: 873,833 ns/iter (+/- 37,335) = 5817 MB/s +test regexdna::subst9 ... bench: 886,744 ns/iter (+/- 42,880) = 5732 MB/s +test regexdna::variant1 ... bench: 3,699,267 ns/iter (+/- 134,945) = 1374 MB/s +test regexdna::variant2 ... 
bench: 6,760,952 ns/iter (+/- 228,082) = 751 MB/s +test regexdna::variant3 ... bench: 8,030,646 ns/iter (+/- 271,204) = 633 MB/s +test regexdna::variant4 ... bench: 8,077,290 ns/iter (+/- 266,264) = 629 MB/s +test regexdna::variant5 ... bench: 6,787,242 ns/iter (+/- 226,071) = 748 MB/s +test regexdna::variant6 ... bench: 6,577,777 ns/iter (+/- 226,332) = 772 MB/s +test regexdna::variant7 ... bench: 6,705,580 ns/iter (+/- 232,953) = 758 MB/s +test regexdna::variant8 ... bench: 6,818,785 ns/iter (+/- 241,075) = 745 MB/s +test regexdna::variant9 ... bench: 6,821,453 ns/iter (+/- 257,044) = 745 MB/s +test sherlock::before_after_holmes ... bench: 1,029,866 ns/iter (+/- 42,662) = 577 MB/s +test sherlock::before_holmes ... bench: 76,633 ns/iter (+/- 1,135) = 7763 MB/s +test sherlock::everything_greedy ... bench: 2,375,079 ns/iter (+/- 102,532) = 250 MB/s +test sherlock::everything_greedy_nl ... bench: 916,250 ns/iter (+/- 37,950) = 649 MB/s +test sherlock::holmes_cochar_watson ... bench: 144,725 ns/iter (+/- 8,793) = 4110 MB/s +test sherlock::holmes_coword_watson ... bench: 565,247 ns/iter (+/- 24,056) = 1052 MB/s +test sherlock::ing_suffix ... bench: 436,202 ns/iter (+/- 19,863) = 1363 MB/s +test sherlock::ing_suffix_limited_space ... bench: 1,182,943 ns/iter (+/- 38,658) = 502 MB/s +test sherlock::letters ... bench: 24,390,452 ns/iter (+/- 869,008) = 24 MB/s +test sherlock::letters_lower ... bench: 23,784,108 ns/iter (+/- 796,195) = 25 MB/s +test sherlock::letters_upper ... bench: 1,993,838 ns/iter (+/- 77,697) = 298 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 999,414 ns/iter (+/- 31,202) = 595 MB/s +test sherlock::name_alt1 ... bench: 34,298 ns/iter (+/- 1,091) = 17345 MB/s +test sherlock::name_alt2 ... bench: 124,226 ns/iter (+/- 5,579) = 4789 MB/s +test sherlock::name_alt3 ... bench: 137,742 ns/iter (+/- 6,496) = 4319 MB/s +test sherlock::name_alt3_nocase ... bench: 1,293,763 ns/iter (+/- 51,097) = 459 MB/s +test sherlock::name_alt4 ... 
bench: 164,900 ns/iter (+/- 10,023) = 3607 MB/s +test sherlock::name_alt4_nocase ... bench: 235,023 ns/iter (+/- 14,465) = 2531 MB/s +test sherlock::name_alt5 ... bench: 127,928 ns/iter (+/- 6,882) = 4650 MB/s +test sherlock::name_alt5_nocase ... bench: 659,591 ns/iter (+/- 20,587) = 901 MB/s +test sherlock::name_holmes ... bench: 40,902 ns/iter (+/- 402) = 14545 MB/s +test sherlock::name_holmes_nocase ... bench: 198,658 ns/iter (+/- 3,782) = 2994 MB/s +test sherlock::name_sherlock ... bench: 68,924 ns/iter (+/- 1,456) = 8631 MB/s +test sherlock::name_sherlock_holmes ... bench: 31,640 ns/iter (+/- 383) = 18803 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 173,522 ns/iter (+/- 7,812) = 3428 MB/s +test sherlock::name_sherlock_nocase ... bench: 170,888 ns/iter (+/- 612) = 3481 MB/s +test sherlock::name_whitespace ... bench: 84,314 ns/iter (+/- 508) = 7056 MB/s +test sherlock::no_match_common ... bench: 20,727 ns/iter (+/- 565) = 28703 MB/s +test sherlock::no_match_really_common ... bench: 381,476 ns/iter (+/- 2,338) = 1559 MB/s +test sherlock::no_match_uncommon ... bench: 20,786 ns/iter (+/- 717) = 28621 MB/s +test sherlock::quotes ... bench: 531,487 ns/iter (+/- 5,517) = 1119 MB/s +test sherlock::repeated_class_negation ... bench: 85,881,944 ns/iter (+/- 4,906,514) = 6 MB/s +test sherlock::the_lower ... bench: 654,110 ns/iter (+/- 34,542) = 909 MB/s +test sherlock::the_nocase ... bench: 474,456 ns/iter (+/- 16,549) = 1253 MB/s +test sherlock::the_upper ... bench: 43,746 ns/iter (+/- 579) = 13599 MB/s +test sherlock::the_whitespace ... bench: 1,181,974 ns/iter (+/- 3,005) = 503 MB/s +test sherlock::word_ending_n ... bench: 1,925,578 ns/iter (+/- 3,811) = 308 MB/s +test sherlock::words ... bench: 9,697,201 ns/iter (+/- 156,772) = 61 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 98 measured + diff --git a/vendor/regex/record/old-bench-log/05/tcl b/vendor/regex/record/old-bench-log/05/tcl new file mode 100644 index 0000000..3e1778b --- /dev/null +++ b/vendor/regex/record/old-bench-log/05/tcl @@ -0,0 +1,94 @@ + +running 89 tests +test misc::anchored_literal_long_match ... bench: 662 ns/iter (+/- 12) = 589 MB/s +test misc::anchored_literal_long_non_match ... bench: 133 ns/iter (+/- 1) = 2932 MB/s +test misc::anchored_literal_short_match ... bench: 616 ns/iter (+/- 18) = 42 MB/s +test misc::anchored_literal_short_non_match ... bench: 122 ns/iter (+/- 1) = 213 MB/s +test misc::easy0_1K ... bench: 11,816 ns/iter (+/- 92) = 88 MB/s +test misc::easy0_1MB ... bench: 3,409,439 ns/iter (+/- 94,972) = 307 MB/s +test misc::easy0_32 ... bench: 8,785 ns/iter (+/- 183) = 6 MB/s +test misc::easy0_32K ... bench: 115,371 ns/iter (+/- 2,279) = 284 MB/s +test misc::easy1_1K ... bench: 7,038 ns/iter (+/- 145) = 148 MB/s +test misc::easy1_1MB ... bench: 3,396,028 ns/iter (+/- 100,173) = 308 MB/s +test misc::easy1_32 ... bench: 3,687 ns/iter (+/- 44) = 14 MB/s +test misc::easy1_32K ... bench: 109,689 ns/iter (+/- 3,757) = 298 MB/s +test misc::hard_1K ... bench: 14,836 ns/iter (+/- 518) = 70 MB/s +test misc::hard_1MB ... bench: 3,376,015 ns/iter (+/- 95,045) = 310 MB/s +test misc::hard_32 ... bench: 11,278 ns/iter (+/- 389) = 5 MB/s +test misc::hard_32K ... bench: 115,400 ns/iter (+/- 4,738) = 284 MB/s +test misc::literal ... bench: 511 ns/iter (+/- 11) = 99 MB/s +test misc::long_needle1 ... bench: 18,076,901 ns/iter (+/- 523,761) = 5 MB/s +test misc::long_needle2 ... bench: 18,497,725 ns/iter (+/- 465,516) = 5 MB/s +test misc::match_class ... bench: 620 ns/iter (+/- 23) = 130 MB/s +test misc::match_class_in_range ... bench: 605 ns/iter (+/- 26) = 133 MB/s +test misc::medium_1K ... bench: 12,355 ns/iter (+/- 390) = 85 MB/s +test misc::medium_1MB ... bench: 3,410,978 ns/iter (+/- 112,021) = 307 MB/s +test misc::medium_32 ... 
bench: 9,086 ns/iter (+/- 287) = 6 MB/s +test misc::medium_32K ... bench: 116,944 ns/iter (+/- 5,654) = 280 MB/s +test misc::no_exponential ... bench: 2,379,518 ns/iter (+/- 92,628) +test misc::not_literal ... bench: 1,979 ns/iter (+/- 116) = 25 MB/s +test misc::one_pass_long_prefix ... bench: 6,932 ns/iter (+/- 464) = 3 MB/s +test misc::one_pass_long_prefix_not ... bench: 6,242 ns/iter (+/- 384) = 4 MB/s +test misc::one_pass_short ... bench: 630 ns/iter (+/- 42) = 26 MB/s +test misc::one_pass_short_not ... bench: 718 ns/iter (+/- 64) = 23 MB/s +test misc::reallyhard2_1K ... bench: 108,421 ns/iter (+/- 6,489) = 9 MB/s +test misc::reallyhard_1K ... bench: 14,330 ns/iter (+/- 814) = 73 MB/s +test misc::reallyhard_1MB ... bench: 3,287,965 ns/iter (+/- 203,546) = 318 MB/s +test misc::reallyhard_32 ... bench: 11,193 ns/iter (+/- 683) = 5 MB/s +test misc::reallyhard_32K ... bench: 112,731 ns/iter (+/- 5,966) = 290 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 26,907 ns/iter (+/- 2,396) = 297 MB/s +test regexdna::find_new_lines ... bench: 48,223,361 ns/iter (+/- 2,855,654) = 105 MB/s +test regexdna::subst1 ... bench: 27,177,359 ns/iter (+/- 1,359,987) = 187 MB/s +test regexdna::subst10 ... bench: 26,722,144 ns/iter (+/- 1,090,216) = 190 MB/s +test regexdna::subst11 ... bench: 27,382,875 ns/iter (+/- 1,656,754) = 185 MB/s +test regexdna::subst2 ... bench: 26,957,766 ns/iter (+/- 1,433,630) = 188 MB/s +test regexdna::subst3 ... bench: 27,195,925 ns/iter (+/- 1,828,460) = 186 MB/s +test regexdna::subst4 ... bench: 26,342,249 ns/iter (+/- 1,949,172) = 192 MB/s +test regexdna::subst5 ... bench: 26,543,675 ns/iter (+/- 2,143,336) = 191 MB/s +test regexdna::subst6 ... bench: 26,185,452 ns/iter (+/- 2,199,220) = 194 MB/s +test regexdna::subst7 ... bench: 26,338,573 ns/iter (+/- 2,124,778) = 193 MB/s +test regexdna::subst8 ... bench: 26,468,652 ns/iter (+/- 1,923,567) = 192 MB/s +test regexdna::subst9 ... 
bench: 26,487,784 ns/iter (+/- 1,250,319) = 191 MB/s +test regexdna::variant1 ... bench: 16,325,983 ns/iter (+/- 491,000) = 311 MB/s +test regexdna::variant2 ... bench: 16,845,952 ns/iter (+/- 470,062) = 301 MB/s +test regexdna::variant3 ... bench: 19,258,030 ns/iter (+/- 525,045) = 263 MB/s +test regexdna::variant4 ... bench: 18,018,713 ns/iter (+/- 1,235,670) = 282 MB/s +test regexdna::variant5 ... bench: 19,583,528 ns/iter (+/- 1,756,762) = 259 MB/s +test regexdna::variant6 ... bench: 17,630,308 ns/iter (+/- 973,191) = 288 MB/s +test regexdna::variant7 ... bench: 17,121,666 ns/iter (+/- 1,274,478) = 296 MB/s +test regexdna::variant8 ... bench: 17,154,863 ns/iter (+/- 425,504) = 296 MB/s +test regexdna::variant9 ... bench: 17,930,482 ns/iter (+/- 587,712) = 283 MB/s +test sherlock::before_after_holmes ... bench: 2,600,503 ns/iter (+/- 383,440) = 228 MB/s +test sherlock::before_holmes ... bench: 3,145,648 ns/iter (+/- 37,316) = 189 MB/s +test sherlock::holmes_cochar_watson ... bench: 2,668,355 ns/iter (+/- 193,724) = 222 MB/s +test sherlock::ing_suffix ... bench: 5,638,296 ns/iter (+/- 69,345) = 105 MB/s +test sherlock::ing_suffix_limited_space ... bench: 22,466,946 ns/iter (+/- 659,956) = 26 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 2,251,996 ns/iter (+/- 66,639) = 264 MB/s +test sherlock::name_alt1 ... bench: 2,276,056 ns/iter (+/- 64,088) = 261 MB/s +test sherlock::name_alt2 ... bench: 3,196,348 ns/iter (+/- 202,979) = 186 MB/s +test sherlock::name_alt3 ... bench: 5,260,374 ns/iter (+/- 426,028) = 113 MB/s +test sherlock::name_alt3_nocase ... bench: 8,529,394 ns/iter (+/- 558,731) = 69 MB/s +test sherlock::name_alt4 ... bench: 2,787,972 ns/iter (+/- 153,839) = 213 MB/s +test sherlock::name_alt4_nocase ... bench: 3,370,452 ns/iter (+/- 140,385) = 176 MB/s +test sherlock::name_alt5 ... bench: 3,795,793 ns/iter (+/- 182,240) = 156 MB/s +test sherlock::name_alt5_nocase ... 
bench: 4,691,422 ns/iter (+/- 161,515) = 126 MB/s +test sherlock::name_holmes ... bench: 2,513,139 ns/iter (+/- 72,157) = 236 MB/s +test sherlock::name_holmes_nocase ... bench: 2,636,441 ns/iter (+/- 78,402) = 225 MB/s +test sherlock::name_sherlock ... bench: 2,015,753 ns/iter (+/- 104,000) = 295 MB/s +test sherlock::name_sherlock_holmes ... bench: 2,180,684 ns/iter (+/- 162,201) = 272 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 2,306,664 ns/iter (+/- 165,960) = 257 MB/s +test sherlock::name_sherlock_nocase ... bench: 2,065,630 ns/iter (+/- 155,223) = 288 MB/s +test sherlock::name_whitespace ... bench: 2,266,188 ns/iter (+/- 173,380) = 262 MB/s +test sherlock::no_match_common ... bench: 1,881,887 ns/iter (+/- 123,883) = 316 MB/s +test sherlock::no_match_really_common ... bench: 1,804,352 ns/iter (+/- 33,396) = 329 MB/s +test sherlock::no_match_uncommon ... bench: 1,809,300 ns/iter (+/- 123,888) = 328 MB/s +test sherlock::quotes ... bench: 9,682,507 ns/iter (+/- 1,200,909) = 61 MB/s +test sherlock::repeated_class_negation ... bench: 68,600,251 ns/iter (+/- 2,043,582) = 8 MB/s +test sherlock::the_lower ... bench: 6,849,558 ns/iter (+/- 517,709) = 86 MB/s +test sherlock::the_nocase ... bench: 7,354,742 ns/iter (+/- 390,834) = 80 MB/s +test sherlock::the_upper ... bench: 2,442,364 ns/iter (+/- 174,452) = 243 MB/s +test sherlock::the_whitespace ... bench: 9,210,338 ns/iter (+/- 651,675) = 64 MB/s +test sherlock::words ... bench: 47,863,652 ns/iter (+/- 3,536,998) = 12 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 89 measured + diff --git a/vendor/regex/record/old-bench-log/05/tcl-vs-rust b/vendor/regex/record/old-bench-log/05/tcl-vs-rust new file mode 100644 index 0000000..0faefe9 --- /dev/null +++ b/vendor/regex/record/old-bench-log/05/tcl-vs-rust @@ -0,0 +1,90 @@ + name tcl ns/iter rust ns/iter diff ns/iter diff % + misc::anchored_literal_long_match 662 (589 MB/s) 24 (16250 MB/s) -638 -96.37% + misc::anchored_literal_long_non_match 133 (2932 MB/s) 27 (14444 MB/s) -106 -79.70% + misc::anchored_literal_short_match 616 (42 MB/s) 22 (1181 MB/s) -594 -96.43% + misc::anchored_literal_short_non_match 122 (213 MB/s) 24 (1083 MB/s) -98 -80.33% + misc::easy0_1K 11,816 (88 MB/s) 16 (65687 MB/s) -11,800 -99.86% + misc::easy0_1MB 3,409,439 (307 MB/s) 20 (52430150 MB/s) -3,409,419 -100.00% + misc::easy0_32 8,785 (6 MB/s) 16 (3687 MB/s) -8,769 -99.82% + misc::easy0_32K 115,371 (284 MB/s) 16 (2049687 MB/s) -115,355 -99.99% + misc::easy1_1K 7,038 (148 MB/s) 48 (21750 MB/s) -6,990 -99.32% + misc::easy1_1MB 3,396,028 (308 MB/s) 48 (21845750 MB/s) -3,395,980 -100.00% + misc::easy1_32 3,687 (14 MB/s) 46 (1130 MB/s) -3,641 -98.75% + misc::easy1_32K 109,689 (298 MB/s) 47 (697617 MB/s) -109,642 -99.96% + misc::hard_1K 14,836 (70 MB/s) 58 (18120 MB/s) -14,778 -99.61% + misc::hard_1MB 3,376,015 (310 MB/s) 61 (17190213 MB/s) -3,375,954 -100.00% + misc::hard_32 11,278 (5 MB/s) 58 (1017 MB/s) -11,220 -99.49% + misc::hard_32K 115,400 (284 MB/s) 56 (585625 MB/s) -115,344 -99.95% + misc::literal 511 (99 MB/s) 16 (3187 MB/s) -495 -96.87% + misc::long_needle1 18,076,901 (5 MB/s) 2,226 (44924 MB/s) -18,074,675 -99.99% + misc::long_needle2 18,497,725 (5 MB/s) 576,997 (173 MB/s) -17,920,728 -96.88% + misc::match_class 620 (130 MB/s) 65 (1246 MB/s) -555 -89.52% + misc::match_class_in_range 605 (133 MB/s) 27 (3000 MB/s) -578 -95.54% + misc::medium_1K 12,355 (85 MB/s) 16 (65750 MB/s) -12,339 -99.87% + misc::medium_1MB 3,410,978 (307 MB/s) 21 (49933523 MB/s) -3,410,957 -100.00% + 
misc::medium_32 9,086 (6 MB/s) 17 (3529 MB/s) -9,069 -99.81% + misc::medium_32K 116,944 (280 MB/s) 17 (1929176 MB/s) -116,927 -99.99% + misc::no_exponential 2,379,518 394 (253 MB/s) -2,379,124 -99.98% + misc::not_literal 1,979 (25 MB/s) 105 (485 MB/s) -1,874 -94.69% + misc::one_pass_long_prefix 6,932 (3 MB/s) 68 (382 MB/s) -6,864 -99.02% + misc::one_pass_long_prefix_not 6,242 (4 MB/s) 58 (448 MB/s) -6,184 -99.07% + misc::one_pass_short 630 (26 MB/s) 45 (377 MB/s) -585 -92.86% + misc::one_pass_short_not 718 (23 MB/s) 50 (340 MB/s) -668 -93.04% + misc::reallyhard2_1K 108,421 (9 MB/s) 83 (12530 MB/s) -108,338 -99.92% + misc::reallyhard_1K 14,330 (73 MB/s) 1,822 (576 MB/s) -12,508 -87.29% + misc::reallyhard_1MB 3,287,965 (318 MB/s) 1,768,327 (592 MB/s) -1,519,638 -46.22% + misc::reallyhard_32 11,193 (5 MB/s) 121 (487 MB/s) -11,072 -98.92% + misc::reallyhard_32K 112,731 (290 MB/s) 56,375 (581 MB/s) -56,356 -49.99% + misc::reverse_suffix_no_quadratic 26,907 (297 MB/s) 5,803 (1378 MB/s) -21,104 -78.43% + regexdna::find_new_lines 48,223,361 (105 MB/s) 14,818,233 (343 MB/s) -33,405,128 -69.27% + regexdna::subst1 27,177,359 (187 MB/s) 896,790 (5668 MB/s) -26,280,569 -96.70% + regexdna::subst10 26,722,144 (190 MB/s) 957,325 (5310 MB/s) -25,764,819 -96.42% + regexdna::subst11 27,382,875 (185 MB/s) 917,248 (5542 MB/s) -26,465,627 -96.65% + regexdna::subst2 26,957,766 (188 MB/s) 892,129 (5698 MB/s) -26,065,637 -96.69% + regexdna::subst3 27,195,925 (186 MB/s) 929,250 (5470 MB/s) -26,266,675 -96.58% + regexdna::subst4 26,342,249 (192 MB/s) 872,581 (5825 MB/s) -25,469,668 -96.69% + regexdna::subst5 26,543,675 (191 MB/s) 875,804 (5804 MB/s) -25,667,871 -96.70% + regexdna::subst6 26,185,452 (194 MB/s) 884,639 (5746 MB/s) -25,300,813 -96.62% + regexdna::subst7 26,338,573 (193 MB/s) 872,791 (5824 MB/s) -25,465,782 -96.69% + regexdna::subst8 26,468,652 (192 MB/s) 873,833 (5817 MB/s) -25,594,819 -96.70% + regexdna::subst9 26,487,784 (191 MB/s) 886,744 (5732 MB/s) -25,601,040 -96.65% + 
regexdna::variant1 16,325,983 (311 MB/s) 3,699,267 (1374 MB/s) -12,626,716 -77.34% + regexdna::variant2 16,845,952 (301 MB/s) 6,760,952 (751 MB/s) -10,085,000 -59.87% + regexdna::variant3 19,258,030 (263 MB/s) 8,030,646 (633 MB/s) -11,227,384 -58.30% + regexdna::variant4 18,018,713 (282 MB/s) 8,077,290 (629 MB/s) -9,941,423 -55.17% + regexdna::variant5 19,583,528 (259 MB/s) 6,787,242 (748 MB/s) -12,796,286 -65.34% + regexdna::variant6 17,630,308 (288 MB/s) 6,577,777 (772 MB/s) -11,052,531 -62.69% + regexdna::variant7 17,121,666 (296 MB/s) 6,705,580 (758 MB/s) -10,416,086 -60.84% + regexdna::variant8 17,154,863 (296 MB/s) 6,818,785 (745 MB/s) -10,336,078 -60.25% + regexdna::variant9 17,930,482 (283 MB/s) 6,821,453 (745 MB/s) -11,109,029 -61.96% + sherlock::before_after_holmes 2,600,503 (228 MB/s) 1,029,866 (577 MB/s) -1,570,637 -60.40% + sherlock::before_holmes 3,145,648 (189 MB/s) 76,633 (7763 MB/s) -3,069,015 -97.56% + sherlock::holmes_cochar_watson 2,668,355 (222 MB/s) 144,725 (4110 MB/s) -2,523,630 -94.58% + sherlock::ing_suffix 5,638,296 (105 MB/s) 436,202 (1363 MB/s) -5,202,094 -92.26% + sherlock::ing_suffix_limited_space 22,466,946 (26 MB/s) 1,182,943 (502 MB/s) -21,284,003 -94.73% + sherlock::line_boundary_sherlock_holmes 2,251,996 (264 MB/s) 999,414 (595 MB/s) -1,252,582 -55.62% + sherlock::name_alt1 2,276,056 (261 MB/s) 34,298 (17345 MB/s) -2,241,758 -98.49% + sherlock::name_alt2 3,196,348 (186 MB/s) 124,226 (4789 MB/s) -3,072,122 -96.11% + sherlock::name_alt3 5,260,374 (113 MB/s) 137,742 (4319 MB/s) -5,122,632 -97.38% + sherlock::name_alt3_nocase 8,529,394 (69 MB/s) 1,293,763 (459 MB/s) -7,235,631 -84.83% + sherlock::name_alt4 2,787,972 (213 MB/s) 164,900 (3607 MB/s) -2,623,072 -94.09% + sherlock::name_alt4_nocase 3,370,452 (176 MB/s) 235,023 (2531 MB/s) -3,135,429 -93.03% + sherlock::name_alt5 3,795,793 (156 MB/s) 127,928 (4650 MB/s) -3,667,865 -96.63% + sherlock::name_alt5_nocase 4,691,422 (126 MB/s) 659,591 (901 MB/s) -4,031,831 -85.94% + 
sherlock::name_holmes 2,513,139 (236 MB/s) 40,902 (14545 MB/s) -2,472,237 -98.37% + sherlock::name_holmes_nocase 2,636,441 (225 MB/s) 198,658 (2994 MB/s) -2,437,783 -92.46% + sherlock::name_sherlock 2,015,753 (295 MB/s) 68,924 (8631 MB/s) -1,946,829 -96.58% + sherlock::name_sherlock_holmes 2,180,684 (272 MB/s) 31,640 (18803 MB/s) -2,149,044 -98.55% + sherlock::name_sherlock_holmes_nocase 2,306,664 (257 MB/s) 173,522 (3428 MB/s) -2,133,142 -92.48% + sherlock::name_sherlock_nocase 2,065,630 (288 MB/s) 170,888 (3481 MB/s) -1,894,742 -91.73% + sherlock::name_whitespace 2,266,188 (262 MB/s) 84,314 (7056 MB/s) -2,181,874 -96.28% + sherlock::no_match_common 1,881,887 (316 MB/s) 20,727 (28703 MB/s) -1,861,160 -98.90% + sherlock::no_match_really_common 1,804,352 (329 MB/s) 381,476 (1559 MB/s) -1,422,876 -78.86% + sherlock::no_match_uncommon 1,809,300 (328 MB/s) 20,786 (28621 MB/s) -1,788,514 -98.85% + sherlock::quotes 9,682,507 (61 MB/s) 531,487 (1119 MB/s) -9,151,020 -94.51% + sherlock::repeated_class_negation 68,600,251 (8 MB/s) 85,881,944 (6 MB/s) 17,281,693 25.19% + sherlock::the_lower 6,849,558 (86 MB/s) 654,110 (909 MB/s) -6,195,448 -90.45% + sherlock::the_nocase 7,354,742 (80 MB/s) 474,456 (1253 MB/s) -6,880,286 -93.55% + sherlock::the_upper 2,442,364 (243 MB/s) 43,746 (13599 MB/s) -2,398,618 -98.21% + sherlock::the_whitespace 9,210,338 (64 MB/s) 1,181,974 (503 MB/s) -8,028,364 -87.17% + sherlock::words 47,863,652 (12 MB/s) 9,697,201 (61 MB/s) -38,166,451 -79.74% diff --git a/vendor/regex/record/old-bench-log/06/dphobos-dmd b/vendor/regex/record/old-bench-log/06/dphobos-dmd new file mode 100644 index 0000000..bffdd29 --- /dev/null +++ b/vendor/regex/record/old-bench-log/06/dphobos-dmd @@ -0,0 +1,98 @@ +running 95 tests +test misc::anchored_literal_long_match ... bench: 356 ns/iter (+/- 4) = 1095 MB/s +test misc::anchored_literal_long_non_match ... bench: 280 ns/iter (+/- 18) = 1392 MB/s +test misc::anchored_literal_short_match ... 
bench: 351 ns/iter (+/- 16) = 74 MB/s +test misc::anchored_literal_short_non_match ... bench: 274 ns/iter (+/- 17) = 94 MB/s +test misc::easy0_1K ... bench: 810 ns/iter (+/- 38) = 1297 MB/s +test misc::easy0_1MB ... bench: 25,296 ns/iter (+/- 3,592) = 41453 MB/s +test misc::easy0_32 ... bench: 745 ns/iter (+/- 60) = 79 MB/s +test misc::easy0_32K ... bench: 1,111 ns/iter (+/- 82) = 29518 MB/s +test misc::easy1_1K ... bench: 730 ns/iter (+/- 20) = 1430 MB/s +test misc::easy1_1MB ... bench: 25,442 ns/iter (+/- 2,076) = 41215 MB/s +test misc::easy1_32 ... bench: 730 ns/iter (+/- 79) = 71 MB/s +test misc::easy1_32K ... bench: 1,104 ns/iter (+/- 93) = 29699 MB/s +test misc::hard_1K ... bench: 18,238 ns/iter (+/- 1,173) = 57 MB/s +test misc::hard_1MB ... bench: 19,302,344 ns/iter (+/- 2,039,538) = 54 MB/s +test misc::hard_32 ... bench: 2,508 ns/iter (+/- 119) = 23 MB/s +test misc::hard_32K ... bench: 666,948 ns/iter (+/- 58,067) = 49 MB/s +test misc::literal ... bench: 196 ns/iter (+/- 17) = 260 MB/s +test misc::long_needle1 ... bench: 82,532 ns/iter (+/- 4,618) = 1211 MB/s +test misc::long_needle2 ... bench: 84,079 ns/iter (+/- 5,930) = 1189 MB/s +test misc::match_class ... bench: 300 ns/iter (+/- 41) = 270 MB/s +test misc::match_class_in_range ... bench: 258 ns/iter (+/- 16) = 313 MB/s +test misc::match_class_unicode ... bench: 1,563 ns/iter (+/- 171) = 103 MB/s +test misc::medium_1K ... bench: 1,541 ns/iter (+/- 127) = 682 MB/s +test misc::medium_1MB ... bench: 617,650 ns/iter (+/- 59,618) = 1697 MB/s +test misc::medium_32 ... bench: 985 ns/iter (+/- 62) = 60 MB/s +test misc::medium_32K ... bench: 19,948 ns/iter (+/- 1,388) = 1644 MB/s +test misc::no_exponential ... bench: 430,777 ns/iter (+/- 52,435) +test misc::not_literal ... bench: 1,202 ns/iter (+/- 60) = 42 MB/s +test misc::one_pass_long_prefix ... bench: 630 ns/iter (+/- 45) = 41 MB/s +test misc::one_pass_long_prefix_not ... bench: 617 ns/iter (+/- 60) = 42 MB/s +test misc::one_pass_short ... 
bench: 1,102 ns/iter (+/- 38) = 15 MB/s +test misc::one_pass_short_not ... bench: 1,481 ns/iter (+/- 44) = 11 MB/s +test misc::reallyhard2_1K ... bench: 40,749 ns/iter (+/- 2,027) = 25 MB/s +test misc::reallyhard_1K ... bench: 18,987 ns/iter (+/- 1,419) = 55 MB/s +test misc::reallyhard_1MB ... bench: 19,923,786 ns/iter (+/- 1,499,750) = 52 MB/s +test misc::reallyhard_32 ... bench: 2,369 ns/iter (+/- 115) = 24 MB/s +test misc::reallyhard_32K ... bench: 627,664 ns/iter (+/- 30,507) = 52 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 4,933 ns/iter (+/- 700) = 1621 MB/s +test regexdna::find_new_lines ... bench: 28,886,666 ns/iter (+/- 1,969,669) = 175 MB/s +test regexdna::subst1 ... bench: 6,722,884 ns/iter (+/- 431,722) = 756 MB/s +test regexdna::subst10 ... bench: 6,923,833 ns/iter (+/- 677,840) = 734 MB/s +test regexdna::subst11 ... bench: 6,917,738 ns/iter (+/- 306,829) = 734 MB/s +test regexdna::subst2 ... bench: 6,914,593 ns/iter (+/- 625,342) = 735 MB/s +test regexdna::subst3 ... bench: 6,582,793 ns/iter (+/- 297,052) = 772 MB/s +test regexdna::subst4 ... bench: 6,528,804 ns/iter (+/- 463,331) = 778 MB/s +test regexdna::subst5 ... bench: 6,886,457 ns/iter (+/- 1,015,943) = 738 MB/s +test regexdna::subst6 ... bench: 6,789,493 ns/iter (+/- 573,137) = 748 MB/s +test regexdna::subst7 ... bench: 6,533,609 ns/iter (+/- 372,293) = 778 MB/s +test regexdna::subst8 ... bench: 6,536,845 ns/iter (+/- 290,249) = 777 MB/s +test regexdna::subst9 ... bench: 6,509,834 ns/iter (+/- 402,426) = 780 MB/s +test regexdna::variant1 ... bench: 5,746,639 ns/iter (+/- 205,103) = 884 MB/s +test regexdna::variant2 ... bench: 7,661,372 ns/iter (+/- 145,811) = 663 MB/s +test regexdna::variant3 ... bench: 12,801,668 ns/iter (+/- 337,572) = 397 MB/s +test regexdna::variant4 ... bench: 11,109,679 ns/iter (+/- 357,680) = 457 MB/s +test regexdna::variant5 ... bench: 11,238,093 ns/iter (+/- 1,571,929) = 452 MB/s +test regexdna::variant6 ... 
bench: 8,453,224 ns/iter (+/- 185,044) = 601 MB/s +test regexdna::variant7 ... bench: 8,784,446 ns/iter (+/- 153,626) = 578 MB/s +test regexdna::variant8 ... bench: 11,151,797 ns/iter (+/- 366,593) = 455 MB/s +test regexdna::variant9 ... bench: 22,206,248 ns/iter (+/- 1,143,965) = 228 MB/s +test sherlock::before_after_holmes ... bench: 23,458,512 ns/iter (+/- 1,982,069) = 25 MB/s +test sherlock::before_holmes ... bench: 23,040,796 ns/iter (+/- 688,881) = 25 MB/s +test sherlock::holmes_cochar_watson ... bench: 1,035,156 ns/iter (+/- 113,881) = 574 MB/s +test sherlock::holmes_coword_watson ... bench: 118,126,447 ns/iter (+/- 8,394,250) = 5 MB/s +test sherlock::ing_suffix ... bench: 16,122,434 ns/iter (+/- 236,636) = 36 MB/s +test sherlock::ing_suffix_limited_space ... bench: 22,239,435 ns/iter (+/- 364,604) = 26 MB/s +test sherlock::letters ... bench: 92,002,273 ns/iter (+/- 2,056,908) = 6 MB/s +test sherlock::letters_lower ... bench: 90,778,580 ns/iter (+/- 4,179,255) = 6 MB/s +test sherlock::letters_upper ... bench: 3,392,415 ns/iter (+/- 143,338) = 175 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 428,636 ns/iter (+/- 14,993) = 1387 MB/s +test sherlock::name_alt1 ... bench: 432,574 ns/iter (+/- 13,731) = 1375 MB/s +test sherlock::name_alt2 ... bench: 644,165 ns/iter (+/- 15,049) = 923 MB/s +test sherlock::name_alt3 ... bench: 1,176,979 ns/iter (+/- 105,694) = 505 MB/s +test sherlock::name_alt3_nocase ... bench: 2,054,990 ns/iter (+/- 91,909) = 289 MB/s +test sherlock::name_alt4 ... bench: 712,039 ns/iter (+/- 36,911) = 835 MB/s +test sherlock::name_alt4_nocase ... bench: 993,415 ns/iter (+/- 27,355) = 598 MB/s +test sherlock::name_alt5 ... bench: 757,045 ns/iter (+/- 29,126) = 785 MB/s +test sherlock::name_alt5_nocase ... bench: 953,821 ns/iter (+/- 37,252) = 623 MB/s +test sherlock::name_holmes ... bench: 186,801 ns/iter (+/- 6,676) = 3184 MB/s +test sherlock::name_holmes_nocase ... 
bench: 539,857 ns/iter (+/- 40,614) = 1102 MB/s +test sherlock::name_sherlock ... bench: 56,113 ns/iter (+/- 4,566) = 10602 MB/s +test sherlock::name_sherlock_holmes ... bench: 67,558 ns/iter (+/- 6,746) = 8806 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 429,123 ns/iter (+/- 51,647) = 1386 MB/s +test sherlock::name_sherlock_nocase ... bench: 396,070 ns/iter (+/- 33,934) = 1502 MB/s +test sherlock::name_whitespace ... bench: 84,630 ns/iter (+/- 6,285) = 7029 MB/s +test sherlock::no_match_common ... bench: 292,844 ns/iter (+/- 24,013) = 2031 MB/s +test sherlock::no_match_really_common ... bench: 290,986 ns/iter (+/- 10,163) = 2044 MB/s +test sherlock::no_match_uncommon ... bench: 14,041 ns/iter (+/- 599) = 42371 MB/s +test sherlock::quotes ... bench: 6,489,945 ns/iter (+/- 132,983) = 91 MB/s +test sherlock::repeated_class_negation ... bench: 49,479,000 ns/iter (+/- 965,144) = 12 MB/s +test sherlock::the_lower ... bench: 2,268,881 ns/iter (+/- 134,889) = 262 MB/s +test sherlock::the_nocase ... bench: 2,906,824 ns/iter (+/- 72,615) = 204 MB/s +test sherlock::the_upper ... bench: 211,138 ns/iter (+/- 9,935) = 2817 MB/s +test sherlock::the_whitespace ... bench: 3,488,249 ns/iter (+/- 254,294) = 170 MB/s +test sherlock::word_ending_n ... bench: 30,917,395 ns/iter (+/- 2,298,620) = 19 MB/s +test sherlock::words ... bench: 39,830,572 ns/iter (+/- 2,662,348) = 14 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 95 measured; 0 filtered out diff --git a/vendor/regex/record/old-bench-log/06/dphobos-dmd-ct b/vendor/regex/record/old-bench-log/06/dphobos-dmd-ct new file mode 100644 index 0000000..426fa6c --- /dev/null +++ b/vendor/regex/record/old-bench-log/06/dphobos-dmd-ct @@ -0,0 +1,99 @@ + +running 94 tests +test misc::anchored_literal_long_match ... bench: 336 ns/iter (+/- 145) = 1160 MB/s +test misc::anchored_literal_long_non_match ... bench: 246 ns/iter (+/- 27) = 1585 MB/s +test misc::anchored_literal_short_match ... 
bench: 313 ns/iter (+/- 32) = 83 MB/s +test misc::anchored_literal_short_non_match ... bench: 248 ns/iter (+/- 31) = 104 MB/s +test misc::easy0_1K ... bench: 792 ns/iter (+/- 109) = 1327 MB/s +test misc::easy0_1MB ... bench: 24,706 ns/iter (+/- 812) = 42443 MB/s +test misc::easy0_32 ... bench: 793 ns/iter (+/- 77) = 74 MB/s +test misc::easy0_32K ... bench: 1,179 ns/iter (+/- 55) = 27815 MB/s +test misc::easy1_1K ... bench: 720 ns/iter (+/- 85) = 1450 MB/s +test misc::easy1_1MB ... bench: 24,647 ns/iter (+/- 761) = 42544 MB/s +test misc::easy1_32 ... bench: 717 ns/iter (+/- 28) = 72 MB/s +test misc::easy1_32K ... bench: 1,140 ns/iter (+/- 116) = 28761 MB/s +test misc::hard_1K ... bench: 19,153 ns/iter (+/- 2,063) = 54 MB/s +test misc::hard_1MB ... bench: 19,966,822 ns/iter (+/- 1,979,640) = 52 MB/s +test misc::hard_32 ... bench: 2,617 ns/iter (+/- 354) = 22 MB/s +test misc::hard_32K ... bench: 621,150 ns/iter (+/- 24,244) = 52 MB/s +test misc::literal ... bench: 194 ns/iter (+/- 28) = 262 MB/s +test misc::long_needle1 ... bench: 83,293 ns/iter (+/- 3,287) = 1200 MB/s +test misc::long_needle2 ... bench: 83,214 ns/iter (+/- 3,344) = 1201 MB/s +test misc::match_class ... bench: 301 ns/iter (+/- 38) = 269 MB/s +test misc::match_class_in_range ... bench: 258 ns/iter (+/- 27) = 313 MB/s +test misc::match_class_unicode ... bench: 1,565 ns/iter (+/- 187) = 102 MB/s +test misc::medium_1K ... bench: 1,572 ns/iter (+/- 230) = 669 MB/s +test misc::medium_1MB ... bench: 609,944 ns/iter (+/- 23,088) = 1719 MB/s +test misc::medium_32 ... bench: 980 ns/iter (+/- 112) = 61 MB/s +test misc::medium_32K ... bench: 20,058 ns/iter (+/- 884) = 1635 MB/s +test misc::not_literal ... bench: 1,218 ns/iter (+/- 67) = 41 MB/s +test misc::one_pass_long_prefix ... bench: 588 ns/iter (+/- 93) = 44 MB/s +test misc::one_pass_long_prefix_not ... bench: 595 ns/iter (+/- 77) = 43 MB/s +test misc::one_pass_short ... bench: 1,114 ns/iter (+/- 52) = 15 MB/s +test misc::one_pass_short_not ... 
bench: 1,481 ns/iter (+/- 183) = 11 MB/s +test misc::reallyhard2_1K ... bench: 40,858 ns/iter (+/- 1,860) = 25 MB/s +test misc::reallyhard_1K ... bench: 18,678 ns/iter (+/- 835) = 56 MB/s +test misc::reallyhard_1MB ... bench: 19,824,750 ns/iter (+/- 354,159) = 52 MB/s +test misc::reallyhard_32 ... bench: 2,340 ns/iter (+/- 68) = 25 MB/s +test misc::reallyhard_32K ... bench: 621,351 ns/iter (+/- 21,369) = 52 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 4,919 ns/iter (+/- 224) = 1626 MB/s +test regexdna::find_new_lines ... bench: 27,265,128 ns/iter (+/- 1,416,486) = 186 MB/s +test regexdna::subst1 ... bench: 6,414,636 ns/iter (+/- 696,943) = 792 MB/s +test regexdna::subst10 ... bench: 6,426,829 ns/iter (+/- 206,773) = 790 MB/s +test regexdna::subst11 ... bench: 6,435,800 ns/iter (+/- 439,175) = 789 MB/s +test regexdna::subst2 ... bench: 6,428,455 ns/iter (+/- 214,961) = 790 MB/s +test regexdna::subst3 ... bench: 6,428,692 ns/iter (+/- 681,910) = 790 MB/s +test regexdna::subst4 ... bench: 6,425,781 ns/iter (+/- 129,718) = 791 MB/s +test regexdna::subst5 ... bench: 6,414,376 ns/iter (+/- 151,827) = 792 MB/s +test regexdna::subst6 ... bench: 6,455,032 ns/iter (+/- 423,915) = 787 MB/s +test regexdna::subst7 ... bench: 6,668,649 ns/iter (+/- 686,734) = 762 MB/s +test regexdna::subst8 ... bench: 6,393,791 ns/iter (+/- 172,533) = 795 MB/s +test regexdna::subst9 ... bench: 6,426,100 ns/iter (+/- 175,951) = 791 MB/s +test regexdna::variant1 ... bench: 5,612,507 ns/iter (+/- 128,406) = 905 MB/s +test regexdna::variant2 ... bench: 7,572,661 ns/iter (+/- 159,047) = 671 MB/s +test regexdna::variant3 ... bench: 12,287,183 ns/iter (+/- 378,305) = 413 MB/s +test regexdna::variant4 ... bench: 11,223,976 ns/iter (+/- 1,191,250) = 452 MB/s +test regexdna::variant5 ... bench: 11,016,081 ns/iter (+/- 714,537) = 461 MB/s +test regexdna::variant6 ... bench: 8,198,798 ns/iter (+/- 471,338) = 620 MB/s +test regexdna::variant7 ... 
bench: 8,895,886 ns/iter (+/- 885,690) = 571 MB/s +test regexdna::variant8 ... bench: 11,000,942 ns/iter (+/- 886,538) = 462 MB/s +test regexdna::variant9 ... bench: 20,761,109 ns/iter (+/- 629,876) = 244 MB/s +test sherlock::before_after_holmes ... bench: 24,417,513 ns/iter (+/- 2,359,425) = 24 MB/s +test sherlock::before_holmes ... bench: 24,435,196 ns/iter (+/- 2,164,187) = 24 MB/s +test sherlock::holmes_cochar_watson ... bench: 1,025,780 ns/iter (+/- 121,876) = 579 MB/s +test sherlock::holmes_coword_watson ... bench: 122,988,753 ns/iter (+/- 7,606,302) = 4 MB/s +test sherlock::ing_suffix ... bench: 16,322,427 ns/iter (+/- 321,746) = 36 MB/s +test sherlock::ing_suffix_limited_space ... bench: 21,993,282 ns/iter (+/- 434,365) = 27 MB/s +test sherlock::letters ... bench: 88,877,258 ns/iter (+/- 504,024) = 6 MB/s +test sherlock::letters_lower ... bench: 87,709,419 ns/iter (+/- 659,859) = 6 MB/s +test sherlock::letters_upper ... bench: 3,299,811 ns/iter (+/- 78,850) = 180 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 431,548 ns/iter (+/- 54,716) = 1378 MB/s +test sherlock::name_alt1 ... bench: 470,052 ns/iter (+/- 49,985) = 1265 MB/s +test sherlock::name_alt2 ... bench: 705,694 ns/iter (+/- 45,724) = 843 MB/s +test sherlock::name_alt3 ... bench: 1,148,456 ns/iter (+/- 51,018) = 518 MB/s +test sherlock::name_alt3_nocase ... bench: 2,026,355 ns/iter (+/- 220,043) = 293 MB/s +test sherlock::name_alt4 ... bench: 699,625 ns/iter (+/- 40,361) = 850 MB/s +test sherlock::name_alt4_nocase ... bench: 979,151 ns/iter (+/- 41,460) = 607 MB/s +test sherlock::name_alt5 ... bench: 751,646 ns/iter (+/- 31,601) = 791 MB/s +test sherlock::name_alt5_nocase ... bench: 950,701 ns/iter (+/- 102,078) = 625 MB/s +test sherlock::name_holmes ... bench: 184,935 ns/iter (+/- 6,633) = 3216 MB/s +test sherlock::name_holmes_nocase ... bench: 532,703 ns/iter (+/- 33,919) = 1116 MB/s +test sherlock::name_sherlock ... 
bench: 55,468 ns/iter (+/- 1,776) = 10725 MB/s +test sherlock::name_sherlock_holmes ... bench: 67,327 ns/iter (+/- 5,464) = 8836 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 417,724 ns/iter (+/- 45,520) = 1424 MB/s +test sherlock::name_sherlock_nocase ... bench: 392,285 ns/iter (+/- 14,778) = 1516 MB/s +test sherlock::name_whitespace ... bench: 77,112 ns/iter (+/- 2,785) = 7715 MB/s +test sherlock::no_match_common ... bench: 291,222 ns/iter (+/- 10,477) = 2042 MB/s +test sherlock::no_match_really_common ... bench: 291,393 ns/iter (+/- 10,834) = 2041 MB/s +test sherlock::no_match_uncommon ... bench: 14,016 ns/iter (+/- 376) = 42446 MB/s +test sherlock::quotes ... bench: 6,557,639 ns/iter (+/- 158,929) = 90 MB/s +test sherlock::repeated_class_negation ... bench: 49,697,910 ns/iter (+/- 773,749) = 11 MB/s +test sherlock::the_lower ... bench: 2,236,055 ns/iter (+/- 72,024) = 266 MB/s +test sherlock::the_nocase ... bench: 2,892,430 ns/iter (+/- 89,222) = 205 MB/s +test sherlock::the_upper ... bench: 207,035 ns/iter (+/- 8,624) = 2873 MB/s +test sherlock::the_whitespace ... bench: 3,435,267 ns/iter (+/- 416,560) = 173 MB/s +test sherlock::word_ending_n ... bench: 31,751,871 ns/iter (+/- 374,472) = 18 MB/s +test sherlock::words ... bench: 38,793,659 ns/iter (+/- 3,022,370) = 15 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 94 measured; 0 filtered out + diff --git a/vendor/regex/record/old-bench-log/06/dphobos-ldc b/vendor/regex/record/old-bench-log/06/dphobos-ldc new file mode 100644 index 0000000..29f5595 --- /dev/null +++ b/vendor/regex/record/old-bench-log/06/dphobos-ldc @@ -0,0 +1,100 @@ + +running 95 tests +test misc::anchored_literal_long_match ... bench: 203 ns/iter (+/- 13) = 1921 MB/s +test misc::anchored_literal_long_non_match ... bench: 126 ns/iter (+/- 5) = 3095 MB/s +test misc::anchored_literal_short_match ... bench: 204 ns/iter (+/- 4) = 127 MB/s +test misc::anchored_literal_short_non_match ... 
bench: 127 ns/iter (+/- 8) = 204 MB/s +test misc::easy0_1K ... bench: 571 ns/iter (+/- 44) = 1840 MB/s +test misc::easy0_1MB ... bench: 25,321 ns/iter (+/- 421) = 41412 MB/s +test misc::easy0_32 ... bench: 553 ns/iter (+/- 9) = 106 MB/s +test misc::easy0_32K ... bench: 971 ns/iter (+/- 29) = 33774 MB/s +test misc::easy1_1K ... bench: 508 ns/iter (+/- 22) = 2055 MB/s +test misc::easy1_1MB ... bench: 24,181 ns/iter (+/- 704) = 43364 MB/s +test misc::easy1_32 ... bench: 494 ns/iter (+/- 14) = 105 MB/s +test misc::easy1_32K ... bench: 892 ns/iter (+/- 82) = 36757 MB/s +test misc::hard_1K ... bench: 15,335 ns/iter (+/- 1,224) = 68 MB/s +test misc::hard_1MB ... bench: 16,105,838 ns/iter (+/- 319,567) = 65 MB/s +test misc::hard_32 ... bench: 1,798 ns/iter (+/- 79) = 32 MB/s +test misc::hard_32K ... bench: 504,123 ns/iter (+/- 44,829) = 65 MB/s +test misc::literal ... bench: 74 ns/iter (+/- 9) = 689 MB/s +test misc::long_needle1 ... bench: 56,853 ns/iter (+/- 3,662) = 1758 MB/s +test misc::long_needle2 ... bench: 57,038 ns/iter (+/- 2,532) = 1753 MB/s +test misc::match_class ... bench: 140 ns/iter (+/- 15) = 578 MB/s +test misc::match_class_in_range ... bench: 126 ns/iter (+/- 17) = 642 MB/s +test misc::match_class_unicode ... bench: 1,407 ns/iter (+/- 122) = 114 MB/s +test misc::medium_1K ... bench: 1,199 ns/iter (+/- 80) = 877 MB/s +test misc::medium_1MB ... bench: 558,323 ns/iter (+/- 20,908) = 1878 MB/s +test misc::medium_32 ... bench: 661 ns/iter (+/- 30) = 90 MB/s +test misc::medium_32K ... bench: 18,148 ns/iter (+/- 1,038) = 1807 MB/s +test misc::no_exponential ... bench: 334,786 ns/iter (+/- 18,234) +test misc::not_literal ... bench: 1,347 ns/iter (+/- 49) = 37 MB/s +test misc::one_pass_long_prefix ... bench: 499 ns/iter (+/- 59) = 52 MB/s +test misc::one_pass_long_prefix_not ... bench: 522 ns/iter (+/- 64) = 49 MB/s +test misc::one_pass_short ... bench: 804 ns/iter (+/- 37) = 21 MB/s +test misc::one_pass_short_not ... 
bench: 1,260 ns/iter (+/- 130) = 13 MB/s +test misc::reallyhard2_1K ... bench: 37,726 ns/iter (+/- 1,284) = 27 MB/s +test misc::reallyhard_1K ... bench: 15,246 ns/iter (+/- 901) = 68 MB/s +test misc::reallyhard_1MB ... bench: 16,187,692 ns/iter (+/- 1,552,760) = 64 MB/s +test misc::reallyhard_32 ... bench: 1,882 ns/iter (+/- 237) = 31 MB/s +test misc::reallyhard_32K ... bench: 541,567 ns/iter (+/- 64,929) = 60 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 4,576 ns/iter (+/- 185) = 1748 MB/s +test regexdna::find_new_lines ... bench: 14,744,849 ns/iter (+/- 1,141,621) = 344 MB/s +test regexdna::subst1 ... bench: 2,801,370 ns/iter (+/- 105,875) = 1814 MB/s +test regexdna::subst10 ... bench: 3,015,410 ns/iter (+/- 446,982) = 1685 MB/s +test regexdna::subst11 ... bench: 2,923,557 ns/iter (+/- 193,230) = 1738 MB/s +test regexdna::subst2 ... bench: 2,948,002 ns/iter (+/- 306,203) = 1724 MB/s +test regexdna::subst3 ... bench: 2,899,076 ns/iter (+/- 174,958) = 1753 MB/s +test regexdna::subst4 ... bench: 2,908,685 ns/iter (+/- 221,436) = 1747 MB/s +test regexdna::subst5 ... bench: 3,780,044 ns/iter (+/- 150,740) = 1344 MB/s +test regexdna::subst6 ... bench: 2,920,193 ns/iter (+/- 142,191) = 1740 MB/s +test regexdna::subst7 ... bench: 2,918,785 ns/iter (+/- 175,109) = 1741 MB/s +test regexdna::subst8 ... bench: 2,932,075 ns/iter (+/- 152,745) = 1733 MB/s +test regexdna::subst9 ... bench: 2,914,694 ns/iter (+/- 176,327) = 1744 MB/s +test regexdna::variant1 ... bench: 5,172,617 ns/iter (+/- 269,855) = 982 MB/s +test regexdna::variant2 ... bench: 6,770,702 ns/iter (+/- 474,076) = 750 MB/s +test regexdna::variant3 ... bench: 11,124,754 ns/iter (+/- 649,591) = 456 MB/s +test regexdna::variant4 ... bench: 9,751,982 ns/iter (+/- 460,679) = 521 MB/s +test regexdna::variant5 ... bench: 9,791,229 ns/iter (+/- 461,486) = 519 MB/s +test regexdna::variant6 ... bench: 7,417,031 ns/iter (+/- 275,225) = 685 MB/s +test regexdna::variant7 ... 
bench: 7,873,097 ns/iter (+/- 451,115) = 645 MB/s +test regexdna::variant8 ... bench: 9,707,683 ns/iter (+/- 418,865) = 523 MB/s +test regexdna::variant9 ... bench: 18,696,520 ns/iter (+/- 742,018) = 271 MB/s +test sherlock::before_after_holmes ... bench: 22,314,084 ns/iter (+/- 888,249) = 26 MB/s +test sherlock::before_holmes ... bench: 22,501,540 ns/iter (+/- 892,027) = 26 MB/s +test sherlock::holmes_cochar_watson ... bench: 929,372 ns/iter (+/- 46,859) = 640 MB/s +test sherlock::holmes_coword_watson ... bench: 125,548,613 ns/iter (+/- 3,297,687) = 4 MB/s +test sherlock::ing_suffix ... bench: 18,023,803 ns/iter (+/- 1,079,960) = 33 MB/s +test sherlock::ing_suffix_limited_space ... bench: 21,809,497 ns/iter (+/- 1,259,989) = 27 MB/s +test sherlock::letters ... bench: 39,512,315 ns/iter (+/- 3,309,084) = 15 MB/s +test sherlock::letters_lower ... bench: 37,160,354 ns/iter (+/- 3,084,525) = 16 MB/s +test sherlock::letters_upper ... bench: 1,721,867 ns/iter (+/- 66,812) = 345 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 378,307 ns/iter (+/- 30,914) = 1572 MB/s +test sherlock::name_alt1 ... bench: 381,242 ns/iter (+/- 41,954) = 1560 MB/s +test sherlock::name_alt2 ... bench: 503,558 ns/iter (+/- 46,044) = 1181 MB/s +test sherlock::name_alt3 ... bench: 912,340 ns/iter (+/- 79,787) = 652 MB/s +test sherlock::name_alt3_nocase ... bench: 1,515,048 ns/iter (+/- 74,623) = 392 MB/s +test sherlock::name_alt4 ... bench: 580,652 ns/iter (+/- 60,407) = 1024 MB/s +test sherlock::name_alt4_nocase ... bench: 826,866 ns/iter (+/- 58,485) = 719 MB/s +test sherlock::name_alt5 ... bench: 651,281 ns/iter (+/- 64,134) = 913 MB/s +test sherlock::name_alt5_nocase ... bench: 808,974 ns/iter (+/- 49,119) = 735 MB/s +test sherlock::name_holmes ... bench: 120,010 ns/iter (+/- 9,458) = 4957 MB/s +test sherlock::name_holmes_nocase ... bench: 441,316 ns/iter (+/- 56,990) = 1348 MB/s +test sherlock::name_sherlock ... 
bench: 39,935 ns/iter (+/- 4,078) = 14897 MB/s +test sherlock::name_sherlock_holmes ... bench: 49,126 ns/iter (+/- 3,082) = 12110 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 366,865 ns/iter (+/- 18,520) = 1621 MB/s +test sherlock::name_sherlock_nocase ... bench: 349,337 ns/iter (+/- 18,365) = 1703 MB/s +test sherlock::name_whitespace ... bench: 57,076 ns/iter (+/- 6,314) = 10423 MB/s +test sherlock::no_match_common ... bench: 291,022 ns/iter (+/- 30,143) = 2044 MB/s +test sherlock::no_match_really_common ... bench: 286,214 ns/iter (+/- 15,722) = 2078 MB/s +test sherlock::no_match_uncommon ... bench: 13,963 ns/iter (+/- 759) = 42607 MB/s +test sherlock::quotes ... bench: 5,580,378 ns/iter (+/- 295,941) = 106 MB/s +test sherlock::repeated_class_negation ... bench: 52,797,981 ns/iter (+/- 2,731,805) = 11 MB/s +test sherlock::the_lower ... bench: 1,295,105 ns/iter (+/- 62,365) = 459 MB/s +test sherlock::the_nocase ... bench: 1,620,713 ns/iter (+/- 73,503) = 367 MB/s +test sherlock::the_upper ... bench: 112,911 ns/iter (+/- 5,843) = 5269 MB/s +test sherlock::the_whitespace ... bench: 2,441,986 ns/iter (+/- 133,012) = 243 MB/s +test sherlock::word_ending_n ... bench: 26,478,327 ns/iter (+/- 1,361,757) = 22 MB/s +test sherlock::words ... bench: 23,948,872 ns/iter (+/- 2,323,993) = 24 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 95 measured; 0 filtered out + diff --git a/vendor/regex/record/old-bench-log/06/dphobos-ldc-ct b/vendor/regex/record/old-bench-log/06/dphobos-ldc-ct new file mode 100644 index 0000000..6aaa5de --- /dev/null +++ b/vendor/regex/record/old-bench-log/06/dphobos-ldc-ct @@ -0,0 +1,99 @@ + +running 94 tests +test misc::anchored_literal_long_match ... bench: 189 ns/iter (+/- 23) = 2063 MB/s +test misc::anchored_literal_long_non_match ... bench: 128 ns/iter (+/- 14) = 3046 MB/s +test misc::anchored_literal_short_match ... bench: 191 ns/iter (+/- 20) = 136 MB/s +test misc::anchored_literal_short_non_match ... 
bench: 120 ns/iter (+/- 13) = 216 MB/s +test misc::easy0_1K ... bench: 536 ns/iter (+/- 49) = 1960 MB/s +test misc::easy0_1MB ... bench: 24,516 ns/iter (+/- 2,181) = 42772 MB/s +test misc::easy0_32 ... bench: 551 ns/iter (+/- 36) = 107 MB/s +test misc::easy0_32K ... bench: 961 ns/iter (+/- 105) = 34125 MB/s +test misc::easy1_1K ... bench: 518 ns/iter (+/- 59) = 2015 MB/s +test misc::easy1_1MB ... bench: 25,352 ns/iter (+/- 2,847) = 41361 MB/s +test misc::easy1_32 ... bench: 501 ns/iter (+/- 42) = 103 MB/s +test misc::easy1_32K ... bench: 919 ns/iter (+/- 69) = 35677 MB/s +test misc::hard_1K ... bench: 16,146 ns/iter (+/- 1,124) = 65 MB/s +test misc::hard_1MB ... bench: 16,482,695 ns/iter (+/- 805,077) = 63 MB/s +test misc::hard_32 ... bench: 1,807 ns/iter (+/- 173) = 32 MB/s +test misc::hard_32K ... bench: 516,772 ns/iter (+/- 33,884) = 63 MB/s +test misc::literal ... bench: 77 ns/iter (+/- 9) = 662 MB/s +test misc::long_needle1 ... bench: 56,900 ns/iter (+/- 3,087) = 1757 MB/s +test misc::long_needle2 ... bench: 57,364 ns/iter (+/- 4,166) = 1743 MB/s +test misc::match_class ... bench: 156 ns/iter (+/- 21) = 519 MB/s +test misc::match_class_in_range ... bench: 121 ns/iter (+/- 12) = 669 MB/s +test misc::match_class_unicode ... bench: 1,515 ns/iter (+/- 207) = 106 MB/s +test misc::medium_1K ... bench: 1,186 ns/iter (+/- 120) = 887 MB/s +test misc::medium_1MB ... bench: 559,677 ns/iter (+/- 59,284) = 1873 MB/s +test misc::medium_32 ... bench: 657 ns/iter (+/- 86) = 91 MB/s +test misc::medium_32K ... bench: 18,142 ns/iter (+/- 915) = 1807 MB/s +test misc::not_literal ... bench: 1,319 ns/iter (+/- 128) = 38 MB/s +test misc::one_pass_long_prefix ... bench: 509 ns/iter (+/- 56) = 51 MB/s +test misc::one_pass_long_prefix_not ... bench: 517 ns/iter (+/- 38) = 50 MB/s +test misc::one_pass_short ... bench: 783 ns/iter (+/- 83) = 21 MB/s +test misc::one_pass_short_not ... bench: 1,239 ns/iter (+/- 98) = 13 MB/s +test misc::reallyhard2_1K ... 
bench: 40,580 ns/iter (+/- 3,041) = 25 MB/s +test misc::reallyhard_1K ... bench: 15,162 ns/iter (+/- 652) = 69 MB/s +test misc::reallyhard_1MB ... bench: 16,065,920 ns/iter (+/- 886,245) = 65 MB/s +test misc::reallyhard_32 ... bench: 1,829 ns/iter (+/- 90) = 32 MB/s +test misc::reallyhard_32K ... bench: 520,572 ns/iter (+/- 88,290) = 62 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 4,423 ns/iter (+/- 493) = 1808 MB/s +test regexdna::find_new_lines ... bench: 14,658,357 ns/iter (+/- 1,784,941) = 346 MB/s +test regexdna::subst1 ... bench: 2,984,959 ns/iter (+/- 422,186) = 1703 MB/s +test regexdna::subst10 ... bench: 2,836,747 ns/iter (+/- 274,300) = 1791 MB/s +test regexdna::subst11 ... bench: 2,809,880 ns/iter (+/- 309,516) = 1809 MB/s +test regexdna::subst2 ... bench: 2,868,765 ns/iter (+/- 435,511) = 1771 MB/s +test regexdna::subst3 ... bench: 2,837,000 ns/iter (+/- 319,135) = 1791 MB/s +test regexdna::subst4 ... bench: 2,856,540 ns/iter (+/- 320,458) = 1779 MB/s +test regexdna::subst5 ... bench: 2,820,953 ns/iter (+/- 340,996) = 1802 MB/s +test regexdna::subst6 ... bench: 3,588,607 ns/iter (+/- 462,158) = 1416 MB/s +test regexdna::subst7 ... bench: 2,896,235 ns/iter (+/- 165,525) = 1755 MB/s +test regexdna::subst8 ... bench: 2,982,961 ns/iter (+/- 315,768) = 1704 MB/s +test regexdna::subst9 ... bench: 3,024,311 ns/iter (+/- 300,274) = 1680 MB/s +test regexdna::variant1 ... bench: 5,234,342 ns/iter (+/- 269,577) = 971 MB/s +test regexdna::variant2 ... bench: 6,463,683 ns/iter (+/- 532,663) = 786 MB/s +test regexdna::variant3 ... bench: 10,720,523 ns/iter (+/- 414,684) = 474 MB/s +test regexdna::variant4 ... bench: 9,882,647 ns/iter (+/- 297,904) = 514 MB/s +test regexdna::variant5 ... bench: 9,664,151 ns/iter (+/- 659,587) = 526 MB/s +test regexdna::variant6 ... bench: 7,174,368 ns/iter (+/- 322,025) = 708 MB/s +test regexdna::variant7 ... bench: 7,605,668 ns/iter (+/- 411,605) = 668 MB/s +test regexdna::variant8 ... 
bench: 9,580,481 ns/iter (+/- 373,332) = 530 MB/s +test regexdna::variant9 ... bench: 18,270,186 ns/iter (+/- 986,510) = 278 MB/s +test sherlock::before_after_holmes ... bench: 21,982,853 ns/iter (+/- 1,032,853) = 27 MB/s +test sherlock::before_holmes ... bench: 21,947,949 ns/iter (+/- 848,014) = 27 MB/s +test sherlock::holmes_cochar_watson ... bench: 909,691 ns/iter (+/- 48,847) = 653 MB/s +test sherlock::holmes_coword_watson ... bench: 124,771,191 ns/iter (+/- 8,084,768) = 4 MB/s +test sherlock::ing_suffix ... bench: 17,864,129 ns/iter (+/- 1,343,114) = 33 MB/s +test sherlock::ing_suffix_limited_space ... bench: 21,009,249 ns/iter (+/- 452,676) = 28 MB/s +test sherlock::letters ... bench: 37,888,421 ns/iter (+/- 2,482,541) = 15 MB/s +test sherlock::letters_lower ... bench: 37,029,883 ns/iter (+/- 481,280) = 16 MB/s +test sherlock::letters_upper ... bench: 1,627,107 ns/iter (+/- 51,063) = 365 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 370,850 ns/iter (+/- 15,384) = 1604 MB/s +test sherlock::name_alt1 ... bench: 371,780 ns/iter (+/- 28,486) = 1600 MB/s +test sherlock::name_alt2 ... bench: 506,859 ns/iter (+/- 17,553) = 1173 MB/s +test sherlock::name_alt3 ... bench: 915,729 ns/iter (+/- 99,429) = 649 MB/s +test sherlock::name_alt3_nocase ... bench: 1,512,050 ns/iter (+/- 186,130) = 393 MB/s +test sherlock::name_alt4 ... bench: 578,710 ns/iter (+/- 18,089) = 1028 MB/s +test sherlock::name_alt4_nocase ... bench: 752,912 ns/iter (+/- 51,342) = 790 MB/s +test sherlock::name_alt5 ... bench: 595,803 ns/iter (+/- 15,053) = 998 MB/s +test sherlock::name_alt5_nocase ... bench: 730,149 ns/iter (+/- 40,662) = 814 MB/s +test sherlock::name_holmes ... bench: 115,596 ns/iter (+/- 4,597) = 5146 MB/s +test sherlock::name_holmes_nocase ... bench: 429,765 ns/iter (+/- 16,685) = 1384 MB/s +test sherlock::name_sherlock ... bench: 38,985 ns/iter (+/- 2,195) = 15260 MB/s +test sherlock::name_sherlock_holmes ... 
bench: 49,610 ns/iter (+/- 2,005) = 11992 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 374,335 ns/iter (+/- 37,062) = 1589 MB/s +test sherlock::name_sherlock_nocase ... bench: 355,559 ns/iter (+/- 40,873) = 1673 MB/s +test sherlock::name_whitespace ... bench: 57,616 ns/iter (+/- 5,124) = 10325 MB/s +test sherlock::no_match_common ... bench: 284,228 ns/iter (+/- 29,087) = 2093 MB/s +test sherlock::no_match_really_common ... bench: 287,263 ns/iter (+/- 22,755) = 2071 MB/s +test sherlock::no_match_uncommon ... bench: 14,030 ns/iter (+/- 526) = 42404 MB/s +test sherlock::quotes ... bench: 5,563,019 ns/iter (+/- 537,611) = 106 MB/s +test sherlock::repeated_class_negation ... bench: 54,831,275 ns/iter (+/- 5,982,214) = 10 MB/s +test sherlock::the_lower ... bench: 1,298,205 ns/iter (+/- 73,265) = 458 MB/s +test sherlock::the_nocase ... bench: 1,572,579 ns/iter (+/- 63,536) = 378 MB/s +test sherlock::the_upper ... bench: 112,795 ns/iter (+/- 4,179) = 5274 MB/s +test sherlock::the_whitespace ... bench: 2,630,026 ns/iter (+/- 227,760) = 226 MB/s +test sherlock::word_ending_n ... bench: 26,975,356 ns/iter (+/- 2,531,982) = 22 MB/s +test sherlock::words ... bench: 23,116,326 ns/iter (+/- 458,721) = 25 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 94 measured; 0 filtered out + diff --git a/vendor/regex/record/old-bench-log/06/pcre1 b/vendor/regex/record/old-bench-log/06/pcre1 new file mode 100644 index 0000000..f8a9100 --- /dev/null +++ b/vendor/regex/record/old-bench-log/06/pcre1 @@ -0,0 +1,98 @@ + +running 93 tests +test misc::anchored_literal_long_match ... bench: 32 ns/iter (+/- 38) = 12187 MB/s +test misc::anchored_literal_long_non_match ... bench: 23 ns/iter (+/- 1) = 16956 MB/s +test misc::anchored_literal_short_match ... bench: 30 ns/iter (+/- 1) = 866 MB/s +test misc::anchored_literal_short_non_match ... bench: 23 ns/iter (+/- 0) = 1130 MB/s +test misc::easy0_1K ... bench: 261 ns/iter (+/- 21) = 4026 MB/s +test misc::easy0_1MB ... 
bench: 202,218 ns/iter (+/- 16,050) = 5185 MB/s +test misc::easy0_32 ... bench: 49 ns/iter (+/- 3) = 1204 MB/s +test misc::easy0_32K ... bench: 6,305 ns/iter (+/- 448) = 5201 MB/s +test misc::easy1_1K ... bench: 245 ns/iter (+/- 5) = 4261 MB/s +test misc::easy1_1MB ... bench: 198,215 ns/iter (+/- 10,461) = 5290 MB/s +test misc::easy1_32 ... bench: 49 ns/iter (+/- 1) = 1061 MB/s +test misc::easy1_32K ... bench: 6,309 ns/iter (+/- 358) = 5197 MB/s +test misc::hard_1K ... bench: 1,306 ns/iter (+/- 50) = 804 MB/s +test misc::hard_1MB ... bench: 1,219,034 ns/iter (+/- 92,693) = 860 MB/s +test misc::hard_32 ... bench: 95 ns/iter (+/- 7) = 621 MB/s +test misc::hard_32K ... bench: 37,713 ns/iter (+/- 948) = 869 MB/s +test misc::literal ... bench: 29 ns/iter (+/- 1) = 1758 MB/s +test misc::long_needle1 ... bench: 548,012 ns/iter (+/- 26,029) = 182 MB/s +test misc::long_needle2 ... bench: 538,536 ns/iter (+/- 54,612) = 185 MB/s +test misc::match_class ... bench: 94 ns/iter (+/- 3) = 861 MB/s +test misc::match_class_in_range ... bench: 29 ns/iter (+/- 1) = 2793 MB/s +test misc::match_class_unicode ... bench: 370 ns/iter (+/- 19) = 435 MB/s +test misc::medium_1K ... bench: 256 ns/iter (+/- 13) = 4109 MB/s +test misc::medium_1MB ... bench: 207,655 ns/iter (+/- 9,168) = 5049 MB/s +test misc::medium_32 ... bench: 51 ns/iter (+/- 5) = 1176 MB/s +test misc::medium_32K ... bench: 6,144 ns/iter (+/- 327) = 5337 MB/s +test misc::not_literal ... bench: 166 ns/iter (+/- 14) = 307 MB/s +test misc::one_pass_long_prefix ... bench: 27 ns/iter (+/- 2) = 962 MB/s +test misc::one_pass_long_prefix_not ... bench: 29 ns/iter (+/- 1) = 896 MB/s +test misc::one_pass_short ... bench: 55 ns/iter (+/- 2) = 309 MB/s +test misc::one_pass_short_not ... bench: 55 ns/iter (+/- 3) = 309 MB/s +test misc::reallyhard2_1K ... bench: 4,404 ns/iter (+/- 346) = 236 MB/s +test misc::reallyhard_1K ... bench: 1,365 ns/iter (+/- 52) = 769 MB/s +test misc::reallyhard_1MB ... 
bench: 1,118,777 ns/iter (+/- 72,209) = 937 MB/s +test misc::reallyhard_32 ... bench: 112 ns/iter (+/- 4) = 526 MB/s +test misc::reallyhard_32K ... bench: 41,164 ns/iter (+/- 2,351) = 796 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 4,413 ns/iter (+/- 304) = 1812 MB/s +test regexdna::find_new_lines ... bench: 2,802,109 ns/iter (+/- 129,768) = 1814 MB/s +test regexdna::subst1 ... bench: 1,263,401 ns/iter (+/- 54,374) = 4023 MB/s +test regexdna::subst10 ... bench: 1,254,544 ns/iter (+/- 101,656) = 4051 MB/s +test regexdna::subst11 ... bench: 1,408,321 ns/iter (+/- 121,522) = 3609 MB/s +test regexdna::subst2 ... bench: 1,364,704 ns/iter (+/- 106,508) = 3724 MB/s +test regexdna::subst3 ... bench: 1,258,687 ns/iter (+/- 84,504) = 4038 MB/s +test regexdna::subst4 ... bench: 1,301,822 ns/iter (+/- 62,866) = 3904 MB/s +test regexdna::subst5 ... bench: 1,338,338 ns/iter (+/- 313,996) = 3798 MB/s +test regexdna::subst6 ... bench: 1,349,310 ns/iter (+/- 117,181) = 3767 MB/s +test regexdna::subst7 ... bench: 1,390,090 ns/iter (+/- 210,430) = 3656 MB/s +test regexdna::subst8 ... bench: 1,293,481 ns/iter (+/- 38,532) = 3930 MB/s +test regexdna::subst9 ... bench: 1,245,652 ns/iter (+/- 58,026) = 4080 MB/s +test regexdna::variant1 ... bench: 15,239,324 ns/iter (+/- 414,621) = 333 MB/s +test regexdna::variant2 ... bench: 16,489,922 ns/iter (+/- 825,229) = 308 MB/s +test regexdna::variant3 ... bench: 19,945,871 ns/iter (+/- 665,046) = 254 MB/s +test regexdna::variant4 ... bench: 18,604,011 ns/iter (+/- 712,670) = 273 MB/s +test regexdna::variant5 ... bench: 17,084,919 ns/iter (+/- 1,379,879) = 297 MB/s +test regexdna::variant6 ... bench: 16,918,130 ns/iter (+/- 975,620) = 300 MB/s +test regexdna::variant7 ... bench: 19,114,194 ns/iter (+/- 857,330) = 265 MB/s +test regexdna::variant8 ... bench: 23,831,138 ns/iter (+/- 878,576) = 213 MB/s +test regexdna::variant9 ... bench: 21,835,777 ns/iter (+/- 1,339,143) = 232 MB/s +test sherlock::before_after_holmes ... 
bench: 4,401,834 ns/iter (+/- 218,696) = 135 MB/s +test sherlock::before_holmes ... bench: 4,436,717 ns/iter (+/- 109,324) = 134 MB/s +test sherlock::holmes_cochar_watson ... bench: 497,667 ns/iter (+/- 19,212) = 1195 MB/s +test sherlock::ing_suffix ... bench: 1,852,390 ns/iter (+/- 77,888) = 321 MB/s +test sherlock::ing_suffix_limited_space ... bench: 4,775,078 ns/iter (+/- 152,556) = 124 MB/s +test sherlock::letters ... bench: 13,888,750 ns/iter (+/- 668,831) = 42 MB/s +test sherlock::letters_lower ... bench: 13,452,405 ns/iter (+/- 453,184) = 44 MB/s +test sherlock::letters_upper ... bench: 1,870,502 ns/iter (+/- 57,825) = 318 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 198,649 ns/iter (+/- 9,080) = 2994 MB/s +test sherlock::name_alt1 ... bench: 464,513 ns/iter (+/- 29,935) = 1280 MB/s +test sherlock::name_alt2 ... bench: 470,746 ns/iter (+/- 12,931) = 1263 MB/s +test sherlock::name_alt3 ... bench: 874,352 ns/iter (+/- 38,618) = 680 MB/s +test sherlock::name_alt3_nocase ... bench: 2,821,106 ns/iter (+/- 113,055) = 210 MB/s +test sherlock::name_alt4 ... bench: 78,753 ns/iter (+/- 3,111) = 7554 MB/s +test sherlock::name_alt4_nocase ... bench: 1,596,406 ns/iter (+/- 62,919) = 372 MB/s +test sherlock::name_alt5 ... bench: 655,870 ns/iter (+/- 32,597) = 907 MB/s +test sherlock::name_alt5_nocase ... bench: 1,732,595 ns/iter (+/- 75,827) = 343 MB/s +test sherlock::name_holmes ... bench: 400,037 ns/iter (+/- 16,935) = 1487 MB/s +test sherlock::name_holmes_nocase ... bench: 501,467 ns/iter (+/- 20,805) = 1186 MB/s +test sherlock::name_sherlock ... bench: 267,873 ns/iter (+/- 10,199) = 2220 MB/s +test sherlock::name_sherlock_holmes ... bench: 202,107 ns/iter (+/- 10,314) = 2943 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 1,070,780 ns/iter (+/- 43,144) = 555 MB/s +test sherlock::name_sherlock_nocase ... bench: 1,074,139 ns/iter (+/- 48,270) = 553 MB/s +test sherlock::name_whitespace ... 
bench: 271,978 ns/iter (+/- 10,137) = 2187 MB/s +test sherlock::no_match_common ... bench: 411,484 ns/iter (+/- 13,213) = 1445 MB/s +test sherlock::no_match_really_common ... bench: 403,709 ns/iter (+/- 12,415) = 1473 MB/s +test sherlock::no_match_uncommon ... bench: 27,730 ns/iter (+/- 928) = 21454 MB/s +test sherlock::quotes ... bench: 515,141 ns/iter (+/- 17,799) = 1154 MB/s +test sherlock::repeated_class_negation ... bench: 5,842,243 ns/iter (+/- 282,478) = 101 MB/s +test sherlock::the_lower ... bench: 725,059 ns/iter (+/- 36,233) = 820 MB/s +test sherlock::the_nocase ... bench: 812,888 ns/iter (+/- 34,200) = 731 MB/s +test sherlock::the_upper ... bench: 56,746 ns/iter (+/- 2,186) = 10484 MB/s +test sherlock::the_whitespace ... bench: 920,705 ns/iter (+/- 37,325) = 646 MB/s +test sherlock::word_ending_n ... bench: 5,625,614 ns/iter (+/- 199,408) = 105 MB/s +test sherlock::words ... bench: 7,122,561 ns/iter (+/- 161,013) = 83 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 93 measured; 0 filtered out + diff --git a/vendor/regex/record/old-bench-log/06/pcre2 b/vendor/regex/record/old-bench-log/06/pcre2 new file mode 100644 index 0000000..5185301 --- /dev/null +++ b/vendor/regex/record/old-bench-log/06/pcre2 @@ -0,0 +1,98 @@ + +running 93 tests +test misc::anchored_literal_long_match ... bench: 16 ns/iter (+/- 0) = 24375 MB/s +test misc::anchored_literal_long_non_match ... bench: 13 ns/iter (+/- 1) = 30000 MB/s +test misc::anchored_literal_short_match ... bench: 16 ns/iter (+/- 1) = 1625 MB/s +test misc::anchored_literal_short_non_match ... bench: 13 ns/iter (+/- 1) = 2000 MB/s +test misc::easy0_1K ... bench: 104 ns/iter (+/- 5) = 10105 MB/s +test misc::easy0_1MB ... bench: 64,102 ns/iter (+/- 4,103) = 16358 MB/s +test misc::easy0_32 ... bench: 32 ns/iter (+/- 4) = 1843 MB/s +test misc::easy0_32K ... bench: 2,042 ns/iter (+/- 152) = 16060 MB/s +test misc::easy1_1K ... bench: 102 ns/iter (+/- 11) = 10235 MB/s +test misc::easy1_1MB ... 
bench: 63,117 ns/iter (+/- 4,547) = 16613 MB/s +test misc::easy1_32 ... bench: 33 ns/iter (+/- 4) = 1575 MB/s +test misc::easy1_32K ... bench: 2,019 ns/iter (+/- 181) = 16239 MB/s +test misc::hard_1K ... bench: 1,236 ns/iter (+/- 82) = 850 MB/s +test misc::hard_1MB ... bench: 1,041,354 ns/iter (+/- 39,123) = 1006 MB/s +test misc::hard_32 ... bench: 86 ns/iter (+/- 8) = 686 MB/s +test misc::hard_32K ... bench: 33,054 ns/iter (+/- 1,813) = 992 MB/s +test misc::literal ... bench: 20 ns/iter (+/- 2) = 2550 MB/s +test misc::long_needle1 ... bench: 501,732 ns/iter (+/- 52,173) = 199 MB/s +test misc::long_needle2 ... bench: 515,127 ns/iter (+/- 48,790) = 194 MB/s +test misc::match_class ... bench: 55 ns/iter (+/- 7) = 1472 MB/s +test misc::match_class_in_range ... bench: 19 ns/iter (+/- 2) = 4263 MB/s +test misc::match_class_unicode ... bench: 342 ns/iter (+/- 60) = 470 MB/s +test misc::medium_1K ... bench: 106 ns/iter (+/- 4) = 9924 MB/s +test misc::medium_1MB ... bench: 63,011 ns/iter (+/- 4,942) = 16641 MB/s +test misc::medium_32 ... bench: 32 ns/iter (+/- 3) = 1875 MB/s +test misc::medium_32K ... bench: 2,068 ns/iter (+/- 189) = 15858 MB/s +test misc::not_literal ... bench: 147 ns/iter (+/- 13) = 346 MB/s +test misc::one_pass_long_prefix ... bench: 15 ns/iter (+/- 1) = 1733 MB/s +test misc::one_pass_long_prefix_not ... bench: 15 ns/iter (+/- 1) = 1733 MB/s +test misc::one_pass_short ... bench: 42 ns/iter (+/- 3) = 404 MB/s +test misc::one_pass_short_not ... bench: 43 ns/iter (+/- 5) = 395 MB/s +test misc::reallyhard2_1K ... bench: 4,356 ns/iter (+/- 499) = 238 MB/s +test misc::reallyhard_1K ... bench: 1,196 ns/iter (+/- 113) = 878 MB/s +test misc::reallyhard_1MB ... bench: 1,070,155 ns/iter (+/- 90,895) = 979 MB/s +test misc::reallyhard_32 ... bench: 93 ns/iter (+/- 12) = 634 MB/s +test misc::reallyhard_32K ... bench: 33,521 ns/iter (+/- 2,663) = 978 MB/s +test misc::reverse_suffix_no_quadratic ... 
bench: 3,065 ns/iter (+/- 393) = 2610 MB/s +test regexdna::find_new_lines ... bench: 1,891,736 ns/iter (+/- 232,990) = 2687 MB/s +test regexdna::subst1 ... bench: 920,853 ns/iter (+/- 75,276) = 5520 MB/s +test regexdna::subst10 ... bench: 892,533 ns/iter (+/- 77,177) = 5695 MB/s +test regexdna::subst11 ... bench: 869,335 ns/iter (+/- 75,754) = 5847 MB/s +test regexdna::subst2 ... bench: 901,876 ns/iter (+/- 75,287) = 5636 MB/s +test regexdna::subst3 ... bench: 870,185 ns/iter (+/- 53,535) = 5841 MB/s +test regexdna::subst4 ... bench: 859,924 ns/iter (+/- 63,888) = 5911 MB/s +test regexdna::subst5 ... bench: 886,748 ns/iter (+/- 87,929) = 5732 MB/s +test regexdna::subst6 ... bench: 870,428 ns/iter (+/- 47,015) = 5840 MB/s +test regexdna::subst7 ... bench: 865,513 ns/iter (+/- 41,507) = 5873 MB/s +test regexdna::subst8 ... bench: 870,030 ns/iter (+/- 110,449) = 5842 MB/s +test regexdna::subst9 ... bench: 875,649 ns/iter (+/- 32,905) = 5805 MB/s +test regexdna::variant1 ... bench: 9,234,989 ns/iter (+/- 127,076) = 550 MB/s +test regexdna::variant2 ... bench: 11,759,628 ns/iter (+/- 575,788) = 432 MB/s +test regexdna::variant3 ... bench: 11,229,965 ns/iter (+/- 522,759) = 452 MB/s +test regexdna::variant4 ... bench: 10,040,716 ns/iter (+/- 309,357) = 506 MB/s +test regexdna::variant5 ... bench: 10,052,052 ns/iter (+/- 522,338) = 505 MB/s +test regexdna::variant6 ... bench: 10,719,366 ns/iter (+/- 577,988) = 474 MB/s +test regexdna::variant7 ... bench: 11,076,094 ns/iter (+/- 1,291,237) = 458 MB/s +test regexdna::variant8 ... bench: 11,855,290 ns/iter (+/- 667,429) = 428 MB/s +test regexdna::variant9 ... bench: 12,531,240 ns/iter (+/- 606,198) = 405 MB/s +test sherlock::before_after_holmes ... bench: 4,169,656 ns/iter (+/- 222,900) = 142 MB/s +test sherlock::before_holmes ... bench: 4,144,394 ns/iter (+/- 170,133) = 143 MB/s +test sherlock::holmes_cochar_watson ... bench: 74,437 ns/iter (+/- 4,266) = 7992 MB/s +test sherlock::ing_suffix ... 
bench: 1,731,507 ns/iter (+/- 162,892) = 343 MB/s +test sherlock::ing_suffix_limited_space ... bench: 4,418,194 ns/iter (+/- 240,220) = 134 MB/s +test sherlock::letters ... bench: 8,847,041 ns/iter (+/- 392,402) = 67 MB/s +test sherlock::letters_lower ... bench: 8,547,432 ns/iter (+/- 304,256) = 69 MB/s +test sherlock::letters_upper ... bench: 1,584,248 ns/iter (+/- 51,331) = 375 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 38,057 ns/iter (+/- 1,666) = 15632 MB/s +test sherlock::name_alt1 ... bench: 50,415 ns/iter (+/- 3,173) = 11800 MB/s +test sherlock::name_alt2 ... bench: 66,062 ns/iter (+/- 2,807) = 9005 MB/s +test sherlock::name_alt3 ... bench: 720,097 ns/iter (+/- 32,351) = 826 MB/s +test sherlock::name_alt3_nocase ... bench: 2,591,049 ns/iter (+/- 86,537) = 229 MB/s +test sherlock::name_alt4 ... bench: 65,860 ns/iter (+/- 2,780) = 9033 MB/s +test sherlock::name_alt4_nocase ... bench: 1,204,839 ns/iter (+/- 41,087) = 493 MB/s +test sherlock::name_alt5 ... bench: 615,483 ns/iter (+/- 24,177) = 966 MB/s +test sherlock::name_alt5_nocase ... bench: 1,467,461 ns/iter (+/- 71,032) = 405 MB/s +test sherlock::name_holmes ... bench: 48,997 ns/iter (+/- 2,471) = 12142 MB/s +test sherlock::name_holmes_nocase ... bench: 88,549 ns/iter (+/- 4,814) = 6718 MB/s +test sherlock::name_sherlock ... bench: 38,309 ns/iter (+/- 1,354) = 15529 MB/s +test sherlock::name_sherlock_holmes ... bench: 39,062 ns/iter (+/- 4,253) = 15230 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 713,355 ns/iter (+/- 77,990) = 833 MB/s +test sherlock::name_sherlock_nocase ... bench: 719,747 ns/iter (+/- 85,736) = 826 MB/s +test sherlock::name_whitespace ... bench: 39,161 ns/iter (+/- 3,678) = 15191 MB/s +test sherlock::no_match_common ... bench: 35,574 ns/iter (+/- 3,433) = 16723 MB/s +test sherlock::no_match_really_common ... bench: 56,847 ns/iter (+/- 7,068) = 10465 MB/s +test sherlock::no_match_uncommon ... 
bench: 36,185 ns/iter (+/- 4,938) = 16441 MB/s +test sherlock::quotes ... bench: 454,135 ns/iter (+/- 18,816) = 1310 MB/s +test sherlock::repeated_class_negation ... bench: 5,724,068 ns/iter (+/- 342,211) = 103 MB/s +test sherlock::the_lower ... bench: 256,190 ns/iter (+/- 25,452) = 2322 MB/s +test sherlock::the_nocase ... bench: 284,080 ns/iter (+/- 17,165) = 2094 MB/s +test sherlock::the_upper ... bench: 56,120 ns/iter (+/- 2,826) = 10601 MB/s +test sherlock::the_whitespace ... bench: 456,734 ns/iter (+/- 23,405) = 1302 MB/s +test sherlock::word_ending_n ... bench: 5,079,288 ns/iter (+/- 214,895) = 117 MB/s +test sherlock::words ... bench: 5,200,092 ns/iter (+/- 250,085) = 114 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 93 measured; 0 filtered out + diff --git a/vendor/regex/record/old-bench-log/06/re2 b/vendor/regex/record/old-bench-log/06/re2 new file mode 100644 index 0000000..3e1585a --- /dev/null +++ b/vendor/regex/record/old-bench-log/06/re2 @@ -0,0 +1,101 @@ + +running 96 tests +test misc::anchored_literal_long_match ... bench: 73 ns/iter (+/- 8) = 5342 MB/s +test misc::anchored_literal_long_non_match ... bench: 16 ns/iter (+/- 1) = 24375 MB/s +test misc::anchored_literal_short_match ... bench: 73 ns/iter (+/- 9) = 356 MB/s +test misc::anchored_literal_short_non_match ... bench: 16 ns/iter (+/- 1) = 1625 MB/s +test misc::easy0_1K ... bench: 119 ns/iter (+/- 11) = 8831 MB/s +test misc::easy0_1MB ... bench: 25,312 ns/iter (+/- 875) = 41427 MB/s +test misc::easy0_32 ... bench: 112 ns/iter (+/- 5) = 526 MB/s +test misc::easy0_32K ... bench: 534 ns/iter (+/- 43) = 61413 MB/s +test misc::easy1_1K ... bench: 109 ns/iter (+/- 9) = 9577 MB/s +test misc::easy1_1MB ... bench: 23,892 ns/iter (+/- 715) = 43889 MB/s +test misc::easy1_32 ... bench: 102 ns/iter (+/- 8) = 509 MB/s +test misc::easy1_32K ... bench: 519 ns/iter (+/- 54) = 63175 MB/s +test misc::hard_1K ... bench: 1,859 ns/iter (+/- 202) = 565 MB/s +test misc::hard_1MB ... 
bench: 1,871,446 ns/iter (+/- 99,961) = 560 MB/s +test misc::hard_32 ... bench: 162 ns/iter (+/- 20) = 364 MB/s +test misc::hard_32K ... bench: 57,459 ns/iter (+/- 4,672) = 570 MB/s +test misc::literal ... bench: 70 ns/iter (+/- 8) = 728 MB/s +test misc::long_needle1 ... bench: 130,995 ns/iter (+/- 4,935) = 763 MB/s +test misc::long_needle2 ... bench: 129,668 ns/iter (+/- 8,852) = 771 MB/s +test misc::match_class ... bench: 195 ns/iter (+/- 16) = 415 MB/s +test misc::match_class_in_range ... bench: 194 ns/iter (+/- 22) = 417 MB/s +test misc::match_class_unicode ... bench: 630 ns/iter (+/- 61) = 255 MB/s +test misc::medium_1K ... bench: 1,699 ns/iter (+/- 147) = 619 MB/s +test misc::medium_1MB ... bench: 1,633,131 ns/iter (+/- 65,889) = 642 MB/s +test misc::medium_32 ... bench: 169 ns/iter (+/- 18) = 355 MB/s +test misc::medium_32K ... bench: 51,313 ns/iter (+/- 1,855) = 639 MB/s +test misc::no_exponential ... bench: 216 ns/iter (+/- 13) = 462 MB/s +test misc::not_literal ... bench: 140 ns/iter (+/- 6) = 364 MB/s +test misc::one_pass_long_prefix ... bench: 71 ns/iter (+/- 2) = 366 MB/s +test misc::one_pass_long_prefix_not ... bench: 109 ns/iter (+/- 9) = 238 MB/s +test misc::one_pass_short ... bench: 99 ns/iter (+/- 7) = 171 MB/s +test misc::one_pass_short_not ... bench: 96 ns/iter (+/- 5) = 177 MB/s +test misc::reallyhard2_1K ... bench: 1,405 ns/iter (+/- 134) = 740 MB/s +test misc::reallyhard_1K ... bench: 1,875 ns/iter (+/- 168) = 560 MB/s +test misc::reallyhard_1MB ... bench: 1,853,207 ns/iter (+/- 103,218) = 565 MB/s +test misc::reallyhard_32 ... bench: 157 ns/iter (+/- 11) = 375 MB/s +test misc::reallyhard_32K ... bench: 57,880 ns/iter (+/- 5,319) = 566 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 12,686 ns/iter (+/- 536) = 630 MB/s +test regexdna::find_new_lines ... bench: 28,761,913 ns/iter (+/- 1,447,326) = 176 MB/s +test regexdna::subst1 ... bench: 4,629,782 ns/iter (+/- 142,214) = 1097 MB/s +test regexdna::subst10 ... 
bench: 4,692,819 ns/iter (+/- 156,805) = 1083 MB/s +test regexdna::subst11 ... bench: 4,652,438 ns/iter (+/- 206,457) = 1092 MB/s +test regexdna::subst2 ... bench: 4,682,943 ns/iter (+/- 176,335) = 1085 MB/s +test regexdna::subst3 ... bench: 4,646,162 ns/iter (+/- 241,873) = 1094 MB/s +test regexdna::subst4 ... bench: 4,653,380 ns/iter (+/- 188,899) = 1092 MB/s +test regexdna::subst5 ... bench: 4,770,480 ns/iter (+/- 238,930) = 1065 MB/s +test regexdna::subst6 ... bench: 4,671,427 ns/iter (+/- 286,241) = 1088 MB/s +test regexdna::subst7 ... bench: 4,658,214 ns/iter (+/- 210,723) = 1091 MB/s +test regexdna::subst8 ... bench: 4,909,600 ns/iter (+/- 417,894) = 1035 MB/s +test regexdna::subst9 ... bench: 4,910,285 ns/iter (+/- 587,024) = 1035 MB/s +test regexdna::variant1 ... bench: 20,895,772 ns/iter (+/- 2,313,771) = 243 MB/s +test regexdna::variant2 ... bench: 20,465,984 ns/iter (+/- 1,913,613) = 248 MB/s +test regexdna::variant3 ... bench: 19,469,527 ns/iter (+/- 1,367,226) = 261 MB/s +test regexdna::variant4 ... bench: 21,662,238 ns/iter (+/- 1,489,235) = 234 MB/s +test regexdna::variant5 ... bench: 21,808,098 ns/iter (+/- 2,294,522) = 233 MB/s +test regexdna::variant6 ... bench: 21,208,952 ns/iter (+/- 986,848) = 239 MB/s +test regexdna::variant7 ... bench: 20,289,473 ns/iter (+/- 595,084) = 250 MB/s +test regexdna::variant8 ... bench: 17,765,356 ns/iter (+/- 503,529) = 286 MB/s +test regexdna::variant9 ... bench: 13,222,010 ns/iter (+/- 509,278) = 384 MB/s +test sherlock::before_after_holmes ... bench: 1,313,676 ns/iter (+/- 52,992) = 452 MB/s +test sherlock::before_holmes ... bench: 1,337,432 ns/iter (+/- 37,054) = 444 MB/s +test sherlock::everything_greedy ... bench: 6,080,272 ns/iter (+/- 110,011) = 97 MB/s +test sherlock::everything_greedy_nl ... bench: 2,395,932 ns/iter (+/- 123,521) = 248 MB/s +test sherlock::holmes_cochar_watson ... bench: 1,052,245 ns/iter (+/- 33,929) = 565 MB/s +test sherlock::holmes_coword_watson ... 
bench: 1,063,007 ns/iter (+/- 34,462) = 559 MB/s +test sherlock::ing_suffix ... bench: 2,703,395 ns/iter (+/- 63,263) = 220 MB/s +test sherlock::ing_suffix_limited_space ... bench: 1,608,756 ns/iter (+/- 42,100) = 369 MB/s +test sherlock::letters ... bench: 68,220,129 ns/iter (+/- 3,602,216) = 8 MB/s +test sherlock::letters_lower ... bench: 67,390,101 ns/iter (+/- 6,032,867) = 8 MB/s +test sherlock::letters_upper ... bench: 3,708,482 ns/iter (+/- 235,128) = 160 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 2,816,517 ns/iter (+/- 99,081) = 211 MB/s +test sherlock::name_alt1 ... bench: 53,193 ns/iter (+/- 1,575) = 11184 MB/s +test sherlock::name_alt2 ... bench: 1,133,704 ns/iter (+/- 36,634) = 524 MB/s +test sherlock::name_alt3 ... bench: 1,227,785 ns/iter (+/- 31,742) = 484 MB/s +test sherlock::name_alt3_nocase ... bench: 2,451,285 ns/iter (+/- 103,766) = 242 MB/s +test sherlock::name_alt4 ... bench: 1,168,955 ns/iter (+/- 87,785) = 508 MB/s +test sherlock::name_alt4_nocase ... bench: 1,699,899 ns/iter (+/- 91,762) = 349 MB/s +test sherlock::name_alt5 ... bench: 1,167,232 ns/iter (+/- 51,695) = 509 MB/s +test sherlock::name_alt5_nocase ... bench: 1,805,463 ns/iter (+/- 74,631) = 329 MB/s +test sherlock::name_holmes ... bench: 108,195 ns/iter (+/- 3,815) = 5498 MB/s +test sherlock::name_holmes_nocase ... bench: 1,360,092 ns/iter (+/- 60,416) = 437 MB/s +test sherlock::name_sherlock ... bench: 40,376 ns/iter (+/- 5,104) = 14734 MB/s +test sherlock::name_sherlock_holmes ... bench: 41,361 ns/iter (+/- 2,553) = 14383 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 1,317,594 ns/iter (+/- 168,248) = 451 MB/s +test sherlock::name_sherlock_nocase ... bench: 1,284,026 ns/iter (+/- 92,188) = 463 MB/s +test sherlock::name_whitespace ... bench: 44,973 ns/iter (+/- 5,888) = 13228 MB/s +test sherlock::no_match_common ... bench: 341,867 ns/iter (+/- 32,045) = 1740 MB/s +test sherlock::no_match_really_common ... 
bench: 331,760 ns/iter (+/- 43,608) = 1793 MB/s +test sherlock::no_match_uncommon ... bench: 14,285 ns/iter (+/- 760) = 41647 MB/s +test sherlock::quotes ... bench: 1,342,144 ns/iter (+/- 96,471) = 443 MB/s +test sherlock::the_lower ... bench: 1,722,919 ns/iter (+/- 83,873) = 345 MB/s +test sherlock::the_nocase ... bench: 2,866,258 ns/iter (+/- 117,349) = 207 MB/s +test sherlock::the_upper ... bench: 151,020 ns/iter (+/- 13,454) = 3939 MB/s +test sherlock::the_whitespace ... bench: 1,597,329 ns/iter (+/- 149,689) = 372 MB/s +test sherlock::word_ending_n ... bench: 2,193,027 ns/iter (+/- 136,408) = 271 MB/s +test sherlock::words ... bench: 20,721,148 ns/iter (+/- 1,968,912) = 28 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 96 measured; 0 filtered out + diff --git a/vendor/regex/record/old-bench-log/06/rust b/vendor/regex/record/old-bench-log/06/rust new file mode 100644 index 0000000..53ab222 --- /dev/null +++ b/vendor/regex/record/old-bench-log/06/rust @@ -0,0 +1,113 @@ + +running 108 tests +test misc::anchored_literal_long_match ... bench: 22 ns/iter (+/- 2) = 17727 MB/s +test misc::anchored_literal_long_non_match ... bench: 27 ns/iter (+/- 2) = 14444 MB/s +test misc::anchored_literal_short_match ... bench: 22 ns/iter (+/- 1) = 1181 MB/s +test misc::anchored_literal_short_non_match ... bench: 26 ns/iter (+/- 2) = 1000 MB/s +test misc::easy0_1K ... bench: 16 ns/iter (+/- 1) = 65687 MB/s +test misc::easy0_1MB ... bench: 19 ns/iter (+/- 2) = 55189631 MB/s +test misc::easy0_32 ... bench: 15 ns/iter (+/- 1) = 3933 MB/s +test misc::easy0_32K ... bench: 16 ns/iter (+/- 0) = 2049687 MB/s +test misc::easy1_1K ... bench: 43 ns/iter (+/- 2) = 24279 MB/s +test misc::easy1_1MB ... bench: 45 ns/iter (+/- 4) = 23302133 MB/s +test misc::easy1_32 ... bench: 43 ns/iter (+/- 5) = 1209 MB/s +test misc::easy1_32K ... bench: 43 ns/iter (+/- 2) = 762511 MB/s +test misc::hard_1K ... bench: 53 ns/iter (+/- 6) = 19830 MB/s +test misc::hard_1MB ... 
bench: 57 ns/iter (+/- 1) = 18396543 MB/s +test misc::hard_32 ... bench: 53 ns/iter (+/- 4) = 1113 MB/s +test misc::hard_32K ... bench: 53 ns/iter (+/- 6) = 618773 MB/s +test misc::literal ... bench: 13 ns/iter (+/- 1) = 3923 MB/s +test misc::long_needle1 ... bench: 1,203 ns/iter (+/- 55) = 83126 MB/s +test misc::long_needle2 ... bench: 149,418 ns/iter (+/- 13,825) = 669 MB/s +test misc::match_class ... bench: 62 ns/iter (+/- 6) = 1306 MB/s +test misc::match_class_in_range ... bench: 23 ns/iter (+/- 2) = 3521 MB/s +test misc::match_class_unicode ... bench: 268 ns/iter (+/- 30) = 600 MB/s +test misc::medium_1K ... bench: 16 ns/iter (+/- 0) = 65750 MB/s +test misc::medium_1MB ... bench: 20 ns/iter (+/- 15) = 52430200 MB/s +test misc::medium_32 ... bench: 16 ns/iter (+/- 2) = 3750 MB/s +test misc::medium_32K ... bench: 16 ns/iter (+/- 1) = 2049750 MB/s +test misc::no_exponential ... bench: 353 ns/iter (+/- 26) = 283 MB/s +test misc::not_literal ... bench: 97 ns/iter (+/- 9) = 525 MB/s +test misc::one_pass_long_prefix ... bench: 58 ns/iter (+/- 5) = 448 MB/s +test misc::one_pass_long_prefix_not ... bench: 60 ns/iter (+/- 6) = 433 MB/s +test misc::one_pass_short ... bench: 43 ns/iter (+/- 4) = 395 MB/s +test misc::one_pass_short_not ... bench: 46 ns/iter (+/- 2) = 369 MB/s +test misc::reallyhard2_1K ... bench: 62 ns/iter (+/- 5) = 16774 MB/s +test misc::reallyhard_1K ... bench: 1,650 ns/iter (+/- 176) = 636 MB/s +test misc::reallyhard_1MB ... bench: 1,635,447 ns/iter (+/- 97,611) = 641 MB/s +test misc::reallyhard_32 ... bench: 109 ns/iter (+/- 9) = 541 MB/s +test misc::reallyhard_32K ... bench: 50,991 ns/iter (+/- 4,031) = 643 MB/s +test misc::replace_all ... bench: 155 ns/iter (+/- 8) +test misc::reverse_suffix_no_quadratic ... bench: 4,254 ns/iter (+/- 489) = 1880 MB/s +test misc::short_haystack_1000000x ... bench: 91,124 ns/iter (+/- 4,584) = 87792 MB/s +test misc::short_haystack_100000x ... 
bench: 10,681 ns/iter (+/- 420) = 74900 MB/s +test misc::short_haystack_10000x ... bench: 3,240 ns/iter (+/- 395) = 24694 MB/s +test misc::short_haystack_1000x ... bench: 403 ns/iter (+/- 48) = 19878 MB/s +test misc::short_haystack_100x ... bench: 303 ns/iter (+/- 27) = 2676 MB/s +test misc::short_haystack_10x ... bench: 272 ns/iter (+/- 27) = 334 MB/s +test misc::short_haystack_1x ... bench: 264 ns/iter (+/- 32) = 71 MB/s +test misc::short_haystack_2x ... bench: 269 ns/iter (+/- 25) = 100 MB/s +test misc::short_haystack_3x ... bench: 264 ns/iter (+/- 26) = 132 MB/s +test misc::short_haystack_4x ... bench: 271 ns/iter (+/- 28) = 158 MB/s +test regexdna::find_new_lines ... bench: 13,700,405 ns/iter (+/- 647,840) = 371 MB/s +test regexdna::subst1 ... bench: 806,342 ns/iter (+/- 48,014) = 6304 MB/s +test regexdna::subst10 ... bench: 794,403 ns/iter (+/- 40,393) = 6399 MB/s +test regexdna::subst11 ... bench: 801,963 ns/iter (+/- 46,164) = 6338 MB/s +test regexdna::subst2 ... bench: 779,768 ns/iter (+/- 81,505) = 6519 MB/s +test regexdna::subst3 ... bench: 777,024 ns/iter (+/- 52,795) = 6542 MB/s +test regexdna::subst4 ... bench: 769,862 ns/iter (+/- 48,980) = 6603 MB/s +test regexdna::subst5 ... bench: 779,754 ns/iter (+/- 39,784) = 6519 MB/s +test regexdna::subst6 ... bench: 769,400 ns/iter (+/- 69,980) = 6606 MB/s +test regexdna::subst7 ... bench: 771,457 ns/iter (+/- 40,490) = 6589 MB/s +test regexdna::subst8 ... bench: 808,468 ns/iter (+/- 53,093) = 6287 MB/s +test regexdna::subst9 ... bench: 771,869 ns/iter (+/- 50,966) = 6585 MB/s +test regexdna::variant1 ... bench: 3,093,422 ns/iter (+/- 222,818) = 1643 MB/s +test regexdna::variant2 ... bench: 6,520,178 ns/iter (+/- 400,704) = 779 MB/s +test regexdna::variant3 ... bench: 7,297,818 ns/iter (+/- 319,866) = 696 MB/s +test regexdna::variant4 ... bench: 7,356,045 ns/iter (+/- 530,375) = 691 MB/s +test regexdna::variant5 ... bench: 5,977,343 ns/iter (+/- 296,375) = 850 MB/s +test regexdna::variant6 ... 
bench: 6,045,776 ns/iter (+/- 270,954) = 840 MB/s +test regexdna::variant7 ... bench: 5,447,060 ns/iter (+/- 223,542) = 933 MB/s +test regexdna::variant8 ... bench: 5,615,676 ns/iter (+/- 419,756) = 905 MB/s +test regexdna::variant9 ... bench: 5,457,949 ns/iter (+/- 439,821) = 931 MB/s +test sherlock::before_after_holmes ... bench: 957,660 ns/iter (+/- 96,491) = 621 MB/s +test sherlock::before_holmes ... bench: 65,680 ns/iter (+/- 3,085) = 9058 MB/s +test sherlock::everything_greedy ... bench: 2,151,577 ns/iter (+/- 70,114) = 276 MB/s +test sherlock::everything_greedy_nl ... bench: 836,942 ns/iter (+/- 81,010) = 710 MB/s +test sherlock::holmes_cochar_watson ... bench: 137,441 ns/iter (+/- 14,157) = 4328 MB/s +test sherlock::holmes_coword_watson ... bench: 514,100 ns/iter (+/- 48,210) = 1157 MB/s +test sherlock::ing_suffix ... bench: 409,126 ns/iter (+/- 23,370) = 1454 MB/s +test sherlock::ing_suffix_limited_space ... bench: 1,117,457 ns/iter (+/- 53,545) = 532 MB/s +test sherlock::letters ... bench: 23,152,671 ns/iter (+/- 1,002,203) = 25 MB/s +test sherlock::letters_lower ... bench: 22,521,833 ns/iter (+/- 1,178,375) = 26 MB/s +test sherlock::letters_upper ... bench: 1,841,871 ns/iter (+/- 108,471) = 323 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 946,126 ns/iter (+/- 53,405) = 628 MB/s +test sherlock::name_alt1 ... bench: 25,830 ns/iter (+/- 1,054) = 23032 MB/s +test sherlock::name_alt2 ... bench: 116,879 ns/iter (+/- 6,000) = 5090 MB/s +test sherlock::name_alt3 ... bench: 125,746 ns/iter (+/- 7,121) = 4731 MB/s +test sherlock::name_alt3_nocase ... bench: 1,203,114 ns/iter (+/- 72,037) = 494 MB/s +test sherlock::name_alt4 ... bench: 156,208 ns/iter (+/- 5,188) = 3808 MB/s +test sherlock::name_alt4_nocase ... bench: 222,618 ns/iter (+/- 30,017) = 2672 MB/s +test sherlock::name_alt5 ... bench: 133,440 ns/iter (+/- 14,831) = 4458 MB/s +test sherlock::name_alt5_nocase ... 
bench: 558,482 ns/iter (+/- 22,435) = 1065 MB/s +test sherlock::name_holmes ... bench: 30,800 ns/iter (+/- 2,933) = 19316 MB/s +test sherlock::name_holmes_nocase ... bench: 190,736 ns/iter (+/- 24,310) = 3119 MB/s +test sherlock::name_sherlock ... bench: 56,238 ns/iter (+/- 3,310) = 10578 MB/s +test sherlock::name_sherlock_holmes ... bench: 24,129 ns/iter (+/- 2,662) = 24656 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 174,649 ns/iter (+/- 13,487) = 3406 MB/s +test sherlock::name_sherlock_nocase ... bench: 157,674 ns/iter (+/- 7,888) = 3773 MB/s +test sherlock::name_whitespace ... bench: 74,637 ns/iter (+/- 6,523) = 7971 MB/s +test sherlock::no_match_common ... bench: 15,140 ns/iter (+/- 969) = 39295 MB/s +test sherlock::no_match_really_common ... bench: 305,112 ns/iter (+/- 31,314) = 1949 MB/s +test sherlock::no_match_uncommon ... bench: 15,539 ns/iter (+/- 1,269) = 38286 MB/s +test sherlock::quotes ... bench: 482,180 ns/iter (+/- 33,736) = 1233 MB/s +test sherlock::repeated_class_negation ... bench: 78,428,426 ns/iter (+/- 6,705,217) = 7 MB/s +test sherlock::the_lower ... bench: 576,511 ns/iter (+/- 21,735) = 1031 MB/s +test sherlock::the_nocase ... bench: 413,565 ns/iter (+/- 42,941) = 1438 MB/s +test sherlock::the_upper ... bench: 34,491 ns/iter (+/- 1,901) = 17248 MB/s +test sherlock::the_whitespace ... bench: 1,061,365 ns/iter (+/- 66,639) = 560 MB/s +test sherlock::word_ending_n ... bench: 1,763,795 ns/iter (+/- 83,031) = 337 MB/s +test sherlock::words ... bench: 9,281,896 ns/iter (+/- 934,308) = 64 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 108 measured; 0 filtered out + diff --git a/vendor/regex/record/old-bench-log/07/boost b/vendor/regex/record/old-bench-log/07/boost new file mode 100644 index 0000000..5a13a10 --- /dev/null +++ b/vendor/regex/record/old-bench-log/07/boost @@ -0,0 +1,97 @@ + +running 92 tests +test misc::anchored_literal_long_match ... 
bench: 174 ns/iter (+/- 0) = 2241 MB/s +test misc::anchored_literal_long_non_match ... bench: 329 ns/iter (+/- 3) = 1185 MB/s +test misc::anchored_literal_short_match ... bench: 168 ns/iter (+/- 0) = 154 MB/s +test misc::anchored_literal_short_non_match ... bench: 121 ns/iter (+/- 0) = 214 MB/s +test misc::easy0_1K ... bench: 660 ns/iter (+/- 3) = 1592 MB/s +test misc::easy0_1MB ... bench: 514,707 ns/iter (+/- 2,689) = 2037 MB/s +test misc::easy0_32 ... bench: 170 ns/iter (+/- 2) = 347 MB/s +test misc::easy0_32K ... bench: 16,208 ns/iter (+/- 99) = 2023 MB/s +test misc::easy1_1K ... bench: 756 ns/iter (+/- 1) = 1380 MB/s +test misc::easy1_1MB ... bench: 514,816 ns/iter (+/- 2,832) = 2036 MB/s +test misc::easy1_32 ... bench: 271 ns/iter (+/- 3) = 191 MB/s +test misc::easy1_32K ... bench: 16,316 ns/iter (+/- 93) = 2009 MB/s +test misc::hard_1K ... bench: 63,089 ns/iter (+/- 594) = 16 MB/s +test misc::hard_1MB ... bench: 66,537,328 ns/iter (+/- 866,695) = 15 MB/s +test misc::hard_32 ... bench: 2,125 ns/iter (+/- 8) = 27 MB/s +test misc::hard_32K ... bench: 2,075,568 ns/iter (+/- 6,634) = 15 MB/s +test misc::literal ... bench: 143 ns/iter (+/- 1) = 356 MB/s +test misc::long_needle1 ... bench: 6,557,839 ns/iter (+/- 27,779) = 15 MB/s +test misc::long_needle2 ... bench: 6,557,332 ns/iter (+/- 101,494) = 15 MB/s +test misc::match_class ... bench: 157 ns/iter (+/- 0) = 515 MB/s +test misc::match_class_in_range ... bench: 157 ns/iter (+/- 4) = 515 MB/s +test misc::medium_1K ... bench: 665 ns/iter (+/- 2) = 1581 MB/s +test misc::medium_1MB ... bench: 514,869 ns/iter (+/- 5,832) = 2036 MB/s +test misc::medium_32 ... bench: 167 ns/iter (+/- 1) = 359 MB/s +test misc::medium_32K ... bench: 16,253 ns/iter (+/- 74) = 2017 MB/s +test misc::no_exponential ... bench: 1,717 ns/iter (+/- 13) = 58 MB/s +test misc::not_literal ... bench: 1,084 ns/iter (+/- 16) = 47 MB/s +test misc::one_pass_long_prefix ... bench: 169 ns/iter (+/- 2) = 153 MB/s +test misc::one_pass_long_prefix_not ... 
bench: 169 ns/iter (+/- 6) = 153 MB/s +test misc::one_pass_short ... bench: 1,105 ns/iter (+/- 2) = 15 MB/s +test misc::one_pass_short_not ... bench: 1,076 ns/iter (+/- 10) = 15 MB/s +test misc::reallyhard2_1K ... bench: 4,935 ns/iter (+/- 39) = 210 MB/s +test misc::reallyhard_1K ... bench: 63,076 ns/iter (+/- 226) = 16 MB/s +test misc::reallyhard_1MB ... bench: 68,534,102 ns/iter (+/- 125,043) = 15 MB/s +test misc::reallyhard_32 ... bench: 2,134 ns/iter (+/- 8) = 27 MB/s +test misc::reallyhard_32K ... bench: 2,074,582 ns/iter (+/- 5,943) = 15 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 4,001 ns/iter (+/- 5) = 1999 MB/s +test regexdna::find_new_lines ... bench: 12,942,765 ns/iter (+/- 21,828) = 392 MB/s +test regexdna::subst1 ... bench: 6,241,036 ns/iter (+/- 13,806) = 814 MB/s +test regexdna::subst10 ... bench: 6,247,896 ns/iter (+/- 28,406) = 813 MB/s +test regexdna::subst11 ... bench: 6,240,960 ns/iter (+/- 20,660) = 814 MB/s +test regexdna::subst2 ... bench: 6,245,156 ns/iter (+/- 17,639) = 813 MB/s +test regexdna::subst3 ... bench: 6,276,881 ns/iter (+/- 14,851) = 809 MB/s +test regexdna::subst4 ... bench: 6,249,549 ns/iter (+/- 30,600) = 813 MB/s +test regexdna::subst5 ... bench: 6,251,942 ns/iter (+/- 33,889) = 813 MB/s +test regexdna::subst6 ... bench: 6,244,011 ns/iter (+/- 11,642) = 814 MB/s +test regexdna::subst7 ... bench: 6,283,445 ns/iter (+/- 11,693) = 809 MB/s +test regexdna::subst8 ... bench: 6,247,310 ns/iter (+/- 11,590) = 813 MB/s +test regexdna::subst9 ... bench: 6,249,184 ns/iter (+/- 8,159) = 813 MB/s +test regexdna::variant1 ... bench: 73,947,890 ns/iter (+/- 930,039) = 68 MB/s +test regexdna::variant2 ... bench: 108,486,922 ns/iter (+/- 181,287) = 46 MB/s +test regexdna::variant3 ... bench: 93,241,161 ns/iter (+/- 143,224) = 54 MB/s +test regexdna::variant4 ... bench: 75,615,061 ns/iter (+/- 107,918) = 67 MB/s +test regexdna::variant5 ... bench: 74,484,623 ns/iter (+/- 121,807) = 68 MB/s +test regexdna::variant6 ... 
bench: 74,594,078 ns/iter (+/- 121,252) = 68 MB/s +test regexdna::variant7 ... bench: 77,064,066 ns/iter (+/- 123,262) = 65 MB/s +test regexdna::variant8 ... bench: 87,267,656 ns/iter (+/- 128,639) = 58 MB/s +test regexdna::variant9 ... bench: 98,197,000 ns/iter (+/- 149,379) = 51 MB/s +test sherlock::before_after_holmes ... bench: 7,100,744 ns/iter (+/- 29,939) = 83 MB/s +test sherlock::before_holmes ... bench: 7,120,564 ns/iter (+/- 32,659) = 83 MB/s +test sherlock::everything_greedy ... bench: 3,777,458 ns/iter (+/- 8,802) = 157 MB/s +test sherlock::everything_greedy_nl ... bench: 282 ns/iter (+/- 2) = 2109691 MB/s +test sherlock::holmes_cochar_watson ... bench: 389,335 ns/iter (+/- 1,472) = 1528 MB/s +test sherlock::ing_suffix ... bench: 6,256,416 ns/iter (+/- 8,735) = 95 MB/s +test sherlock::ing_suffix_limited_space ... bench: 7,572,167 ns/iter (+/- 15,521) = 78 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 343,402 ns/iter (+/- 3,693) = 1732 MB/s +test sherlock::name_alt1 ... bench: 347,605 ns/iter (+/- 916) = 1711 MB/s +test sherlock::name_alt2 ... bench: 420,500 ns/iter (+/- 2,846) = 1414 MB/s +test sherlock::name_alt3 ... bench: 762,684 ns/iter (+/- 3,834) = 780 MB/s +test sherlock::name_alt3_nocase ... bench: 9,980,804 ns/iter (+/- 49,424) = 59 MB/s +test sherlock::name_alt4 ... bench: 431,744 ns/iter (+/- 682) = 1377 MB/s +test sherlock::name_alt4_nocase ... bench: 3,464,135 ns/iter (+/- 11,476) = 171 MB/s +test sherlock::name_alt5 ... bench: 472,923 ns/iter (+/- 846) = 1257 MB/s +test sherlock::name_alt5_nocase ... bench: 4,338,924 ns/iter (+/- 31,118) = 137 MB/s +test sherlock::name_holmes ... bench: 378,950 ns/iter (+/- 865) = 1569 MB/s +test sherlock::name_holmes_nocase ... bench: 1,952,035 ns/iter (+/- 8,233) = 304 MB/s +test sherlock::name_sherlock ... bench: 324,845 ns/iter (+/- 8,376) = 1831 MB/s +test sherlock::name_sherlock_holmes ... bench: 324,647 ns/iter (+/- 2,901) = 1832 MB/s +test sherlock::name_sherlock_holmes_nocase ... 
bench: 1,870,400 ns/iter (+/- 10,609) = 318 MB/s +test sherlock::name_sherlock_nocase ... bench: 1,814,172 ns/iter (+/- 16,455) = 327 MB/s +test sherlock::name_whitespace ... bench: 326,252 ns/iter (+/- 1,557) = 1823 MB/s +test sherlock::no_match_common ... bench: 1,154,445 ns/iter (+/- 8,544) = 515 MB/s +test sherlock::no_match_really_common ... bench: 1,154,314 ns/iter (+/- 5,467) = 515 MB/s +test sherlock::no_match_uncommon ... bench: 295,301 ns/iter (+/- 906) = 2014 MB/s +test sherlock::quotes ... bench: 863,138 ns/iter (+/- 3,072) = 689 MB/s +test sherlock::repeated_class_negation ... bench: 13,594,294 ns/iter (+/- 40,354) = 43 MB/s +test sherlock::the_lower ... bench: 2,171,731 ns/iter (+/- 7,148) = 273 MB/s +test sherlock::the_nocase ... bench: 3,556,278 ns/iter (+/- 7,269) = 167 MB/s +test sherlock::the_upper ... bench: 404,851 ns/iter (+/- 865) = 1469 MB/s +test sherlock::the_whitespace ... bench: 2,139,597 ns/iter (+/- 7,427) = 278 MB/s +test sherlock::word_ending_n ... bench: 7,824,965 ns/iter (+/- 30,691) = 76 MB/s +test sherlock::words ... bench: 18,386,285 ns/iter (+/- 34,161) = 32 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 92 measured; 0 filtered out + diff --git a/vendor/regex/record/old-bench-log/07/dphobos-dmd b/vendor/regex/record/old-bench-log/07/dphobos-dmd new file mode 100644 index 0000000..835a096 --- /dev/null +++ b/vendor/regex/record/old-bench-log/07/dphobos-dmd @@ -0,0 +1,100 @@ + +running 95 tests +test misc::anchored_literal_long_match ... bench: 365 ns/iter (+/- 2) = 1068 MB/s +test misc::anchored_literal_long_non_match ... bench: 300 ns/iter (+/- 0) = 1300 MB/s +test misc::anchored_literal_short_match ... bench: 364 ns/iter (+/- 2) = 71 MB/s +test misc::anchored_literal_short_non_match ... bench: 306 ns/iter (+/- 9) = 84 MB/s +test misc::easy0_1K ... bench: 768 ns/iter (+/- 5) = 1368 MB/s +test misc::easy0_1MB ... bench: 17,062 ns/iter (+/- 252) = 61458 MB/s +test misc::easy0_32 ... 
bench: 759 ns/iter (+/- 7) = 77 MB/s +test misc::easy0_32K ... bench: 1,095 ns/iter (+/- 20) = 29949 MB/s +test misc::easy1_1K ... bench: 723 ns/iter (+/- 1) = 1443 MB/s +test misc::easy1_1MB ... bench: 17,021 ns/iter (+/- 229) = 61606 MB/s +test misc::easy1_32 ... bench: 714 ns/iter (+/- 1) = 72 MB/s +test misc::easy1_32K ... bench: 1,052 ns/iter (+/- 12) = 31167 MB/s +test misc::hard_1K ... bench: 17,044 ns/iter (+/- 176) = 61 MB/s +test misc::hard_1MB ... bench: 17,965,420 ns/iter (+/- 72,226) = 58 MB/s +test misc::hard_32 ... bench: 2,171 ns/iter (+/- 2) = 27 MB/s +test misc::hard_32K ... bench: 561,207 ns/iter (+/- 5,654) = 58 MB/s +test misc::literal ... bench: 240 ns/iter (+/- 0) = 212 MB/s +test misc::long_needle1 ... bench: 76,640 ns/iter (+/- 1,043) = 1304 MB/s +test misc::long_needle2 ... bench: 76,747 ns/iter (+/- 3,299) = 1302 MB/s +test misc::match_class ... bench: 344 ns/iter (+/- 1) = 235 MB/s +test misc::match_class_in_range ... bench: 306 ns/iter (+/- 9) = 264 MB/s +test misc::match_class_unicode ... bench: 1,435 ns/iter (+/- 9) = 112 MB/s +test misc::medium_1K ... bench: 1,480 ns/iter (+/- 16) = 710 MB/s +test misc::medium_1MB ... bench: 564,378 ns/iter (+/- 18,695) = 1857 MB/s +test misc::medium_32 ... bench: 941 ns/iter (+/- 32) = 63 MB/s +test misc::medium_32K ... bench: 18,465 ns/iter (+/- 116) = 1776 MB/s +test misc::no_exponential ... bench: 367,476 ns/iter (+/- 15,176) +test misc::not_literal ... bench: 1,165 ns/iter (+/- 9) = 43 MB/s +test misc::one_pass_long_prefix ... bench: 596 ns/iter (+/- 2) = 43 MB/s +test misc::one_pass_long_prefix_not ... bench: 602 ns/iter (+/- 6) = 43 MB/s +test misc::one_pass_short ... bench: 1,068 ns/iter (+/- 3) = 15 MB/s +test misc::one_pass_short_not ... bench: 1,434 ns/iter (+/- 11) = 11 MB/s +test misc::reallyhard2_1K ... bench: 36,539 ns/iter (+/- 281) = 28 MB/s +test misc::reallyhard_1K ... bench: 17,086 ns/iter (+/- 94) = 61 MB/s +test misc::reallyhard_1MB ... 
bench: 17,973,007 ns/iter (+/- 64,010) = 58 MB/s +test misc::reallyhard_32 ... bench: 2,200 ns/iter (+/- 16) = 26 MB/s +test misc::reallyhard_32K ... bench: 561,371 ns/iter (+/- 8,688) = 58 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 4,606 ns/iter (+/- 16) = 1736 MB/s +test regexdna::find_new_lines ... bench: 31,579,756 ns/iter (+/- 121,047) = 160 MB/s +test regexdna::subst1 ... bench: 7,930,333 ns/iter (+/- 27,118) = 641 MB/s +test regexdna::subst10 ... bench: 7,942,534 ns/iter (+/- 36,470) = 640 MB/s +test regexdna::subst11 ... bench: 7,934,646 ns/iter (+/- 42,013) = 640 MB/s +test regexdna::subst2 ... bench: 7,947,802 ns/iter (+/- 53,427) = 639 MB/s +test regexdna::subst3 ... bench: 7,941,691 ns/iter (+/- 122,303) = 640 MB/s +test regexdna::subst4 ... bench: 7,928,228 ns/iter (+/- 30,493) = 641 MB/s +test regexdna::subst5 ... bench: 7,936,901 ns/iter (+/- 37,894) = 640 MB/s +test regexdna::subst6 ... bench: 7,936,211 ns/iter (+/- 46,269) = 640 MB/s +test regexdna::subst7 ... bench: 7,946,477 ns/iter (+/- 62,660) = 639 MB/s +test regexdna::subst8 ... bench: 7,930,830 ns/iter (+/- 31,234) = 640 MB/s +test regexdna::subst9 ... bench: 7,937,951 ns/iter (+/- 36,425) = 640 MB/s +test regexdna::variant1 ... bench: 5,104,224 ns/iter (+/- 26,612) = 995 MB/s +test regexdna::variant2 ... bench: 6,847,162 ns/iter (+/- 31,233) = 742 MB/s +test regexdna::variant3 ... bench: 11,153,739 ns/iter (+/- 114,193) = 455 MB/s +test regexdna::variant4 ... bench: 9,665,797 ns/iter (+/- 47,148) = 525 MB/s +test regexdna::variant5 ... bench: 9,645,193 ns/iter (+/- 35,250) = 527 MB/s +test regexdna::variant6 ... bench: 7,280,069 ns/iter (+/- 21,171) = 698 MB/s +test regexdna::variant7 ... bench: 7,841,177 ns/iter (+/- 20,797) = 648 MB/s +test regexdna::variant8 ... bench: 9,783,978 ns/iter (+/- 35,231) = 519 MB/s +test regexdna::variant9 ... bench: 19,157,329 ns/iter (+/- 445,911) = 265 MB/s +test sherlock::before_after_holmes ... 
bench: 20,995,307 ns/iter (+/- 258,419) = 28 MB/s +test sherlock::before_holmes ... bench: 20,899,416 ns/iter (+/- 122,256) = 28 MB/s +test sherlock::holmes_cochar_watson ... bench: 904,439 ns/iter (+/- 6,934) = 657 MB/s +test sherlock::holmes_coword_watson ... bench: 103,706,930 ns/iter (+/- 176,711) = 5 MB/s +test sherlock::ing_suffix ... bench: 14,927,612 ns/iter (+/- 90,346) = 39 MB/s +test sherlock::ing_suffix_limited_space ... bench: 19,743,662 ns/iter (+/- 78,506) = 30 MB/s +test sherlock::letters ... bench: 112,708,213 ns/iter (+/- 251,690) = 5 MB/s +test sherlock::letters_lower ... bench: 111,058,829 ns/iter (+/- 192,793) = 5 MB/s +test sherlock::letters_upper ... bench: 4,072,062 ns/iter (+/- 20,273) = 146 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 399,796 ns/iter (+/- 4,222) = 1488 MB/s +test sherlock::name_alt1 ... bench: 411,415 ns/iter (+/- 2,257) = 1446 MB/s +test sherlock::name_alt2 ... bench: 626,671 ns/iter (+/- 5,745) = 949 MB/s +test sherlock::name_alt3 ... bench: 1,086,570 ns/iter (+/- 6,105) = 547 MB/s +test sherlock::name_alt3_nocase ... bench: 1,827,028 ns/iter (+/- 12,788) = 325 MB/s +test sherlock::name_alt4 ... bench: 687,454 ns/iter (+/- 11,421) = 865 MB/s +test sherlock::name_alt4_nocase ... bench: 943,925 ns/iter (+/- 4,108) = 630 MB/s +test sherlock::name_alt5 ... bench: 734,969 ns/iter (+/- 7,215) = 809 MB/s +test sherlock::name_alt5_nocase ... bench: 895,903 ns/iter (+/- 5,647) = 664 MB/s +test sherlock::name_holmes ... bench: 199,880 ns/iter (+/- 1,654) = 2976 MB/s +test sherlock::name_holmes_nocase ... bench: 529,590 ns/iter (+/- 1,288) = 1123 MB/s +test sherlock::name_sherlock ... bench: 57,720 ns/iter (+/- 555) = 10307 MB/s +test sherlock::name_sherlock_holmes ... bench: 67,596 ns/iter (+/- 580) = 8801 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 393,903 ns/iter (+/- 2,700) = 1510 MB/s +test sherlock::name_sherlock_nocase ... 
bench: 373,411 ns/iter (+/- 3,264) = 1593 MB/s +test sherlock::name_whitespace ... bench: 79,175 ns/iter (+/- 1,288) = 7514 MB/s +test sherlock::no_match_common ... bench: 276,503 ns/iter (+/- 2,155) = 2151 MB/s +test sherlock::no_match_really_common ... bench: 276,535 ns/iter (+/- 416) = 2151 MB/s +test sherlock::no_match_uncommon ... bench: 10,535 ns/iter (+/- 105) = 56472 MB/s +test sherlock::quotes ... bench: 5,746,202 ns/iter (+/- 33,993) = 103 MB/s +test sherlock::repeated_class_negation ... bench: 46,124,528 ns/iter (+/- 125,861) = 12 MB/s +test sherlock::the_lower ... bench: 2,527,960 ns/iter (+/- 12,351) = 235 MB/s +test sherlock::the_nocase ... bench: 3,210,112 ns/iter (+/- 10,799) = 185 MB/s +test sherlock::the_upper ... bench: 240,272 ns/iter (+/- 3,902) = 2476 MB/s +test sherlock::the_whitespace ... bench: 3,511,711 ns/iter (+/- 17,181) = 169 MB/s +test sherlock::word_ending_n ... bench: 29,535,089 ns/iter (+/- 95,201) = 20 MB/s +test sherlock::words ... bench: 43,341,782 ns/iter (+/- 110,038) = 13 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 95 measured; 0 filtered out + diff --git a/vendor/regex/record/old-bench-log/07/dphobos-dmd-ct b/vendor/regex/record/old-bench-log/07/dphobos-dmd-ct new file mode 100644 index 0000000..9dd6d02 --- /dev/null +++ b/vendor/regex/record/old-bench-log/07/dphobos-dmd-ct @@ -0,0 +1,98 @@ + +running 93 tests +test misc::anchored_literal_long_match ... bench: 354 ns/iter (+/- 25) = 1101 MB/s +test misc::anchored_literal_long_non_match ... bench: 314 ns/iter (+/- 3) = 1242 MB/s +test misc::anchored_literal_short_match ... bench: 331 ns/iter (+/- 0) = 78 MB/s +test misc::anchored_literal_short_non_match ... bench: 314 ns/iter (+/- 4) = 82 MB/s +test misc::easy0_1K ... bench: 430 ns/iter (+/- 2) = 2444 MB/s +test misc::easy0_1MB ... bench: 16,692 ns/iter (+/- 222) = 62820 MB/s +test misc::easy0_32 ... bench: 420 ns/iter (+/- 4) = 140 MB/s +test misc::easy0_32K ... 
bench: 755 ns/iter (+/- 5) = 43437 MB/s +test misc::easy1_1K ... bench: 407 ns/iter (+/- 10) = 2565 MB/s +test misc::easy1_1MB ... bench: 16,670 ns/iter (+/- 205) = 62903 MB/s +test misc::easy1_32 ... bench: 389 ns/iter (+/- 0) = 133 MB/s +test misc::easy1_32K ... bench: 732 ns/iter (+/- 6) = 44792 MB/s +test misc::hard_1K ... bench: 35,518 ns/iter (+/- 346) = 29 MB/s +test misc::hard_1MB ... bench: 31,657,473 ns/iter (+/- 512,618) = 33 MB/s +test misc::hard_32 ... bench: 1,057 ns/iter (+/- 7) = 55 MB/s +test misc::hard_32K ... bench: 950,905 ns/iter (+/- 13,239) = 34 MB/s +test misc::literal ... bench: 320 ns/iter (+/- 3) = 159 MB/s +test misc::long_needle1 ... bench: 73,954 ns/iter (+/- 331) = 1352 MB/s +test misc::long_needle2 ... bench: 73,915 ns/iter (+/- 199) = 1352 MB/s +test misc::match_class ... bench: 374 ns/iter (+/- 3) = 216 MB/s +test misc::match_class_in_range ... bench: 372 ns/iter (+/- 0) = 217 MB/s +test misc::match_class_unicode ... bench: 1,631 ns/iter (+/- 8) = 98 MB/s +test misc::medium_1K ... bench: 965 ns/iter (+/- 10) = 1090 MB/s +test misc::medium_1MB ... bench: 563,242 ns/iter (+/- 6,767) = 1861 MB/s +test misc::medium_32 ... bench: 434 ns/iter (+/- 3) = 138 MB/s +test misc::medium_32K ... bench: 17,976 ns/iter (+/- 137) = 1824 MB/s +test misc::not_literal ... bench: 1,063 ns/iter (+/- 2) = 47 MB/s +test misc::one_pass_long_prefix ... bench: 405 ns/iter (+/- 4) = 64 MB/s +test misc::one_pass_long_prefix_not ... bench: 410 ns/iter (+/- 2) = 63 MB/s +test misc::one_pass_short ... bench: 539 ns/iter (+/- 12) = 31 MB/s +test misc::one_pass_short_not ... bench: 534 ns/iter (+/- 1) = 31 MB/s +test misc::reallyhard2_1K ... bench: 75,108 ns/iter (+/- 699) = 13 MB/s +test misc::reallyhard_1K ... bench: 34,681 ns/iter (+/- 268) = 30 MB/s +test misc::reallyhard_1MB ... bench: 30,579,065 ns/iter (+/- 389,443) = 34 MB/s +test misc::reallyhard_32 ... bench: 1,025 ns/iter (+/- 22) = 57 MB/s +test misc::reallyhard_32K ... 
bench: 920,515 ns/iter (+/- 26,281) = 35 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 4,607 ns/iter (+/- 36) = 1736 MB/s +test regexdna::find_new_lines ... bench: 34,122,227 ns/iter (+/- 842,345) = 148 MB/s +test regexdna::subst1 ... bench: 9,932,271 ns/iter (+/- 86,915) = 511 MB/s +test regexdna::subst10 ... bench: 9,977,738 ns/iter (+/- 51,656) = 509 MB/s +test regexdna::subst11 ... bench: 9,945,085 ns/iter (+/- 53,175) = 511 MB/s +test regexdna::subst2 ... bench: 9,928,284 ns/iter (+/- 32,335) = 512 MB/s +test regexdna::subst3 ... bench: 9,968,901 ns/iter (+/- 41,254) = 509 MB/s +test regexdna::subst4 ... bench: 9,912,463 ns/iter (+/- 28,171) = 512 MB/s +test regexdna::subst5 ... bench: 9,948,128 ns/iter (+/- 22,949) = 510 MB/s +test regexdna::subst6 ... bench: 9,916,200 ns/iter (+/- 28,947) = 512 MB/s +test regexdna::subst7 ... bench: 9,996,277 ns/iter (+/- 37,585) = 508 MB/s +test regexdna::subst8 ... bench: 9,974,849 ns/iter (+/- 41,503) = 509 MB/s +test regexdna::subst9 ... bench: 9,961,948 ns/iter (+/- 28,254) = 510 MB/s +test regexdna::variant1 ... bench: 3,504,049 ns/iter (+/- 15,090) = 1450 MB/s +test regexdna::variant2 ... bench: 3,800,264 ns/iter (+/- 12,123) = 1337 MB/s +test regexdna::variant3 ... bench: 4,932,027 ns/iter (+/- 15,553) = 1030 MB/s +test regexdna::variant4 ... bench: 4,709,109 ns/iter (+/- 15,213) = 1079 MB/s +test regexdna::variant5 ... bench: 4,918,928 ns/iter (+/- 19,196) = 1033 MB/s +test regexdna::variant6 ... bench: 4,244,250 ns/iter (+/- 24,367) = 1197 MB/s +test regexdna::variant7 ... bench: 4,245,530 ns/iter (+/- 16,178) = 1197 MB/s +test regexdna::variant8 ... bench: 4,205,036 ns/iter (+/- 10,733) = 1208 MB/s +test regexdna::variant9 ... bench: 3,864,771 ns/iter (+/- 11,864) = 1315 MB/s +test sherlock::before_after_holmes ... bench: 22,490,817 ns/iter (+/- 571,510) = 26 MB/s +test sherlock::before_holmes ... bench: 22,603,264 ns/iter (+/- 74,703) = 26 MB/s +test sherlock::holmes_cochar_watson ... 
bench: 519,710 ns/iter (+/- 5,878) = 1144 MB/s +test sherlock::ing_suffix ... bench: 9,237,783 ns/iter (+/- 30,188) = 64 MB/s +test sherlock::ing_suffix_limited_space ... bench: 12,074,301 ns/iter (+/- 30,860) = 49 MB/s +test sherlock::letters ... bench: 137,678,575 ns/iter (+/- 131,761) = 4 MB/s +test sherlock::letters_lower ... bench: 135,414,657 ns/iter (+/- 134,307) = 4 MB/s +test sherlock::letters_upper ... bench: 5,004,996 ns/iter (+/- 23,224) = 118 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 341,556 ns/iter (+/- 1,500) = 1741 MB/s +test sherlock::name_alt1 ... bench: 378,291 ns/iter (+/- 1,545) = 1572 MB/s +test sherlock::name_alt2 ... bench: 528,403 ns/iter (+/- 2,273) = 1125 MB/s +test sherlock::name_alt3 ... bench: 685,634 ns/iter (+/- 17,205) = 867 MB/s +test sherlock::name_alt3_nocase ... bench: 825,069 ns/iter (+/- 10,490) = 721 MB/s +test sherlock::name_alt4 ... bench: 555,717 ns/iter (+/- 3,223) = 1070 MB/s +test sherlock::name_alt4_nocase ... bench: 649,913 ns/iter (+/- 4,543) = 915 MB/s +test sherlock::name_alt5 ... bench: 570,036 ns/iter (+/- 543) = 1043 MB/s +test sherlock::name_alt5_nocase ... bench: 580,445 ns/iter (+/- 2,100) = 1024 MB/s +test sherlock::name_holmes ... bench: 185,140 ns/iter (+/- 2,100) = 3213 MB/s +test sherlock::name_holmes_nocase ... bench: 479,902 ns/iter (+/- 5,898) = 1239 MB/s +test sherlock::name_sherlock ... bench: 51,053 ns/iter (+/- 491) = 11653 MB/s +test sherlock::name_sherlock_holmes ... bench: 50,722 ns/iter (+/- 296) = 11729 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 355,142 ns/iter (+/- 1,424) = 1675 MB/s +test sherlock::name_sherlock_nocase ... bench: 354,932 ns/iter (+/- 1,554) = 1676 MB/s +test sherlock::name_whitespace ... bench: 56,972 ns/iter (+/- 271) = 10442 MB/s +test sherlock::no_match_common ... bench: 274,260 ns/iter (+/- 3,092) = 2169 MB/s +test sherlock::no_match_really_common ... 
bench: 273,984 ns/iter (+/- 2,202) = 2171 MB/s +test sherlock::no_match_uncommon ... bench: 10,444 ns/iter (+/- 68) = 56964 MB/s +test sherlock::quotes ... bench: 2,755,414 ns/iter (+/- 11,488) = 215 MB/s +test sherlock::repeated_class_negation ... bench: 21,585,138 ns/iter (+/- 50,347) = 27 MB/s +test sherlock::the_lower ... bench: 2,835,360 ns/iter (+/- 10,083) = 209 MB/s +test sherlock::the_nocase ... bench: 3,060,088 ns/iter (+/- 10,321) = 194 MB/s +test sherlock::the_upper ... bench: 272,416 ns/iter (+/- 3,308) = 2183 MB/s +test sherlock::the_whitespace ... bench: 2,991,214 ns/iter (+/- 27,223) = 198 MB/s +test sherlock::word_ending_n ... bench: 30,726,303 ns/iter (+/- 83,743) = 19 MB/s +test sherlock::words ... bench: 42,256,710 ns/iter (+/- 88,302) = 14 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 93 measured; 0 filtered out + diff --git a/vendor/regex/record/old-bench-log/07/oniguruma b/vendor/regex/record/old-bench-log/07/oniguruma new file mode 100644 index 0000000..b9e8e29 --- /dev/null +++ b/vendor/regex/record/old-bench-log/07/oniguruma @@ -0,0 +1,99 @@ + +running 94 tests +test misc::anchored_literal_long_match ... bench: 129 ns/iter (+/- 3) = 3023 MB/s +test misc::anchored_literal_long_non_match ... bench: 402 ns/iter (+/- 1) = 970 MB/s +test misc::anchored_literal_short_match ... bench: 130 ns/iter (+/- 1) = 200 MB/s +test misc::anchored_literal_short_non_match ... bench: 49 ns/iter (+/- 0) = 530 MB/s +test misc::easy0_1K ... bench: 281 ns/iter (+/- 3) = 3740 MB/s +test misc::easy0_1MB ... bench: 119,847 ns/iter (+/- 713) = 8749 MB/s +test misc::easy0_32 ... bench: 154 ns/iter (+/- 4) = 383 MB/s +test misc::easy0_32K ... bench: 3,985 ns/iter (+/- 24) = 8229 MB/s +test misc::easy1_1K ... bench: 3,472 ns/iter (+/- 11) = 300 MB/s +test misc::easy1_1MB ... bench: 3,385,764 ns/iter (+/- 6,630) = 309 MB/s +test misc::easy1_32 ... bench: 283 ns/iter (+/- 6) = 183 MB/s +test misc::easy1_32K ... 
bench: 105,977 ns/iter (+/- 319) = 309 MB/s +test misc::hard_1K ... bench: 106,973 ns/iter (+/- 1,091) = 9 MB/s +test misc::hard_1MB ... bench: 114,602,847 ns/iter (+/- 336,051) = 9 MB/s +test misc::hard_32 ... bench: 3,569 ns/iter (+/- 3) = 16 MB/s +test misc::hard_32K ... bench: 3,570,108 ns/iter (+/- 17,057) = 9 MB/s +test misc::literal ... bench: 287 ns/iter (+/- 1) = 177 MB/s +test misc::long_needle1 ... bench: 5,430,190 ns/iter (+/- 271,737) = 18 MB/s +test misc::long_needle2 ... bench: 5,651,748 ns/iter (+/- 260,960) = 17 MB/s +test misc::match_class ... bench: 369 ns/iter (+/- 0) = 219 MB/s +test misc::match_class_in_range ... bench: 370 ns/iter (+/- 8) = 218 MB/s +test misc::match_class_unicode ... bench: 1,600 ns/iter (+/- 24) = 100 MB/s +test misc::medium_1K ... bench: 295 ns/iter (+/- 4) = 3566 MB/s +test misc::medium_1MB ... bench: 119,845 ns/iter (+/- 707) = 8749 MB/s +test misc::medium_32 ... bench: 166 ns/iter (+/- 0) = 361 MB/s +test misc::medium_32K ... bench: 3,995 ns/iter (+/- 30) = 8209 MB/s +test misc::not_literal ... bench: 365 ns/iter (+/- 1) = 139 MB/s +test misc::one_pass_long_prefix ... bench: 155 ns/iter (+/- 0) = 167 MB/s +test misc::one_pass_long_prefix_not ... bench: 162 ns/iter (+/- 0) = 160 MB/s +test misc::one_pass_short ... bench: 279 ns/iter (+/- 0) = 60 MB/s +test misc::one_pass_short_not ... bench: 269 ns/iter (+/- 3) = 63 MB/s +test misc::reallyhard2_1K ... bench: 227,630 ns/iter (+/- 963) = 4 MB/s +test misc::reallyhard_1K ... bench: 106,964 ns/iter (+/- 1,199) = 9 MB/s +test misc::reallyhard_1MB ... bench: 114,622,989 ns/iter (+/- 206,430) = 9 MB/s +test misc::reallyhard_32 ... bench: 3,477 ns/iter (+/- 13) = 16 MB/s +test misc::reallyhard_32K ... bench: 3,580,927 ns/iter (+/- 15,784) = 9 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 23,518 ns/iter (+/- 105) = 340 MB/s +test regexdna::find_new_lines ... bench: 33,300,039 ns/iter (+/- 827,837) = 152 MB/s +test regexdna::subst1 ... 
bench: 22,829,688 ns/iter (+/- 81,653) = 222 MB/s +test regexdna::subst10 ... bench: 22,843,694 ns/iter (+/- 94,299) = 222 MB/s +test regexdna::subst11 ... bench: 22,827,872 ns/iter (+/- 84,129) = 222 MB/s +test regexdna::subst2 ... bench: 22,841,925 ns/iter (+/- 84,394) = 222 MB/s +test regexdna::subst3 ... bench: 22,885,409 ns/iter (+/- 114,277) = 222 MB/s +test regexdna::subst4 ... bench: 22,837,475 ns/iter (+/- 58,938) = 222 MB/s +test regexdna::subst5 ... bench: 22,835,207 ns/iter (+/- 39,862) = 222 MB/s +test regexdna::subst6 ... bench: 22,833,199 ns/iter (+/- 77,142) = 222 MB/s +test regexdna::subst7 ... bench: 22,851,757 ns/iter (+/- 322,186) = 222 MB/s +test regexdna::subst8 ... bench: 22,842,892 ns/iter (+/- 86,166) = 222 MB/s +test regexdna::subst9 ... bench: 22,840,862 ns/iter (+/- 105,926) = 222 MB/s +test regexdna::variant1 ... bench: 91,691,325 ns/iter (+/- 194,247) = 55 MB/s +test regexdna::variant2 ... bench: 105,586,659 ns/iter (+/- 320,354) = 48 MB/s +test regexdna::variant3 ... bench: 94,437,485 ns/iter (+/- 277,744) = 53 MB/s +test regexdna::variant4 ... bench: 90,399,600 ns/iter (+/- 184,588) = 56 MB/s +test regexdna::variant5 ... bench: 90,332,232 ns/iter (+/- 174,254) = 56 MB/s +test regexdna::variant6 ... bench: 90,519,504 ns/iter (+/- 227,643) = 56 MB/s +test regexdna::variant7 ... bench: 90,881,562 ns/iter (+/- 221,861) = 55 MB/s +test regexdna::variant8 ... bench: 96,962,980 ns/iter (+/- 180,002) = 52 MB/s +test regexdna::variant9 ... bench: 109,558,711 ns/iter (+/- 166,337) = 46 MB/s +test sherlock::before_after_holmes ... bench: 31,530,493 ns/iter (+/- 112,639) = 18 MB/s +test sherlock::before_holmes ... bench: 30,420,729 ns/iter (+/- 114,072) = 19 MB/s +test sherlock::everything_greedy ... bench: 6,656,677 ns/iter (+/- 167,110) = 89 MB/s +test sherlock::holmes_cochar_watson ... bench: 1,992,839 ns/iter (+/- 8,037) = 298 MB/s +test sherlock::ing_suffix ... 
bench: 15,878,331 ns/iter (+/- 150,901) = 37 MB/s +test sherlock::ing_suffix_limited_space ... bench: 2,085,471 ns/iter (+/- 8,438) = 285 MB/s +test sherlock::letters ... bench: 89,091,241 ns/iter (+/- 182,225) = 6 MB/s +test sherlock::letters_lower ... bench: 55,634,237 ns/iter (+/- 115,097) = 10 MB/s +test sherlock::letters_upper ... bench: 10,126,641 ns/iter (+/- 36,015) = 58 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 191,963 ns/iter (+/- 687) = 3099 MB/s +test sherlock::name_alt1 ... bench: 1,983,070 ns/iter (+/- 5,863) = 300 MB/s +test sherlock::name_alt2 ... bench: 1,972,746 ns/iter (+/- 14,082) = 301 MB/s +test sherlock::name_alt3 ... bench: 2,424,033 ns/iter (+/- 13,209) = 245 MB/s +test sherlock::name_alt3_nocase ... bench: 16,876,942 ns/iter (+/- 77,218) = 35 MB/s +test sherlock::name_alt4 ... bench: 1,986,579 ns/iter (+/- 9,195) = 299 MB/s +test sherlock::name_alt4_nocase ... bench: 4,992,277 ns/iter (+/- 10,882) = 119 MB/s +test sherlock::name_alt5 ... bench: 2,033,937 ns/iter (+/- 13,627) = 292 MB/s +test sherlock::name_alt5_nocase ... bench: 6,292,627 ns/iter (+/- 14,666) = 94 MB/s +test sherlock::name_holmes ... bench: 450,290 ns/iter (+/- 1,882) = 1321 MB/s +test sherlock::name_holmes_nocase ... bench: 3,032,489 ns/iter (+/- 8,728) = 196 MB/s +test sherlock::name_sherlock ... bench: 265,379 ns/iter (+/- 865) = 2241 MB/s +test sherlock::name_sherlock_holmes ... bench: 201,375 ns/iter (+/- 2,146) = 2954 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 3,010,059 ns/iter (+/- 7,093) = 197 MB/s +test sherlock::name_sherlock_nocase ... bench: 3,016,713 ns/iter (+/- 11,280) = 197 MB/s +test sherlock::name_whitespace ... bench: 266,706 ns/iter (+/- 908) = 2230 MB/s +test sherlock::no_match_common ... bench: 544,428 ns/iter (+/- 7,562) = 1092 MB/s +test sherlock::no_match_really_common ... bench: 626,986 ns/iter (+/- 2,959) = 948 MB/s +test sherlock::no_match_uncommon ... 
bench: 534,517 ns/iter (+/- 4,342) = 1113 MB/s +test sherlock::quotes ... bench: 3,210,614 ns/iter (+/- 15,699) = 185 MB/s +test sherlock::repeated_class_negation ... bench: 31,147,103 ns/iter (+/- 117,471) = 19 MB/s +test sherlock::the_lower ... bench: 2,275,468 ns/iter (+/- 19,220) = 261 MB/s +test sherlock::the_nocase ... bench: 4,999,086 ns/iter (+/- 20,184) = 119 MB/s +test sherlock::the_upper ... bench: 893,288 ns/iter (+/- 11,368) = 666 MB/s +test sherlock::the_whitespace ... bench: 2,364,893 ns/iter (+/- 21,124) = 251 MB/s +test sherlock::word_ending_n ... bench: 18,221,921 ns/iter (+/- 62,927) = 32 MB/s +test sherlock::words ... bench: 27,552,543 ns/iter (+/- 89,437) = 21 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 94 measured; 0 filtered out + diff --git a/vendor/regex/record/old-bench-log/07/pcre1 b/vendor/regex/record/old-bench-log/07/pcre1 new file mode 100644 index 0000000..a28d3cb --- /dev/null +++ b/vendor/regex/record/old-bench-log/07/pcre1 @@ -0,0 +1,98 @@ + +running 93 tests +test misc::anchored_literal_long_match ... bench: 23 ns/iter (+/- 0) = 16956 MB/s +test misc::anchored_literal_long_non_match ... bench: 19 ns/iter (+/- 0) = 20526 MB/s +test misc::anchored_literal_short_match ... bench: 23 ns/iter (+/- 0) = 1130 MB/s +test misc::anchored_literal_short_non_match ... bench: 19 ns/iter (+/- 0) = 1368 MB/s +test misc::easy0_1K ... bench: 223 ns/iter (+/- 2) = 4713 MB/s +test misc::easy0_1MB ... bench: 178,098 ns/iter (+/- 3,124) = 5887 MB/s +test misc::easy0_32 ... bench: 39 ns/iter (+/- 0) = 1512 MB/s +test misc::easy0_32K ... bench: 5,600 ns/iter (+/- 27) = 5856 MB/s +test misc::easy1_1K ... bench: 210 ns/iter (+/- 7) = 4971 MB/s +test misc::easy1_1MB ... bench: 178,177 ns/iter (+/- 1,024) = 5885 MB/s +test misc::easy1_32 ... bench: 40 ns/iter (+/- 0) = 1300 MB/s +test misc::easy1_32K ... bench: 5,592 ns/iter (+/- 52) = 5863 MB/s +test misc::hard_1K ... bench: 1,223 ns/iter (+/- 14) = 859 MB/s +test misc::hard_1MB ... 
bench: 983,169 ns/iter (+/- 13,398) = 1066 MB/s +test misc::hard_32 ... bench: 99 ns/iter (+/- 0) = 595 MB/s +test misc::hard_32K ... bench: 31,422 ns/iter (+/- 326) = 1043 MB/s +test misc::literal ... bench: 23 ns/iter (+/- 0) = 2217 MB/s +test misc::long_needle1 ... bench: 464,932 ns/iter (+/- 1,869) = 215 MB/s +test misc::long_needle2 ... bench: 462,587 ns/iter (+/- 6,375) = 216 MB/s +test misc::match_class ... bench: 73 ns/iter (+/- 0) = 1109 MB/s +test misc::match_class_in_range ... bench: 25 ns/iter (+/- 0) = 3240 MB/s +test misc::match_class_unicode ... bench: 263 ns/iter (+/- 2) = 612 MB/s +test misc::medium_1K ... bench: 213 ns/iter (+/- 3) = 4938 MB/s +test misc::medium_1MB ... bench: 178,077 ns/iter (+/- 1,844) = 5888 MB/s +test misc::medium_32 ... bench: 48 ns/iter (+/- 0) = 1250 MB/s +test misc::medium_32K ... bench: 5,598 ns/iter (+/- 38) = 5858 MB/s +test misc::not_literal ... bench: 131 ns/iter (+/- 0) = 389 MB/s +test misc::one_pass_long_prefix ... bench: 22 ns/iter (+/- 0) = 1181 MB/s +test misc::one_pass_long_prefix_not ... bench: 22 ns/iter (+/- 0) = 1181 MB/s +test misc::one_pass_short ... bench: 44 ns/iter (+/- 0) = 386 MB/s +test misc::one_pass_short_not ... bench: 44 ns/iter (+/- 0) = 386 MB/s +test misc::reallyhard2_1K ... bench: 3,503 ns/iter (+/- 29) = 296 MB/s +test misc::reallyhard_1K ... bench: 1,276 ns/iter (+/- 14) = 823 MB/s +test misc::reallyhard_1MB ... bench: 1,003,152 ns/iter (+/- 10,884) = 1045 MB/s +test misc::reallyhard_32 ... bench: 102 ns/iter (+/- 7) = 578 MB/s +test misc::reallyhard_32K ... bench: 31,035 ns/iter (+/- 221) = 1056 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 3,924 ns/iter (+/- 23) = 2038 MB/s +test regexdna::find_new_lines ... bench: 2,398,578 ns/iter (+/- 28,663) = 2119 MB/s +test regexdna::subst1 ... bench: 1,073,632 ns/iter (+/- 7,567) = 4734 MB/s +test regexdna::subst10 ... bench: 1,068,696 ns/iter (+/- 14,896) = 4756 MB/s +test regexdna::subst11 ... 
bench: 1,071,991 ns/iter (+/- 21,623) = 4742 MB/s +test regexdna::subst2 ... bench: 1,064,244 ns/iter (+/- 22,701) = 4776 MB/s +test regexdna::subst3 ... bench: 1,081,402 ns/iter (+/- 25,919) = 4700 MB/s +test regexdna::subst4 ... bench: 1,078,319 ns/iter (+/- 8,278) = 4714 MB/s +test regexdna::subst5 ... bench: 1,067,600 ns/iter (+/- 6,079) = 4761 MB/s +test regexdna::subst6 ... bench: 1,073,509 ns/iter (+/- 8,068) = 4735 MB/s +test regexdna::subst7 ... bench: 1,075,462 ns/iter (+/- 9,145) = 4726 MB/s +test regexdna::subst8 ... bench: 1,073,592 ns/iter (+/- 10,284) = 4734 MB/s +test regexdna::subst9 ... bench: 1,074,960 ns/iter (+/- 11,802) = 4728 MB/s +test regexdna::variant1 ... bench: 14,120,901 ns/iter (+/- 85,462) = 359 MB/s +test regexdna::variant2 ... bench: 15,606,152 ns/iter (+/- 128,452) = 325 MB/s +test regexdna::variant3 ... bench: 18,892,502 ns/iter (+/- 82,790) = 269 MB/s +test regexdna::variant4 ... bench: 17,988,621 ns/iter (+/- 50,462) = 282 MB/s +test regexdna::variant5 ... bench: 15,854,890 ns/iter (+/- 54,745) = 320 MB/s +test regexdna::variant6 ... bench: 16,126,069 ns/iter (+/- 76,013) = 315 MB/s +test regexdna::variant7 ... bench: 17,997,470 ns/iter (+/- 94,472) = 282 MB/s +test regexdna::variant8 ... bench: 23,004,949 ns/iter (+/- 81,626) = 220 MB/s +test regexdna::variant9 ... bench: 20,272,633 ns/iter (+/- 99,674) = 250 MB/s +test sherlock::before_after_holmes ... bench: 3,660,138 ns/iter (+/- 41,095) = 162 MB/s +test sherlock::before_holmes ... bench: 3,632,955 ns/iter (+/- 25,761) = 163 MB/s +test sherlock::holmes_cochar_watson ... bench: 458,639 ns/iter (+/- 9,185) = 1297 MB/s +test sherlock::ing_suffix ... bench: 1,746,052 ns/iter (+/- 31,762) = 340 MB/s +test sherlock::ing_suffix_limited_space ... bench: 4,067,141 ns/iter (+/- 12,943) = 146 MB/s +test sherlock::letters ... bench: 11,360,188 ns/iter (+/- 22,264) = 52 MB/s +test sherlock::letters_lower ... 
bench: 11,137,940 ns/iter (+/- 35,225) = 53 MB/s +test sherlock::letters_upper ... bench: 1,505,435 ns/iter (+/- 10,318) = 395 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 181,084 ns/iter (+/- 3,121) = 3285 MB/s +test sherlock::name_alt1 ... bench: 427,474 ns/iter (+/- 1,601) = 1391 MB/s +test sherlock::name_alt2 ... bench: 434,858 ns/iter (+/- 6,444) = 1368 MB/s +test sherlock::name_alt3 ... bench: 747,274 ns/iter (+/- 7,303) = 796 MB/s +test sherlock::name_alt3_nocase ... bench: 2,574,102 ns/iter (+/- 44,203) = 231 MB/s +test sherlock::name_alt4 ... bench: 66,428 ns/iter (+/- 336) = 8956 MB/s +test sherlock::name_alt4_nocase ... bench: 1,333,932 ns/iter (+/- 6,683) = 445 MB/s +test sherlock::name_alt5 ... bench: 598,062 ns/iter (+/- 4,936) = 994 MB/s +test sherlock::name_alt5_nocase ... bench: 1,496,292 ns/iter (+/- 6,595) = 397 MB/s +test sherlock::name_holmes ... bench: 359,203 ns/iter (+/- 6,202) = 1656 MB/s +test sherlock::name_holmes_nocase ... bench: 454,624 ns/iter (+/- 2,658) = 1308 MB/s +test sherlock::name_sherlock ... bench: 243,450 ns/iter (+/- 2,435) = 2443 MB/s +test sherlock::name_sherlock_holmes ... bench: 182,407 ns/iter (+/- 878) = 3261 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 940,244 ns/iter (+/- 6,064) = 632 MB/s +test sherlock::name_sherlock_nocase ... bench: 904,285 ns/iter (+/- 9,405) = 657 MB/s +test sherlock::name_whitespace ... bench: 244,114 ns/iter (+/- 1,875) = 2437 MB/s +test sherlock::no_match_common ... bench: 358,735 ns/iter (+/- 4,090) = 1658 MB/s +test sherlock::no_match_really_common ... bench: 348,964 ns/iter (+/- 6,060) = 1704 MB/s +test sherlock::no_match_uncommon ... bench: 21,256 ns/iter (+/- 144) = 27988 MB/s +test sherlock::quotes ... bench: 422,149 ns/iter (+/- 1,540) = 1409 MB/s +test sherlock::repeated_class_negation ... bench: 5,232,683 ns/iter (+/- 21,609) = 113 MB/s +test sherlock::the_lower ... bench: 651,539 ns/iter (+/- 1,763) = 913 MB/s +test sherlock::the_nocase ... 
bench: 693,506 ns/iter (+/- 13,143) = 857 MB/s +test sherlock::the_upper ... bench: 46,904 ns/iter (+/- 657) = 12684 MB/s +test sherlock::the_whitespace ... bench: 788,070 ns/iter (+/- 17,403) = 754 MB/s +test sherlock::word_ending_n ... bench: 4,545,774 ns/iter (+/- 26,965) = 130 MB/s +test sherlock::words ... bench: 5,493,039 ns/iter (+/- 16,767) = 108 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 93 measured; 0 filtered out + diff --git a/vendor/regex/record/old-bench-log/07/pcre2 b/vendor/regex/record/old-bench-log/07/pcre2 new file mode 100644 index 0000000..c2bbd39 --- /dev/null +++ b/vendor/regex/record/old-bench-log/07/pcre2 @@ -0,0 +1,98 @@ + +running 93 tests +test misc::anchored_literal_long_match ... bench: 15 ns/iter (+/- 0) = 26000 MB/s +test misc::anchored_literal_long_non_match ... bench: 12 ns/iter (+/- 0) = 32500 MB/s +test misc::anchored_literal_short_match ... bench: 14 ns/iter (+/- 0) = 1857 MB/s +test misc::anchored_literal_short_non_match ... bench: 11 ns/iter (+/- 0) = 2363 MB/s +test misc::easy0_1K ... bench: 81 ns/iter (+/- 4) = 12975 MB/s +test misc::easy0_1MB ... bench: 60,199 ns/iter (+/- 658) = 17418 MB/s +test misc::easy0_32 ... bench: 28 ns/iter (+/- 0) = 2107 MB/s +test misc::easy0_32K ... bench: 1,878 ns/iter (+/- 25) = 17462 MB/s +test misc::easy1_1K ... bench: 81 ns/iter (+/- 0) = 12888 MB/s +test misc::easy1_1MB ... bench: 59,222 ns/iter (+/- 598) = 17706 MB/s +test misc::easy1_32 ... bench: 28 ns/iter (+/- 0) = 1857 MB/s +test misc::easy1_32K ... bench: 1,819 ns/iter (+/- 6) = 18025 MB/s +test misc::hard_1K ... bench: 1,147 ns/iter (+/- 13) = 916 MB/s +test misc::hard_1MB ... bench: 990,924 ns/iter (+/- 6,065) = 1058 MB/s +test misc::hard_32 ... bench: 82 ns/iter (+/- 3) = 719 MB/s +test misc::hard_32K ... bench: 32,218 ns/iter (+/- 471) = 1017 MB/s +test misc::literal ... bench: 15 ns/iter (+/- 0) = 3400 MB/s +test misc::long_needle1 ... bench: 464,061 ns/iter (+/- 2,241) = 215 MB/s +test misc::long_needle2 ... 
bench: 465,191 ns/iter (+/- 823) = 214 MB/s +test misc::match_class ... bench: 46 ns/iter (+/- 1) = 1760 MB/s +test misc::match_class_in_range ... bench: 16 ns/iter (+/- 0) = 5062 MB/s +test misc::match_class_unicode ... bench: 246 ns/iter (+/- 0) = 654 MB/s +test misc::medium_1K ... bench: 102 ns/iter (+/- 9) = 10313 MB/s +test misc::medium_1MB ... bench: 60,042 ns/iter (+/- 585) = 17464 MB/s +test misc::medium_32 ... bench: 29 ns/iter (+/- 1) = 2068 MB/s +test misc::medium_32K ... bench: 1,901 ns/iter (+/- 23) = 17251 MB/s +test misc::not_literal ... bench: 122 ns/iter (+/- 2) = 418 MB/s +test misc::one_pass_long_prefix ... bench: 13 ns/iter (+/- 0) = 2000 MB/s +test misc::one_pass_long_prefix_not ... bench: 13 ns/iter (+/- 0) = 2000 MB/s +test misc::one_pass_short ... bench: 36 ns/iter (+/- 0) = 472 MB/s +test misc::one_pass_short_not ... bench: 36 ns/iter (+/- 0) = 472 MB/s +test misc::reallyhard2_1K ... bench: 3,517 ns/iter (+/- 39) = 295 MB/s +test misc::reallyhard_1K ... bench: 1,123 ns/iter (+/- 12) = 935 MB/s +test misc::reallyhard_1MB ... bench: 992,521 ns/iter (+/- 13,407) = 1056 MB/s +test misc::reallyhard_32 ... bench: 71 ns/iter (+/- 0) = 830 MB/s +test misc::reallyhard_32K ... bench: 30,626 ns/iter (+/- 206) = 1070 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 2,824 ns/iter (+/- 21) = 2832 MB/s +test regexdna::find_new_lines ... bench: 1,500,377 ns/iter (+/- 8,152) = 3388 MB/s +test regexdna::subst1 ... bench: 815,769 ns/iter (+/- 14,286) = 6231 MB/s +test regexdna::subst10 ... bench: 820,459 ns/iter (+/- 57,098) = 6195 MB/s +test regexdna::subst11 ... bench: 810,986 ns/iter (+/- 4,270) = 6268 MB/s +test regexdna::subst2 ... bench: 815,568 ns/iter (+/- 35,148) = 6232 MB/s +test regexdna::subst3 ... bench: 812,590 ns/iter (+/- 6,782) = 6255 MB/s +test regexdna::subst4 ... bench: 831,679 ns/iter (+/- 12,372) = 6112 MB/s +test regexdna::subst5 ... bench: 823,207 ns/iter (+/- 12,977) = 6175 MB/s +test regexdna::subst6 ... 
bench: 815,506 ns/iter (+/- 11,610) = 6233 MB/s +test regexdna::subst7 ... bench: 818,104 ns/iter (+/- 4,807) = 6213 MB/s +test regexdna::subst8 ... bench: 815,265 ns/iter (+/- 21,504) = 6235 MB/s +test regexdna::subst9 ... bench: 809,236 ns/iter (+/- 7,003) = 6281 MB/s +test regexdna::variant1 ... bench: 8,375,573 ns/iter (+/- 80,345) = 606 MB/s +test regexdna::variant2 ... bench: 11,207,698 ns/iter (+/- 45,582) = 453 MB/s +test regexdna::variant3 ... bench: 10,505,744 ns/iter (+/- 69,756) = 483 MB/s +test regexdna::variant4 ... bench: 9,276,177 ns/iter (+/- 50,904) = 548 MB/s +test regexdna::variant5 ... bench: 9,333,446 ns/iter (+/- 41,108) = 544 MB/s +test regexdna::variant6 ... bench: 9,865,395 ns/iter (+/- 26,010) = 515 MB/s +test regexdna::variant7 ... bench: 10,033,179 ns/iter (+/- 28,272) = 506 MB/s +test regexdna::variant8 ... bench: 10,752,604 ns/iter (+/- 37,714) = 472 MB/s +test regexdna::variant9 ... bench: 11,397,272 ns/iter (+/- 41,200) = 446 MB/s +test sherlock::before_after_holmes ... bench: 3,627,616 ns/iter (+/- 18,202) = 164 MB/s +test sherlock::before_holmes ... bench: 3,614,713 ns/iter (+/- 18,191) = 164 MB/s +test sherlock::holmes_cochar_watson ... bench: 68,419 ns/iter (+/- 918) = 8695 MB/s +test sherlock::ing_suffix ... bench: 1,766,571 ns/iter (+/- 16,612) = 336 MB/s +test sherlock::ing_suffix_limited_space ... bench: 4,018,396 ns/iter (+/- 11,822) = 148 MB/s +test sherlock::letters ... bench: 8,058,390 ns/iter (+/- 39,083) = 73 MB/s +test sherlock::letters_lower ... bench: 8,014,051 ns/iter (+/- 33,500) = 74 MB/s +test sherlock::letters_upper ... bench: 1,452,421 ns/iter (+/- 157,023) = 409 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 36,248 ns/iter (+/- 252) = 16412 MB/s +test sherlock::name_alt1 ... bench: 45,538 ns/iter (+/- 235) = 13064 MB/s +test sherlock::name_alt2 ... bench: 62,202 ns/iter (+/- 892) = 9564 MB/s +test sherlock::name_alt3 ... 
bench: 623,900 ns/iter (+/- 3,139) = 953 MB/s +test sherlock::name_alt3_nocase ... bench: 2,518,464 ns/iter (+/- 31,943) = 236 MB/s +test sherlock::name_alt4 ... bench: 62,015 ns/iter (+/- 712) = 9593 MB/s +test sherlock::name_alt4_nocase ... bench: 1,162,489 ns/iter (+/- 14,622) = 511 MB/s +test sherlock::name_alt5 ... bench: 589,686 ns/iter (+/- 6,775) = 1008 MB/s +test sherlock::name_alt5_nocase ... bench: 1,359,066 ns/iter (+/- 7,487) = 437 MB/s +test sherlock::name_holmes ... bench: 45,993 ns/iter (+/- 812) = 12935 MB/s +test sherlock::name_holmes_nocase ... bench: 82,326 ns/iter (+/- 758) = 7226 MB/s +test sherlock::name_sherlock ... bench: 36,848 ns/iter (+/- 50) = 16145 MB/s +test sherlock::name_sherlock_holmes ... bench: 36,778 ns/iter (+/- 621) = 16176 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 636,825 ns/iter (+/- 2,957) = 934 MB/s +test sherlock::name_sherlock_nocase ... bench: 635,313 ns/iter (+/- 10,776) = 936 MB/s +test sherlock::name_whitespace ... bench: 37,360 ns/iter (+/- 132) = 15924 MB/s +test sherlock::no_match_common ... bench: 34,545 ns/iter (+/- 239) = 17221 MB/s +test sherlock::no_match_really_common ... bench: 49,019 ns/iter (+/- 590) = 12136 MB/s +test sherlock::no_match_uncommon ... bench: 34,410 ns/iter (+/- 182) = 17289 MB/s +test sherlock::quotes ... bench: 414,599 ns/iter (+/- 3,528) = 1434 MB/s +test sherlock::repeated_class_negation ... bench: 5,106,885 ns/iter (+/- 23,660) = 116 MB/s +test sherlock::the_lower ... bench: 234,135 ns/iter (+/- 3,821) = 2540 MB/s +test sherlock::the_nocase ... bench: 261,765 ns/iter (+/- 2,272) = 2272 MB/s +test sherlock::the_upper ... bench: 50,816 ns/iter (+/- 583) = 11707 MB/s +test sherlock::the_whitespace ... bench: 408,355 ns/iter (+/- 5,463) = 1456 MB/s +test sherlock::word_ending_n ... bench: 4,367,721 ns/iter (+/- 55,474) = 136 MB/s +test sherlock::words ... bench: 4,640,171 ns/iter (+/- 20,462) = 128 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 93 measured; 0 filtered out + diff --git a/vendor/regex/record/old-bench-log/07/re2 b/vendor/regex/record/old-bench-log/07/re2 new file mode 100644 index 0000000..6888bea --- /dev/null +++ b/vendor/regex/record/old-bench-log/07/re2 @@ -0,0 +1,101 @@ + +running 96 tests +test misc::anchored_literal_long_match ... bench: 69 ns/iter (+/- 0) = 5652 MB/s +test misc::anchored_literal_long_non_match ... bench: 15 ns/iter (+/- 0) = 26000 MB/s +test misc::anchored_literal_short_match ... bench: 69 ns/iter (+/- 0) = 376 MB/s +test misc::anchored_literal_short_non_match ... bench: 15 ns/iter (+/- 0) = 1733 MB/s +test misc::easy0_1K ... bench: 106 ns/iter (+/- 0) = 9915 MB/s +test misc::easy0_1MB ... bench: 15,311 ns/iter (+/- 113) = 68486 MB/s +test misc::easy0_32 ... bench: 100 ns/iter (+/- 3) = 590 MB/s +test misc::easy0_32K ... bench: 426 ns/iter (+/- 1) = 76983 MB/s +test misc::easy1_1K ... bench: 98 ns/iter (+/- 0) = 10653 MB/s +test misc::easy1_1MB ... bench: 15,299 ns/iter (+/- 136) = 68540 MB/s +test misc::easy1_32 ... bench: 91 ns/iter (+/- 0) = 571 MB/s +test misc::easy1_32K ... bench: 413 ns/iter (+/- 5) = 79389 MB/s +test misc::hard_1K ... bench: 1,815 ns/iter (+/- 43) = 579 MB/s +test misc::hard_1MB ... bench: 1,842,293 ns/iter (+/- 10,227) = 569 MB/s +test misc::hard_32 ... bench: 146 ns/iter (+/- 4) = 404 MB/s +test misc::hard_32K ... bench: 57,638 ns/iter (+/- 481) = 568 MB/s +test misc::literal ... bench: 64 ns/iter (+/- 1) = 796 MB/s +test misc::long_needle1 ... bench: 122,154 ns/iter (+/- 840) = 818 MB/s +test misc::long_needle2 ... bench: 122,105 ns/iter (+/- 578) = 818 MB/s +test misc::match_class ... bench: 178 ns/iter (+/- 0) = 455 MB/s +test misc::match_class_in_range ... bench: 178 ns/iter (+/- 2) = 455 MB/s +test misc::match_class_unicode ... bench: 293 ns/iter (+/- 2) = 549 MB/s +test misc::medium_1K ... bench: 1,610 ns/iter (+/- 26) = 653 MB/s +test misc::medium_1MB ... 
bench: 1,537,932 ns/iter (+/- 4,134) = 681 MB/s +test misc::medium_32 ... bench: 158 ns/iter (+/- 1) = 379 MB/s +test misc::medium_32K ... bench: 48,172 ns/iter (+/- 390) = 680 MB/s +test misc::no_exponential ... bench: 216 ns/iter (+/- 1) = 462 MB/s +test misc::not_literal ... bench: 127 ns/iter (+/- 1) = 401 MB/s +test misc::one_pass_long_prefix ... bench: 64 ns/iter (+/- 0) = 406 MB/s +test misc::one_pass_long_prefix_not ... bench: 100 ns/iter (+/- 1) = 260 MB/s +test misc::one_pass_short ... bench: 88 ns/iter (+/- 0) = 193 MB/s +test misc::one_pass_short_not ... bench: 86 ns/iter (+/- 0) = 197 MB/s +test misc::reallyhard2_1K ... bench: 1,332 ns/iter (+/- 41) = 780 MB/s +test misc::reallyhard_1K ... bench: 1,815 ns/iter (+/- 16) = 579 MB/s +test misc::reallyhard_1MB ... bench: 1,842,206 ns/iter (+/- 9,086) = 569 MB/s +test misc::reallyhard_32 ... bench: 149 ns/iter (+/- 1) = 395 MB/s +test misc::reallyhard_32K ... bench: 57,591 ns/iter (+/- 101) = 569 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 11,753 ns/iter (+/- 130) = 680 MB/s +test regexdna::find_new_lines ... bench: 24,330,235 ns/iter (+/- 374,274) = 208 MB/s +test regexdna::subst1 ... bench: 4,079,631 ns/iter (+/- 51,348) = 1246 MB/s +test regexdna::subst10 ... bench: 4,080,803 ns/iter (+/- 30,966) = 1245 MB/s +test regexdna::subst11 ... bench: 4,154,389 ns/iter (+/- 34,825) = 1223 MB/s +test regexdna::subst2 ... bench: 4,076,784 ns/iter (+/- 102,863) = 1246 MB/s +test regexdna::subst3 ... bench: 4,074,850 ns/iter (+/- 52,106) = 1247 MB/s +test regexdna::subst4 ... bench: 4,078,456 ns/iter (+/- 12,343) = 1246 MB/s +test regexdna::subst5 ... bench: 4,075,812 ns/iter (+/- 24,524) = 1247 MB/s +test regexdna::subst6 ... bench: 4,097,009 ns/iter (+/- 13,240) = 1240 MB/s +test regexdna::subst7 ... bench: 4,069,096 ns/iter (+/- 29,794) = 1249 MB/s +test regexdna::subst8 ... bench: 4,078,838 ns/iter (+/- 20,713) = 1246 MB/s +test regexdna::subst9 ... 
bench: 4,116,905 ns/iter (+/- 14,130) = 1234 MB/s +test regexdna::variant1 ... bench: 21,411,252 ns/iter (+/- 568,076) = 237 MB/s +test regexdna::variant2 ... bench: 21,082,571 ns/iter (+/- 92,912) = 241 MB/s +test regexdna::variant3 ... bench: 20,302,954 ns/iter (+/- 118,421) = 250 MB/s +test regexdna::variant4 ... bench: 21,290,669 ns/iter (+/- 102,527) = 238 MB/s +test regexdna::variant5 ... bench: 21,451,671 ns/iter (+/- 99,524) = 236 MB/s +test regexdna::variant6 ... bench: 21,057,017 ns/iter (+/- 530,904) = 241 MB/s +test regexdna::variant7 ... bench: 20,394,037 ns/iter (+/- 128,973) = 249 MB/s +test regexdna::variant8 ... bench: 17,839,069 ns/iter (+/- 122,671) = 284 MB/s +test regexdna::variant9 ... bench: 12,720,049 ns/iter (+/- 76,816) = 399 MB/s +test sherlock::before_after_holmes ... bench: 1,044,129 ns/iter (+/- 4,967) = 569 MB/s +test sherlock::before_holmes ... bench: 1,067,879 ns/iter (+/- 11,345) = 557 MB/s +test sherlock::everything_greedy ... bench: 5,193,222 ns/iter (+/- 10,990) = 114 MB/s +test sherlock::everything_greedy_nl ... bench: 2,038,599 ns/iter (+/- 18,946) = 291 MB/s +test sherlock::holmes_cochar_watson ... bench: 909,342 ns/iter (+/- 5,295) = 654 MB/s +test sherlock::holmes_coword_watson ... bench: 939,154 ns/iter (+/- 6,087) = 633 MB/s +test sherlock::ing_suffix ... bench: 2,729,081 ns/iter (+/- 15,969) = 217 MB/s +test sherlock::ing_suffix_limited_space ... bench: 1,373,143 ns/iter (+/- 17,068) = 433 MB/s +test sherlock::letters ... bench: 56,266,035 ns/iter (+/- 165,398) = 10 MB/s +test sherlock::letters_lower ... bench: 54,590,671 ns/iter (+/- 138,842) = 10 MB/s +test sherlock::letters_upper ... bench: 2,702,242 ns/iter (+/- 9,889) = 220 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 2,430,065 ns/iter (+/- 27,756) = 244 MB/s +test sherlock::name_alt1 ... bench: 45,514 ns/iter (+/- 403) = 13071 MB/s +test sherlock::name_alt2 ... bench: 975,861 ns/iter (+/- 11,553) = 609 MB/s +test sherlock::name_alt3 ... 
bench: 1,070,967 ns/iter (+/- 11,065) = 555 MB/s +test sherlock::name_alt3_nocase ... bench: 2,574,585 ns/iter (+/- 39,816) = 231 MB/s +test sherlock::name_alt4 ... bench: 978,776 ns/iter (+/- 25,503) = 607 MB/s +test sherlock::name_alt4_nocase ... bench: 1,643,230 ns/iter (+/- 27,685) = 362 MB/s +test sherlock::name_alt5 ... bench: 998,349 ns/iter (+/- 6,658) = 595 MB/s +test sherlock::name_alt5_nocase ... bench: 1,781,006 ns/iter (+/- 22,507) = 334 MB/s +test sherlock::name_holmes ... bench: 92,561 ns/iter (+/- 1,358) = 6427 MB/s +test sherlock::name_holmes_nocase ... bench: 1,281,827 ns/iter (+/- 7,651) = 464 MB/s +test sherlock::name_sherlock ... bench: 31,994 ns/iter (+/- 326) = 18595 MB/s +test sherlock::name_sherlock_holmes ... bench: 34,272 ns/iter (+/- 445) = 17359 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 1,218,006 ns/iter (+/- 19,301) = 488 MB/s +test sherlock::name_sherlock_nocase ... bench: 1,214,744 ns/iter (+/- 11,473) = 489 MB/s +test sherlock::name_whitespace ... bench: 35,455 ns/iter (+/- 412) = 16779 MB/s +test sherlock::no_match_common ... bench: 299,771 ns/iter (+/- 7,799) = 1984 MB/s +test sherlock::no_match_really_common ... bench: 299,595 ns/iter (+/- 926) = 1985 MB/s +test sherlock::no_match_uncommon ... bench: 9,803 ns/iter (+/- 139) = 60688 MB/s +test sherlock::quotes ... bench: 1,033,423 ns/iter (+/- 9,177) = 575 MB/s +test sherlock::the_lower ... bench: 1,454,358 ns/iter (+/- 75,304) = 409 MB/s +test sherlock::the_nocase ... bench: 2,486,681 ns/iter (+/- 9,026) = 239 MB/s +test sherlock::the_upper ... bench: 123,989 ns/iter (+/- 1,097) = 4798 MB/s +test sherlock::the_whitespace ... bench: 1,454,732 ns/iter (+/- 118,006) = 408 MB/s +test sherlock::word_ending_n ... bench: 1,922,008 ns/iter (+/- 15,040) = 309 MB/s +test sherlock::words ... bench: 16,054,888 ns/iter (+/- 90,684) = 37 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 96 measured; 0 filtered out + diff --git a/vendor/regex/record/old-bench-log/07/rust b/vendor/regex/record/old-bench-log/07/rust new file mode 100644 index 0000000..f5e73a2 --- /dev/null +++ b/vendor/regex/record/old-bench-log/07/rust @@ -0,0 +1,113 @@ + +running 108 tests +test misc::anchored_literal_long_match ... bench: 15 ns/iter (+/- 0) = 26000 MB/s +test misc::anchored_literal_long_non_match ... bench: 16 ns/iter (+/- 0) = 24375 MB/s +test misc::anchored_literal_short_match ... bench: 14 ns/iter (+/- 0) = 1857 MB/s +test misc::anchored_literal_short_non_match ... bench: 16 ns/iter (+/- 0) = 1625 MB/s +test misc::easy0_1K ... bench: 11 ns/iter (+/- 0) = 95545 MB/s +test misc::easy0_1MB ... bench: 15 ns/iter (+/- 0) = 69906866 MB/s +test misc::easy0_32 ... bench: 11 ns/iter (+/- 0) = 5363 MB/s +test misc::easy0_32K ... bench: 11 ns/iter (+/- 0) = 2981363 MB/s +test misc::easy1_1K ... bench: 36 ns/iter (+/- 0) = 29000 MB/s +test misc::easy1_1MB ... bench: 38 ns/iter (+/- 0) = 27594631 MB/s +test misc::easy1_32 ... bench: 39 ns/iter (+/- 0) = 1333 MB/s +test misc::easy1_32K ... bench: 36 ns/iter (+/- 0) = 910777 MB/s +test misc::hard_1K ... bench: 48 ns/iter (+/- 0) = 21895 MB/s +test misc::hard_1MB ... bench: 51 ns/iter (+/- 0) = 20560843 MB/s +test misc::hard_32 ... bench: 48 ns/iter (+/- 1) = 1229 MB/s +test misc::hard_32K ... bench: 48 ns/iter (+/- 0) = 683229 MB/s +test misc::literal ... bench: 10 ns/iter (+/- 0) = 5100 MB/s +test misc::long_needle1 ... bench: 956 ns/iter (+/- 14) = 104603 MB/s +test misc::long_needle2 ... bench: 538,237 ns/iter (+/- 8,739) = 185 MB/s +test misc::match_class ... bench: 57 ns/iter (+/- 0) = 1421 MB/s +test misc::match_class_in_range ... bench: 22 ns/iter (+/- 0) = 3681 MB/s +test misc::match_class_unicode ... bench: 245 ns/iter (+/- 4) = 657 MB/s +test misc::medium_1K ... bench: 13 ns/iter (+/- 0) = 80923 MB/s +test misc::medium_1MB ... 
bench: 15 ns/iter (+/- 0) = 69906933 MB/s +test misc::medium_32 ... bench: 12 ns/iter (+/- 0) = 5000 MB/s +test misc::medium_32K ... bench: 12 ns/iter (+/- 0) = 2733000 MB/s +test misc::no_exponential ... bench: 318 ns/iter (+/- 0) = 314 MB/s +test misc::not_literal ... bench: 85 ns/iter (+/- 0) = 600 MB/s +test misc::one_pass_long_prefix ... bench: 48 ns/iter (+/- 0) = 541 MB/s +test misc::one_pass_long_prefix_not ... bench: 48 ns/iter (+/- 0) = 541 MB/s +test misc::one_pass_short ... bench: 34 ns/iter (+/- 0) = 500 MB/s +test misc::one_pass_short_not ... bench: 37 ns/iter (+/- 0) = 459 MB/s +test misc::reallyhard2_1K ... bench: 51 ns/iter (+/- 0) = 20392 MB/s +test misc::reallyhard_1K ... bench: 1,547 ns/iter (+/- 19) = 679 MB/s +test misc::reallyhard_1MB ... bench: 1,533,883 ns/iter (+/- 9,553) = 683 MB/s +test misc::reallyhard_32 ... bench: 96 ns/iter (+/- 0) = 614 MB/s +test misc::reallyhard_32K ... bench: 47,989 ns/iter (+/- 198) = 683 MB/s +test misc::replace_all ... bench: 136 ns/iter (+/- 0) +test misc::reverse_suffix_no_quadratic ... bench: 4,016 ns/iter (+/- 21) = 1992 MB/s +test misc::short_haystack_1000000x ... bench: 64,033 ns/iter (+/- 470) = 124935 MB/s +test misc::short_haystack_100000x ... bench: 6,472 ns/iter (+/- 44) = 123611 MB/s +test misc::short_haystack_10000x ... bench: 1,906 ns/iter (+/- 49) = 41978 MB/s +test misc::short_haystack_1000x ... bench: 362 ns/iter (+/- 1) = 22129 MB/s +test misc::short_haystack_100x ... bench: 259 ns/iter (+/- 2) = 3131 MB/s +test misc::short_haystack_10x ... bench: 228 ns/iter (+/- 0) = 399 MB/s +test misc::short_haystack_1x ... bench: 223 ns/iter (+/- 2) = 85 MB/s +test misc::short_haystack_2x ... bench: 224 ns/iter (+/- 2) = 120 MB/s +test misc::short_haystack_3x ... bench: 221 ns/iter (+/- 2) = 158 MB/s +test misc::short_haystack_4x ... bench: 223 ns/iter (+/- 2) = 192 MB/s +test regexdna::find_new_lines ... bench: 11,885,905 ns/iter (+/- 23,501) = 427 MB/s +test regexdna::subst1 ... 
bench: 712,544 ns/iter (+/- 16,100) = 7134 MB/s +test regexdna::subst10 ... bench: 709,739 ns/iter (+/- 8,467) = 7162 MB/s +test regexdna::subst11 ... bench: 714,261 ns/iter (+/- 8,495) = 7117 MB/s +test regexdna::subst2 ... bench: 711,197 ns/iter (+/- 14,736) = 7147 MB/s +test regexdna::subst3 ... bench: 718,083 ns/iter (+/- 5,050) = 7079 MB/s +test regexdna::subst4 ... bench: 725,196 ns/iter (+/- 20,044) = 7009 MB/s +test regexdna::subst5 ... bench: 709,301 ns/iter (+/- 10,961) = 7166 MB/s +test regexdna::subst6 ... bench: 715,658 ns/iter (+/- 16,431) = 7103 MB/s +test regexdna::subst7 ... bench: 707,472 ns/iter (+/- 5,764) = 7185 MB/s +test regexdna::subst8 ... bench: 707,300 ns/iter (+/- 19,545) = 7187 MB/s +test regexdna::subst9 ... bench: 709,950 ns/iter (+/- 11,319) = 7160 MB/s +test regexdna::variant1 ... bench: 2,498,980 ns/iter (+/- 67,933) = 2034 MB/s +test regexdna::variant2 ... bench: 5,544,923 ns/iter (+/- 31,911) = 916 MB/s +test regexdna::variant3 ... bench: 6,441,568 ns/iter (+/- 20,197) = 789 MB/s +test regexdna::variant4 ... bench: 6,421,276 ns/iter (+/- 161,499) = 791 MB/s +test regexdna::variant5 ... bench: 5,093,567 ns/iter (+/- 18,696) = 998 MB/s +test regexdna::variant6 ... bench: 5,094,859 ns/iter (+/- 22,894) = 997 MB/s +test regexdna::variant7 ... bench: 4,540,111 ns/iter (+/- 11,863) = 1119 MB/s +test regexdna::variant8 ... bench: 4,636,741 ns/iter (+/- 23,448) = 1096 MB/s +test regexdna::variant9 ... bench: 4,557,500 ns/iter (+/- 16,168) = 1115 MB/s +test sherlock::before_after_holmes ... bench: 880,959 ns/iter (+/- 3,004) = 675 MB/s +test sherlock::before_holmes ... bench: 54,416 ns/iter (+/- 1,099) = 10933 MB/s +test sherlock::everything_greedy ... bench: 1,736,180 ns/iter (+/- 9,410) = 342 MB/s +test sherlock::everything_greedy_nl ... bench: 783,848 ns/iter (+/- 19,640) = 758 MB/s +test sherlock::holmes_cochar_watson ... bench: 90,085 ns/iter (+/- 499) = 6604 MB/s +test sherlock::holmes_coword_watson ... 
bench: 459,431 ns/iter (+/- 830) = 1294 MB/s +test sherlock::ing_suffix ... bench: 348,103 ns/iter (+/- 9,052) = 1709 MB/s +test sherlock::ing_suffix_limited_space ... bench: 1,017,387 ns/iter (+/- 3,906) = 584 MB/s +test sherlock::letters ... bench: 18,265,074 ns/iter (+/- 463,241) = 32 MB/s +test sherlock::letters_lower ... bench: 17,846,209 ns/iter (+/- 431,089) = 33 MB/s +test sherlock::letters_upper ... bench: 1,594,743 ns/iter (+/- 3,151) = 373 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 873,308 ns/iter (+/- 1,379) = 681 MB/s +test sherlock::name_alt1 ... bench: 21,144 ns/iter (+/- 315) = 28137 MB/s +test sherlock::name_alt2 ... bench: 71,354 ns/iter (+/- 1,432) = 8337 MB/s +test sherlock::name_alt3 ... bench: 79,167 ns/iter (+/- 294) = 7514 MB/s +test sherlock::name_alt3_nocase ... bench: 1,111,300 ns/iter (+/- 4,434) = 535 MB/s +test sherlock::name_alt4 ... bench: 100,864 ns/iter (+/- 570) = 5898 MB/s +test sherlock::name_alt4_nocase ... bench: 157,266 ns/iter (+/- 4,048) = 3782 MB/s +test sherlock::name_alt5 ... bench: 74,375 ns/iter (+/- 576) = 7999 MB/s +test sherlock::name_alt5_nocase ... bench: 467,879 ns/iter (+/- 2,115) = 1271 MB/s +test sherlock::name_holmes ... bench: 26,856 ns/iter (+/- 345) = 22152 MB/s +test sherlock::name_holmes_nocase ... bench: 124,140 ns/iter (+/- 1,111) = 4792 MB/s +test sherlock::name_sherlock ... bench: 52,330 ns/iter (+/- 316) = 11368 MB/s +test sherlock::name_sherlock_holmes ... bench: 19,646 ns/iter (+/- 355) = 30282 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 107,973 ns/iter (+/- 508) = 5510 MB/s +test sherlock::name_sherlock_nocase ... bench: 105,141 ns/iter (+/- 426) = 5658 MB/s +test sherlock::name_whitespace ... bench: 61,149 ns/iter (+/- 350) = 9729 MB/s +test sherlock::no_match_common ... bench: 11,735 ns/iter (+/- 185) = 50697 MB/s +test sherlock::no_match_really_common ... bench: 274,089 ns/iter (+/- 617) = 2170 MB/s +test sherlock::no_match_uncommon ... 
bench: 11,581 ns/iter (+/- 298) = 51371 MB/s +test sherlock::quotes ... bench: 447,749 ns/iter (+/- 1,173) = 1328 MB/s +test sherlock::repeated_class_negation ... bench: 69,119,491 ns/iter (+/- 117,739) = 8 MB/s +test sherlock::the_lower ... bench: 492,559 ns/iter (+/- 1,674) = 1207 MB/s +test sherlock::the_nocase ... bench: 341,445 ns/iter (+/- 6,455) = 1742 MB/s +test sherlock::the_upper ... bench: 30,555 ns/iter (+/- 168) = 19470 MB/s +test sherlock::the_whitespace ... bench: 950,630 ns/iter (+/- 25,179) = 625 MB/s +test sherlock::word_ending_n ... bench: 1,551,930 ns/iter (+/- 17,792) = 383 MB/s +test sherlock::words ... bench: 7,229,870 ns/iter (+/- 25,046) = 82 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 108 measured; 0 filtered out + diff --git a/vendor/regex/record/old-bench-log/07/rust-bytes b/vendor/regex/record/old-bench-log/07/rust-bytes new file mode 100644 index 0000000..310d775 --- /dev/null +++ b/vendor/regex/record/old-bench-log/07/rust-bytes @@ -0,0 +1,101 @@ + +running 96 tests +test misc::anchored_literal_long_match ... bench: 16 ns/iter (+/- 0) = 24375 MB/s +test misc::anchored_literal_long_non_match ... bench: 16 ns/iter (+/- 0) = 24375 MB/s +test misc::anchored_literal_short_match ... bench: 14 ns/iter (+/- 0) = 1857 MB/s +test misc::anchored_literal_short_non_match ... bench: 16 ns/iter (+/- 0) = 1625 MB/s +test misc::easy0_1K ... bench: 11 ns/iter (+/- 0) = 95545 MB/s +test misc::easy0_1MB ... bench: 14 ns/iter (+/- 0) = 74900214 MB/s +test misc::easy0_32 ... bench: 11 ns/iter (+/- 0) = 5363 MB/s +test misc::easy0_32K ... bench: 11 ns/iter (+/- 0) = 2981363 MB/s +test misc::easy1_1K ... bench: 36 ns/iter (+/- 0) = 29000 MB/s +test misc::easy1_1MB ... bench: 38 ns/iter (+/- 0) = 27594631 MB/s +test misc::easy1_32 ... bench: 36 ns/iter (+/- 0) = 1444 MB/s +test misc::easy1_32K ... bench: 36 ns/iter (+/- 0) = 910777 MB/s +test misc::hard_1K ... bench: 46 ns/iter (+/- 0) = 22847 MB/s +test misc::hard_1MB ... 
bench: 49 ns/iter (+/- 0) = 21400061 MB/s +test misc::hard_32 ... bench: 46 ns/iter (+/- 0) = 1282 MB/s +test misc::hard_32K ... bench: 46 ns/iter (+/- 0) = 712934 MB/s +test misc::literal ... bench: 10 ns/iter (+/- 0) = 5100 MB/s +test misc::long_needle1 ... bench: 1,119 ns/iter (+/- 22) = 89366 MB/s +test misc::long_needle2 ... bench: 535,168 ns/iter (+/- 2,976) = 186 MB/s +test misc::match_class ... bench: 67 ns/iter (+/- 0) = 1208 MB/s +test misc::match_class_in_range ... bench: 21 ns/iter (+/- 0) = 3857 MB/s +test misc::medium_1K ... bench: 12 ns/iter (+/- 0) = 87666 MB/s +test misc::medium_1MB ... bench: 16 ns/iter (+/- 0) = 65537750 MB/s +test misc::medium_32 ... bench: 12 ns/iter (+/- 0) = 5000 MB/s +test misc::medium_32K ... bench: 12 ns/iter (+/- 0) = 2733000 MB/s +test misc::no_exponential ... bench: 320 ns/iter (+/- 3) = 312 MB/s +test misc::not_literal ... bench: 86 ns/iter (+/- 0) = 593 MB/s +test misc::one_pass_long_prefix ... bench: 48 ns/iter (+/- 0) = 541 MB/s +test misc::one_pass_long_prefix_not ... bench: 48 ns/iter (+/- 0) = 541 MB/s +test misc::one_pass_short ... bench: 34 ns/iter (+/- 0) = 500 MB/s +test misc::one_pass_short_not ... bench: 37 ns/iter (+/- 0) = 459 MB/s +test misc::reallyhard2_1K ... bench: 50 ns/iter (+/- 0) = 20800 MB/s +test misc::reallyhard_1K ... bench: 1,548 ns/iter (+/- 0) = 678 MB/s +test misc::reallyhard_1MB ... bench: 1,534,068 ns/iter (+/- 14,813) = 683 MB/s +test misc::reallyhard_32 ... bench: 98 ns/iter (+/- 1) = 602 MB/s +test misc::reallyhard_32K ... bench: 48,003 ns/iter (+/- 128) = 683 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 4,015 ns/iter (+/- 11) = 1992 MB/s +test regexdna::find_new_lines ... bench: 11,859,603 ns/iter (+/- 22,707) = 428 MB/s +test regexdna::subst1 ... bench: 717,255 ns/iter (+/- 3,261) = 7087 MB/s +test regexdna::subst10 ... bench: 719,600 ns/iter (+/- 4,712) = 7064 MB/s +test regexdna::subst11 ... bench: 708,612 ns/iter (+/- 6,314) = 7173 MB/s +test regexdna::subst2 ... 
bench: 715,174 ns/iter (+/- 5,097) = 7107 MB/s +test regexdna::subst3 ... bench: 711,261 ns/iter (+/- 12,051) = 7147 MB/s +test regexdna::subst4 ... bench: 761,920 ns/iter (+/- 4,924) = 6671 MB/s +test regexdna::subst5 ... bench: 740,755 ns/iter (+/- 12,762) = 6862 MB/s +test regexdna::subst6 ... bench: 713,936 ns/iter (+/- 7,103) = 7120 MB/s +test regexdna::subst7 ... bench: 710,142 ns/iter (+/- 5,377) = 7158 MB/s +test regexdna::subst8 ... bench: 712,154 ns/iter (+/- 4,485) = 7138 MB/s +test regexdna::subst9 ... bench: 713,214 ns/iter (+/- 6,830) = 7127 MB/s +test regexdna::variant1 ... bench: 2,448,709 ns/iter (+/- 10,799) = 2075 MB/s +test regexdna::variant2 ... bench: 5,541,606 ns/iter (+/- 26,197) = 917 MB/s +test regexdna::variant3 ... bench: 6,563,736 ns/iter (+/- 163,805) = 774 MB/s +test regexdna::variant4 ... bench: 6,428,096 ns/iter (+/- 38,372) = 790 MB/s +test regexdna::variant5 ... bench: 5,110,667 ns/iter (+/- 141,363) = 994 MB/s +test regexdna::variant6 ... bench: 5,086,936 ns/iter (+/- 25,675) = 999 MB/s +test regexdna::variant7 ... bench: 4,607,360 ns/iter (+/- 31,834) = 1103 MB/s +test regexdna::variant8 ... bench: 4,636,550 ns/iter (+/- 11,143) = 1096 MB/s +test regexdna::variant9 ... bench: 4,534,765 ns/iter (+/- 18,435) = 1120 MB/s +test sherlock::before_after_holmes ... bench: 880,980 ns/iter (+/- 1,386) = 675 MB/s +test sherlock::before_holmes ... bench: 56,626 ns/iter (+/- 612) = 10506 MB/s +test sherlock::everything_greedy ... bench: 1,715,022 ns/iter (+/- 7,374) = 346 MB/s +test sherlock::everything_greedy_nl ... bench: 778,398 ns/iter (+/- 6,195) = 764 MB/s +test sherlock::holmes_cochar_watson ... bench: 91,093 ns/iter (+/- 266) = 6531 MB/s +test sherlock::holmes_coword_watson ... bench: 457,793 ns/iter (+/- 3,094) = 1299 MB/s +test sherlock::ing_suffix ... bench: 348,696 ns/iter (+/- 2,174) = 1706 MB/s +test sherlock::ing_suffix_limited_space ... bench: 1,017,664 ns/iter (+/- 8,581) = 584 MB/s +test sherlock::letters ... 
bench: 19,098,779 ns/iter (+/- 36,233) = 31 MB/s +test sherlock::letters_lower ... bench: 17,748,386 ns/iter (+/- 37,835) = 33 MB/s +test sherlock::letters_upper ... bench: 1,592,729 ns/iter (+/- 2,977) = 373 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 873,365 ns/iter (+/- 1,399) = 681 MB/s +test sherlock::name_alt1 ... bench: 21,965 ns/iter (+/- 336) = 27085 MB/s +test sherlock::name_alt2 ... bench: 73,887 ns/iter (+/- 107) = 8051 MB/s +test sherlock::name_alt3 ... bench: 79,186 ns/iter (+/- 274) = 7513 MB/s +test sherlock::name_alt3_nocase ... bench: 1,111,949 ns/iter (+/- 3,589) = 535 MB/s +test sherlock::name_alt4 ... bench: 102,493 ns/iter (+/- 959) = 5804 MB/s +test sherlock::name_alt4_nocase ... bench: 158,438 ns/iter (+/- 946) = 3754 MB/s +test sherlock::name_alt5 ... bench: 74,362 ns/iter (+/- 139) = 8000 MB/s +test sherlock::name_alt5_nocase ... bench: 469,720 ns/iter (+/- 5,941) = 1266 MB/s +test sherlock::name_holmes ... bench: 28,919 ns/iter (+/- 372) = 20572 MB/s +test sherlock::name_holmes_nocase ... bench: 123,251 ns/iter (+/- 786) = 4827 MB/s +test sherlock::name_sherlock ... bench: 53,032 ns/iter (+/- 487) = 11218 MB/s +test sherlock::name_sherlock_holmes ... bench: 20,566 ns/iter (+/- 280) = 28927 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 108,166 ns/iter (+/- 303) = 5500 MB/s +test sherlock::name_sherlock_nocase ... bench: 105,034 ns/iter (+/- 797) = 5664 MB/s +test sherlock::name_whitespace ... bench: 60,968 ns/iter (+/- 490) = 9758 MB/s +test sherlock::no_match_common ... bench: 12,191 ns/iter (+/- 128) = 48801 MB/s +test sherlock::no_match_really_common ... bench: 274,528 ns/iter (+/- 1,101) = 2167 MB/s +test sherlock::no_match_uncommon ... bench: 12,197 ns/iter (+/- 191) = 48776 MB/s +test sherlock::quotes ... bench: 446,264 ns/iter (+/- 5,936) = 1333 MB/s +test sherlock::repeated_class_negation ... bench: 69,728,764 ns/iter (+/- 155,104) = 8 MB/s +test sherlock::the_lower ... 
bench: 493,734 ns/iter (+/- 5,997) = 1204 MB/s +test sherlock::the_nocase ... bench: 339,088 ns/iter (+/- 3,760) = 1754 MB/s +test sherlock::the_upper ... bench: 30,957 ns/iter (+/- 313) = 19218 MB/s +test sherlock::the_whitespace ... bench: 921,059 ns/iter (+/- 8,102) = 645 MB/s +test sherlock::word_ending_n ... bench: 1,530,899 ns/iter (+/- 18,006) = 388 MB/s +test sherlock::words ... bench: 6,959,355 ns/iter (+/- 31,671) = 85 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 96 measured; 0 filtered out + diff --git a/vendor/regex/record/old-bench-log/07/stdcpp b/vendor/regex/record/old-bench-log/07/stdcpp new file mode 100644 index 0000000..57c25ae --- /dev/null +++ b/vendor/regex/record/old-bench-log/07/stdcpp @@ -0,0 +1,87 @@ + +running 82 tests +test misc::anchored_literal_long_match ... bench: 142 ns/iter (+/- 0) = 2746 MB/s +test misc::anchored_literal_long_non_match ... bench: 5,504 ns/iter (+/- 20) = 70 MB/s +test misc::anchored_literal_short_match ... bench: 143 ns/iter (+/- 0) = 181 MB/s +test misc::anchored_literal_short_non_match ... bench: 471 ns/iter (+/- 1) = 55 MB/s +test misc::easy0_1K ... bench: 14,534 ns/iter (+/- 87) = 72 MB/s +test misc::easy0_1MB ... bench: 14,554,912 ns/iter (+/- 33,264) = 72 MB/s +test misc::easy0_32 ... bench: 730 ns/iter (+/- 1) = 80 MB/s +test misc::easy0_32K ... bench: 454,911 ns/iter (+/- 526) = 72 MB/s +test misc::easy1_1K ... bench: 14,486 ns/iter (+/- 45) = 72 MB/s +test misc::easy1_1MB ... bench: 14,555,850 ns/iter (+/- 108,290) = 72 MB/s +test misc::easy1_32 ... bench: 692 ns/iter (+/- 1) = 75 MB/s +test misc::easy1_32K ... bench: 456,269 ns/iter (+/- 2,856) = 71 MB/s +test misc::hard_1K ... bench: 299,581 ns/iter (+/- 7,493) = 3 MB/s +test misc::hard_1MB ... bench: 314,289,240 ns/iter (+/- 128,869) = 3 MB/s +test misc::hard_32 ... bench: 9,202 ns/iter (+/- 17) = 6 MB/s +test misc::hard_32K ... bench: 9,777,807 ns/iter (+/- 19,451) = 3 MB/s +test misc::literal ... 
bench: 804 ns/iter (+/- 2) = 63 MB/s +test misc::long_needle1 ... bench: 15,712,941 ns/iter (+/- 23,893) = 6 MB/s +test misc::long_needle2 ... bench: 15,955,109 ns/iter (+/- 26,652) = 6 MB/s +test misc::match_class ... bench: 1,250 ns/iter (+/- 4) = 64 MB/s +test misc::match_class_in_range ... bench: 1,250 ns/iter (+/- 4) = 64 MB/s +test misc::medium_1K ... bench: 14,913 ns/iter (+/- 108) = 70 MB/s +test misc::medium_1MB ... bench: 14,929,542 ns/iter (+/- 38,890) = 70 MB/s +test misc::medium_32 ... bench: 736 ns/iter (+/- 0) = 81 MB/s +test misc::medium_32K ... bench: 466,504 ns/iter (+/- 1,488) = 70 MB/s +test misc::not_literal ... bench: 1,015 ns/iter (+/- 8) = 50 MB/s +test misc::one_pass_long_prefix ... bench: 262 ns/iter (+/- 0) = 99 MB/s +test misc::one_pass_long_prefix_not ... bench: 263 ns/iter (+/- 3) = 98 MB/s +test misc::one_pass_short ... bench: 502 ns/iter (+/- 2) = 33 MB/s +test misc::one_pass_short_not ... bench: 498 ns/iter (+/- 0) = 34 MB/s +test misc::reallyhard2_1K ... bench: 304,485 ns/iter (+/- 762) = 3 MB/s +test misc::reallyhard_1K ... bench: 292,315 ns/iter (+/- 1,985) = 3 MB/s +test misc::reallyhard_1MB ... bench: 313,208,610 ns/iter (+/- 163,013) = 3 MB/s +test misc::reallyhard_32 ... bench: 9,232 ns/iter (+/- 21) = 6 MB/s +test misc::reallyhard_32K ... bench: 9,952,463 ns/iter (+/- 22,317) = 3 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 114,029 ns/iter (+/- 734) = 70 MB/s +test regexdna::find_new_lines ... bench: 121,481,845 ns/iter (+/- 289,966) = 41 MB/s +test regexdna::subst1 ... bench: 73,580,323 ns/iter (+/- 82,998) = 69 MB/s +test regexdna::subst10 ... bench: 73,588,543 ns/iter (+/- 95,250) = 69 MB/s +test regexdna::subst11 ... bench: 73,592,436 ns/iter (+/- 86,358) = 69 MB/s +test regexdna::subst2 ... bench: 73,581,323 ns/iter (+/- 88,210) = 69 MB/s +test regexdna::subst3 ... bench: 73,577,422 ns/iter (+/- 48,215) = 69 MB/s +test regexdna::subst4 ... 
bench: 73,586,896 ns/iter (+/- 82,117) = 69 MB/s +test regexdna::subst5 ... bench: 73,652,696 ns/iter (+/- 95,155) = 69 MB/s +test regexdna::subst6 ... bench: 74,633,620 ns/iter (+/- 74,754) = 68 MB/s +test regexdna::subst7 ... bench: 73,586,338 ns/iter (+/- 82,645) = 69 MB/s +test regexdna::subst8 ... bench: 75,009,572 ns/iter (+/- 116,800) = 67 MB/s +test regexdna::subst9 ... bench: 73,581,469 ns/iter (+/- 146,286) = 69 MB/s +test regexdna::variant1 ... bench: 140,768,740 ns/iter (+/- 113,580) = 36 MB/s +test regexdna::variant2 ... bench: 153,330,005 ns/iter (+/- 11,581,095) = 33 MB/s +test regexdna::variant3 ... bench: 145,484,512 ns/iter (+/- 150,566) = 34 MB/s +test regexdna::variant4 ... bench: 141,659,767 ns/iter (+/- 123,940) = 35 MB/s +test regexdna::variant5 ... bench: 145,309,207 ns/iter (+/- 129,675) = 34 MB/s +test regexdna::variant6 ... bench: 141,145,017 ns/iter (+/- 164,414) = 36 MB/s +test regexdna::variant7 ... bench: 141,897,206 ns/iter (+/- 212,981) = 35 MB/s +test regexdna::variant8 ... bench: 150,467,139 ns/iter (+/- 120,619) = 33 MB/s +test regexdna::variant9 ... bench: 151,635,430 ns/iter (+/- 128,912) = 33 MB/s +test sherlock::before_after_holmes ... bench: 36,941,681 ns/iter (+/- 36,199) = 16 MB/s +test sherlock::before_holmes ... bench: 36,920,860 ns/iter (+/- 38,258) = 16 MB/s +test sherlock::everything_greedy ... bench: 9,047,684 ns/iter (+/- 18,290) = 65 MB/s +test sherlock::holmes_cochar_watson ... bench: 12,634,723 ns/iter (+/- 36,086) = 47 MB/s +test sherlock::ing_suffix ... bench: 30,232,323 ns/iter (+/- 49,084) = 19 MB/s +test sherlock::ing_suffix_limited_space ... bench: 18,837,733 ns/iter (+/- 39,569) = 31 MB/s +test sherlock::name_alt1 ... bench: 12,462,918 ns/iter (+/- 17,158) = 47 MB/s +test sherlock::name_alt2 ... bench: 12,490,419 ns/iter (+/- 26,214) = 47 MB/s +test sherlock::name_alt3 ... bench: 33,156,941 ns/iter (+/- 47,236) = 17 MB/s +test sherlock::name_alt4 ... 
bench: 12,583,828 ns/iter (+/- 26,121) = 47 MB/s +test sherlock::name_alt5 ... bench: 16,615,345 ns/iter (+/- 22,930) = 35 MB/s +test sherlock::name_holmes ... bench: 8,307,917 ns/iter (+/- 17,452) = 71 MB/s +test sherlock::name_sherlock ... bench: 8,273,395 ns/iter (+/- 25,717) = 71 MB/s +test sherlock::name_sherlock_holmes ... bench: 8,270,000 ns/iter (+/- 19,702) = 71 MB/s +test sherlock::name_whitespace ... bench: 8,453,784 ns/iter (+/- 19,604) = 70 MB/s +test sherlock::no_match_common ... bench: 8,679,069 ns/iter (+/- 27,721) = 68 MB/s +test sherlock::no_match_really_common ... bench: 8,679,099 ns/iter (+/- 17,665) = 68 MB/s +test sherlock::no_match_uncommon ... bench: 8,260,259 ns/iter (+/- 147,913) = 72 MB/s +test sherlock::quotes ... bench: 10,257,367 ns/iter (+/- 25,054) = 58 MB/s +test sherlock::repeated_class_negation ... bench: 25,374,678 ns/iter (+/- 23,494) = 23 MB/s +test sherlock::the_lower ... bench: 9,424,206 ns/iter (+/- 23,231) = 63 MB/s +test sherlock::the_upper ... bench: 8,350,015 ns/iter (+/- 23,176) = 71 MB/s +test sherlock::the_whitespace ... bench: 9,285,991 ns/iter (+/- 16,835) = 64 MB/s +test sherlock::word_ending_n ... bench: 69,609,427 ns/iter (+/- 52,974) = 8 MB/s +test sherlock::words ... bench: 20,107,601 ns/iter (+/- 36,086) = 29 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 82 measured; 0 filtered out + diff --git a/vendor/regex/record/old-bench-log/07/stdcpp-libcxx b/vendor/regex/record/old-bench-log/07/stdcpp-libcxx new file mode 100644 index 0000000..ff21e67 --- /dev/null +++ b/vendor/regex/record/old-bench-log/07/stdcpp-libcxx @@ -0,0 +1,87 @@ + +running 82 tests +test misc::anchored_literal_long_match ... bench: 162 ns/iter (+/- 0) = 2407 MB/s +test misc::anchored_literal_long_non_match ... bench: 21,901 ns/iter (+/- 140) = 17 MB/s +test misc::anchored_literal_short_match ... bench: 162 ns/iter (+/- 0) = 160 MB/s +test misc::anchored_literal_short_non_match ... 
bench: 1,501 ns/iter (+/- 1) = 17 MB/s +test misc::easy0_1K ... bench: 39,405 ns/iter (+/- 250) = 26 MB/s +test misc::easy0_1MB ... bench: 39,706,678 ns/iter (+/- 103,211) = 26 MB/s +test misc::easy0_32 ... bench: 1,415 ns/iter (+/- 3) = 41 MB/s +test misc::easy0_32K ... bench: 1,241,085 ns/iter (+/- 5,625) = 26 MB/s +test misc::easy1_1K ... bench: 39,421 ns/iter (+/- 275) = 26 MB/s +test misc::easy1_1MB ... bench: 39,725,158 ns/iter (+/- 64,488) = 26 MB/s +test misc::easy1_32 ... bench: 1,421 ns/iter (+/- 8) = 36 MB/s +test misc::easy1_32K ... bench: 1,240,953 ns/iter (+/- 5,794) = 26 MB/s +test misc::hard_1K ... bench: 1,263,948 ns/iter (+/- 31,771) +test misc::hard_1MB ... bench: 1,331,000,673 ns/iter (+/- 7,401,131) +test misc::hard_32 ... bench: 37,752 ns/iter (+/- 109) = 1 MB/s +test misc::hard_32K ... bench: 41,044,286 ns/iter (+/- 57,765) +test misc::literal ... bench: 1,980 ns/iter (+/- 7) = 25 MB/s +test misc::long_needle1 ... bench: 12,425,121 ns/iter (+/- 36,611) = 8 MB/s +test misc::long_needle2 ... bench: 12,568,992 ns/iter (+/- 28,513) = 7 MB/s +test misc::match_class ... bench: 3,918 ns/iter (+/- 67) = 20 MB/s +test misc::match_class_in_range ... bench: 3,534 ns/iter (+/- 11) = 22 MB/s +test misc::medium_1K ... bench: 44,910 ns/iter (+/- 167) = 23 MB/s +test misc::medium_1MB ... bench: 45,558,328 ns/iter (+/- 77,166) = 23 MB/s +test misc::medium_32 ... bench: 1,599 ns/iter (+/- 12) = 37 MB/s +test misc::medium_32K ... bench: 1,423,945 ns/iter (+/- 9,468) = 23 MB/s +test misc::not_literal ... bench: 2,051 ns/iter (+/- 16) = 24 MB/s +test misc::one_pass_long_prefix ... bench: 222 ns/iter (+/- 0) = 117 MB/s +test misc::one_pass_long_prefix_not ... bench: 223 ns/iter (+/- 0) = 116 MB/s +test misc::one_pass_short ... bench: 2,002 ns/iter (+/- 37) = 8 MB/s +test misc::one_pass_short_not ... bench: 1,990 ns/iter (+/- 6) = 8 MB/s +test misc::reallyhard2_1K ... bench: 1,335,845 ns/iter (+/- 6,233) +test misc::reallyhard_1K ... 
bench: 1,208,846 ns/iter (+/- 6,070) +test misc::reallyhard_1MB ... bench: 1,291,183,401 ns/iter (+/- 4,281,775) +test misc::reallyhard_32 ... bench: 36,521 ns/iter (+/- 157) = 1 MB/s +test misc::reallyhard_32K ... bench: 40,131,467 ns/iter (+/- 66,846) +test misc::reverse_suffix_no_quadratic ... bench: 506,352 ns/iter (+/- 632) = 15 MB/s +test regexdna::find_new_lines ... bench: 510,954,670 ns/iter (+/- 1,946,366) = 9 MB/s +test regexdna::subst1 ... bench: 198,786,137 ns/iter (+/- 240,963) = 25 MB/s +test regexdna::subst10 ... bench: 198,733,597 ns/iter (+/- 770,484) = 25 MB/s +test regexdna::subst11 ... bench: 198,734,922 ns/iter (+/- 198,116) = 25 MB/s +test regexdna::subst2 ... bench: 198,735,715 ns/iter (+/- 235,337) = 25 MB/s +test regexdna::subst3 ... bench: 198,736,727 ns/iter (+/- 157,633) = 25 MB/s +test regexdna::subst4 ... bench: 198,811,880 ns/iter (+/- 1,502,214) = 25 MB/s +test regexdna::subst5 ... bench: 198,697,281 ns/iter (+/- 211,978) = 25 MB/s +test regexdna::subst6 ... bench: 198,714,239 ns/iter (+/- 1,187,050) = 25 MB/s +test regexdna::subst7 ... bench: 199,021,730 ns/iter (+/- 1,555,969) = 25 MB/s +test regexdna::subst8 ... bench: 199,033,133 ns/iter (+/- 213,859) = 25 MB/s +test regexdna::subst9 ... bench: 199,466,527 ns/iter (+/- 1,394,750) = 25 MB/s +test regexdna::variant1 ... bench: 403,588,578 ns/iter (+/- 493,905) = 12 MB/s +test regexdna::variant2 ... bench: 440,582,945 ns/iter (+/- 305,836) = 11 MB/s +test regexdna::variant3 ... bench: 417,460,804 ns/iter (+/- 1,858,105) = 12 MB/s +test regexdna::variant4 ... bench: 407,209,088 ns/iter (+/- 1,374,513) = 12 MB/s +test regexdna::variant5 ... bench: 408,665,895 ns/iter (+/- 338,946) = 12 MB/s +test regexdna::variant6 ... bench: 408,640,565 ns/iter (+/- 1,895,287) = 12 MB/s +test regexdna::variant7 ... bench: 406,340,097 ns/iter (+/- 2,309,358) = 12 MB/s +test regexdna::variant8 ... bench: 413,195,331 ns/iter (+/- 2,178,194) = 12 MB/s +test regexdna::variant9 ... 
bench: 438,844,927 ns/iter (+/- 2,589,599) = 11 MB/s +test sherlock::before_after_holmes ... bench: 165,435,560 ns/iter (+/- 165,901) = 3 MB/s +test sherlock::before_holmes ... bench: 164,466,984 ns/iter (+/- 178,082) = 3 MB/s +test sherlock::everything_greedy ... bench: 34,680,745 ns/iter (+/- 862,671) = 17 MB/s +test sherlock::holmes_cochar_watson ... bench: 59,712,596 ns/iter (+/- 85,049) = 9 MB/s +test sherlock::ing_suffix ... bench: 135,611,524 ns/iter (+/- 383,869) = 4 MB/s +test sherlock::ing_suffix_limited_space ... bench: 73,398,446 ns/iter (+/- 112,893) = 8 MB/s +test sherlock::name_alt1 ... bench: 42,274,906 ns/iter (+/- 60,836) = 14 MB/s +test sherlock::name_alt2 ... bench: 42,159,449 ns/iter (+/- 56,642) = 14 MB/s +test sherlock::name_alt3 ... bench: 121,926,811 ns/iter (+/- 624,877) = 4 MB/s +test sherlock::name_alt4 ... bench: 58,912,788 ns/iter (+/- 101,576) = 10 MB/s +test sherlock::name_alt5 ... bench: 63,891,303 ns/iter (+/- 79,754) = 9 MB/s +test sherlock::name_holmes ... bench: 22,995,759 ns/iter (+/- 45,074) = 25 MB/s +test sherlock::name_sherlock ... bench: 23,024,135 ns/iter (+/- 86,982) = 25 MB/s +test sherlock::name_sherlock_holmes ... bench: 23,026,357 ns/iter (+/- 42,271) = 25 MB/s +test sherlock::name_whitespace ... bench: 32,485,572 ns/iter (+/- 77,736) = 18 MB/s +test sherlock::no_match_common ... bench: 23,544,207 ns/iter (+/- 590,037) = 25 MB/s +test sherlock::no_match_really_common ... bench: 23,543,480 ns/iter (+/- 51,838) = 25 MB/s +test sherlock::no_match_uncommon ... bench: 23,024,692 ns/iter (+/- 78,358) = 25 MB/s +test sherlock::quotes ... bench: 42,376,602 ns/iter (+/- 49,060) = 14 MB/s +test sherlock::repeated_class_negation ... bench: 92,701,274 ns/iter (+/- 208,063) = 6 MB/s +test sherlock::the_lower ... bench: 23,553,163 ns/iter (+/- 61,446) = 25 MB/s +test sherlock::the_upper ... bench: 23,281,951 ns/iter (+/- 35,811) = 25 MB/s +test sherlock::the_whitespace ... 
bench: 33,011,779 ns/iter (+/- 65,085) = 18 MB/s +test sherlock::word_ending_n ... bench: 64,965,762 ns/iter (+/- 106,103) = 9 MB/s +test sherlock::words ... bench: 47,466,153 ns/iter (+/- 773,222) = 12 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 82 measured; 0 filtered out + diff --git a/vendor/regex/record/old-bench-log/07/tcl b/vendor/regex/record/old-bench-log/07/tcl new file mode 100644 index 0000000..0586935 --- /dev/null +++ b/vendor/regex/record/old-bench-log/07/tcl @@ -0,0 +1,94 @@ + +running 89 tests +test misc::anchored_literal_long_match ... bench: 452 ns/iter (+/- 6) = 862 MB/s +test misc::anchored_literal_long_non_match ... bench: 92 ns/iter (+/- 2) = 4239 MB/s +test misc::anchored_literal_short_match ... bench: 454 ns/iter (+/- 6) = 57 MB/s +test misc::anchored_literal_short_non_match ... bench: 92 ns/iter (+/- 1) = 282 MB/s +test misc::easy0_1K ... bench: 9,231 ns/iter (+/- 59) = 113 MB/s +test misc::easy0_1MB ... bench: 2,828,050 ns/iter (+/- 9,104) = 370 MB/s +test misc::easy0_32 ... bench: 6,527 ns/iter (+/- 78) = 9 MB/s +test misc::easy0_32K ... bench: 94,825 ns/iter (+/- 410) = 345 MB/s +test misc::easy1_1K ... bench: 5,420 ns/iter (+/- 54) = 192 MB/s +test misc::easy1_1MB ... bench: 2,823,597 ns/iter (+/- 8,534) = 371 MB/s +test misc::easy1_32 ... bench: 2,727 ns/iter (+/- 80) = 19 MB/s +test misc::easy1_32K ... bench: 93,382 ns/iter (+/- 108) = 351 MB/s +test misc::hard_1K ... bench: 12,046 ns/iter (+/- 88) = 87 MB/s +test misc::hard_1MB ... bench: 2,831,445 ns/iter (+/- 9,713) = 370 MB/s +test misc::hard_32 ... bench: 9,257 ns/iter (+/- 63) = 6 MB/s +test misc::hard_32K ... bench: 97,613 ns/iter (+/- 533) = 335 MB/s +test misc::literal ... bench: 398 ns/iter (+/- 14) = 128 MB/s +test misc::long_needle1 ... bench: 18,459,088 ns/iter (+/- 162,391) = 5 MB/s +test misc::long_needle2 ... bench: 18,390,595 ns/iter (+/- 96,143) = 5 MB/s +test misc::match_class ... 
bench: 480 ns/iter (+/- 1) = 168 MB/s +test misc::match_class_in_range ... bench: 477 ns/iter (+/- 10) = 169 MB/s +test misc::medium_1K ... bench: 9,573 ns/iter (+/- 94) = 109 MB/s +test misc::medium_1MB ... bench: 2,828,512 ns/iter (+/- 28,270) = 370 MB/s +test misc::medium_32 ... bench: 6,874 ns/iter (+/- 68) = 8 MB/s +test misc::medium_32K ... bench: 95,040 ns/iter (+/- 517) = 345 MB/s +test misc::no_exponential ... bench: 1,976,788 ns/iter (+/- 20,661) +test misc::not_literal ... bench: 1,548 ns/iter (+/- 15) = 32 MB/s +test misc::one_pass_long_prefix ... bench: 5,063 ns/iter (+/- 76) = 5 MB/s +test misc::one_pass_long_prefix_not ... bench: 4,933 ns/iter (+/- 62) = 5 MB/s +test misc::one_pass_short ... bench: 486 ns/iter (+/- 4) = 34 MB/s +test misc::one_pass_short_not ... bench: 579 ns/iter (+/- 3) = 29 MB/s +test misc::reallyhard2_1K ... bench: 88,153 ns/iter (+/- 2,317) = 11 MB/s +test misc::reallyhard_1K ... bench: 12,157 ns/iter (+/- 51) = 86 MB/s +test misc::reallyhard_1MB ... bench: 2,866,126 ns/iter (+/- 71,338) = 365 MB/s +test misc::reallyhard_32 ... bench: 9,321 ns/iter (+/- 138) = 6 MB/s +test misc::reallyhard_32K ... bench: 97,799 ns/iter (+/- 1,087) = 335 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 22,679 ns/iter (+/- 293) = 352 MB/s +test regexdna::find_new_lines ... bench: 38,700,951 ns/iter (+/- 105,197) = 131 MB/s +test regexdna::subst1 ... bench: 22,123,470 ns/iter (+/- 96,738) = 229 MB/s +test regexdna::subst10 ... bench: 22,125,412 ns/iter (+/- 65,856) = 229 MB/s +test regexdna::subst11 ... bench: 22,178,791 ns/iter (+/- 75,853) = 229 MB/s +test regexdna::subst2 ... bench: 22,348,278 ns/iter (+/- 228,790) = 227 MB/s +test regexdna::subst3 ... bench: 22,187,493 ns/iter (+/- 69,149) = 229 MB/s +test regexdna::subst4 ... bench: 22,134,373 ns/iter (+/- 71,979) = 229 MB/s +test regexdna::subst5 ... bench: 22,183,169 ns/iter (+/- 66,220) = 229 MB/s +test regexdna::subst6 ... 
bench: 22,263,432 ns/iter (+/- 91,605) = 228 MB/s +test regexdna::subst7 ... bench: 22,256,481 ns/iter (+/- 62,794) = 228 MB/s +test regexdna::subst8 ... bench: 22,134,314 ns/iter (+/- 75,199) = 229 MB/s +test regexdna::subst9 ... bench: 22,144,129 ns/iter (+/- 76,744) = 229 MB/s +test regexdna::variant1 ... bench: 13,846,793 ns/iter (+/- 33,520) = 367 MB/s +test regexdna::variant2 ... bench: 14,248,239 ns/iter (+/- 62,252) = 356 MB/s +test regexdna::variant3 ... bench: 15,702,520 ns/iter (+/- 339,738) = 323 MB/s +test regexdna::variant4 ... bench: 15,143,136 ns/iter (+/- 52,300) = 335 MB/s +test regexdna::variant5 ... bench: 16,324,698 ns/iter (+/- 50,942) = 311 MB/s +test regexdna::variant6 ... bench: 14,508,593 ns/iter (+/- 46,251) = 350 MB/s +test regexdna::variant7 ... bench: 14,443,485 ns/iter (+/- 80,444) = 351 MB/s +test regexdna::variant8 ... bench: 14,430,571 ns/iter (+/- 63,143) = 352 MB/s +test regexdna::variant9 ... bench: 14,883,129 ns/iter (+/- 76,837) = 341 MB/s +test sherlock::before_after_holmes ... bench: 2,227,807 ns/iter (+/- 9,119) = 267 MB/s +test sherlock::before_holmes ... bench: 2,700,579 ns/iter (+/- 24,875) = 220 MB/s +test sherlock::holmes_cochar_watson ... bench: 2,211,847 ns/iter (+/- 15,027) = 268 MB/s +test sherlock::ing_suffix ... bench: 4,398,150 ns/iter (+/- 27,219) = 135 MB/s +test sherlock::ing_suffix_limited_space ... bench: 17,992,130 ns/iter (+/- 457,978) = 33 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 1,845,704 ns/iter (+/- 9,382) = 322 MB/s +test sherlock::name_alt1 ... bench: 1,890,373 ns/iter (+/- 9,971) = 314 MB/s +test sherlock::name_alt2 ... bench: 2,626,524 ns/iter (+/- 18,261) = 226 MB/s +test sherlock::name_alt3 ... bench: 4,468,643 ns/iter (+/- 11,946) = 133 MB/s +test sherlock::name_alt3_nocase ... bench: 7,226,342 ns/iter (+/- 57,220) = 82 MB/s +test sherlock::name_alt4 ... bench: 2,395,105 ns/iter (+/- 31,101) = 248 MB/s +test sherlock::name_alt4_nocase ... 
bench: 2,895,153 ns/iter (+/- 12,446) = 205 MB/s +test sherlock::name_alt5 ... bench: 3,253,560 ns/iter (+/- 33,725) = 182 MB/s +test sherlock::name_alt5_nocase ... bench: 4,008,656 ns/iter (+/- 39,415) = 148 MB/s +test sherlock::name_holmes ... bench: 2,076,117 ns/iter (+/- 6,376) = 286 MB/s +test sherlock::name_holmes_nocase ... bench: 2,157,634 ns/iter (+/- 6,494) = 275 MB/s +test sherlock::name_sherlock ... bench: 1,757,317 ns/iter (+/- 5,935) = 338 MB/s +test sherlock::name_sherlock_holmes ... bench: 1,897,004 ns/iter (+/- 12,012) = 313 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 1,939,722 ns/iter (+/- 6,273) = 306 MB/s +test sherlock::name_sherlock_nocase ... bench: 1,801,334 ns/iter (+/- 3,179) = 330 MB/s +test sherlock::name_whitespace ... bench: 1,910,996 ns/iter (+/- 6,429) = 311 MB/s +test sherlock::no_match_common ... bench: 1,601,431 ns/iter (+/- 7,131) = 371 MB/s +test sherlock::no_match_really_common ... bench: 1,601,153 ns/iter (+/- 4,375) = 371 MB/s +test sherlock::no_match_uncommon ... bench: 1,600,840 ns/iter (+/- 8,348) = 371 MB/s +test sherlock::quotes ... bench: 7,620,650 ns/iter (+/- 48,467) = 78 MB/s +test sherlock::repeated_class_negation ... bench: 55,564,521 ns/iter (+/- 210,324) = 10 MB/s +test sherlock::the_lower ... bench: 5,628,558 ns/iter (+/- 19,934) = 105 MB/s +test sherlock::the_nocase ... bench: 6,063,195 ns/iter (+/- 28,534) = 98 MB/s +test sherlock::the_upper ... bench: 1,992,703 ns/iter (+/- 6,736) = 298 MB/s +test sherlock::the_whitespace ... bench: 7,159,423 ns/iter (+/- 38,306) = 83 MB/s +test sherlock::words ... bench: 38,358,421 ns/iter (+/- 99,230) = 15 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 89 measured; 0 filtered out + diff --git a/vendor/regex/record/old-bench-log/08-new-memmem/rust-after-01 b/vendor/regex/record/old-bench-log/08-new-memmem/rust-after-01 new file mode 100644 index 0000000..521e935 --- /dev/null +++ b/vendor/regex/record/old-bench-log/08-new-memmem/rust-after-01 @@ -0,0 +1,124 @@ + +running 119 tests +test misc::anchored_literal_long_match ... bench: 18 ns/iter (+/- 1) = 21666 MB/s +test misc::anchored_literal_long_non_match ... bench: 19 ns/iter (+/- 2) = 20526 MB/s +test misc::anchored_literal_short_match ... bench: 18 ns/iter (+/- 1) = 1444 MB/s +test misc::anchored_literal_short_non_match ... bench: 19 ns/iter (+/- 0) = 1368 MB/s +test misc::easy0_1K ... bench: 15 ns/iter (+/- 2) = 70066 MB/s +test misc::easy0_1MB ... bench: 22 ns/iter (+/- 0) = 47663772 MB/s +test misc::easy0_32 ... bench: 14 ns/iter (+/- 2) = 4214 MB/s +test misc::easy0_32K ... bench: 15 ns/iter (+/- 1) = 2186333 MB/s +test misc::easy1_1K ... bench: 40 ns/iter (+/- 2) = 26100 MB/s +test misc::easy1_1MB ... bench: 44 ns/iter (+/- 5) = 23831727 MB/s +test misc::easy1_32 ... bench: 39 ns/iter (+/- 5) = 1333 MB/s +test misc::easy1_32K ... bench: 41 ns/iter (+/- 3) = 799707 MB/s +test misc::hard_1K ... bench: 50 ns/iter (+/- 7) = 21020 MB/s +test misc::hard_1MB ... bench: 55 ns/iter (+/- 6) = 19065509 MB/s +test misc::hard_32 ... bench: 50 ns/iter (+/- 7) = 1180 MB/s +test misc::hard_32K ... bench: 50 ns/iter (+/- 2) = 655900 MB/s +test misc::is_match_set ... bench: 60 ns/iter (+/- 2) = 416 MB/s +test misc::literal ... bench: 12 ns/iter (+/- 1) = 4250 MB/s +test misc::long_needle1 ... bench: 3,252 ns/iter (+/- 168) = 30750 MB/s +test misc::long_needle2 ... bench: 355,576 ns/iter (+/- 34,074) = 281 MB/s +test misc::match_class ... bench: 67 ns/iter (+/- 2) = 1208 MB/s +test misc::match_class_in_range ... bench: 14 ns/iter (+/- 0) = 5785 MB/s +test misc::match_class_unicode ... 
bench: 256 ns/iter (+/- 36) = 628 MB/s +test misc::matches_set ... bench: 458 ns/iter (+/- 65) = 54 MB/s +test misc::medium_1K ... bench: 15 ns/iter (+/- 1) = 70133 MB/s +test misc::medium_1MB ... bench: 23 ns/iter (+/- 2) = 45591478 MB/s +test misc::medium_32 ... bench: 15 ns/iter (+/- 0) = 4000 MB/s +test misc::medium_32K ... bench: 15 ns/iter (+/- 0) = 2186400 MB/s +test misc::no_exponential ... bench: 406 ns/iter (+/- 32) = 246 MB/s +test misc::not_literal ... bench: 90 ns/iter (+/- 12) = 566 MB/s +test misc::one_pass_long_prefix ... bench: 53 ns/iter (+/- 1) = 490 MB/s +test misc::one_pass_long_prefix_not ... bench: 51 ns/iter (+/- 7) = 509 MB/s +test misc::one_pass_short ... bench: 37 ns/iter (+/- 1) = 459 MB/s +test misc::one_pass_short_not ... bench: 39 ns/iter (+/- 5) = 435 MB/s +test misc::reallyhard2_1K ... bench: 75 ns/iter (+/- 2) = 13866 MB/s +test misc::reallyhard_1K ... bench: 1,591 ns/iter (+/- 227) = 660 MB/s +test misc::reallyhard_1MB ... bench: 1,576,602 ns/iter (+/- 204,573) = 665 MB/s +test misc::reallyhard_32 ... bench: 102 ns/iter (+/- 7) = 578 MB/s +test misc::reallyhard_32K ... bench: 49,327 ns/iter (+/- 4,812) = 664 MB/s +test misc::replace_all ... bench: 132 ns/iter (+/- 13) +test misc::reverse_suffix_no_quadratic ... bench: 4,190 ns/iter (+/- 581) = 1909 MB/s +test misc::short_haystack_1000000x ... bench: 132,982 ns/iter (+/- 18,045) = 60158 MB/s +test misc::short_haystack_100000x ... bench: 14,720 ns/iter (+/- 946) = 54348 MB/s +test misc::short_haystack_10000x ... bench: 5,993 ns/iter (+/- 381) = 13350 MB/s +test misc::short_haystack_1000x ... bench: 476 ns/iter (+/- 58) = 16829 MB/s +test misc::short_haystack_100x ... bench: 227 ns/iter (+/- 22) = 3572 MB/s +test misc::short_haystack_10x ... bench: 211 ns/iter (+/- 13) = 431 MB/s +test misc::short_haystack_1x ... bench: 204 ns/iter (+/- 29) = 93 MB/s +test misc::short_haystack_2x ... bench: 206 ns/iter (+/- 7) = 131 MB/s +test misc::short_haystack_3x ... 
bench: 212 ns/iter (+/- 16) = 165 MB/s +test misc::short_haystack_4x ... bench: 207 ns/iter (+/- 29) = 207 MB/s +test regexdna::find_new_lines ... bench: 12,053,740 ns/iter (+/- 393,644) = 421 MB/s +test regexdna::subst1 ... bench: 786,112 ns/iter (+/- 91,136) = 6466 MB/s +test regexdna::subst10 ... bench: 831,353 ns/iter (+/- 67,293) = 6114 MB/s +test regexdna::subst11 ... bench: 784,021 ns/iter (+/- 28,112) = 6483 MB/s +test regexdna::subst2 ... bench: 785,838 ns/iter (+/- 108,510) = 6468 MB/s +test regexdna::subst3 ... bench: 791,789 ns/iter (+/- 37,364) = 6420 MB/s +test regexdna::subst4 ... bench: 784,224 ns/iter (+/- 23,802) = 6482 MB/s +test regexdna::subst5 ... bench: 788,368 ns/iter (+/- 75,171) = 6448 MB/s +test regexdna::subst6 ... bench: 784,730 ns/iter (+/- 48,594) = 6477 MB/s +test regexdna::subst7 ... bench: 788,067 ns/iter (+/- 88,333) = 6450 MB/s +test regexdna::subst8 ... bench: 810,784 ns/iter (+/- 111,836) = 6269 MB/s +test regexdna::subst9 ... bench: 788,854 ns/iter (+/- 66,496) = 6444 MB/s +test regexdna::variant1 ... bench: 2,238,677 ns/iter (+/- 144,752) = 2270 MB/s +test regexdna::variant2 ... bench: 3,258,761 ns/iter (+/- 205,012) = 1559 MB/s +test regexdna::variant3 ... bench: 3,818,146 ns/iter (+/- 254,877) = 1331 MB/s +test regexdna::variant4 ... bench: 3,837,323 ns/iter (+/- 349,373) = 1324 MB/s +test regexdna::variant5 ... bench: 2,698,901 ns/iter (+/- 111,145) = 1883 MB/s +test regexdna::variant6 ... bench: 2,687,854 ns/iter (+/- 184,039) = 1891 MB/s +test regexdna::variant7 ... bench: 3,291,211 ns/iter (+/- 220,992) = 1544 MB/s +test regexdna::variant8 ... bench: 3,359,262 ns/iter (+/- 185,610) = 1513 MB/s +test regexdna::variant9 ... bench: 3,293,953 ns/iter (+/- 245,454) = 1543 MB/s +test rust_compile::compile_huge ... bench: 95,142 ns/iter (+/- 10,195) +test rust_compile::compile_huge_bytes ... bench: 5,650,680 ns/iter (+/- 252,936) +test rust_compile::compile_huge_full ... 
bench: 10,867,986 ns/iter (+/- 275,259) +test rust_compile::compile_simple ... bench: 3,751 ns/iter (+/- 310) +test rust_compile::compile_simple_bytes ... bench: 3,664 ns/iter (+/- 172) +test rust_compile::compile_simple_full ... bench: 22,078 ns/iter (+/- 3,259) +test rust_compile::compile_small ... bench: 8,499 ns/iter (+/- 942) +test rust_compile::compile_small_bytes ... bench: 151,196 ns/iter (+/- 16,322) +test rust_compile::compile_small_full ... bench: 309,597 ns/iter (+/- 32,622) +test sherlock::before_after_holmes ... bench: 917,591 ns/iter (+/- 55,643) = 648 MB/s +test sherlock::before_holmes ... bench: 62,726 ns/iter (+/- 8,861) = 9484 MB/s +test sherlock::everything_greedy ... bench: 2,036,050 ns/iter (+/- 152,461) = 292 MB/s +test sherlock::everything_greedy_nl ... bench: 796,690 ns/iter (+/- 71,089) = 746 MB/s +test sherlock::holmes_cochar_watson ... bench: 106,258 ns/iter (+/- 8,294) = 5598 MB/s +test sherlock::holmes_coword_watson ... bench: 481,086 ns/iter (+/- 60,212) = 1236 MB/s +test sherlock::ing_suffix ... bench: 322,033 ns/iter (+/- 8,912) = 1847 MB/s +test sherlock::ing_suffix_limited_space ... bench: 1,067,523 ns/iter (+/- 89,630) = 557 MB/s +test sherlock::letters ... bench: 22,745,932 ns/iter (+/- 428,787) = 26 MB/s +test sherlock::letters_lower ... bench: 22,228,365 ns/iter (+/- 495,287) = 26 MB/s +test sherlock::letters_upper ... bench: 1,775,941 ns/iter (+/- 158,985) = 334 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 897,327 ns/iter (+/- 49,085) = 663 MB/s +test sherlock::name_alt1 ... bench: 32,008 ns/iter (+/- 4,011) = 18587 MB/s +test sherlock::name_alt2 ... bench: 86,850 ns/iter (+/- 5,463) = 6850 MB/s +test sherlock::name_alt3 ... bench: 98,359 ns/iter (+/- 14,052) = 6048 MB/s +test sherlock::name_alt3_nocase ... bench: 381,147 ns/iter (+/- 16,996) = 1560 MB/s +test sherlock::name_alt4 ... bench: 121,025 ns/iter (+/- 16,654) = 4915 MB/s +test sherlock::name_alt4_nocase ... 
bench: 188,972 ns/iter (+/- 26,145) = 3148 MB/s +test sherlock::name_alt5 ... bench: 91,832 ns/iter (+/- 6,188) = 6478 MB/s +test sherlock::name_alt5_nocase ... bench: 351,422 ns/iter (+/- 49,084) = 1692 MB/s +test sherlock::name_holmes ... bench: 33,405 ns/iter (+/- 3,113) = 17809 MB/s +test sherlock::name_holmes_nocase ... bench: 134,899 ns/iter (+/- 10,883) = 4410 MB/s +test sherlock::name_sherlock ... bench: 22,455 ns/iter (+/- 2,027) = 26494 MB/s +test sherlock::name_sherlock_holmes ... bench: 22,283 ns/iter (+/- 2,281) = 26698 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 97,492 ns/iter (+/- 6,496) = 6102 MB/s +test sherlock::name_sherlock_nocase ... bench: 95,627 ns/iter (+/- 8,442) = 6221 MB/s +test sherlock::name_whitespace ... bench: 30,702 ns/iter (+/- 4,194) = 19377 MB/s +test sherlock::no_match_common ... bench: 19,616 ns/iter (+/- 2,677) = 30328 MB/s +test sherlock::no_match_really_common ... bench: 25,601 ns/iter (+/- 2,506) = 23238 MB/s +test sherlock::no_match_uncommon ... bench: 19,641 ns/iter (+/- 2,175) = 30290 MB/s +test sherlock::quotes ... bench: 369,048 ns/iter (+/- 25,898) = 1612 MB/s +test sherlock::repeated_class_negation ... bench: 75,780,396 ns/iter (+/- 1,032,817) = 7 MB/s +test sherlock::the_lower ... bench: 327,762 ns/iter (+/- 48,769) = 1815 MB/s +test sherlock::the_nocase ... bench: 532,075 ns/iter (+/- 40,117) = 1118 MB/s +test sherlock::the_upper ... bench: 45,197 ns/iter (+/- 1,621) = 13163 MB/s +test sherlock::the_whitespace ... bench: 819,239 ns/iter (+/- 81,388) = 726 MB/s +test sherlock::word_ending_n ... bench: 1,716,625 ns/iter (+/- 120,247) = 346 MB/s +test sherlock::words ... bench: 8,690,764 ns/iter (+/- 322,915) = 68 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 119 measured; 0 filtered out; finished in 114.31s + diff --git a/vendor/regex/record/old-bench-log/08-new-memmem/rust-after-02 b/vendor/regex/record/old-bench-log/08-new-memmem/rust-after-02 new file mode 100644 index 0000000..60d0578 --- /dev/null +++ b/vendor/regex/record/old-bench-log/08-new-memmem/rust-after-02 @@ -0,0 +1,124 @@ + +running 119 tests +test misc::anchored_literal_long_match ... bench: 18 ns/iter (+/- 2) = 21666 MB/s +test misc::anchored_literal_long_non_match ... bench: 19 ns/iter (+/- 1) = 20526 MB/s +test misc::anchored_literal_short_match ... bench: 18 ns/iter (+/- 1) = 1444 MB/s +test misc::anchored_literal_short_non_match ... bench: 19 ns/iter (+/- 2) = 1368 MB/s +test misc::easy0_1K ... bench: 15 ns/iter (+/- 1) = 70066 MB/s +test misc::easy0_1MB ... bench: 22 ns/iter (+/- 1) = 47663772 MB/s +test misc::easy0_32 ... bench: 14 ns/iter (+/- 1) = 4214 MB/s +test misc::easy0_32K ... bench: 15 ns/iter (+/- 1) = 2186333 MB/s +test misc::easy1_1K ... bench: 39 ns/iter (+/- 4) = 26769 MB/s +test misc::easy1_1MB ... bench: 43 ns/iter (+/- 3) = 24385953 MB/s +test misc::easy1_32 ... bench: 39 ns/iter (+/- 4) = 1333 MB/s +test misc::easy1_32K ... bench: 39 ns/iter (+/- 3) = 840717 MB/s +test misc::hard_1K ... bench: 50 ns/iter (+/- 5) = 21020 MB/s +test misc::hard_1MB ... bench: 55 ns/iter (+/- 7) = 19065509 MB/s +test misc::hard_32 ... bench: 50 ns/iter (+/- 5) = 1180 MB/s +test misc::hard_32K ... bench: 50 ns/iter (+/- 6) = 655900 MB/s +test misc::is_match_set ... bench: 60 ns/iter (+/- 4) = 416 MB/s +test misc::literal ... bench: 12 ns/iter (+/- 0) = 4250 MB/s +test misc::long_needle1 ... bench: 3,251 ns/iter (+/- 333) = 30760 MB/s +test misc::long_needle2 ... bench: 355,576 ns/iter (+/- 24,612) = 281 MB/s +test misc::match_class ... bench: 66 ns/iter (+/- 1) = 1227 MB/s +test misc::match_class_in_range ... bench: 14 ns/iter (+/- 1) = 5785 MB/s +test misc::match_class_unicode ... 
bench: 254 ns/iter (+/- 25) = 633 MB/s +test misc::matches_set ... bench: 456 ns/iter (+/- 17) = 54 MB/s +test misc::medium_1K ... bench: 15 ns/iter (+/- 0) = 70133 MB/s +test misc::medium_1MB ... bench: 23 ns/iter (+/- 2) = 45591478 MB/s +test misc::medium_32 ... bench: 15 ns/iter (+/- 2) = 4000 MB/s +test misc::medium_32K ... bench: 15 ns/iter (+/- 2) = 2186400 MB/s +test misc::no_exponential ... bench: 403 ns/iter (+/- 55) = 248 MB/s +test misc::not_literal ... bench: 90 ns/iter (+/- 12) = 566 MB/s +test misc::one_pass_long_prefix ... bench: 51 ns/iter (+/- 7) = 509 MB/s +test misc::one_pass_long_prefix_not ... bench: 51 ns/iter (+/- 5) = 509 MB/s +test misc::one_pass_short ... bench: 38 ns/iter (+/- 5) = 447 MB/s +test misc::one_pass_short_not ... bench: 39 ns/iter (+/- 1) = 435 MB/s +test misc::reallyhard2_1K ... bench: 75 ns/iter (+/- 2) = 13866 MB/s +test misc::reallyhard_1K ... bench: 1,592 ns/iter (+/- 148) = 660 MB/s +test misc::reallyhard_1MB ... bench: 1,576,299 ns/iter (+/- 142,145) = 665 MB/s +test misc::reallyhard_32 ... bench: 103 ns/iter (+/- 8) = 572 MB/s +test misc::reallyhard_32K ... bench: 49,326 ns/iter (+/- 3,202) = 664 MB/s +test misc::replace_all ... bench: 132 ns/iter (+/- 16) +test misc::reverse_suffix_no_quadratic ... bench: 4,168 ns/iter (+/- 227) = 1919 MB/s +test misc::short_haystack_1000000x ... bench: 132,733 ns/iter (+/- 18,141) = 60271 MB/s +test misc::short_haystack_100000x ... bench: 14,468 ns/iter (+/- 1,777) = 55295 MB/s +test misc::short_haystack_10000x ... bench: 6,316 ns/iter (+/- 360) = 12667 MB/s +test misc::short_haystack_1000x ... bench: 474 ns/iter (+/- 69) = 16900 MB/s +test misc::short_haystack_100x ... bench: 229 ns/iter (+/- 32) = 3541 MB/s +test misc::short_haystack_10x ... bench: 212 ns/iter (+/- 18) = 429 MB/s +test misc::short_haystack_1x ... bench: 205 ns/iter (+/- 28) = 92 MB/s +test misc::short_haystack_2x ... bench: 207 ns/iter (+/- 20) = 130 MB/s +test misc::short_haystack_3x ... 
bench: 213 ns/iter (+/- 7) = 164 MB/s +test misc::short_haystack_4x ... bench: 208 ns/iter (+/- 9) = 206 MB/s +test regexdna::find_new_lines ... bench: 12,050,847 ns/iter (+/- 346,484) = 421 MB/s +test regexdna::subst1 ... bench: 817,689 ns/iter (+/- 104,629) = 6216 MB/s +test regexdna::subst10 ... bench: 788,728 ns/iter (+/- 66,497) = 6445 MB/s +test regexdna::subst11 ... bench: 787,188 ns/iter (+/- 49,158) = 6457 MB/s +test regexdna::subst2 ... bench: 787,143 ns/iter (+/- 108,541) = 6458 MB/s +test regexdna::subst3 ... bench: 792,452 ns/iter (+/- 32,963) = 6414 MB/s +test regexdna::subst4 ... bench: 820,043 ns/iter (+/- 71,037) = 6198 MB/s +test regexdna::subst5 ... bench: 790,043 ns/iter (+/- 39,234) = 6434 MB/s +test regexdna::subst6 ... bench: 785,007 ns/iter (+/- 18,701) = 6475 MB/s +test regexdna::subst7 ... bench: 789,393 ns/iter (+/- 51,525) = 6439 MB/s +test regexdna::subst8 ... bench: 784,190 ns/iter (+/- 90,675) = 6482 MB/s +test regexdna::subst9 ... bench: 789,021 ns/iter (+/- 88,256) = 6442 MB/s +test regexdna::variant1 ... bench: 2,237,592 ns/iter (+/- 146,174) = 2271 MB/s +test regexdna::variant2 ... bench: 3,255,382 ns/iter (+/- 179,473) = 1561 MB/s +test regexdna::variant3 ... bench: 3,812,799 ns/iter (+/- 210,786) = 1333 MB/s +test regexdna::variant4 ... bench: 3,853,476 ns/iter (+/- 263,442) = 1319 MB/s +test regexdna::variant5 ... bench: 2,696,756 ns/iter (+/- 161,353) = 1885 MB/s +test regexdna::variant6 ... bench: 2,683,221 ns/iter (+/- 149,650) = 1894 MB/s +test regexdna::variant7 ... bench: 3,289,426 ns/iter (+/- 209,217) = 1545 MB/s +test regexdna::variant8 ... bench: 3,362,858 ns/iter (+/- 274,273) = 1511 MB/s +test regexdna::variant9 ... bench: 3,287,253 ns/iter (+/- 188,894) = 1546 MB/s +test rust_compile::compile_huge ... bench: 94,912 ns/iter (+/- 12,311) +test rust_compile::compile_huge_bytes ... bench: 5,534,281 ns/iter (+/- 192,069) +test rust_compile::compile_huge_full ... 
bench: 10,969,970 ns/iter (+/- 312,230) +test rust_compile::compile_simple ... bench: 3,523 ns/iter (+/- 525) +test rust_compile::compile_simple_bytes ... bench: 3,564 ns/iter (+/- 355) +test rust_compile::compile_simple_full ... bench: 19,887 ns/iter (+/- 1,885) +test rust_compile::compile_small ... bench: 8,294 ns/iter (+/- 1,123) +test rust_compile::compile_small_bytes ... bench: 153,070 ns/iter (+/- 20,825) +test rust_compile::compile_small_full ... bench: 313,318 ns/iter (+/- 28,271) +test sherlock::before_after_holmes ... bench: 907,585 ns/iter (+/- 86,027) = 655 MB/s +test sherlock::before_holmes ... bench: 62,765 ns/iter (+/- 6,413) = 9478 MB/s +test sherlock::everything_greedy ... bench: 2,033,519 ns/iter (+/- 97,963) = 292 MB/s +test sherlock::everything_greedy_nl ... bench: 796,514 ns/iter (+/- 48,247) = 746 MB/s +test sherlock::holmes_cochar_watson ... bench: 107,788 ns/iter (+/- 15,545) = 5519 MB/s +test sherlock::holmes_coword_watson ... bench: 482,686 ns/iter (+/- 49,033) = 1232 MB/s +test sherlock::ing_suffix ... bench: 322,901 ns/iter (+/- 46,329) = 1842 MB/s +test sherlock::ing_suffix_limited_space ... bench: 1,067,799 ns/iter (+/- 57,022) = 557 MB/s +test sherlock::letters ... bench: 22,823,246 ns/iter (+/- 472,094) = 26 MB/s +test sherlock::letters_lower ... bench: 22,137,278 ns/iter (+/- 443,188) = 26 MB/s +test sherlock::letters_upper ... bench: 1,773,598 ns/iter (+/- 96,994) = 335 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 897,623 ns/iter (+/- 48,509) = 662 MB/s +test sherlock::name_alt1 ... bench: 31,882 ns/iter (+/- 3,354) = 18660 MB/s +test sherlock::name_alt2 ... bench: 86,500 ns/iter (+/- 7,997) = 6877 MB/s +test sherlock::name_alt3 ... bench: 98,159 ns/iter (+/- 6,106) = 6060 MB/s +test sherlock::name_alt3_nocase ... bench: 383,858 ns/iter (+/- 19,224) = 1549 MB/s +test sherlock::name_alt4 ... bench: 122,489 ns/iter (+/- 17,271) = 4857 MB/s +test sherlock::name_alt4_nocase ... 
bench: 192,081 ns/iter (+/- 10,999) = 3097 MB/s +test sherlock::name_alt5 ... bench: 91,396 ns/iter (+/- 6,399) = 6509 MB/s +test sherlock::name_alt5_nocase ... bench: 354,804 ns/iter (+/- 26,158) = 1676 MB/s +test sherlock::name_holmes ... bench: 33,569 ns/iter (+/- 4,647) = 17722 MB/s +test sherlock::name_holmes_nocase ... bench: 136,387 ns/iter (+/- 14,005) = 4362 MB/s +test sherlock::name_sherlock ... bench: 22,468 ns/iter (+/- 1,144) = 26479 MB/s +test sherlock::name_sherlock_holmes ... bench: 22,279 ns/iter (+/- 1,563) = 26703 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 98,003 ns/iter (+/- 10,978) = 6070 MB/s +test sherlock::name_sherlock_nocase ... bench: 96,130 ns/iter (+/- 4,373) = 6188 MB/s +test sherlock::name_whitespace ... bench: 30,532 ns/iter (+/- 3,125) = 19485 MB/s +test sherlock::no_match_common ... bench: 19,644 ns/iter (+/- 2,118) = 30285 MB/s +test sherlock::no_match_really_common ... bench: 25,374 ns/iter (+/- 1,538) = 23446 MB/s +test sherlock::no_match_uncommon ... bench: 19,602 ns/iter (+/- 427) = 30350 MB/s +test sherlock::quotes ... bench: 369,657 ns/iter (+/- 52,406) = 1609 MB/s +test sherlock::repeated_class_negation ... bench: 76,922,839 ns/iter (+/- 1,261,770) = 7 MB/s +test sherlock::the_lower ... bench: 326,221 ns/iter (+/- 35,683) = 1823 MB/s +test sherlock::the_nocase ... bench: 525,254 ns/iter (+/- 26,000) = 1132 MB/s +test sherlock::the_upper ... bench: 44,702 ns/iter (+/- 5,012) = 13308 MB/s +test sherlock::the_whitespace ... bench: 814,494 ns/iter (+/- 66,715) = 730 MB/s +test sherlock::word_ending_n ... bench: 1,705,139 ns/iter (+/- 97,420) = 348 MB/s +test sherlock::words ... bench: 8,632,437 ns/iter (+/- 278,177) = 68 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 119 measured; 0 filtered out; finished in 106.01s + diff --git a/vendor/regex/record/old-bench-log/08-new-memmem/rust-before-01 b/vendor/regex/record/old-bench-log/08-new-memmem/rust-before-01 new file mode 100644 index 0000000..1316e6d --- /dev/null +++ b/vendor/regex/record/old-bench-log/08-new-memmem/rust-before-01 @@ -0,0 +1,124 @@ + +running 119 tests +test misc::anchored_literal_long_match ... bench: 18 ns/iter (+/- 1) = 21666 MB/s +test misc::anchored_literal_long_non_match ... bench: 17 ns/iter (+/- 1) = 22941 MB/s +test misc::anchored_literal_short_match ... bench: 16 ns/iter (+/- 2) = 1625 MB/s +test misc::anchored_literal_short_non_match ... bench: 17 ns/iter (+/- 2) = 1529 MB/s +test misc::easy0_1K ... bench: 12 ns/iter (+/- 1) = 87583 MB/s +test misc::easy0_1MB ... bench: 15 ns/iter (+/- 0) = 69906866 MB/s +test misc::easy0_32 ... bench: 11 ns/iter (+/- 0) = 5363 MB/s +test misc::easy0_32K ... bench: 12 ns/iter (+/- 2) = 2732916 MB/s +test misc::easy1_1K ... bench: 39 ns/iter (+/- 5) = 26769 MB/s +test misc::easy1_1MB ... bench: 40 ns/iter (+/- 6) = 26214900 MB/s +test misc::easy1_32 ... bench: 39 ns/iter (+/- 3) = 1333 MB/s +test misc::easy1_32K ... bench: 39 ns/iter (+/- 5) = 840717 MB/s +test misc::hard_1K ... bench: 49 ns/iter (+/- 1) = 21448 MB/s +test misc::hard_1MB ... bench: 52 ns/iter (+/- 2) = 20165442 MB/s +test misc::hard_32 ... bench: 49 ns/iter (+/- 2) = 1204 MB/s +test misc::hard_32K ... bench: 49 ns/iter (+/- 3) = 669285 MB/s +test misc::is_match_set ... bench: 59 ns/iter (+/- 2) = 423 MB/s +test misc::literal ... bench: 11 ns/iter (+/- 1) = 4636 MB/s +test misc::long_needle1 ... bench: 1,161 ns/iter (+/- 54) = 86133 MB/s +test misc::long_needle2 ... bench: 680,687 ns/iter (+/- 63,713) = 146 MB/s +test misc::match_class ... bench: 69 ns/iter (+/- 4) = 1173 MB/s +test misc::match_class_in_range ... bench: 13 ns/iter (+/- 0) = 6230 MB/s +test misc::match_class_unicode ... 
bench: 253 ns/iter (+/- 9) = 636 MB/s +test misc::matches_set ... bench: 453 ns/iter (+/- 65) = 55 MB/s +test misc::medium_1K ... bench: 13 ns/iter (+/- 0) = 80923 MB/s +test misc::medium_1MB ... bench: 17 ns/iter (+/- 2) = 61682588 MB/s +test misc::medium_32 ... bench: 13 ns/iter (+/- 0) = 4615 MB/s +test misc::medium_32K ... bench: 13 ns/iter (+/- 0) = 2522769 MB/s +test misc::no_exponential ... bench: 330 ns/iter (+/- 47) = 303 MB/s +test misc::not_literal ... bench: 88 ns/iter (+/- 9) = 579 MB/s +test misc::one_pass_long_prefix ... bench: 50 ns/iter (+/- 7) = 520 MB/s +test misc::one_pass_long_prefix_not ... bench: 50 ns/iter (+/- 2) = 520 MB/s +test misc::one_pass_short ... bench: 36 ns/iter (+/- 4) = 472 MB/s +test misc::one_pass_short_not ... bench: 39 ns/iter (+/- 2) = 435 MB/s +test misc::reallyhard2_1K ... bench: 55 ns/iter (+/- 7) = 18909 MB/s +test misc::reallyhard_1K ... bench: 1,590 ns/iter (+/- 225) = 661 MB/s +test misc::reallyhard_1MB ... bench: 1,580,163 ns/iter (+/- 224,935) = 663 MB/s +test misc::reallyhard_32 ... bench: 100 ns/iter (+/- 6) = 590 MB/s +test misc::reallyhard_32K ... bench: 49,318 ns/iter (+/- 6,046) = 664 MB/s +test misc::replace_all ... bench: 127 ns/iter (+/- 14) +test misc::reverse_suffix_no_quadratic ... bench: 4,240 ns/iter (+/- 117) = 1886 MB/s +test misc::short_haystack_1000000x ... bench: 89,004 ns/iter (+/- 2,927) = 89883 MB/s +test misc::short_haystack_100000x ... bench: 10,349 ns/iter (+/- 334) = 77303 MB/s +test misc::short_haystack_10000x ... bench: 5,835 ns/iter (+/- 700) = 13712 MB/s +test misc::short_haystack_1000x ... bench: 563 ns/iter (+/- 33) = 14229 MB/s +test misc::short_haystack_100x ... bench: 260 ns/iter (+/- 21) = 3119 MB/s +test misc::short_haystack_10x ... bench: 221 ns/iter (+/- 31) = 411 MB/s +test misc::short_haystack_1x ... bench: 211 ns/iter (+/- 30) = 90 MB/s +test misc::short_haystack_2x ... bench: 213 ns/iter (+/- 19) = 126 MB/s +test misc::short_haystack_3x ... 
bench: 212 ns/iter (+/- 7) = 165 MB/s +test misc::short_haystack_4x ... bench: 221 ns/iter (+/- 26) = 194 MB/s +test regexdna::find_new_lines ... bench: 12,035,248 ns/iter (+/- 362,122) = 422 MB/s +test regexdna::subst1 ... bench: 787,853 ns/iter (+/- 29,667) = 6452 MB/s +test regexdna::subst10 ... bench: 750,718 ns/iter (+/- 103,118) = 6771 MB/s +test regexdna::subst11 ... bench: 749,377 ns/iter (+/- 103,312) = 6783 MB/s +test regexdna::subst2 ... bench: 748,785 ns/iter (+/- 83,175) = 6788 MB/s +test regexdna::subst3 ... bench: 755,004 ns/iter (+/- 75,589) = 6732 MB/s +test regexdna::subst4 ... bench: 747,617 ns/iter (+/- 70,600) = 6799 MB/s +test regexdna::subst5 ... bench: 752,458 ns/iter (+/- 86,154) = 6755 MB/s +test regexdna::subst6 ... bench: 749,801 ns/iter (+/- 102,642) = 6779 MB/s +test regexdna::subst7 ... bench: 760,975 ns/iter (+/- 105,159) = 6680 MB/s +test regexdna::subst8 ... bench: 749,002 ns/iter (+/- 82,082) = 6786 MB/s +test regexdna::subst9 ... bench: 751,248 ns/iter (+/- 100,152) = 6766 MB/s +test regexdna::variant1 ... bench: 2,211,035 ns/iter (+/- 150,147) = 2299 MB/s +test regexdna::variant2 ... bench: 3,210,193 ns/iter (+/- 161,942) = 1583 MB/s +test regexdna::variant3 ... bench: 3,793,641 ns/iter (+/- 203,795) = 1339 MB/s +test regexdna::variant4 ... bench: 3,799,721 ns/iter (+/- 140,933) = 1337 MB/s +test regexdna::variant5 ... bench: 2,652,750 ns/iter (+/- 185,489) = 1916 MB/s +test regexdna::variant6 ... bench: 2,633,257 ns/iter (+/- 211,323) = 1930 MB/s +test regexdna::variant7 ... bench: 3,268,111 ns/iter (+/- 176,273) = 1555 MB/s +test regexdna::variant8 ... bench: 3,331,333 ns/iter (+/- 264,431) = 1525 MB/s +test regexdna::variant9 ... bench: 3,268,398 ns/iter (+/- 298,223) = 1555 MB/s +test rust_compile::compile_huge ... bench: 94,562 ns/iter (+/- 2,194) +test rust_compile::compile_huge_bytes ... bench: 5,611,428 ns/iter (+/- 202,365) +test rust_compile::compile_huge_full ... 
bench: 10,933,505 ns/iter (+/- 325,078) +test rust_compile::compile_simple ... bench: 3,496 ns/iter (+/- 156) +test rust_compile::compile_simple_bytes ... bench: 3,572 ns/iter (+/- 389) +test rust_compile::compile_simple_full ... bench: 20,283 ns/iter (+/- 1,894) +test rust_compile::compile_small ... bench: 8,475 ns/iter (+/- 1,008) +test rust_compile::compile_small_bytes ... bench: 157,446 ns/iter (+/- 11,319) +test rust_compile::compile_small_full ... bench: 316,041 ns/iter (+/- 23,620) +test sherlock::before_after_holmes ... bench: 906,578 ns/iter (+/- 129,507) = 656 MB/s +test sherlock::before_holmes ... bench: 64,715 ns/iter (+/- 9,107) = 9193 MB/s +test sherlock::everything_greedy ... bench: 2,065,017 ns/iter (+/- 156,855) = 288 MB/s +test sherlock::everything_greedy_nl ... bench: 810,672 ns/iter (+/- 100,547) = 733 MB/s +test sherlock::holmes_cochar_watson ... bench: 106,124 ns/iter (+/- 10,948) = 5606 MB/s +test sherlock::holmes_coword_watson ... bench: 488,503 ns/iter (+/- 63,243) = 1217 MB/s +test sherlock::ing_suffix ... bench: 384,936 ns/iter (+/- 25,316) = 1545 MB/s +test sherlock::ing_suffix_limited_space ... bench: 1,060,294 ns/iter (+/- 152,263) = 561 MB/s +test sherlock::letters ... bench: 22,127,059 ns/iter (+/- 413,502) = 26 MB/s +test sherlock::letters_lower ... bench: 21,535,012 ns/iter (+/- 463,835) = 27 MB/s +test sherlock::letters_upper ... bench: 1,758,480 ns/iter (+/- 130,352) = 338 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 897,329 ns/iter (+/- 96,625) = 663 MB/s +test sherlock::name_alt1 ... bench: 31,585 ns/iter (+/- 2,796) = 18835 MB/s +test sherlock::name_alt2 ... bench: 86,223 ns/iter (+/- 9,553) = 6899 MB/s +test sherlock::name_alt3 ... bench: 97,177 ns/iter (+/- 11,479) = 6122 MB/s +test sherlock::name_alt3_nocase ... bench: 381,511 ns/iter (+/- 55,025) = 1559 MB/s +test sherlock::name_alt4 ... bench: 121,672 ns/iter (+/- 9,253) = 4889 MB/s +test sherlock::name_alt4_nocase ... 
bench: 187,887 ns/iter (+/- 26,932) = 3166 MB/s +test sherlock::name_alt5 ... bench: 90,732 ns/iter (+/- 7,251) = 6557 MB/s +test sherlock::name_alt5_nocase ... bench: 352,388 ns/iter (+/- 50,408) = 1688 MB/s +test sherlock::name_holmes ... bench: 33,836 ns/iter (+/- 3,388) = 17582 MB/s +test sherlock::name_holmes_nocase ... bench: 133,068 ns/iter (+/- 7,602) = 4470 MB/s +test sherlock::name_sherlock ... bench: 62,719 ns/iter (+/- 8,927) = 9485 MB/s +test sherlock::name_sherlock_holmes ... bench: 24,688 ns/iter (+/- 2,482) = 24098 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 97,793 ns/iter (+/- 12,078) = 6083 MB/s +test sherlock::name_sherlock_nocase ... bench: 95,772 ns/iter (+/- 13,713) = 6211 MB/s +test sherlock::name_whitespace ... bench: 70,942 ns/iter (+/- 5,565) = 8386 MB/s +test sherlock::no_match_common ... bench: 14,645 ns/iter (+/- 1,430) = 40623 MB/s +test sherlock::no_match_really_common ... bench: 239,346 ns/iter (+/- 17,292) = 2485 MB/s +test sherlock::no_match_uncommon ... bench: 14,637 ns/iter (+/- 1,360) = 40645 MB/s +test sherlock::quotes ... bench: 367,945 ns/iter (+/- 35,370) = 1616 MB/s +test sherlock::repeated_class_negation ... bench: 74,367,046 ns/iter (+/- 1,114,875) = 7 MB/s +test sherlock::the_lower ... bench: 463,888 ns/iter (+/- 67,551) = 1282 MB/s +test sherlock::the_nocase ... bench: 520,822 ns/iter (+/- 76,131) = 1142 MB/s +test sherlock::the_upper ... bench: 37,354 ns/iter (+/- 4,110) = 15926 MB/s +test sherlock::the_whitespace ... bench: 922,312 ns/iter (+/- 95,082) = 645 MB/s +test sherlock::word_ending_n ... bench: 1,679,343 ns/iter (+/- 165,580) = 354 MB/s +test sherlock::words ... bench: 8,280,082 ns/iter (+/- 290,280) = 71 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 119 measured; 0 filtered out; finished in 113.49s + diff --git a/vendor/regex/record/old-bench-log/08-new-memmem/rust-before-02 b/vendor/regex/record/old-bench-log/08-new-memmem/rust-before-02 new file mode 100644 index 0000000..5d75102 --- /dev/null +++ b/vendor/regex/record/old-bench-log/08-new-memmem/rust-before-02 @@ -0,0 +1,124 @@ + +running 119 tests +test misc::anchored_literal_long_match ... bench: 17 ns/iter (+/- 0) = 22941 MB/s +test misc::anchored_literal_long_non_match ... bench: 18 ns/iter (+/- 0) = 21666 MB/s +test misc::anchored_literal_short_match ... bench: 16 ns/iter (+/- 2) = 1625 MB/s +test misc::anchored_literal_short_non_match ... bench: 17 ns/iter (+/- 2) = 1529 MB/s +test misc::easy0_1K ... bench: 12 ns/iter (+/- 0) = 87583 MB/s +test misc::easy0_1MB ... bench: 14 ns/iter (+/- 1) = 74900214 MB/s +test misc::easy0_32 ... bench: 11 ns/iter (+/- 1) = 5363 MB/s +test misc::easy0_32K ... bench: 12 ns/iter (+/- 1) = 2732916 MB/s +test misc::easy1_1K ... bench: 38 ns/iter (+/- 5) = 27473 MB/s +test misc::easy1_1MB ... bench: 40 ns/iter (+/- 5) = 26214900 MB/s +test misc::easy1_32 ... bench: 38 ns/iter (+/- 1) = 1368 MB/s +test misc::easy1_32K ... bench: 38 ns/iter (+/- 1) = 862842 MB/s +test misc::hard_1K ... bench: 49 ns/iter (+/- 4) = 21448 MB/s +test misc::hard_1MB ... bench: 52 ns/iter (+/- 7) = 20165442 MB/s +test misc::hard_32 ... bench: 49 ns/iter (+/- 1) = 1204 MB/s +test misc::hard_32K ... bench: 49 ns/iter (+/- 6) = 669285 MB/s +test misc::is_match_set ... bench: 59 ns/iter (+/- 2) = 423 MB/s +test misc::literal ... bench: 11 ns/iter (+/- 0) = 4636 MB/s +test misc::long_needle1 ... bench: 1,179 ns/iter (+/- 92) = 84818 MB/s +test misc::long_needle2 ... bench: 680,418 ns/iter (+/- 27,142) = 146 MB/s +test misc::match_class ... bench: 68 ns/iter (+/- 6) = 1191 MB/s +test misc::match_class_in_range ... bench: 13 ns/iter (+/- 1) = 6230 MB/s +test misc::match_class_unicode ... 
bench: 253 ns/iter (+/- 33) = 636 MB/s +test misc::matches_set ... bench: 453 ns/iter (+/- 65) = 55 MB/s +test misc::medium_1K ... bench: 13 ns/iter (+/- 1) = 80923 MB/s +test misc::medium_1MB ... bench: 17 ns/iter (+/- 2) = 61682588 MB/s +test misc::medium_32 ... bench: 13 ns/iter (+/- 0) = 4615 MB/s +test misc::medium_32K ... bench: 13 ns/iter (+/- 0) = 2522769 MB/s +test misc::no_exponential ... bench: 330 ns/iter (+/- 47) = 303 MB/s +test misc::not_literal ... bench: 88 ns/iter (+/- 12) = 579 MB/s +test misc::one_pass_long_prefix ... bench: 50 ns/iter (+/- 6) = 520 MB/s +test misc::one_pass_long_prefix_not ... bench: 50 ns/iter (+/- 7) = 520 MB/s +test misc::one_pass_short ... bench: 36 ns/iter (+/- 2) = 472 MB/s +test misc::one_pass_short_not ... bench: 38 ns/iter (+/- 5) = 447 MB/s +test misc::reallyhard2_1K ... bench: 55 ns/iter (+/- 7) = 18909 MB/s +test misc::reallyhard_1K ... bench: 1,590 ns/iter (+/- 64) = 661 MB/s +test misc::reallyhard_1MB ... bench: 1,581,975 ns/iter (+/- 126,709) = 662 MB/s +test misc::reallyhard_32 ... bench: 100 ns/iter (+/- 4) = 590 MB/s +test misc::reallyhard_32K ... bench: 49,323 ns/iter (+/- 7,063) = 664 MB/s +test misc::replace_all ... bench: 127 ns/iter (+/- 5) +test misc::reverse_suffix_no_quadratic ... bench: 4,171 ns/iter (+/- 624) = 1918 MB/s +test misc::short_haystack_1000000x ... bench: 88,960 ns/iter (+/- 7,710) = 89928 MB/s +test misc::short_haystack_100000x ... bench: 10,193 ns/iter (+/- 952) = 78486 MB/s +test misc::short_haystack_10000x ... bench: 5,798 ns/iter (+/- 636) = 13799 MB/s +test misc::short_haystack_1000x ... bench: 418 ns/iter (+/- 60) = 19165 MB/s +test misc::short_haystack_100x ... bench: 258 ns/iter (+/- 21) = 3143 MB/s +test misc::short_haystack_10x ... bench: 216 ns/iter (+/- 21) = 421 MB/s +test misc::short_haystack_1x ... bench: 205 ns/iter (+/- 29) = 92 MB/s +test misc::short_haystack_2x ... bench: 206 ns/iter (+/- 22) = 131 MB/s +test misc::short_haystack_3x ... 
bench: 205 ns/iter (+/- 29) = 170 MB/s +test misc::short_haystack_4x ... bench: 214 ns/iter (+/- 6) = 200 MB/s +test regexdna::find_new_lines ... bench: 12,039,715 ns/iter (+/- 410,515) = 422 MB/s +test regexdna::subst1 ... bench: 750,454 ns/iter (+/- 65,358) = 6773 MB/s +test regexdna::subst10 ... bench: 748,321 ns/iter (+/- 93,416) = 6793 MB/s +test regexdna::subst11 ... bench: 747,906 ns/iter (+/- 92,141) = 6796 MB/s +test regexdna::subst2 ... bench: 755,082 ns/iter (+/- 88,044) = 6732 MB/s +test regexdna::subst3 ... bench: 753,496 ns/iter (+/- 70,987) = 6746 MB/s +test regexdna::subst4 ... bench: 747,103 ns/iter (+/- 102,992) = 6804 MB/s +test regexdna::subst5 ... bench: 750,805 ns/iter (+/- 72,572) = 6770 MB/s +test regexdna::subst6 ... bench: 748,419 ns/iter (+/- 47,272) = 6792 MB/s +test regexdna::subst7 ... bench: 752,556 ns/iter (+/- 95,329) = 6754 MB/s +test regexdna::subst8 ... bench: 756,009 ns/iter (+/- 78,049) = 6724 MB/s +test regexdna::subst9 ... bench: 749,278 ns/iter (+/- 70,259) = 6784 MB/s +test regexdna::variant1 ... bench: 2,215,182 ns/iter (+/- 114,543) = 2294 MB/s +test regexdna::variant2 ... bench: 3,207,983 ns/iter (+/- 184,419) = 1584 MB/s +test regexdna::variant3 ... bench: 3,791,716 ns/iter (+/- 192,185) = 1340 MB/s +test regexdna::variant4 ... bench: 3,809,934 ns/iter (+/- 222,872) = 1334 MB/s +test regexdna::variant5 ... bench: 2,651,345 ns/iter (+/- 183,673) = 1917 MB/s +test regexdna::variant6 ... bench: 2,635,566 ns/iter (+/- 170,288) = 1928 MB/s +test regexdna::variant7 ... bench: 3,265,519 ns/iter (+/- 234,923) = 1556 MB/s +test regexdna::variant8 ... bench: 3,340,830 ns/iter (+/- 183,129) = 1521 MB/s +test regexdna::variant9 ... bench: 3,267,141 ns/iter (+/- 185,543) = 1555 MB/s +test rust_compile::compile_huge ... bench: 94,368 ns/iter (+/- 13,293) +test rust_compile::compile_huge_bytes ... bench: 5,616,594 ns/iter (+/- 243,462) +test rust_compile::compile_huge_full ... 
bench: 10,862,100 ns/iter (+/- 260,207) +test rust_compile::compile_simple ... bench: 3,463 ns/iter (+/- 350) +test rust_compile::compile_simple_bytes ... bench: 3,542 ns/iter (+/- 504) +test rust_compile::compile_simple_full ... bench: 20,562 ns/iter (+/- 3,117) +test rust_compile::compile_small ... bench: 8,325 ns/iter (+/- 641) +test rust_compile::compile_small_bytes ... bench: 153,450 ns/iter (+/- 11,174) +test rust_compile::compile_small_full ... bench: 315,871 ns/iter (+/- 33,828) +test sherlock::before_after_holmes ... bench: 906,423 ns/iter (+/- 34,801) = 656 MB/s +test sherlock::before_holmes ... bench: 64,457 ns/iter (+/- 8,343) = 9229 MB/s +test sherlock::everything_greedy ... bench: 2,058,675 ns/iter (+/- 208,885) = 288 MB/s +test sherlock::everything_greedy_nl ... bench: 810,638 ns/iter (+/- 39,955) = 733 MB/s +test sherlock::holmes_cochar_watson ... bench: 106,048 ns/iter (+/- 8,158) = 5610 MB/s +test sherlock::holmes_coword_watson ... bench: 482,243 ns/iter (+/- 30,955) = 1233 MB/s +test sherlock::ing_suffix ... bench: 385,767 ns/iter (+/- 24,902) = 1542 MB/s +test sherlock::ing_suffix_limited_space ... bench: 1,060,762 ns/iter (+/- 94,273) = 560 MB/s +test sherlock::letters ... bench: 22,127,007 ns/iter (+/- 467,539) = 26 MB/s +test sherlock::letters_lower ... bench: 21,719,871 ns/iter (+/- 459,587) = 27 MB/s +test sherlock::letters_upper ... bench: 1,753,028 ns/iter (+/- 172,914) = 339 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 897,091 ns/iter (+/- 109,954) = 663 MB/s +test sherlock::name_alt1 ... bench: 31,636 ns/iter (+/- 2,323) = 18805 MB/s +test sherlock::name_alt2 ... bench: 85,898 ns/iter (+/- 10,486) = 6926 MB/s +test sherlock::name_alt3 ... bench: 97,104 ns/iter (+/- 8,851) = 6126 MB/s +test sherlock::name_alt3_nocase ... bench: 381,487 ns/iter (+/- 14,829) = 1559 MB/s +test sherlock::name_alt4 ... bench: 121,301 ns/iter (+/- 17,178) = 4904 MB/s +test sherlock::name_alt4_nocase ... 
bench: 187,262 ns/iter (+/- 17,478) = 3177 MB/s +test sherlock::name_alt5 ... bench: 90,773 ns/iter (+/- 2,791) = 6554 MB/s +test sherlock::name_alt5_nocase ... bench: 351,900 ns/iter (+/- 40,408) = 1690 MB/s +test sherlock::name_holmes ... bench: 34,767 ns/iter (+/- 3,334) = 17112 MB/s +test sherlock::name_holmes_nocase ... bench: 132,953 ns/iter (+/- 15,747) = 4474 MB/s +test sherlock::name_sherlock ... bench: 66,566 ns/iter (+/- 6,822) = 8937 MB/s +test sherlock::name_sherlock_holmes ... bench: 24,481 ns/iter (+/- 2,330) = 24301 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 97,531 ns/iter (+/- 12,331) = 6099 MB/s +test sherlock::name_sherlock_nocase ... bench: 95,808 ns/iter (+/- 13,250) = 6209 MB/s +test sherlock::name_whitespace ... bench: 71,342 ns/iter (+/- 9,877) = 8339 MB/s +test sherlock::no_match_common ... bench: 14,704 ns/iter (+/- 1,241) = 40460 MB/s +test sherlock::no_match_really_common ... bench: 238,731 ns/iter (+/- 31,179) = 2492 MB/s +test sherlock::no_match_uncommon ... bench: 14,620 ns/iter (+/- 1,250) = 40693 MB/s +test sherlock::quotes ... bench: 367,740 ns/iter (+/- 10,107) = 1617 MB/s +test sherlock::repeated_class_negation ... bench: 76,315,217 ns/iter (+/- 940,903) = 7 MB/s +test sherlock::the_lower ... bench: 464,322 ns/iter (+/- 14,654) = 1281 MB/s +test sherlock::the_nocase ... bench: 519,069 ns/iter (+/- 59,161) = 1146 MB/s +test sherlock::the_upper ... bench: 37,575 ns/iter (+/- 2,455) = 15833 MB/s +test sherlock::the_whitespace ... bench: 939,412 ns/iter (+/- 60,941) = 633 MB/s +test sherlock::word_ending_n ... bench: 1,681,192 ns/iter (+/- 156,265) = 353 MB/s +test sherlock::words ... bench: 8,213,141 ns/iter (+/- 322,533) = 72 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 119 measured; 0 filtered out; finished in 94.52s + diff --git a/vendor/regex/record/old-bench-log/09-new-baseline/pcre2 b/vendor/regex/record/old-bench-log/09-new-baseline/pcre2 new file mode 100644 index 0000000..595365d --- /dev/null +++ b/vendor/regex/record/old-bench-log/09-new-baseline/pcre2 @@ -0,0 +1,98 @@ + +running 93 tests +test misc::anchored_literal_long_match ... bench: 8 ns/iter (+/- 0) = 48750 MB/s +test misc::anchored_literal_long_non_match ... bench: 7 ns/iter (+/- 0) = 55714 MB/s +test misc::anchored_literal_short_match ... bench: 8 ns/iter (+/- 0) = 3250 MB/s +test misc::anchored_literal_short_non_match ... bench: 7 ns/iter (+/- 0) = 3714 MB/s +test misc::easy0_1K ... bench: 32 ns/iter (+/- 1) = 32843 MB/s +test misc::easy0_1MB ... bench: 22,160 ns/iter (+/- 3,887) = 47319 MB/s +test misc::easy0_32 ... bench: 10 ns/iter (+/- 0) = 5900 MB/s +test misc::easy0_32K ... bench: 651 ns/iter (+/- 2) = 50376 MB/s +test misc::easy1_1K ... bench: 36 ns/iter (+/- 1) = 29000 MB/s +test misc::easy1_1MB ... bench: 22,982 ns/iter (+/- 2,839) = 45626 MB/s +test misc::easy1_32 ... bench: 12 ns/iter (+/- 0) = 4333 MB/s +test misc::easy1_32K ... bench: 654 ns/iter (+/- 2) = 50134 MB/s +test misc::hard_1K ... bench: 469 ns/iter (+/- 9) = 2240 MB/s +test misc::hard_1MB ... bench: 733,962 ns/iter (+/- 28,297) = 1428 MB/s +test misc::hard_32 ... bench: 34 ns/iter (+/- 4) = 1735 MB/s +test misc::hard_32K ... bench: 19,567 ns/iter (+/- 363) = 1676 MB/s +test misc::literal ... bench: 8 ns/iter (+/- 0) = 6375 MB/s +test misc::long_needle1 ... bench: 257,858 ns/iter (+/- 646) = 387 MB/s +test misc::long_needle2 ... bench: 259,045 ns/iter (+/- 2,220) = 386 MB/s +test misc::match_class ... bench: 34 ns/iter (+/- 1) = 2382 MB/s +test misc::match_class_in_range ... bench: 9 ns/iter (+/- 0) = 9000 MB/s +test misc::match_class_unicode ... bench: 125 ns/iter (+/- 3) = 1288 MB/s +test misc::medium_1K ... 
bench: 35 ns/iter (+/- 3) = 30057 MB/s +test misc::medium_1MB ... bench: 21,126 ns/iter (+/- 4,036) = 49635 MB/s +test misc::medium_32 ... bench: 10 ns/iter (+/- 0) = 6000 MB/s +test misc::medium_32K ... bench: 714 ns/iter (+/- 122) = 45932 MB/s +test misc::not_literal ... bench: 62 ns/iter (+/- 2) = 822 MB/s +test misc::one_pass_long_prefix ... bench: 8 ns/iter (+/- 0) = 3250 MB/s +test misc::one_pass_long_prefix_not ... bench: 8 ns/iter (+/- 0) = 3250 MB/s +test misc::one_pass_short ... bench: 19 ns/iter (+/- 1) = 894 MB/s +test misc::one_pass_short_not ... bench: 19 ns/iter (+/- 1) = 894 MB/s +test misc::reallyhard2_1K ... bench: 1,704 ns/iter (+/- 17) = 610 MB/s +test misc::reallyhard_1K ... bench: 495 ns/iter (+/- 9) = 2123 MB/s +test misc::reallyhard_1MB ... bench: 682,371 ns/iter (+/- 31,284) = 1536 MB/s +test misc::reallyhard_32 ... bench: 34 ns/iter (+/- 2) = 1735 MB/s +test misc::reallyhard_32K ... bench: 17,994 ns/iter (+/- 540) = 1822 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 1,507 ns/iter (+/- 10) = 5308 MB/s +test regexdna::find_new_lines ... bench: 849,983 ns/iter (+/- 25,898) = 5980 MB/s +test regexdna::subst1 ... bench: 520,602 ns/iter (+/- 12,170) = 9764 MB/s +test regexdna::subst10 ... bench: 517,151 ns/iter (+/- 19,060) = 9829 MB/s +test regexdna::subst11 ... bench: 519,209 ns/iter (+/- 12,477) = 9790 MB/s +test regexdna::subst2 ... bench: 513,418 ns/iter (+/- 19,803) = 9901 MB/s +test regexdna::subst3 ... bench: 514,166 ns/iter (+/- 13,019) = 9886 MB/s +test regexdna::subst4 ... bench: 517,808 ns/iter (+/- 30,655) = 9817 MB/s +test regexdna::subst5 ... bench: 516,922 ns/iter (+/- 17,204) = 9834 MB/s +test regexdna::subst6 ... bench: 509,430 ns/iter (+/- 20,608) = 9978 MB/s +test regexdna::subst7 ... bench: 519,437 ns/iter (+/- 10,537) = 9786 MB/s +test regexdna::subst8 ... bench: 520,282 ns/iter (+/- 25,742) = 9770 MB/s +test regexdna::subst9 ... bench: 512,819 ns/iter (+/- 11,443) = 9912 MB/s +test regexdna::variant1 ... 
bench: 5,302,526 ns/iter (+/- 158,370) = 958 MB/s +test regexdna::variant2 ... bench: 7,421,107 ns/iter (+/- 105,716) = 684 MB/s +test regexdna::variant3 ... bench: 7,310,968 ns/iter (+/- 103,989) = 695 MB/s +test regexdna::variant4 ... bench: 6,152,891 ns/iter (+/- 144,194) = 826 MB/s +test regexdna::variant5 ... bench: 5,717,515 ns/iter (+/- 42,902) = 889 MB/s +test regexdna::variant6 ... bench: 5,840,938 ns/iter (+/- 47,730) = 870 MB/s +test regexdna::variant7 ... bench: 6,624,859 ns/iter (+/- 37,376) = 767 MB/s +test regexdna::variant8 ... bench: 7,308,342 ns/iter (+/- 58,395) = 695 MB/s +test regexdna::variant9 ... bench: 7,372,260 ns/iter (+/- 76,966) = 689 MB/s +test sherlock::before_after_holmes ... bench: 2,817,108 ns/iter (+/- 18,002) = 211 MB/s +test sherlock::before_holmes ... bench: 2,841,515 ns/iter (+/- 14,677) = 209 MB/s +test sherlock::holmes_cochar_watson ... bench: 33,066 ns/iter (+/- 1,766) = 17992 MB/s +test sherlock::ing_suffix ... bench: 1,299,382 ns/iter (+/- 19,674) = 457 MB/s +test sherlock::ing_suffix_limited_space ... bench: 2,799,189 ns/iter (+/- 33,841) = 212 MB/s +test sherlock::letters ... bench: 4,923,399 ns/iter (+/- 111,904) = 120 MB/s +test sherlock::letters_lower ... bench: 5,057,224 ns/iter (+/- 102,860) = 117 MB/s +test sherlock::letters_upper ... bench: 874,306 ns/iter (+/- 10,587) = 680 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 15,876 ns/iter (+/- 58) = 37473 MB/s +test sherlock::name_alt1 ... bench: 19,349 ns/iter (+/- 201) = 30747 MB/s +test sherlock::name_alt2 ... bench: 29,916 ns/iter (+/- 581) = 19886 MB/s +test sherlock::name_alt3 ... bench: 461,887 ns/iter (+/- 5,337) = 1288 MB/s +test sherlock::name_alt3_nocase ... bench: 1,813,574 ns/iter (+/- 27,519) = 328 MB/s +test sherlock::name_alt4 ... bench: 30,155 ns/iter (+/- 1,407) = 19729 MB/s +test sherlock::name_alt4_nocase ... bench: 822,605 ns/iter (+/- 56,624) = 723 MB/s +test sherlock::name_alt5 ... 
bench: 426,318 ns/iter (+/- 12,233) = 1395 MB/s +test sherlock::name_alt5_nocase ... bench: 1,012,097 ns/iter (+/- 27,806) = 587 MB/s +test sherlock::name_holmes ... bench: 19,833 ns/iter (+/- 499) = 29997 MB/s +test sherlock::name_holmes_nocase ... bench: 40,266 ns/iter (+/- 2,089) = 14775 MB/s +test sherlock::name_sherlock ... bench: 14,589 ns/iter (+/- 115) = 40779 MB/s +test sherlock::name_sherlock_holmes ... bench: 14,213 ns/iter (+/- 81) = 41858 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 602,296 ns/iter (+/- 98,066) = 987 MB/s +test sherlock::name_sherlock_nocase ... bench: 479,745 ns/iter (+/- 18,070) = 1240 MB/s +test sherlock::name_whitespace ... bench: 14,584 ns/iter (+/- 44) = 40793 MB/s +test sherlock::no_match_common ... bench: 13,499 ns/iter (+/- 1,090) = 44072 MB/s +test sherlock::no_match_really_common ... bench: 12,507 ns/iter (+/- 1,238) = 47568 MB/s +test sherlock::no_match_uncommon ... bench: 11,534 ns/iter (+/- 9) = 51580 MB/s +test sherlock::quotes ... bench: 251,867 ns/iter (+/- 11,818) = 2362 MB/s +test sherlock::repeated_class_negation ... bench: 2,969,330 ns/iter (+/- 287,150) = 200 MB/s +test sherlock::the_lower ... bench: 206,513 ns/iter (+/- 3,294) = 2880 MB/s +test sherlock::the_nocase ... bench: 237,655 ns/iter (+/- 6,616) = 2503 MB/s +test sherlock::the_upper ... bench: 23,922 ns/iter (+/- 510) = 24869 MB/s +test sherlock::the_whitespace ... bench: 326,257 ns/iter (+/- 10,038) = 1823 MB/s +test sherlock::word_ending_n ... bench: 3,264,085 ns/iter (+/- 57,242) = 182 MB/s +test sherlock::words ... bench: 3,161,731 ns/iter (+/- 45,794) = 188 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 93 measured; 0 filtered out; finished in 184.16s + diff --git a/vendor/regex/record/old-bench-log/09-new-baseline/re2 b/vendor/regex/record/old-bench-log/09-new-baseline/re2 new file mode 100644 index 0000000..9bae2a1 --- /dev/null +++ b/vendor/regex/record/old-bench-log/09-new-baseline/re2 @@ -0,0 +1,101 @@ + +running 96 tests +test misc::anchored_literal_long_match ... bench: 26 ns/iter (+/- 0) = 15000 MB/s +test misc::anchored_literal_long_non_match ... bench: 4 ns/iter (+/- 0) = 97500 MB/s +test misc::anchored_literal_short_match ... bench: 26 ns/iter (+/- 0) = 1000 MB/s +test misc::anchored_literal_short_non_match ... bench: 4 ns/iter (+/- 0) = 6500 MB/s +test misc::easy0_1K ... bench: 50 ns/iter (+/- 0) = 21020 MB/s +test misc::easy0_1MB ... bench: 51 ns/iter (+/- 0) = 20560843 MB/s +test misc::easy0_32 ... bench: 50 ns/iter (+/- 0) = 1180 MB/s +test misc::easy0_32K ... bench: 50 ns/iter (+/- 0) = 655900 MB/s +test misc::easy1_1K ... bench: 43 ns/iter (+/- 1) = 24279 MB/s +test misc::easy1_1MB ... bench: 43 ns/iter (+/- 0) = 24385953 MB/s +test misc::easy1_32 ... bench: 43 ns/iter (+/- 1) = 1209 MB/s +test misc::easy1_32K ... bench: 43 ns/iter (+/- 0) = 762511 MB/s +test misc::hard_1K ... bench: 50 ns/iter (+/- 0) = 21020 MB/s +test misc::hard_1MB ... bench: 50 ns/iter (+/- 0) = 20972060 MB/s +test misc::hard_32 ... bench: 50 ns/iter (+/- 0) = 1180 MB/s +test misc::hard_32K ... bench: 50 ns/iter (+/- 0) = 655900 MB/s +test misc::literal ... bench: 25 ns/iter (+/- 0) = 2040 MB/s +test misc::long_needle1 ... bench: 356,319 ns/iter (+/- 680) = 280 MB/s +test misc::long_needle2 ... bench: 356,384 ns/iter (+/- 3,126) = 280 MB/s +test misc::match_class ... bench: 94 ns/iter (+/- 0) = 861 MB/s +test misc::match_class_in_range ... bench: 94 ns/iter (+/- 0) = 861 MB/s +test misc::match_class_unicode ... bench: 168 ns/iter (+/- 1) = 958 MB/s +test misc::medium_1K ... bench: 51 ns/iter (+/- 0) = 20627 MB/s +test misc::medium_1MB ... 
bench: 51 ns/iter (+/- 0) = 20560862 MB/s +test misc::medium_32 ... bench: 51 ns/iter (+/- 0) = 1176 MB/s +test misc::medium_32K ... bench: 51 ns/iter (+/- 1) = 643058 MB/s +test misc::no_exponential ... bench: 112 ns/iter (+/- 0) = 892 MB/s +test misc::not_literal ... bench: 66 ns/iter (+/- 0) = 772 MB/s +test misc::one_pass_long_prefix ... bench: 25 ns/iter (+/- 0) = 1040 MB/s +test misc::one_pass_long_prefix_not ... bench: 44 ns/iter (+/- 0) = 590 MB/s +test misc::one_pass_short ... bench: 43 ns/iter (+/- 0) = 395 MB/s +test misc::one_pass_short_not ... bench: 41 ns/iter (+/- 0) = 414 MB/s +test misc::reallyhard2_1K ... bench: 978 ns/iter (+/- 7) = 1063 MB/s +test misc::reallyhard_1K ... bench: 987 ns/iter (+/- 11) = 1064 MB/s +test misc::reallyhard_1MB ... bench: 957,501 ns/iter (+/- 8,247) = 1095 MB/s +test misc::reallyhard_32 ... bench: 73 ns/iter (+/- 0) = 808 MB/s +test misc::reallyhard_32K ... bench: 30,057 ns/iter (+/- 315) = 1091 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 7,368 ns/iter (+/- 105) = 1085 MB/s +test regexdna::find_new_lines ... bench: 15,567,882 ns/iter (+/- 48,213) = 326 MB/s +test regexdna::subst1 ... bench: 2,011,288 ns/iter (+/- 23,092) = 2527 MB/s +test regexdna::subst10 ... bench: 2,013,337 ns/iter (+/- 33,388) = 2524 MB/s +test regexdna::subst11 ... bench: 2,005,968 ns/iter (+/- 25,799) = 2534 MB/s +test regexdna::subst2 ... bench: 2,022,572 ns/iter (+/- 23,311) = 2513 MB/s +test regexdna::subst3 ... bench: 2,018,386 ns/iter (+/- 32,071) = 2518 MB/s +test regexdna::subst4 ... bench: 2,013,345 ns/iter (+/- 32,599) = 2524 MB/s +test regexdna::subst5 ... bench: 2,015,871 ns/iter (+/- 25,081) = 2521 MB/s +test regexdna::subst6 ... bench: 2,008,492 ns/iter (+/- 24,502) = 2530 MB/s +test regexdna::subst7 ... bench: 2,018,804 ns/iter (+/- 38,700) = 2518 MB/s +test regexdna::subst8 ... bench: 2,010,856 ns/iter (+/- 23,695) = 2527 MB/s +test regexdna::subst9 ... 
bench: 2,023,767 ns/iter (+/- 17,040) = 2511 MB/s +test regexdna::variant1 ... bench: 4,688,839 ns/iter (+/- 19,258) = 1084 MB/s +test regexdna::variant2 ... bench: 4,693,463 ns/iter (+/- 31,741) = 1083 MB/s +test regexdna::variant3 ... bench: 4,674,020 ns/iter (+/- 15,755) = 1087 MB/s +test regexdna::variant4 ... bench: 4,666,017 ns/iter (+/- 16,318) = 1089 MB/s +test regexdna::variant5 ... bench: 4,682,965 ns/iter (+/- 17,552) = 1085 MB/s +test regexdna::variant6 ... bench: 4,661,825 ns/iter (+/- 21,667) = 1090 MB/s +test regexdna::variant7 ... bench: 4,697,959 ns/iter (+/- 24,282) = 1082 MB/s +test regexdna::variant8 ... bench: 4,700,703 ns/iter (+/- 21,377) = 1081 MB/s +test regexdna::variant9 ... bench: 4,665,298 ns/iter (+/- 19,086) = 1089 MB/s +test sherlock::before_after_holmes ... bench: 560,350 ns/iter (+/- 3,852) = 1061 MB/s +test sherlock::before_holmes ... bench: 574,423 ns/iter (+/- 4,638) = 1035 MB/s +test sherlock::everything_greedy ... bench: 2,688,852 ns/iter (+/- 16,320) = 221 MB/s +test sherlock::everything_greedy_nl ... bench: 1,206,136 ns/iter (+/- 6,173) = 493 MB/s +test sherlock::holmes_cochar_watson ... bench: 547,910 ns/iter (+/- 7,147) = 1085 MB/s +test sherlock::holmes_coword_watson ... bench: 610,803 ns/iter (+/- 1,029) = 974 MB/s +test sherlock::ing_suffix ... bench: 777,478 ns/iter (+/- 3,028) = 765 MB/s +test sherlock::ing_suffix_limited_space ... bench: 725,653 ns/iter (+/- 4,746) = 819 MB/s +test sherlock::letters ... bench: 25,265,004 ns/iter (+/- 120,234) = 23 MB/s +test sherlock::letters_lower ... bench: 24,615,621 ns/iter (+/- 134,875) = 24 MB/s +test sherlock::letters_upper ... bench: 1,485,920 ns/iter (+/- 21,446) = 400 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 547,229 ns/iter (+/- 3,726) = 1087 MB/s +test sherlock::name_alt1 ... bench: 18,148 ns/iter (+/- 131) = 32782 MB/s +test sherlock::name_alt2 ... bench: 586,335 ns/iter (+/- 3,679) = 1014 MB/s +test sherlock::name_alt3 ... 
bench: 601,096 ns/iter (+/- 3,781) = 989 MB/s +test sherlock::name_alt3_nocase ... bench: 602,319 ns/iter (+/- 7,872) = 987 MB/s +test sherlock::name_alt4 ... bench: 586,762 ns/iter (+/- 3,465) = 1013 MB/s +test sherlock::name_alt4_nocase ... bench: 595,539 ns/iter (+/- 3,240) = 998 MB/s +test sherlock::name_alt5 ... bench: 592,474 ns/iter (+/- 6,361) = 1004 MB/s +test sherlock::name_alt5_nocase ... bench: 593,214 ns/iter (+/- 4,667) = 1002 MB/s +test sherlock::name_holmes ... bench: 40,236 ns/iter (+/- 514) = 14786 MB/s +test sherlock::name_holmes_nocase ... bench: 215,216 ns/iter (+/- 4,822) = 2764 MB/s +test sherlock::name_sherlock ... bench: 14,064 ns/iter (+/- 159) = 42301 MB/s +test sherlock::name_sherlock_holmes ... bench: 15,727 ns/iter (+/- 166) = 37828 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 552,042 ns/iter (+/- 6,395) = 1077 MB/s +test sherlock::name_sherlock_nocase ... bench: 552,475 ns/iter (+/- 5,365) = 1076 MB/s +test sherlock::name_whitespace ... bench: 16,210 ns/iter (+/- 194) = 36701 MB/s +test sherlock::no_match_common ... bench: 147,489 ns/iter (+/- 602) = 4033 MB/s +test sherlock::no_match_really_common ... bench: 157,205 ns/iter (+/- 350) = 3784 MB/s +test sherlock::no_match_uncommon ... bench: 4,849 ns/iter (+/- 5) = 122691 MB/s +test sherlock::quotes ... bench: 619,880 ns/iter (+/- 5,189) = 959 MB/s +test sherlock::the_lower ... bench: 685,396 ns/iter (+/- 12,559) = 868 MB/s +test sherlock::the_nocase ... bench: 771,051 ns/iter (+/- 18,470) = 771 MB/s +test sherlock::the_upper ... bench: 59,139 ns/iter (+/- 1,604) = 10059 MB/s +test sherlock::the_whitespace ... bench: 736,147 ns/iter (+/- 7,668) = 808 MB/s +test sherlock::word_ending_n ... bench: 1,200,401 ns/iter (+/- 11,206) = 495 MB/s +test sherlock::words ... bench: 8,024,768 ns/iter (+/- 93,051) = 74 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 96 measured; 0 filtered out; finished in 86.80s + diff --git a/vendor/regex/record/old-bench-log/09-new-baseline/rust b/vendor/regex/record/old-bench-log/09-new-baseline/rust new file mode 100644 index 0000000..30924d4 --- /dev/null +++ b/vendor/regex/record/old-bench-log/09-new-baseline/rust @@ -0,0 +1,124 @@ + +running 119 tests +test misc::anchored_literal_long_match ... bench: 7 ns/iter (+/- 0) = 55714 MB/s +test misc::anchored_literal_long_non_match ... bench: 10 ns/iter (+/- 0) = 39000 MB/s +test misc::anchored_literal_short_match ... bench: 7 ns/iter (+/- 0) = 3714 MB/s +test misc::anchored_literal_short_non_match ... bench: 10 ns/iter (+/- 0) = 2600 MB/s +test misc::easy0_1K ... bench: 8 ns/iter (+/- 0) = 131375 MB/s +test misc::easy0_1MB ... bench: 12 ns/iter (+/- 0) = 87383583 MB/s +test misc::easy0_32 ... bench: 8 ns/iter (+/- 0) = 7375 MB/s +test misc::easy0_32K ... bench: 8 ns/iter (+/- 0) = 4099375 MB/s +test misc::easy1_1K ... bench: 25 ns/iter (+/- 0) = 41760 MB/s +test misc::easy1_1MB ... bench: 26 ns/iter (+/- 0) = 40330615 MB/s +test misc::easy1_32 ... bench: 25 ns/iter (+/- 0) = 2080 MB/s +test misc::easy1_32K ... bench: 26 ns/iter (+/- 0) = 1261076 MB/s +test misc::hard_1K ... bench: 33 ns/iter (+/- 0) = 31848 MB/s +test misc::hard_1MB ... bench: 33 ns/iter (+/- 0) = 31775848 MB/s +test misc::hard_32 ... bench: 34 ns/iter (+/- 0) = 1735 MB/s +test misc::hard_32K ... bench: 33 ns/iter (+/- 0) = 993787 MB/s +test misc::is_match_set ... bench: 35 ns/iter (+/- 0) = 714 MB/s +test misc::literal ... bench: 7 ns/iter (+/- 0) = 7285 MB/s +test misc::long_needle1 ... bench: 1,517 ns/iter (+/- 25) = 65920 MB/s +test misc::long_needle2 ... bench: 186,131 ns/iter (+/- 1,191) = 537 MB/s +test misc::match_class ... bench: 37 ns/iter (+/- 0) = 2189 MB/s +test misc::match_class_in_range ... bench: 7 ns/iter (+/- 0) = 11571 MB/s +test misc::match_class_unicode ... bench: 160 ns/iter (+/- 1) = 1006 MB/s +test misc::matches_set ... 
bench: 200 ns/iter (+/- 4) = 125 MB/s +test misc::medium_1K ... bench: 8 ns/iter (+/- 0) = 131500 MB/s +test misc::medium_1MB ... bench: 12 ns/iter (+/- 0) = 87383666 MB/s +test misc::medium_32 ... bench: 8 ns/iter (+/- 0) = 7500 MB/s +test misc::medium_32K ... bench: 8 ns/iter (+/- 0) = 4099500 MB/s +test misc::no_exponential ... bench: 262 ns/iter (+/- 6) = 381 MB/s +test misc::not_literal ... bench: 43 ns/iter (+/- 1) = 1186 MB/s +test misc::one_pass_long_prefix ... bench: 23 ns/iter (+/- 1) = 1130 MB/s +test misc::one_pass_long_prefix_not ... bench: 23 ns/iter (+/- 1) = 1130 MB/s +test misc::one_pass_short ... bench: 16 ns/iter (+/- 0) = 1062 MB/s +test misc::one_pass_short_not ... bench: 18 ns/iter (+/- 0) = 944 MB/s +test misc::reallyhard2_1K ... bench: 36 ns/iter (+/- 1) = 28888 MB/s +test misc::reallyhard_1K ... bench: 1,155 ns/iter (+/- 11) = 909 MB/s +test misc::reallyhard_1MB ... bench: 1,152,983 ns/iter (+/- 6,607) = 909 MB/s +test misc::reallyhard_32 ... bench: 52 ns/iter (+/- 2) = 1134 MB/s +test misc::reallyhard_32K ... bench: 36,194 ns/iter (+/- 327) = 906 MB/s +test misc::replace_all ... bench: 81 ns/iter (+/- 5) +test misc::reverse_suffix_no_quadratic ... bench: 2,269 ns/iter (+/- 3) = 3525 MB/s +test misc::short_haystack_1000000x ... bench: 63,956 ns/iter (+/- 209) = 125086 MB/s +test misc::short_haystack_100000x ... bench: 5,877 ns/iter (+/- 66) = 136125 MB/s +test misc::short_haystack_10000x ... bench: 2,414 ns/iter (+/- 10) = 33144 MB/s +test misc::short_haystack_1000x ... bench: 195 ns/iter (+/- 11) = 41082 MB/s +test misc::short_haystack_100x ... bench: 96 ns/iter (+/- 7) = 8447 MB/s +test misc::short_haystack_10x ... bench: 85 ns/iter (+/- 8) = 1070 MB/s +test misc::short_haystack_1x ... bench: 85 ns/iter (+/- 6) = 223 MB/s +test misc::short_haystack_2x ... bench: 86 ns/iter (+/- 12) = 313 MB/s +test misc::short_haystack_3x ... bench: 85 ns/iter (+/- 22) = 411 MB/s +test misc::short_haystack_4x ... 
bench: 85 ns/iter (+/- 12) = 505 MB/s +test regexdna::find_new_lines ... bench: 6,977,678 ns/iter (+/- 90,937) = 728 MB/s +test regexdna::subst1 ... bench: 423,846 ns/iter (+/- 41,460) = 11993 MB/s +test regexdna::subst10 ... bench: 424,043 ns/iter (+/- 55,743) = 11987 MB/s +test regexdna::subst11 ... bench: 418,549 ns/iter (+/- 12,106) = 12145 MB/s +test regexdna::subst2 ... bench: 430,056 ns/iter (+/- 8,862) = 11820 MB/s +test regexdna::subst3 ... bench: 429,634 ns/iter (+/- 26,807) = 11831 MB/s +test regexdna::subst4 ... bench: 419,313 ns/iter (+/- 42,070) = 12123 MB/s +test regexdna::subst5 ... bench: 425,299 ns/iter (+/- 43,161) = 11952 MB/s +test regexdna::subst6 ... bench: 420,177 ns/iter (+/- 49,394) = 12098 MB/s +test regexdna::subst7 ... bench: 425,118 ns/iter (+/- 46,952) = 11957 MB/s +test regexdna::subst8 ... bench: 420,840 ns/iter (+/- 11,623) = 12079 MB/s +test regexdna::subst9 ... bench: 420,752 ns/iter (+/- 10,186) = 12081 MB/s +test regexdna::variant1 ... bench: 1,445,103 ns/iter (+/- 29,436) = 3517 MB/s +test regexdna::variant2 ... bench: 2,234,423 ns/iter (+/- 24,502) = 2275 MB/s +test regexdna::variant3 ... bench: 2,730,972 ns/iter (+/- 26,961) = 1861 MB/s +test regexdna::variant4 ... bench: 2,708,975 ns/iter (+/- 36,517) = 1876 MB/s +test regexdna::variant5 ... bench: 1,663,458 ns/iter (+/- 39,508) = 3055 MB/s +test regexdna::variant6 ... bench: 1,673,873 ns/iter (+/- 14,846) = 3036 MB/s +test regexdna::variant7 ... bench: 2,322,347 ns/iter (+/- 33,731) = 2188 MB/s +test regexdna::variant8 ... bench: 2,350,779 ns/iter (+/- 54,976) = 2162 MB/s +test regexdna::variant9 ... bench: 2,326,741 ns/iter (+/- 20,836) = 2184 MB/s +test rust_compile::compile_huge ... bench: 47,700 ns/iter (+/- 230) +test rust_compile::compile_huge_bytes ... bench: 2,987,898 ns/iter (+/- 32,819) +test rust_compile::compile_huge_full ... bench: 5,705,551 ns/iter (+/- 63,483) +test rust_compile::compile_simple ... 
bench: 1,963 ns/iter (+/- 44) +test rust_compile::compile_simple_bytes ... bench: 1,970 ns/iter (+/- 32) +test rust_compile::compile_simple_full ... bench: 9,677 ns/iter (+/- 69) +test rust_compile::compile_small ... bench: 4,501 ns/iter (+/- 70) +test rust_compile::compile_small_bytes ... bench: 75,372 ns/iter (+/- 2,007) +test rust_compile::compile_small_full ... bench: 151,733 ns/iter (+/- 2,378) +test sherlock::before_after_holmes ... bench: 655,827 ns/iter (+/- 1,426) = 907 MB/s +test sherlock::before_holmes ... bench: 24,653 ns/iter (+/- 224) = 24132 MB/s +test sherlock::everything_greedy ... bench: 1,026,254 ns/iter (+/- 27,926) = 579 MB/s +test sherlock::everything_greedy_nl ... bench: 469,676 ns/iter (+/- 62,296) = 1266 MB/s +test sherlock::holmes_cochar_watson ... bench: 47,578 ns/iter (+/- 1,730) = 12504 MB/s +test sherlock::holmes_coword_watson ... bench: 321,318 ns/iter (+/- 3,235) = 1851 MB/s +test sherlock::ing_suffix ... bench: 150,908 ns/iter (+/- 3,952) = 3942 MB/s +test sherlock::ing_suffix_limited_space ... bench: 726,848 ns/iter (+/- 5,314) = 818 MB/s +test sherlock::letters ... bench: 9,719,997 ns/iter (+/- 67,717) = 61 MB/s +test sherlock::letters_lower ... bench: 9,559,105 ns/iter (+/- 79,257) = 62 MB/s +test sherlock::letters_upper ... bench: 1,066,791 ns/iter (+/- 13,193) = 557 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 653,228 ns/iter (+/- 881) = 910 MB/s +test sherlock::name_alt1 ... bench: 10,663 ns/iter (+/- 76) = 55794 MB/s +test sherlock::name_alt2 ... bench: 33,831 ns/iter (+/- 967) = 17585 MB/s +test sherlock::name_alt3 ... bench: 38,061 ns/iter (+/- 1,123) = 15631 MB/s +test sherlock::name_alt3_nocase ... bench: 218,691 ns/iter (+/- 2,345) = 2720 MB/s +test sherlock::name_alt4 ... bench: 52,408 ns/iter (+/- 1,315) = 11351 MB/s +test sherlock::name_alt4_nocase ... bench: 84,212 ns/iter (+/- 2,708) = 7064 MB/s +test sherlock::name_alt5 ... 
bench: 35,272 ns/iter (+/- 1,784) = 16867 MB/s +test sherlock::name_alt5_nocase ... bench: 193,585 ns/iter (+/- 5,057) = 3073 MB/s +test sherlock::name_holmes ... bench: 15,018 ns/iter (+/- 440) = 39614 MB/s +test sherlock::name_holmes_nocase ... bench: 60,207 ns/iter (+/- 1,046) = 9881 MB/s +test sherlock::name_sherlock ... bench: 10,344 ns/iter (+/- 52) = 57514 MB/s +test sherlock::name_sherlock_holmes ... bench: 10,374 ns/iter (+/- 98) = 57348 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 42,037 ns/iter (+/- 1,363) = 14152 MB/s +test sherlock::name_sherlock_nocase ... bench: 41,192 ns/iter (+/- 881) = 14442 MB/s +test sherlock::name_whitespace ... bench: 13,893 ns/iter (+/- 55) = 42822 MB/s +test sherlock::no_match_common ... bench: 8,700 ns/iter (+/- 10) = 68383 MB/s +test sherlock::no_match_really_common ... bench: 10,368 ns/iter (+/- 123) = 57381 MB/s +test sherlock::no_match_uncommon ... bench: 8,695 ns/iter (+/- 7) = 68422 MB/s +test sherlock::quotes ... bench: 222,526 ns/iter (+/- 5,362) = 2673 MB/s +test sherlock::repeated_class_negation ... bench: 35,869,193 ns/iter (+/- 551,212) = 16 MB/s +test sherlock::the_lower ... bench: 187,208 ns/iter (+/- 4,374) = 3177 MB/s +test sherlock::the_nocase ... bench: 280,625 ns/iter (+/- 10,142) = 2120 MB/s +test sherlock::the_upper ... bench: 19,742 ns/iter (+/- 692) = 30135 MB/s +test sherlock::the_whitespace ... bench: 396,099 ns/iter (+/- 10,400) = 1501 MB/s +test sherlock::word_ending_n ... bench: 1,055,639 ns/iter (+/- 6,627) = 563 MB/s +test sherlock::words ... bench: 4,280,471 ns/iter (+/- 53,841) = 138 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 119 measured; 0 filtered out; finished in 141.25s + diff --git a/vendor/regex/record/old-bench-log/09-new-baseline/rust-bytes b/vendor/regex/record/old-bench-log/09-new-baseline/rust-bytes new file mode 100644 index 0000000..ff08ed1 --- /dev/null +++ b/vendor/regex/record/old-bench-log/09-new-baseline/rust-bytes @@ -0,0 +1,112 @@ + +running 107 tests +test misc::anchored_literal_long_match ... bench: 7 ns/iter (+/- 0) = 55714 MB/s +test misc::anchored_literal_long_non_match ... bench: 8 ns/iter (+/- 0) = 48750 MB/s +test misc::anchored_literal_short_match ... bench: 7 ns/iter (+/- 0) = 3714 MB/s +test misc::anchored_literal_short_non_match ... bench: 9 ns/iter (+/- 0) = 2888 MB/s +test misc::easy0_1K ... bench: 7 ns/iter (+/- 0) = 150142 MB/s +test misc::easy0_1MB ... bench: 11 ns/iter (+/- 1) = 95327545 MB/s +test misc::easy0_32 ... bench: 7 ns/iter (+/- 0) = 8428 MB/s +test misc::easy0_32K ... bench: 7 ns/iter (+/- 0) = 4685000 MB/s +test misc::easy1_1K ... bench: 17 ns/iter (+/- 0) = 61411 MB/s +test misc::easy1_1MB ... bench: 20 ns/iter (+/- 0) = 52429800 MB/s +test misc::easy1_32 ... bench: 17 ns/iter (+/- 0) = 3058 MB/s +test misc::easy1_32K ... bench: 17 ns/iter (+/- 1) = 1928705 MB/s +test misc::hard_1K ... bench: 24 ns/iter (+/- 0) = 43791 MB/s +test misc::hard_1MB ... bench: 27 ns/iter (+/- 0) = 38837148 MB/s +test misc::hard_32 ... bench: 24 ns/iter (+/- 0) = 2458 MB/s +test misc::hard_32K ... bench: 24 ns/iter (+/- 0) = 1366458 MB/s +test misc::is_match_set ... bench: 35 ns/iter (+/- 0) = 714 MB/s +test misc::literal ... bench: 7 ns/iter (+/- 0) = 7285 MB/s +test misc::long_needle1 ... bench: 1,325 ns/iter (+/- 18) = 75472 MB/s +test misc::long_needle2 ... bench: 186,021 ns/iter (+/- 1,157) = 537 MB/s +test misc::match_class ... bench: 38 ns/iter (+/- 3) = 2131 MB/s +test misc::match_class_in_range ... bench: 7 ns/iter (+/- 0) = 11571 MB/s +test misc::matches_set ... 
bench: 172 ns/iter (+/- 4) = 145 MB/s +test misc::medium_1K ... bench: 7 ns/iter (+/- 0) = 150285 MB/s +test misc::medium_1MB ... bench: 12 ns/iter (+/- 0) = 87383666 MB/s +test misc::medium_32 ... bench: 8 ns/iter (+/- 0) = 7500 MB/s +test misc::medium_32K ... bench: 7 ns/iter (+/- 0) = 4685142 MB/s +test misc::no_exponential ... bench: 272 ns/iter (+/- 10) = 367 MB/s +test misc::not_literal ... bench: 42 ns/iter (+/- 1) = 1214 MB/s +test misc::one_pass_long_prefix ... bench: 23 ns/iter (+/- 1) = 1130 MB/s +test misc::one_pass_long_prefix_not ... bench: 22 ns/iter (+/- 0) = 1181 MB/s +test misc::one_pass_short ... bench: 15 ns/iter (+/- 0) = 1133 MB/s +test misc::one_pass_short_not ... bench: 18 ns/iter (+/- 0) = 944 MB/s +test misc::reallyhard2_1K ... bench: 36 ns/iter (+/- 0) = 28888 MB/s +test misc::reallyhard_1K ... bench: 1,152 ns/iter (+/- 14) = 912 MB/s +test misc::reallyhard_1MB ... bench: 1,155,496 ns/iter (+/- 7,722) = 907 MB/s +test misc::reallyhard_32 ... bench: 51 ns/iter (+/- 1) = 1156 MB/s +test misc::reallyhard_32K ... bench: 36,202 ns/iter (+/- 167) = 905 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 2,269 ns/iter (+/- 2) = 3525 MB/s +test regexdna::find_new_lines ... bench: 6,982,142 ns/iter (+/- 70,086) = 728 MB/s +test regexdna::subst1 ... bench: 425,753 ns/iter (+/- 15,075) = 11939 MB/s +test regexdna::subst10 ... bench: 431,401 ns/iter (+/- 19,346) = 11783 MB/s +test regexdna::subst11 ... bench: 427,131 ns/iter (+/- 38,166) = 11901 MB/s +test regexdna::subst2 ... bench: 423,284 ns/iter (+/- 9,016) = 12009 MB/s +test regexdna::subst3 ... bench: 425,850 ns/iter (+/- 7,324) = 11937 MB/s +test regexdna::subst4 ... bench: 426,013 ns/iter (+/- 6,922) = 11932 MB/s +test regexdna::subst5 ... bench: 426,029 ns/iter (+/- 8,697) = 11932 MB/s +test regexdna::subst6 ... bench: 427,781 ns/iter (+/- 8,166) = 11883 MB/s +test regexdna::subst7 ... bench: 426,589 ns/iter (+/- 13,274) = 11916 MB/s +test regexdna::subst8 ... 
bench: 424,152 ns/iter (+/- 14,879) = 11984 MB/s +test regexdna::subst9 ... bench: 428,066 ns/iter (+/- 8,773) = 11875 MB/s +test regexdna::variant1 ... bench: 1,446,630 ns/iter (+/- 53,195) = 3513 MB/s +test regexdna::variant2 ... bench: 2,241,934 ns/iter (+/- 42,563) = 2267 MB/s +test regexdna::variant3 ... bench: 2,741,736 ns/iter (+/- 28,424) = 1854 MB/s +test regexdna::variant4 ... bench: 2,725,768 ns/iter (+/- 37,801) = 1864 MB/s +test regexdna::variant5 ... bench: 1,686,366 ns/iter (+/- 25,054) = 3014 MB/s +test regexdna::variant6 ... bench: 1,689,225 ns/iter (+/- 24,479) = 3009 MB/s +test regexdna::variant7 ... bench: 2,343,567 ns/iter (+/- 34,646) = 2169 MB/s +test regexdna::variant8 ... bench: 2,363,133 ns/iter (+/- 69,696) = 2151 MB/s +test regexdna::variant9 ... bench: 2,337,512 ns/iter (+/- 32,958) = 2174 MB/s +test rust_compile::compile_huge ... bench: 53,055 ns/iter (+/- 88) +test rust_compile::compile_huge_bytes ... bench: 2,979,724 ns/iter (+/- 43,904) +test rust_compile::compile_huge_full ... bench: 5,825,193 ns/iter (+/- 61,322) +test rust_compile::compile_simple ... bench: 1,927 ns/iter (+/- 39) +test rust_compile::compile_simple_bytes ... bench: 1,924 ns/iter (+/- 29) +test rust_compile::compile_simple_full ... bench: 9,830 ns/iter (+/- 108) +test rust_compile::compile_small ... bench: 4,569 ns/iter (+/- 70) +test rust_compile::compile_small_bytes ... bench: 74,875 ns/iter (+/- 1,337) +test rust_compile::compile_small_full ... bench: 151,485 ns/iter (+/- 3,063) +test sherlock::before_after_holmes ... bench: 655,632 ns/iter (+/- 801) = 907 MB/s +test sherlock::before_holmes ... bench: 24,576 ns/iter (+/- 307) = 24207 MB/s +test sherlock::everything_greedy ... bench: 1,026,410 ns/iter (+/- 57,265) = 579 MB/s +test sherlock::everything_greedy_nl ... bench: 424,490 ns/iter (+/- 7,188) = 1401 MB/s +test sherlock::holmes_cochar_watson ... bench: 46,935 ns/iter (+/- 1,007) = 12675 MB/s +test sherlock::holmes_coword_watson ... 
bench: 322,497 ns/iter (+/- 3,680) = 1844 MB/s +test sherlock::ing_suffix ... bench: 149,923 ns/iter (+/- 2,936) = 3968 MB/s +test sherlock::ing_suffix_limited_space ... bench: 732,021 ns/iter (+/- 10,242) = 812 MB/s +test sherlock::letters ... bench: 9,716,641 ns/iter (+/- 56,270) = 61 MB/s +test sherlock::letters_lower ... bench: 9,541,922 ns/iter (+/- 63,715) = 62 MB/s +test sherlock::letters_upper ... bench: 1,070,240 ns/iter (+/- 10,505) = 555 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 652,312 ns/iter (+/- 546) = 912 MB/s +test sherlock::name_alt1 ... bench: 10,832 ns/iter (+/- 499) = 54923 MB/s +test sherlock::name_alt2 ... bench: 33,528 ns/iter (+/- 484) = 17744 MB/s +test sherlock::name_alt3 ... bench: 37,352 ns/iter (+/- 1,173) = 15927 MB/s +test sherlock::name_alt3_nocase ... bench: 217,570 ns/iter (+/- 3,401) = 2734 MB/s +test sherlock::name_alt4 ... bench: 52,711 ns/iter (+/- 1,257) = 11286 MB/s +test sherlock::name_alt4_nocase ... bench: 81,635 ns/iter (+/- 1,740) = 7287 MB/s +test sherlock::name_alt5 ... bench: 34,935 ns/iter (+/- 1,190) = 17029 MB/s +test sherlock::name_alt5_nocase ... bench: 194,600 ns/iter (+/- 3,742) = 3057 MB/s +test sherlock::name_holmes ... bench: 14,670 ns/iter (+/- 153) = 40554 MB/s +test sherlock::name_holmes_nocase ... bench: 59,906 ns/iter (+/- 898) = 9931 MB/s +test sherlock::name_sherlock ... bench: 10,470 ns/iter (+/- 74) = 56822 MB/s +test sherlock::name_sherlock_holmes ... bench: 10,291 ns/iter (+/- 29) = 57810 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 41,316 ns/iter (+/- 1,350) = 14399 MB/s +test sherlock::name_sherlock_nocase ... bench: 40,748 ns/iter (+/- 724) = 14600 MB/s +test sherlock::name_whitespace ... bench: 14,682 ns/iter (+/- 52) = 40521 MB/s +test sherlock::no_match_common ... bench: 8,822 ns/iter (+/- 310) = 67437 MB/s +test sherlock::no_match_really_common ... bench: 8,990 ns/iter (+/- 129) = 66177 MB/s +test sherlock::no_match_uncommon ... 
bench: 8,649 ns/iter (+/- 192) = 68786 MB/s +test sherlock::quotes ... bench: 218,225 ns/iter (+/- 4,267) = 2726 MB/s +test sherlock::repeated_class_negation ... bench: 35,771,807 ns/iter (+/- 640,817) = 16 MB/s +test sherlock::the_lower ... bench: 190,205 ns/iter (+/- 9,051) = 3127 MB/s +test sherlock::the_nocase ... bench: 280,386 ns/iter (+/- 5,346) = 2121 MB/s +test sherlock::the_upper ... bench: 19,325 ns/iter (+/- 695) = 30785 MB/s +test sherlock::the_whitespace ... bench: 409,665 ns/iter (+/- 7,657) = 1452 MB/s +test sherlock::word_ending_n ... bench: 1,066,052 ns/iter (+/- 7,072) = 558 MB/s +test sherlock::words ... bench: 4,330,659 ns/iter (+/- 53,403) = 137 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 107 measured; 0 filtered out; finished in 131.99s + diff --git a/vendor/regex/record/old-bench-log/10-last-frontier/rust-after-literal.log b/vendor/regex/record/old-bench-log/10-last-frontier/rust-after-literal.log new file mode 100644 index 0000000..c45b55c --- /dev/null +++ b/vendor/regex/record/old-bench-log/10-last-frontier/rust-after-literal.log @@ -0,0 +1,124 @@ + +running 119 tests +test misc::anchored_literal_long_match ... bench: 18 ns/iter (+/- 0) = 21666 MB/s +test misc::anchored_literal_long_non_match ... bench: 20 ns/iter (+/- 0) = 19500 MB/s +test misc::anchored_literal_short_match ... bench: 18 ns/iter (+/- 0) = 1444 MB/s +test misc::anchored_literal_short_non_match ... bench: 20 ns/iter (+/- 0) = 1300 MB/s +test misc::easy0_1K ... bench: 51 ns/iter (+/- 2) = 20607 MB/s +test misc::easy0_1MB ... bench: 56 ns/iter (+/- 1) = 18725053 MB/s +test misc::easy0_32 ... bench: 51 ns/iter (+/- 0) = 1156 MB/s +test misc::easy0_32K ... bench: 53 ns/iter (+/- 1) = 618773 MB/s +test misc::easy1_1K ... bench: 41 ns/iter (+/- 0) = 25463 MB/s +test misc::easy1_1MB ... bench: 44 ns/iter (+/- 1) = 23831727 MB/s +test misc::easy1_32 ... bench: 40 ns/iter (+/- 1) = 1300 MB/s +test misc::easy1_32K ... 
bench: 40 ns/iter (+/- 1) = 819700 MB/s +test misc::hard_1K ... bench: 51 ns/iter (+/- 2) = 20607 MB/s +test misc::hard_1MB ... bench: 56 ns/iter (+/- 1) = 18725053 MB/s +test misc::hard_32 ... bench: 51 ns/iter (+/- 2) = 1156 MB/s +test misc::hard_32K ... bench: 51 ns/iter (+/- 1) = 643039 MB/s +test misc::is_match_set ... bench: 61 ns/iter (+/- 2) = 409 MB/s +test misc::literal ... bench: 13 ns/iter (+/- 0) = 3923 MB/s +test misc::long_needle1 ... bench: 3,242 ns/iter (+/- 79) = 30845 MB/s +test misc::long_needle2 ... bench: 350,572 ns/iter (+/- 6,860) = 285 MB/s +test misc::match_class ... bench: 62 ns/iter (+/- 6) = 1306 MB/s +test misc::match_class_in_range ... bench: 14 ns/iter (+/- 0) = 5785 MB/s +test misc::match_class_unicode ... bench: 259 ns/iter (+/- 15) = 621 MB/s +test misc::matches_set ... bench: 462 ns/iter (+/- 9) = 54 MB/s +test misc::medium_1K ... bench: 53 ns/iter (+/- 0) = 19849 MB/s +test misc::medium_1MB ... bench: 58 ns/iter (+/- 1) = 18079379 MB/s +test misc::medium_32 ... bench: 53 ns/iter (+/- 1) = 1132 MB/s +test misc::medium_32K ... bench: 53 ns/iter (+/- 1) = 618792 MB/s +test misc::no_exponential ... bench: 423 ns/iter (+/- 13) = 236 MB/s +test misc::not_literal ... bench: 89 ns/iter (+/- 0) = 573 MB/s +test misc::one_pass_long_prefix ... bench: 52 ns/iter (+/- 0) = 500 MB/s +test misc::one_pass_long_prefix_not ... bench: 52 ns/iter (+/- 1) = 500 MB/s +test misc::one_pass_short ... bench: 38 ns/iter (+/- 1) = 447 MB/s +test misc::one_pass_short_not ... bench: 41 ns/iter (+/- 1) = 414 MB/s +test misc::reallyhard2_1K ... bench: 81 ns/iter (+/- 1) = 12839 MB/s +test misc::reallyhard_1K ... bench: 1,592 ns/iter (+/- 1) = 660 MB/s +test misc::reallyhard_1MB ... bench: 1,575,822 ns/iter (+/- 39,203) = 665 MB/s +test misc::reallyhard_32 ... bench: 102 ns/iter (+/- 0) = 578 MB/s +test misc::reallyhard_32K ... bench: 49,328 ns/iter (+/- 2,598) = 664 MB/s +test misc::replace_all ... 
bench: 132 ns/iter (+/- 3) +test misc::reverse_suffix_no_quadratic ... bench: 4,171 ns/iter (+/- 134) = 1918 MB/s +test misc::short_haystack_1000000x ... bench: 132,251 ns/iter (+/- 729) = 60491 MB/s +test misc::short_haystack_100000x ... bench: 13,184 ns/iter (+/- 408) = 60680 MB/s +test misc::short_haystack_10000x ... bench: 6,036 ns/iter (+/- 167) = 13255 MB/s +test misc::short_haystack_1000x ... bench: 602 ns/iter (+/- 14) = 13307 MB/s +test misc::short_haystack_100x ... bench: 230 ns/iter (+/- 7) = 3526 MB/s +test misc::short_haystack_10x ... bench: 218 ns/iter (+/- 3) = 417 MB/s +test misc::short_haystack_1x ... bench: 210 ns/iter (+/- 8) = 90 MB/s +test misc::short_haystack_2x ... bench: 225 ns/iter (+/- 6) = 120 MB/s +test misc::short_haystack_3x ... bench: 211 ns/iter (+/- 8) = 165 MB/s +test misc::short_haystack_4x ... bench: 212 ns/iter (+/- 6) = 202 MB/s +test regexdna::find_new_lines ... bench: 12,245,066 ns/iter (+/- 117,141) = 415 MB/s +test regexdna::subst1 ... bench: 786,357 ns/iter (+/- 14,200) = 6464 MB/s +test regexdna::subst10 ... bench: 788,550 ns/iter (+/- 26,456) = 6446 MB/s +test regexdna::subst11 ... bench: 782,161 ns/iter (+/- 15,583) = 6499 MB/s +test regexdna::subst2 ... bench: 784,902 ns/iter (+/- 23,379) = 6476 MB/s +test regexdna::subst3 ... bench: 786,640 ns/iter (+/- 27,063) = 6462 MB/s +test regexdna::subst4 ... bench: 785,591 ns/iter (+/- 20,498) = 6470 MB/s +test regexdna::subst5 ... bench: 787,447 ns/iter (+/- 20,892) = 6455 MB/s +test regexdna::subst6 ... bench: 784,994 ns/iter (+/- 19,687) = 6475 MB/s +test regexdna::subst7 ... bench: 801,921 ns/iter (+/- 15,391) = 6339 MB/s +test regexdna::subst8 ... bench: 785,541 ns/iter (+/- 11,908) = 6471 MB/s +test regexdna::subst9 ... bench: 785,848 ns/iter (+/- 28,020) = 6468 MB/s +test regexdna::variant1 ... bench: 2,195,058 ns/iter (+/- 44,066) = 2315 MB/s +test regexdna::variant2 ... bench: 3,219,968 ns/iter (+/- 59,372) = 1578 MB/s +test regexdna::variant3 ... 
bench: 3,776,467 ns/iter (+/- 54,326) = 1346 MB/s +test regexdna::variant4 ... bench: 3,803,674 ns/iter (+/- 95,281) = 1336 MB/s +test regexdna::variant5 ... bench: 2,661,333 ns/iter (+/- 46,408) = 1910 MB/s +test regexdna::variant6 ... bench: 2,645,716 ns/iter (+/- 38,659) = 1921 MB/s +test regexdna::variant7 ... bench: 3,228,352 ns/iter (+/- 69,155) = 1574 MB/s +test regexdna::variant8 ... bench: 3,305,563 ns/iter (+/- 59,321) = 1537 MB/s +test regexdna::variant9 ... bench: 3,225,039 ns/iter (+/- 49,720) = 1576 MB/s +test rust_compile::compile_huge ... bench: 100,381 ns/iter (+/- 2,052) +test rust_compile::compile_huge_bytes ... bench: 5,899,989 ns/iter (+/- 114,363) +test rust_compile::compile_huge_full ... bench: 11,650,995 ns/iter (+/- 172,285) +test rust_compile::compile_simple ... bench: 4,082 ns/iter (+/- 88) +test rust_compile::compile_simple_bytes ... bench: 4,153 ns/iter (+/- 120) +test rust_compile::compile_simple_full ... bench: 20,414 ns/iter (+/- 1,860) +test rust_compile::compile_small ... bench: 9,114 ns/iter (+/- 216) +test rust_compile::compile_small_bytes ... bench: 183,049 ns/iter (+/- 9,917) +test rust_compile::compile_small_full ... bench: 361,291 ns/iter (+/- 11,045) +test sherlock::before_after_holmes ... bench: 907,103 ns/iter (+/- 12,165) = 655 MB/s +test sherlock::before_holmes ... bench: 62,501 ns/iter (+/- 1,880) = 9518 MB/s +test sherlock::everything_greedy ... bench: 2,062,116 ns/iter (+/- 41,900) = 288 MB/s +test sherlock::everything_greedy_nl ... bench: 894,529 ns/iter (+/- 38,723) = 665 MB/s +test sherlock::holmes_cochar_watson ... bench: 103,305 ns/iter (+/- 3,798) = 5758 MB/s +test sherlock::holmes_coword_watson ... bench: 479,423 ns/iter (+/- 13,924) = 1240 MB/s +test sherlock::ing_suffix ... bench: 318,300 ns/iter (+/- 6,846) = 1869 MB/s +test sherlock::ing_suffix_limited_space ... bench: 1,066,300 ns/iter (+/- 19,375) = 557 MB/s +test sherlock::letters ... 
bench: 21,777,358 ns/iter (+/- 230,478) = 27 MB/s +test sherlock::letters_lower ... bench: 21,152,019 ns/iter (+/- 203,617) = 28 MB/s +test sherlock::letters_upper ... bench: 1,777,626 ns/iter (+/- 26,243) = 334 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 897,509 ns/iter (+/- 24,983) = 662 MB/s +test sherlock::name_alt1 ... bench: 32,255 ns/iter (+/- 681) = 18444 MB/s +test sherlock::name_alt2 ... bench: 86,369 ns/iter (+/- 2,494) = 6888 MB/s +test sherlock::name_alt3 ... bench: 97,618 ns/iter (+/- 564) = 6094 MB/s +test sherlock::name_alt3_nocase ... bench: 944,848 ns/iter (+/- 31,039) = 629 MB/s +test sherlock::name_alt4 ... bench: 122,029 ns/iter (+/- 2,716) = 4875 MB/s +test sherlock::name_alt4_nocase ... bench: 225,544 ns/iter (+/- 5,783) = 2637 MB/s +test sherlock::name_alt5 ... bench: 91,897 ns/iter (+/- 3,796) = 6473 MB/s +test sherlock::name_alt5_nocase ... bench: 936,420 ns/iter (+/- 15,092) = 635 MB/s +test sherlock::name_holmes ... bench: 33,448 ns/iter (+/- 959) = 17786 MB/s +test sherlock::name_holmes_nocase ... bench: 115,864 ns/iter (+/- 1,645) = 5134 MB/s +test sherlock::name_sherlock ... bench: 22,474 ns/iter (+/- 674) = 26472 MB/s +test sherlock::name_sherlock_holmes ... bench: 22,184 ns/iter (+/- 497) = 26818 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 99,629 ns/iter (+/- 2,398) = 5971 MB/s +test sherlock::name_sherlock_nocase ... bench: 99,523 ns/iter (+/- 2,674) = 5977 MB/s +test sherlock::name_whitespace ... bench: 30,815 ns/iter (+/- 107) = 19306 MB/s +test sherlock::no_match_common ... bench: 19,661 ns/iter (+/- 656) = 30259 MB/s +test sherlock::no_match_really_common ... bench: 27,544 ns/iter (+/- 527) = 21599 MB/s +test sherlock::no_match_uncommon ... bench: 19,553 ns/iter (+/- 31) = 30426 MB/s +test sherlock::quotes ... bench: 369,144 ns/iter (+/- 45,316) = 1611 MB/s +test sherlock::repeated_class_negation ... bench: 68,838,857 ns/iter (+/- 330,544) = 8 MB/s +test sherlock::the_lower ... 
bench: 321,692 ns/iter (+/- 5,418) = 1849 MB/s +test sherlock::the_nocase ... bench: 507,936 ns/iter (+/- 3,080) = 1171 MB/s +test sherlock::the_upper ... bench: 43,705 ns/iter (+/- 788) = 13612 MB/s +test sherlock::the_whitespace ... bench: 819,179 ns/iter (+/- 20,071) = 726 MB/s +test sherlock::word_ending_n ... bench: 1,700,300 ns/iter (+/- 36,623) = 349 MB/s +test sherlock::words ... bench: 8,249,767 ns/iter (+/- 75,015) = 72 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 119 measured; 0 filtered out; finished in 111.55s + diff --git a/vendor/regex/record/old-bench-log/10-last-frontier/rust-before-literal.log b/vendor/regex/record/old-bench-log/10-last-frontier/rust-before-literal.log new file mode 100644 index 0000000..98b3496 --- /dev/null +++ b/vendor/regex/record/old-bench-log/10-last-frontier/rust-before-literal.log @@ -0,0 +1,124 @@ + +running 119 tests +test misc::anchored_literal_long_match ... bench: 18 ns/iter (+/- 0) = 21666 MB/s +test misc::anchored_literal_long_non_match ... bench: 18 ns/iter (+/- 0) = 21666 MB/s +test misc::anchored_literal_short_match ... bench: 18 ns/iter (+/- 0) = 1444 MB/s +test misc::anchored_literal_short_non_match ... bench: 18 ns/iter (+/- 0) = 1444 MB/s +test misc::easy0_1K ... bench: 15 ns/iter (+/- 0) = 70066 MB/s +test misc::easy0_1MB ... bench: 21 ns/iter (+/- 0) = 49933476 MB/s +test misc::easy0_32 ... bench: 15 ns/iter (+/- 0) = 3933 MB/s +test misc::easy0_32K ... bench: 14 ns/iter (+/- 0) = 2342500 MB/s +test misc::easy1_1K ... bench: 40 ns/iter (+/- 1) = 26100 MB/s +test misc::easy1_1MB ... bench: 45 ns/iter (+/- 1) = 23302133 MB/s +test misc::easy1_32 ... bench: 40 ns/iter (+/- 5) = 1300 MB/s +test misc::easy1_32K ... bench: 40 ns/iter (+/- 1) = 819700 MB/s +test misc::hard_1K ... bench: 51 ns/iter (+/- 1) = 20607 MB/s +test misc::hard_1MB ... bench: 56 ns/iter (+/- 0) = 18725053 MB/s +test misc::hard_32 ... bench: 51 ns/iter (+/- 3) = 1156 MB/s +test misc::hard_32K ... 
bench: 51 ns/iter (+/- 1) = 643039 MB/s +test misc::is_match_set ... bench: 61 ns/iter (+/- 2) = 409 MB/s +test misc::literal ... bench: 13 ns/iter (+/- 0) = 3923 MB/s +test misc::long_needle1 ... bench: 3,259 ns/iter (+/- 86) = 30684 MB/s +test misc::long_needle2 ... bench: 350,722 ns/iter (+/- 6,984) = 285 MB/s +test misc::match_class ... bench: 60 ns/iter (+/- 1) = 1350 MB/s +test misc::match_class_in_range ... bench: 14 ns/iter (+/- 0) = 5785 MB/s +test misc::match_class_unicode ... bench: 255 ns/iter (+/- 0) = 631 MB/s +test misc::matches_set ... bench: 481 ns/iter (+/- 11) = 51 MB/s +test misc::medium_1K ... bench: 15 ns/iter (+/- 0) = 70133 MB/s +test misc::medium_1MB ... bench: 22 ns/iter (+/- 0) = 47663818 MB/s +test misc::medium_32 ... bench: 15 ns/iter (+/- 0) = 4000 MB/s +test misc::medium_32K ... bench: 15 ns/iter (+/- 0) = 2186400 MB/s +test misc::no_exponential ... bench: 442 ns/iter (+/- 13) = 226 MB/s +test misc::not_literal ... bench: 89 ns/iter (+/- 1) = 573 MB/s +test misc::one_pass_long_prefix ... bench: 54 ns/iter (+/- 1) = 481 MB/s +test misc::one_pass_long_prefix_not ... bench: 52 ns/iter (+/- 1) = 500 MB/s +test misc::one_pass_short ... bench: 39 ns/iter (+/- 0) = 435 MB/s +test misc::one_pass_short_not ... bench: 42 ns/iter (+/- 0) = 404 MB/s +test misc::reallyhard2_1K ... bench: 83 ns/iter (+/- 6) = 12530 MB/s +test misc::reallyhard_1K ... bench: 1,592 ns/iter (+/- 4) = 660 MB/s +test misc::reallyhard_1MB ... bench: 1,575,691 ns/iter (+/- 29,668) = 665 MB/s +test misc::reallyhard_32 ... bench: 101 ns/iter (+/- 5) = 584 MB/s +test misc::reallyhard_32K ... bench: 49,325 ns/iter (+/- 1,734) = 664 MB/s +test misc::replace_all ... bench: 134 ns/iter (+/- 2) +test misc::reverse_suffix_no_quadratic ... bench: 4,189 ns/iter (+/- 274) = 1909 MB/s +test misc::short_haystack_1000000x ... bench: 132,182 ns/iter (+/- 4,966) = 60522 MB/s +test misc::short_haystack_100000x ... 
bench: 13,344 ns/iter (+/- 275) = 59952 MB/s +test misc::short_haystack_10000x ... bench: 6,119 ns/iter (+/- 285) = 13075 MB/s +test misc::short_haystack_1000x ... bench: 617 ns/iter (+/- 15) = 12983 MB/s +test misc::short_haystack_100x ... bench: 230 ns/iter (+/- 7) = 3526 MB/s +test misc::short_haystack_10x ... bench: 207 ns/iter (+/- 8) = 439 MB/s +test misc::short_haystack_1x ... bench: 213 ns/iter (+/- 7) = 89 MB/s +test misc::short_haystack_2x ... bench: 206 ns/iter (+/- 6) = 131 MB/s +test misc::short_haystack_3x ... bench: 207 ns/iter (+/- 10) = 169 MB/s +test misc::short_haystack_4x ... bench: 208 ns/iter (+/- 7) = 206 MB/s +test regexdna::find_new_lines ... bench: 12,275,804 ns/iter (+/- 145,331) = 414 MB/s +test regexdna::subst1 ... bench: 793,517 ns/iter (+/- 44,203) = 6406 MB/s +test regexdna::subst10 ... bench: 794,922 ns/iter (+/- 23,459) = 6394 MB/s +test regexdna::subst11 ... bench: 790,525 ns/iter (+/- 23,010) = 6430 MB/s +test regexdna::subst2 ... bench: 790,637 ns/iter (+/- 17,962) = 6429 MB/s +test regexdna::subst3 ... bench: 793,559 ns/iter (+/- 17,575) = 6405 MB/s +test regexdna::subst4 ... bench: 792,738 ns/iter (+/- 15,237) = 6412 MB/s +test regexdna::subst5 ... bench: 795,060 ns/iter (+/- 26,172) = 6393 MB/s +test regexdna::subst6 ... bench: 792,357 ns/iter (+/- 15,067) = 6415 MB/s +test regexdna::subst7 ... bench: 797,006 ns/iter (+/- 27,928) = 6378 MB/s +test regexdna::subst8 ... bench: 790,603 ns/iter (+/- 22,754) = 6429 MB/s +test regexdna::subst9 ... bench: 793,055 ns/iter (+/- 13,202) = 6409 MB/s +test regexdna::variant1 ... bench: 2,204,304 ns/iter (+/- 50,669) = 2306 MB/s +test regexdna::variant2 ... bench: 3,224,798 ns/iter (+/- 45,705) = 1576 MB/s +test regexdna::variant3 ... bench: 3,802,774 ns/iter (+/- 86,530) = 1336 MB/s +test regexdna::variant4 ... bench: 3,805,916 ns/iter (+/- 69,737) = 1335 MB/s +test regexdna::variant5 ... bench: 2,662,373 ns/iter (+/- 61,259) = 1909 MB/s +test regexdna::variant6 ... 
bench: 2,654,072 ns/iter (+/- 51,095) = 1915 MB/s +test regexdna::variant7 ... bench: 3,232,369 ns/iter (+/- 67,147) = 1572 MB/s +test regexdna::variant8 ... bench: 3,311,225 ns/iter (+/- 66,086) = 1535 MB/s +test regexdna::variant9 ... bench: 3,241,601 ns/iter (+/- 68,394) = 1568 MB/s +test rust_compile::compile_huge ... bench: 100,955 ns/iter (+/- 2,466) +test rust_compile::compile_huge_bytes ... bench: 5,936,732 ns/iter (+/- 126,993) +test rust_compile::compile_huge_full ... bench: 11,880,838 ns/iter (+/- 211,387) +test rust_compile::compile_simple ... bench: 4,575 ns/iter (+/- 139) +test rust_compile::compile_simple_bytes ... bench: 4,653 ns/iter (+/- 122) +test rust_compile::compile_simple_full ... bench: 20,656 ns/iter (+/- 535) +test rust_compile::compile_small ... bench: 9,613 ns/iter (+/- 992) +test rust_compile::compile_small_bytes ... bench: 188,349 ns/iter (+/- 4,733) +test rust_compile::compile_small_full ... bench: 341,554 ns/iter (+/- 9,774) +test sherlock::before_after_holmes ... bench: 907,419 ns/iter (+/- 11,645) = 655 MB/s +test sherlock::before_holmes ... bench: 62,036 ns/iter (+/- 1,854) = 9590 MB/s +test sherlock::everything_greedy ... bench: 2,072,694 ns/iter (+/- 45,192) = 287 MB/s +test sherlock::everything_greedy_nl ... bench: 884,483 ns/iter (+/- 25,710) = 672 MB/s +test sherlock::holmes_cochar_watson ... bench: 103,873 ns/iter (+/- 1,310) = 5727 MB/s +test sherlock::holmes_coword_watson ... bench: 481,491 ns/iter (+/- 11,516) = 1235 MB/s +test sherlock::ing_suffix ... bench: 323,119 ns/iter (+/- 7,438) = 1841 MB/s +test sherlock::ing_suffix_limited_space ... bench: 1,067,293 ns/iter (+/- 18,661) = 557 MB/s +test sherlock::letters ... bench: 21,732,526 ns/iter (+/- 253,563) = 27 MB/s +test sherlock::letters_lower ... bench: 21,187,465 ns/iter (+/- 191,023) = 28 MB/s +test sherlock::letters_upper ... bench: 1,766,003 ns/iter (+/- 17,494) = 336 MB/s +test sherlock::line_boundary_sherlock_holmes ... 
bench: 897,387 ns/iter (+/- 26,674) = 662 MB/s +test sherlock::name_alt1 ... bench: 34,183 ns/iter (+/- 885) = 17404 MB/s +test sherlock::name_alt2 ... bench: 87,151 ns/iter (+/- 2,139) = 6826 MB/s +test sherlock::name_alt3 ... bench: 99,293 ns/iter (+/- 1,938) = 5991 MB/s +test sherlock::name_alt3_nocase ... bench: 379,228 ns/iter (+/- 22,539) = 1568 MB/s +test sherlock::name_alt4 ... bench: 123,040 ns/iter (+/- 2,676) = 4835 MB/s +test sherlock::name_alt4_nocase ... bench: 186,045 ns/iter (+/- 403) = 3197 MB/s +test sherlock::name_alt5 ... bench: 91,679 ns/iter (+/- 2,543) = 6489 MB/s +test sherlock::name_alt5_nocase ... bench: 343,668 ns/iter (+/- 6,807) = 1731 MB/s +test sherlock::name_holmes ... bench: 33,802 ns/iter (+/- 936) = 17600 MB/s +test sherlock::name_holmes_nocase ... bench: 136,208 ns/iter (+/- 4,317) = 4367 MB/s +test sherlock::name_sherlock ... bench: 22,534 ns/iter (+/- 462) = 26401 MB/s +test sherlock::name_sherlock_holmes ... bench: 22,514 ns/iter (+/- 697) = 26425 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 97,796 ns/iter (+/- 2,037) = 6083 MB/s +test sherlock::name_sherlock_nocase ... bench: 95,809 ns/iter (+/- 1,538) = 6209 MB/s +test sherlock::name_whitespace ... bench: 30,959 ns/iter (+/- 968) = 19216 MB/s +test sherlock::no_match_common ... bench: 19,568 ns/iter (+/- 616) = 30403 MB/s +test sherlock::no_match_really_common ... bench: 26,273 ns/iter (+/- 1,143) = 22644 MB/s +test sherlock::no_match_uncommon ... bench: 19,643 ns/iter (+/- 496) = 30287 MB/s +test sherlock::quotes ... bench: 371,876 ns/iter (+/- 2,494) = 1599 MB/s +test sherlock::repeated_class_negation ... bench: 76,963,104 ns/iter (+/- 277,311) = 7 MB/s +test sherlock::the_lower ... bench: 331,250 ns/iter (+/- 8,588) = 1796 MB/s +test sherlock::the_nocase ... bench: 516,528 ns/iter (+/- 40,826) = 1151 MB/s +test sherlock::the_upper ... bench: 44,206 ns/iter (+/- 1,277) = 13458 MB/s +test sherlock::the_whitespace ... 
bench: 822,577 ns/iter (+/- 23,649) = 723 MB/s +test sherlock::word_ending_n ... bench: 1,685,110 ns/iter (+/- 34,615) = 353 MB/s +test sherlock::words ... bench: 8,333,499 ns/iter (+/- 152,757) = 71 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 119 measured; 0 filtered out; finished in 124.94s + diff --git a/vendor/regex/record/old-bench-log/10-last-frontier/rust-bytes-after-literal.log b/vendor/regex/record/old-bench-log/10-last-frontier/rust-bytes-after-literal.log new file mode 100644 index 0000000..470e09b --- /dev/null +++ b/vendor/regex/record/old-bench-log/10-last-frontier/rust-bytes-after-literal.log @@ -0,0 +1,112 @@ + +running 107 tests +test misc::anchored_literal_long_match ... bench: 18 ns/iter (+/- 0) = 21666 MB/s +test misc::anchored_literal_long_non_match ... bench: 20 ns/iter (+/- 1) = 19500 MB/s +test misc::anchored_literal_short_match ... bench: 18 ns/iter (+/- 0) = 1444 MB/s +test misc::anchored_literal_short_non_match ... bench: 20 ns/iter (+/- 0) = 1300 MB/s +test misc::easy0_1K ... bench: 54 ns/iter (+/- 2) = 19462 MB/s +test misc::easy0_1MB ... bench: 56 ns/iter (+/- 1) = 18725053 MB/s +test misc::easy0_32 ... bench: 51 ns/iter (+/- 1) = 1156 MB/s +test misc::easy0_32K ... bench: 51 ns/iter (+/- 2) = 643039 MB/s +test misc::easy1_1K ... bench: 41 ns/iter (+/- 1) = 25463 MB/s +test misc::easy1_1MB ... bench: 44 ns/iter (+/- 1) = 23831727 MB/s +test misc::easy1_32 ... bench: 40 ns/iter (+/- 2) = 1300 MB/s +test misc::easy1_32K ... bench: 40 ns/iter (+/- 1) = 819700 MB/s +test misc::hard_1K ... bench: 52 ns/iter (+/- 1) = 20211 MB/s +test misc::hard_1MB ... bench: 57 ns/iter (+/- 0) = 18396543 MB/s +test misc::hard_32 ... bench: 51 ns/iter (+/- 0) = 1156 MB/s +test misc::hard_32K ... bench: 51 ns/iter (+/- 3) = 643039 MB/s +test misc::is_match_set ... bench: 61 ns/iter (+/- 2) = 409 MB/s +test misc::literal ... bench: 14 ns/iter (+/- 0) = 3642 MB/s +test misc::long_needle1 ... 
bench: 3,249 ns/iter (+/- 87) = 30779 MB/s +test misc::long_needle2 ... bench: 350,559 ns/iter (+/- 7,154) = 285 MB/s +test misc::match_class ... bench: 61 ns/iter (+/- 4) = 1327 MB/s +test misc::match_class_in_range ... bench: 14 ns/iter (+/- 0) = 5785 MB/s +test misc::matches_set ... bench: 401 ns/iter (+/- 17) = 62 MB/s +test misc::medium_1K ... bench: 53 ns/iter (+/- 0) = 19849 MB/s +test misc::medium_1MB ... bench: 58 ns/iter (+/- 0) = 18079379 MB/s +test misc::medium_32 ... bench: 53 ns/iter (+/- 0) = 1132 MB/s +test misc::medium_32K ... bench: 53 ns/iter (+/- 2) = 618792 MB/s +test misc::no_exponential ... bench: 421 ns/iter (+/- 8) = 237 MB/s +test misc::not_literal ... bench: 90 ns/iter (+/- 0) = 566 MB/s +test misc::one_pass_long_prefix ... bench: 53 ns/iter (+/- 1) = 490 MB/s +test misc::one_pass_long_prefix_not ... bench: 53 ns/iter (+/- 0) = 490 MB/s +test misc::one_pass_short ... bench: 38 ns/iter (+/- 0) = 447 MB/s +test misc::one_pass_short_not ... bench: 42 ns/iter (+/- 3) = 404 MB/s +test misc::reallyhard2_1K ... bench: 77 ns/iter (+/- 1) = 13506 MB/s +test misc::reallyhard_1K ... bench: 1,592 ns/iter (+/- 1) = 660 MB/s +test misc::reallyhard_1MB ... bench: 1,575,759 ns/iter (+/- 49,997) = 665 MB/s +test misc::reallyhard_32 ... bench: 102 ns/iter (+/- 2) = 578 MB/s +test misc::reallyhard_32K ... bench: 49,326 ns/iter (+/- 1,055) = 664 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 4,161 ns/iter (+/- 94) = 1922 MB/s +test regexdna::find_new_lines ... bench: 12,344,799 ns/iter (+/- 188,054) = 411 MB/s +test regexdna::subst1 ... bench: 780,449 ns/iter (+/- 14,474) = 6513 MB/s +test regexdna::subst10 ... bench: 795,203 ns/iter (+/- 40,742) = 6392 MB/s +test regexdna::subst11 ... bench: 816,444 ns/iter (+/- 23,334) = 6226 MB/s +test regexdna::subst2 ... bench: 777,546 ns/iter (+/- 19,625) = 6537 MB/s +test regexdna::subst3 ... bench: 783,295 ns/iter (+/- 8,266) = 6489 MB/s +test regexdna::subst4 ... 
bench: 775,154 ns/iter (+/- 21,350) = 6557 MB/s +test regexdna::subst5 ... bench: 781,414 ns/iter (+/- 21,057) = 6505 MB/s +test regexdna::subst6 ... bench: 783,595 ns/iter (+/- 23,835) = 6487 MB/s +test regexdna::subst7 ... bench: 821,620 ns/iter (+/- 46,131) = 6187 MB/s +test regexdna::subst8 ... bench: 818,402 ns/iter (+/- 21,350) = 6211 MB/s +test regexdna::subst9 ... bench: 779,115 ns/iter (+/- 21,335) = 6524 MB/s +test regexdna::variant1 ... bench: 2,189,308 ns/iter (+/- 32,528) = 2321 MB/s +test regexdna::variant2 ... bench: 3,217,478 ns/iter (+/- 36,011) = 1579 MB/s +test regexdna::variant3 ... bench: 3,771,330 ns/iter (+/- 74,944) = 1347 MB/s +test regexdna::variant4 ... bench: 3,787,593 ns/iter (+/- 37,825) = 1342 MB/s +test regexdna::variant5 ... bench: 2,669,799 ns/iter (+/- 69,777) = 1904 MB/s +test regexdna::variant6 ... bench: 2,651,559 ns/iter (+/- 33,895) = 1917 MB/s +test regexdna::variant7 ... bench: 3,222,991 ns/iter (+/- 41,014) = 1577 MB/s +test regexdna::variant8 ... bench: 3,298,048 ns/iter (+/- 41,331) = 1541 MB/s +test regexdna::variant9 ... bench: 3,218,486 ns/iter (+/- 50,318) = 1579 MB/s +test rust_compile::compile_huge ... bench: 100,031 ns/iter (+/- 3,464) +test rust_compile::compile_huge_bytes ... bench: 5,885,102 ns/iter (+/- 130,016) +test rust_compile::compile_huge_full ... bench: 11,641,251 ns/iter (+/- 147,700) +test rust_compile::compile_simple ... bench: 4,263 ns/iter (+/- 116) +test rust_compile::compile_simple_bytes ... bench: 4,236 ns/iter (+/- 91) +test rust_compile::compile_simple_full ... bench: 22,349 ns/iter (+/- 2,085) +test rust_compile::compile_small ... bench: 9,537 ns/iter (+/- 298) +test rust_compile::compile_small_bytes ... bench: 178,561 ns/iter (+/- 3,796) +test rust_compile::compile_small_full ... bench: 363,343 ns/iter (+/- 9,481) +test sherlock::before_after_holmes ... bench: 907,022 ns/iter (+/- 19,133) = 655 MB/s +test sherlock::before_holmes ... 
bench: 63,729 ns/iter (+/- 1,830) = 9335 MB/s +test sherlock::everything_greedy ... bench: 2,181,593 ns/iter (+/- 46,002) = 272 MB/s +test sherlock::everything_greedy_nl ... bench: 884,811 ns/iter (+/- 26,211) = 672 MB/s +test sherlock::holmes_cochar_watson ... bench: 105,610 ns/iter (+/- 3,120) = 5633 MB/s +test sherlock::holmes_coword_watson ... bench: 480,986 ns/iter (+/- 13,228) = 1236 MB/s +test sherlock::ing_suffix ... bench: 322,921 ns/iter (+/- 3,555) = 1842 MB/s +test sherlock::ing_suffix_limited_space ... bench: 1,065,372 ns/iter (+/- 21,242) = 558 MB/s +test sherlock::letters ... bench: 22,109,015 ns/iter (+/- 146,243) = 26 MB/s +test sherlock::letters_lower ... bench: 21,686,153 ns/iter (+/- 206,041) = 27 MB/s +test sherlock::letters_upper ... bench: 1,778,225 ns/iter (+/- 25,935) = 334 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 897,355 ns/iter (+/- 26,781) = 662 MB/s +test sherlock::name_alt1 ... bench: 31,927 ns/iter (+/- 633) = 18634 MB/s +test sherlock::name_alt2 ... bench: 87,040 ns/iter (+/- 1,859) = 6835 MB/s +test sherlock::name_alt3 ... bench: 97,715 ns/iter (+/- 2,109) = 6088 MB/s +test sherlock::name_alt3_nocase ... bench: 944,955 ns/iter (+/- 26,503) = 629 MB/s +test sherlock::name_alt4 ... bench: 120,935 ns/iter (+/- 2,399) = 4919 MB/s +test sherlock::name_alt4_nocase ... bench: 228,597 ns/iter (+/- 7,137) = 2602 MB/s +test sherlock::name_alt5 ... bench: 91,174 ns/iter (+/- 1,096) = 6525 MB/s +test sherlock::name_alt5_nocase ... bench: 937,189 ns/iter (+/- 23,839) = 634 MB/s +test sherlock::name_holmes ... bench: 34,020 ns/iter (+/- 752) = 17487 MB/s +test sherlock::name_holmes_nocase ... bench: 117,194 ns/iter (+/- 3,444) = 5076 MB/s +test sherlock::name_sherlock ... bench: 22,557 ns/iter (+/- 388) = 26374 MB/s +test sherlock::name_sherlock_holmes ... bench: 22,428 ns/iter (+/- 683) = 26526 MB/s +test sherlock::name_sherlock_holmes_nocase ... 
bench: 99,637 ns/iter (+/- 636) = 5971 MB/s +test sherlock::name_sherlock_nocase ... bench: 97,895 ns/iter (+/- 1,875) = 6077 MB/s +test sherlock::name_whitespace ... bench: 30,772 ns/iter (+/- 1,591) = 19333 MB/s +test sherlock::no_match_common ... bench: 19,665 ns/iter (+/- 296) = 30253 MB/s +test sherlock::no_match_really_common ... bench: 27,403 ns/iter (+/- 2,507) = 21710 MB/s +test sherlock::no_match_uncommon ... bench: 19,601 ns/iter (+/- 293) = 30352 MB/s +test sherlock::quotes ... bench: 370,323 ns/iter (+/- 1,345) = 1606 MB/s +test sherlock::repeated_class_negation ... bench: 68,414,794 ns/iter (+/- 342,428) = 8 MB/s +test sherlock::the_lower ... bench: 327,767 ns/iter (+/- 5,493) = 1815 MB/s +test sherlock::the_nocase ... bench: 507,818 ns/iter (+/- 1,796) = 1171 MB/s +test sherlock::the_upper ... bench: 45,045 ns/iter (+/- 1,400) = 13207 MB/s +test sherlock::the_whitespace ... bench: 822,080 ns/iter (+/- 16,581) = 723 MB/s +test sherlock::word_ending_n ... bench: 1,690,084 ns/iter (+/- 40,361) = 352 MB/s +test sherlock::words ... bench: 8,573,617 ns/iter (+/- 143,313) = 69 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 107 measured; 0 filtered out; finished in 110.03s + diff --git a/vendor/regex/record/old-bench-log/10-last-frontier/rust-bytes-before-literal.log b/vendor/regex/record/old-bench-log/10-last-frontier/rust-bytes-before-literal.log new file mode 100644 index 0000000..7016e3c --- /dev/null +++ b/vendor/regex/record/old-bench-log/10-last-frontier/rust-bytes-before-literal.log @@ -0,0 +1,112 @@ + +running 107 tests +test misc::anchored_literal_long_match ... bench: 18 ns/iter (+/- 0) = 21666 MB/s +test misc::anchored_literal_long_non_match ... bench: 19 ns/iter (+/- 1) = 20526 MB/s +test misc::anchored_literal_short_match ... bench: 18 ns/iter (+/- 0) = 1444 MB/s +test misc::anchored_literal_short_non_match ... bench: 20 ns/iter (+/- 0) = 1300 MB/s +test misc::easy0_1K ... 
bench: 14 ns/iter (+/- 0) = 75071 MB/s +test misc::easy0_1MB ... bench: 21 ns/iter (+/- 0) = 49933476 MB/s +test misc::easy0_32 ... bench: 14 ns/iter (+/- 0) = 4214 MB/s +test misc::easy0_32K ... bench: 14 ns/iter (+/- 0) = 2342500 MB/s +test misc::easy1_1K ... bench: 41 ns/iter (+/- 0) = 25463 MB/s +test misc::easy1_1MB ... bench: 48 ns/iter (+/- 0) = 21845750 MB/s +test misc::easy1_32 ... bench: 41 ns/iter (+/- 0) = 1268 MB/s +test misc::easy1_32K ... bench: 41 ns/iter (+/- 1) = 799707 MB/s +test misc::hard_1K ... bench: 51 ns/iter (+/- 1) = 20607 MB/s +test misc::hard_1MB ... bench: 56 ns/iter (+/- 2) = 18725053 MB/s +test misc::hard_32 ... bench: 51 ns/iter (+/- 6) = 1156 MB/s +test misc::hard_32K ... bench: 51 ns/iter (+/- 1) = 643039 MB/s +test misc::is_match_set ... bench: 62 ns/iter (+/- 2) = 403 MB/s +test misc::literal ... bench: 13 ns/iter (+/- 0) = 3923 MB/s +test misc::long_needle1 ... bench: 2,825 ns/iter (+/- 57) = 35398 MB/s +test misc::long_needle2 ... bench: 350,755 ns/iter (+/- 11,905) = 285 MB/s +test misc::match_class ... bench: 64 ns/iter (+/- 1) = 1265 MB/s +test misc::match_class_in_range ... bench: 13 ns/iter (+/- 0) = 6230 MB/s +test misc::matches_set ... bench: 422 ns/iter (+/- 12) = 59 MB/s +test misc::medium_1K ... bench: 15 ns/iter (+/- 0) = 70133 MB/s +test misc::medium_1MB ... bench: 21 ns/iter (+/- 0) = 49933523 MB/s +test misc::medium_32 ... bench: 15 ns/iter (+/- 0) = 4000 MB/s +test misc::medium_32K ... bench: 14 ns/iter (+/- 0) = 2342571 MB/s +test misc::no_exponential ... bench: 443 ns/iter (+/- 12) = 225 MB/s +test misc::not_literal ... bench: 89 ns/iter (+/- 1) = 573 MB/s +test misc::one_pass_long_prefix ... bench: 52 ns/iter (+/- 1) = 500 MB/s +test misc::one_pass_long_prefix_not ... bench: 52 ns/iter (+/- 1) = 500 MB/s +test misc::one_pass_short ... bench: 40 ns/iter (+/- 1) = 425 MB/s +test misc::one_pass_short_not ... bench: 42 ns/iter (+/- 0) = 404 MB/s +test misc::reallyhard2_1K ... 
bench: 80 ns/iter (+/- 0) = 13000 MB/s +test misc::reallyhard_1K ... bench: 1,592 ns/iter (+/- 1) = 660 MB/s +test misc::reallyhard_1MB ... bench: 1,575,789 ns/iter (+/- 34,236) = 665 MB/s +test misc::reallyhard_32 ... bench: 101 ns/iter (+/- 2) = 584 MB/s +test misc::reallyhard_32K ... bench: 49,321 ns/iter (+/- 2,718) = 664 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 4,158 ns/iter (+/- 93) = 1924 MB/s +test regexdna::find_new_lines ... bench: 12,391,732 ns/iter (+/- 180,913) = 410 MB/s +test regexdna::subst1 ... bench: 781,690 ns/iter (+/- 29,637) = 6503 MB/s +test regexdna::subst10 ... bench: 778,306 ns/iter (+/- 22,706) = 6531 MB/s +test regexdna::subst11 ... bench: 777,716 ns/iter (+/- 24,635) = 6536 MB/s +test regexdna::subst2 ... bench: 791,786 ns/iter (+/- 15,778) = 6420 MB/s +test regexdna::subst3 ... bench: 783,470 ns/iter (+/- 25,543) = 6488 MB/s +test regexdna::subst4 ... bench: 814,902 ns/iter (+/- 14,146) = 6238 MB/s +test regexdna::subst5 ... bench: 781,464 ns/iter (+/- 19,532) = 6504 MB/s +test regexdna::subst6 ... bench: 780,116 ns/iter (+/- 16,558) = 6516 MB/s +test regexdna::subst7 ... bench: 795,982 ns/iter (+/- 11,254) = 6386 MB/s +test regexdna::subst8 ... bench: 781,746 ns/iter (+/- 24,996) = 6502 MB/s +test regexdna::subst9 ... bench: 783,793 ns/iter (+/- 14,943) = 6485 MB/s +test regexdna::variant1 ... bench: 2,188,940 ns/iter (+/- 42,308) = 2322 MB/s +test regexdna::variant2 ... bench: 3,218,011 ns/iter (+/- 50,700) = 1579 MB/s +test regexdna::variant3 ... bench: 3,778,907 ns/iter (+/- 90,543) = 1345 MB/s +test regexdna::variant4 ... bench: 3,803,852 ns/iter (+/- 68,319) = 1336 MB/s +test regexdna::variant5 ... bench: 2,660,949 ns/iter (+/- 55,488) = 1910 MB/s +test regexdna::variant6 ... bench: 2,647,131 ns/iter (+/- 26,846) = 1920 MB/s +test regexdna::variant7 ... bench: 3,235,032 ns/iter (+/- 37,599) = 1571 MB/s +test regexdna::variant8 ... bench: 3,305,124 ns/iter (+/- 67,109) = 1538 MB/s +test regexdna::variant9 ... 
bench: 3,231,033 ns/iter (+/- 55,626) = 1573 MB/s +test rust_compile::compile_huge ... bench: 99,387 ns/iter (+/- 2,366) +test rust_compile::compile_huge_bytes ... bench: 5,865,693 ns/iter (+/- 62,255) +test rust_compile::compile_huge_full ... bench: 11,752,845 ns/iter (+/- 195,440) +test rust_compile::compile_simple ... bench: 4,117 ns/iter (+/- 141) +test rust_compile::compile_simple_bytes ... bench: 4,162 ns/iter (+/- 67) +test rust_compile::compile_simple_full ... bench: 19,955 ns/iter (+/- 622) +test rust_compile::compile_small ... bench: 9,140 ns/iter (+/- 112) +test rust_compile::compile_small_bytes ... bench: 165,990 ns/iter (+/- 5,876) +test rust_compile::compile_small_full ... bench: 342,897 ns/iter (+/- 13,730) +test sherlock::before_after_holmes ... bench: 906,789 ns/iter (+/- 13,931) = 656 MB/s +test sherlock::before_holmes ... bench: 62,319 ns/iter (+/- 790) = 9546 MB/s +test sherlock::everything_greedy ... bench: 2,175,424 ns/iter (+/- 47,720) = 273 MB/s +test sherlock::everything_greedy_nl ... bench: 884,406 ns/iter (+/- 22,679) = 672 MB/s +test sherlock::holmes_cochar_watson ... bench: 105,261 ns/iter (+/- 3,536) = 5651 MB/s +test sherlock::holmes_coword_watson ... bench: 479,524 ns/iter (+/- 7,749) = 1240 MB/s +test sherlock::ing_suffix ... bench: 321,401 ns/iter (+/- 9,123) = 1851 MB/s +test sherlock::ing_suffix_limited_space ... bench: 1,069,722 ns/iter (+/- 16,366) = 556 MB/s +test sherlock::letters ... bench: 21,959,896 ns/iter (+/- 204,695) = 27 MB/s +test sherlock::letters_lower ... bench: 21,462,457 ns/iter (+/- 207,449) = 27 MB/s +test sherlock::letters_upper ... bench: 1,768,026 ns/iter (+/- 41,459) = 336 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 897,197 ns/iter (+/- 14,349) = 663 MB/s +test sherlock::name_alt1 ... bench: 34,037 ns/iter (+/- 719) = 17479 MB/s +test sherlock::name_alt2 ... bench: 86,788 ns/iter (+/- 1,203) = 6855 MB/s +test sherlock::name_alt3 ... 
bench: 98,225 ns/iter (+/- 1,589) = 6056 MB/s +test sherlock::name_alt3_nocase ... bench: 377,597 ns/iter (+/- 14,840) = 1575 MB/s +test sherlock::name_alt4 ... bench: 122,440 ns/iter (+/- 8,123) = 4858 MB/s +test sherlock::name_alt4_nocase ... bench: 187,282 ns/iter (+/- 5,176) = 3176 MB/s +test sherlock::name_alt5 ... bench: 91,429 ns/iter (+/- 1,944) = 6507 MB/s +test sherlock::name_alt5_nocase ... bench: 348,111 ns/iter (+/- 12,721) = 1709 MB/s +test sherlock::name_holmes ... bench: 33,547 ns/iter (+/- 1,119) = 17734 MB/s +test sherlock::name_holmes_nocase ... bench: 132,342 ns/iter (+/- 3,974) = 4495 MB/s +test sherlock::name_sherlock ... bench: 22,562 ns/iter (+/- 364) = 26368 MB/s +test sherlock::name_sherlock_holmes ... bench: 22,313 ns/iter (+/- 579) = 26663 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 97,556 ns/iter (+/- 2,092) = 6098 MB/s +test sherlock::name_sherlock_nocase ... bench: 95,917 ns/iter (+/- 4,054) = 6202 MB/s +test sherlock::name_whitespace ... bench: 30,997 ns/iter (+/- 1,039) = 19193 MB/s +test sherlock::no_match_common ... bench: 19,690 ns/iter (+/- 378) = 30214 MB/s +test sherlock::no_match_really_common ... bench: 27,629 ns/iter (+/- 465) = 21532 MB/s +test sherlock::no_match_uncommon ... bench: 19,681 ns/iter (+/- 291) = 30228 MB/s +test sherlock::quotes ... bench: 368,290 ns/iter (+/- 1,508) = 1615 MB/s +test sherlock::repeated_class_negation ... bench: 73,004,024 ns/iter (+/- 1,040,743) = 8 MB/s +test sherlock::the_lower ... bench: 320,929 ns/iter (+/- 12,287) = 1853 MB/s +test sherlock::the_nocase ... bench: 514,946 ns/iter (+/- 11,241) = 1155 MB/s +test sherlock::the_upper ... bench: 43,816 ns/iter (+/- 1,719) = 13577 MB/s +test sherlock::the_whitespace ... bench: 825,245 ns/iter (+/- 20,797) = 720 MB/s +test sherlock::word_ending_n ... bench: 1,676,908 ns/iter (+/- 40,650) = 354 MB/s +test sherlock::words ... bench: 8,449,099 ns/iter (+/- 123,842) = 70 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 107 measured; 0 filtered out; finished in 128.47s + diff --git a/vendor/regex/record/old-bench-log/11-regex-1.7.3/rust b/vendor/regex/record/old-bench-log/11-regex-1.7.3/rust new file mode 100644 index 0000000..aed99af --- /dev/null +++ b/vendor/regex/record/old-bench-log/11-regex-1.7.3/rust @@ -0,0 +1,124 @@ + +running 119 tests +test misc::anchored_literal_long_match ... bench: 7 ns/iter (+/- 0) = 55714 MB/s +test misc::anchored_literal_long_non_match ... bench: 10 ns/iter (+/- 0) = 39000 MB/s +test misc::anchored_literal_short_match ... bench: 7 ns/iter (+/- 0) = 3714 MB/s +test misc::anchored_literal_short_non_match ... bench: 10 ns/iter (+/- 0) = 2600 MB/s +test misc::easy0_1K ... bench: 7 ns/iter (+/- 0) = 150142 MB/s +test misc::easy0_1MB ... bench: 11 ns/iter (+/- 1) = 95327545 MB/s +test misc::easy0_32 ... bench: 7 ns/iter (+/- 0) = 8428 MB/s +test misc::easy0_32K ... bench: 7 ns/iter (+/- 0) = 4685000 MB/s +test misc::easy1_1K ... bench: 17 ns/iter (+/- 1) = 61411 MB/s +test misc::easy1_1MB ... bench: 20 ns/iter (+/- 0) = 52429800 MB/s +test misc::easy1_32 ... bench: 18 ns/iter (+/- 1) = 2888 MB/s +test misc::easy1_32K ... bench: 18 ns/iter (+/- 0) = 1821555 MB/s +test misc::hard_1K ... bench: 24 ns/iter (+/- 0) = 43791 MB/s +test misc::hard_1MB ... bench: 28 ns/iter (+/- 0) = 37450107 MB/s +test misc::hard_32 ... bench: 24 ns/iter (+/- 0) = 2458 MB/s +test misc::hard_32K ... bench: 24 ns/iter (+/- 0) = 1366458 MB/s +test misc::is_match_set ... bench: 37 ns/iter (+/- 0) = 675 MB/s +test misc::literal ... bench: 8 ns/iter (+/- 1) = 6375 MB/s +test misc::long_needle1 ... bench: 1,785 ns/iter (+/- 1) = 56022 MB/s +test misc::long_needle2 ... bench: 193,595 ns/iter (+/- 1,486) = 516 MB/s +test misc::match_class ... bench: 37 ns/iter (+/- 1) = 2189 MB/s +test misc::match_class_in_range ... bench: 8 ns/iter (+/- 0) = 10125 MB/s +test misc::match_class_unicode ... bench: 181 ns/iter (+/- 1) = 889 MB/s +test misc::matches_set ... 
bench: 216 ns/iter (+/- 9) = 115 MB/s +test misc::medium_1K ... bench: 7 ns/iter (+/- 0) = 150285 MB/s +test misc::medium_1MB ... bench: 12 ns/iter (+/- 1) = 87383666 MB/s +test misc::medium_32 ... bench: 7 ns/iter (+/- 0) = 8571 MB/s +test misc::medium_32K ... bench: 7 ns/iter (+/- 0) = 4685142 MB/s +test misc::no_exponential ... bench: 283 ns/iter (+/- 7) = 353 MB/s +test misc::not_literal ... bench: 53 ns/iter (+/- 1) = 962 MB/s +test misc::one_pass_long_prefix ... bench: 24 ns/iter (+/- 2) = 1083 MB/s +test misc::one_pass_long_prefix_not ... bench: 24 ns/iter (+/- 2) = 1083 MB/s +test misc::one_pass_short ... bench: 16 ns/iter (+/- 0) = 1062 MB/s +test misc::one_pass_short_not ... bench: 19 ns/iter (+/- 0) = 894 MB/s +test misc::reallyhard2_1K ... bench: 41 ns/iter (+/- 0) = 25365 MB/s +test misc::reallyhard_1K ... bench: 1,208 ns/iter (+/- 2) = 870 MB/s +test misc::reallyhard_1MB ... bench: 1,213,959 ns/iter (+/- 7,198) = 863 MB/s +test misc::reallyhard_32 ... bench: 62 ns/iter (+/- 0) = 951 MB/s +test misc::reallyhard_32K ... bench: 38,231 ns/iter (+/- 354) = 857 MB/s +test misc::replace_all ... bench: 86 ns/iter (+/- 3) +test misc::reverse_suffix_no_quadratic ... bench: 2,351 ns/iter (+/- 31) = 3402 MB/s +test misc::short_haystack_1000000x ... bench: 91,018 ns/iter (+/- 203) = 87894 MB/s +test misc::short_haystack_100000x ... bench: 9,277 ns/iter (+/- 40) = 86235 MB/s +test misc::short_haystack_10000x ... bench: 2,863 ns/iter (+/- 4) = 27946 MB/s +test misc::short_haystack_1000x ... bench: 201 ns/iter (+/- 3) = 39855 MB/s +test misc::short_haystack_100x ... bench: 100 ns/iter (+/- 2) = 8110 MB/s +test misc::short_haystack_10x ... bench: 88 ns/iter (+/- 0) = 1034 MB/s +test misc::short_haystack_1x ... bench: 86 ns/iter (+/- 1) = 220 MB/s +test misc::short_haystack_2x ... bench: 87 ns/iter (+/- 0) = 310 MB/s +test misc::short_haystack_3x ... bench: 88 ns/iter (+/- 1) = 397 MB/s +test misc::short_haystack_4x ... 
bench: 88 ns/iter (+/- 1) = 488 MB/s +test regexdna::find_new_lines ... bench: 7,348,651 ns/iter (+/- 40,559) = 691 MB/s +test regexdna::subst1 ... bench: 493,624 ns/iter (+/- 10,315) = 10298 MB/s +test regexdna::subst10 ... bench: 489,573 ns/iter (+/- 18,151) = 10383 MB/s +test regexdna::subst11 ... bench: 492,501 ns/iter (+/- 11,650) = 10321 MB/s +test regexdna::subst2 ... bench: 492,283 ns/iter (+/- 12,363) = 10326 MB/s +test regexdna::subst3 ... bench: 496,795 ns/iter (+/- 20,704) = 10232 MB/s +test regexdna::subst4 ... bench: 489,245 ns/iter (+/- 10,289) = 10390 MB/s +test regexdna::subst5 ... bench: 499,701 ns/iter (+/- 11,359) = 10172 MB/s +test regexdna::subst6 ... bench: 490,460 ns/iter (+/- 8,758) = 10364 MB/s +test regexdna::subst7 ... bench: 496,398 ns/iter (+/- 18,774) = 10240 MB/s +test regexdna::subst8 ... bench: 497,077 ns/iter (+/- 24,767) = 10226 MB/s +test regexdna::subst9 ... bench: 496,763 ns/iter (+/- 12,477) = 10233 MB/s +test regexdna::variant1 ... bench: 1,454,747 ns/iter (+/- 48,995) = 3494 MB/s +test regexdna::variant2 ... bench: 2,311,001 ns/iter (+/- 63,347) = 2199 MB/s +test regexdna::variant3 ... bench: 2,832,483 ns/iter (+/- 33,976) = 1794 MB/s +test regexdna::variant4 ... bench: 2,796,710 ns/iter (+/- 56,279) = 1817 MB/s +test regexdna::variant5 ... bench: 1,708,634 ns/iter (+/- 25,749) = 2975 MB/s +test regexdna::variant6 ... bench: 1,706,259 ns/iter (+/- 22,151) = 2979 MB/s +test regexdna::variant7 ... bench: 2,400,436 ns/iter (+/- 24,655) = 2117 MB/s +test regexdna::variant8 ... bench: 2,413,765 ns/iter (+/- 50,326) = 2106 MB/s +test regexdna::variant9 ... bench: 2,402,528 ns/iter (+/- 26,150) = 2115 MB/s +test rust_compile::compile_huge ... bench: 51,936 ns/iter (+/- 834) +test rust_compile::compile_huge_bytes ... bench: 3,294,633 ns/iter (+/- 40,585) +test rust_compile::compile_huge_full ... bench: 6,323,294 ns/iter (+/- 66,684) +test rust_compile::compile_simple ... 
bench: 1,992 ns/iter (+/- 25) +test rust_compile::compile_simple_bytes ... bench: 2,004 ns/iter (+/- 20) +test rust_compile::compile_simple_full ... bench: 9,697 ns/iter (+/- 68) +test rust_compile::compile_small ... bench: 4,261 ns/iter (+/- 72) +test rust_compile::compile_small_bytes ... bench: 83,908 ns/iter (+/- 1,405) +test rust_compile::compile_small_full ... bench: 166,152 ns/iter (+/- 3,508) +test sherlock::before_after_holmes ... bench: 699,767 ns/iter (+/- 6,201) = 850 MB/s +test sherlock::before_holmes ... bench: 29,284 ns/iter (+/- 573) = 20315 MB/s +test sherlock::everything_greedy ... bench: 1,070,812 ns/iter (+/- 18,795) = 555 MB/s +test sherlock::everything_greedy_nl ... bench: 445,517 ns/iter (+/- 7,760) = 1335 MB/s +test sherlock::holmes_cochar_watson ... bench: 43,459 ns/iter (+/- 901) = 13689 MB/s +test sherlock::holmes_coword_watson ... bench: 335,772 ns/iter (+/- 6,348) = 1771 MB/s +test sherlock::ing_suffix ... bench: 153,546 ns/iter (+/- 3,075) = 3874 MB/s +test sherlock::ing_suffix_limited_space ... bench: 777,388 ns/iter (+/- 8,447) = 765 MB/s +test sherlock::letters ... bench: 10,123,374 ns/iter (+/- 90,059) = 58 MB/s +test sherlock::letters_lower ... bench: 9,957,916 ns/iter (+/- 63,766) = 59 MB/s +test sherlock::letters_upper ... bench: 1,123,119 ns/iter (+/- 17,972) = 529 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 694,714 ns/iter (+/- 7,006) = 856 MB/s +test sherlock::name_alt1 ... bench: 13,427 ns/iter (+/- 331) = 44308 MB/s +test sherlock::name_alt2 ... bench: 33,171 ns/iter (+/- 1,029) = 17935 MB/s +test sherlock::name_alt3 ... bench: 36,816 ns/iter (+/- 1,138) = 16159 MB/s +test sherlock::name_alt3_nocase ... bench: 221,185 ns/iter (+/- 3,268) = 2689 MB/s +test sherlock::name_alt4 ... bench: 49,883 ns/iter (+/- 1,150) = 11926 MB/s +test sherlock::name_alt4_nocase ... bench: 74,967 ns/iter (+/- 1,807) = 7935 MB/s +test sherlock::name_alt5 ... 
bench: 34,675 ns/iter (+/- 1,335) = 17157 MB/s +test sherlock::name_alt5_nocase ... bench: 192,109 ns/iter (+/- 6,194) = 3096 MB/s +test sherlock::name_holmes ... bench: 18,355 ns/iter (+/- 389) = 32412 MB/s +test sherlock::name_holmes_nocase ... bench: 58,179 ns/iter (+/- 917) = 10225 MB/s +test sherlock::name_sherlock ... bench: 14,307 ns/iter (+/- 74) = 41583 MB/s +test sherlock::name_sherlock_holmes ... bench: 14,332 ns/iter (+/- 144) = 41510 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 41,336 ns/iter (+/- 736) = 14392 MB/s +test sherlock::name_sherlock_nocase ... bench: 40,029 ns/iter (+/- 1,393) = 14862 MB/s +test sherlock::name_whitespace ... bench: 17,807 ns/iter (+/- 105) = 33410 MB/s +test sherlock::no_match_common ... bench: 13,625 ns/iter (+/- 15) = 43664 MB/s +test sherlock::no_match_really_common ... bench: 13,818 ns/iter (+/- 282) = 43054 MB/s +test sherlock::no_match_uncommon ... bench: 13,628 ns/iter (+/- 27) = 43655 MB/s +test sherlock::quotes ... bench: 232,910 ns/iter (+/- 1,883) = 2554 MB/s +test sherlock::repeated_class_negation ... bench: 36,892,964 ns/iter (+/- 629,538) = 16 MB/s +test sherlock::the_lower ... bench: 203,077 ns/iter (+/- 2,574) = 2929 MB/s +test sherlock::the_nocase ... bench: 290,781 ns/iter (+/- 6,597) = 2045 MB/s +test sherlock::the_upper ... bench: 22,731 ns/iter (+/- 439) = 26172 MB/s +test sherlock::the_whitespace ... bench: 423,983 ns/iter (+/- 10,849) = 1403 MB/s +test sherlock::word_ending_n ... bench: 1,109,013 ns/iter (+/- 12,645) = 536 MB/s +test sherlock::words ... bench: 4,529,451 ns/iter (+/- 44,285) = 131 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 119 measured; 0 filtered out; finished in 164.08s + diff --git a/vendor/regex/record/old-bench-log/11-regex-1.7.3/rust-bytes b/vendor/regex/record/old-bench-log/11-regex-1.7.3/rust-bytes new file mode 100644 index 0000000..e9f750e --- /dev/null +++ b/vendor/regex/record/old-bench-log/11-regex-1.7.3/rust-bytes @@ -0,0 +1,112 @@ + +running 107 tests +test misc::anchored_literal_long_match ... bench: 8 ns/iter (+/- 0) = 48750 MB/s +test misc::anchored_literal_long_non_match ... bench: 10 ns/iter (+/- 0) = 39000 MB/s +test misc::anchored_literal_short_match ... bench: 7 ns/iter (+/- 0) = 3714 MB/s +test misc::anchored_literal_short_non_match ... bench: 10 ns/iter (+/- 0) = 2600 MB/s +test misc::easy0_1K ... bench: 7 ns/iter (+/- 0) = 150142 MB/s +test misc::easy0_1MB ... bench: 11 ns/iter (+/- 0) = 95327545 MB/s +test misc::easy0_32 ... bench: 7 ns/iter (+/- 0) = 8428 MB/s +test misc::easy0_32K ... bench: 7 ns/iter (+/- 0) = 4685000 MB/s +test misc::easy1_1K ... bench: 17 ns/iter (+/- 0) = 61411 MB/s +test misc::easy1_1MB ... bench: 20 ns/iter (+/- 0) = 52429800 MB/s +test misc::easy1_32 ... bench: 18 ns/iter (+/- 0) = 2888 MB/s +test misc::easy1_32K ... bench: 18 ns/iter (+/- 0) = 1821555 MB/s +test misc::hard_1K ... bench: 24 ns/iter (+/- 0) = 43791 MB/s +test misc::hard_1MB ... bench: 28 ns/iter (+/- 0) = 37450107 MB/s +test misc::hard_32 ... bench: 24 ns/iter (+/- 0) = 2458 MB/s +test misc::hard_32K ... bench: 24 ns/iter (+/- 0) = 1366458 MB/s +test misc::is_match_set ... bench: 37 ns/iter (+/- 0) = 675 MB/s +test misc::literal ... bench: 7 ns/iter (+/- 0) = 7285 MB/s +test misc::long_needle1 ... bench: 2,186 ns/iter (+/- 19) = 45746 MB/s +test misc::long_needle2 ... bench: 210,378 ns/iter (+/- 61,574) = 475 MB/s +test misc::match_class ... bench: 39 ns/iter (+/- 1) = 2076 MB/s +test misc::match_class_in_range ... bench: 7 ns/iter (+/- 0) = 11571 MB/s +test misc::matches_set ... 
bench: 176 ns/iter (+/- 12) = 142 MB/s +test misc::medium_1K ... bench: 8 ns/iter (+/- 0) = 131500 MB/s +test misc::medium_1MB ... bench: 12 ns/iter (+/- 0) = 87383666 MB/s +test misc::medium_32 ... bench: 8 ns/iter (+/- 0) = 7500 MB/s +test misc::medium_32K ... bench: 8 ns/iter (+/- 0) = 4099500 MB/s +test misc::no_exponential ... bench: 274 ns/iter (+/- 7) = 364 MB/s +test misc::not_literal ... bench: 53 ns/iter (+/- 0) = 962 MB/s +test misc::one_pass_long_prefix ... bench: 24 ns/iter (+/- 2) = 1083 MB/s +test misc::one_pass_long_prefix_not ... bench: 24 ns/iter (+/- 1) = 1083 MB/s +test misc::one_pass_short ... bench: 16 ns/iter (+/- 1) = 1062 MB/s +test misc::one_pass_short_not ... bench: 19 ns/iter (+/- 0) = 894 MB/s +test misc::reallyhard2_1K ... bench: 38 ns/iter (+/- 5) = 27368 MB/s +test misc::reallyhard_1K ... bench: 1,220 ns/iter (+/- 15) = 861 MB/s +test misc::reallyhard_1MB ... bench: 1,215,297 ns/iter (+/- 5,229) = 862 MB/s +test misc::reallyhard_32 ... bench: 63 ns/iter (+/- 1) = 936 MB/s +test misc::reallyhard_32K ... bench: 38,164 ns/iter (+/- 232) = 859 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 2,353 ns/iter (+/- 8) = 3399 MB/s +test regexdna::find_new_lines ... bench: 7,346,276 ns/iter (+/- 46,149) = 691 MB/s +test regexdna::subst1 ... bench: 486,203 ns/iter (+/- 21,159) = 10455 MB/s +test regexdna::subst10 ... bench: 494,356 ns/iter (+/- 6,423) = 10282 MB/s +test regexdna::subst11 ... bench: 481,930 ns/iter (+/- 19,639) = 10548 MB/s +test regexdna::subst2 ... bench: 486,672 ns/iter (+/- 22,184) = 10445 MB/s +test regexdna::subst3 ... bench: 487,152 ns/iter (+/- 19,776) = 10434 MB/s +test regexdna::subst4 ... bench: 486,534 ns/iter (+/- 23,897) = 10448 MB/s +test regexdna::subst5 ... bench: 481,412 ns/iter (+/- 26,310) = 10559 MB/s +test regexdna::subst6 ... bench: 479,498 ns/iter (+/- 20,310) = 10601 MB/s +test regexdna::subst7 ... bench: 481,960 ns/iter (+/- 18,492) = 10547 MB/s +test regexdna::subst8 ... 
bench: 482,282 ns/iter (+/- 22,522) = 10540 MB/s +test regexdna::subst9 ... bench: 489,224 ns/iter (+/- 25,264) = 10390 MB/s +test regexdna::variant1 ... bench: 1,470,068 ns/iter (+/- 65,563) = 3457 MB/s +test regexdna::variant2 ... bench: 2,298,112 ns/iter (+/- 27,688) = 2211 MB/s +test regexdna::variant3 ... bench: 2,818,539 ns/iter (+/- 31,432) = 1803 MB/s +test regexdna::variant4 ... bench: 2,786,226 ns/iter (+/- 30,699) = 1824 MB/s +test regexdna::variant5 ... bench: 1,716,429 ns/iter (+/- 20,264) = 2961 MB/s +test regexdna::variant6 ... bench: 1,719,420 ns/iter (+/- 23,944) = 2956 MB/s +test regexdna::variant7 ... bench: 2,391,022 ns/iter (+/- 23,192) = 2126 MB/s +test regexdna::variant8 ... bench: 2,418,744 ns/iter (+/- 44,152) = 2101 MB/s +test regexdna::variant9 ... bench: 2,400,918 ns/iter (+/- 24,041) = 2117 MB/s +test rust_compile::compile_huge ... bench: 57,745 ns/iter (+/- 816) +test rust_compile::compile_huge_bytes ... bench: 3,346,952 ns/iter (+/- 39,488) +test rust_compile::compile_huge_full ... bench: 6,344,293 ns/iter (+/- 53,114) +test rust_compile::compile_simple ... bench: 2,040 ns/iter (+/- 32) +test rust_compile::compile_simple_bytes ... bench: 2,010 ns/iter (+/- 34) +test rust_compile::compile_simple_full ... bench: 9,632 ns/iter (+/- 464) +test rust_compile::compile_small ... bench: 4,445 ns/iter (+/- 77) +test rust_compile::compile_small_bytes ... bench: 83,791 ns/iter (+/- 1,929) +test rust_compile::compile_small_full ... bench: 164,948 ns/iter (+/- 2,595) +test sherlock::before_after_holmes ... bench: 699,996 ns/iter (+/- 6,647) = 849 MB/s +test sherlock::before_holmes ... bench: 28,208 ns/iter (+/- 233) = 21090 MB/s +test sherlock::everything_greedy ... bench: 1,033,048 ns/iter (+/- 9,790) = 575 MB/s +test sherlock::everything_greedy_nl ... bench: 424,081 ns/iter (+/- 22,574) = 1402 MB/s +test sherlock::holmes_cochar_watson ... bench: 43,131 ns/iter (+/- 827) = 13793 MB/s +test sherlock::holmes_coword_watson ... 
bench: 336,678 ns/iter (+/- 6,985) = 1767 MB/s +test sherlock::ing_suffix ... bench: 153,589 ns/iter (+/- 3,193) = 3873 MB/s +test sherlock::ing_suffix_limited_space ... bench: 776,911 ns/iter (+/- 8,815) = 765 MB/s +test sherlock::letters ... bench: 10,056,702 ns/iter (+/- 49,688) = 59 MB/s +test sherlock::letters_lower ... bench: 9,900,568 ns/iter (+/- 76,118) = 60 MB/s +test sherlock::letters_upper ... bench: 1,120,456 ns/iter (+/- 13,538) = 530 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 693,727 ns/iter (+/- 6,840) = 857 MB/s +test sherlock::name_alt1 ... bench: 11,101 ns/iter (+/- 65) = 53592 MB/s +test sherlock::name_alt2 ... bench: 34,003 ns/iter (+/- 966) = 17496 MB/s +test sherlock::name_alt3 ... bench: 37,975 ns/iter (+/- 1,313) = 15666 MB/s +test sherlock::name_alt3_nocase ... bench: 214,299 ns/iter (+/- 3,026) = 2776 MB/s +test sherlock::name_alt4 ... bench: 50,551 ns/iter (+/- 1,377) = 11768 MB/s +test sherlock::name_alt4_nocase ... bench: 74,713 ns/iter (+/- 1,359) = 7962 MB/s +test sherlock::name_alt5 ... bench: 35,426 ns/iter (+/- 625) = 16793 MB/s +test sherlock::name_alt5_nocase ... bench: 190,521 ns/iter (+/- 4,903) = 3122 MB/s +test sherlock::name_holmes ... bench: 18,070 ns/iter (+/- 763) = 32923 MB/s +test sherlock::name_holmes_nocase ... bench: 58,454 ns/iter (+/- 1,228) = 10177 MB/s +test sherlock::name_sherlock ... bench: 14,380 ns/iter (+/- 227) = 41372 MB/s +test sherlock::name_sherlock_holmes ... bench: 14,491 ns/iter (+/- 116) = 41055 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 40,722 ns/iter (+/- 231) = 14609 MB/s +test sherlock::name_sherlock_nocase ... bench: 39,937 ns/iter (+/- 623) = 14896 MB/s +test sherlock::name_whitespace ... bench: 17,979 ns/iter (+/- 140) = 33090 MB/s +test sherlock::no_match_common ... bench: 13,650 ns/iter (+/- 112) = 43584 MB/s +test sherlock::no_match_really_common ... bench: 13,623 ns/iter (+/- 295) = 43671 MB/s +test sherlock::no_match_uncommon ... 
bench: 13,641 ns/iter (+/- 55) = 43613 MB/s +test sherlock::quotes ... bench: 232,451 ns/iter (+/- 6,555) = 2559 MB/s +test sherlock::repeated_class_negation ... bench: 36,984,199 ns/iter (+/- 623,153) = 16 MB/s +test sherlock::the_lower ... bench: 189,502 ns/iter (+/- 4,870) = 3139 MB/s +test sherlock::the_nocase ... bench: 294,945 ns/iter (+/- 9,381) = 2017 MB/s +test sherlock::the_upper ... bench: 21,591 ns/iter (+/- 680) = 27554 MB/s +test sherlock::the_whitespace ... bench: 424,862 ns/iter (+/- 7,197) = 1400 MB/s +test sherlock::word_ending_n ... bench: 1,126,768 ns/iter (+/- 13,900) = 527 MB/s +test sherlock::words ... bench: 4,517,167 ns/iter (+/- 55,809) = 131 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 107 measured; 0 filtered out; finished in 150.58s + diff --git a/vendor/regex/record/old-bench-log/12-regex-1.8.1/rust b/vendor/regex/record/old-bench-log/12-regex-1.8.1/rust new file mode 100644 index 0000000..282893e --- /dev/null +++ b/vendor/regex/record/old-bench-log/12-regex-1.8.1/rust @@ -0,0 +1,124 @@ + +running 119 tests +test misc::anchored_literal_long_match ... bench: 8 ns/iter (+/- 0) = 48750 MB/s +test misc::anchored_literal_long_non_match ... bench: 9 ns/iter (+/- 0) = 43333 MB/s +test misc::anchored_literal_short_match ... bench: 7 ns/iter (+/- 0) = 3714 MB/s +test misc::anchored_literal_short_non_match ... bench: 9 ns/iter (+/- 0) = 2888 MB/s +test misc::easy0_1K ... bench: 24 ns/iter (+/- 1) = 43791 MB/s +test misc::easy0_1MB ... bench: 28 ns/iter (+/- 0) = 37450107 MB/s +test misc::easy0_32 ... bench: 25 ns/iter (+/- 0) = 2360 MB/s +test misc::easy0_32K ... bench: 24 ns/iter (+/- 0) = 1366458 MB/s +test misc::easy1_1K ... bench: 18 ns/iter (+/- 1) = 58000 MB/s +test misc::easy1_1MB ... bench: 21 ns/iter (+/- 0) = 49933142 MB/s +test misc::easy1_32 ... bench: 18 ns/iter (+/- 0) = 2888 MB/s +test misc::easy1_32K ... bench: 18 ns/iter (+/- 0) = 1821555 MB/s +test misc::hard_1K ... 
bench: 24 ns/iter (+/- 0) = 43791 MB/s +test misc::hard_1MB ... bench: 29 ns/iter (+/- 0) = 36158724 MB/s +test misc::hard_32 ... bench: 24 ns/iter (+/- 0) = 2458 MB/s +test misc::hard_32K ... bench: 24 ns/iter (+/- 0) = 1366458 MB/s +test misc::is_match_set ... bench: 37 ns/iter (+/- 0) = 675 MB/s +test misc::literal ... bench: 7 ns/iter (+/- 0) = 7285 MB/s +test misc::long_needle1 ... bench: 1,802 ns/iter (+/- 6) = 55494 MB/s +test misc::long_needle2 ... bench: 207,353 ns/iter (+/- 165) = 482 MB/s +test misc::match_class ... bench: 41 ns/iter (+/- 2) = 1975 MB/s +test misc::match_class_in_range ... bench: 7 ns/iter (+/- 0) = 11571 MB/s +test misc::match_class_unicode ... bench: 168 ns/iter (+/- 3) = 958 MB/s +test misc::matches_set ... bench: 210 ns/iter (+/- 5) = 119 MB/s +test misc::medium_1K ... bench: 25 ns/iter (+/- 0) = 42080 MB/s +test misc::medium_1MB ... bench: 29 ns/iter (+/- 0) = 36158758 MB/s +test misc::medium_32 ... bench: 25 ns/iter (+/- 0) = 2400 MB/s +test misc::medium_32K ... bench: 25 ns/iter (+/- 0) = 1311840 MB/s +test misc::no_exponential ... bench: 268 ns/iter (+/- 7) = 373 MB/s +test misc::not_literal ... bench: 44 ns/iter (+/- 4) = 1159 MB/s +test misc::one_pass_long_prefix ... bench: 24 ns/iter (+/- 2) = 1083 MB/s +test misc::one_pass_long_prefix_not ... bench: 23 ns/iter (+/- 2) = 1130 MB/s +test misc::one_pass_short ... bench: 16 ns/iter (+/- 0) = 1062 MB/s +test misc::one_pass_short_not ... bench: 19 ns/iter (+/- 0) = 894 MB/s +test misc::reallyhard2_1K ... bench: 38 ns/iter (+/- 1) = 27368 MB/s +test misc::reallyhard_1K ... bench: 1,215 ns/iter (+/- 12) = 865 MB/s +test misc::reallyhard_1MB ... bench: 1,215,907 ns/iter (+/- 6,442) = 862 MB/s +test misc::reallyhard_32 ... bench: 53 ns/iter (+/- 2) = 1113 MB/s +test misc::reallyhard_32K ... bench: 38,162 ns/iter (+/- 464) = 859 MB/s +test misc::replace_all ... bench: 86 ns/iter (+/- 5) +test misc::reverse_suffix_no_quadratic ... 
bench: 2,355 ns/iter (+/- 470) = 3397 MB/s +test misc::short_haystack_1000000x ... bench: 91,039 ns/iter (+/- 157) = 87874 MB/s +test misc::short_haystack_100000x ... bench: 7,595 ns/iter (+/- 33) = 105333 MB/s +test misc::short_haystack_10000x ... bench: 2,865 ns/iter (+/- 9) = 27927 MB/s +test misc::short_haystack_1000x ... bench: 211 ns/iter (+/- 2) = 37966 MB/s +test misc::short_haystack_100x ... bench: 98 ns/iter (+/- 3) = 8275 MB/s +test misc::short_haystack_10x ... bench: 92 ns/iter (+/- 4) = 989 MB/s +test misc::short_haystack_1x ... bench: 90 ns/iter (+/- 2) = 211 MB/s +test misc::short_haystack_2x ... bench: 88 ns/iter (+/- 3) = 306 MB/s +test misc::short_haystack_3x ... bench: 91 ns/iter (+/- 3) = 384 MB/s +test misc::short_haystack_4x ... bench: 90 ns/iter (+/- 3) = 477 MB/s +test regexdna::find_new_lines ... bench: 7,323,399 ns/iter (+/- 24,661) = 694 MB/s +test regexdna::subst1 ... bench: 473,671 ns/iter (+/- 16,963) = 10731 MB/s +test regexdna::subst10 ... bench: 463,672 ns/iter (+/- 13,433) = 10963 MB/s +test regexdna::subst11 ... bench: 470,891 ns/iter (+/- 28,305) = 10795 MB/s +test regexdna::subst2 ... bench: 469,218 ns/iter (+/- 26,181) = 10833 MB/s +test regexdna::subst3 ... bench: 467,417 ns/iter (+/- 30,700) = 10875 MB/s +test regexdna::subst4 ... bench: 469,373 ns/iter (+/- 17,254) = 10830 MB/s +test regexdna::subst5 ... bench: 467,035 ns/iter (+/- 30,365) = 10884 MB/s +test regexdna::subst6 ... bench: 466,540 ns/iter (+/- 18,283) = 10895 MB/s +test regexdna::subst7 ... bench: 470,291 ns/iter (+/- 23,930) = 10809 MB/s +test regexdna::subst8 ... bench: 466,425 ns/iter (+/- 27,080) = 10898 MB/s +test regexdna::subst9 ... bench: 468,192 ns/iter (+/- 17,296) = 10857 MB/s +test regexdna::variant1 ... bench: 653,471 ns/iter (+/- 8,898) = 7779 MB/s +test regexdna::variant2 ... bench: 902,852 ns/iter (+/- 12,549) = 5630 MB/s +test regexdna::variant3 ... bench: 1,158,000 ns/iter (+/- 14,075) = 4389 MB/s +test regexdna::variant4 ... 
bench: 1,149,520 ns/iter (+/- 13,482) = 4422 MB/s +test regexdna::variant5 ... bench: 1,132,121 ns/iter (+/- 7,624) = 4490 MB/s +test regexdna::variant6 ... bench: 1,069,227 ns/iter (+/- 13,436) = 4754 MB/s +test regexdna::variant7 ... bench: 1,150,436 ns/iter (+/- 28,302) = 4418 MB/s +test regexdna::variant8 ... bench: 1,148,923 ns/iter (+/- 49,063) = 4424 MB/s +test regexdna::variant9 ... bench: 1,190,858 ns/iter (+/- 15,044) = 4268 MB/s +test rust_compile::compile_huge ... bench: 52,168 ns/iter (+/- 827) +test rust_compile::compile_huge_bytes ... bench: 3,330,456 ns/iter (+/- 57,242) +test rust_compile::compile_huge_full ... bench: 6,378,126 ns/iter (+/- 85,019) +test rust_compile::compile_simple ... bench: 2,291 ns/iter (+/- 39) +test rust_compile::compile_simple_bytes ... bench: 2,355 ns/iter (+/- 37) +test rust_compile::compile_simple_full ... bench: 14,581 ns/iter (+/- 103) +test rust_compile::compile_small ... bench: 10,443 ns/iter (+/- 114) +test rust_compile::compile_small_bytes ... bench: 11,269 ns/iter (+/- 150) +test rust_compile::compile_small_full ... bench: 14,746 ns/iter (+/- 212) +test sherlock::before_after_holmes ... bench: 699,736 ns/iter (+/- 6,402) = 850 MB/s +test sherlock::before_holmes ... bench: 28,001 ns/iter (+/- 198) = 21246 MB/s +test sherlock::everything_greedy ... bench: 1,029,174 ns/iter (+/- 33,321) = 578 MB/s +test sherlock::everything_greedy_nl ... bench: 460,103 ns/iter (+/- 23,290) = 1293 MB/s +test sherlock::holmes_cochar_watson ... bench: 57,666 ns/iter (+/- 907) = 10316 MB/s +test sherlock::holmes_coword_watson ... bench: 345,016 ns/iter (+/- 4,672) = 1724 MB/s +test sherlock::ing_suffix ... bench: 150,499 ns/iter (+/- 4,855) = 3953 MB/s +test sherlock::ing_suffix_limited_space ... bench: 777,723 ns/iter (+/- 8,076) = 764 MB/s +test sherlock::letters ... bench: 10,022,203 ns/iter (+/- 77,897) = 59 MB/s +test sherlock::letters_lower ... bench: 9,861,816 ns/iter (+/- 76,172) = 60 MB/s +test sherlock::letters_upper ... 
bench: 1,134,201 ns/iter (+/- 11,926) = 524 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 693,533 ns/iter (+/- 6,686) = 857 MB/s +test sherlock::name_alt1 ... bench: 11,974 ns/iter (+/- 292) = 49685 MB/s +test sherlock::name_alt2 ... bench: 44,708 ns/iter (+/- 573) = 13307 MB/s +test sherlock::name_alt3 ... bench: 49,873 ns/iter (+/- 785) = 11928 MB/s +test sherlock::name_alt3_nocase ... bench: 190,194 ns/iter (+/- 2,944) = 3128 MB/s +test sherlock::name_alt4 ... bench: 52,028 ns/iter (+/- 1,102) = 11434 MB/s +test sherlock::name_alt4_nocase ... bench: 119,891 ns/iter (+/- 921) = 4962 MB/s +test sherlock::name_alt5 ... bench: 47,139 ns/iter (+/- 1,617) = 12620 MB/s +test sherlock::name_alt5_nocase ... bench: 200,159 ns/iter (+/- 3,992) = 2972 MB/s +test sherlock::name_holmes ... bench: 17,902 ns/iter (+/- 577) = 33232 MB/s +test sherlock::name_holmes_nocase ... bench: 58,219 ns/iter (+/- 1,215) = 10218 MB/s +test sherlock::name_sherlock ... bench: 14,314 ns/iter (+/- 45) = 41563 MB/s +test sherlock::name_sherlock_holmes ... bench: 14,399 ns/iter (+/- 45) = 41317 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 41,418 ns/iter (+/- 591) = 14364 MB/s +test sherlock::name_sherlock_nocase ... bench: 39,877 ns/iter (+/- 545) = 14919 MB/s +test sherlock::name_whitespace ... bench: 17,883 ns/iter (+/- 151) = 33268 MB/s +test sherlock::no_match_common ... bench: 13,696 ns/iter (+/- 123) = 43438 MB/s +test sherlock::no_match_really_common ... bench: 10,157 ns/iter (+/- 222) = 58573 MB/s +test sherlock::no_match_uncommon ... bench: 13,663 ns/iter (+/- 53) = 43543 MB/s +test sherlock::quotes ... bench: 234,890 ns/iter (+/- 4,574) = 2532 MB/s +test sherlock::repeated_class_negation ... bench: 36,406,680 ns/iter (+/- 397,378) = 16 MB/s +test sherlock::the_lower ... bench: 192,028 ns/iter (+/- 5,315) = 3098 MB/s +test sherlock::the_nocase ... bench: 311,087 ns/iter (+/- 6,723) = 1912 MB/s +test sherlock::the_upper ... 
bench: 21,710 ns/iter (+/- 1,269) = 27403 MB/s +test sherlock::the_whitespace ... bench: 425,246 ns/iter (+/- 7,741) = 1399 MB/s +test sherlock::word_ending_n ... bench: 1,116,412 ns/iter (+/- 11,753) = 532 MB/s +test sherlock::words ... bench: 4,452,805 ns/iter (+/- 84,309) = 133 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 119 measured; 0 filtered out; finished in 142.33s + diff --git a/vendor/regex/record/old-bench-log/12-regex-1.8.1/rust-bytes b/vendor/regex/record/old-bench-log/12-regex-1.8.1/rust-bytes new file mode 100644 index 0000000..f5380a7 --- /dev/null +++ b/vendor/regex/record/old-bench-log/12-regex-1.8.1/rust-bytes @@ -0,0 +1,112 @@ + +running 107 tests +test misc::anchored_literal_long_match ... bench: 7 ns/iter (+/- 0) = 55714 MB/s +test misc::anchored_literal_long_non_match ... bench: 8 ns/iter (+/- 0) = 48750 MB/s +test misc::anchored_literal_short_match ... bench: 7 ns/iter (+/- 0) = 3714 MB/s +test misc::anchored_literal_short_non_match ... bench: 8 ns/iter (+/- 0) = 3250 MB/s +test misc::easy0_1K ... bench: 24 ns/iter (+/- 0) = 43791 MB/s +test misc::easy0_1MB ... bench: 28 ns/iter (+/- 0) = 37450107 MB/s +test misc::easy0_32 ... bench: 24 ns/iter (+/- 0) = 2458 MB/s +test misc::easy0_32K ... bench: 24 ns/iter (+/- 0) = 1366458 MB/s +test misc::easy1_1K ... bench: 18 ns/iter (+/- 0) = 58000 MB/s +test misc::easy1_1MB ... bench: 21 ns/iter (+/- 0) = 49933142 MB/s +test misc::easy1_32 ... bench: 18 ns/iter (+/- 2) = 2888 MB/s +test misc::easy1_32K ... bench: 18 ns/iter (+/- 0) = 1821555 MB/s +test misc::hard_1K ... bench: 24 ns/iter (+/- 0) = 43791 MB/s +test misc::hard_1MB ... bench: 28 ns/iter (+/- 0) = 37450107 MB/s +test misc::hard_32 ... bench: 24 ns/iter (+/- 0) = 2458 MB/s +test misc::hard_32K ... bench: 24 ns/iter (+/- 0) = 1366458 MB/s +test misc::is_match_set ... bench: 37 ns/iter (+/- 0) = 675 MB/s +test misc::literal ... bench: 7 ns/iter (+/- 0) = 7285 MB/s +test misc::long_needle1 ... 
bench: 1,801 ns/iter (+/- 2) = 55525 MB/s +test misc::long_needle2 ... bench: 212,892 ns/iter (+/- 206) = 469 MB/s +test misc::match_class ... bench: 40 ns/iter (+/- 0) = 2025 MB/s +test misc::match_class_in_range ... bench: 7 ns/iter (+/- 0) = 11571 MB/s +test misc::matches_set ... bench: 174 ns/iter (+/- 2) = 143 MB/s +test misc::medium_1K ... bench: 25 ns/iter (+/- 0) = 42080 MB/s +test misc::medium_1MB ... bench: 29 ns/iter (+/- 0) = 36158758 MB/s +test misc::medium_32 ... bench: 25 ns/iter (+/- 0) = 2400 MB/s +test misc::medium_32K ... bench: 25 ns/iter (+/- 0) = 1311840 MB/s +test misc::no_exponential ... bench: 270 ns/iter (+/- 8) = 370 MB/s +test misc::not_literal ... bench: 44 ns/iter (+/- 1) = 1159 MB/s +test misc::one_pass_long_prefix ... bench: 23 ns/iter (+/- 0) = 1130 MB/s +test misc::one_pass_long_prefix_not ... bench: 23 ns/iter (+/- 0) = 1130 MB/s +test misc::one_pass_short ... bench: 16 ns/iter (+/- 1) = 1062 MB/s +test misc::one_pass_short_not ... bench: 19 ns/iter (+/- 0) = 894 MB/s +test misc::reallyhard2_1K ... bench: 38 ns/iter (+/- 2) = 27368 MB/s +test misc::reallyhard_1K ... bench: 1,215 ns/iter (+/- 15) = 865 MB/s +test misc::reallyhard_1MB ... bench: 1,217,631 ns/iter (+/- 11,216) = 861 MB/s +test misc::reallyhard_32 ... bench: 53 ns/iter (+/- 4) = 1113 MB/s +test misc::reallyhard_32K ... bench: 38,251 ns/iter (+/- 364) = 857 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 2,353 ns/iter (+/- 4) = 3399 MB/s +test regexdna::find_new_lines ... bench: 7,322,463 ns/iter (+/- 37,966) = 694 MB/s +test regexdna::subst1 ... bench: 466,849 ns/iter (+/- 12,252) = 10888 MB/s +test regexdna::subst10 ... bench: 465,011 ns/iter (+/- 19,693) = 10931 MB/s +test regexdna::subst11 ... bench: 457,806 ns/iter (+/- 13,453) = 11103 MB/s +test regexdna::subst2 ... bench: 456,878 ns/iter (+/- 32,828) = 11126 MB/s +test regexdna::subst3 ... bench: 465,531 ns/iter (+/- 21,786) = 10919 MB/s +test regexdna::subst4 ... 
bench: 454,553 ns/iter (+/- 12,698) = 11183 MB/s +test regexdna::subst5 ... bench: 456,977 ns/iter (+/- 13,155) = 11123 MB/s +test regexdna::subst6 ... bench: 466,105 ns/iter (+/- 15,667) = 10906 MB/s +test regexdna::subst7 ... bench: 462,655 ns/iter (+/- 18,871) = 10987 MB/s +test regexdna::subst8 ... bench: 456,642 ns/iter (+/- 19,218) = 11132 MB/s +test regexdna::subst9 ... bench: 456,307 ns/iter (+/- 15,369) = 11140 MB/s +test regexdna::variant1 ... bench: 655,033 ns/iter (+/- 7,901) = 7760 MB/s +test regexdna::variant2 ... bench: 902,675 ns/iter (+/- 15,165) = 5631 MB/s +test regexdna::variant3 ... bench: 1,159,521 ns/iter (+/- 14,489) = 4384 MB/s +test regexdna::variant4 ... bench: 1,147,781 ns/iter (+/- 16,536) = 4428 MB/s +test regexdna::variant5 ... bench: 1,133,068 ns/iter (+/- 13,938) = 4486 MB/s +test regexdna::variant6 ... bench: 1,061,174 ns/iter (+/- 14,478) = 4790 MB/s +test regexdna::variant7 ... bench: 1,151,637 ns/iter (+/- 35,753) = 4414 MB/s +test regexdna::variant8 ... bench: 1,137,068 ns/iter (+/- 37,678) = 4470 MB/s +test regexdna::variant9 ... bench: 1,185,082 ns/iter (+/- 14,355) = 4289 MB/s +test rust_compile::compile_huge ... bench: 66,894 ns/iter (+/- 2,425) +test rust_compile::compile_huge_bytes ... bench: 3,331,663 ns/iter (+/- 47,261) +test rust_compile::compile_huge_full ... bench: 6,446,254 ns/iter (+/- 65,334) +test rust_compile::compile_simple ... bench: 2,351 ns/iter (+/- 71) +test rust_compile::compile_simple_bytes ... bench: 2,350 ns/iter (+/- 49) +test rust_compile::compile_simple_full ... bench: 14,460 ns/iter (+/- 144) +test rust_compile::compile_small ... bench: 10,350 ns/iter (+/- 120) +test rust_compile::compile_small_bytes ... bench: 10,993 ns/iter (+/- 89) +test rust_compile::compile_small_full ... bench: 14,201 ns/iter (+/- 139) +test sherlock::before_after_holmes ... bench: 698,092 ns/iter (+/- 6,907) = 852 MB/s +test sherlock::before_holmes ... 
bench: 29,127 ns/iter (+/- 1,001) = 20425 MB/s +test sherlock::everything_greedy ... bench: 1,026,902 ns/iter (+/- 86,299) = 579 MB/s +test sherlock::everything_greedy_nl ... bench: 433,157 ns/iter (+/- 10,129) = 1373 MB/s +test sherlock::holmes_cochar_watson ... bench: 57,103 ns/iter (+/- 509) = 10418 MB/s +test sherlock::holmes_coword_watson ... bench: 344,973 ns/iter (+/- 3,288) = 1724 MB/s +test sherlock::ing_suffix ... bench: 158,337 ns/iter (+/- 2,492) = 3757 MB/s +test sherlock::ing_suffix_limited_space ... bench: 776,703 ns/iter (+/- 8,000) = 765 MB/s +test sherlock::letters ... bench: 10,179,909 ns/iter (+/- 55,188) = 58 MB/s +test sherlock::letters_lower ... bench: 10,007,465 ns/iter (+/- 75,168) = 59 MB/s +test sherlock::letters_upper ... bench: 1,116,201 ns/iter (+/- 11,571) = 532 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 693,124 ns/iter (+/- 6,540) = 858 MB/s +test sherlock::name_alt1 ... bench: 12,079 ns/iter (+/- 192) = 49253 MB/s +test sherlock::name_alt2 ... bench: 44,336 ns/iter (+/- 1,424) = 13418 MB/s +test sherlock::name_alt3 ... bench: 49,569 ns/iter (+/- 721) = 12002 MB/s +test sherlock::name_alt3_nocase ... bench: 189,812 ns/iter (+/- 2,952) = 3134 MB/s +test sherlock::name_alt4 ... bench: 52,132 ns/iter (+/- 1,182) = 11412 MB/s +test sherlock::name_alt4_nocase ... bench: 120,591 ns/iter (+/- 2,521) = 4933 MB/s +test sherlock::name_alt5 ... bench: 46,956 ns/iter (+/- 545) = 12670 MB/s +test sherlock::name_alt5_nocase ... bench: 199,252 ns/iter (+/- 2,212) = 2985 MB/s +test sherlock::name_holmes ... bench: 17,983 ns/iter (+/- 591) = 33083 MB/s +test sherlock::name_holmes_nocase ... bench: 58,139 ns/iter (+/- 919) = 10232 MB/s +test sherlock::name_sherlock ... bench: 14,283 ns/iter (+/- 113) = 41653 MB/s +test sherlock::name_sherlock_holmes ... bench: 14,587 ns/iter (+/- 82) = 40785 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 40,947 ns/iter (+/- 385) = 14529 MB/s +test sherlock::name_sherlock_nocase ... 
bench: 39,607 ns/iter (+/- 567) = 15020 MB/s +test sherlock::name_whitespace ... bench: 18,803 ns/iter (+/- 232) = 31640 MB/s +test sherlock::no_match_common ... bench: 13,704 ns/iter (+/- 73) = 43413 MB/s +test sherlock::no_match_really_common ... bench: 14,166 ns/iter (+/- 191) = 41997 MB/s +test sherlock::no_match_uncommon ... bench: 13,702 ns/iter (+/- 36) = 43419 MB/s +test sherlock::quotes ... bench: 232,609 ns/iter (+/- 3,217) = 2557 MB/s +test sherlock::repeated_class_negation ... bench: 36,167,769 ns/iter (+/- 592,579) = 16 MB/s +test sherlock::the_lower ... bench: 188,281 ns/iter (+/- 2,966) = 3159 MB/s +test sherlock::the_nocase ... bench: 312,853 ns/iter (+/- 23,145) = 1901 MB/s +test sherlock::the_upper ... bench: 20,987 ns/iter (+/- 909) = 28347 MB/s +test sherlock::the_whitespace ... bench: 427,154 ns/iter (+/- 6,396) = 1392 MB/s +test sherlock::word_ending_n ... bench: 1,112,964 ns/iter (+/- 15,393) = 534 MB/s +test sherlock::words ... bench: 4,513,468 ns/iter (+/- 35,410) = 131 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 107 measured; 0 filtered out; finished in 143.96s + diff --git a/vendor/regex/record/old-bench-log/13-regex-1.9.0/rust b/vendor/regex/record/old-bench-log/13-regex-1.9.0/rust new file mode 100644 index 0000000..b46bdf9 --- /dev/null +++ b/vendor/regex/record/old-bench-log/13-regex-1.9.0/rust @@ -0,0 +1,115 @@ + +running 110 tests +test misc::anchored_literal_long_match ... bench: 15 ns/iter (+/- 0) = 26000 MB/s +test misc::anchored_literal_long_non_match ... bench: 12 ns/iter (+/- 1) = 32500 MB/s +test misc::anchored_literal_short_match ... bench: 15 ns/iter (+/- 0) = 1733 MB/s +test misc::anchored_literal_short_non_match ... bench: 12 ns/iter (+/- 1) = 2166 MB/s +test misc::easy0_1K ... bench: 42 ns/iter (+/- 0) = 25023 MB/s +test misc::easy0_1MB ... bench: 42 ns/iter (+/- 0) = 24966738 MB/s +test misc::easy0_32 ... bench: 42 ns/iter (+/- 0) = 1404 MB/s +test misc::easy0_32K ... 
bench: 43 ns/iter (+/- 0) = 762674 MB/s +test misc::easy1_1K ... bench: 35 ns/iter (+/- 0) = 29828 MB/s +test misc::easy1_1MB ... bench: 35 ns/iter (+/- 0) = 29959885 MB/s +test misc::easy1_32 ... bench: 35 ns/iter (+/- 0) = 1485 MB/s +test misc::easy1_32K ... bench: 35 ns/iter (+/- 0) = 936800 MB/s +test misc::hard_1K ... bench: 43 ns/iter (+/- 0) = 24441 MB/s +test misc::hard_1MB ... bench: 42 ns/iter (+/- 0) = 24966738 MB/s +test misc::hard_32 ... bench: 42 ns/iter (+/- 0) = 1404 MB/s +test misc::hard_32K ... bench: 42 ns/iter (+/- 0) = 780833 MB/s +test misc::is_match_set ... bench: 46 ns/iter (+/- 1) = 543 MB/s +test misc::literal ... bench: 9 ns/iter (+/- 0) = 5666 MB/s +test misc::long_needle1 ... bench: 1,801 ns/iter (+/- 24) = 55525 MB/s +test misc::long_needle2 ... bench: 194,124 ns/iter (+/- 289) = 515 MB/s +test misc::match_class ... bench: 22 ns/iter (+/- 1) = 3681 MB/s +test misc::match_class_in_range ... bench: 10 ns/iter (+/- 0) = 8100 MB/s +test misc::match_class_unicode ... bench: 196 ns/iter (+/- 0) = 821 MB/s +test misc::matches_set ... bench: 55 ns/iter (+/- 3) = 454 MB/s +test misc::medium_1K ... bench: 43 ns/iter (+/- 0) = 24465 MB/s +test misc::medium_1MB ... bench: 43 ns/iter (+/- 0) = 24386139 MB/s +test misc::medium_32 ... bench: 43 ns/iter (+/- 0) = 1395 MB/s +test misc::medium_32K ... bench: 43 ns/iter (+/- 0) = 762697 MB/s +test misc::no_exponential ... bench: 167 ns/iter (+/- 0) = 598 MB/s +test misc::not_literal ... bench: 26 ns/iter (+/- 1) = 1961 MB/s +test misc::one_pass_long_prefix ... bench: 40 ns/iter (+/- 0) = 650 MB/s +test misc::one_pass_long_prefix_not ... bench: 40 ns/iter (+/- 0) = 650 MB/s +test misc::one_pass_short ... bench: 30 ns/iter (+/- 0) = 566 MB/s +test misc::one_pass_short_not ... bench: 31 ns/iter (+/- 0) = 548 MB/s +test misc::reallyhard2_1K ... bench: 67 ns/iter (+/- 1) = 15522 MB/s +test misc::reallyhard_1K ... bench: 78 ns/iter (+/- 1) = 13474 MB/s +test misc::reallyhard_1MB ... 
bench: 19,310 ns/iter (+/- 80) = 54303 MB/s +test misc::reallyhard_32 ... bench: 62 ns/iter (+/- 2) = 951 MB/s +test misc::reallyhard_32K ... bench: 543 ns/iter (+/- 4) = 60395 MB/s +test misc::replace_all ... bench: 151 ns/iter (+/- 13) +test misc::reverse_suffix_no_quadratic ... bench: 9,302 ns/iter (+/- 25) = 860 MB/s +test misc::short_haystack_1000000x ... bench: 90,868 ns/iter (+/- 354) = 88039 MB/s +test misc::short_haystack_100000x ... bench: 7,215 ns/iter (+/- 18) = 110881 MB/s +test misc::short_haystack_10000x ... bench: 605 ns/iter (+/- 2) = 132249 MB/s +test misc::short_haystack_1000x ... bench: 148 ns/iter (+/- 2) = 54128 MB/s +test misc::short_haystack_100x ... bench: 83 ns/iter (+/- 3) = 9771 MB/s +test misc::short_haystack_10x ... bench: 89 ns/iter (+/- 1) = 1022 MB/s +test misc::short_haystack_1x ... bench: 79 ns/iter (+/- 1) = 240 MB/s +test misc::short_haystack_2x ... bench: 79 ns/iter (+/- 1) = 341 MB/s +test misc::short_haystack_3x ... bench: 80 ns/iter (+/- 2) = 437 MB/s +test misc::short_haystack_4x ... bench: 79 ns/iter (+/- 1) = 544 MB/s +test regexdna::find_new_lines ... bench: 1,748,215 ns/iter (+/- 25,793) = 2907 MB/s +test regexdna::subst1 ... bench: 486,169 ns/iter (+/- 11,425) = 10456 MB/s +test regexdna::subst10 ... bench: 479,019 ns/iter (+/- 7,468) = 10612 MB/s +test regexdna::subst11 ... bench: 481,118 ns/iter (+/- 10,305) = 10565 MB/s +test regexdna::subst2 ... bench: 484,508 ns/iter (+/- 11,753) = 10491 MB/s +test regexdna::subst3 ... bench: 481,861 ns/iter (+/- 7,991) = 10549 MB/s +test regexdna::subst4 ... bench: 477,043 ns/iter (+/- 12,101) = 10656 MB/s +test regexdna::subst5 ... bench: 483,954 ns/iter (+/- 7,728) = 10503 MB/s +test regexdna::subst6 ... bench: 479,564 ns/iter (+/- 13,514) = 10600 MB/s +test regexdna::subst7 ... bench: 481,345 ns/iter (+/- 11,205) = 10560 MB/s +test regexdna::subst8 ... bench: 479,772 ns/iter (+/- 13,266) = 10595 MB/s +test regexdna::subst9 ... 
bench: 480,299 ns/iter (+/- 9,997) = 10583 MB/s +test regexdna::variant1 ... bench: 693,230 ns/iter (+/- 21,808) = 7332 MB/s +test regexdna::variant2 ... bench: 936,552 ns/iter (+/- 9,916) = 5427 MB/s +test regexdna::variant3 ... bench: 1,192,921 ns/iter (+/- 11,038) = 4261 MB/s +test regexdna::variant4 ... bench: 1,170,341 ns/iter (+/- 27,745) = 4343 MB/s +test regexdna::variant5 ... bench: 1,166,877 ns/iter (+/- 8,369) = 4356 MB/s +test regexdna::variant6 ... bench: 1,085,919 ns/iter (+/- 9,594) = 4681 MB/s +test regexdna::variant7 ... bench: 1,248,718 ns/iter (+/- 13,480) = 4070 MB/s +test regexdna::variant8 ... bench: 1,216,643 ns/iter (+/- 15,505) = 4178 MB/s +test regexdna::variant9 ... bench: 1,219,951 ns/iter (+/- 14,109) = 4166 MB/s +test sherlock::before_after_holmes ... bench: 27,363 ns/iter (+/- 604) = 21742 MB/s +test sherlock::before_holmes ... bench: 31,147 ns/iter (+/- 876) = 19100 MB/s +test sherlock::everything_greedy ... bench: 1,326,354 ns/iter (+/- 22,628) = 448 MB/s +test sherlock::everything_greedy_nl ... bench: 801,343 ns/iter (+/- 895) = 742 MB/s +test sherlock::holmes_cochar_watson ... bench: 56,328 ns/iter (+/- 1,009) = 10561 MB/s +test sherlock::holmes_coword_watson ... bench: 301,186 ns/iter (+/- 3,615) = 1975 MB/s +test sherlock::ing_suffix ... bench: 176,428 ns/iter (+/- 2,182) = 3372 MB/s +test sherlock::ing_suffix_limited_space ... bench: 173,948 ns/iter (+/- 5,073) = 3420 MB/s +test sherlock::letters ... bench: 7,226,608 ns/iter (+/- 261,849) = 82 MB/s +test sherlock::letters_lower ... bench: 7,024,589 ns/iter (+/- 145,281) = 84 MB/s +test sherlock::letters_upper ... bench: 1,004,841 ns/iter (+/- 6,857) = 592 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 15,978 ns/iter (+/- 90) = 37234 MB/s +test sherlock::name_alt1 ... bench: 11,151 ns/iter (+/- 289) = 53352 MB/s +test sherlock::name_alt2 ... bench: 45,441 ns/iter (+/- 960) = 13092 MB/s +test sherlock::name_alt3 ... 
bench: 51,934 ns/iter (+/- 806) = 11455 MB/s +test sherlock::name_alt3_nocase ... bench: 171,844 ns/iter (+/- 4,176) = 3462 MB/s +test sherlock::name_alt4 ... bench: 46,611 ns/iter (+/- 1,072) = 12763 MB/s +test sherlock::name_alt4_nocase ... bench: 74,956 ns/iter (+/- 2,098) = 7937 MB/s +test sherlock::name_alt5 ... bench: 47,595 ns/iter (+/- 595) = 12499 MB/s +test sherlock::name_alt5_nocase ... bench: 100,636 ns/iter (+/- 814) = 5911 MB/s +test sherlock::name_holmes ... bench: 19,293 ns/iter (+/- 687) = 30836 MB/s +test sherlock::name_holmes_nocase ... bench: 52,310 ns/iter (+/- 1,024) = 11373 MB/s +test sherlock::name_sherlock ... bench: 16,080 ns/iter (+/- 327) = 36998 MB/s +test sherlock::name_sherlock_holmes ... bench: 14,605 ns/iter (+/- 120) = 40734 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 38,662 ns/iter (+/- 360) = 15388 MB/s +test sherlock::name_sherlock_nocase ... bench: 37,650 ns/iter (+/- 316) = 15801 MB/s +test sherlock::name_whitespace ... bench: 16,234 ns/iter (+/- 125) = 36647 MB/s +test sherlock::no_match_common ... bench: 13,709 ns/iter (+/- 72) = 43397 MB/s +test sherlock::no_match_really_common ... bench: 9,870 ns/iter (+/- 133) = 60276 MB/s +test sherlock::no_match_uncommon ... bench: 13,735 ns/iter (+/- 57) = 43315 MB/s +test sherlock::quotes ... bench: 189,377 ns/iter (+/- 2,105) = 3141 MB/s +test sherlock::repeated_class_negation ... bench: 29,934 ns/iter (+/- 1,249) = 19874 MB/s +test sherlock::the_lower ... bench: 213,236 ns/iter (+/- 3,823) = 2790 MB/s +test sherlock::the_nocase ... bench: 322,922 ns/iter (+/- 5,946) = 1842 MB/s +test sherlock::the_upper ... bench: 23,494 ns/iter (+/- 718) = 25322 MB/s +test sherlock::the_whitespace ... bench: 392,113 ns/iter (+/- 6,046) = 1517 MB/s +test sherlock::word_ending_n ... bench: 673,618 ns/iter (+/- 12,865) = 883 MB/s +test sherlock::words ... bench: 3,632,096 ns/iter (+/- 56,944) = 163 MB/s + +test result: ok. 
0 passed; 0 failed; 0 ignored; 110 measured; 0 filtered out; finished in 117.87s + diff --git a/vendor/regex/record/old-bench-log/13-regex-1.9.0/rust-bytes b/vendor/regex/record/old-bench-log/13-regex-1.9.0/rust-bytes new file mode 100644 index 0000000..8ac6c04 --- /dev/null +++ b/vendor/regex/record/old-bench-log/13-regex-1.9.0/rust-bytes @@ -0,0 +1,103 @@ + +running 98 tests +test misc::anchored_literal_long_match ... bench: 15 ns/iter (+/- 0) = 26000 MB/s +test misc::anchored_literal_long_non_match ... bench: 12 ns/iter (+/- 0) = 32500 MB/s +test misc::anchored_literal_short_match ... bench: 15 ns/iter (+/- 0) = 1733 MB/s +test misc::anchored_literal_short_non_match ... bench: 12 ns/iter (+/- 0) = 2166 MB/s +test misc::easy0_1K ... bench: 42 ns/iter (+/- 0) = 25023 MB/s +test misc::easy0_1MB ... bench: 42 ns/iter (+/- 0) = 24966738 MB/s +test misc::easy0_32 ... bench: 42 ns/iter (+/- 0) = 1404 MB/s +test misc::easy0_32K ... bench: 42 ns/iter (+/- 0) = 780833 MB/s +test misc::easy1_1K ... bench: 34 ns/iter (+/- 1) = 30705 MB/s +test misc::easy1_1MB ... bench: 34 ns/iter (+/- 0) = 30841058 MB/s +test misc::easy1_32 ... bench: 34 ns/iter (+/- 0) = 1529 MB/s +test misc::easy1_32K ... bench: 34 ns/iter (+/- 0) = 964352 MB/s +test misc::hard_1K ... bench: 42 ns/iter (+/- 0) = 25023 MB/s +test misc::hard_1MB ... bench: 42 ns/iter (+/- 0) = 24966738 MB/s +test misc::hard_32 ... bench: 42 ns/iter (+/- 0) = 1404 MB/s +test misc::hard_32K ... bench: 42 ns/iter (+/- 0) = 780833 MB/s +test misc::is_match_set ... bench: 47 ns/iter (+/- 1) = 531 MB/s +test misc::literal ... bench: 10 ns/iter (+/- 0) = 5100 MB/s +test misc::long_needle1 ... bench: 1,808 ns/iter (+/- 7) = 55310 MB/s +test misc::long_needle2 ... bench: 213,106 ns/iter (+/- 416) = 469 MB/s +test misc::match_class ... bench: 23 ns/iter (+/- 1) = 3521 MB/s +test misc::match_class_in_range ... bench: 11 ns/iter (+/- 0) = 7363 MB/s +test misc::matches_set ... 
bench: 56 ns/iter (+/- 3) = 446 MB/s +test misc::medium_1K ... bench: 43 ns/iter (+/- 0) = 24465 MB/s +test misc::medium_1MB ... bench: 43 ns/iter (+/- 0) = 24386139 MB/s +test misc::medium_32 ... bench: 43 ns/iter (+/- 0) = 1395 MB/s +test misc::medium_32K ... bench: 43 ns/iter (+/- 0) = 762697 MB/s +test misc::no_exponential ... bench: 162 ns/iter (+/- 4) = 617 MB/s +test misc::not_literal ... bench: 27 ns/iter (+/- 1) = 1888 MB/s +test misc::one_pass_long_prefix ... bench: 41 ns/iter (+/- 0) = 634 MB/s +test misc::one_pass_long_prefix_not ... bench: 41 ns/iter (+/- 0) = 634 MB/s +test misc::one_pass_short ... bench: 30 ns/iter (+/- 0) = 566 MB/s +test misc::one_pass_short_not ... bench: 31 ns/iter (+/- 0) = 548 MB/s +test misc::reallyhard2_1K ... bench: 70 ns/iter (+/- 1) = 14857 MB/s +test misc::reallyhard_1K ... bench: 78 ns/iter (+/- 3) = 13474 MB/s +test misc::reallyhard_1MB ... bench: 19,850 ns/iter (+/- 345) = 52826 MB/s +test misc::reallyhard_32 ... bench: 61 ns/iter (+/- 2) = 967 MB/s +test misc::reallyhard_32K ... bench: 546 ns/iter (+/- 8) = 60064 MB/s +test misc::reverse_suffix_no_quadratic ... bench: 9,304 ns/iter (+/- 29) = 859 MB/s +test regexdna::find_new_lines ... bench: 1,733,767 ns/iter (+/- 66,699) = 2932 MB/s +test regexdna::subst1 ... bench: 486,442 ns/iter (+/- 11,929) = 10450 MB/s +test regexdna::subst10 ... bench: 486,073 ns/iter (+/- 12,157) = 10458 MB/s +test regexdna::subst11 ... bench: 483,485 ns/iter (+/- 11,703) = 10514 MB/s +test regexdna::subst2 ... bench: 487,298 ns/iter (+/- 9,184) = 10431 MB/s +test regexdna::subst3 ... bench: 491,219 ns/iter (+/- 9,614) = 10348 MB/s +test regexdna::subst4 ... bench: 482,668 ns/iter (+/- 9,576) = 10531 MB/s +test regexdna::subst5 ... bench: 489,673 ns/iter (+/- 8,331) = 10381 MB/s +test regexdna::subst6 ... bench: 484,707 ns/iter (+/- 5,276) = 10487 MB/s +test regexdna::subst7 ... bench: 485,109 ns/iter (+/- 9,360) = 10478 MB/s +test regexdna::subst8 ... 
bench: 485,790 ns/iter (+/- 9,298) = 10464 MB/s +test regexdna::subst9 ... bench: 483,255 ns/iter (+/- 12,434) = 10519 MB/s +test regexdna::variant1 ... bench: 654,757 ns/iter (+/- 8,719) = 7763 MB/s +test regexdna::variant2 ... bench: 905,052 ns/iter (+/- 9,599) = 5616 MB/s +test regexdna::variant3 ... bench: 1,161,187 ns/iter (+/- 13,798) = 4377 MB/s +test regexdna::variant4 ... bench: 1,144,656 ns/iter (+/- 15,198) = 4440 MB/s +test regexdna::variant5 ... bench: 1,136,222 ns/iter (+/- 9,112) = 4473 MB/s +test regexdna::variant6 ... bench: 1,062,124 ns/iter (+/- 12,336) = 4786 MB/s +test regexdna::variant7 ... bench: 1,144,371 ns/iter (+/- 44,700) = 4442 MB/s +test regexdna::variant8 ... bench: 1,143,064 ns/iter (+/- 53,456) = 4447 MB/s +test regexdna::variant9 ... bench: 1,187,063 ns/iter (+/- 14,341) = 4282 MB/s +test sherlock::before_after_holmes ... bench: 27,804 ns/iter (+/- 598) = 21397 MB/s +test sherlock::before_holmes ... bench: 31,197 ns/iter (+/- 933) = 19070 MB/s +test sherlock::everything_greedy ... bench: 1,272,335 ns/iter (+/- 12,466) = 467 MB/s +test sherlock::everything_greedy_nl ... bench: 801,469 ns/iter (+/- 955) = 742 MB/s +test sherlock::holmes_cochar_watson ... bench: 56,790 ns/iter (+/- 1,606) = 10476 MB/s +test sherlock::holmes_coword_watson ... bench: 300,554 ns/iter (+/- 3,460) = 1979 MB/s +test sherlock::ing_suffix ... bench: 179,355 ns/iter (+/- 5,486) = 3317 MB/s +test sherlock::ing_suffix_limited_space ... bench: 175,703 ns/iter (+/- 2,380) = 3386 MB/s +test sherlock::letters ... bench: 7,197,094 ns/iter (+/- 181,502) = 82 MB/s +test sherlock::letters_lower ... bench: 7,100,979 ns/iter (+/- 155,898) = 83 MB/s +test sherlock::letters_upper ... bench: 1,018,217 ns/iter (+/- 21,695) = 584 MB/s +test sherlock::line_boundary_sherlock_holmes ... bench: 15,931 ns/iter (+/- 140) = 37344 MB/s +test sherlock::name_alt1 ... bench: 10,932 ns/iter (+/- 96) = 54421 MB/s +test sherlock::name_alt2 ... 
bench: 45,580 ns/iter (+/- 829) = 13052 MB/s +test sherlock::name_alt3 ... bench: 51,942 ns/iter (+/- 1,418) = 11453 MB/s +test sherlock::name_alt3_nocase ... bench: 171,749 ns/iter (+/- 1,451) = 3463 MB/s +test sherlock::name_alt4 ... bench: 45,705 ns/iter (+/- 1,536) = 13016 MB/s +test sherlock::name_alt4_nocase ... bench: 73,782 ns/iter (+/- 1,679) = 8063 MB/s +test sherlock::name_alt5 ... bench: 48,045 ns/iter (+/- 1,261) = 12382 MB/s +test sherlock::name_alt5_nocase ... bench: 100,307 ns/iter (+/- 553) = 5931 MB/s +test sherlock::name_holmes ... bench: 18,916 ns/iter (+/- 662) = 31451 MB/s +test sherlock::name_holmes_nocase ... bench: 52,714 ns/iter (+/- 774) = 11286 MB/s +test sherlock::name_sherlock ... bench: 14,575 ns/iter (+/- 163) = 40818 MB/s +test sherlock::name_sherlock_holmes ... bench: 14,625 ns/iter (+/- 166) = 40679 MB/s +test sherlock::name_sherlock_holmes_nocase ... bench: 39,024 ns/iter (+/- 361) = 15245 MB/s +test sherlock::name_sherlock_nocase ... bench: 38,025 ns/iter (+/- 418) = 15645 MB/s +test sherlock::name_whitespace ... bench: 16,247 ns/iter (+/- 88) = 36618 MB/s +test sherlock::no_match_common ... bench: 13,724 ns/iter (+/- 28) = 43349 MB/s +test sherlock::no_match_really_common ... bench: 13,798 ns/iter (+/- 93) = 43117 MB/s +test sherlock::no_match_uncommon ... bench: 13,671 ns/iter (+/- 80) = 43517 MB/s +test sherlock::quotes ... bench: 189,359 ns/iter (+/- 2,334) = 3141 MB/s +test sherlock::repeated_class_negation ... bench: 29,083 ns/iter (+/- 708) = 20456 MB/s +test sherlock::the_lower ... bench: 204,122 ns/iter (+/- 4,256) = 2914 MB/s +test sherlock::the_nocase ... bench: 319,388 ns/iter (+/- 6,790) = 1862 MB/s +test sherlock::the_upper ... bench: 22,706 ns/iter (+/- 961) = 26201 MB/s +test sherlock::the_whitespace ... bench: 386,276 ns/iter (+/- 4,950) = 1540 MB/s +test sherlock::word_ending_n ... bench: 690,010 ns/iter (+/- 8,516) = 862 MB/s +test sherlock::words ... 
bench: 3,659,990 ns/iter (+/- 104,505) = 162 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 98 measured; 0 filtered out; finished in 105.65s + diff --git a/vendor/regex/record/old-bench-log/README.md b/vendor/regex/record/old-bench-log/README.md new file mode 100644 index 0000000..aab290e --- /dev/null +++ b/vendor/regex/record/old-bench-log/README.md @@ -0,0 +1,11 @@ +These represent an old log of benchmarks from regex 1.7.3 and older. New +and much more comprehensive benchmarks are now maintained as part of the +[rebar] project. + +We keep these old benchmark recordings for posterity, but they may be removed +in the future. + +Measurements can be compared using the [`cargo-benchcmp`][cargo-benchcmp] tool. + +[rebar]: https://github.com/BurntSushi/rebar +[cargo-benchcmp]: https://github.com/BurntSushi/cargo-benchcmp diff --git a/vendor/regex/record/old-bench-log/old/01-before b/vendor/regex/record/old-bench-log/old/01-before new file mode 100644 index 0000000..74890a3 --- /dev/null +++ b/vendor/regex/record/old-bench-log/old/01-before @@ -0,0 +1,28 @@ +test bench::anchored_literal_long_match ... bench: 520 ns/iter (+/- 1) +test bench::anchored_literal_long_non_match ... bench: 236 ns/iter (+/- 2) +test bench::anchored_literal_short_match ... bench: 519 ns/iter (+/- 2) +test bench::anchored_literal_short_non_match ... bench: 238 ns/iter (+/- 2) +test bench::easy0_1K ... bench: 7742 ns/iter (+/- 97) = 132 MB/s +test bench::easy0_32 ... bench: 4989 ns/iter (+/- 20) = 6 MB/s +test bench::easy0_32K ... bench: 96347 ns/iter (+/- 997) = 340 MB/s +test bench::easy1_1K ... bench: 9805 ns/iter (+/- 1846) = 104 MB/s +test bench::easy1_32 ... bench: 4930 ns/iter (+/- 202) = 6 MB/s +test bench::easy1_32K ... bench: 163332 ns/iter (+/- 9207) = 200 MB/s +test bench::hard_1K ... bench: 97455 ns/iter (+/- 1089) = 10 MB/s +test bench::hard_32 ... bench: 8256 ns/iter (+/- 148) = 3 MB/s +test bench::hard_32K ... 
bench: 2948095 ns/iter (+/- 11988) = 11 MB/s +test bench::literal ... bench: 371 ns/iter (+/- 5) +test bench::match_class ... bench: 2168 ns/iter (+/- 12) +test bench::match_class_in_range ... bench: 2379 ns/iter (+/- 13) +test bench::medium_1K ... bench: 37073 ns/iter (+/- 1100) = 27 MB/s +test bench::medium_32 ... bench: 6183 ns/iter (+/- 218) = 5 MB/s +test bench::medium_32K ... bench: 1032000 ns/iter (+/- 8278) = 31 MB/s +test bench::no_exponential ... bench: 727975 ns/iter (+/- 2970) +test bench::not_literal ... bench: 4670 ns/iter (+/- 29) +test bench::one_pass_long_prefix ... bench: 1562 ns/iter (+/- 24) +test bench::one_pass_long_prefix_not ... bench: 1539 ns/iter (+/- 40) +test bench::one_pass_short_a ... bench: 2688 ns/iter (+/- 21) +test bench::one_pass_short_a_not ... bench: 4197 ns/iter (+/- 36) +test bench::one_pass_short_b ... bench: 2198 ns/iter (+/- 22) +test bench::one_pass_short_b_not ... bench: 3761 ns/iter (+/- 41) +test bench::replace_all ... bench: 2874 ns/iter (+/- 25) diff --git a/vendor/regex/record/old-bench-log/old/02-new-syntax-crate b/vendor/regex/record/old-bench-log/old/02-new-syntax-crate new file mode 100644 index 0000000..267808f --- /dev/null +++ b/vendor/regex/record/old-bench-log/old/02-new-syntax-crate @@ -0,0 +1,28 @@ +test bench::anchored_literal_long_match ... bench: 545 ns/iter (+/- 12) +test bench::anchored_literal_long_non_match ... bench: 251 ns/iter (+/- 11) +test bench::anchored_literal_short_match ... bench: 521 ns/iter (+/- 31) +test bench::anchored_literal_short_non_match ... bench: 231 ns/iter (+/- 0) +test bench::easy0_1K ... bench: 7465 ns/iter (+/- 102) = 137 MB/s +test bench::easy0_32 ... bench: 4995 ns/iter (+/- 27) = 6 MB/s +test bench::easy0_32K ... bench: 86985 ns/iter (+/- 755) = 376 MB/s +test bench::easy1_1K ... bench: 9493 ns/iter (+/- 1727) = 107 MB/s +test bench::easy1_32 ... bench: 4955 ns/iter (+/- 324) = 6 MB/s +test bench::easy1_32K ... 
bench: 155288 ns/iter (+/- 13016) = 210 MB/s +test bench::hard_1K ... bench: 95925 ns/iter (+/- 1674) = 10 MB/s +test bench::hard_32 ... bench: 8264 ns/iter (+/- 151) = 3 MB/s +test bench::hard_32K ... bench: 2886440 ns/iter (+/- 25807) = 11 MB/s +test bench::literal ... bench: 365 ns/iter (+/- 12) +test bench::match_class ... bench: 2313 ns/iter (+/- 8) +test bench::match_class_in_range ... bench: 2596 ns/iter (+/- 8) +test bench::medium_1K ... bench: 38136 ns/iter (+/- 941) = 26 MB/s +test bench::medium_32 ... bench: 6178 ns/iter (+/- 147) = 5 MB/s +test bench::medium_32K ... bench: 1065698 ns/iter (+/- 6815) = 30 MB/s +test bench::no_exponential ... bench: 682461 ns/iter (+/- 2860) +test bench::not_literal ... bench: 4525 ns/iter (+/- 67) +test bench::one_pass_long_prefix ... bench: 1459 ns/iter (+/- 13) +test bench::one_pass_long_prefix_not ... bench: 1463 ns/iter (+/- 8) +test bench::one_pass_short_a ... bench: 2615 ns/iter (+/- 10) +test bench::one_pass_short_a_not ... bench: 4066 ns/iter (+/- 48) +test bench::one_pass_short_b ... bench: 2064 ns/iter (+/- 10) +test bench::one_pass_short_b_not ... bench: 3502 ns/iter (+/- 24) +test bench::replace_all ... bench: 2949 ns/iter (+/- 15) diff --git a/vendor/regex/record/old-bench-log/old/03-new-syntax-crate b/vendor/regex/record/old-bench-log/old/03-new-syntax-crate new file mode 100644 index 0000000..a50005d --- /dev/null +++ b/vendor/regex/record/old-bench-log/old/03-new-syntax-crate @@ -0,0 +1,28 @@ +test bench::anchored_literal_long_match ... bench: 373 ns/iter (+/- 5) +test bench::anchored_literal_long_non_match ... bench: 204 ns/iter (+/- 3) +test bench::anchored_literal_short_match ... bench: 376 ns/iter (+/- 5) +test bench::anchored_literal_short_non_match ... bench: 206 ns/iter (+/- 3) +test bench::easy0_1K ... bench: 9136 ns/iter (+/- 177) = 112 MB/s +test bench::easy0_32 ... bench: 6641 ns/iter (+/- 86) = 4 MB/s +test bench::easy0_32K ... 
bench: 88826 ns/iter (+/- 1366) = 368 MB/s +test bench::easy1_1K ... bench: 10937 ns/iter (+/- 737) = 93 MB/s +test bench::easy1_32 ... bench: 7366 ns/iter (+/- 219) = 4 MB/s +test bench::easy1_32K ... bench: 122324 ns/iter (+/- 4628) = 267 MB/s +test bench::hard_1K ... bench: 59998 ns/iter (+/- 965) = 17 MB/s +test bench::hard_32 ... bench: 9058 ns/iter (+/- 123) = 3 MB/s +test bench::hard_32K ... bench: 1694326 ns/iter (+/- 27226) = 19 MB/s +test bench::literal ... bench: 336 ns/iter (+/- 6) +test bench::match_class ... bench: 2109 ns/iter (+/- 27) +test bench::match_class_in_range ... bench: 2274 ns/iter (+/- 32) +test bench::medium_1K ... bench: 38317 ns/iter (+/- 1075) = 26 MB/s +test bench::medium_32 ... bench: 7969 ns/iter (+/- 115) = 4 MB/s +test bench::medium_32K ... bench: 1028260 ns/iter (+/- 12905) = 31 MB/s +test bench::no_exponential ... bench: 257719 ns/iter (+/- 4939) +test bench::not_literal ... bench: 1699 ns/iter (+/- 31) +test bench::one_pass_long_prefix ... bench: 750 ns/iter (+/- 9) +test bench::one_pass_long_prefix_not ... bench: 747 ns/iter (+/- 12) +test bench::one_pass_short_a ... bench: 1844 ns/iter (+/- 22) +test bench::one_pass_short_a_not ... bench: 2395 ns/iter (+/- 21) +test bench::one_pass_short_b ... bench: 1270 ns/iter (+/- 26) +test bench::one_pass_short_b_not ... bench: 1869 ns/iter (+/- 25) +test bench::replace_all ... bench: 3124 ns/iter (+/- 53) diff --git a/vendor/regex/record/old-bench-log/old/04-fixed-benchmark b/vendor/regex/record/old-bench-log/old/04-fixed-benchmark new file mode 100644 index 0000000..1956e98 --- /dev/null +++ b/vendor/regex/record/old-bench-log/old/04-fixed-benchmark @@ -0,0 +1,28 @@ +test bench::anchored_literal_long_match ... bench: 373 ns/iter (+/- 5) +test bench::anchored_literal_long_non_match ... bench: 202 ns/iter (+/- 12) +test bench::anchored_literal_short_match ... bench: 380 ns/iter (+/- 135) +test bench::anchored_literal_short_non_match ... 
bench: 211 ns/iter (+/- 1) +test bench::easy0_1K ... bench: 2,723 ns/iter (+/- 101) = 376 MB/s +test bench::easy0_32 ... bench: 255 ns/iter (+/- 2) = 125 MB/s +test bench::easy0_32K ... bench: 81,845 ns/iter (+/- 598) = 400 MB/s +test bench::easy1_1K ... bench: 3,872 ns/iter (+/- 783) = 264 MB/s +test bench::easy1_32 ... bench: 287 ns/iter (+/- 143) = 111 MB/s +test bench::easy1_32K ... bench: 115,340 ns/iter (+/- 4,717) = 284 MB/s +test bench::hard_1K ... bench: 52,484 ns/iter (+/- 472) = 19 MB/s +test bench::hard_32 ... bench: 1,923 ns/iter (+/- 49) = 16 MB/s +test bench::hard_32K ... bench: 1,710,214 ns/iter (+/- 9,733) = 19 MB/s +test bench::literal ... bench: 337 ns/iter (+/- 13) +test bench::match_class ... bench: 2,141 ns/iter (+/- 7) +test bench::match_class_in_range ... bench: 2,301 ns/iter (+/- 7) +test bench::medium_1K ... bench: 31,696 ns/iter (+/- 961) = 32 MB/s +test bench::medium_32 ... bench: 1,155 ns/iter (+/- 71) = 27 MB/s +test bench::medium_32K ... bench: 1,016,101 ns/iter (+/- 12,090) = 32 MB/s +test bench::no_exponential ... bench: 262,801 ns/iter (+/- 1,332) +test bench::not_literal ... bench: 1,729 ns/iter (+/- 3) +test bench::one_pass_long_prefix ... bench: 779 ns/iter (+/- 4) +test bench::one_pass_long_prefix_not ... bench: 779 ns/iter (+/- 6) +test bench::one_pass_short_a ... bench: 1,943 ns/iter (+/- 10) +test bench::one_pass_short_a_not ... bench: 2,545 ns/iter (+/- 9) +test bench::one_pass_short_b ... bench: 1,364 ns/iter (+/- 4) +test bench::one_pass_short_b_not ... bench: 2,029 ns/iter (+/- 22) +test bench::replace_all ... bench: 3,185 ns/iter (+/- 12) diff --git a/vendor/regex/record/old-bench-log/old/05-thread-caching b/vendor/regex/record/old-bench-log/old/05-thread-caching new file mode 100644 index 0000000..238f978 --- /dev/null +++ b/vendor/regex/record/old-bench-log/old/05-thread-caching @@ -0,0 +1,29 @@ +test bench::anchored_literal_long_match ... bench: 287 ns/iter (+/- 11) +test bench::anchored_literal_long_non_match ... 
bench: 111 ns/iter (+/- 0) +test bench::anchored_literal_short_match ... bench: 286 ns/iter (+/- 4) +test bench::anchored_literal_short_non_match ... bench: 114 ns/iter (+/- 0) +test bench::easy0_1K ... bench: 2562 ns/iter (+/- 94) = 399 MB/s +test bench::easy0_32 ... bench: 95 ns/iter (+/- 1) = 336 MB/s +test bench::easy0_32K ... bench: 81755 ns/iter (+/- 576) = 400 MB/s +test bench::easy1_1K ... bench: 3586 ns/iter (+/- 917) = 285 MB/s +test bench::easy1_32 ... bench: 155 ns/iter (+/- 132) = 206 MB/s +test bench::easy1_32K ... bench: 113980 ns/iter (+/- 9331) = 287 MB/s +test bench::hard_1K ... bench: 54573 ns/iter (+/- 565) = 18 MB/s +test bench::hard_32 ... bench: 1806 ns/iter (+/- 44) = 17 MB/s +test bench::hard_32K ... bench: 1754465 ns/iter (+/- 7867) = 18 MB/s +test bench::literal ... bench: 299 ns/iter (+/- 1) +test bench::match_class ... bench: 2399 ns/iter (+/- 23) +test bench::match_class_in_range ... bench: 2142 ns/iter (+/- 8) +test bench::match_class_unicode ... bench: 2804 ns/iter (+/- 9) +test bench::medium_1K ... bench: 29536 ns/iter (+/- 537) = 34 MB/s +test bench::medium_32 ... bench: 962 ns/iter (+/- 59) = 33 MB/s +test bench::medium_32K ... bench: 946483 ns/iter (+/- 7106) = 34 MB/s +test bench::no_exponential ... bench: 274301 ns/iter (+/- 552) +test bench::not_literal ... bench: 2039 ns/iter (+/- 13) +test bench::one_pass_long_prefix ... bench: 573 ns/iter (+/- 3) +test bench::one_pass_long_prefix_not ... bench: 577 ns/iter (+/- 4) +test bench::one_pass_short_a ... bench: 1951 ns/iter (+/- 29) +test bench::one_pass_short_a_not ... bench: 2464 ns/iter (+/- 10) +test bench::one_pass_short_b ... bench: 1301 ns/iter (+/- 6) +test bench::one_pass_short_b_not ... bench: 1785 ns/iter (+/- 6) +test bench::replace_all ... 
bench: 2168 ns/iter (+/- 152) diff --git a/vendor/regex/record/old-bench-log/old/06-major-dynamic b/vendor/regex/record/old-bench-log/old/06-major-dynamic new file mode 100644 index 0000000..123efdd --- /dev/null +++ b/vendor/regex/record/old-bench-log/old/06-major-dynamic @@ -0,0 +1,33 @@ +test bench::anchored_literal_long_match ... bench: 206 ns/iter (+/- 7) +test bench::anchored_literal_long_non_match ... bench: 97 ns/iter (+/- 1) +test bench::anchored_literal_short_match ... bench: 193 ns/iter (+/- 1) +test bench::anchored_literal_short_non_match ... bench: 86 ns/iter (+/- 0) +test bench::easy0_1K ... bench: 356 ns/iter (+/- 136) = 2876 MB/s +test bench::easy0_1MB ... bench: 352,434 ns/iter (+/- 7,874) = 2974 MB/s +test bench::easy0_32 ... bench: 72 ns/iter (+/- 21) = 444 MB/s +test bench::easy0_32K ... bench: 11,053 ns/iter (+/- 1,388) = 2964 MB/s +test bench::easy1_1K ... bench: 331 ns/iter (+/- 162) = 3093 MB/s +test bench::easy1_1MB ... bench: 353,723 ns/iter (+/- 6,836) = 2964 MB/s +test bench::easy1_32 ... bench: 73 ns/iter (+/- 20) = 438 MB/s +test bench::easy1_32K ... bench: 10,297 ns/iter (+/- 1,137) = 3182 MB/s +test bench::hard_1K ... bench: 34,951 ns/iter (+/- 171) = 29 MB/s +test bench::hard_1MB ... bench: 63,323,613 ns/iter (+/- 279,582) = 15 MB/s +test bench::hard_32 ... bench: 1,131 ns/iter (+/- 13) = 28 MB/s +test bench::hard_32K ... bench: 1,099,921 ns/iter (+/- 1,338) = 29 MB/s +test bench::literal ... bench: 16 ns/iter (+/- 0) +test bench::match_class ... bench: 188 ns/iter (+/- 0) +test bench::match_class_in_range ... bench: 188 ns/iter (+/- 0) +test bench::match_class_unicode ... bench: 1,940 ns/iter (+/- 10) +test bench::medium_1K ... bench: 5,262 ns/iter (+/- 256) = 194 MB/s +test bench::medium_1MB ... bench: 5,295,539 ns/iter (+/- 9,808) = 197 MB/s +test bench::medium_32 ... bench: 217 ns/iter (+/- 19) = 147 MB/s +test bench::medium_32K ... bench: 169,169 ns/iter (+/- 1,606) = 193 MB/s +test bench::no_exponential ... 
bench: 293,739 ns/iter (+/- 1,632) +test bench::not_literal ... bench: 1,371 ns/iter (+/- 136) +test bench::one_pass_long_prefix ... bench: 337 ns/iter (+/- 6) +test bench::one_pass_long_prefix_not ... bench: 341 ns/iter (+/- 6) +test bench::one_pass_short_a ... bench: 1,399 ns/iter (+/- 16) +test bench::one_pass_short_a_not ... bench: 1,229 ns/iter (+/- 13) +test bench::one_pass_short_b ... bench: 844 ns/iter (+/- 24) +test bench::one_pass_short_b_not ... bench: 849 ns/iter (+/- 45) +test bench::replace_all ... bench: 579 ns/iter (+/- 3) diff --git a/vendor/regex/record/old-bench-log/old/06-major-macro b/vendor/regex/record/old-bench-log/old/06-major-macro new file mode 100644 index 0000000..199561d --- /dev/null +++ b/vendor/regex/record/old-bench-log/old/06-major-macro @@ -0,0 +1,33 @@ +test bench::anchored_literal_long_match ... bench: 225 ns/iter (+/- 22) +test bench::anchored_literal_long_non_match ... bench: 62 ns/iter (+/- 1) +test bench::anchored_literal_short_match ... bench: 225 ns/iter (+/- 1) +test bench::anchored_literal_short_non_match ... bench: 60 ns/iter (+/- 1) +test bench::easy0_1K ... bench: 29,984 ns/iter (+/- 190) = 34 MB/s +test bench::easy0_1MB ... bench: 30,641,690 ns/iter (+/- 110,535) = 33 MB/s +test bench::easy0_32 ... bench: 981 ns/iter (+/- 12) = 32 MB/s +test bench::easy0_32K ... bench: 957,358 ns/iter (+/- 2,633) = 34 MB/s +test bench::easy1_1K ... bench: 29,636 ns/iter (+/- 150) = 34 MB/s +test bench::easy1_1MB ... bench: 30,295,321 ns/iter (+/- 98,181) = 34 MB/s +test bench::easy1_32 ... bench: 971 ns/iter (+/- 30) = 32 MB/s +test bench::easy1_32K ... bench: 947,307 ns/iter (+/- 4,258) = 34 MB/s +test bench::hard_1K ... bench: 54,856 ns/iter (+/- 209) = 18 MB/s +test bench::hard_1MB ... bench: 56,126,571 ns/iter (+/- 224,163) = 17 MB/s +test bench::hard_32 ... bench: 1,776 ns/iter (+/- 23) = 18 MB/s +test bench::hard_32K ... bench: 1,753,833 ns/iter (+/- 54,427) = 18 MB/s +test bench::literal ... 
bench: 1,516 ns/iter (+/- 6) +test bench::match_class ... bench: 2,429 ns/iter (+/- 11) +test bench::match_class_in_range ... bench: 2,398 ns/iter (+/- 4) +test bench::match_class_unicode ... bench: 12,915 ns/iter (+/- 29) +test bench::medium_1K ... bench: 31,914 ns/iter (+/- 276) = 32 MB/s +test bench::medium_1MB ... bench: 32,617,173 ns/iter (+/- 68,114) = 31 MB/s +test bench::medium_32 ... bench: 1,046 ns/iter (+/- 42) = 30 MB/s +test bench::medium_32K ... bench: 1,019,516 ns/iter (+/- 3,788) = 32 MB/s +test bench::no_exponential ... bench: 303,239 ns/iter (+/- 518) +test bench::not_literal ... bench: 1,756 ns/iter (+/- 115) +test bench::one_pass_long_prefix ... bench: 834 ns/iter (+/- 7) +test bench::one_pass_long_prefix_not ... bench: 858 ns/iter (+/- 15) +test bench::one_pass_short_a ... bench: 1,597 ns/iter (+/- 9) +test bench::one_pass_short_a_not ... bench: 1,950 ns/iter (+/- 21) +test bench::one_pass_short_b ... bench: 1,077 ns/iter (+/- 5) +test bench::one_pass_short_b_not ... bench: 1,596 ns/iter (+/- 9) +test bench::replace_all ... bench: 1,288 ns/iter (+/- 13) diff --git a/vendor/regex/record/old-bench-log/old/07-prefix-improvements b/vendor/regex/record/old-bench-log/old/07-prefix-improvements new file mode 100644 index 0000000..55477fd --- /dev/null +++ b/vendor/regex/record/old-bench-log/old/07-prefix-improvements @@ -0,0 +1,33 @@ +test bench::anchored_literal_long_match ... bench: 197 ns/iter (+/- 9) +test bench::anchored_literal_long_non_match ... bench: 95 ns/iter (+/- 1) +test bench::anchored_literal_short_match ... bench: 193 ns/iter (+/- 2) +test bench::anchored_literal_short_non_match ... bench: 85 ns/iter (+/- 2) +test bench::easy0_1K ... bench: 304 ns/iter (+/- 119) = 3368 MB/s +test bench::easy0_1MB ... bench: 281,912 ns/iter (+/- 5,274) = 3719 MB/s +test bench::easy0_32 ... bench: 74 ns/iter (+/- 16) = 432 MB/s +test bench::easy0_32K ... bench: 8,909 ns/iter (+/- 667) = 3678 MB/s +test bench::easy1_1K ... 
bench: 300 ns/iter (+/- 111) = 3413 MB/s +test bench::easy1_1MB ... bench: 282,250 ns/iter (+/- 5,556) = 3714 MB/s +test bench::easy1_32 ... bench: 98 ns/iter (+/- 17) = 326 MB/s +test bench::easy1_32K ... bench: 8,105 ns/iter (+/- 593) = 4042 MB/s +test bench::hard_1K ... bench: 34,562 ns/iter (+/- 211) = 29 MB/s +test bench::hard_1MB ... bench: 64,510,947 ns/iter (+/- 308,627) = 15 MB/s +test bench::hard_32 ... bench: 1,139 ns/iter (+/- 26) = 28 MB/s +test bench::hard_32K ... bench: 1,102,562 ns/iter (+/- 1,850) = 29 MB/s +test bench::literal ... bench: 15 ns/iter (+/- 0) +test bench::match_class ... bench: 105 ns/iter (+/- 1) +test bench::match_class_in_range ... bench: 105 ns/iter (+/- 1) +test bench::match_class_unicode ... bench: 2,270 ns/iter (+/- 185) +test bench::medium_1K ... bench: 2,262 ns/iter (+/- 73) = 452 MB/s +test bench::medium_1MB ... bench: 2,185,098 ns/iter (+/- 3,007) = 479 MB/s +test bench::medium_32 ... bench: 139 ns/iter (+/- 1) = 230 MB/s +test bench::medium_32K ... bench: 72,320 ns/iter (+/- 193) = 453 MB/s +test bench::no_exponential ... bench: 300,699 ns/iter (+/- 494) +test bench::not_literal ... bench: 1,462 ns/iter (+/- 89) +test bench::one_pass_long_prefix ... bench: 283 ns/iter (+/- 1) +test bench::one_pass_long_prefix_not ... bench: 287 ns/iter (+/- 0) +test bench::one_pass_short_a ... bench: 1,131 ns/iter (+/- 11) +test bench::one_pass_short_a_not ... bench: 1,259 ns/iter (+/- 12) +test bench::one_pass_short_b ... bench: 883 ns/iter (+/- 15) +test bench::one_pass_short_b_not ... bench: 799 ns/iter (+/- 28) +test bench::replace_all ... bench: 170 ns/iter (+/- 1) diff --git a/vendor/regex/record/old-bench-log/old/08-case-fixes b/vendor/regex/record/old-bench-log/old/08-case-fixes new file mode 100644 index 0000000..7609f6c --- /dev/null +++ b/vendor/regex/record/old-bench-log/old/08-case-fixes @@ -0,0 +1,33 @@ +test bench::anchored_literal_long_match ... bench: 192 ns/iter (+/- 11) +test bench::anchored_literal_long_non_match ... 
bench: 92 ns/iter (+/- 4) +test bench::anchored_literal_short_match ... bench: 182 ns/iter (+/- 6) +test bench::anchored_literal_short_non_match ... bench: 82 ns/iter (+/- 1) +test bench::easy0_1K ... bench: 277 ns/iter (+/- 79) = 3696 MB/s +test bench::easy0_1MB ... bench: 230,829 ns/iter (+/- 5,712) = 4542 MB/s +test bench::easy0_32 ... bench: 70 ns/iter (+/- 4) = 457 MB/s +test bench::easy0_32K ... bench: 8,444 ns/iter (+/- 492) = 3880 MB/s +test bench::easy1_1K ... bench: 272 ns/iter (+/- 98) = 3764 MB/s +test bench::easy1_1MB ... bench: 273,867 ns/iter (+/- 6,351) = 3828 MB/s +test bench::easy1_32 ... bench: 72 ns/iter (+/- 15) = 444 MB/s +test bench::easy1_32K ... bench: 8,109 ns/iter (+/- 540) = 4040 MB/s +test bench::hard_1K ... bench: 31,043 ns/iter (+/- 1,237) = 32 MB/s +test bench::hard_1MB ... bench: 60,077,413 ns/iter (+/- 129,611) = 16 MB/s +test bench::hard_32 ... bench: 1,036 ns/iter (+/- 20) = 30 MB/s +test bench::hard_32K ... bench: 996,238 ns/iter (+/- 3,181) = 32 MB/s +test bench::literal ... bench: 15 ns/iter (+/- 0) +test bench::match_class ... bench: 75 ns/iter (+/- 7) +test bench::match_class_in_range ... bench: 77 ns/iter (+/- 7) +test bench::match_class_unicode ... bench: 2,057 ns/iter (+/- 102) +test bench::medium_1K ... bench: 2,252 ns/iter (+/- 63) = 454 MB/s +test bench::medium_1MB ... bench: 2,186,091 ns/iter (+/- 7,496) = 479 MB/s +test bench::medium_32 ... bench: 132 ns/iter (+/- 2) = 242 MB/s +test bench::medium_32K ... bench: 72,394 ns/iter (+/- 342) = 452 MB/s +test bench::no_exponential ... bench: 286,662 ns/iter (+/- 1,150) +test bench::not_literal ... bench: 1,130 ns/iter (+/- 10) +test bench::one_pass_long_prefix ... bench: 271 ns/iter (+/- 0) +test bench::one_pass_long_prefix_not ... bench: 276 ns/iter (+/- 3) +test bench::one_pass_short_a ... bench: 1,147 ns/iter (+/- 10) +test bench::one_pass_short_a_not ... bench: 901 ns/iter (+/- 8) +test bench::one_pass_short_b ... 
bench: 887 ns/iter (+/- 7) +test bench::one_pass_short_b_not ... bench: 777 ns/iter (+/- 6) +test bench::replace_all ... bench: 154 ns/iter (+/- 0) diff --git a/vendor/regex/record/old-bench-log/old/09-before-compiler-rewrite b/vendor/regex/record/old-bench-log/old/09-before-compiler-rewrite new file mode 100644 index 0000000..fe67d09 --- /dev/null +++ b/vendor/regex/record/old-bench-log/old/09-before-compiler-rewrite @@ -0,0 +1,33 @@ +test bench::anchored_literal_long_match ... bench: 156 ns/iter (+/- 5) +test bench::anchored_literal_long_non_match ... bench: 85 ns/iter (+/- 7) +test bench::anchored_literal_short_match ... bench: 145 ns/iter (+/- 3) +test bench::anchored_literal_short_non_match ... bench: 76 ns/iter (+/- 2) +test bench::easy0_1K ... bench: 269 ns/iter (+/- 63) = 3806 MB/s +test bench::easy0_1MB ... bench: 232,461 ns/iter (+/- 13,022) = 4509 MB/s +test bench::easy0_32 ... bench: 63 ns/iter (+/- 6) = 507 MB/s +test bench::easy0_32K ... bench: 8,358 ns/iter (+/- 430) = 3920 MB/s +test bench::easy1_1K ... bench: 274 ns/iter (+/- 101) = 3737 MB/s +test bench::easy1_1MB ... bench: 278,949 ns/iter (+/- 11,324) = 3758 MB/s +test bench::easy1_32 ... bench: 63 ns/iter (+/- 15) = 507 MB/s +test bench::easy1_32K ... bench: 7,731 ns/iter (+/- 488) = 4238 MB/s +test bench::hard_1K ... bench: 44,685 ns/iter (+/- 661) = 22 MB/s +test bench::hard_1MB ... bench: 60,108,237 ns/iter (+/- 814,810) = 16 MB/s +test bench::hard_32 ... bench: 1,412 ns/iter (+/- 38) = 22 MB/s +test bench::hard_32K ... bench: 1,363,335 ns/iter (+/- 21,316) = 24 MB/s +test bench::literal ... bench: 14 ns/iter (+/- 0) +test bench::match_class ... bench: 81 ns/iter (+/- 0) +test bench::match_class_in_range ... bench: 81 ns/iter (+/- 2) +test bench::match_class_unicode ... bench: 2,978 ns/iter (+/- 64) +test bench::medium_1K ... bench: 2,239 ns/iter (+/- 68) = 457 MB/s +test bench::medium_1MB ... bench: 2,215,729 ns/iter (+/- 20,897) = 472 MB/s +test bench::medium_32 ... 
bench: 124 ns/iter (+/- 2) = 258 MB/s +test bench::medium_32K ... bench: 72,486 ns/iter (+/- 1,027) = 452 MB/s +test bench::no_exponential ... bench: 282,992 ns/iter (+/- 8,102) +test bench::not_literal ... bench: 1,526 ns/iter (+/- 32) +test bench::one_pass_long_prefix ... bench: 307 ns/iter (+/- 7) +test bench::one_pass_long_prefix_not ... bench: 311 ns/iter (+/- 8) +test bench::one_pass_short_a ... bench: 623 ns/iter (+/- 12) +test bench::one_pass_short_a_not ... bench: 920 ns/iter (+/- 19) +test bench::one_pass_short_b ... bench: 554 ns/iter (+/- 13) +test bench::one_pass_short_b_not ... bench: 740 ns/iter (+/- 12) +test bench::replace_all ... bench: 155 ns/iter (+/- 5) diff --git a/vendor/regex/record/old-bench-log/old/10-compiler-rewrite b/vendor/regex/record/old-bench-log/old/10-compiler-rewrite new file mode 100644 index 0000000..e25a602 --- /dev/null +++ b/vendor/regex/record/old-bench-log/old/10-compiler-rewrite @@ -0,0 +1,33 @@ +test bench::anchored_literal_long_match ... bench: 145 ns/iter (+/- 1) +test bench::anchored_literal_long_non_match ... bench: 92 ns/iter (+/- 2) +test bench::anchored_literal_short_match ... bench: 129 ns/iter (+/- 3) +test bench::anchored_literal_short_non_match ... bench: 72 ns/iter (+/- 1) +test bench::easy0_1K ... bench: 268 ns/iter (+/- 88) = 3820 MB/s +test bench::easy0_1MB ... bench: 234,067 ns/iter (+/- 4,663) = 4479 MB/s +test bench::easy0_32 ... bench: 64 ns/iter (+/- 4) = 500 MB/s +test bench::easy0_32K ... bench: 8,298 ns/iter (+/- 521) = 3948 MB/s +test bench::easy1_1K ... bench: 275 ns/iter (+/- 95) = 3723 MB/s +test bench::easy1_1MB ... bench: 280,466 ns/iter (+/- 5,938) = 3738 MB/s +test bench::easy1_32 ... bench: 64 ns/iter (+/- 16) = 500 MB/s +test bench::easy1_32K ... bench: 7,693 ns/iter (+/- 595) = 4259 MB/s +test bench::hard_1K ... bench: 27,844 ns/iter (+/- 1,012) = 36 MB/s +test bench::hard_1MB ... bench: 52,323,489 ns/iter (+/- 1,251,665) = 19 MB/s +test bench::hard_32 ... 
bench: 970 ns/iter (+/- 92) = 32 MB/s +test bench::hard_32K ... bench: 896,945 ns/iter (+/- 29,977) = 36 MB/s +test bench::literal ... bench: 13 ns/iter (+/- 1) +test bench::match_class ... bench: 80 ns/iter (+/- 0) +test bench::match_class_in_range ... bench: 80 ns/iter (+/- 0) +test bench::match_class_unicode ... bench: 2,150 ns/iter (+/- 18) +test bench::medium_1K ... bench: 2,241 ns/iter (+/- 55) = 456 MB/s +test bench::medium_1MB ... bench: 2,186,354 ns/iter (+/- 9,134) = 479 MB/s +test bench::medium_32 ... bench: 125 ns/iter (+/- 1) = 256 MB/s +test bench::medium_32K ... bench: 72,156 ns/iter (+/- 145) = 454 MB/s +test bench::no_exponential ... bench: 305,034 ns/iter (+/- 1,134) +test bench::not_literal ... bench: 1,169 ns/iter (+/- 105) +test bench::one_pass_long_prefix ... bench: 257 ns/iter (+/- 4) +test bench::one_pass_long_prefix_not ... bench: 276 ns/iter (+/- 4) +test bench::one_pass_short_a ... bench: 680 ns/iter (+/- 3) +test bench::one_pass_short_a_not ... bench: 804 ns/iter (+/- 48) +test bench::one_pass_short_b ... bench: 337 ns/iter (+/- 3) +test bench::one_pass_short_b_not ... bench: 339 ns/iter (+/- 5) +test bench::replace_all ... bench: 150 ns/iter (+/- 1) diff --git a/vendor/regex/record/old-bench-log/old/11-compiler-rewrite b/vendor/regex/record/old-bench-log/old/11-compiler-rewrite new file mode 100644 index 0000000..3296d43 --- /dev/null +++ b/vendor/regex/record/old-bench-log/old/11-compiler-rewrite @@ -0,0 +1,33 @@ +test bench::anchored_literal_long_match ... bench: 171 ns/iter (+/- 20) +test bench::anchored_literal_long_non_match ... bench: 90 ns/iter (+/- 8) +test bench::anchored_literal_short_match ... bench: 180 ns/iter (+/- 33) +test bench::anchored_literal_short_non_match ... bench: 78 ns/iter (+/- 9) +test bench::easy0_1K ... bench: 272 ns/iter (+/- 82) = 3764 MB/s +test bench::easy0_1MB ... bench: 233,014 ns/iter (+/- 22,144) = 4500 MB/s +test bench::easy0_32 ... bench: 62 ns/iter (+/- 6) = 516 MB/s +test bench::easy0_32K ... 
bench: 8,490 ns/iter (+/- 905) = 3859 MB/s +test bench::easy1_1K ... bench: 273 ns/iter (+/- 100) = 3750 MB/s +test bench::easy1_1MB ... bench: 279,901 ns/iter (+/- 5,598) = 3746 MB/s +test bench::easy1_32 ... bench: 62 ns/iter (+/- 6) = 516 MB/s +test bench::easy1_32K ... bench: 7,713 ns/iter (+/- 566) = 4248 MB/s +test bench::hard_1K ... bench: 38,641 ns/iter (+/- 605) = 26 MB/s +test bench::hard_1MB ... bench: 56,579,116 ns/iter (+/- 1,193,231) = 18 MB/s +test bench::hard_32 ... bench: 1,252 ns/iter (+/- 24) = 25 MB/s +test bench::hard_32K ... bench: 1,247,639 ns/iter (+/- 12,774) = 26 MB/s +test bench::literal ... bench: 13 ns/iter (+/- 1) +test bench::match_class ... bench: 80 ns/iter (+/- 1) +test bench::match_class_in_range ... bench: 80 ns/iter (+/- 0) +test bench::match_class_unicode ... bench: 2,459 ns/iter (+/- 77) +test bench::medium_1K ... bench: 2,244 ns/iter (+/- 63) = 456 MB/s +test bench::medium_1MB ... bench: 2,192,052 ns/iter (+/- 21,460) = 478 MB/s +test bench::medium_32 ... bench: 122 ns/iter (+/- 3) = 262 MB/s +test bench::medium_32K ... bench: 73,167 ns/iter (+/- 15,655) = 447 MB/s +test bench::no_exponential ... bench: 289,292 ns/iter (+/- 1,488) +test bench::not_literal ... bench: 1,480 ns/iter (+/- 18) +test bench::one_pass_long_prefix ... bench: 324 ns/iter (+/- 15) +test bench::one_pass_long_prefix_not ... bench: 337 ns/iter (+/- 5) +test bench::one_pass_short_a ... bench: 1,161 ns/iter (+/- 10) +test bench::one_pass_short_a_not ... bench: 798 ns/iter (+/- 6) +test bench::one_pass_short_b ... bench: 456 ns/iter (+/- 6) +test bench::one_pass_short_b_not ... bench: 452 ns/iter (+/- 33) +test bench::replace_all ... bench: 148 ns/iter (+/- 0) diff --git a/vendor/regex/record/old-bench-log/old/12-executor b/vendor/regex/record/old-bench-log/old/12-executor new file mode 100644 index 0000000..8ec8561 --- /dev/null +++ b/vendor/regex/record/old-bench-log/old/12-executor @@ -0,0 +1,35 @@ +test bench::anchored_literal_long_match ... 
bench: 179 ns/iter (+/- 5) +test bench::anchored_literal_long_non_match ... bench: 90 ns/iter (+/- 1) +test bench::anchored_literal_short_match ... bench: 164 ns/iter (+/- 16) +test bench::anchored_literal_short_non_match ... bench: 79 ns/iter (+/- 1) +test bench::compile_simple ... bench: 3,708 ns/iter (+/- 225) +test bench::compile_unicode ... bench: 5,871 ns/iter (+/- 264) +test bench::easy0_1K ... bench: 263 ns/iter (+/- 92) = 3893 MB/s +test bench::easy0_1MB ... bench: 217,835 ns/iter (+/- 4,074) = 4813 MB/s +test bench::easy0_32 ... bench: 67 ns/iter (+/- 1) = 477 MB/s +test bench::easy0_32K ... bench: 8,204 ns/iter (+/- 426) = 3994 MB/s +test bench::easy1_1K ... bench: 276 ns/iter (+/- 100) = 3710 MB/s +test bench::easy1_1MB ... bench: 284,086 ns/iter (+/- 6,516) = 3691 MB/s +test bench::easy1_32 ... bench: 70 ns/iter (+/- 15) = 457 MB/s +test bench::easy1_32K ... bench: 7,844 ns/iter (+/- 556) = 4177 MB/s +test bench::hard_1K ... bench: 30,062 ns/iter (+/- 1,684) = 34 MB/s +test bench::hard_1MB ... bench: 50,839,701 ns/iter (+/- 104,343) = 20 MB/s +test bench::hard_32 ... bench: 1,009 ns/iter (+/- 48) = 31 MB/s +test bench::hard_32K ... bench: 965,341 ns/iter (+/- 45,075) = 33 MB/s +test bench::literal ... bench: 12 ns/iter (+/- 0) +test bench::match_class ... bench: 80 ns/iter (+/- 0) +test bench::match_class_in_range ... bench: 80 ns/iter (+/- 1) +test bench::match_class_unicode ... bench: 2,150 ns/iter (+/- 22) +test bench::medium_1K ... bench: 2,262 ns/iter (+/- 66) = 452 MB/s +test bench::medium_1MB ... bench: 2,193,428 ns/iter (+/- 6,147) = 478 MB/s +test bench::medium_32 ... bench: 129 ns/iter (+/- 1) = 248 MB/s +test bench::medium_32K ... bench: 72,629 ns/iter (+/- 348) = 451 MB/s +test bench::no_exponential ... bench: 289,043 ns/iter (+/- 2,478) +test bench::not_literal ... bench: 1,195 ns/iter (+/- 10) +test bench::one_pass_long_prefix ... bench: 265 ns/iter (+/- 3) +test bench::one_pass_long_prefix_not ... 
bench: 270 ns/iter (+/- 4) +test bench::one_pass_short_a ... bench: 730 ns/iter (+/- 4) +test bench::one_pass_short_a_not ... bench: 712 ns/iter (+/- 4) +test bench::one_pass_short_b ... bench: 445 ns/iter (+/- 49) +test bench::one_pass_short_b_not ... bench: 406 ns/iter (+/- 72) +test bench::replace_all ... bench: 136 ns/iter (+/- 2) diff --git a/vendor/regex/record/old-bench-log/old/12-executor-bytes b/vendor/regex/record/old-bench-log/old/12-executor-bytes new file mode 100644 index 0000000..c036920 --- /dev/null +++ b/vendor/regex/record/old-bench-log/old/12-executor-bytes @@ -0,0 +1,35 @@ +test bench::anchored_literal_long_match ... bench: 190 ns/iter (+/- 12) +test bench::anchored_literal_long_non_match ... bench: 85 ns/iter (+/- 2) +test bench::anchored_literal_short_match ... bench: 147 ns/iter (+/- 9) +test bench::anchored_literal_short_non_match ... bench: 74 ns/iter (+/- 5) +test bench::compile_simple ... bench: 4,218 ns/iter (+/- 201) +test bench::compile_unicode ... bench: 402,353 ns/iter (+/- 2,642) +test bench::easy0_1K ... bench: 253 ns/iter (+/- 79) = 4047 MB/s +test bench::easy0_1MB ... bench: 215,308 ns/iter (+/- 3,474) = 4870 MB/s +test bench::easy0_32 ... bench: 64 ns/iter (+/- 4) = 500 MB/s +test bench::easy0_32K ... bench: 8,134 ns/iter (+/- 435) = 4028 MB/s +test bench::easy1_1K ... bench: 277 ns/iter (+/- 105) = 3696 MB/s +test bench::easy1_1MB ... bench: 283,435 ns/iter (+/- 5,975) = 3699 MB/s +test bench::easy1_32 ... bench: 64 ns/iter (+/- 14) = 500 MB/s +test bench::easy1_32K ... bench: 7,832 ns/iter (+/- 575) = 4183 MB/s +test bench::hard_1K ... bench: 35,380 ns/iter (+/- 772) = 28 MB/s +test bench::hard_1MB ... bench: 46,639,535 ns/iter (+/- 456,010) = 22 MB/s +test bench::hard_32 ... bench: 1,110 ns/iter (+/- 53) = 28 MB/s +test bench::hard_32K ... bench: 1,146,751 ns/iter (+/- 17,290) = 28 MB/s +test bench::literal ... bench: 12 ns/iter (+/- 0) +test bench::match_class ... 
bench: 80 ns/iter (+/- 1) +test bench::match_class_in_range ... bench: 80 ns/iter (+/- 0) +test bench::match_class_unicode ... bench: 2,487,088 ns/iter (+/- 103,259) +test bench::medium_1K ... bench: 2,253 ns/iter (+/- 52) = 454 MB/s +test bench::medium_1MB ... bench: 2,193,344 ns/iter (+/- 7,582) = 478 MB/s +test bench::medium_32 ... bench: 119 ns/iter (+/- 5) = 268 MB/s +test bench::medium_32K ... bench: 72,569 ns/iter (+/- 283) = 451 MB/s +test bench::no_exponential ... bench: 292,840 ns/iter (+/- 2,823) +test bench::not_literal ... bench: 6,417 ns/iter (+/- 26) +test bench::one_pass_long_prefix ... bench: 304 ns/iter (+/- 0) +test bench::one_pass_long_prefix_not ... bench: 943 ns/iter (+/- 44) +test bench::one_pass_short_a ... bench: 688 ns/iter (+/- 11) +test bench::one_pass_short_a_not ... bench: 687 ns/iter (+/- 7) +test bench::one_pass_short_b ... bench: 589 ns/iter (+/- 6) +test bench::one_pass_short_b_not ... bench: 357 ns/iter (+/- 11) +test bench::replace_all ... bench: 131 ns/iter (+/- 1) diff --git a/vendor/regex/record/old-bench-log/old/13-cache-byte-range-suffixes b/vendor/regex/record/old-bench-log/old/13-cache-byte-range-suffixes new file mode 100644 index 0000000..5a2ec09 --- /dev/null +++ b/vendor/regex/record/old-bench-log/old/13-cache-byte-range-suffixes @@ -0,0 +1,35 @@ +test bench::anchored_literal_long_match ... bench: 174 ns/iter (+/- 65) +test bench::anchored_literal_long_non_match ... bench: 94 ns/iter (+/- 1) +test bench::anchored_literal_short_match ... bench: 142 ns/iter (+/- 1) +test bench::anchored_literal_short_non_match ... bench: 82 ns/iter (+/- 0) +test bench::compile_simple ... bench: 4,878 ns/iter (+/- 207) +test bench::compile_unicode ... bench: 679,701 ns/iter (+/- 10,264) +test bench::easy0_1K ... bench: 257 ns/iter (+/- 83) = 3984 MB/s +test bench::easy0_1MB ... bench: 217,698 ns/iter (+/- 3,307) = 4816 MB/s +test bench::easy0_32 ... bench: 61 ns/iter (+/- 3) = 524 MB/s +test bench::easy0_32K ... 
bench: 8,144 ns/iter (+/- 449) = 4023 MB/s +test bench::easy1_1K ... bench: 276 ns/iter (+/- 106) = 3710 MB/s +test bench::easy1_1MB ... bench: 285,518 ns/iter (+/- 4,933) = 3672 MB/s +test bench::easy1_32 ... bench: 61 ns/iter (+/- 12) = 524 MB/s +test bench::easy1_32K ... bench: 7,896 ns/iter (+/- 508) = 4149 MB/s +test bench::hard_1K ... bench: 35,361 ns/iter (+/- 684) = 28 MB/s +test bench::hard_1MB ... bench: 48,691,236 ns/iter (+/- 2,316,446) = 21 MB/s +test bench::hard_32 ... bench: 1,087 ns/iter (+/- 33) = 29 MB/s +test bench::hard_32K ... bench: 1,147,627 ns/iter (+/- 4,982) = 28 MB/s +test bench::literal ... bench: 12 ns/iter (+/- 0) +test bench::match_class ... bench: 80 ns/iter (+/- 0) +test bench::match_class_in_range ... bench: 80 ns/iter (+/- 0) +test bench::match_class_unicode ... bench: 2,431,592 ns/iter (+/- 89,268) +test bench::medium_1K ... bench: 2,245 ns/iter (+/- 93) = 456 MB/s +test bench::medium_1MB ... bench: 2,192,828 ns/iter (+/- 4,343) = 478 MB/s +test bench::medium_32 ... bench: 120 ns/iter (+/- 2) = 266 MB/s +test bench::medium_32K ... bench: 72,996 ns/iter (+/- 627) = 448 MB/s +test bench::no_exponential ... bench: 290,775 ns/iter (+/- 1,176) +test bench::not_literal ... bench: 5,282 ns/iter (+/- 199) +test bench::one_pass_long_prefix ... bench: 294 ns/iter (+/- 3) +test bench::one_pass_long_prefix_not ... bench: 315 ns/iter (+/- 7) +test bench::one_pass_short_a ... bench: 708 ns/iter (+/- 21) +test bench::one_pass_short_a_not ... bench: 861 ns/iter (+/- 9) +test bench::one_pass_short_b ... bench: 607 ns/iter (+/- 2) +test bench::one_pass_short_b_not ... bench: 344 ns/iter (+/- 11) +test bench::replace_all ... bench: 135 ns/iter (+/- 1) diff --git a/vendor/regex/src/backtrack.rs b/vendor/regex/src/backtrack.rs deleted file mode 100644 index 4d83856..0000000 --- a/vendor/regex/src/backtrack.rs +++ /dev/null @@ -1,282 +0,0 @@ -// This is the backtracking matching engine. 
It has the same exact capability -// as the full NFA simulation, except it is artificially restricted to small -// regexes on small inputs because of its memory requirements. -// -// In particular, this is a *bounded* backtracking engine. It retains worst -// case linear time by keeping track of the states that it has visited (using a -// bitmap). Namely, once a state is visited, it is never visited again. Since a -// state is keyed by `(instruction index, input index)`, we have that its time -// complexity is `O(mn)` (i.e., linear in the size of the search text). -// -// The backtracking engine can beat out the NFA simulation on small -// regexes/inputs because it doesn't have to keep track of multiple copies of -// the capture groups. In benchmarks, the backtracking engine is roughly twice -// as fast as the full NFA simulation. Note though that its performance doesn't -// scale, even if you're willing to live with the memory requirements. Namely, -// the bitset has to be zeroed on each execution, which becomes quite expensive -// on large bitsets. - -use crate::exec::ProgramCache; -use crate::input::{Input, InputAt}; -use crate::prog::{InstPtr, Program}; -use crate::re_trait::Slot; - -type Bits = u32; - -const BIT_SIZE: usize = 32; -const MAX_SIZE_BYTES: usize = 256 * (1 << 10); // 256 KB - -/// Returns true iff the given regex and input should be executed by this -/// engine with reasonable memory usage. -pub fn should_exec(num_insts: usize, text_len: usize) -> bool { - // Total memory usage in bytes is determined by: - // - // ((len(insts) * (len(input) + 1) + bits - 1) / bits) * (size_of(u32)) - // - // The actual limit picked is pretty much a heuristic. - // See: https://github.com/rust-lang/regex/issues/215 - let size = ((num_insts * (text_len + 1) + BIT_SIZE - 1) / BIT_SIZE) * 4; - size <= MAX_SIZE_BYTES -} - -/// A backtracking matching engine. 
-#[derive(Debug)] -pub struct Bounded<'a, 'm, 'r, 's, I> { - prog: &'r Program, - input: I, - matches: &'m mut [bool], - slots: &'s mut [Slot], - m: &'a mut Cache, -} - -/// Shared cached state between multiple invocations of a backtracking engine -/// in the same thread. -#[derive(Clone, Debug)] -pub struct Cache { - jobs: Vec, - visited: Vec, -} - -impl Cache { - /// Create new empty cache for the backtracking engine. - pub fn new(_prog: &Program) -> Self { - Cache { jobs: vec![], visited: vec![] } - } -} - -/// A job is an explicit unit of stack space in the backtracking engine. -/// -/// The "normal" representation is a single state transition, which corresponds -/// to an NFA state and a character in the input. However, the backtracking -/// engine must keep track of old capture group values. We use the explicit -/// stack to do it. -#[derive(Clone, Copy, Debug)] -enum Job { - Inst { ip: InstPtr, at: InputAt }, - SaveRestore { slot: usize, old_pos: Option }, -} - -impl<'a, 'm, 'r, 's, I: Input> Bounded<'a, 'm, 'r, 's, I> { - /// Execute the backtracking matching engine. - /// - /// If there's a match, `exec` returns `true` and populates the given - /// captures accordingly. - pub fn exec( - prog: &'r Program, - cache: &ProgramCache, - matches: &'m mut [bool], - slots: &'s mut [Slot], - input: I, - start: usize, - end: usize, - ) -> bool { - let mut cache = cache.borrow_mut(); - let cache = &mut cache.backtrack; - let start = input.at(start); - let mut b = Bounded { prog, input, matches, slots, m: cache }; - b.exec_(start, end) - } - - /// Clears the cache such that the backtracking engine can be executed - /// on some input of fixed length. - fn clear(&mut self) { - // Reset the job memory so that we start fresh. - self.m.jobs.clear(); - - // Now we need to clear the bit state set. - // We do this by figuring out how much space we need to keep track - // of the states we've visited. - // Then we reset all existing allocated space to 0. 
- // Finally, we request more space if we need it. - // - // This is all a little circuitous, but doing this using unchecked - // operations doesn't seem to have a measurable impact on performance. - // (Probably because backtracking is limited to such small - // inputs/regexes in the first place.) - let visited_len = - (self.prog.len() * (self.input.len() + 1) + BIT_SIZE - 1) - / BIT_SIZE; - self.m.visited.truncate(visited_len); - for v in &mut self.m.visited { - *v = 0; - } - if visited_len > self.m.visited.len() { - let len = self.m.visited.len(); - self.m.visited.reserve_exact(visited_len - len); - for _ in 0..(visited_len - len) { - self.m.visited.push(0); - } - } - } - - /// Start backtracking at the given position in the input, but also look - /// for literal prefixes. - fn exec_(&mut self, mut at: InputAt, end: usize) -> bool { - self.clear(); - // If this is an anchored regex at the beginning of the input, then - // we're either already done or we only need to try backtracking once. - if self.prog.is_anchored_start { - return if !at.is_start() { false } else { self.backtrack(at) }; - } - let mut matched = false; - loop { - if !self.prog.prefixes.is_empty() { - at = match self.input.prefix_at(&self.prog.prefixes, at) { - None => break, - Some(at) => at, - }; - } - matched = self.backtrack(at) || matched; - if matched && self.prog.matches.len() == 1 { - return true; - } - if at.pos() >= end { - break; - } - at = self.input.at(at.next_pos()); - } - matched - } - - /// The main backtracking loop starting at the given input position. - fn backtrack(&mut self, start: InputAt) -> bool { - // N.B. We use an explicit stack to avoid recursion. - // To avoid excessive pushing and popping, most transitions are handled - // in the `step` helper function, which only pushes to the stack when - // there's a capture or a branch. 
- let mut matched = false; - self.m.jobs.push(Job::Inst { ip: 0, at: start }); - while let Some(job) = self.m.jobs.pop() { - match job { - Job::Inst { ip, at } => { - if self.step(ip, at) { - // Only quit if we're matching one regex. - // If we're matching a regex set, then mush on and - // try to find other matches (if we want them). - if self.prog.matches.len() == 1 { - return true; - } - matched = true; - } - } - Job::SaveRestore { slot, old_pos } => { - if slot < self.slots.len() { - self.slots[slot] = old_pos; - } - } - } - } - matched - } - - fn step(&mut self, mut ip: InstPtr, mut at: InputAt) -> bool { - use crate::prog::Inst::*; - loop { - // This loop is an optimization to avoid constantly pushing/popping - // from the stack. Namely, if we're pushing a job only to run it - // next, avoid the push and just mutate `ip` (and possibly `at`) - // in place. - if self.has_visited(ip, at) { - return false; - } - match self.prog[ip] { - Match(slot) => { - if slot < self.matches.len() { - self.matches[slot] = true; - } - return true; - } - Save(ref inst) => { - if let Some(&old_pos) = self.slots.get(inst.slot) { - // If this path doesn't work out, then we save the old - // capture index (if one exists) in an alternate - // job. If the next path fails, then the alternate - // job is popped and the old capture index is restored. 
- self.m.jobs.push(Job::SaveRestore { - slot: inst.slot, - old_pos, - }); - self.slots[inst.slot] = Some(at.pos()); - } - ip = inst.goto; - } - Split(ref inst) => { - self.m.jobs.push(Job::Inst { ip: inst.goto2, at }); - ip = inst.goto1; - } - EmptyLook(ref inst) => { - if self.input.is_empty_match(at, inst) { - ip = inst.goto; - } else { - return false; - } - } - Char(ref inst) => { - if inst.c == at.char() { - ip = inst.goto; - at = self.input.at(at.next_pos()); - } else { - return false; - } - } - Ranges(ref inst) => { - if inst.matches(at.char()) { - ip = inst.goto; - at = self.input.at(at.next_pos()); - } else { - return false; - } - } - Bytes(ref inst) => { - if let Some(b) = at.byte() { - if inst.matches(b) { - ip = inst.goto; - at = self.input.at(at.next_pos()); - continue; - } - } - return false; - } - } - } - } - - fn has_visited(&mut self, ip: InstPtr, at: InputAt) -> bool { - let k = ip * (self.input.len() + 1) + at.pos(); - let k1 = k / BIT_SIZE; - let k2 = usize_to_u32(1 << (k & (BIT_SIZE - 1))); - if self.m.visited[k1] & k2 == 0 { - self.m.visited[k1] |= k2; - false - } else { - true - } - } -} - -fn usize_to_u32(n: usize) -> u32 { - if (n as u64) > (::std::u32::MAX as u64) { - panic!("BUG: {} is too big to fit into u32", n) - } - n as u32 -} diff --git a/vendor/regex/src/builders.rs b/vendor/regex/src/builders.rs new file mode 100644 index 0000000..c111a96 --- /dev/null +++ b/vendor/regex/src/builders.rs @@ -0,0 +1,2539 @@ +#![allow(warnings)] + +// This module defines an internal builder that encapsulates all interaction +// with meta::Regex construction, and then 4 public API builders that wrap +// around it. The docs are essentially repeated on each of the 4 public +// builders, with tweaks to the examples as needed. 
+// +// The reason why there are so many builders is partially because of a misstep +// in the initial API design: the builder constructor takes in the pattern +// strings instead of using the `build` method to accept the pattern strings. +// This means `new` has a different signature for each builder. It probably +// would have been nicer to to use one builder with `fn new()`, and then add +// `build(pat)` and `build_many(pats)` constructors. +// +// The other reason is because I think the `bytes` module should probably +// have its own builder type. That way, it is completely isolated from the +// top-level API. +// +// If I could do it again, I'd probably have a `regex::Builder` and a +// `regex::bytes::Builder`. Each would have `build` and `build_set` (or +// `build_many`) methods for constructing a single pattern `Regex` and a +// multi-pattern `RegexSet`, respectively. + +use alloc::{ + string::{String, ToString}, + sync::Arc, + vec, + vec::Vec, +}; + +use regex_automata::{ + meta, nfa::thompson::WhichCaptures, util::syntax, MatchKind, +}; + +use crate::error::Error; + +/// A builder for constructing a `Regex`, `bytes::Regex`, `RegexSet` or a +/// `bytes::RegexSet`. +/// +/// This is essentially the implementation of the four different builder types +/// in the public API: `RegexBuilder`, `bytes::RegexBuilder`, `RegexSetBuilder` +/// and `bytes::RegexSetBuilder`. 
+#[derive(Clone, Debug)] +struct Builder { + pats: Vec, + metac: meta::Config, + syntaxc: syntax::Config, +} + +impl Default for Builder { + fn default() -> Builder { + let metac = meta::Config::new() + .nfa_size_limit(Some(10 * (1 << 20))) + .hybrid_cache_capacity(2 * (1 << 20)); + Builder { pats: vec![], metac, syntaxc: syntax::Config::default() } + } +} + +impl Builder { + fn new(patterns: I) -> Builder + where + S: AsRef, + I: IntoIterator, + { + let mut b = Builder::default(); + b.pats.extend(patterns.into_iter().map(|p| p.as_ref().to_string())); + b + } + + fn build_one_string(&self) -> Result { + assert_eq!(1, self.pats.len()); + let metac = self + .metac + .clone() + .match_kind(MatchKind::LeftmostFirst) + .utf8_empty(true); + let syntaxc = self.syntaxc.clone().utf8(true); + let pattern = Arc::from(self.pats[0].as_str()); + meta::Builder::new() + .configure(metac) + .syntax(syntaxc) + .build(&pattern) + .map(|meta| crate::Regex { meta, pattern }) + .map_err(Error::from_meta_build_error) + } + + fn build_one_bytes(&self) -> Result { + assert_eq!(1, self.pats.len()); + let metac = self + .metac + .clone() + .match_kind(MatchKind::LeftmostFirst) + .utf8_empty(false); + let syntaxc = self.syntaxc.clone().utf8(false); + let pattern = Arc::from(self.pats[0].as_str()); + meta::Builder::new() + .configure(metac) + .syntax(syntaxc) + .build(&pattern) + .map(|meta| crate::bytes::Regex { meta, pattern }) + .map_err(Error::from_meta_build_error) + } + + fn build_many_string(&self) -> Result { + let metac = self + .metac + .clone() + .match_kind(MatchKind::All) + .utf8_empty(true) + .which_captures(WhichCaptures::None); + let syntaxc = self.syntaxc.clone().utf8(true); + let patterns = Arc::from(self.pats.as_slice()); + meta::Builder::new() + .configure(metac) + .syntax(syntaxc) + .build_many(&patterns) + .map(|meta| crate::RegexSet { meta, patterns }) + .map_err(Error::from_meta_build_error) + } + + fn build_many_bytes(&self) -> Result { + let metac = self + .metac + 
.clone() + .match_kind(MatchKind::All) + .utf8_empty(false) + .which_captures(WhichCaptures::None); + let syntaxc = self.syntaxc.clone().utf8(false); + let patterns = Arc::from(self.pats.as_slice()); + meta::Builder::new() + .configure(metac) + .syntax(syntaxc) + .build_many(&patterns) + .map(|meta| crate::bytes::RegexSet { meta, patterns }) + .map_err(Error::from_meta_build_error) + } + + fn case_insensitive(&mut self, yes: bool) -> &mut Builder { + self.syntaxc = self.syntaxc.case_insensitive(yes); + self + } + + fn multi_line(&mut self, yes: bool) -> &mut Builder { + self.syntaxc = self.syntaxc.multi_line(yes); + self + } + + fn dot_matches_new_line(&mut self, yes: bool) -> &mut Builder { + self.syntaxc = self.syntaxc.dot_matches_new_line(yes); + self + } + + fn crlf(&mut self, yes: bool) -> &mut Builder { + self.syntaxc = self.syntaxc.crlf(yes); + self + } + + fn line_terminator(&mut self, byte: u8) -> &mut Builder { + self.metac = self.metac.clone().line_terminator(byte); + self.syntaxc = self.syntaxc.line_terminator(byte); + self + } + + fn swap_greed(&mut self, yes: bool) -> &mut Builder { + self.syntaxc = self.syntaxc.swap_greed(yes); + self + } + + fn ignore_whitespace(&mut self, yes: bool) -> &mut Builder { + self.syntaxc = self.syntaxc.ignore_whitespace(yes); + self + } + + fn unicode(&mut self, yes: bool) -> &mut Builder { + self.syntaxc = self.syntaxc.unicode(yes); + self + } + + fn octal(&mut self, yes: bool) -> &mut Builder { + self.syntaxc = self.syntaxc.octal(yes); + self + } + + fn size_limit(&mut self, limit: usize) -> &mut Builder { + self.metac = self.metac.clone().nfa_size_limit(Some(limit)); + self + } + + fn dfa_size_limit(&mut self, limit: usize) -> &mut Builder { + self.metac = self.metac.clone().hybrid_cache_capacity(limit); + self + } + + fn nest_limit(&mut self, limit: u32) -> &mut Builder { + self.syntaxc = self.syntaxc.nest_limit(limit); + self + } +} + +pub(crate) mod string { + use crate::{error::Error, Regex, RegexSet}; + + use 
super::Builder; + + /// A configurable builder for a [`Regex`]. + /// + /// This builder can be used to programmatically set flags such as `i` + /// (case insensitive) and `x` (for verbose mode). This builder can also be + /// used to configure things like the line terminator and a size limit on + /// the compiled regular expression. + #[derive(Clone, Debug)] + pub struct RegexBuilder { + builder: Builder, + } + + impl RegexBuilder { + /// Create a new builder with a default configuration for the given + /// pattern. + /// + /// If the pattern is invalid or exceeds the configured size limits, + /// then an error will be returned when [`RegexBuilder::build`] is + /// called. + pub fn new(pattern: &str) -> RegexBuilder { + RegexBuilder { builder: Builder::new([pattern]) } + } + + /// Compiles the pattern given to `RegexBuilder::new` with the + /// configuration set on this builder. + /// + /// If the pattern isn't a valid regex or if a configured size limit + /// was exceeded, then an error is returned. + pub fn build(&self) -> Result { + self.builder.build_one_string() + } + + /// This configures Unicode mode for the entire pattern. + /// + /// Enabling Unicode mode does a number of things: + /// + /// * Most fundamentally, it causes the fundamental atom of matching + /// to be a single codepoint. When Unicode mode is disabled, it's a + /// single byte. For example, when Unicode mode is enabled, `.` will + /// match `💩` once, where as it will match 4 times when Unicode mode + /// is disabled. (Since the UTF-8 encoding of `💩` is 4 bytes long.) + /// * Case insensitive matching uses Unicode simple case folding rules. + /// * Unicode character classes like `\p{Letter}` and `\p{Greek}` are + /// available. + /// * Perl character classes are Unicode aware. That is, `\w`, `\s` and + /// `\d`. + /// * The word boundary assertions, `\b` and `\B`, use the Unicode + /// definition of a word character. 
+ /// + /// Note that if Unicode mode is disabled, then the regex will fail to + /// compile if it could match invalid UTF-8. For example, when Unicode + /// mode is disabled, then since `.` matches any byte (except for + /// `\n`), then it can match invalid UTF-8 and thus building a regex + /// from it will fail. Another example is `\w` and `\W`. Since `\w` can + /// only match ASCII bytes when Unicode mode is disabled, it's allowed. + /// But `\W` can match more than ASCII bytes, including invalid UTF-8, + /// and so it is not allowed. This restriction can be lifted only by + /// using a [`bytes::Regex`](crate::bytes::Regex). + /// + /// For more details on the Unicode support in this crate, see the + /// [Unicode section](crate#unicode) in this crate's top-level + /// documentation. + /// + /// The default for this is `true`. + /// + /// # Example + /// + /// ``` + /// use regex::RegexBuilder; + /// + /// let re = RegexBuilder::new(r"\w") + /// .unicode(false) + /// .build() + /// .unwrap(); + /// // Normally greek letters would be included in \w, but since + /// // Unicode mode is disabled, it only matches ASCII letters. + /// assert!(!re.is_match("δ")); + /// + /// let re = RegexBuilder::new(r"s") + /// .case_insensitive(true) + /// .unicode(false) + /// .build() + /// .unwrap(); + /// // Normally 'ſ' is included when searching for 's' case + /// // insensitively due to Unicode's simple case folding rules. But + /// // when Unicode mode is disabled, only ASCII case insensitive rules + /// // are used. + /// assert!(!re.is_match("ſ")); + /// ``` + pub fn unicode(&mut self, yes: bool) -> &mut RegexBuilder { + self.builder.unicode(yes); + self + } + + /// This configures whether to enable case insensitive matching for the + /// entire pattern. + /// + /// This setting can also be configured using the inline flag `i` + /// in the pattern. For example, `(?i:foo)` matches `foo` case + /// insensitively while `(?-i:foo)` matches `foo` case sensitively. 
+ /// + /// The default for this is `false`. + /// + /// # Example + /// + /// ``` + /// use regex::RegexBuilder; + /// + /// let re = RegexBuilder::new(r"foo(?-i:bar)quux") + /// .case_insensitive(true) + /// .build() + /// .unwrap(); + /// assert!(re.is_match("FoObarQuUx")); + /// // Even though case insensitive matching is enabled in the builder, + /// // it can be locally disabled within the pattern. In this case, + /// // `bar` is matched case sensitively. + /// assert!(!re.is_match("fooBARquux")); + /// ``` + pub fn case_insensitive(&mut self, yes: bool) -> &mut RegexBuilder { + self.builder.case_insensitive(yes); + self + } + + /// This configures multi-line mode for the entire pattern. + /// + /// Enabling multi-line mode changes the behavior of the `^` and `$` + /// anchor assertions. Instead of only matching at the beginning and + /// end of a haystack, respectively, multi-line mode causes them to + /// match at the beginning and end of a line *in addition* to the + /// beginning and end of a haystack. More precisely, `^` will match at + /// the position immediately following a `\n` and `$` will match at the + /// position immediately preceding a `\n`. + /// + /// The behavior of this option can be impacted by other settings too: + /// + /// * The [`RegexBuilder::line_terminator`] option changes `\n` above + /// to any ASCII byte. + /// * The [`RegexBuilder::crlf`] option changes the line terminator to + /// be either `\r` or `\n`, but never at the position between a `\r` + /// and `\n`. + /// + /// This setting can also be configured using the inline flag `m` in + /// the pattern. + /// + /// The default for this is `false`. 
+ /// + /// # Example + /// + /// ``` + /// use regex::RegexBuilder; + /// + /// let re = RegexBuilder::new(r"^foo$") + /// .multi_line(true) + /// .build() + /// .unwrap(); + /// assert_eq!(Some(1..4), re.find("\nfoo\n").map(|m| m.range())); + /// ``` + pub fn multi_line(&mut self, yes: bool) -> &mut RegexBuilder { + self.builder.multi_line(yes); + self + } + + /// This configures dot-matches-new-line mode for the entire pattern. + /// + /// Perhaps surprisingly, the default behavior for `.` is not to match + /// any character, but rather, to match any character except for the + /// line terminator (which is `\n` by default). When this mode is + /// enabled, the behavior changes such that `.` truly matches any + /// character. + /// + /// This setting can also be configured using the inline flag `s` in + /// the pattern. For example, `(?s:.)` and `\p{any}` are equivalent + /// regexes. + /// + /// The default for this is `false`. + /// + /// # Example + /// + /// ``` + /// use regex::RegexBuilder; + /// + /// let re = RegexBuilder::new(r"foo.bar") + /// .dot_matches_new_line(true) + /// .build() + /// .unwrap(); + /// let hay = "foo\nbar"; + /// assert_eq!(Some("foo\nbar"), re.find(hay).map(|m| m.as_str())); + /// ``` + pub fn dot_matches_new_line( + &mut self, + yes: bool, + ) -> &mut RegexBuilder { + self.builder.dot_matches_new_line(yes); + self + } + + /// This configures CRLF mode for the entire pattern. + /// + /// When CRLF mode is enabled, both `\r` ("carriage return" or CR for + /// short) and `\n` ("line feed" or LF for short) are treated as line + /// terminators. This results in the following: + /// + /// * Unless dot-matches-new-line mode is enabled, `.` will now match + /// any character except for `\n` and `\r`. + /// * When multi-line mode is enabled, `^` will match immediately + /// following a `\n` or a `\r`. Similarly, `$` will match immediately + /// preceding a `\n` or a `\r`. Neither `^` nor `$` will ever match + /// between `\r` and `\n`. 
+ /// + /// This setting can also be configured using the inline flag `R` in + /// the pattern. + /// + /// The default for this is `false`. + /// + /// # Example + /// + /// ``` + /// use regex::RegexBuilder; + /// + /// let re = RegexBuilder::new(r"^foo$") + /// .multi_line(true) + /// .crlf(true) + /// .build() + /// .unwrap(); + /// let hay = "\r\nfoo\r\n"; + /// // If CRLF mode weren't enabled here, then '$' wouldn't match + /// // immediately after 'foo', and thus no match would be found. + /// assert_eq!(Some("foo"), re.find(hay).map(|m| m.as_str())); + /// ``` + /// + /// This example demonstrates that `^` will never match at a position + /// between `\r` and `\n`. (`$` will similarly not match between a `\r` + /// and a `\n`.) + /// + /// ``` + /// use regex::RegexBuilder; + /// + /// let re = RegexBuilder::new(r"^") + /// .multi_line(true) + /// .crlf(true) + /// .build() + /// .unwrap(); + /// let hay = "\r\n\r\n"; + /// let ranges: Vec<_> = re.find_iter(hay).map(|m| m.range()).collect(); + /// assert_eq!(ranges, vec![0..0, 2..2, 4..4]); + /// ``` + pub fn crlf(&mut self, yes: bool) -> &mut RegexBuilder { + self.builder.crlf(yes); + self + } + + /// Configures the line terminator to be used by the regex. + /// + /// The line terminator is relevant in two ways for a particular regex: + /// + /// * When dot-matches-new-line mode is *not* enabled (the default), + /// then `.` will match any character except for the configured line + /// terminator. + /// * When multi-line mode is enabled (not the default), then `^` and + /// `$` will match immediately after and before, respectively, a line + /// terminator. + /// + /// In both cases, if CRLF mode is enabled in a particular context, + /// then it takes precedence over any configured line terminator. + /// + /// This option cannot be configured from within the pattern. + /// + /// The default line terminator is `\n`. + /// + /// # Example + /// + /// This shows how to treat the NUL byte as a line terminator. 
This can + /// be a useful heuristic when searching binary data. + /// + /// ``` + /// use regex::RegexBuilder; + /// + /// let re = RegexBuilder::new(r"^foo$") + /// .multi_line(true) + /// .line_terminator(b'\x00') + /// .build() + /// .unwrap(); + /// let hay = "\x00foo\x00"; + /// assert_eq!(Some(1..4), re.find(hay).map(|m| m.range())); + /// ``` + /// + /// This example shows that the behavior of `.` is impacted by this + /// setting as well: + /// + /// ``` + /// use regex::RegexBuilder; + /// + /// let re = RegexBuilder::new(r".") + /// .line_terminator(b'\x00') + /// .build() + /// .unwrap(); + /// assert!(re.is_match("\n")); + /// assert!(!re.is_match("\x00")); + /// ``` + /// + /// This shows that building a regex will fail if the byte given + /// is not ASCII and the pattern could result in matching invalid + /// UTF-8. This is because any singular non-ASCII byte is not valid + /// UTF-8, and it is not permitted for a [`Regex`] to match invalid + /// UTF-8. (It is permissible to use a non-ASCII byte when building a + /// [`bytes::Regex`](crate::bytes::Regex).) + /// + /// ``` + /// use regex::RegexBuilder; + /// + /// assert!(RegexBuilder::new(r".").line_terminator(0x80).build().is_err()); + /// // Note that using a non-ASCII byte isn't enough on its own to + /// // cause regex compilation to fail. You actually have to make use + /// // of it in the regex in a way that leads to matching invalid + /// // UTF-8. If you don't, then regex compilation will succeed! + /// assert!(RegexBuilder::new(r"a").line_terminator(0x80).build().is_ok()); + /// ``` + pub fn line_terminator(&mut self, byte: u8) -> &mut RegexBuilder { + self.builder.line_terminator(byte); + self + } + + /// This configures swap-greed mode for the entire pattern. + /// + /// When swap-greed mode is enabled, patterns like `a+` will become + /// non-greedy and patterns like `a+?` will become greedy. In other + /// words, the meanings of `a+` and `a+?` are switched. 
+ /// + /// This setting can also be configured using the inline flag `U` in + /// the pattern. + /// + /// The default for this is `false`. + /// + /// # Example + /// + /// ``` + /// use regex::RegexBuilder; + /// + /// let re = RegexBuilder::new(r"a+") + /// .swap_greed(true) + /// .build() + /// .unwrap(); + /// assert_eq!(Some("a"), re.find("aaa").map(|m| m.as_str())); + /// ``` + pub fn swap_greed(&mut self, yes: bool) -> &mut RegexBuilder { + self.builder.swap_greed(yes); + self + } + + /// This configures verbose mode for the entire pattern. + /// + /// When enabled, whitespace will treated as insignifcant in the + /// pattern and `#` can be used to start a comment until the next new + /// line. + /// + /// Normally, in most places in a pattern, whitespace is treated + /// literally. For example ` +` will match one or more ASCII whitespace + /// characters. + /// + /// When verbose mode is enabled, `\#` can be used to match a literal + /// `#` and `\ ` can be used to match a literal ASCII whitespace + /// character. + /// + /// Verbose mode is useful for permitting regexes to be formatted and + /// broken up more nicely. This may make them more easily readable. + /// + /// This setting can also be configured using the inline flag `x` in + /// the pattern. + /// + /// The default for this is `false`. + /// + /// # Example + /// + /// ``` + /// use regex::RegexBuilder; + /// + /// let pat = r" + /// \b + /// (?\p{Uppercase}\w*) # always start with uppercase letter + /// [\s--\n]+ # whitespace should separate names + /// (?: # middle name can be an initial! + /// (?:(?\p{Uppercase})\.|(?\p{Uppercase}\w*)) + /// [\s--\n]+ + /// )? + /// (?\p{Uppercase}\w*) + /// \b + /// "; + /// let re = RegexBuilder::new(pat) + /// .ignore_whitespace(true) + /// .build() + /// .unwrap(); + /// + /// let caps = re.captures("Harry Potter").unwrap(); + /// assert_eq!("Harry", &caps["first"]); + /// assert_eq!("Potter", &caps["last"]); + /// + /// let caps = re.captures("Harry J. 
Potter").unwrap(); + /// assert_eq!("Harry", &caps["first"]); + /// // Since a middle name/initial isn't required for an overall match, + /// // we can't assume that 'initial' or 'middle' will be populated! + /// assert_eq!(Some("J"), caps.name("initial").map(|m| m.as_str())); + /// assert_eq!(None, caps.name("middle").map(|m| m.as_str())); + /// assert_eq!("Potter", &caps["last"]); + /// + /// let caps = re.captures("Harry James Potter").unwrap(); + /// assert_eq!("Harry", &caps["first"]); + /// // Since a middle name/initial isn't required for an overall match, + /// // we can't assume that 'initial' or 'middle' will be populated! + /// assert_eq!(None, caps.name("initial").map(|m| m.as_str())); + /// assert_eq!(Some("James"), caps.name("middle").map(|m| m.as_str())); + /// assert_eq!("Potter", &caps["last"]); + /// ``` + pub fn ignore_whitespace(&mut self, yes: bool) -> &mut RegexBuilder { + self.builder.ignore_whitespace(yes); + self + } + + /// This configures octal mode for the entire pattern. + /// + /// Octal syntax is a little-known way of uttering Unicode codepoints + /// in a pattern. For example, `a`, `\x61`, `\u0061` and `\141` are all + /// equivalent patterns, where the last example shows octal syntax. + /// + /// While supporting octal syntax isn't in and of itself a problem, + /// it does make good error messages harder. That is, in PCRE based + /// regex engines, syntax like `\1` invokes a backreference, which is + /// explicitly unsupported this library. However, many users expect + /// backreferences to be supported. Therefore, when octal support + /// is disabled, the error message will explicitly mention that + /// backreferences aren't supported. + /// + /// The default for this is `false`. + /// + /// # Example + /// + /// ``` + /// use regex::RegexBuilder; + /// + /// // Normally this pattern would not compile, with an error message + /// // about backreferences not being supported. 
But with octal mode + /// // enabled, octal escape sequences work. + /// let re = RegexBuilder::new(r"\141") + /// .octal(true) + /// .build() + /// .unwrap(); + /// assert!(re.is_match("a")); + /// ``` + pub fn octal(&mut self, yes: bool) -> &mut RegexBuilder { + self.builder.octal(yes); + self + } + + /// Sets the approximate size limit, in bytes, of the compiled regex. + /// + /// This roughly corresponds to the number of heap memory, in + /// bytes, occupied by a single regex. If the regex would otherwise + /// approximately exceed this limit, then compiling that regex will + /// fail. + /// + /// The main utility of a method like this is to avoid compiling + /// regexes that use an unexpected amount of resources, such as + /// time and memory. Even if the memory usage of a large regex is + /// acceptable, its search time may not be. Namely, worst case time + /// complexity for search is `O(m * n)`, where `m ~ len(pattern)` and + /// `n ~ len(haystack)`. That is, search time depends, in part, on the + /// size of the compiled regex. This means that putting a limit on the + /// size of the regex limits how much a regex can impact search time. + /// + /// For more information about regex size limits, see the section on + /// [untrusted inputs](crate#untrusted-input) in the top-level crate + /// documentation. + /// + /// The default for this is some reasonable number that permits most + /// patterns to compile successfully. + /// + /// # Example + /// + /// ``` + /// # if !cfg!(target_pointer_width = "64") { return; } // see #1041 + /// use regex::RegexBuilder; + /// + /// // It may surprise you how big some seemingly small patterns can + /// // be! Since \w is Unicode aware, this generates a regex that can + /// // match approximately 140,000 distinct codepoints. 
+ /// assert!(RegexBuilder::new(r"\w").size_limit(45_000).build().is_err()); + /// ``` + pub fn size_limit(&mut self, bytes: usize) -> &mut RegexBuilder { + self.builder.size_limit(bytes); + self + } + + /// Set the approximate capacity, in bytes, of the cache of transitions + /// used by the lazy DFA. + /// + /// While the lazy DFA isn't always used, in tends to be the most + /// commonly use regex engine in default configurations. It tends to + /// adopt the performance profile of a fully build DFA, but without the + /// downside of taking worst case exponential time to build. + /// + /// The downside is that it needs to keep a cache of transitions and + /// states that are built while running a search, and this cache + /// can fill up. When it fills up, the cache will reset itself. Any + /// previously generated states and transitions will then need to be + /// re-generated. If this happens too many times, then this library + /// will bail out of using the lazy DFA and switch to a different regex + /// engine. + /// + /// If your regex provokes this particular downside of the lazy DFA, + /// then it may be beneficial to increase its cache capacity. This will + /// potentially reduce the frequency of cache resetting (ideally to + /// `0`). While it won't fix all potential performance problems with + /// the lazy DFA, increasing the cache capacity does fix some. + /// + /// There is no easy way to determine, a priori, whether increasing + /// this cache capacity will help. In general, the larger your regex, + /// the more cache it's likely to use. But that isn't an ironclad rule. + /// For example, a regex like `[01]*1[01]{N}` would normally produce a + /// fully build DFA that is exponential in size with respect to `N`. + /// The lazy DFA will prevent exponential space blow-up, but it cache + /// is likely to fill up, even when it's large and even for smallish + /// values of `N`. 
+ /// + /// If you aren't sure whether this helps or not, it is sensible to + /// set this to some arbitrarily large number in testing, such as + /// `usize::MAX`. Namely, this represents the amount of capacity that + /// *may* be used. It's probably not a good idea to use `usize::MAX` in + /// production though, since it implies there are no controls on heap + /// memory used by this library during a search. In effect, set it to + /// whatever you're willing to allocate for a single regex search. + pub fn dfa_size_limit(&mut self, bytes: usize) -> &mut RegexBuilder { + self.builder.dfa_size_limit(bytes); + self + } + + /// Set the nesting limit for this parser. + /// + /// The nesting limit controls how deep the abstract syntax tree is + /// allowed to be. If the AST exceeds the given limit (e.g., with too + /// many nested groups), then an error is returned by the parser. + /// + /// The purpose of this limit is to act as a heuristic to prevent stack + /// overflow for consumers that do structural induction on an AST using + /// explicit recursion. While this crate never does this (instead using + /// constant stack space and moving the call stack to the heap), other + /// crates may. + /// + /// This limit is not checked until the entire AST is parsed. + /// Therefore, if callers want to put a limit on the amount of heap + /// space used, then they should impose a limit on the length, in + /// bytes, of the concrete pattern string. In particular, this is + /// viable since this parser implementation will limit itself to heap + /// space proportional to the length of the pattern string. See also + /// the [untrusted inputs](crate#untrusted-input) section in the + /// top-level crate documentation for more information about this. + /// + /// Note that a nest limit of `0` will return a nest limit error for + /// most patterns but not all. 
For example, a nest limit of `0` permits + /// `a` but not `ab`, since `ab` requires an explicit concatenation, + /// which results in a nest depth of `1`. In general, a nest limit is + /// not something that manifests in an obvious way in the concrete + /// syntax, therefore, it should not be used in a granular way. + /// + /// # Example + /// + /// ``` + /// use regex::RegexBuilder; + /// + /// assert!(RegexBuilder::new(r"a").nest_limit(0).build().is_ok()); + /// assert!(RegexBuilder::new(r"ab").nest_limit(0).build().is_err()); + /// ``` + pub fn nest_limit(&mut self, limit: u32) -> &mut RegexBuilder { + self.builder.nest_limit(limit); + self + } + } + + /// A configurable builder for a [`RegexSet`]. + /// + /// This builder can be used to programmatically set flags such as + /// `i` (case insensitive) and `x` (for verbose mode). This builder + /// can also be used to configure things like the line terminator + /// and a size limit on the compiled regular expression. + #[derive(Clone, Debug)] + pub struct RegexSetBuilder { + builder: Builder, + } + + impl RegexSetBuilder { + /// Create a new builder with a default configuration for the given + /// patterns. + /// + /// If the patterns are invalid or exceed the configured size limits, + /// then an error will be returned when [`RegexSetBuilder::build`] is + /// called. + pub fn new(patterns: I) -> RegexSetBuilder + where + I: IntoIterator, + S: AsRef, + { + RegexSetBuilder { builder: Builder::new(patterns) } + } + + /// Compiles the patterns given to `RegexSetBuilder::new` with the + /// configuration set on this builder. + /// + /// If the patterns aren't valid regexes or if a configured size limit + /// was exceeded, then an error is returned. + pub fn build(&self) -> Result { + self.builder.build_many_string() + } + + /// This configures Unicode mode for the all of the patterns. 
+ /// + /// Enabling Unicode mode does a number of things: + /// + /// * Most fundamentally, it causes the fundamental atom of matching + /// to be a single codepoint. When Unicode mode is disabled, it's a + /// single byte. For example, when Unicode mode is enabled, `.` will + /// match `💩` once, where as it will match 4 times when Unicode mode + /// is disabled. (Since the UTF-8 encoding of `💩` is 4 bytes long.) + /// * Case insensitive matching uses Unicode simple case folding rules. + /// * Unicode character classes like `\p{Letter}` and `\p{Greek}` are + /// available. + /// * Perl character classes are Unicode aware. That is, `\w`, `\s` and + /// `\d`. + /// * The word boundary assertions, `\b` and `\B`, use the Unicode + /// definition of a word character. + /// + /// Note that if Unicode mode is disabled, then the regex will fail to + /// compile if it could match invalid UTF-8. For example, when Unicode + /// mode is disabled, then since `.` matches any byte (except for + /// `\n`), then it can match invalid UTF-8 and thus building a regex + /// from it will fail. Another example is `\w` and `\W`. Since `\w` can + /// only match ASCII bytes when Unicode mode is disabled, it's allowed. + /// But `\W` can match more than ASCII bytes, including invalid UTF-8, + /// and so it is not allowed. This restriction can be lifted only by + /// using a [`bytes::RegexSet`](crate::bytes::RegexSet). + /// + /// For more details on the Unicode support in this crate, see the + /// [Unicode section](crate#unicode) in this crate's top-level + /// documentation. + /// + /// The default for this is `true`. + /// + /// # Example + /// + /// ``` + /// use regex::RegexSetBuilder; + /// + /// let re = RegexSetBuilder::new([r"\w"]) + /// .unicode(false) + /// .build() + /// .unwrap(); + /// // Normally greek letters would be included in \w, but since + /// // Unicode mode is disabled, it only matches ASCII letters. 
+ /// assert!(!re.is_match("δ")); + /// + /// let re = RegexSetBuilder::new([r"s"]) + /// .case_insensitive(true) + /// .unicode(false) + /// .build() + /// .unwrap(); + /// // Normally 'ſ' is included when searching for 's' case + /// // insensitively due to Unicode's simple case folding rules. But + /// // when Unicode mode is disabled, only ASCII case insensitive rules + /// // are used. + /// assert!(!re.is_match("ſ")); + /// ``` + pub fn unicode(&mut self, yes: bool) -> &mut RegexSetBuilder { + self.builder.unicode(yes); + self + } + + /// This configures whether to enable case insensitive matching for all + /// of the patterns. + /// + /// This setting can also be configured using the inline flag `i` + /// in the pattern. For example, `(?i:foo)` matches `foo` case + /// insensitively while `(?-i:foo)` matches `foo` case sensitively. + /// + /// The default for this is `false`. + /// + /// # Example + /// + /// ``` + /// use regex::RegexSetBuilder; + /// + /// let re = RegexSetBuilder::new([r"foo(?-i:bar)quux"]) + /// .case_insensitive(true) + /// .build() + /// .unwrap(); + /// assert!(re.is_match("FoObarQuUx")); + /// // Even though case insensitive matching is enabled in the builder, + /// // it can be locally disabled within the pattern. In this case, + /// // `bar` is matched case sensitively. + /// assert!(!re.is_match("fooBARquux")); + /// ``` + pub fn case_insensitive(&mut self, yes: bool) -> &mut RegexSetBuilder { + self.builder.case_insensitive(yes); + self + } + + /// This configures multi-line mode for all of the patterns. + /// + /// Enabling multi-line mode changes the behavior of the `^` and `$` + /// anchor assertions. Instead of only matching at the beginning and + /// end of a haystack, respectively, multi-line mode causes them to + /// match at the beginning and end of a line *in addition* to the + /// beginning and end of a haystack. 
More precisely, `^` will match at + /// the position immediately following a `\n` and `$` will match at the + /// position immediately preceding a `\n`. + /// + /// The behavior of this option can be impacted by other settings too: + /// + /// * The [`RegexSetBuilder::line_terminator`] option changes `\n` + /// above to any ASCII byte. + /// * The [`RegexSetBuilder::crlf`] option changes the line terminator + /// to be either `\r` or `\n`, but never at the position between a `\r` + /// and `\n`. + /// + /// This setting can also be configured using the inline flag `m` in + /// the pattern. + /// + /// The default for this is `false`. + /// + /// # Example + /// + /// ``` + /// use regex::RegexSetBuilder; + /// + /// let re = RegexSetBuilder::new([r"^foo$"]) + /// .multi_line(true) + /// .build() + /// .unwrap(); + /// assert!(re.is_match("\nfoo\n")); + /// ``` + pub fn multi_line(&mut self, yes: bool) -> &mut RegexSetBuilder { + self.builder.multi_line(yes); + self + } + + /// This configures dot-matches-new-line mode for the entire pattern. + /// + /// Perhaps surprisingly, the default behavior for `.` is not to match + /// any character, but rather, to match any character except for the + /// line terminator (which is `\n` by default). When this mode is + /// enabled, the behavior changes such that `.` truly matches any + /// character. + /// + /// This setting can also be configured using the inline flag `s` in + /// the pattern. For example, `(?s:.)` and `\p{any}` are equivalent + /// regexes. + /// + /// The default for this is `false`. 
+ /// + /// # Example + /// + /// ``` + /// use regex::RegexSetBuilder; + /// + /// let re = RegexSetBuilder::new([r"foo.bar"]) + /// .dot_matches_new_line(true) + /// .build() + /// .unwrap(); + /// let hay = "foo\nbar"; + /// assert!(re.is_match(hay)); + /// ``` + pub fn dot_matches_new_line( + &mut self, + yes: bool, + ) -> &mut RegexSetBuilder { + self.builder.dot_matches_new_line(yes); + self + } + + /// This configures CRLF mode for all of the patterns. + /// + /// When CRLF mode is enabled, both `\r` ("carriage return" or CR for + /// short) and `\n` ("line feed" or LF for short) are treated as line + /// terminators. This results in the following: + /// + /// * Unless dot-matches-new-line mode is enabled, `.` will now match + /// any character except for `\n` and `\r`. + /// * When multi-line mode is enabled, `^` will match immediately + /// following a `\n` or a `\r`. Similarly, `$` will match immediately + /// preceding a `\n` or a `\r`. Neither `^` nor `$` will ever match + /// between `\r` and `\n`. + /// + /// This setting can also be configured using the inline flag `R` in + /// the pattern. + /// + /// The default for this is `false`. + /// + /// # Example + /// + /// ``` + /// use regex::RegexSetBuilder; + /// + /// let re = RegexSetBuilder::new([r"^foo$"]) + /// .multi_line(true) + /// .crlf(true) + /// .build() + /// .unwrap(); + /// let hay = "\r\nfoo\r\n"; + /// // If CRLF mode weren't enabled here, then '$' wouldn't match + /// // immediately after 'foo', and thus no match would be found. + /// assert!(re.is_match(hay)); + /// ``` + /// + /// This example demonstrates that `^` will never match at a position + /// between `\r` and `\n`. (`$` will similarly not match between a `\r` + /// and a `\n`.) 
+ /// + /// ``` + /// use regex::RegexSetBuilder; + /// + /// let re = RegexSetBuilder::new([r"^\n"]) + /// .multi_line(true) + /// .crlf(true) + /// .build() + /// .unwrap(); + /// assert!(!re.is_match("\r\n")); + /// ``` + pub fn crlf(&mut self, yes: bool) -> &mut RegexSetBuilder { + self.builder.crlf(yes); + self + } + + /// Configures the line terminator to be used by the regex. + /// + /// The line terminator is relevant in two ways for a particular regex: + /// + /// * When dot-matches-new-line mode is *not* enabled (the default), + /// then `.` will match any character except for the configured line + /// terminator. + /// * When multi-line mode is enabled (not the default), then `^` and + /// `$` will match immediately after and before, respectively, a line + /// terminator. + /// + /// In both cases, if CRLF mode is enabled in a particular context, + /// then it takes precedence over any configured line terminator. + /// + /// This option cannot be configured from within the pattern. + /// + /// The default line terminator is `\n`. + /// + /// # Example + /// + /// This shows how to treat the NUL byte as a line terminator. This can + /// be a useful heuristic when searching binary data. + /// + /// ``` + /// use regex::RegexSetBuilder; + /// + /// let re = RegexSetBuilder::new([r"^foo$"]) + /// .multi_line(true) + /// .line_terminator(b'\x00') + /// .build() + /// .unwrap(); + /// let hay = "\x00foo\x00"; + /// assert!(re.is_match(hay)); + /// ``` + /// + /// This example shows that the behavior of `.` is impacted by this + /// setting as well: + /// + /// ``` + /// use regex::RegexSetBuilder; + /// + /// let re = RegexSetBuilder::new([r"."]) + /// .line_terminator(b'\x00') + /// .build() + /// .unwrap(); + /// assert!(re.is_match("\n")); + /// assert!(!re.is_match("\x00")); + /// ``` + /// + /// This shows that building a regex will fail if the byte given + /// is not ASCII and the pattern could result in matching invalid + /// UTF-8. 
This is because any singular non-ASCII byte is not valid + /// UTF-8, and it is not permitted for a [`RegexSet`] to match invalid + /// UTF-8. (It is permissible to use a non-ASCII byte when building a + /// [`bytes::RegexSet`](crate::bytes::RegexSet).) + /// + /// ``` + /// use regex::RegexSetBuilder; + /// + /// assert!( + /// RegexSetBuilder::new([r"."]) + /// .line_terminator(0x80) + /// .build() + /// .is_err() + /// ); + /// // Note that using a non-ASCII byte isn't enough on its own to + /// // cause regex compilation to fail. You actually have to make use + /// // of it in the regex in a way that leads to matching invalid + /// // UTF-8. If you don't, then regex compilation will succeed! + /// assert!( + /// RegexSetBuilder::new([r"a"]) + /// .line_terminator(0x80) + /// .build() + /// .is_ok() + /// ); + /// ``` + pub fn line_terminator(&mut self, byte: u8) -> &mut RegexSetBuilder { + self.builder.line_terminator(byte); + self + } + + /// This configures swap-greed mode for all of the patterns. + /// + /// When swap-greed mode is enabled, patterns like `a+` will become + /// non-greedy and patterns like `a+?` will become greedy. In other + /// words, the meanings of `a+` and `a+?` are switched. + /// + /// This setting can also be configured using the inline flag `U` in + /// the pattern. + /// + /// Note that this is generally not useful for a `RegexSet` since a + /// `RegexSet` can only report whether a pattern matches or not. Since + /// greediness never impacts whether a match is found or not (only the + /// offsets of the match), it follows that whether parts of a pattern + /// are greedy or not doesn't matter for a `RegexSet`. + /// + /// The default for this is `false`. + pub fn swap_greed(&mut self, yes: bool) -> &mut RegexSetBuilder { + self.builder.swap_greed(yes); + self + } + + /// This configures verbose mode for all of the patterns. 
+ /// + /// When enabled, whitespace will treated as insignifcant in the + /// pattern and `#` can be used to start a comment until the next new + /// line. + /// + /// Normally, in most places in a pattern, whitespace is treated + /// literally. For example ` +` will match one or more ASCII whitespace + /// characters. + /// + /// When verbose mode is enabled, `\#` can be used to match a literal + /// `#` and `\ ` can be used to match a literal ASCII whitespace + /// character. + /// + /// Verbose mode is useful for permitting regexes to be formatted and + /// broken up more nicely. This may make them more easily readable. + /// + /// This setting can also be configured using the inline flag `x` in + /// the pattern. + /// + /// The default for this is `false`. + /// + /// # Example + /// + /// ``` + /// use regex::RegexSetBuilder; + /// + /// let pat = r" + /// \b + /// (?\p{Uppercase}\w*) # always start with uppercase letter + /// [\s--\n]+ # whitespace should separate names + /// (?: # middle name can be an initial! + /// (?:(?\p{Uppercase})\.|(?\p{Uppercase}\w*)) + /// [\s--\n]+ + /// )? + /// (?\p{Uppercase}\w*) + /// \b + /// "; + /// let re = RegexSetBuilder::new([pat]) + /// .ignore_whitespace(true) + /// .build() + /// .unwrap(); + /// assert!(re.is_match("Harry Potter")); + /// assert!(re.is_match("Harry J. Potter")); + /// assert!(re.is_match("Harry James Potter")); + /// assert!(!re.is_match("harry J. Potter")); + /// ``` + pub fn ignore_whitespace( + &mut self, + yes: bool, + ) -> &mut RegexSetBuilder { + self.builder.ignore_whitespace(yes); + self + } + + /// This configures octal mode for all of the patterns. + /// + /// Octal syntax is a little-known way of uttering Unicode codepoints + /// in a pattern. For example, `a`, `\x61`, `\u0061` and `\141` are all + /// equivalent patterns, where the last example shows octal syntax. + /// + /// While supporting octal syntax isn't in and of itself a problem, + /// it does make good error messages harder. 
That is, in PCRE based + /// regex engines, syntax like `\1` invokes a backreference, which is + /// explicitly unsupported this library. However, many users expect + /// backreferences to be supported. Therefore, when octal support + /// is disabled, the error message will explicitly mention that + /// backreferences aren't supported. + /// + /// The default for this is `false`. + /// + /// # Example + /// + /// ``` + /// use regex::RegexSetBuilder; + /// + /// // Normally this pattern would not compile, with an error message + /// // about backreferences not being supported. But with octal mode + /// // enabled, octal escape sequences work. + /// let re = RegexSetBuilder::new([r"\141"]) + /// .octal(true) + /// .build() + /// .unwrap(); + /// assert!(re.is_match("a")); + /// ``` + pub fn octal(&mut self, yes: bool) -> &mut RegexSetBuilder { + self.builder.octal(yes); + self + } + + /// Sets the approximate size limit, in bytes, of the compiled regex. + /// + /// This roughly corresponds to the number of heap memory, in + /// bytes, occupied by a single regex. If the regex would otherwise + /// approximately exceed this limit, then compiling that regex will + /// fail. + /// + /// The main utility of a method like this is to avoid compiling + /// regexes that use an unexpected amount of resources, such as + /// time and memory. Even if the memory usage of a large regex is + /// acceptable, its search time may not be. Namely, worst case time + /// complexity for search is `O(m * n)`, where `m ~ len(pattern)` and + /// `n ~ len(haystack)`. That is, search time depends, in part, on the + /// size of the compiled regex. This means that putting a limit on the + /// size of the regex limits how much a regex can impact search time. + /// + /// For more information about regex size limits, see the section on + /// [untrusted inputs](crate#untrusted-input) in the top-level crate + /// documentation. 
+ /// + /// The default for this is some reasonable number that permits most + /// patterns to compile successfully. + /// + /// # Example + /// + /// ``` + /// # if !cfg!(target_pointer_width = "64") { return; } // see #1041 + /// use regex::RegexSetBuilder; + /// + /// // It may surprise you how big some seemingly small patterns can + /// // be! Since \w is Unicode aware, this generates a regex that can + /// // match approximately 140,000 distinct codepoints. + /// assert!( + /// RegexSetBuilder::new([r"\w"]) + /// .size_limit(45_000) + /// .build() + /// .is_err() + /// ); + /// ``` + pub fn size_limit(&mut self, bytes: usize) -> &mut RegexSetBuilder { + self.builder.size_limit(bytes); + self + } + + /// Set the approximate capacity, in bytes, of the cache of transitions + /// used by the lazy DFA. + /// + /// While the lazy DFA isn't always used, in tends to be the most + /// commonly use regex engine in default configurations. It tends to + /// adopt the performance profile of a fully build DFA, but without the + /// downside of taking worst case exponential time to build. + /// + /// The downside is that it needs to keep a cache of transitions and + /// states that are built while running a search, and this cache + /// can fill up. When it fills up, the cache will reset itself. Any + /// previously generated states and transitions will then need to be + /// re-generated. If this happens too many times, then this library + /// will bail out of using the lazy DFA and switch to a different regex + /// engine. + /// + /// If your regex provokes this particular downside of the lazy DFA, + /// then it may be beneficial to increase its cache capacity. This will + /// potentially reduce the frequency of cache resetting (ideally to + /// `0`). While it won't fix all potential performance problems with + /// the lazy DFA, increasing the cache capacity does fix some. 
+ /// + /// There is no easy way to determine, a priori, whether increasing + /// this cache capacity will help. In general, the larger your regex, + /// the more cache it's likely to use. But that isn't an ironclad rule. + /// For example, a regex like `[01]*1[01]{N}` would normally produce a + /// fully build DFA that is exponential in size with respect to `N`. + /// The lazy DFA will prevent exponential space blow-up, but it cache + /// is likely to fill up, even when it's large and even for smallish + /// values of `N`. + /// + /// If you aren't sure whether this helps or not, it is sensible to + /// set this to some arbitrarily large number in testing, such as + /// `usize::MAX`. Namely, this represents the amount of capacity that + /// *may* be used. It's probably not a good idea to use `usize::MAX` in + /// production though, since it implies there are no controls on heap + /// memory used by this library during a search. In effect, set it to + /// whatever you're willing to allocate for a single regex search. + pub fn dfa_size_limit( + &mut self, + bytes: usize, + ) -> &mut RegexSetBuilder { + self.builder.dfa_size_limit(bytes); + self + } + + /// Set the nesting limit for this parser. + /// + /// The nesting limit controls how deep the abstract syntax tree is + /// allowed to be. If the AST exceeds the given limit (e.g., with too + /// many nested groups), then an error is returned by the parser. + /// + /// The purpose of this limit is to act as a heuristic to prevent stack + /// overflow for consumers that do structural induction on an AST using + /// explicit recursion. While this crate never does this (instead using + /// constant stack space and moving the call stack to the heap), other + /// crates may. + /// + /// This limit is not checked until the entire AST is parsed. 
+ /// Therefore, if callers want to put a limit on the amount of heap + /// space used, then they should impose a limit on the length, in + /// bytes, of the concrete pattern string. In particular, this is + /// viable since this parser implementation will limit itself to heap + /// space proportional to the length of the pattern string. See also + /// the [untrusted inputs](crate#untrusted-input) section in the + /// top-level crate documentation for more information about this. + /// + /// Note that a nest limit of `0` will return a nest limit error for + /// most patterns but not all. For example, a nest limit of `0` permits + /// `a` but not `ab`, since `ab` requires an explicit concatenation, + /// which results in a nest depth of `1`. In general, a nest limit is + /// not something that manifests in an obvious way in the concrete + /// syntax, therefore, it should not be used in a granular way. + /// + /// # Example + /// + /// ``` + /// use regex::RegexSetBuilder; + /// + /// assert!(RegexSetBuilder::new([r"a"]).nest_limit(0).build().is_ok()); + /// assert!(RegexSetBuilder::new([r"ab"]).nest_limit(0).build().is_err()); + /// ``` + pub fn nest_limit(&mut self, limit: u32) -> &mut RegexSetBuilder { + self.builder.nest_limit(limit); + self + } + } +} + +pub(crate) mod bytes { + use crate::{ + bytes::{Regex, RegexSet}, + error::Error, + }; + + use super::Builder; + + /// A configurable builder for a [`Regex`]. + /// + /// This builder can be used to programmatically set flags such as `i` + /// (case insensitive) and `x` (for verbose mode). This builder can also be + /// used to configure things like the line terminator and a size limit on + /// the compiled regular expression. + #[derive(Clone, Debug)] + pub struct RegexBuilder { + builder: Builder, + } + + impl RegexBuilder { + /// Create a new builder with a default configuration for the given + /// pattern. 
+ /// + /// If the pattern is invalid or exceeds the configured size limits, + /// then an error will be returned when [`RegexBuilder::build`] is + /// called. + pub fn new(pattern: &str) -> RegexBuilder { + RegexBuilder { builder: Builder::new([pattern]) } + } + + /// Compiles the pattern given to `RegexBuilder::new` with the + /// configuration set on this builder. + /// + /// If the pattern isn't a valid regex or if a configured size limit + /// was exceeded, then an error is returned. + pub fn build(&self) -> Result { + self.builder.build_one_bytes() + } + + /// This configures Unicode mode for the entire pattern. + /// + /// Enabling Unicode mode does a number of things: + /// + /// * Most fundamentally, it causes the fundamental atom of matching + /// to be a single codepoint. When Unicode mode is disabled, it's a + /// single byte. For example, when Unicode mode is enabled, `.` will + /// match `💩` once, where as it will match 4 times when Unicode mode + /// is disabled. (Since the UTF-8 encoding of `💩` is 4 bytes long.) + /// * Case insensitive matching uses Unicode simple case folding rules. + /// * Unicode character classes like `\p{Letter}` and `\p{Greek}` are + /// available. + /// * Perl character classes are Unicode aware. That is, `\w`, `\s` and + /// `\d`. + /// * The word boundary assertions, `\b` and `\B`, use the Unicode + /// definition of a word character. + /// + /// Note that unlike the top-level `Regex` for searching `&str`, it + /// is permitted to disable Unicode mode even if the resulting pattern + /// could match invalid UTF-8. For example, `(?-u:.)` is not a valid + /// pattern for a top-level `Regex`, but is valid for a `bytes::Regex`. + /// + /// For more details on the Unicode support in this crate, see the + /// [Unicode section](crate#unicode) in this crate's top-level + /// documentation. + /// + /// The default for this is `true`. 
+ /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexBuilder; + /// + /// let re = RegexBuilder::new(r"\w") + /// .unicode(false) + /// .build() + /// .unwrap(); + /// // Normally greek letters would be included in \w, but since + /// // Unicode mode is disabled, it only matches ASCII letters. + /// assert!(!re.is_match("δ".as_bytes())); + /// + /// let re = RegexBuilder::new(r"s") + /// .case_insensitive(true) + /// .unicode(false) + /// .build() + /// .unwrap(); + /// // Normally 'ſ' is included when searching for 's' case + /// // insensitively due to Unicode's simple case folding rules. But + /// // when Unicode mode is disabled, only ASCII case insensitive rules + /// // are used. + /// assert!(!re.is_match("ſ".as_bytes())); + /// ``` + /// + /// Since this builder is for constructing a [`bytes::Regex`](Regex), + /// one can disable Unicode mode even if it would match invalid UTF-8: + /// + /// ``` + /// use regex::bytes::RegexBuilder; + /// + /// let re = RegexBuilder::new(r".") + /// .unicode(false) + /// .build() + /// .unwrap(); + /// // Normally greek letters would be included in \w, but since + /// // Unicode mode is disabled, it only matches ASCII letters. + /// assert!(re.is_match(b"\xFF")); + /// ``` + pub fn unicode(&mut self, yes: bool) -> &mut RegexBuilder { + self.builder.unicode(yes); + self + } + + /// This configures whether to enable case insensitive matching for the + /// entire pattern. + /// + /// This setting can also be configured using the inline flag `i` + /// in the pattern. For example, `(?i:foo)` matches `foo` case + /// insensitively while `(?-i:foo)` matches `foo` case sensitively. + /// + /// The default for this is `false`. 
+ /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexBuilder; + /// + /// let re = RegexBuilder::new(r"foo(?-i:bar)quux") + /// .case_insensitive(true) + /// .build() + /// .unwrap(); + /// assert!(re.is_match(b"FoObarQuUx")); + /// // Even though case insensitive matching is enabled in the builder, + /// // it can be locally disabled within the pattern. In this case, + /// // `bar` is matched case sensitively. + /// assert!(!re.is_match(b"fooBARquux")); + /// ``` + pub fn case_insensitive(&mut self, yes: bool) -> &mut RegexBuilder { + self.builder.case_insensitive(yes); + self + } + + /// This configures multi-line mode for the entire pattern. + /// + /// Enabling multi-line mode changes the behavior of the `^` and `$` + /// anchor assertions. Instead of only matching at the beginning and + /// end of a haystack, respectively, multi-line mode causes them to + /// match at the beginning and end of a line *in addition* to the + /// beginning and end of a haystack. More precisely, `^` will match at + /// the position immediately following a `\n` and `$` will match at the + /// position immediately preceding a `\n`. + /// + /// The behavior of this option can be impacted by other settings too: + /// + /// * The [`RegexBuilder::line_terminator`] option changes `\n` above + /// to any ASCII byte. + /// * The [`RegexBuilder::crlf`] option changes the line terminator to + /// be either `\r` or `\n`, but never at the position between a `\r` + /// and `\n`. + /// + /// This setting can also be configured using the inline flag `m` in + /// the pattern. + /// + /// The default for this is `false`. 
+ /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexBuilder; + /// + /// let re = RegexBuilder::new(r"^foo$") + /// .multi_line(true) + /// .build() + /// .unwrap(); + /// assert_eq!(Some(1..4), re.find(b"\nfoo\n").map(|m| m.range())); + /// ``` + pub fn multi_line(&mut self, yes: bool) -> &mut RegexBuilder { + self.builder.multi_line(yes); + self + } + + /// This configures dot-matches-new-line mode for the entire pattern. + /// + /// Perhaps surprisingly, the default behavior for `.` is not to match + /// any character, but rather, to match any character except for the + /// line terminator (which is `\n` by default). When this mode is + /// enabled, the behavior changes such that `.` truly matches any + /// character. + /// + /// This setting can also be configured using the inline flag `s` in + /// the pattern. For example, `(?s:.)` and `\p{any}` are equivalent + /// regexes. + /// + /// The default for this is `false`. + /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexBuilder; + /// + /// let re = RegexBuilder::new(r"foo.bar") + /// .dot_matches_new_line(true) + /// .build() + /// .unwrap(); + /// let hay = b"foo\nbar"; + /// assert_eq!(Some(&b"foo\nbar"[..]), re.find(hay).map(|m| m.as_bytes())); + /// ``` + pub fn dot_matches_new_line( + &mut self, + yes: bool, + ) -> &mut RegexBuilder { + self.builder.dot_matches_new_line(yes); + self + } + + /// This configures CRLF mode for the entire pattern. + /// + /// When CRLF mode is enabled, both `\r` ("carriage return" or CR for + /// short) and `\n` ("line feed" or LF for short) are treated as line + /// terminators. This results in the following: + /// + /// * Unless dot-matches-new-line mode is enabled, `.` will now match + /// any character except for `\n` and `\r`. + /// * When multi-line mode is enabled, `^` will match immediately + /// following a `\n` or a `\r`. Similarly, `$` will match immediately + /// preceding a `\n` or a `\r`. 
Neither `^` nor `$` will ever match + /// between `\r` and `\n`. + /// + /// This setting can also be configured using the inline flag `R` in + /// the pattern. + /// + /// The default for this is `false`. + /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexBuilder; + /// + /// let re = RegexBuilder::new(r"^foo$") + /// .multi_line(true) + /// .crlf(true) + /// .build() + /// .unwrap(); + /// let hay = b"\r\nfoo\r\n"; + /// // If CRLF mode weren't enabled here, then '$' wouldn't match + /// // immediately after 'foo', and thus no match would be found. + /// assert_eq!(Some(&b"foo"[..]), re.find(hay).map(|m| m.as_bytes())); + /// ``` + /// + /// This example demonstrates that `^` will never match at a position + /// between `\r` and `\n`. (`$` will similarly not match between a `\r` + /// and a `\n`.) + /// + /// ``` + /// use regex::bytes::RegexBuilder; + /// + /// let re = RegexBuilder::new(r"^") + /// .multi_line(true) + /// .crlf(true) + /// .build() + /// .unwrap(); + /// let hay = b"\r\n\r\n"; + /// let ranges: Vec<_> = re.find_iter(hay).map(|m| m.range()).collect(); + /// assert_eq!(ranges, vec![0..0, 2..2, 4..4]); + /// ``` + pub fn crlf(&mut self, yes: bool) -> &mut RegexBuilder { + self.builder.crlf(yes); + self + } + + /// Configures the line terminator to be used by the regex. + /// + /// The line terminator is relevant in two ways for a particular regex: + /// + /// * When dot-matches-new-line mode is *not* enabled (the default), + /// then `.` will match any character except for the configured line + /// terminator. + /// * When multi-line mode is enabled (not the default), then `^` and + /// `$` will match immediately after and before, respectively, a line + /// terminator. + /// + /// In both cases, if CRLF mode is enabled in a particular context, + /// then it takes precedence over any configured line terminator. + /// + /// This option cannot be configured from within the pattern. + /// + /// The default line terminator is `\n`. 
+ /// + /// # Example + /// + /// This shows how to treat the NUL byte as a line terminator. This can + /// be a useful heuristic when searching binary data. + /// + /// ``` + /// use regex::bytes::RegexBuilder; + /// + /// let re = RegexBuilder::new(r"^foo$") + /// .multi_line(true) + /// .line_terminator(b'\x00') + /// .build() + /// .unwrap(); + /// let hay = b"\x00foo\x00"; + /// assert_eq!(Some(1..4), re.find(hay).map(|m| m.range())); + /// ``` + /// + /// This example shows that the behavior of `.` is impacted by this + /// setting as well: + /// + /// ``` + /// use regex::bytes::RegexBuilder; + /// + /// let re = RegexBuilder::new(r".") + /// .line_terminator(b'\x00') + /// .build() + /// .unwrap(); + /// assert!(re.is_match(b"\n")); + /// assert!(!re.is_match(b"\x00")); + /// ``` + /// + /// This shows that building a regex will work even when the byte + /// given is not ASCII. This is unlike the top-level `Regex` API where + /// matching invalid UTF-8 is not allowed. + /// + /// Note though that you must disable Unicode mode. This is required + /// because Unicode mode requires matching one codepoint at a time, + /// and there is no way to match a non-ASCII byte as if it were a + /// codepoint. + /// + /// ``` + /// use regex::bytes::RegexBuilder; + /// + /// assert!( + /// RegexBuilder::new(r".") + /// .unicode(false) + /// .line_terminator(0x80) + /// .build() + /// .is_ok(), + /// ); + /// ``` + pub fn line_terminator(&mut self, byte: u8) -> &mut RegexBuilder { + self.builder.line_terminator(byte); + self + } + + /// This configures swap-greed mode for the entire pattern. + /// + /// When swap-greed mode is enabled, patterns like `a+` will become + /// non-greedy and patterns like `a+?` will become greedy. In other + /// words, the meanings of `a+` and `a+?` are switched. + /// + /// This setting can also be configured using the inline flag `U` in + /// the pattern. + /// + /// The default for this is `false`. 
+ /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexBuilder; + /// + /// let re = RegexBuilder::new(r"a+") + /// .swap_greed(true) + /// .build() + /// .unwrap(); + /// assert_eq!(Some(&b"a"[..]), re.find(b"aaa").map(|m| m.as_bytes())); + /// ``` + pub fn swap_greed(&mut self, yes: bool) -> &mut RegexBuilder { + self.builder.swap_greed(yes); + self + } + + /// This configures verbose mode for the entire pattern. + /// + /// When enabled, whitespace will treated as insignifcant in the + /// pattern and `#` can be used to start a comment until the next new + /// line. + /// + /// Normally, in most places in a pattern, whitespace is treated + /// literally. For example ` +` will match one or more ASCII whitespace + /// characters. + /// + /// When verbose mode is enabled, `\#` can be used to match a literal + /// `#` and `\ ` can be used to match a literal ASCII whitespace + /// character. + /// + /// Verbose mode is useful for permitting regexes to be formatted and + /// broken up more nicely. This may make them more easily readable. + /// + /// This setting can also be configured using the inline flag `x` in + /// the pattern. + /// + /// The default for this is `false`. + /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexBuilder; + /// + /// let pat = r" + /// \b + /// (?\p{Uppercase}\w*) # always start with uppercase letter + /// [\s--\n]+ # whitespace should separate names + /// (?: # middle name can be an initial! + /// (?:(?\p{Uppercase})\.|(?\p{Uppercase}\w*)) + /// [\s--\n]+ + /// )? + /// (?\p{Uppercase}\w*) + /// \b + /// "; + /// let re = RegexBuilder::new(pat) + /// .ignore_whitespace(true) + /// .build() + /// .unwrap(); + /// + /// let caps = re.captures(b"Harry Potter").unwrap(); + /// assert_eq!(&b"Harry"[..], &caps["first"]); + /// assert_eq!(&b"Potter"[..], &caps["last"]); + /// + /// let caps = re.captures(b"Harry J. 
Potter").unwrap(); + /// assert_eq!(&b"Harry"[..], &caps["first"]); + /// // Since a middle name/initial isn't required for an overall match, + /// // we can't assume that 'initial' or 'middle' will be populated! + /// assert_eq!( + /// Some(&b"J"[..]), + /// caps.name("initial").map(|m| m.as_bytes()), + /// ); + /// assert_eq!(None, caps.name("middle").map(|m| m.as_bytes())); + /// assert_eq!(&b"Potter"[..], &caps["last"]); + /// + /// let caps = re.captures(b"Harry James Potter").unwrap(); + /// assert_eq!(&b"Harry"[..], &caps["first"]); + /// // Since a middle name/initial isn't required for an overall match, + /// // we can't assume that 'initial' or 'middle' will be populated! + /// assert_eq!(None, caps.name("initial").map(|m| m.as_bytes())); + /// assert_eq!( + /// Some(&b"James"[..]), + /// caps.name("middle").map(|m| m.as_bytes()), + /// ); + /// assert_eq!(&b"Potter"[..], &caps["last"]); + /// ``` + pub fn ignore_whitespace(&mut self, yes: bool) -> &mut RegexBuilder { + self.builder.ignore_whitespace(yes); + self + } + + /// This configures octal mode for the entire pattern. + /// + /// Octal syntax is a little-known way of uttering Unicode codepoints + /// in a pattern. For example, `a`, `\x61`, `\u0061` and `\141` are all + /// equivalent patterns, where the last example shows octal syntax. + /// + /// While supporting octal syntax isn't in and of itself a problem, + /// it does make good error messages harder. That is, in PCRE based + /// regex engines, syntax like `\1` invokes a backreference, which is + /// explicitly unsupported this library. However, many users expect + /// backreferences to be supported. Therefore, when octal support + /// is disabled, the error message will explicitly mention that + /// backreferences aren't supported. + /// + /// The default for this is `false`. 
+ /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexBuilder; + /// + /// // Normally this pattern would not compile, with an error message + /// // about backreferences not being supported. But with octal mode + /// // enabled, octal escape sequences work. + /// let re = RegexBuilder::new(r"\141") + /// .octal(true) + /// .build() + /// .unwrap(); + /// assert!(re.is_match(b"a")); + /// ``` + pub fn octal(&mut self, yes: bool) -> &mut RegexBuilder { + self.builder.octal(yes); + self + } + + /// Sets the approximate size limit, in bytes, of the compiled regex. + /// + /// This roughly corresponds to the number of heap memory, in + /// bytes, occupied by a single regex. If the regex would otherwise + /// approximately exceed this limit, then compiling that regex will + /// fail. + /// + /// The main utility of a method like this is to avoid compiling + /// regexes that use an unexpected amount of resources, such as + /// time and memory. Even if the memory usage of a large regex is + /// acceptable, its search time may not be. Namely, worst case time + /// complexity for search is `O(m * n)`, where `m ~ len(pattern)` and + /// `n ~ len(haystack)`. That is, search time depends, in part, on the + /// size of the compiled regex. This means that putting a limit on the + /// size of the regex limits how much a regex can impact search time. + /// + /// For more information about regex size limits, see the section on + /// [untrusted inputs](crate#untrusted-input) in the top-level crate + /// documentation. + /// + /// The default for this is some reasonable number that permits most + /// patterns to compile successfully. + /// + /// # Example + /// + /// ``` + /// # if !cfg!(target_pointer_width = "64") { return; } // see #1041 + /// use regex::bytes::RegexBuilder; + /// + /// // It may surprise you how big some seemingly small patterns can + /// // be! 
Since \w is Unicode aware, this generates a regex that can + /// // match approximately 140,000 distinct codepoints. + /// assert!(RegexBuilder::new(r"\w").size_limit(45_000).build().is_err()); + /// ``` + pub fn size_limit(&mut self, bytes: usize) -> &mut RegexBuilder { + self.builder.size_limit(bytes); + self + } + + /// Set the approximate capacity, in bytes, of the cache of transitions + /// used by the lazy DFA. + /// + /// While the lazy DFA isn't always used, in tends to be the most + /// commonly use regex engine in default configurations. It tends to + /// adopt the performance profile of a fully build DFA, but without the + /// downside of taking worst case exponential time to build. + /// + /// The downside is that it needs to keep a cache of transitions and + /// states that are built while running a search, and this cache + /// can fill up. When it fills up, the cache will reset itself. Any + /// previously generated states and transitions will then need to be + /// re-generated. If this happens too many times, then this library + /// will bail out of using the lazy DFA and switch to a different regex + /// engine. + /// + /// If your regex provokes this particular downside of the lazy DFA, + /// then it may be beneficial to increase its cache capacity. This will + /// potentially reduce the frequency of cache resetting (ideally to + /// `0`). While it won't fix all potential performance problems with + /// the lazy DFA, increasing the cache capacity does fix some. + /// + /// There is no easy way to determine, a priori, whether increasing + /// this cache capacity will help. In general, the larger your regex, + /// the more cache it's likely to use. But that isn't an ironclad rule. + /// For example, a regex like `[01]*1[01]{N}` would normally produce a + /// fully build DFA that is exponential in size with respect to `N`. 
+ /// The lazy DFA will prevent exponential space blow-up, but it cache + /// is likely to fill up, even when it's large and even for smallish + /// values of `N`. + /// + /// If you aren't sure whether this helps or not, it is sensible to + /// set this to some arbitrarily large number in testing, such as + /// `usize::MAX`. Namely, this represents the amount of capacity that + /// *may* be used. It's probably not a good idea to use `usize::MAX` in + /// production though, since it implies there are no controls on heap + /// memory used by this library during a search. In effect, set it to + /// whatever you're willing to allocate for a single regex search. + pub fn dfa_size_limit(&mut self, bytes: usize) -> &mut RegexBuilder { + self.builder.dfa_size_limit(bytes); + self + } + + /// Set the nesting limit for this parser. + /// + /// The nesting limit controls how deep the abstract syntax tree is + /// allowed to be. If the AST exceeds the given limit (e.g., with too + /// many nested groups), then an error is returned by the parser. + /// + /// The purpose of this limit is to act as a heuristic to prevent stack + /// overflow for consumers that do structural induction on an AST using + /// explicit recursion. While this crate never does this (instead using + /// constant stack space and moving the call stack to the heap), other + /// crates may. + /// + /// This limit is not checked until the entire AST is parsed. + /// Therefore, if callers want to put a limit on the amount of heap + /// space used, then they should impose a limit on the length, in + /// bytes, of the concrete pattern string. In particular, this is + /// viable since this parser implementation will limit itself to heap + /// space proportional to the length of the pattern string. See also + /// the [untrusted inputs](crate#untrusted-input) section in the + /// top-level crate documentation for more information about this. 
+ /// + /// Note that a nest limit of `0` will return a nest limit error for + /// most patterns but not all. For example, a nest limit of `0` permits + /// `a` but not `ab`, since `ab` requires an explicit concatenation, + /// which results in a nest depth of `1`. In general, a nest limit is + /// not something that manifests in an obvious way in the concrete + /// syntax, therefore, it should not be used in a granular way. + /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexBuilder; + /// + /// assert!(RegexBuilder::new(r"a").nest_limit(0).build().is_ok()); + /// assert!(RegexBuilder::new(r"ab").nest_limit(0).build().is_err()); + /// ``` + pub fn nest_limit(&mut self, limit: u32) -> &mut RegexBuilder { + self.builder.nest_limit(limit); + self + } + } + + /// A configurable builder for a [`RegexSet`]. + /// + /// This builder can be used to programmatically set flags such as `i` + /// (case insensitive) and `x` (for verbose mode). This builder can also be + /// used to configure things like the line terminator and a size limit on + /// the compiled regular expression. + #[derive(Clone, Debug)] + pub struct RegexSetBuilder { + builder: Builder, + } + + impl RegexSetBuilder { + /// Create a new builder with a default configuration for the given + /// patterns. + /// + /// If the patterns are invalid or exceed the configured size limits, + /// then an error will be returned when [`RegexSetBuilder::build`] is + /// called. + pub fn new(patterns: I) -> RegexSetBuilder + where + I: IntoIterator, + S: AsRef, + { + RegexSetBuilder { builder: Builder::new(patterns) } + } + + /// Compiles the patterns given to `RegexSetBuilder::new` with the + /// configuration set on this builder. + /// + /// If the patterns aren't valid regexes or if a configured size limit + /// was exceeded, then an error is returned. + pub fn build(&self) -> Result { + self.builder.build_many_bytes() + } + + /// This configures Unicode mode for the all of the patterns. 
+ /// + /// Enabling Unicode mode does a number of things: + /// + /// * Most fundamentally, it causes the fundamental atom of matching + /// to be a single codepoint. When Unicode mode is disabled, it's a + /// single byte. For example, when Unicode mode is enabled, `.` will + /// match `💩` once, where as it will match 4 times when Unicode mode + /// is disabled. (Since the UTF-8 encoding of `💩` is 4 bytes long.) + /// * Case insensitive matching uses Unicode simple case folding rules. + /// * Unicode character classes like `\p{Letter}` and `\p{Greek}` are + /// available. + /// * Perl character classes are Unicode aware. That is, `\w`, `\s` and + /// `\d`. + /// * The word boundary assertions, `\b` and `\B`, use the Unicode + /// definition of a word character. + /// + /// Note that unlike the top-level `RegexSet` for searching `&str`, + /// it is permitted to disable Unicode mode even if the resulting + /// pattern could match invalid UTF-8. For example, `(?-u:.)` is not + /// a valid pattern for a top-level `RegexSet`, but is valid for a + /// `bytes::RegexSet`. + /// + /// For more details on the Unicode support in this crate, see the + /// [Unicode section](crate#unicode) in this crate's top-level + /// documentation. + /// + /// The default for this is `true`. + /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexSetBuilder; + /// + /// let re = RegexSetBuilder::new([r"\w"]) + /// .unicode(false) + /// .build() + /// .unwrap(); + /// // Normally greek letters would be included in \w, but since + /// // Unicode mode is disabled, it only matches ASCII letters. + /// assert!(!re.is_match("δ".as_bytes())); + /// + /// let re = RegexSetBuilder::new([r"s"]) + /// .case_insensitive(true) + /// .unicode(false) + /// .build() + /// .unwrap(); + /// // Normally 'ſ' is included when searching for 's' case + /// // insensitively due to Unicode's simple case folding rules. 
But + /// // when Unicode mode is disabled, only ASCII case insensitive rules + /// // are used. + /// assert!(!re.is_match("ſ".as_bytes())); + /// ``` + /// + /// Since this builder is for constructing a + /// [`bytes::RegexSet`](RegexSet), one can disable Unicode mode even if + /// it would match invalid UTF-8: + /// + /// ``` + /// use regex::bytes::RegexSetBuilder; + /// + /// let re = RegexSetBuilder::new([r"."]) + /// .unicode(false) + /// .build() + /// .unwrap(); + /// // Normally greek letters would be included in \w, but since + /// // Unicode mode is disabled, it only matches ASCII letters. + /// assert!(re.is_match(b"\xFF")); + /// ``` + pub fn unicode(&mut self, yes: bool) -> &mut RegexSetBuilder { + self.builder.unicode(yes); + self + } + + /// This configures whether to enable case insensitive matching for all + /// of the patterns. + /// + /// This setting can also be configured using the inline flag `i` + /// in the pattern. For example, `(?i:foo)` matches `foo` case + /// insensitively while `(?-i:foo)` matches `foo` case sensitively. + /// + /// The default for this is `false`. + /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexSetBuilder; + /// + /// let re = RegexSetBuilder::new([r"foo(?-i:bar)quux"]) + /// .case_insensitive(true) + /// .build() + /// .unwrap(); + /// assert!(re.is_match(b"FoObarQuUx")); + /// // Even though case insensitive matching is enabled in the builder, + /// // it can be locally disabled within the pattern. In this case, + /// // `bar` is matched case sensitively. + /// assert!(!re.is_match(b"fooBARquux")); + /// ``` + pub fn case_insensitive(&mut self, yes: bool) -> &mut RegexSetBuilder { + self.builder.case_insensitive(yes); + self + } + + /// This configures multi-line mode for all of the patterns. + /// + /// Enabling multi-line mode changes the behavior of the `^` and `$` + /// anchor assertions. 
Instead of only matching at the beginning and + /// end of a haystack, respectively, multi-line mode causes them to + /// match at the beginning and end of a line *in addition* to the + /// beginning and end of a haystack. More precisely, `^` will match at + /// the position immediately following a `\n` and `$` will match at the + /// position immediately preceding a `\n`. + /// + /// The behavior of this option can be impacted by other settings too: + /// + /// * The [`RegexSetBuilder::line_terminator`] option changes `\n` + /// above to any ASCII byte. + /// * The [`RegexSetBuilder::crlf`] option changes the line terminator + /// to be either `\r` or `\n`, but never at the position between a `\r` + /// and `\n`. + /// + /// This setting can also be configured using the inline flag `m` in + /// the pattern. + /// + /// The default for this is `false`. + /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexSetBuilder; + /// + /// let re = RegexSetBuilder::new([r"^foo$"]) + /// .multi_line(true) + /// .build() + /// .unwrap(); + /// assert!(re.is_match(b"\nfoo\n")); + /// ``` + pub fn multi_line(&mut self, yes: bool) -> &mut RegexSetBuilder { + self.builder.multi_line(yes); + self + } + + /// This configures dot-matches-new-line mode for the entire pattern. + /// + /// Perhaps surprisingly, the default behavior for `.` is not to match + /// any character, but rather, to match any character except for the + /// line terminator (which is `\n` by default). When this mode is + /// enabled, the behavior changes such that `.` truly matches any + /// character. + /// + /// This setting can also be configured using the inline flag `s` in + /// the pattern. For example, `(?s:.)` and `\p{any}` are equivalent + /// regexes. + /// + /// The default for this is `false`. 
+ /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexSetBuilder; + /// + /// let re = RegexSetBuilder::new([r"foo.bar"]) + /// .dot_matches_new_line(true) + /// .build() + /// .unwrap(); + /// let hay = b"foo\nbar"; + /// assert!(re.is_match(hay)); + /// ``` + pub fn dot_matches_new_line( + &mut self, + yes: bool, + ) -> &mut RegexSetBuilder { + self.builder.dot_matches_new_line(yes); + self + } + + /// This configures CRLF mode for all of the patterns. + /// + /// When CRLF mode is enabled, both `\r` ("carriage return" or CR for + /// short) and `\n` ("line feed" or LF for short) are treated as line + /// terminators. This results in the following: + /// + /// * Unless dot-matches-new-line mode is enabled, `.` will now match + /// any character except for `\n` and `\r`. + /// * When multi-line mode is enabled, `^` will match immediately + /// following a `\n` or a `\r`. Similarly, `$` will match immediately + /// preceding a `\n` or a `\r`. Neither `^` nor `$` will ever match + /// between `\r` and `\n`. + /// + /// This setting can also be configured using the inline flag `R` in + /// the pattern. + /// + /// The default for this is `false`. + /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexSetBuilder; + /// + /// let re = RegexSetBuilder::new([r"^foo$"]) + /// .multi_line(true) + /// .crlf(true) + /// .build() + /// .unwrap(); + /// let hay = b"\r\nfoo\r\n"; + /// // If CRLF mode weren't enabled here, then '$' wouldn't match + /// // immediately after 'foo', and thus no match would be found. + /// assert!(re.is_match(hay)); + /// ``` + /// + /// This example demonstrates that `^` will never match at a position + /// between `\r` and `\n`. (`$` will similarly not match between a `\r` + /// and a `\n`.) 
+ /// + /// ``` + /// use regex::bytes::RegexSetBuilder; + /// + /// let re = RegexSetBuilder::new([r"^\n"]) + /// .multi_line(true) + /// .crlf(true) + /// .build() + /// .unwrap(); + /// assert!(!re.is_match(b"\r\n")); + /// ``` + pub fn crlf(&mut self, yes: bool) -> &mut RegexSetBuilder { + self.builder.crlf(yes); + self + } + + /// Configures the line terminator to be used by the regex. + /// + /// The line terminator is relevant in two ways for a particular regex: + /// + /// * When dot-matches-new-line mode is *not* enabled (the default), + /// then `.` will match any character except for the configured line + /// terminator. + /// * When multi-line mode is enabled (not the default), then `^` and + /// `$` will match immediately after and before, respectively, a line + /// terminator. + /// + /// In both cases, if CRLF mode is enabled in a particular context, + /// then it takes precedence over any configured line terminator. + /// + /// This option cannot be configured from within the pattern. + /// + /// The default line terminator is `\n`. + /// + /// # Example + /// + /// This shows how to treat the NUL byte as a line terminator. This can + /// be a useful heuristic when searching binary data. + /// + /// ``` + /// use regex::bytes::RegexSetBuilder; + /// + /// let re = RegexSetBuilder::new([r"^foo$"]) + /// .multi_line(true) + /// .line_terminator(b'\x00') + /// .build() + /// .unwrap(); + /// let hay = b"\x00foo\x00"; + /// assert!(re.is_match(hay)); + /// ``` + /// + /// This example shows that the behavior of `.` is impacted by this + /// setting as well: + /// + /// ``` + /// use regex::bytes::RegexSetBuilder; + /// + /// let re = RegexSetBuilder::new([r"."]) + /// .line_terminator(b'\x00') + /// .build() + /// .unwrap(); + /// assert!(re.is_match(b"\n")); + /// assert!(!re.is_match(b"\x00")); + /// ``` + /// + /// This shows that building a regex will work even when the byte given + /// is not ASCII. 
This is unlike the top-level `RegexSet` API where + /// matching invalid UTF-8 is not allowed. + /// + /// Note though that you must disable Unicode mode. This is required + /// because Unicode mode requires matching one codepoint at a time, + /// and there is no way to match a non-ASCII byte as if it were a + /// codepoint. + /// + /// ``` + /// use regex::bytes::RegexSetBuilder; + /// + /// assert!( + /// RegexSetBuilder::new([r"."]) + /// .unicode(false) + /// .line_terminator(0x80) + /// .build() + /// .is_ok(), + /// ); + /// ``` + pub fn line_terminator(&mut self, byte: u8) -> &mut RegexSetBuilder { + self.builder.line_terminator(byte); + self + } + + /// This configures swap-greed mode for all of the patterns. + /// + /// When swap-greed mode is enabled, patterns like `a+` will become + /// non-greedy and patterns like `a+?` will become greedy. In other + /// words, the meanings of `a+` and `a+?` are switched. + /// + /// This setting can also be configured using the inline flag `U` in + /// the pattern. + /// + /// Note that this is generally not useful for a `RegexSet` since a + /// `RegexSet` can only report whether a pattern matches or not. Since + /// greediness never impacts whether a match is found or not (only the + /// offsets of the match), it follows that whether parts of a pattern + /// are greedy or not doesn't matter for a `RegexSet`. + /// + /// The default for this is `false`. + pub fn swap_greed(&mut self, yes: bool) -> &mut RegexSetBuilder { + self.builder.swap_greed(yes); + self + } + + /// This configures verbose mode for all of the patterns. + /// + /// When enabled, whitespace will treated as insignifcant in the + /// pattern and `#` can be used to start a comment until the next new + /// line. + /// + /// Normally, in most places in a pattern, whitespace is treated + /// literally. For example ` +` will match one or more ASCII whitespace + /// characters. 
+ /// + /// When verbose mode is enabled, `\#` can be used to match a literal + /// `#` and `\ ` can be used to match a literal ASCII whitespace + /// character. + /// + /// Verbose mode is useful for permitting regexes to be formatted and + /// broken up more nicely. This may make them more easily readable. + /// + /// This setting can also be configured using the inline flag `x` in + /// the pattern. + /// + /// The default for this is `false`. + /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexSetBuilder; + /// + /// let pat = r" + /// \b + /// (?\p{Uppercase}\w*) # always start with uppercase letter + /// [\s--\n]+ # whitespace should separate names + /// (?: # middle name can be an initial! + /// (?:(?\p{Uppercase})\.|(?\p{Uppercase}\w*)) + /// [\s--\n]+ + /// )? + /// (?\p{Uppercase}\w*) + /// \b + /// "; + /// let re = RegexSetBuilder::new([pat]) + /// .ignore_whitespace(true) + /// .build() + /// .unwrap(); + /// assert!(re.is_match(b"Harry Potter")); + /// assert!(re.is_match(b"Harry J. Potter")); + /// assert!(re.is_match(b"Harry James Potter")); + /// assert!(!re.is_match(b"harry J. Potter")); + /// ``` + pub fn ignore_whitespace( + &mut self, + yes: bool, + ) -> &mut RegexSetBuilder { + self.builder.ignore_whitespace(yes); + self + } + + /// This configures octal mode for all of the patterns. + /// + /// Octal syntax is a little-known way of uttering Unicode codepoints + /// in a pattern. For example, `a`, `\x61`, `\u0061` and `\141` are all + /// equivalent patterns, where the last example shows octal syntax. + /// + /// While supporting octal syntax isn't in and of itself a problem, + /// it does make good error messages harder. That is, in PCRE based + /// regex engines, syntax like `\1` invokes a backreference, which is + /// explicitly unsupported this library. However, many users expect + /// backreferences to be supported. 
Therefore, when octal support + /// is disabled, the error message will explicitly mention that + /// backreferences aren't supported. + /// + /// The default for this is `false`. + /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexSetBuilder; + /// + /// // Normally this pattern would not compile, with an error message + /// // about backreferences not being supported. But with octal mode + /// // enabled, octal escape sequences work. + /// let re = RegexSetBuilder::new([r"\141"]) + /// .octal(true) + /// .build() + /// .unwrap(); + /// assert!(re.is_match(b"a")); + /// ``` + pub fn octal(&mut self, yes: bool) -> &mut RegexSetBuilder { + self.builder.octal(yes); + self + } + + /// Sets the approximate size limit, in bytes, of the compiled regex. + /// + /// This roughly corresponds to the number of heap memory, in + /// bytes, occupied by a single regex. If the regex would otherwise + /// approximately exceed this limit, then compiling that regex will + /// fail. + /// + /// The main utility of a method like this is to avoid compiling + /// regexes that use an unexpected amount of resources, such as + /// time and memory. Even if the memory usage of a large regex is + /// acceptable, its search time may not be. Namely, worst case time + /// complexity for search is `O(m * n)`, where `m ~ len(pattern)` and + /// `n ~ len(haystack)`. That is, search time depends, in part, on the + /// size of the compiled regex. This means that putting a limit on the + /// size of the regex limits how much a regex can impact search time. + /// + /// For more information about regex size limits, see the section on + /// [untrusted inputs](crate#untrusted-input) in the top-level crate + /// documentation. + /// + /// The default for this is some reasonable number that permits most + /// patterns to compile successfully. 
+ /// + /// # Example + /// + /// ``` + /// # if !cfg!(target_pointer_width = "64") { return; } // see #1041 + /// use regex::bytes::RegexSetBuilder; + /// + /// // It may surprise you how big some seemingly small patterns can + /// // be! Since \w is Unicode aware, this generates a regex that can + /// // match approximately 140,000 distinct codepoints. + /// assert!( + /// RegexSetBuilder::new([r"\w"]) + /// .size_limit(45_000) + /// .build() + /// .is_err() + /// ); + /// ``` + pub fn size_limit(&mut self, bytes: usize) -> &mut RegexSetBuilder { + self.builder.size_limit(bytes); + self + } + + /// Set the approximate capacity, in bytes, of the cache of transitions + /// used by the lazy DFA. + /// + /// While the lazy DFA isn't always used, in tends to be the most + /// commonly use regex engine in default configurations. It tends to + /// adopt the performance profile of a fully build DFA, but without the + /// downside of taking worst case exponential time to build. + /// + /// The downside is that it needs to keep a cache of transitions and + /// states that are built while running a search, and this cache + /// can fill up. When it fills up, the cache will reset itself. Any + /// previously generated states and transitions will then need to be + /// re-generated. If this happens too many times, then this library + /// will bail out of using the lazy DFA and switch to a different regex + /// engine. + /// + /// If your regex provokes this particular downside of the lazy DFA, + /// then it may be beneficial to increase its cache capacity. This will + /// potentially reduce the frequency of cache resetting (ideally to + /// `0`). While it won't fix all potential performance problems with + /// the lazy DFA, increasing the cache capacity does fix some. + /// + /// There is no easy way to determine, a priori, whether increasing + /// this cache capacity will help. In general, the larger your regex, + /// the more cache it's likely to use. 
But that isn't an ironclad rule. + /// For example, a regex like `[01]*1[01]{N}` would normally produce a + /// fully build DFA that is exponential in size with respect to `N`. + /// The lazy DFA will prevent exponential space blow-up, but it cache + /// is likely to fill up, even when it's large and even for smallish + /// values of `N`. + /// + /// If you aren't sure whether this helps or not, it is sensible to + /// set this to some arbitrarily large number in testing, such as + /// `usize::MAX`. Namely, this represents the amount of capacity that + /// *may* be used. It's probably not a good idea to use `usize::MAX` in + /// production though, since it implies there are no controls on heap + /// memory used by this library during a search. In effect, set it to + /// whatever you're willing to allocate for a single regex search. + pub fn dfa_size_limit( + &mut self, + bytes: usize, + ) -> &mut RegexSetBuilder { + self.builder.dfa_size_limit(bytes); + self + } + + /// Set the nesting limit for this parser. + /// + /// The nesting limit controls how deep the abstract syntax tree is + /// allowed to be. If the AST exceeds the given limit (e.g., with too + /// many nested groups), then an error is returned by the parser. + /// + /// The purpose of this limit is to act as a heuristic to prevent stack + /// overflow for consumers that do structural induction on an AST using + /// explicit recursion. While this crate never does this (instead using + /// constant stack space and moving the call stack to the heap), other + /// crates may. + /// + /// This limit is not checked until the entire AST is parsed. + /// Therefore, if callers want to put a limit on the amount of heap + /// space used, then they should impose a limit on the length, in + /// bytes, of the concrete pattern string. In particular, this is + /// viable since this parser implementation will limit itself to heap + /// space proportional to the length of the pattern string. 
See also + /// the [untrusted inputs](crate#untrusted-input) section in the + /// top-level crate documentation for more information about this. + /// + /// Note that a nest limit of `0` will return a nest limit error for + /// most patterns but not all. For example, a nest limit of `0` permits + /// `a` but not `ab`, since `ab` requires an explicit concatenation, + /// which results in a nest depth of `1`. In general, a nest limit is + /// not something that manifests in an obvious way in the concrete + /// syntax, therefore, it should not be used in a granular way. + /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexSetBuilder; + /// + /// assert!(RegexSetBuilder::new([r"a"]).nest_limit(0).build().is_ok()); + /// assert!(RegexSetBuilder::new([r"ab"]).nest_limit(0).build().is_err()); + /// ``` + pub fn nest_limit(&mut self, limit: u32) -> &mut RegexSetBuilder { + self.builder.nest_limit(limit); + self + } + } +} diff --git a/vendor/regex/src/bytes.rs b/vendor/regex/src/bytes.rs new file mode 100644 index 0000000..383ac4a --- /dev/null +++ b/vendor/regex/src/bytes.rs @@ -0,0 +1,91 @@ +/*! +Search for regex matches in `&[u8]` haystacks. + +This module provides a nearly identical API via [`Regex`] to the one found in +the top-level of this crate. There are two important differences: + +1. Matching is done on `&[u8]` instead of `&str`. Additionally, `Vec` +is used where `String` would have been used in the top-level API. +2. Unicode support can be disabled even when disabling it would result in +matching invalid UTF-8 bytes. + +# Example: match null terminated string + +This shows how to find all null-terminated strings in a slice of bytes. This +works even if a C string contains invalid UTF-8. + +```rust +use regex::bytes::Regex; + +let re = Regex::new(r"(?-u)(?[^\x00]+)\x00").unwrap(); +let hay = b"foo\x00qu\xFFux\x00baz\x00"; + +// Extract all of the strings without the NUL terminator from each match. 
+// The unwrap is OK here since a match requires the `cstr` capture to match. +let cstrs: Vec<&[u8]> = + re.captures_iter(hay) + .map(|c| c.name("cstr").unwrap().as_bytes()) + .collect(); +assert_eq!(cstrs, vec![&b"foo"[..], &b"qu\xFFux"[..], &b"baz"[..]]); +``` + +# Example: selectively enable Unicode support + +This shows how to match an arbitrary byte pattern followed by a UTF-8 encoded +string (e.g., to extract a title from a Matroska file): + +```rust +use regex::bytes::Regex; + +let re = Regex::new( + r"(?-u)\x7b\xa9(?:[\x80-\xfe]|[\x40-\xff].)(?u:(.*))" +).unwrap(); +let hay = b"\x12\xd0\x3b\x5f\x7b\xa9\x85\xe2\x98\x83\x80\x98\x54\x76\x68\x65"; + +// Notice that despite the `.*` at the end, it will only match valid UTF-8 +// because Unicode mode was enabled with the `u` flag. Without the `u` flag, +// the `.*` would match the rest of the bytes regardless of whether they were +// valid UTF-8. +let (_, [title]) = re.captures(hay).unwrap().extract(); +assert_eq!(title, b"\xE2\x98\x83"); +// We can UTF-8 decode the title now. And the unwrap here +// is correct because the existence of a match guarantees +// that `title` is valid UTF-8. +let title = std::str::from_utf8(title).unwrap(); +assert_eq!(title, "☃"); +``` + +In general, if the Unicode flag is enabled in a capture group and that capture +is part of the overall match, then the capture is *guaranteed* to be valid +UTF-8. + +# Syntax + +The supported syntax is pretty much the same as the syntax for Unicode +regular expressions with a few changes that make sense for matching arbitrary +bytes: + +1. The `u` flag can be disabled even when disabling it might cause the regex to +match invalid UTF-8. When the `u` flag is disabled, the regex is said to be in +"ASCII compatible" mode. +2. In ASCII compatible mode, Unicode character classes are not allowed. Literal +Unicode scalar values outside of character classes are allowed. +3. 
In ASCII compatible mode, Perl character classes (`\w`, `\d` and `\s`) +revert to their typical ASCII definition. `\w` maps to `[[:word:]]`, `\d` maps +to `[[:digit:]]` and `\s` maps to `[[:space:]]`. +4. In ASCII compatible mode, word boundaries use the ASCII compatible `\w` to +determine whether a byte is a word byte or not. +5. Hexadecimal notation can be used to specify arbitrary bytes instead of +Unicode codepoints. For example, in ASCII compatible mode, `\xFF` matches the +literal byte `\xFF`, while in Unicode mode, `\xFF` is the Unicode codepoint +`U+00FF` that matches its UTF-8 encoding of `\xC3\xBF`. Similarly for octal +notation when enabled. +6. In ASCII compatible mode, `.` matches any *byte* except for `\n`. When the +`s` flag is additionally enabled, `.` matches any byte. + +# Performance + +In general, one should expect performance on `&[u8]` to be roughly similar to +performance on `&str`. +*/ +pub use crate::{builders::bytes::*, regex::bytes::*, regexset::bytes::*}; diff --git a/vendor/regex/src/compile.rs b/vendor/regex/src/compile.rs deleted file mode 100644 index 0030cfb..0000000 --- a/vendor/regex/src/compile.rs +++ /dev/null @@ -1,1324 +0,0 @@ -use std::collections::HashMap; -use std::fmt; -use std::iter; -use std::result; -use std::sync::Arc; - -use regex_syntax::hir::{self, Hir, Look}; -use regex_syntax::is_word_byte; -use regex_syntax::utf8::{Utf8Range, Utf8Sequence, Utf8Sequences}; - -use crate::prog::{ - EmptyLook, Inst, InstBytes, InstChar, InstEmptyLook, InstPtr, InstRanges, - InstSave, InstSplit, Program, -}; - -use crate::Error; - -type Result = result::Result; -type ResultOrEmpty = result::Result, Error>; - -#[derive(Debug)] -struct Patch { - hole: Hole, - entry: InstPtr, -} - -/// A compiler translates a regular expression AST to a sequence of -/// instructions. The sequence of instructions represents an NFA. -// `Compiler` is only public via the `internal` module, so avoid deriving -// `Debug`. 
-#[allow(missing_debug_implementations)] -pub struct Compiler { - insts: Vec, - compiled: Program, - capture_name_idx: HashMap, - num_exprs: usize, - size_limit: usize, - suffix_cache: SuffixCache, - utf8_seqs: Option, - byte_classes: ByteClassSet, - // This keeps track of extra bytes allocated while compiling the regex - // program. Currently, this corresponds to two things. First is the heap - // memory allocated by Unicode character classes ('InstRanges'). Second is - // a "fake" amount of memory used by empty sub-expressions, so that enough - // empty sub-expressions will ultimately trigger the compiler to bail - // because of a size limit restriction. (That empty sub-expressions don't - // add to heap memory usage is more-or-less an implementation detail.) In - // the second case, if we don't bail, then an excessively large repetition - // on an empty sub-expression can result in the compiler using a very large - // amount of CPU time. - extra_inst_bytes: usize, -} - -impl Compiler { - /// Create a new regular expression compiler. - /// - /// Various options can be set before calling `compile` on an expression. - pub fn new() -> Self { - Compiler { - insts: vec![], - compiled: Program::new(), - capture_name_idx: HashMap::new(), - num_exprs: 0, - size_limit: 10 * (1 << 20), - suffix_cache: SuffixCache::new(1000), - utf8_seqs: Some(Utf8Sequences::new('\x00', '\x00')), - byte_classes: ByteClassSet::new(), - extra_inst_bytes: 0, - } - } - - /// The size of the resulting program is limited by size_limit. If - /// the program approximately exceeds the given size (in bytes), then - /// compilation will stop and return an error. - pub fn size_limit(mut self, size_limit: usize) -> Self { - self.size_limit = size_limit; - self - } - - /// If bytes is true, then the program is compiled as a byte based - /// automaton, which incorporates UTF-8 decoding into the machine. 
If it's - /// false, then the automaton is Unicode scalar value based, e.g., an - /// engine utilizing such an automaton is responsible for UTF-8 decoding. - /// - /// The specific invariant is that when returning a byte based machine, - /// the neither the `Char` nor `Ranges` instructions are produced. - /// Conversely, when producing a Unicode scalar value machine, the `Bytes` - /// instruction is never produced. - /// - /// Note that `dfa(true)` implies `bytes(true)`. - pub fn bytes(mut self, yes: bool) -> Self { - self.compiled.is_bytes = yes; - self - } - - /// When disabled, the program compiled may match arbitrary bytes. - /// - /// When enabled (the default), all compiled programs exclusively match - /// valid UTF-8 bytes. - pub fn only_utf8(mut self, yes: bool) -> Self { - self.compiled.only_utf8 = yes; - self - } - - /// When set, the machine returned is suitable for use in the DFA matching - /// engine. - /// - /// In particular, this ensures that if the regex is not anchored in the - /// beginning, then a preceding `.*?` is included in the program. (The NFA - /// based engines handle the preceding `.*?` explicitly, which is difficult - /// or impossible in the DFA engine.) - pub fn dfa(mut self, yes: bool) -> Self { - self.compiled.is_dfa = yes; - self - } - - /// When set, the machine returned is suitable for matching text in - /// reverse. In particular, all concatenations are flipped. - pub fn reverse(mut self, yes: bool) -> Self { - self.compiled.is_reverse = yes; - self - } - - /// Compile a regular expression given its AST. - /// - /// The compiler is guaranteed to succeed unless the program exceeds the - /// specified size limit. If the size limit is exceeded, then compilation - /// stops and returns an error. 
- pub fn compile(mut self, exprs: &[Hir]) -> result::Result { - debug_assert!(!exprs.is_empty()); - self.num_exprs = exprs.len(); - if exprs.len() == 1 { - self.compile_one(&exprs[0]) - } else { - self.compile_many(exprs) - } - } - - fn compile_one(mut self, expr: &Hir) -> result::Result { - // If we're compiling a forward DFA and we aren't anchored, then - // add a `.*?` before the first capture group. - // Other matching engines handle this by baking the logic into the - // matching engine itself. - let mut dotstar_patch = Patch { hole: Hole::None, entry: 0 }; - self.compiled.is_anchored_start = - expr.properties().look_set_prefix().contains(Look::Start); - self.compiled.is_anchored_end = - expr.properties().look_set_suffix().contains(Look::End); - if self.compiled.needs_dotstar() { - dotstar_patch = self.c_dotstar()?; - self.compiled.start = dotstar_patch.entry; - } - self.compiled.captures = vec![None]; - let patch = - self.c_capture(0, expr)?.unwrap_or_else(|| self.next_inst()); - if self.compiled.needs_dotstar() { - self.fill(dotstar_patch.hole, patch.entry); - } else { - self.compiled.start = patch.entry; - } - self.fill_to_next(patch.hole); - self.compiled.matches = vec![self.insts.len()]; - self.push_compiled(Inst::Match(0)); - self.compiled.static_captures_len = - expr.properties().static_explicit_captures_len(); - self.compile_finish() - } - - fn compile_many( - mut self, - exprs: &[Hir], - ) -> result::Result { - debug_assert!(exprs.len() > 1); - - self.compiled.is_anchored_start = exprs - .iter() - .all(|e| e.properties().look_set_prefix().contains(Look::Start)); - self.compiled.is_anchored_end = exprs - .iter() - .all(|e| e.properties().look_set_suffix().contains(Look::End)); - let mut dotstar_patch = Patch { hole: Hole::None, entry: 0 }; - if self.compiled.needs_dotstar() { - dotstar_patch = self.c_dotstar()?; - self.compiled.start = dotstar_patch.entry; - } else { - self.compiled.start = 0; // first instruction is always split - } - 
self.fill_to_next(dotstar_patch.hole); - - let mut prev_hole = Hole::None; - for (i, expr) in exprs[0..exprs.len() - 1].iter().enumerate() { - self.fill_to_next(prev_hole); - let split = self.push_split_hole(); - let Patch { hole, entry } = - self.c_capture(0, expr)?.unwrap_or_else(|| self.next_inst()); - self.fill_to_next(hole); - self.compiled.matches.push(self.insts.len()); - self.push_compiled(Inst::Match(i)); - prev_hole = self.fill_split(split, Some(entry), None); - } - let i = exprs.len() - 1; - let Patch { hole, entry } = - self.c_capture(0, &exprs[i])?.unwrap_or_else(|| self.next_inst()); - self.fill(prev_hole, entry); - self.fill_to_next(hole); - self.compiled.matches.push(self.insts.len()); - self.push_compiled(Inst::Match(i)); - self.compile_finish() - } - - fn compile_finish(mut self) -> result::Result { - self.compiled.insts = - self.insts.into_iter().map(|inst| inst.unwrap()).collect(); - self.compiled.byte_classes = self.byte_classes.byte_classes(); - self.compiled.capture_name_idx = Arc::new(self.capture_name_idx); - Ok(self.compiled) - } - - /// Compile expr into self.insts, returning a patch on success, - /// or an error if we run out of memory. - /// - /// All of the c_* methods of the compiler share the contract outlined - /// here. - /// - /// The main thing that a c_* method does is mutate `self.insts` - /// to add a list of mostly compiled instructions required to execute - /// the given expression. `self.insts` contains MaybeInsts rather than - /// Insts because there is some backpatching required. - /// - /// The `Patch` value returned by each c_* method provides metadata - /// about the compiled instructions emitted to `self.insts`. The - /// `entry` member of the patch refers to the first instruction - /// (the entry point), while the `hole` member contains zero or - /// more offsets to partial instructions that need to be backpatched. 
- /// The c_* routine can't know where its list of instructions are going to - /// jump to after execution, so it is up to the caller to patch - /// these jumps to point to the right place. So compiling some - /// expression, e, we would end up with a situation that looked like: - /// - /// ```text - /// self.insts = [ ..., i1, i2, ..., iexit1, ..., iexitn, ...] - /// ^ ^ ^ - /// | \ / - /// entry \ / - /// hole - /// ``` - /// - /// To compile two expressions, e1 and e2, concatenated together we - /// would do: - /// - /// ```ignore - /// let patch1 = self.c(e1); - /// let patch2 = self.c(e2); - /// ``` - /// - /// while leaves us with a situation that looks like - /// - /// ```text - /// self.insts = [ ..., i1, ..., iexit1, ..., i2, ..., iexit2 ] - /// ^ ^ ^ ^ - /// | | | | - /// entry1 hole1 entry2 hole2 - /// ``` - /// - /// Then to merge the two patches together into one we would backpatch - /// hole1 with entry2 and return a new patch that enters at entry1 - /// and has hole2 for a hole. In fact, if you look at the c_concat - /// method you will see that it does exactly this, though it handles - /// a list of expressions rather than just the two that we use for - /// an example. - /// - /// Ok(None) is returned when an expression is compiled to no - /// instruction, and so no patch.entry value makes sense. 
- fn c(&mut self, expr: &Hir) -> ResultOrEmpty { - use crate::prog; - use regex_syntax::hir::HirKind::*; - - self.check_size()?; - match *expr.kind() { - Empty => self.c_empty(), - Literal(hir::Literal(ref bytes)) => { - if self.compiled.is_reverse { - let mut bytes = bytes.to_vec(); - bytes.reverse(); - self.c_literal(&bytes) - } else { - self.c_literal(bytes) - } - } - Class(hir::Class::Unicode(ref cls)) => self.c_class(cls.ranges()), - Class(hir::Class::Bytes(ref cls)) => { - if self.compiled.uses_bytes() { - self.c_class_bytes(cls.ranges()) - } else { - assert!(cls.is_ascii()); - let mut char_ranges = vec![]; - for r in cls.iter() { - let (s, e) = (r.start() as char, r.end() as char); - char_ranges.push(hir::ClassUnicodeRange::new(s, e)); - } - self.c_class(&char_ranges) - } - } - Look(ref look) => match *look { - hir::Look::Start if self.compiled.is_reverse => { - self.c_empty_look(prog::EmptyLook::EndText) - } - hir::Look::Start => { - self.c_empty_look(prog::EmptyLook::StartText) - } - hir::Look::End if self.compiled.is_reverse => { - self.c_empty_look(prog::EmptyLook::StartText) - } - hir::Look::End => self.c_empty_look(prog::EmptyLook::EndText), - hir::Look::StartLF if self.compiled.is_reverse => { - self.byte_classes.set_range(b'\n', b'\n'); - self.c_empty_look(prog::EmptyLook::EndLine) - } - hir::Look::StartLF => { - self.byte_classes.set_range(b'\n', b'\n'); - self.c_empty_look(prog::EmptyLook::StartLine) - } - hir::Look::EndLF if self.compiled.is_reverse => { - self.byte_classes.set_range(b'\n', b'\n'); - self.c_empty_look(prog::EmptyLook::StartLine) - } - hir::Look::EndLF => { - self.byte_classes.set_range(b'\n', b'\n'); - self.c_empty_look(prog::EmptyLook::EndLine) - } - hir::Look::StartCRLF | hir::Look::EndCRLF => { - return Err(Error::Syntax( - "CRLF-aware line anchors are not supported yet" - .to_string(), - )); - } - hir::Look::WordAscii => { - self.byte_classes.set_word_boundary(); - self.c_empty_look(prog::EmptyLook::WordBoundaryAscii) - } - 
hir::Look::WordAsciiNegate => { - self.byte_classes.set_word_boundary(); - self.c_empty_look(prog::EmptyLook::NotWordBoundaryAscii) - } - hir::Look::WordUnicode => { - if !cfg!(feature = "unicode-perl") { - return Err(Error::Syntax( - "Unicode word boundaries are unavailable when \ - the unicode-perl feature is disabled" - .to_string(), - )); - } - self.compiled.has_unicode_word_boundary = true; - self.byte_classes.set_word_boundary(); - // We also make sure that all ASCII bytes are in a different - // class from non-ASCII bytes. Otherwise, it's possible for - // ASCII bytes to get lumped into the same class as non-ASCII - // bytes. This in turn may cause the lazy DFA to falsely start - // when it sees an ASCII byte that maps to a byte class with - // non-ASCII bytes. This ensures that never happens. - self.byte_classes.set_range(0, 0x7F); - self.c_empty_look(prog::EmptyLook::WordBoundary) - } - hir::Look::WordUnicodeNegate => { - if !cfg!(feature = "unicode-perl") { - return Err(Error::Syntax( - "Unicode word boundaries are unavailable when \ - the unicode-perl feature is disabled" - .to_string(), - )); - } - self.compiled.has_unicode_word_boundary = true; - self.byte_classes.set_word_boundary(); - // See comments above for why we set the ASCII range here. 
- self.byte_classes.set_range(0, 0x7F); - self.c_empty_look(prog::EmptyLook::NotWordBoundary) - } - }, - Capture(hir::Capture { index, ref name, ref sub }) => { - if index as usize >= self.compiled.captures.len() { - let name = match *name { - None => None, - Some(ref boxed_str) => Some(boxed_str.to_string()), - }; - self.compiled.captures.push(name.clone()); - if let Some(name) = name { - self.capture_name_idx.insert(name, index as usize); - } - } - self.c_capture(2 * index as usize, sub) - } - Concat(ref es) => { - if self.compiled.is_reverse { - self.c_concat(es.iter().rev()) - } else { - self.c_concat(es) - } - } - Alternation(ref es) => self.c_alternate(&**es), - Repetition(ref rep) => self.c_repeat(rep), - } - } - - fn c_empty(&mut self) -> ResultOrEmpty { - // See: https://github.com/rust-lang/regex/security/advisories/GHSA-m5pq-gvj9-9vr8 - // See: CVE-2022-24713 - // - // Since 'empty' sub-expressions don't increase the size of - // the actual compiled object, we "fake" an increase in its - // size so that our 'check_size_limit' routine will eventually - // stop compilation if there are too many empty sub-expressions - // (e.g., via a large repetition). - self.extra_inst_bytes += std::mem::size_of::(); - Ok(None) - } - - fn c_capture(&mut self, first_slot: usize, expr: &Hir) -> ResultOrEmpty { - if self.num_exprs > 1 || self.compiled.is_dfa { - // Don't ever compile Save instructions for regex sets because - // they are never used. They are also never used in DFA programs - // because DFAs can't handle captures. 
- self.c(expr) - } else { - let entry = self.insts.len(); - let hole = self.push_hole(InstHole::Save { slot: first_slot }); - let patch = self.c(expr)?.unwrap_or_else(|| self.next_inst()); - self.fill(hole, patch.entry); - self.fill_to_next(patch.hole); - let hole = self.push_hole(InstHole::Save { slot: first_slot + 1 }); - Ok(Some(Patch { hole, entry })) - } - } - - fn c_dotstar(&mut self) -> Result { - let hir = if self.compiled.only_utf8() { - Hir::dot(hir::Dot::AnyChar) - } else { - Hir::dot(hir::Dot::AnyByte) - }; - Ok(self - .c(&Hir::repetition(hir::Repetition { - min: 0, - max: None, - greedy: false, - sub: Box::new(hir), - }))? - .unwrap()) - } - - fn c_char(&mut self, c: char) -> ResultOrEmpty { - if self.compiled.uses_bytes() { - if c.is_ascii() { - let b = c as u8; - let hole = - self.push_hole(InstHole::Bytes { start: b, end: b }); - self.byte_classes.set_range(b, b); - Ok(Some(Patch { hole, entry: self.insts.len() - 1 })) - } else { - self.c_class(&[hir::ClassUnicodeRange::new(c, c)]) - } - } else { - let hole = self.push_hole(InstHole::Char { c }); - Ok(Some(Patch { hole, entry: self.insts.len() - 1 })) - } - } - - fn c_class(&mut self, ranges: &[hir::ClassUnicodeRange]) -> ResultOrEmpty { - use std::mem::size_of; - - if ranges.is_empty() { - return Err(Error::Syntax( - "empty character classes are not allowed".to_string(), - )); - } - if self.compiled.uses_bytes() { - Ok(Some(CompileClass { c: self, ranges }.compile()?)) - } else { - let ranges: Vec<(char, char)> = - ranges.iter().map(|r| (r.start(), r.end())).collect(); - let hole = if ranges.len() == 1 && ranges[0].0 == ranges[0].1 { - self.push_hole(InstHole::Char { c: ranges[0].0 }) - } else { - self.extra_inst_bytes += - ranges.len() * (size_of::() * 2); - self.push_hole(InstHole::Ranges { ranges }) - }; - Ok(Some(Patch { hole, entry: self.insts.len() - 1 })) - } - } - - fn c_byte(&mut self, b: u8) -> ResultOrEmpty { - self.c_class_bytes(&[hir::ClassBytesRange::new(b, b)]) - } - - fn 
c_class_bytes( - &mut self, - ranges: &[hir::ClassBytesRange], - ) -> ResultOrEmpty { - if ranges.is_empty() { - return Err(Error::Syntax( - "empty character classes are not allowed".to_string(), - )); - } - - let first_split_entry = self.insts.len(); - let mut holes = vec![]; - let mut prev_hole = Hole::None; - for r in &ranges[0..ranges.len() - 1] { - self.fill_to_next(prev_hole); - let split = self.push_split_hole(); - let next = self.insts.len(); - self.byte_classes.set_range(r.start(), r.end()); - holes.push(self.push_hole(InstHole::Bytes { - start: r.start(), - end: r.end(), - })); - prev_hole = self.fill_split(split, Some(next), None); - } - let next = self.insts.len(); - let r = &ranges[ranges.len() - 1]; - self.byte_classes.set_range(r.start(), r.end()); - holes.push( - self.push_hole(InstHole::Bytes { start: r.start(), end: r.end() }), - ); - self.fill(prev_hole, next); - Ok(Some(Patch { hole: Hole::Many(holes), entry: first_split_entry })) - } - - fn c_empty_look(&mut self, look: EmptyLook) -> ResultOrEmpty { - let hole = self.push_hole(InstHole::EmptyLook { look }); - Ok(Some(Patch { hole, entry: self.insts.len() - 1 })) - } - - fn c_literal(&mut self, bytes: &[u8]) -> ResultOrEmpty { - match core::str::from_utf8(bytes) { - Ok(string) => { - let mut it = string.chars(); - let Patch { mut hole, entry } = loop { - match it.next() { - None => return self.c_empty(), - Some(ch) => { - if let Some(p) = self.c_char(ch)? { - break p; - } - } - } - }; - for ch in it { - if let Some(p) = self.c_char(ch)? { - self.fill(hole, p.entry); - hole = p.hole; - } - } - Ok(Some(Patch { hole, entry })) - } - Err(_) => { - assert!(self.compiled.uses_bytes()); - let mut it = bytes.iter().copied(); - let Patch { mut hole, entry } = loop { - match it.next() { - None => return self.c_empty(), - Some(byte) => { - if let Some(p) = self.c_byte(byte)? { - break p; - } - } - } - }; - for byte in it { - if let Some(p) = self.c_byte(byte)? 
{ - self.fill(hole, p.entry); - hole = p.hole; - } - } - Ok(Some(Patch { hole, entry })) - } - } - } - - fn c_concat<'a, I>(&mut self, exprs: I) -> ResultOrEmpty - where - I: IntoIterator, - { - let mut exprs = exprs.into_iter(); - let Patch { mut hole, entry } = loop { - match exprs.next() { - None => return self.c_empty(), - Some(e) => { - if let Some(p) = self.c(e)? { - break p; - } - } - } - }; - for e in exprs { - if let Some(p) = self.c(e)? { - self.fill(hole, p.entry); - hole = p.hole; - } - } - Ok(Some(Patch { hole, entry })) - } - - fn c_alternate(&mut self, exprs: &[Hir]) -> ResultOrEmpty { - debug_assert!( - exprs.len() >= 2, - "alternates must have at least 2 exprs" - ); - - // Initial entry point is always the first split. - let first_split_entry = self.insts.len(); - - // Save up all of the holes from each alternate. They will all get - // patched to point to the same location. - let mut holes = vec![]; - - // true indicates that the hole is a split where we want to fill - // the second branch. - let mut prev_hole = (Hole::None, false); - for e in &exprs[0..exprs.len() - 1] { - if prev_hole.1 { - let next = self.insts.len(); - self.fill_split(prev_hole.0, None, Some(next)); - } else { - self.fill_to_next(prev_hole.0); - } - let split = self.push_split_hole(); - if let Some(Patch { hole, entry }) = self.c(e)? { - holes.push(hole); - prev_hole = (self.fill_split(split, Some(entry), None), false); - } else { - let (split1, split2) = split.dup_one(); - holes.push(split1); - prev_hole = (split2, true); - } - } - if let Some(Patch { hole, entry }) = self.c(&exprs[exprs.len() - 1])? { - holes.push(hole); - if prev_hole.1 { - self.fill_split(prev_hole.0, None, Some(entry)); - } else { - self.fill(prev_hole.0, entry); - } - } else { - // We ignore prev_hole.1. When it's true, it means we have two - // empty branches both pushing prev_hole.0 into holes, so both - // branches will go to the same place anyway. 
- holes.push(prev_hole.0); - } - Ok(Some(Patch { hole: Hole::Many(holes), entry: first_split_entry })) - } - - fn c_repeat(&mut self, rep: &hir::Repetition) -> ResultOrEmpty { - match (rep.min, rep.max) { - (0, Some(1)) => self.c_repeat_zero_or_one(&rep.sub, rep.greedy), - (0, None) => self.c_repeat_zero_or_more(&rep.sub, rep.greedy), - (1, None) => self.c_repeat_one_or_more(&rep.sub, rep.greedy), - (min, None) => { - self.c_repeat_range_min_or_more(&rep.sub, rep.greedy, min) - } - (min, Some(max)) => { - self.c_repeat_range(&rep.sub, rep.greedy, min, max) - } - } - } - - fn c_repeat_zero_or_one( - &mut self, - expr: &Hir, - greedy: bool, - ) -> ResultOrEmpty { - let split_entry = self.insts.len(); - let split = self.push_split_hole(); - let Patch { hole: hole_rep, entry: entry_rep } = match self.c(expr)? { - Some(p) => p, - None => return self.pop_split_hole(), - }; - let split_hole = if greedy { - self.fill_split(split, Some(entry_rep), None) - } else { - self.fill_split(split, None, Some(entry_rep)) - }; - let holes = vec![hole_rep, split_hole]; - Ok(Some(Patch { hole: Hole::Many(holes), entry: split_entry })) - } - - fn c_repeat_zero_or_more( - &mut self, - expr: &Hir, - greedy: bool, - ) -> ResultOrEmpty { - let split_entry = self.insts.len(); - let split = self.push_split_hole(); - let Patch { hole: hole_rep, entry: entry_rep } = match self.c(expr)? { - Some(p) => p, - None => return self.pop_split_hole(), - }; - - self.fill(hole_rep, split_entry); - let split_hole = if greedy { - self.fill_split(split, Some(entry_rep), None) - } else { - self.fill_split(split, None, Some(entry_rep)) - }; - Ok(Some(Patch { hole: split_hole, entry: split_entry })) - } - - fn c_repeat_one_or_more( - &mut self, - expr: &Hir, - greedy: bool, - ) -> ResultOrEmpty { - let Patch { hole: hole_rep, entry: entry_rep } = match self.c(expr)? 
{ - Some(p) => p, - None => return Ok(None), - }; - self.fill_to_next(hole_rep); - let split = self.push_split_hole(); - - let split_hole = if greedy { - self.fill_split(split, Some(entry_rep), None) - } else { - self.fill_split(split, None, Some(entry_rep)) - }; - Ok(Some(Patch { hole: split_hole, entry: entry_rep })) - } - - fn c_repeat_range_min_or_more( - &mut self, - expr: &Hir, - greedy: bool, - min: u32, - ) -> ResultOrEmpty { - let min = u32_to_usize(min); - // Using next_inst() is ok, because we can't return it (concat would - // have to return Some(_) while c_repeat_range_min_or_more returns - // None). - let patch_concat = self - .c_concat(iter::repeat(expr).take(min))? - .unwrap_or_else(|| self.next_inst()); - if let Some(patch_rep) = self.c_repeat_zero_or_more(expr, greedy)? { - self.fill(patch_concat.hole, patch_rep.entry); - Ok(Some(Patch { hole: patch_rep.hole, entry: patch_concat.entry })) - } else { - Ok(None) - } - } - - fn c_repeat_range( - &mut self, - expr: &Hir, - greedy: bool, - min: u32, - max: u32, - ) -> ResultOrEmpty { - let (min, max) = (u32_to_usize(min), u32_to_usize(max)); - debug_assert!(min <= max); - let patch_concat = self.c_concat(iter::repeat(expr).take(min))?; - if min == max { - return Ok(patch_concat); - } - // Same reasoning as in c_repeat_range_min_or_more (we know that min < - // max at this point). - let patch_concat = patch_concat.unwrap_or_else(|| self.next_inst()); - let initial_entry = patch_concat.entry; - // It is much simpler to compile, e.g., `a{2,5}` as: - // - // aaa?a?a? - // - // But you end up with a sequence of instructions like this: - // - // 0: 'a' - // 1: 'a', - // 2: split(3, 4) - // 3: 'a' - // 4: split(5, 6) - // 5: 'a' - // 6: split(7, 8) - // 7: 'a' - // 8: MATCH - // - // This is *incredibly* inefficient because the splits end - // up forming a chain, which has to be resolved everything a - // transition is followed. 
- let mut holes = vec![]; - let mut prev_hole = patch_concat.hole; - for _ in min..max { - self.fill_to_next(prev_hole); - let split = self.push_split_hole(); - let Patch { hole, entry } = match self.c(expr)? { - Some(p) => p, - None => return self.pop_split_hole(), - }; - prev_hole = hole; - if greedy { - holes.push(self.fill_split(split, Some(entry), None)); - } else { - holes.push(self.fill_split(split, None, Some(entry))); - } - } - holes.push(prev_hole); - Ok(Some(Patch { hole: Hole::Many(holes), entry: initial_entry })) - } - - /// Can be used as a default value for the c_* functions when the call to - /// c_function is followed by inserting at least one instruction that is - /// always executed after the ones written by the c* function. - fn next_inst(&self) -> Patch { - Patch { hole: Hole::None, entry: self.insts.len() } - } - - fn fill(&mut self, hole: Hole, goto: InstPtr) { - match hole { - Hole::None => {} - Hole::One(pc) => { - self.insts[pc].fill(goto); - } - Hole::Many(holes) => { - for hole in holes { - self.fill(hole, goto); - } - } - } - } - - fn fill_to_next(&mut self, hole: Hole) { - let next = self.insts.len(); - self.fill(hole, next); - } - - fn fill_split( - &mut self, - hole: Hole, - goto1: Option, - goto2: Option, - ) -> Hole { - match hole { - Hole::None => Hole::None, - Hole::One(pc) => match (goto1, goto2) { - (Some(goto1), Some(goto2)) => { - self.insts[pc].fill_split(goto1, goto2); - Hole::None - } - (Some(goto1), None) => { - self.insts[pc].half_fill_split_goto1(goto1); - Hole::One(pc) - } - (None, Some(goto2)) => { - self.insts[pc].half_fill_split_goto2(goto2); - Hole::One(pc) - } - (None, None) => unreachable!( - "at least one of the split \ - holes must be filled" - ), - }, - Hole::Many(holes) => { - let mut new_holes = vec![]; - for hole in holes { - new_holes.push(self.fill_split(hole, goto1, goto2)); - } - if new_holes.is_empty() { - Hole::None - } else if new_holes.len() == 1 { - new_holes.pop().unwrap() - } else { - 
Hole::Many(new_holes) - } - } - } - } - - fn push_compiled(&mut self, inst: Inst) { - self.insts.push(MaybeInst::Compiled(inst)); - } - - fn push_hole(&mut self, inst: InstHole) -> Hole { - let hole = self.insts.len(); - self.insts.push(MaybeInst::Uncompiled(inst)); - Hole::One(hole) - } - - fn push_split_hole(&mut self) -> Hole { - let hole = self.insts.len(); - self.insts.push(MaybeInst::Split); - Hole::One(hole) - } - - fn pop_split_hole(&mut self) -> ResultOrEmpty { - self.insts.pop(); - Ok(None) - } - - fn check_size(&self) -> result::Result<(), Error> { - use std::mem::size_of; - - let size = - self.extra_inst_bytes + (self.insts.len() * size_of::()); - if size > self.size_limit { - Err(Error::CompiledTooBig(self.size_limit)) - } else { - Ok(()) - } - } -} - -#[derive(Debug)] -enum Hole { - None, - One(InstPtr), - Many(Vec), -} - -impl Hole { - fn dup_one(self) -> (Self, Self) { - match self { - Hole::One(pc) => (Hole::One(pc), Hole::One(pc)), - Hole::None | Hole::Many(_) => { - unreachable!("must be called on single hole") - } - } - } -} - -#[derive(Clone, Debug)] -enum MaybeInst { - Compiled(Inst), - Uncompiled(InstHole), - Split, - Split1(InstPtr), - Split2(InstPtr), -} - -impl MaybeInst { - fn fill(&mut self, goto: InstPtr) { - let maybeinst = match *self { - MaybeInst::Split => MaybeInst::Split1(goto), - MaybeInst::Uncompiled(ref inst) => { - MaybeInst::Compiled(inst.fill(goto)) - } - MaybeInst::Split1(goto1) => { - MaybeInst::Compiled(Inst::Split(InstSplit { - goto1, - goto2: goto, - })) - } - MaybeInst::Split2(goto2) => { - MaybeInst::Compiled(Inst::Split(InstSplit { - goto1: goto, - goto2, - })) - } - _ => unreachable!( - "not all instructions were compiled! 
\ - found uncompiled instruction: {:?}", - self - ), - }; - *self = maybeinst; - } - - fn fill_split(&mut self, goto1: InstPtr, goto2: InstPtr) { - let filled = match *self { - MaybeInst::Split => Inst::Split(InstSplit { goto1, goto2 }), - _ => unreachable!( - "must be called on Split instruction, \ - instead it was called on: {:?}", - self - ), - }; - *self = MaybeInst::Compiled(filled); - } - - fn half_fill_split_goto1(&mut self, goto1: InstPtr) { - let half_filled = match *self { - MaybeInst::Split => goto1, - _ => unreachable!( - "must be called on Split instruction, \ - instead it was called on: {:?}", - self - ), - }; - *self = MaybeInst::Split1(half_filled); - } - - fn half_fill_split_goto2(&mut self, goto2: InstPtr) { - let half_filled = match *self { - MaybeInst::Split => goto2, - _ => unreachable!( - "must be called on Split instruction, \ - instead it was called on: {:?}", - self - ), - }; - *self = MaybeInst::Split2(half_filled); - } - - fn unwrap(self) -> Inst { - match self { - MaybeInst::Compiled(inst) => inst, - _ => unreachable!( - "must be called on a compiled instruction, \ - instead it was called on: {:?}", - self - ), - } - } -} - -#[derive(Clone, Debug)] -enum InstHole { - Save { slot: usize }, - EmptyLook { look: EmptyLook }, - Char { c: char }, - Ranges { ranges: Vec<(char, char)> }, - Bytes { start: u8, end: u8 }, -} - -impl InstHole { - fn fill(&self, goto: InstPtr) -> Inst { - match *self { - InstHole::Save { slot } => Inst::Save(InstSave { goto, slot }), - InstHole::EmptyLook { look } => { - Inst::EmptyLook(InstEmptyLook { goto, look }) - } - InstHole::Char { c } => Inst::Char(InstChar { goto, c }), - InstHole::Ranges { ref ranges } => Inst::Ranges(InstRanges { - goto, - ranges: ranges.clone().into_boxed_slice(), - }), - InstHole::Bytes { start, end } => { - Inst::Bytes(InstBytes { goto, start, end }) - } - } - } -} - -struct CompileClass<'a, 'b> { - c: &'a mut Compiler, - ranges: &'b [hir::ClassUnicodeRange], -} - -impl<'a, 'b> 
CompileClass<'a, 'b> { - fn compile(mut self) -> Result { - let mut holes = vec![]; - let mut initial_entry = None; - let mut last_split = Hole::None; - let mut utf8_seqs = self.c.utf8_seqs.take().unwrap(); - self.c.suffix_cache.clear(); - - for (i, range) in self.ranges.iter().enumerate() { - let is_last_range = i + 1 == self.ranges.len(); - utf8_seqs.reset(range.start(), range.end()); - let mut it = (&mut utf8_seqs).peekable(); - loop { - let utf8_seq = match it.next() { - None => break, - Some(utf8_seq) => utf8_seq, - }; - if is_last_range && it.peek().is_none() { - let Patch { hole, entry } = self.c_utf8_seq(&utf8_seq)?; - holes.push(hole); - self.c.fill(last_split, entry); - last_split = Hole::None; - if initial_entry.is_none() { - initial_entry = Some(entry); - } - } else { - if initial_entry.is_none() { - initial_entry = Some(self.c.insts.len()); - } - self.c.fill_to_next(last_split); - last_split = self.c.push_split_hole(); - let Patch { hole, entry } = self.c_utf8_seq(&utf8_seq)?; - holes.push(hole); - last_split = - self.c.fill_split(last_split, Some(entry), None); - } - } - } - self.c.utf8_seqs = Some(utf8_seqs); - Ok(Patch { hole: Hole::Many(holes), entry: initial_entry.unwrap() }) - } - - fn c_utf8_seq(&mut self, seq: &Utf8Sequence) -> Result { - if self.c.compiled.is_reverse { - self.c_utf8_seq_(seq) - } else { - self.c_utf8_seq_(seq.into_iter().rev()) - } - } - - fn c_utf8_seq_<'r, I>(&mut self, seq: I) -> Result - where - I: IntoIterator, - { - // The initial instruction for each UTF-8 sequence should be the same. 
- let mut from_inst = ::std::usize::MAX; - let mut last_hole = Hole::None; - for byte_range in seq { - let key = SuffixCacheKey { - from_inst, - start: byte_range.start, - end: byte_range.end, - }; - { - let pc = self.c.insts.len(); - if let Some(cached_pc) = self.c.suffix_cache.get(key, pc) { - from_inst = cached_pc; - continue; - } - } - self.c.byte_classes.set_range(byte_range.start, byte_range.end); - if from_inst == ::std::usize::MAX { - last_hole = self.c.push_hole(InstHole::Bytes { - start: byte_range.start, - end: byte_range.end, - }); - } else { - self.c.push_compiled(Inst::Bytes(InstBytes { - goto: from_inst, - start: byte_range.start, - end: byte_range.end, - })); - } - from_inst = self.c.insts.len().checked_sub(1).unwrap(); - debug_assert!(from_inst < ::std::usize::MAX); - } - debug_assert!(from_inst < ::std::usize::MAX); - Ok(Patch { hole: last_hole, entry: from_inst }) - } -} - -/// `SuffixCache` is a simple bounded hash map for caching suffix entries in -/// UTF-8 automata. For example, consider the Unicode range \u{0}-\u{FFFF}. -/// The set of byte ranges looks like this: -/// -/// [0-7F] -/// [C2-DF][80-BF] -/// [E0][A0-BF][80-BF] -/// [E1-EC][80-BF][80-BF] -/// [ED][80-9F][80-BF] -/// [EE-EF][80-BF][80-BF] -/// -/// Each line above translates to one alternate in the compiled regex program. -/// However, all but one of the alternates end in the same suffix, which is -/// a waste of an instruction. The suffix cache facilitates reusing them across -/// alternates. -/// -/// Note that a HashMap could be trivially used for this, but we don't need its -/// overhead. Some small bounded space (LRU style) is more than enough. -/// -/// This uses similar idea to [`SparseSet`](../sparse/struct.SparseSet.html), -/// except it uses hashes as original indices and then compares full keys for -/// validation against `dense` array. 
-#[derive(Debug)] -struct SuffixCache { - sparse: Box<[usize]>, - dense: Vec, -} - -#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] -struct SuffixCacheEntry { - key: SuffixCacheKey, - pc: InstPtr, -} - -#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] -struct SuffixCacheKey { - from_inst: InstPtr, - start: u8, - end: u8, -} - -impl SuffixCache { - fn new(size: usize) -> Self { - SuffixCache { - sparse: vec![0usize; size].into(), - dense: Vec::with_capacity(size), - } - } - - fn get(&mut self, key: SuffixCacheKey, pc: InstPtr) -> Option { - let hash = self.hash(&key); - let pos = &mut self.sparse[hash]; - if let Some(entry) = self.dense.get(*pos) { - if entry.key == key { - return Some(entry.pc); - } - } - *pos = self.dense.len(); - self.dense.push(SuffixCacheEntry { key, pc }); - None - } - - fn clear(&mut self) { - self.dense.clear(); - } - - fn hash(&self, suffix: &SuffixCacheKey) -> usize { - // Basic FNV-1a hash as described: - // https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function - const FNV_PRIME: u64 = 1_099_511_628_211; - let mut h = 14_695_981_039_346_656_037; - h = (h ^ (suffix.from_inst as u64)).wrapping_mul(FNV_PRIME); - h = (h ^ (suffix.start as u64)).wrapping_mul(FNV_PRIME); - h = (h ^ (suffix.end as u64)).wrapping_mul(FNV_PRIME); - (h as usize) % self.sparse.len() - } -} - -struct ByteClassSet([bool; 256]); - -impl ByteClassSet { - fn new() -> Self { - ByteClassSet([false; 256]) - } - - fn set_range(&mut self, start: u8, end: u8) { - debug_assert!(start <= end); - if start > 0 { - self.0[start as usize - 1] = true; - } - self.0[end as usize] = true; - } - - fn set_word_boundary(&mut self) { - // We need to mark all ranges of bytes whose pairs result in - // evaluating \b differently. 
- let iswb = is_word_byte; - let mut b1: u16 = 0; - let mut b2: u16; - while b1 <= 255 { - b2 = b1 + 1; - while b2 <= 255 && iswb(b1 as u8) == iswb(b2 as u8) { - b2 += 1; - } - self.set_range(b1 as u8, (b2 - 1) as u8); - b1 = b2; - } - } - - fn byte_classes(&self) -> Vec { - // N.B. If you're debugging the DFA, it's useful to simply return - // `(0..256).collect()`, which effectively removes the byte classes - // and makes the transitions easier to read. - // (0usize..256).map(|x| x as u8).collect() - let mut byte_classes = vec![0; 256]; - let mut class = 0u8; - let mut i = 0; - loop { - byte_classes[i] = class as u8; - if i >= 255 { - break; - } - if self.0[i] { - class = class.checked_add(1).unwrap(); - } - i += 1; - } - byte_classes - } -} - -impl fmt::Debug for ByteClassSet { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_tuple("ByteClassSet").field(&&self.0[..]).finish() - } -} - -fn u32_to_usize(n: u32) -> usize { - // In case usize is less than 32 bits, we need to guard against overflow. - // On most platforms this compiles to nothing. - // TODO Use `std::convert::TryFrom` once it's stable. 
- if (n as u64) > (::std::usize::MAX as u64) { - panic!("BUG: {} is too big to be pointer sized", n) - } - n as usize -} - -#[cfg(test)] -mod tests { - use super::ByteClassSet; - - #[test] - fn byte_classes() { - let mut set = ByteClassSet::new(); - set.set_range(b'a', b'z'); - let classes = set.byte_classes(); - assert_eq!(classes[0], 0); - assert_eq!(classes[1], 0); - assert_eq!(classes[2], 0); - assert_eq!(classes[b'a' as usize - 1], 0); - assert_eq!(classes[b'a' as usize], 1); - assert_eq!(classes[b'm' as usize], 1); - assert_eq!(classes[b'z' as usize], 1); - assert_eq!(classes[b'z' as usize + 1], 2); - assert_eq!(classes[254], 2); - assert_eq!(classes[255], 2); - - let mut set = ByteClassSet::new(); - set.set_range(0, 2); - set.set_range(4, 6); - let classes = set.byte_classes(); - assert_eq!(classes[0], 0); - assert_eq!(classes[1], 0); - assert_eq!(classes[2], 0); - assert_eq!(classes[3], 1); - assert_eq!(classes[4], 2); - assert_eq!(classes[5], 2); - assert_eq!(classes[6], 2); - assert_eq!(classes[7], 3); - assert_eq!(classes[255], 3); - } - - #[test] - fn full_byte_classes() { - let mut set = ByteClassSet::new(); - for i in 0..256u16 { - set.set_range(i as u8, i as u8); - } - assert_eq!(set.byte_classes().len(), 256); - } -} diff --git a/vendor/regex/src/dfa.rs b/vendor/regex/src/dfa.rs deleted file mode 100644 index 78ed710..0000000 --- a/vendor/regex/src/dfa.rs +++ /dev/null @@ -1,1945 +0,0 @@ -/*! -The DFA matching engine. - -A DFA provides faster matching because the engine is in exactly one state at -any point in time. In the NFA, there may be multiple active states, and -considerable CPU cycles are spent shuffling them around. In finite automata -speak, the DFA follows epsilon transitions in the regex far less than the NFA. - -A DFA is a classic trade off between time and space. The NFA is slower, but -its memory requirements are typically small and predictable. 
The DFA is faster, -but given the right regex and the right input, the number of states in the -DFA can grow exponentially. To mitigate this space problem, we do two things: - -1. We implement an *online* DFA. That is, the DFA is constructed from the NFA - during a search. When a new state is computed, it is stored in a cache so - that it may be reused. An important consequence of this implementation - is that states that are never reached for a particular input are never - computed. (This is impossible in an "offline" DFA which needs to compute - all possible states up front.) -2. If the cache gets too big, we wipe it and continue matching. - -In pathological cases, a new state can be created for every byte of input. -(e.g., The regex `(a|b)*a(a|b){20}` on a long sequence of a's and b's.) -In this case, performance regresses to slightly slower than the full NFA -simulation, in large part because the cache becomes useless. If the cache -is wiped too frequently, the DFA quits and control falls back to one of the -NFA simulations. - -Because of the "lazy" nature of this DFA, the inner matching loop is -considerably more complex than one might expect out of a DFA. A number of -tricks are employed to make it fast. Tread carefully. - -N.B. While this implementation is heavily commented, Russ Cox's series of -articles on regexes is strongly recommended: -(As is the DFA implementation in RE2, which heavily influenced this -implementation.) -*/ - -use std::collections::HashMap; -use std::fmt; -use std::iter::repeat; -use std::mem; -use std::sync::Arc; - -use crate::exec::ProgramCache; -use crate::prog::{Inst, Program}; -use crate::sparse::SparseSet; - -/// Return true if and only if the given program can be executed by a DFA. -/// -/// Generally, a DFA is always possible. A pathological case where it is not -/// possible is if the number of NFA states exceeds `u32::MAX`, in which case, -/// this function will return false. 
-/// -/// This function will also return false if the given program has any Unicode -/// instructions (Char or Ranges) since the DFA operates on bytes only. -pub fn can_exec(insts: &Program) -> bool { - use crate::prog::Inst::*; - // If for some reason we manage to allocate a regex program with more - // than i32::MAX instructions, then we can't execute the DFA because we - // use 32 bit instruction pointer deltas for memory savings. - // If i32::MAX is the largest positive delta, - // then -i32::MAX == i32::MIN + 1 is the largest negative delta, - // and we are OK to use 32 bits. - if insts.dfa_size_limit == 0 || insts.len() > ::std::i32::MAX as usize { - return false; - } - for inst in insts { - match *inst { - Char(_) | Ranges(_) => return false, - EmptyLook(_) | Match(_) | Save(_) | Split(_) | Bytes(_) => {} - } - } - true -} - -/// A reusable cache of DFA states. -/// -/// This cache is reused between multiple invocations of the same regex -/// program. (It is not shared simultaneously between threads. If there is -/// contention, then new caches are created.) -#[derive(Debug)] -pub struct Cache { - /// Group persistent DFA related cache state together. The sparse sets - /// listed below are used as scratch space while computing uncached states. - inner: CacheInner, - /// qcur and qnext are ordered sets with constant time - /// addition/membership/clearing-whole-set and linear time iteration. They - /// are used to manage the sets of NFA states in DFA states when computing - /// cached DFA states. In particular, the order of the NFA states matters - /// for leftmost-first style matching. Namely, when computing a cached - /// state, the set of NFA states stops growing as soon as the first Match - /// instruction is observed. - qcur: SparseSet, - qnext: SparseSet, -} - -/// `CacheInner` is logically just a part of Cache, but groups together fields -/// that aren't passed as function parameters throughout search. 
(This split -/// is mostly an artifact of the borrow checker. It is happily paid.) -#[derive(Debug)] -struct CacheInner { - /// A cache of pre-compiled DFA states, keyed by the set of NFA states - /// and the set of empty-width flags set at the byte in the input when the - /// state was observed. - /// - /// A StatePtr is effectively a `*State`, but to avoid various inconvenient - /// things, we just pass indexes around manually. The performance impact of - /// this is probably an instruction or two in the inner loop. However, on - /// 64 bit, each StatePtr is half the size of a *State. - compiled: StateMap, - /// The transition table. - /// - /// The transition table is laid out in row-major order, where states are - /// rows and the transitions for each state are columns. At a high level, - /// given state `s` and byte `b`, the next state can be found at index - /// `s * 256 + b`. - /// - /// This is, of course, a lie. A StatePtr is actually a pointer to the - /// *start* of a row in this table. When indexing in the DFA's inner loop, - /// this removes the need to multiply the StatePtr by the stride. Yes, it - /// matters. This reduces the number of states we can store, but: the - /// stride is rarely 256 since we define transitions in terms of - /// *equivalence classes* of bytes. Each class corresponds to a set of - /// bytes that never discriminate a distinct path through the DFA from each - /// other. - trans: Transitions, - /// A set of cached start states, which are limited to the number of - /// permutations of flags set just before the initial byte of input. (The - /// index into this vec is a `EmptyFlags`.) - /// - /// N.B. A start state can be "dead" (i.e., no possible match), so we - /// represent it with a StatePtr. - start_states: Vec, - /// Stack scratch space used to follow epsilon transitions in the NFA. - /// (This permits us to avoid recursion.) - /// - /// The maximum stack size is the number of NFA states. 
- stack: Vec, - /// The total number of times this cache has been flushed by the DFA - /// because of space constraints. - flush_count: u64, - /// The total heap size of the DFA's cache. We use this to determine when - /// we should flush the cache. - size: usize, - /// Scratch space used when building instruction pointer lists for new - /// states. This helps amortize allocation. - insts_scratch_space: Vec, -} - -/// The transition table. -/// -/// It is laid out in row-major order, with states as rows and byte class -/// transitions as columns. -/// -/// The transition table is responsible for producing valid `StatePtrs`. A -/// `StatePtr` points to the start of a particular row in this table. When -/// indexing to find the next state this allows us to avoid a multiplication -/// when computing an index into the table. -#[derive(Clone)] -struct Transitions { - /// The table. - table: Vec, - /// The stride. - num_byte_classes: usize, -} - -/// Fsm encapsulates the actual execution of the DFA. -#[derive(Debug)] -pub struct Fsm<'a> { - /// prog contains the NFA instruction opcodes. DFA execution uses either - /// the `dfa` instructions or the `dfa_reverse` instructions from - /// `exec::ExecReadOnly`. (It never uses `ExecReadOnly.nfa`, which may have - /// Unicode opcodes that cannot be executed by the DFA.) - prog: &'a Program, - /// The start state. We record it here because the pointer may change - /// when the cache is wiped. - start: StatePtr, - /// The current position in the input. - at: usize, - /// Should we quit after seeing the first match? e.g., When the caller - /// uses `is_match` or `shortest_match`. - quit_after_match: bool, - /// The last state that matched. - /// - /// When no match has occurred, this is set to STATE_UNKNOWN. - /// - /// This is only useful when matching regex sets. The last match state - /// is useful because it contains all of the match instructions seen, - /// thereby allowing us to enumerate which regexes in the set matched. 
- last_match_si: StatePtr, - /// The input position of the last cache flush. We use this to determine - /// if we're thrashing in the cache too often. If so, the DFA quits so - /// that we can fall back to the NFA algorithm. - last_cache_flush: usize, - /// All cached DFA information that is persisted between searches. - cache: &'a mut CacheInner, -} - -/// The result of running the DFA. -/// -/// Generally, the result is either a match or not a match, but sometimes the -/// DFA runs too slowly because the cache size is too small. In that case, it -/// gives up with the intent of falling back to the NFA algorithm. -/// -/// The DFA can also give up if it runs out of room to create new states, or if -/// it sees non-ASCII bytes in the presence of a Unicode word boundary. -#[derive(Clone, Debug)] -pub enum Result { - Match(T), - NoMatch(usize), - Quit, -} - -impl Result { - /// Returns true if this result corresponds to a match. - pub fn is_match(&self) -> bool { - match *self { - Result::Match(_) => true, - Result::NoMatch(_) | Result::Quit => false, - } - } - - /// Maps the given function onto T and returns the result. - /// - /// If this isn't a match, then this is a no-op. - #[cfg(feature = "perf-literal")] - pub fn map U>(self, mut f: F) -> Result { - match self { - Result::Match(t) => Result::Match(f(t)), - Result::NoMatch(x) => Result::NoMatch(x), - Result::Quit => Result::Quit, - } - } - - /// Sets the non-match position. - /// - /// If this isn't a non-match, then this is a no-op. - fn set_non_match(self, at: usize) -> Result { - match self { - Result::NoMatch(_) => Result::NoMatch(at), - r => r, - } - } -} - -/// `State` is a DFA state. It contains an ordered set of NFA states (not -/// necessarily complete) and a smattering of flags. -/// -/// The flags are packed into the first byte of data. -/// -/// States don't carry their transitions. Instead, transitions are stored in -/// a single row-major table. 
-/// -/// Delta encoding is used to store the instruction pointers. -/// The first instruction pointer is stored directly starting -/// at data[1], and each following pointer is stored as an offset -/// to the previous one. If a delta is in the range -127..127, -/// it is packed into a single byte; Otherwise the byte 128 (-128 as an i8) -/// is coded as a flag, followed by 4 bytes encoding the delta. -#[derive(Clone, Eq, Hash, PartialEq)] -struct State { - data: Arc<[u8]>, -} - -/// `InstPtr` is a 32 bit pointer into a sequence of opcodes (i.e., it indexes -/// an NFA state). -/// -/// Throughout this library, this is usually set to `usize`, but we force a -/// `u32` here for the DFA to save on space. -type InstPtr = u32; - -/// Adds ip to data using delta encoding with respect to prev. -/// -/// After completion, `data` will contain `ip` and `prev` will be set to `ip`. -fn push_inst_ptr(data: &mut Vec, prev: &mut InstPtr, ip: InstPtr) { - let delta = (ip as i32) - (*prev as i32); - write_vari32(data, delta); - *prev = ip; -} - -struct InstPtrs<'a> { - base: usize, - data: &'a [u8], -} - -impl<'a> Iterator for InstPtrs<'a> { - type Item = usize; - - fn next(&mut self) -> Option { - if self.data.is_empty() { - return None; - } - let (delta, nread) = read_vari32(self.data); - let base = self.base as i32 + delta; - debug_assert!(base >= 0); - debug_assert!(nread > 0); - self.data = &self.data[nread..]; - self.base = base as usize; - Some(self.base) - } -} - -impl State { - fn flags(&self) -> StateFlags { - StateFlags(self.data[0]) - } - - fn inst_ptrs(&self) -> InstPtrs<'_> { - InstPtrs { base: 0, data: &self.data[1..] } - } -} - -/// `StatePtr` is a 32 bit pointer to the start of a row in the transition -/// table. -/// -/// It has many special values. There are two types of special values: -/// sentinels and flags. -/// -/// Sentinels corresponds to special states that carry some kind of -/// significance. There are three such states: unknown, dead and quit states. 
-/// -/// Unknown states are states that haven't been computed yet. They indicate -/// that a transition should be filled in that points to either an existing -/// cached state or a new state altogether. In general, an unknown state means -/// "follow the NFA's epsilon transitions." -/// -/// Dead states are states that can never lead to a match, no matter what -/// subsequent input is observed. This means that the DFA should quit -/// immediately and return the longest match it has found thus far. -/// -/// Quit states are states that imply the DFA is not capable of matching the -/// regex correctly. Currently, this is only used when a Unicode word boundary -/// exists in the regex *and* a non-ASCII byte is observed. -/// -/// The other type of state pointer is a state pointer with special flag bits. -/// There are two flags: a start flag and a match flag. The lower bits of both -/// kinds always contain a "valid" `StatePtr` (indicated by the `STATE_MAX` -/// mask). -/// -/// The start flag means that the state is a start state, and therefore may be -/// subject to special prefix scanning optimizations. -/// -/// The match flag means that the state is a match state, and therefore the -/// current position in the input (while searching) should be recorded. -/// -/// The above exists mostly in the service of making the inner loop fast. -/// In particular, the inner *inner* loop looks something like this: -/// -/// ```ignore -/// while state <= STATE_MAX and i < len(text): -/// state = state.next[i] -/// ``` -/// -/// This is nice because it lets us execute a lazy DFA as if it were an -/// entirely offline DFA (i.e., with very few instructions). The loop will -/// quit only when we need to examine a case that needs special attention. -type StatePtr = u32; - -/// An unknown state means that the state has not been computed yet, and that -/// the only way to progress is to compute it. 
-const STATE_UNKNOWN: StatePtr = 1 << 31; - -/// A dead state means that the state has been computed and it is known that -/// once it is entered, no future match can ever occur. -const STATE_DEAD: StatePtr = STATE_UNKNOWN + 1; - -/// A quit state means that the DFA came across some input that it doesn't -/// know how to process correctly. The DFA should quit and another matching -/// engine should be run in its place. -const STATE_QUIT: StatePtr = STATE_DEAD + 1; - -/// A start state is a state that the DFA can start in. -/// -/// Note that start states have their lower bits set to a state pointer. -const STATE_START: StatePtr = 1 << 30; - -/// A match state means that the regex has successfully matched. -/// -/// Note that match states have their lower bits set to a state pointer. -const STATE_MATCH: StatePtr = 1 << 29; - -/// The maximum state pointer. This is useful to mask out the "valid" state -/// pointer from a state with the "start" or "match" bits set. -/// -/// It doesn't make sense to use this with unknown, dead or quit state -/// pointers, since those pointers are sentinels and never have their lower -/// bits set to anything meaningful. -const STATE_MAX: StatePtr = STATE_MATCH - 1; - -/// Byte is a u8 in spirit, but a u16 in practice so that we can represent the -/// special EOF sentinel value. -#[derive(Copy, Clone, Debug)] -struct Byte(u16); - -/// A set of flags for zero-width assertions. -#[derive(Clone, Copy, Eq, Debug, Default, Hash, PartialEq)] -struct EmptyFlags { - start: bool, - end: bool, - start_line: bool, - end_line: bool, - word_boundary: bool, - not_word_boundary: bool, -} - -/// A set of flags describing various configurations of a DFA state. This is -/// represented by a `u8` so that it is compact. -#[derive(Clone, Copy, Eq, Default, Hash, PartialEq)] -struct StateFlags(u8); - -impl Cache { - /// Create new empty cache for the DFA engine. - pub fn new(prog: &Program) -> Self { - // We add 1 to account for the special EOF byte. 
- let num_byte_classes = (prog.byte_classes[255] as usize + 1) + 1; - let starts = vec![STATE_UNKNOWN; 256]; - let mut cache = Cache { - inner: CacheInner { - compiled: StateMap::new(num_byte_classes), - trans: Transitions::new(num_byte_classes), - start_states: starts, - stack: vec![], - flush_count: 0, - size: 0, - insts_scratch_space: vec![], - }, - qcur: SparseSet::new(prog.insts.len()), - qnext: SparseSet::new(prog.insts.len()), - }; - cache.inner.reset_size(); - cache - } -} - -impl CacheInner { - /// Resets the cache size to account for fixed costs, such as the program - /// and stack sizes. - fn reset_size(&mut self) { - self.size = (self.start_states.len() * mem::size_of::()) - + (self.stack.len() * mem::size_of::()); - } -} - -impl<'a> Fsm<'a> { - #[cfg_attr(feature = "perf-inline", inline(always))] - pub fn forward( - prog: &'a Program, - cache: &ProgramCache, - quit_after_match: bool, - text: &[u8], - at: usize, - ) -> Result { - let mut cache = cache.borrow_mut(); - let cache = &mut cache.dfa; - let mut dfa = Fsm { - prog, - start: 0, // filled in below - at, - quit_after_match, - last_match_si: STATE_UNKNOWN, - last_cache_flush: at, - cache: &mut cache.inner, - }; - let (empty_flags, state_flags) = dfa.start_flags(text, at); - dfa.start = - match dfa.start_state(&mut cache.qcur, empty_flags, state_flags) { - None => return Result::Quit, - Some(STATE_DEAD) => return Result::NoMatch(at), - Some(si) => si, - }; - debug_assert!(dfa.start != STATE_UNKNOWN); - dfa.exec_at(&mut cache.qcur, &mut cache.qnext, text) - } - - #[cfg_attr(feature = "perf-inline", inline(always))] - pub fn reverse( - prog: &'a Program, - cache: &ProgramCache, - quit_after_match: bool, - text: &[u8], - at: usize, - ) -> Result { - let mut cache = cache.borrow_mut(); - let cache = &mut cache.dfa_reverse; - let mut dfa = Fsm { - prog, - start: 0, // filled in below - at, - quit_after_match, - last_match_si: STATE_UNKNOWN, - last_cache_flush: at, - cache: &mut cache.inner, - }; - let 
(empty_flags, state_flags) = dfa.start_flags_reverse(text, at); - dfa.start = - match dfa.start_state(&mut cache.qcur, empty_flags, state_flags) { - None => return Result::Quit, - Some(STATE_DEAD) => return Result::NoMatch(at), - Some(si) => si, - }; - debug_assert!(dfa.start != STATE_UNKNOWN); - dfa.exec_at_reverse(&mut cache.qcur, &mut cache.qnext, text) - } - - #[cfg_attr(feature = "perf-inline", inline(always))] - pub fn forward_many( - prog: &'a Program, - cache: &ProgramCache, - matches: &mut [bool], - text: &[u8], - at: usize, - ) -> Result { - debug_assert!(matches.len() == prog.matches.len()); - let mut cache = cache.borrow_mut(); - let cache = &mut cache.dfa; - let mut dfa = Fsm { - prog, - start: 0, // filled in below - at, - quit_after_match: false, - last_match_si: STATE_UNKNOWN, - last_cache_flush: at, - cache: &mut cache.inner, - }; - let (empty_flags, state_flags) = dfa.start_flags(text, at); - dfa.start = - match dfa.start_state(&mut cache.qcur, empty_flags, state_flags) { - None => return Result::Quit, - Some(STATE_DEAD) => return Result::NoMatch(at), - Some(si) => si, - }; - debug_assert!(dfa.start != STATE_UNKNOWN); - let result = dfa.exec_at(&mut cache.qcur, &mut cache.qnext, text); - if result.is_match() { - if matches.len() == 1 { - matches[0] = true; - } else { - debug_assert!(dfa.last_match_si != STATE_UNKNOWN); - debug_assert!(dfa.last_match_si != STATE_DEAD); - for ip in dfa.state(dfa.last_match_si).inst_ptrs() { - if let Inst::Match(slot) = dfa.prog[ip] { - matches[slot] = true; - } - } - } - } - result - } - - /// Executes the DFA on a forward NFA. - /// - /// {qcur,qnext} are scratch ordered sets which may be non-empty. 
- #[cfg_attr(feature = "perf-inline", inline(always))] - fn exec_at( - &mut self, - qcur: &mut SparseSet, - qnext: &mut SparseSet, - text: &[u8], - ) -> Result { - // For the most part, the DFA is basically: - // - // last_match = null - // while current_byte != EOF: - // si = current_state.next[current_byte] - // if si is match - // last_match = si - // return last_match - // - // However, we need to deal with a few things: - // - // 1. This is an *online* DFA, so the current state's next list - // may not point to anywhere yet, so we must go out and compute - // them. (They are then cached into the current state's next list - // to avoid re-computation.) - // 2. If we come across a state that is known to be dead (i.e., never - // leads to a match), then we can quit early. - // 3. If the caller just wants to know if a match occurs, then we - // can quit as soon as we know we have a match. (Full leftmost - // first semantics require continuing on.) - // 4. If we're in the start state, then we can use a pre-computed set - // of prefix literals to skip quickly along the input. - // 5. After the input is exhausted, we run the DFA on one symbol - // that stands for EOF. This is useful for handling empty width - // assertions. - // 6. We can't actually do state.next[byte]. Instead, we have to do - // state.next[byte_classes[byte]], which permits us to keep the - // 'next' list very small. - // - // Since there's a bunch of extra stuff we need to consider, we do some - // pretty hairy tricks to get the inner loop to run as fast as - // possible. - debug_assert!(!self.prog.is_reverse); - - // The last match is the currently known ending match position. It is - // reported as an index to the most recent byte that resulted in a - // transition to a match state and is always stored in capture slot `1` - // when searching forwards. Its maximum value is `text.len()`. 
- let mut result = Result::NoMatch(self.at); - let (mut prev_si, mut next_si) = (self.start, self.start); - let mut at = self.at; - while at < text.len() { - // This is the real inner loop. We take advantage of special bits - // set in the state pointer to determine whether a state is in the - // "common" case or not. Specifically, the common case is a - // non-match non-start non-dead state that has already been - // computed. So long as we remain in the common case, this inner - // loop will chew through the input. - // - // We also unroll the loop 4 times to amortize the cost of checking - // whether we've consumed the entire input. We are also careful - // to make sure that `prev_si` always represents the previous state - // and `next_si` always represents the next state after the loop - // exits, even if it isn't always true inside the loop. - while next_si <= STATE_MAX && at < text.len() { - // Argument for safety is in the definition of next_si. - prev_si = unsafe { self.next_si(next_si, text, at) }; - at += 1; - if prev_si > STATE_MAX || at + 2 >= text.len() { - mem::swap(&mut prev_si, &mut next_si); - break; - } - next_si = unsafe { self.next_si(prev_si, text, at) }; - at += 1; - if next_si > STATE_MAX { - break; - } - prev_si = unsafe { self.next_si(next_si, text, at) }; - at += 1; - if prev_si > STATE_MAX { - mem::swap(&mut prev_si, &mut next_si); - break; - } - next_si = unsafe { self.next_si(prev_si, text, at) }; - at += 1; - } - if next_si & STATE_MATCH > 0 { - // A match state is outside of the common case because it needs - // special case analysis. In particular, we need to record the - // last position as having matched and possibly quit the DFA if - // we don't need to keep matching. - next_si &= !STATE_MATCH; - result = Result::Match(at - 1); - if self.quit_after_match { - return result; - } - self.last_match_si = next_si; - prev_si = next_si; - - // This permits short-circuiting when matching a regex set. 
- // In particular, if this DFA state contains only match states, - // then it's impossible to extend the set of matches since - // match states are final. Therefore, we can quit. - if self.prog.matches.len() > 1 { - let state = self.state(next_si); - let just_matches = - state.inst_ptrs().all(|ip| self.prog[ip].is_match()); - if just_matches { - return result; - } - } - - // Another inner loop! If the DFA stays in this particular - // match state, then we can rip through all of the input - // very quickly, and only recording the match location once - // we've left this particular state. - let cur = at; - while (next_si & !STATE_MATCH) == prev_si - && at + 2 < text.len() - { - // Argument for safety is in the definition of next_si. - next_si = unsafe { - self.next_si(next_si & !STATE_MATCH, text, at) - }; - at += 1; - } - if at > cur { - result = Result::Match(at - 2); - } - } else if next_si & STATE_START > 0 { - // A start state isn't in the common case because we may - // want to do quick prefix scanning. If the program doesn't - // have a detected prefix, then start states are actually - // considered common and this case is never reached. - debug_assert!(self.has_prefix()); - next_si &= !STATE_START; - prev_si = next_si; - at = match self.prefix_at(text, at) { - None => return Result::NoMatch(text.len()), - Some(i) => i, - }; - } else if next_si >= STATE_UNKNOWN { - if next_si == STATE_QUIT { - return Result::Quit; - } - // Finally, this corresponds to the case where the transition - // entered a state that can never lead to a match or a state - // that hasn't been computed yet. The latter being the "slow" - // path. - let byte = Byte::byte(text[at - 1]); - // We no longer care about the special bits in the state - // pointer. - prev_si &= STATE_MAX; - // Record where we are. This is used to track progress for - // determining whether we should quit if we've flushed the - // cache too much. 
- self.at = at; - next_si = match self.next_state(qcur, qnext, prev_si, byte) { - None => return Result::Quit, - Some(STATE_DEAD) => return result.set_non_match(at), - Some(si) => si, - }; - debug_assert!(next_si != STATE_UNKNOWN); - if next_si & STATE_MATCH > 0 { - next_si &= !STATE_MATCH; - result = Result::Match(at - 1); - if self.quit_after_match { - return result; - } - self.last_match_si = next_si; - } - prev_si = next_si; - } else { - prev_si = next_si; - } - } - - // Run the DFA once more on the special EOF sentinel value. - // We don't care about the special bits in the state pointer any more, - // so get rid of them. - prev_si &= STATE_MAX; - prev_si = match self.next_state(qcur, qnext, prev_si, Byte::eof()) { - None => return Result::Quit, - Some(STATE_DEAD) => return result.set_non_match(text.len()), - Some(si) => si & !STATE_START, - }; - debug_assert!(prev_si != STATE_UNKNOWN); - if prev_si & STATE_MATCH > 0 { - prev_si &= !STATE_MATCH; - self.last_match_si = prev_si; - result = Result::Match(text.len()); - } - result - } - - /// Executes the DFA on a reverse NFA. - #[cfg_attr(feature = "perf-inline", inline(always))] - fn exec_at_reverse( - &mut self, - qcur: &mut SparseSet, - qnext: &mut SparseSet, - text: &[u8], - ) -> Result { - // The comments in `exec_at` above mostly apply here too. The main - // difference is that we move backwards over the input and we look for - // the longest possible match instead of the leftmost-first match. - // - // N.B. The code duplication here is regrettable. Efforts to improve - // it without sacrificing performance are welcome. ---AG - debug_assert!(self.prog.is_reverse); - let mut result = Result::NoMatch(self.at); - let (mut prev_si, mut next_si) = (self.start, self.start); - let mut at = self.at; - while at > 0 { - while next_si <= STATE_MAX && at > 0 { - // Argument for safety is in the definition of next_si. 
- at -= 1; - prev_si = unsafe { self.next_si(next_si, text, at) }; - if prev_si > STATE_MAX || at <= 4 { - mem::swap(&mut prev_si, &mut next_si); - break; - } - at -= 1; - next_si = unsafe { self.next_si(prev_si, text, at) }; - if next_si > STATE_MAX { - break; - } - at -= 1; - prev_si = unsafe { self.next_si(next_si, text, at) }; - if prev_si > STATE_MAX { - mem::swap(&mut prev_si, &mut next_si); - break; - } - at -= 1; - next_si = unsafe { self.next_si(prev_si, text, at) }; - } - if next_si & STATE_MATCH > 0 { - next_si &= !STATE_MATCH; - result = Result::Match(at + 1); - if self.quit_after_match { - return result; - } - self.last_match_si = next_si; - prev_si = next_si; - let cur = at; - while (next_si & !STATE_MATCH) == prev_si && at >= 2 { - // Argument for safety is in the definition of next_si. - at -= 1; - next_si = unsafe { - self.next_si(next_si & !STATE_MATCH, text, at) - }; - } - if at < cur { - result = Result::Match(at + 2); - } - } else if next_si >= STATE_UNKNOWN { - if next_si == STATE_QUIT { - return Result::Quit; - } - let byte = Byte::byte(text[at]); - prev_si &= STATE_MAX; - self.at = at; - next_si = match self.next_state(qcur, qnext, prev_si, byte) { - None => return Result::Quit, - Some(STATE_DEAD) => return result.set_non_match(at), - Some(si) => si, - }; - debug_assert!(next_si != STATE_UNKNOWN); - if next_si & STATE_MATCH > 0 { - next_si &= !STATE_MATCH; - result = Result::Match(at + 1); - if self.quit_after_match { - return result; - } - self.last_match_si = next_si; - } - prev_si = next_si; - } else { - prev_si = next_si; - } - } - - // Run the DFA once more on the special EOF sentinel value. 
- prev_si = match self.next_state(qcur, qnext, prev_si, Byte::eof()) { - None => return Result::Quit, - Some(STATE_DEAD) => return result.set_non_match(0), - Some(si) => si, - }; - debug_assert!(prev_si != STATE_UNKNOWN); - if prev_si & STATE_MATCH > 0 { - prev_si &= !STATE_MATCH; - self.last_match_si = prev_si; - result = Result::Match(0); - } - result - } - - /// next_si transitions to the next state, where the transition input - /// corresponds to text[i]. - /// - /// This elides bounds checks, and is therefore not safe. - #[cfg_attr(feature = "perf-inline", inline(always))] - unsafe fn next_si(&self, si: StatePtr, text: &[u8], i: usize) -> StatePtr { - // What is the argument for safety here? - // We have three unchecked accesses that could possibly violate safety: - // - // 1. The given byte of input (`text[i]`). - // 2. The class of the byte of input (`classes[text[i]]`). - // 3. The transition for the class (`trans[si + cls]`). - // - // (1) is only safe when calling next_si is guarded by - // `i < text.len()`. - // - // (2) is the easiest case to guarantee since `text[i]` is always a - // `u8` and `self.prog.byte_classes` always has length `u8::MAX`. - // (See `ByteClassSet.byte_classes` in `compile.rs`.) - // - // (3) is only safe if (1)+(2) are safe. Namely, the transitions - // of every state are defined to have length equal to the number of - // byte classes in the program. Therefore, a valid class leads to a - // valid transition. (All possible transitions are valid lookups, even - // if it points to a state that hasn't been computed yet.) (3) also - // relies on `si` being correct, but StatePtrs should only ever be - // retrieved from the transition table, which ensures they are correct. 
- debug_assert!(i < text.len()); - let b = *text.get_unchecked(i); - debug_assert!((b as usize) < self.prog.byte_classes.len()); - let cls = *self.prog.byte_classes.get_unchecked(b as usize); - self.cache.trans.next_unchecked(si, cls as usize) - } - - /// Computes the next state given the current state and the current input - /// byte (which may be EOF). - /// - /// If STATE_DEAD is returned, then there is no valid state transition. - /// This implies that no permutation of future input can lead to a match - /// state. - /// - /// STATE_UNKNOWN can never be returned. - fn exec_byte( - &mut self, - qcur: &mut SparseSet, - qnext: &mut SparseSet, - mut si: StatePtr, - b: Byte, - ) -> Option { - use crate::prog::Inst::*; - - // Initialize a queue with the current DFA state's NFA states. - qcur.clear(); - for ip in self.state(si).inst_ptrs() { - qcur.insert(ip); - } - - // Before inspecting the current byte, we may need to also inspect - // whether the position immediately preceding the current byte - // satisfies the empty assertions found in the current state. - // - // We only need to do this step if there are any empty assertions in - // the current state. - let is_word_last = self.state(si).flags().is_word(); - let is_word = b.is_ascii_word(); - if self.state(si).flags().has_empty() { - // Compute the flags immediately preceding the current byte. - // This means we only care about the "end" or "end line" flags. - // (The "start" flags are computed immediately following the - // current byte and are handled below.) - let mut flags = EmptyFlags::default(); - if b.is_eof() { - flags.end = true; - flags.end_line = true; - } else if b.as_byte().map_or(false, |b| b == b'\n') { - flags.end_line = true; - } - if is_word_last == is_word { - flags.not_word_boundary = true; - } else { - flags.word_boundary = true; - } - // Now follow epsilon transitions from every NFA state, but make - // sure we only follow transitions that satisfy our flags. 
- qnext.clear(); - for &ip in &*qcur { - self.follow_epsilons(usize_to_u32(ip), qnext, flags); - } - mem::swap(qcur, qnext); - } - - // Now we set flags for immediately after the current byte. Since start - // states are processed separately, and are the only states that can - // have the StartText flag set, we therefore only need to worry about - // the StartLine flag here. - // - // We do also keep track of whether this DFA state contains a NFA state - // that is a matching state. This is precisely how we delay the DFA - // matching by one byte in order to process the special EOF sentinel - // byte. Namely, if this DFA state containing a matching NFA state, - // then it is the *next* DFA state that is marked as a match. - let mut empty_flags = EmptyFlags::default(); - let mut state_flags = StateFlags::default(); - empty_flags.start_line = b.as_byte().map_or(false, |b| b == b'\n'); - if b.is_ascii_word() { - state_flags.set_word(); - } - // Now follow all epsilon transitions again, but only after consuming - // the current byte. - qnext.clear(); - for &ip in &*qcur { - match self.prog[ip as usize] { - // These states never happen in a byte-based program. - Char(_) | Ranges(_) => unreachable!(), - // These states are handled when following epsilon transitions. - Save(_) | Split(_) | EmptyLook(_) => {} - Match(_) => { - state_flags.set_match(); - if !self.continue_past_first_match() { - break; - } else if self.prog.matches.len() > 1 - && !qnext.contains(ip as usize) - { - // If we are continuing on to find other matches, - // then keep a record of the match states we've seen. 
- qnext.insert(ip); - } - } - Bytes(ref inst) => { - if b.as_byte().map_or(false, |b| inst.matches(b)) { - self.follow_epsilons( - inst.goto as InstPtr, - qnext, - empty_flags, - ); - } - } - } - } - - let cache = if b.is_eof() && self.prog.matches.len() > 1 { - // If we're processing the last byte of the input and we're - // matching a regex set, then make the next state contain the - // previous states transitions. We do this so that the main - // matching loop can extract all of the match instructions. - mem::swap(qcur, qnext); - // And don't cache this state because it's totally bunk. - false - } else { - true - }; - - // We've now built up the set of NFA states that ought to comprise the - // next DFA state, so try to find it in the cache, and if it doesn't - // exist, cache it. - // - // N.B. We pass `&mut si` here because the cache may clear itself if - // it has gotten too full. When that happens, the location of the - // current state may change. - let mut next = - match self.cached_state(qnext, state_flags, Some(&mut si)) { - None => return None, - Some(next) => next, - }; - if (self.start & !STATE_START) == next { - // Start states can never be match states since all matches are - // delayed by one byte. - debug_assert!(!self.state(next).flags().is_match()); - next = self.start_ptr(next); - } - if next <= STATE_MAX && self.state(next).flags().is_match() { - next |= STATE_MATCH; - } - debug_assert!(next != STATE_UNKNOWN); - // And now store our state in the current state's next list. - if cache { - let cls = self.byte_class(b); - self.cache.trans.set_next(si, cls, next); - } - Some(next) - } - - /// Follows the epsilon transitions starting at (and including) `ip`. The - /// resulting states are inserted into the ordered set `q`. - /// - /// Conditional epsilon transitions (i.e., empty width assertions) are only - /// followed if they are satisfied by the given flags, which should - /// represent the flags set at the current location in the input. 
- /// - /// If the current location corresponds to the empty string, then only the - /// end line and/or end text flags may be set. If the current location - /// corresponds to a real byte in the input, then only the start line - /// and/or start text flags may be set. - /// - /// As an exception to the above, when finding the initial state, any of - /// the above flags may be set: - /// - /// If matching starts at the beginning of the input, then start text and - /// start line should be set. If the input is empty, then end text and end - /// line should also be set. - /// - /// If matching starts after the beginning of the input, then only start - /// line should be set if the preceding byte is `\n`. End line should never - /// be set in this case. (Even if the following byte is a `\n`, it will - /// be handled in a subsequent DFA state.) - fn follow_epsilons( - &mut self, - ip: InstPtr, - q: &mut SparseSet, - flags: EmptyFlags, - ) { - use crate::prog::EmptyLook::*; - use crate::prog::Inst::*; - - // We need to traverse the NFA to follow epsilon transitions, so avoid - // recursion with an explicit stack. - self.cache.stack.push(ip); - while let Some(mut ip) = self.cache.stack.pop() { - // Try to munch through as many states as possible without - // pushes/pops to the stack. - loop { - // Don't visit states we've already added. - if q.contains(ip as usize) { - break; - } - q.insert(ip as usize); - match self.prog[ip as usize] { - Char(_) | Ranges(_) => unreachable!(), - Match(_) | Bytes(_) => { - break; - } - EmptyLook(ref inst) => { - // Only follow empty assertion states if our flags - // satisfy the assertion. 
- match inst.look { - StartLine if flags.start_line => { - ip = inst.goto as InstPtr; - } - EndLine if flags.end_line => { - ip = inst.goto as InstPtr; - } - StartText if flags.start => { - ip = inst.goto as InstPtr; - } - EndText if flags.end => { - ip = inst.goto as InstPtr; - } - WordBoundaryAscii if flags.word_boundary => { - ip = inst.goto as InstPtr; - } - NotWordBoundaryAscii - if flags.not_word_boundary => - { - ip = inst.goto as InstPtr; - } - WordBoundary if flags.word_boundary => { - ip = inst.goto as InstPtr; - } - NotWordBoundary if flags.not_word_boundary => { - ip = inst.goto as InstPtr; - } - StartLine | EndLine | StartText | EndText - | WordBoundaryAscii | NotWordBoundaryAscii - | WordBoundary | NotWordBoundary => { - break; - } - } - } - Save(ref inst) => { - ip = inst.goto as InstPtr; - } - Split(ref inst) => { - self.cache.stack.push(inst.goto2 as InstPtr); - ip = inst.goto1 as InstPtr; - } - } - } - } - } - - /// Find a previously computed state matching the given set of instructions - /// and is_match bool. - /// - /// The given set of instructions should represent a single state in the - /// NFA along with all states reachable without consuming any input. - /// - /// The is_match bool should be true if and only if the preceding DFA state - /// contains an NFA matching state. The cached state produced here will - /// then signify a match. (This enables us to delay a match by one byte, - /// in order to account for the EOF sentinel byte.) - /// - /// If the cache is full, then it is wiped before caching a new state. - /// - /// The current state should be specified if it exists, since it will need - /// to be preserved if the cache clears itself. (Start states are - /// always saved, so they should not be passed here.) It takes a mutable - /// pointer to the index because if the cache is cleared, the state's - /// location may change. 
- fn cached_state( - &mut self, - q: &SparseSet, - mut state_flags: StateFlags, - current_state: Option<&mut StatePtr>, - ) -> Option { - // If we couldn't come up with a non-empty key to represent this state, - // then it is dead and can never lead to a match. - // - // Note that inst_flags represent the set of empty width assertions - // in q. We use this as an optimization in exec_byte to determine when - // we should follow epsilon transitions at the empty string preceding - // the current byte. - let key = match self.cached_state_key(q, &mut state_flags) { - None => return Some(STATE_DEAD), - Some(v) => v, - }; - // In the cache? Cool. Done. - if let Some(si) = self.cache.compiled.get_ptr(&key) { - return Some(si); - } - // If the cache has gotten too big, wipe it. - if self.approximate_size() > self.prog.dfa_size_limit - && !self.clear_cache_and_save(current_state) - { - // Ooops. DFA is giving up. - return None; - } - // Allocate room for our state and add it. - self.add_state(key) - } - - /// Produces a key suitable for describing a state in the DFA cache. - /// - /// The key invariant here is that equivalent keys are produced for any two - /// sets of ordered NFA states (and toggling of whether the previous NFA - /// states contain a match state) that do not discriminate a match for any - /// input. - /// - /// Specifically, q should be an ordered set of NFA states and is_match - /// should be true if and only if the previous NFA states contained a match - /// state. - fn cached_state_key( - &mut self, - q: &SparseSet, - state_flags: &mut StateFlags, - ) -> Option { - use crate::prog::Inst::*; - - // We need to build up enough information to recognize pre-built states - // in the DFA. Generally speaking, this includes every instruction - // except for those which are purely epsilon transitions, e.g., the - // Save and Split instructions. 
- // - // Empty width assertions are also epsilon transitions, but since they - // are conditional, we need to make them part of a state's key in the - // cache. - - let mut insts = - mem::replace(&mut self.cache.insts_scratch_space, vec![]); - insts.clear(); - // Reserve 1 byte for flags. - insts.push(0); - - let mut prev = 0; - for &ip in q { - let ip = usize_to_u32(ip); - match self.prog[ip as usize] { - Char(_) | Ranges(_) => unreachable!(), - Save(_) | Split(_) => {} - Bytes(_) => push_inst_ptr(&mut insts, &mut prev, ip), - EmptyLook(_) => { - state_flags.set_empty(); - push_inst_ptr(&mut insts, &mut prev, ip) - } - Match(_) => { - push_inst_ptr(&mut insts, &mut prev, ip); - if !self.continue_past_first_match() { - break; - } - } - } - } - // If we couldn't transition to any other instructions and we didn't - // see a match when expanding NFA states previously, then this is a - // dead state and no amount of additional input can transition out - // of this state. - let opt_state = if insts.len() == 1 && !state_flags.is_match() { - None - } else { - let StateFlags(f) = *state_flags; - insts[0] = f; - Some(State { data: Arc::from(&*insts) }) - }; - self.cache.insts_scratch_space = insts; - opt_state - } - - /// Clears the cache, but saves and restores current_state if it is not - /// none. - /// - /// The current state must be provided here in case its location in the - /// cache changes. - /// - /// This returns false if the cache is not cleared and the DFA should - /// give up. - fn clear_cache_and_save( - &mut self, - current_state: Option<&mut StatePtr>, - ) -> bool { - if self.cache.compiled.is_empty() { - // Nothing to clear... - return true; - } - match current_state { - None => self.clear_cache(), - Some(si) => { - let cur = self.state(*si).clone(); - if !self.clear_cache() { - return false; - } - // The unwrap is OK because we just cleared the cache and - // therefore know that the next state pointer won't exceed - // STATE_MAX. 
- *si = self.restore_state(cur).unwrap(); - true - } - } - } - - /// Wipes the state cache, but saves and restores the current start state. - /// - /// This returns false if the cache is not cleared and the DFA should - /// give up. - fn clear_cache(&mut self) -> bool { - // Bail out of the DFA if we're moving too "slowly." - // A heuristic from RE2: assume the DFA is too slow if it is processing - // 10 or fewer bytes per state. - // Additionally, we permit the cache to be flushed a few times before - // caling it quits. - let nstates = self.cache.compiled.len(); - if self.cache.flush_count >= 3 - && self.at >= self.last_cache_flush - && (self.at - self.last_cache_flush) <= 10 * nstates - { - return false; - } - // Update statistics tracking cache flushes. - self.last_cache_flush = self.at; - self.cache.flush_count += 1; - - // OK, actually flush the cache. - let start = self.state(self.start & !STATE_START).clone(); - let last_match = if self.last_match_si <= STATE_MAX { - Some(self.state(self.last_match_si).clone()) - } else { - None - }; - self.cache.reset_size(); - self.cache.trans.clear(); - self.cache.compiled.clear(); - for s in &mut self.cache.start_states { - *s = STATE_UNKNOWN; - } - // The unwraps are OK because we just cleared the cache and therefore - // know that the next state pointer won't exceed STATE_MAX. - let start_ptr = self.restore_state(start).unwrap(); - self.start = self.start_ptr(start_ptr); - if let Some(last_match) = last_match { - self.last_match_si = self.restore_state(last_match).unwrap(); - } - true - } - - /// Restores the given state back into the cache, and returns a pointer - /// to it. - fn restore_state(&mut self, state: State) -> Option { - // If we've already stored this state, just return a pointer to it. - // None will be the wiser. - if let Some(si) = self.cache.compiled.get_ptr(&state) { - return Some(si); - } - self.add_state(state) - } - - /// Returns the next state given the current state si and current byte - /// b. 
{qcur,qnext} are used as scratch space for storing ordered NFA - /// states. - /// - /// This tries to fetch the next state from the cache, but if that fails, - /// it computes the next state, caches it and returns a pointer to it. - /// - /// The pointer can be to a real state, or it can be STATE_DEAD. - /// STATE_UNKNOWN cannot be returned. - /// - /// None is returned if a new state could not be allocated (i.e., the DFA - /// ran out of space and thinks it's running too slowly). - fn next_state( - &mut self, - qcur: &mut SparseSet, - qnext: &mut SparseSet, - si: StatePtr, - b: Byte, - ) -> Option { - if si == STATE_DEAD { - return Some(STATE_DEAD); - } - match self.cache.trans.next(si, self.byte_class(b)) { - STATE_UNKNOWN => self.exec_byte(qcur, qnext, si, b), - STATE_QUIT => None, - nsi => Some(nsi), - } - } - - /// Computes and returns the start state, where searching begins at - /// position `at` in `text`. If the state has already been computed, - /// then it is pulled from the cache. If the state hasn't been cached, - /// then it is computed, cached and a pointer to it is returned. - /// - /// This may return STATE_DEAD but never STATE_UNKNOWN. - #[cfg_attr(feature = "perf-inline", inline(always))] - fn start_state( - &mut self, - q: &mut SparseSet, - empty_flags: EmptyFlags, - state_flags: StateFlags, - ) -> Option { - // Compute an index into our cache of start states based on the set - // of empty/state flags set at the current position in the input. We - // don't use every flag since not all flags matter. For example, since - // matches are delayed by one byte, start states can never be match - // states. 
- let flagi = { - (((empty_flags.start as u8) << 0) - | ((empty_flags.end as u8) << 1) - | ((empty_flags.start_line as u8) << 2) - | ((empty_flags.end_line as u8) << 3) - | ((empty_flags.word_boundary as u8) << 4) - | ((empty_flags.not_word_boundary as u8) << 5) - | ((state_flags.is_word() as u8) << 6)) as usize - }; - match self.cache.start_states[flagi] { - STATE_UNKNOWN => {} - si => return Some(si), - } - q.clear(); - let start = usize_to_u32(self.prog.start); - self.follow_epsilons(start, q, empty_flags); - // Start states can never be match states because we delay every match - // by one byte. Given an empty string and an empty match, the match - // won't actually occur until the DFA processes the special EOF - // sentinel byte. - let sp = match self.cached_state(q, state_flags, None) { - None => return None, - Some(sp) => self.start_ptr(sp), - }; - self.cache.start_states[flagi] = sp; - Some(sp) - } - - /// Computes the set of starting flags for the given position in text. - /// - /// This should only be used when executing the DFA forwards over the - /// input. - fn start_flags(&self, text: &[u8], at: usize) -> (EmptyFlags, StateFlags) { - let mut empty_flags = EmptyFlags::default(); - let mut state_flags = StateFlags::default(); - empty_flags.start = at == 0; - empty_flags.end = text.is_empty(); - empty_flags.start_line = at == 0 || text[at - 1] == b'\n'; - empty_flags.end_line = text.is_empty(); - - let is_word_last = at > 0 && Byte::byte(text[at - 1]).is_ascii_word(); - let is_word = at < text.len() && Byte::byte(text[at]).is_ascii_word(); - if is_word_last { - state_flags.set_word(); - } - if is_word == is_word_last { - empty_flags.not_word_boundary = true; - } else { - empty_flags.word_boundary = true; - } - (empty_flags, state_flags) - } - - /// Computes the set of starting flags for the given position in text. - /// - /// This should only be used when executing the DFA in reverse over the - /// input. 
- fn start_flags_reverse( - &self, - text: &[u8], - at: usize, - ) -> (EmptyFlags, StateFlags) { - let mut empty_flags = EmptyFlags::default(); - let mut state_flags = StateFlags::default(); - empty_flags.start = at == text.len(); - empty_flags.end = text.is_empty(); - empty_flags.start_line = at == text.len() || text[at] == b'\n'; - empty_flags.end_line = text.is_empty(); - - let is_word_last = - at < text.len() && Byte::byte(text[at]).is_ascii_word(); - let is_word = at > 0 && Byte::byte(text[at - 1]).is_ascii_word(); - if is_word_last { - state_flags.set_word(); - } - if is_word == is_word_last { - empty_flags.not_word_boundary = true; - } else { - empty_flags.word_boundary = true; - } - (empty_flags, state_flags) - } - - /// Returns a reference to a State given a pointer to it. - fn state(&self, si: StatePtr) -> &State { - self.cache.compiled.get_state(si).unwrap() - } - - /// Adds the given state to the DFA. - /// - /// This allocates room for transitions out of this state in - /// self.cache.trans. The transitions can be set with the returned - /// StatePtr. - /// - /// If None is returned, then the state limit was reached and the DFA - /// should quit. - fn add_state(&mut self, state: State) -> Option { - // This will fail if the next state pointer exceeds STATE_PTR. In - // practice, the cache limit will prevent us from ever getting here, - // but maybe callers will set the cache size to something ridiculous... - let si = match self.cache.trans.add() { - None => return None, - Some(si) => si, - }; - // If the program has a Unicode word boundary, then set any transitions - // for non-ASCII bytes to STATE_QUIT. If the DFA stumbles over such a - // transition, then it will quit and an alternative matching engine - // will take over. 
- if self.prog.has_unicode_word_boundary { - for b in 128..256 { - let cls = self.byte_class(Byte::byte(b as u8)); - self.cache.trans.set_next(si, cls, STATE_QUIT); - } - } - // Finally, put our actual state on to our heap of states and index it - // so we can find it later. - self.cache.size += self.cache.trans.state_heap_size() - + state.data.len() - + (2 * mem::size_of::()) - + mem::size_of::(); - self.cache.compiled.insert(state, si); - // Transition table and set of states and map should all be in sync. - debug_assert!( - self.cache.compiled.len() == self.cache.trans.num_states() - ); - Some(si) - } - - /// Quickly finds the next occurrence of any literal prefixes in the regex. - /// If there are no literal prefixes, then the current position is - /// returned. If there are literal prefixes and one could not be found, - /// then None is returned. - /// - /// This should only be called when the DFA is in a start state. - fn prefix_at(&self, text: &[u8], at: usize) -> Option { - self.prog.prefixes.find(&text[at..]).map(|(s, _)| at + s) - } - - /// Returns the number of byte classes required to discriminate transitions - /// in each state. - /// - /// invariant: num_byte_classes() == len(State.next) - fn num_byte_classes(&self) -> usize { - // We add 1 to account for the special EOF byte. - (self.prog.byte_classes[255] as usize + 1) + 1 - } - - /// Given an input byte or the special EOF sentinel, return its - /// corresponding byte class. - #[cfg_attr(feature = "perf-inline", inline(always))] - fn byte_class(&self, b: Byte) -> usize { - match b.as_byte() { - None => self.num_byte_classes() - 1, - Some(b) => self.u8_class(b), - } - } - - /// Like byte_class, but explicitly for u8s. - #[cfg_attr(feature = "perf-inline", inline(always))] - fn u8_class(&self, b: u8) -> usize { - self.prog.byte_classes[b as usize] as usize - } - - /// Returns true if the DFA should continue searching past the first match. 
- /// - /// Leftmost first semantics in the DFA are preserved by not following NFA - /// transitions after the first match is seen. - /// - /// On occasion, we want to avoid leftmost first semantics to find either - /// the longest match (for reverse search) or all possible matches (for - /// regex sets). - fn continue_past_first_match(&self) -> bool { - self.prog.is_reverse || self.prog.matches.len() > 1 - } - - /// Returns true if there is a prefix we can quickly search for. - fn has_prefix(&self) -> bool { - !self.prog.is_reverse - && !self.prog.prefixes.is_empty() - && !self.prog.is_anchored_start - } - - /// Sets the STATE_START bit in the given state pointer if and only if - /// we have a prefix to scan for. - /// - /// If there's no prefix, then it's a waste to treat the start state - /// specially. - fn start_ptr(&self, si: StatePtr) -> StatePtr { - if self.has_prefix() { - si | STATE_START - } else { - si - } - } - - /// Approximate size returns the approximate heap space currently used by - /// the DFA. It is used to determine whether the DFA's state cache needs to - /// be wiped. Namely, it is possible that for certain regexes on certain - /// inputs, a new state could be created for every byte of input. (This is - /// bad for memory use, so we bound it with a cache.) - fn approximate_size(&self) -> usize { - self.cache.size - } -} - -/// An abstraction for representing a map of states. The map supports two -/// different ways of state lookup. One is fast constant time access via a -/// state pointer. The other is a hashmap lookup based on the DFA's -/// constituent NFA states. -/// -/// A DFA state internally uses an Arc such that we only need to store the -/// set of NFA states on the heap once, even though we support looking up -/// states by two different means. A more natural way to express this might -/// use raw pointers, but an Arc is safe and effectively achieves the same -/// thing. 
-#[derive(Debug)] -struct StateMap { - /// The keys are not actually static but rely on always pointing to a - /// buffer in `states` which will never be moved except when clearing - /// the map or on drop, in which case the keys of this map will be - /// removed before - map: HashMap, - /// Our set of states. Note that `StatePtr / num_byte_classes` indexes - /// this Vec rather than just a `StatePtr`. - states: Vec, - /// The number of byte classes in the DFA. Used to index `states`. - num_byte_classes: usize, -} - -impl StateMap { - fn new(num_byte_classes: usize) -> StateMap { - StateMap { map: HashMap::new(), states: vec![], num_byte_classes } - } - - fn len(&self) -> usize { - self.states.len() - } - - fn is_empty(&self) -> bool { - self.states.is_empty() - } - - fn get_ptr(&self, state: &State) -> Option { - self.map.get(state).cloned() - } - - fn get_state(&self, si: StatePtr) -> Option<&State> { - self.states.get(si as usize / self.num_byte_classes) - } - - fn insert(&mut self, state: State, si: StatePtr) { - self.map.insert(state.clone(), si); - self.states.push(state); - } - - fn clear(&mut self) { - self.map.clear(); - self.states.clear(); - } -} - -impl Transitions { - /// Create a new transition table. - /// - /// The number of byte classes corresponds to the stride. Every state will - /// have `num_byte_classes` slots for transitions. - fn new(num_byte_classes: usize) -> Transitions { - Transitions { table: vec![], num_byte_classes } - } - - /// Returns the total number of states currently in this table. - fn num_states(&self) -> usize { - self.table.len() / self.num_byte_classes - } - - /// Allocates room for one additional state and returns a pointer to it. - /// - /// If there's no more room, None is returned. 
- fn add(&mut self) -> Option { - let si = self.table.len(); - if si > STATE_MAX as usize { - return None; - } - self.table.extend(repeat(STATE_UNKNOWN).take(self.num_byte_classes)); - Some(usize_to_u32(si)) - } - - /// Clears the table of all states. - fn clear(&mut self) { - self.table.clear(); - } - - /// Sets the transition from (si, cls) to next. - fn set_next(&mut self, si: StatePtr, cls: usize, next: StatePtr) { - self.table[si as usize + cls] = next; - } - - /// Returns the transition corresponding to (si, cls). - fn next(&self, si: StatePtr, cls: usize) -> StatePtr { - self.table[si as usize + cls] - } - - /// The heap size, in bytes, of a single state in the transition table. - fn state_heap_size(&self) -> usize { - self.num_byte_classes * mem::size_of::() - } - - /// Like `next`, but uses unchecked access and is therefore not safe. - unsafe fn next_unchecked(&self, si: StatePtr, cls: usize) -> StatePtr { - debug_assert!((si as usize) < self.table.len()); - debug_assert!(cls < self.num_byte_classes); - *self.table.get_unchecked(si as usize + cls) - } -} - -impl StateFlags { - fn is_match(&self) -> bool { - self.0 & 0b0000_0001 > 0 - } - - fn set_match(&mut self) { - self.0 |= 0b0000_0001; - } - - fn is_word(&self) -> bool { - self.0 & 0b0000_0010 > 0 - } - - fn set_word(&mut self) { - self.0 |= 0b0000_0010; - } - - fn has_empty(&self) -> bool { - self.0 & 0b0000_0100 > 0 - } - - fn set_empty(&mut self) { - self.0 |= 0b0000_0100; - } -} - -impl Byte { - fn byte(b: u8) -> Self { - Byte(b as u16) - } - fn eof() -> Self { - Byte(256) - } - fn is_eof(&self) -> bool { - self.0 == 256 - } - - fn is_ascii_word(&self) -> bool { - let b = match self.as_byte() { - None => return false, - Some(b) => b, - }; - match b { - b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'_' => true, - _ => false, - } - } - - fn as_byte(&self) -> Option { - if self.is_eof() { - None - } else { - Some(self.0 as u8) - } - } -} - -impl fmt::Debug for State { - fn fmt(&self, f: &mut 
fmt::Formatter<'_>) -> fmt::Result { - let ips: Vec = self.inst_ptrs().collect(); - f.debug_struct("State") - .field("flags", &self.flags()) - .field("insts", &ips) - .finish() - } -} - -impl fmt::Debug for Transitions { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let mut fmtd = f.debug_map(); - for si in 0..self.num_states() { - let s = si * self.num_byte_classes; - let e = s + self.num_byte_classes; - fmtd.entry(&si.to_string(), &TransitionsRow(&self.table[s..e])); - } - fmtd.finish() - } -} - -struct TransitionsRow<'a>(&'a [StatePtr]); - -impl<'a> fmt::Debug for TransitionsRow<'a> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let mut fmtd = f.debug_map(); - for (b, si) in self.0.iter().enumerate() { - match *si { - STATE_UNKNOWN => {} - STATE_DEAD => { - fmtd.entry(&vb(b as usize), &"DEAD"); - } - si => { - fmtd.entry(&vb(b as usize), &si.to_string()); - } - } - } - fmtd.finish() - } -} - -impl fmt::Debug for StateFlags { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("StateFlags") - .field("is_match", &self.is_match()) - .field("is_word", &self.is_word()) - .field("has_empty", &self.has_empty()) - .finish() - } -} - -/// Helper function for formatting a byte as a nice-to-read escaped string. 
-fn vb(b: usize) -> String { - use std::ascii::escape_default; - - if b > ::std::u8::MAX as usize { - "EOF".to_owned() - } else { - let escaped = escape_default(b as u8).collect::>(); - String::from_utf8_lossy(&escaped).into_owned() - } -} - -fn usize_to_u32(n: usize) -> u32 { - if (n as u64) > (::std::u32::MAX as u64) { - panic!("BUG: {} is too big to fit into u32", n) - } - n as u32 -} - -#[allow(dead_code)] // useful for debugging -fn show_state_ptr(si: StatePtr) -> String { - let mut s = format!("{:?}", si & STATE_MAX); - if si == STATE_UNKNOWN { - s = format!("{} (unknown)", s); - } - if si == STATE_DEAD { - s = format!("{} (dead)", s); - } - if si == STATE_QUIT { - s = format!("{} (quit)", s); - } - if si & STATE_START > 0 { - s = format!("{} (start)", s); - } - if si & STATE_MATCH > 0 { - s = format!("{} (match)", s); - } - s -} - -/// https://developers.google.com/protocol-buffers/docs/encoding#varints -fn write_vari32(data: &mut Vec, n: i32) { - let mut un = (n as u32) << 1; - if n < 0 { - un = !un; - } - write_varu32(data, un) -} - -/// https://developers.google.com/protocol-buffers/docs/encoding#varints -fn read_vari32(data: &[u8]) -> (i32, usize) { - let (un, i) = read_varu32(data); - let mut n = (un >> 1) as i32; - if un & 1 != 0 { - n = !n; - } - (n, i) -} - -/// https://developers.google.com/protocol-buffers/docs/encoding#varints -fn write_varu32(data: &mut Vec, mut n: u32) { - while n >= 0b1000_0000 { - data.push((n as u8) | 0b1000_0000); - n >>= 7; - } - data.push(n as u8); -} - -/// https://developers.google.com/protocol-buffers/docs/encoding#varints -fn read_varu32(data: &[u8]) -> (u32, usize) { - let mut n: u32 = 0; - let mut shift: u32 = 0; - for (i, &b) in data.iter().enumerate() { - if b < 0b1000_0000 { - return (n | ((b as u32) << shift), i + 1); - } - n |= ((b as u32) & 0b0111_1111) << shift; - shift += 7; - } - (0, 0) -} - -#[cfg(test)] -mod tests { - - use super::{ - push_inst_ptr, read_vari32, read_varu32, write_vari32, write_varu32, - 
State, StateFlags, - }; - use quickcheck::{quickcheck, Gen, QuickCheck}; - use std::sync::Arc; - - #[test] - fn prop_state_encode_decode() { - fn p(mut ips: Vec, flags: u8) -> bool { - // It looks like our encoding scheme can't handle instruction - // pointers at or above 2**31. We should fix that, but it seems - // unlikely to occur in real code due to the amount of memory - // required for such a state machine. So for now, we just clamp - // our test data. - for ip in &mut ips { - if *ip >= 1 << 31 { - *ip = (1 << 31) - 1; - } - } - let mut data = vec![flags]; - let mut prev = 0; - for &ip in ips.iter() { - push_inst_ptr(&mut data, &mut prev, ip); - } - let state = State { data: Arc::from(&data[..]) }; - - let expected: Vec = - ips.into_iter().map(|ip| ip as usize).collect(); - let got: Vec = state.inst_ptrs().collect(); - expected == got && state.flags() == StateFlags(flags) - } - QuickCheck::new() - .gen(Gen::new(10_000)) - .quickcheck(p as fn(Vec, u8) -> bool); - } - - #[test] - fn prop_read_write_u32() { - fn p(n: u32) -> bool { - let mut buf = vec![]; - write_varu32(&mut buf, n); - let (got, nread) = read_varu32(&buf); - nread == buf.len() && got == n - } - quickcheck(p as fn(u32) -> bool); - } - - #[test] - fn prop_read_write_i32() { - fn p(n: i32) -> bool { - let mut buf = vec![]; - write_vari32(&mut buf, n); - let (got, nread) = read_vari32(&buf); - nread == buf.len() && got == n - } - quickcheck(p as fn(i32) -> bool); - } -} diff --git a/vendor/regex/src/error.rs b/vendor/regex/src/error.rs index 6c341f6..6026b38 100644 --- a/vendor/regex/src/error.rs +++ b/vendor/regex/src/error.rs @@ -1,7 +1,9 @@ -use std::fmt; -use std::iter::repeat; +use alloc::string::{String, ToString}; + +use regex_automata::meta; /// An error that occurred during parsing or compiling a regular expression. +#[non_exhaustive] #[derive(Clone, PartialEq)] pub enum Error { /// A syntax error. @@ -27,29 +29,44 @@ pub enum Error { /// approaches may be appropriate. 
Instead, you'll have to determine just /// how big of a regex you want to allow. CompiledTooBig(usize), - /// Hints that destructuring should not be exhaustive. - /// - /// This enum may grow additional variants, so this makes sure clients - /// don't count on exhaustive matching. (Otherwise, adding a new variant - /// could break existing code.) - #[doc(hidden)] - __Nonexhaustive, } -impl ::std::error::Error for Error { +impl Error { + pub(crate) fn from_meta_build_error(err: meta::BuildError) -> Error { + if let Some(size_limit) = err.size_limit() { + Error::CompiledTooBig(size_limit) + } else if let Some(ref err) = err.syntax_error() { + Error::Syntax(err.to_string()) + } else { + // This is a little suspect. Technically there are more ways for + // a meta regex to fail to build other than "exceeded size limit" + // and "syntax error." For example, if there are too many states + // or even too many patterns. But in practice this is probably + // good enough. The worst thing that happens is that Error::Syntax + // represents an error that isn't technically a syntax error, but + // the actual message will still be shown. So... it's not too bad. + // + // We really should have made the Error type in the regex crate + // completely opaque. Rookie mistake. + Error::Syntax(err.to_string()) + } + } +} + +#[cfg(feature = "std")] +impl std::error::Error for Error { // TODO: Remove this method entirely on the next breaking semver release. 
#[allow(deprecated)] fn description(&self) -> &str { match *self { Error::Syntax(ref err) => err, Error::CompiledTooBig(_) => "compiled program too big", - Error::__Nonexhaustive => unreachable!(), } } } -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl core::fmt::Display for Error { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { match *self { Error::Syntax(ref err) => err.fmt(f), Error::CompiledTooBig(limit) => write!( @@ -57,7 +74,6 @@ impl fmt::Display for Error { "Compiled regex exceeds size limit of {} bytes.", limit ), - Error::__Nonexhaustive => unreachable!(), } } } @@ -66,11 +82,11 @@ impl fmt::Display for Error { // errors when people use `Regex::new(...).unwrap()`. It's a little weird, // but the `Syntax` variant is already storing a `String` anyway, so we might // as well format it nicely. -impl fmt::Debug for Error { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl core::fmt::Debug for Error { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { match *self { Error::Syntax(ref err) => { - let hr: String = repeat('~').take(79).collect(); + let hr: String = core::iter::repeat('~').take(79).collect(); writeln!(f, "Syntax(")?; writeln!(f, "{}", hr)?; writeln!(f, "{}", err)?; @@ -81,9 +97,6 @@ impl fmt::Debug for Error { Error::CompiledTooBig(limit) => { f.debug_tuple("CompiledTooBig").field(&limit).finish() } - Error::__Nonexhaustive => { - f.debug_tuple("__Nonexhaustive").finish() - } } } } diff --git a/vendor/regex/src/exec.rs b/vendor/regex/src/exec.rs deleted file mode 100644 index c449b38..0000000 --- a/vendor/regex/src/exec.rs +++ /dev/null @@ -1,1748 +0,0 @@ -use std::cell::RefCell; -use std::collections::HashMap; -use std::panic::AssertUnwindSafe; -use std::sync::Arc; - -#[cfg(feature = "perf-literal")] -use aho_corasick::{AhoCorasick, MatchKind}; -use regex_syntax::hir::literal; -use regex_syntax::hir::{Hir, Look}; -use 
regex_syntax::ParserBuilder; - -use crate::backtrack; -use crate::compile::Compiler; -#[cfg(feature = "perf-dfa")] -use crate::dfa; -use crate::error::Error; -use crate::input::{ByteInput, CharInput}; -use crate::literal::LiteralSearcher; -use crate::pikevm; -use crate::pool::{Pool, PoolGuard}; -use crate::prog::Program; -use crate::re_builder::RegexOptions; -use crate::re_bytes; -use crate::re_set; -use crate::re_trait::{Locations, RegularExpression, Slot}; -use crate::re_unicode; -use crate::utf8::next_utf8; - -/// `Exec` manages the execution of a regular expression. -/// -/// In particular, this manages the various compiled forms of a single regular -/// expression and the choice of which matching engine to use to execute a -/// regular expression. -#[derive(Debug)] -pub struct Exec { - /// All read only state. - ro: Arc, - /// A pool of reusable values for the various matching engines. - /// - /// Note that boxing this value is not strictly necessary, but it is an - /// easy way to ensure that T does not bloat the stack sized used by a pool - /// in the case where T is big. And this turns out to be the case at the - /// time of writing for regex's use of this pool. At the time of writing, - /// the size of a Regex on the stack is 856 bytes. Boxing this value - /// reduces that size to 16 bytes. - pool: Box>, -} - -/// `ExecNoSync` is like `Exec`, except it embeds a reference to a cache. This -/// means it is no longer Sync, but we can now avoid the overhead of -/// synchronization to fetch the cache. -#[derive(Debug)] -pub struct ExecNoSync<'c> { - /// All read only state. - ro: &'c Arc, - /// Caches for the various matching engines. - cache: PoolGuard<'c, ProgramCache>, -} - -/// `ExecNoSyncStr` is like `ExecNoSync`, but matches on &str instead of &[u8]. -#[derive(Debug)] -pub struct ExecNoSyncStr<'c>(ExecNoSync<'c>); - -/// `ExecReadOnly` comprises all read only state for a regex. 
Namely, all such -/// state is determined at compile time and never changes during search. -#[derive(Debug)] -struct ExecReadOnly { - /// The original regular expressions given by the caller to compile. - res: Vec, - /// A compiled program that is used in the NFA simulation and backtracking. - /// It can be byte-based or Unicode codepoint based. - /// - /// N.B. It is not possibly to make this byte-based from the public API. - /// It is only used for testing byte based programs in the NFA simulations. - nfa: Program, - /// A compiled byte based program for DFA execution. This is only used - /// if a DFA can be executed. (Currently, only word boundary assertions are - /// not supported.) Note that this program contains an embedded `.*?` - /// preceding the first capture group, unless the regex is anchored at the - /// beginning. - #[allow(dead_code)] - dfa: Program, - /// The same as above, except the program is reversed (and there is no - /// preceding `.*?`). This is used by the DFA to find the starting location - /// of matches. - #[allow(dead_code)] - dfa_reverse: Program, - /// A set of suffix literals extracted from the regex. - /// - /// Prefix literals are stored on the `Program`, since they are used inside - /// the matching engines. - #[allow(dead_code)] - suffixes: LiteralSearcher, - /// An Aho-Corasick automaton with leftmost-first match semantics. - /// - /// This is only set when the entire regex is a simple unanchored - /// alternation of literals. We could probably use it more circumstances, - /// but this is already hacky enough in this architecture. - /// - /// N.B. We use u32 as a state ID representation under the assumption that - /// if we were to exhaust the ID space, we probably would have long - /// surpassed the compilation size limit. - #[cfg(feature = "perf-literal")] - ac: Option, - /// match_type encodes as much upfront knowledge about how we're going to - /// execute a search as possible. 
- match_type: MatchType, -} - -/// Facilitates the construction of an executor by exposing various knobs -/// to control how a regex is executed and what kinds of resources it's -/// permitted to use. -// `ExecBuilder` is only public via the `internal` module, so avoid deriving -// `Debug`. -#[allow(missing_debug_implementations)] -pub struct ExecBuilder { - options: RegexOptions, - match_type: Option, - bytes: bool, - only_utf8: bool, -} - -/// Parsed represents a set of parsed regular expressions and their detected -/// literals. -struct Parsed { - exprs: Vec, - prefixes: literal::Seq, - suffixes: literal::Seq, - bytes: bool, -} - -impl ExecBuilder { - /// Create a regex execution builder. - /// - /// This uses default settings for everything except the regex itself, - /// which must be provided. Further knobs can be set by calling methods, - /// and then finally, `build` to actually create the executor. - pub fn new(re: &str) -> Self { - Self::new_many(&[re]) - } - - /// Like new, but compiles the union of the given regular expressions. - /// - /// Note that when compiling 2 or more regular expressions, capture groups - /// are completely unsupported. (This means both `find` and `captures` - /// won't work.) - pub fn new_many(res: I) -> Self - where - S: AsRef, - I: IntoIterator, - { - let mut opts = RegexOptions::default(); - opts.pats = res.into_iter().map(|s| s.as_ref().to_owned()).collect(); - Self::new_options(opts) - } - - /// Create a regex execution builder. - pub fn new_options(opts: RegexOptions) -> Self { - ExecBuilder { - options: opts, - match_type: None, - bytes: false, - only_utf8: true, - } - } - - /// Set the matching engine to be automatically determined. - /// - /// This is the default state and will apply whatever optimizations are - /// possible, such as running a DFA. - /// - /// This overrides whatever was previously set via the `nfa` or - /// `bounded_backtracking` methods. 
- pub fn automatic(mut self) -> Self { - self.match_type = None; - self - } - - /// Sets the matching engine to use the NFA algorithm no matter what - /// optimizations are possible. - /// - /// This overrides whatever was previously set via the `automatic` or - /// `bounded_backtracking` methods. - pub fn nfa(mut self) -> Self { - self.match_type = Some(MatchType::Nfa(MatchNfaType::PikeVM)); - self - } - - /// Sets the matching engine to use a bounded backtracking engine no - /// matter what optimizations are possible. - /// - /// One must use this with care, since the bounded backtracking engine - /// uses memory proportion to `len(regex) * len(text)`. - /// - /// This overrides whatever was previously set via the `automatic` or - /// `nfa` methods. - pub fn bounded_backtracking(mut self) -> Self { - self.match_type = Some(MatchType::Nfa(MatchNfaType::Backtrack)); - self - } - - /// Compiles byte based programs for use with the NFA matching engines. - /// - /// By default, the NFA engines match on Unicode scalar values. They can - /// be made to use byte based programs instead. In general, the byte based - /// programs are slower because of a less efficient encoding of character - /// classes. - /// - /// Note that this does not impact DFA matching engines, which always - /// execute on bytes. - pub fn bytes(mut self, yes: bool) -> Self { - self.bytes = yes; - self - } - - /// When disabled, the program compiled may match arbitrary bytes. - /// - /// When enabled (the default), all compiled programs exclusively match - /// valid UTF-8 bytes. - pub fn only_utf8(mut self, yes: bool) -> Self { - self.only_utf8 = yes; - self - } - - /// Set the Unicode flag. - pub fn unicode(mut self, yes: bool) -> Self { - self.options.unicode = yes; - self - } - - /// Parse the current set of patterns into their AST and extract literals. 
- fn parse(&self) -> Result { - let mut exprs = Vec::with_capacity(self.options.pats.len()); - let mut prefixes = Some(literal::Seq::empty()); - let mut suffixes = Some(literal::Seq::empty()); - let mut bytes = false; - let is_set = self.options.pats.len() > 1; - // If we're compiling a regex set and that set has any anchored - // expressions, then disable all literal optimizations. - for pat in &self.options.pats { - let mut parser = ParserBuilder::new() - .octal(self.options.octal) - .case_insensitive(self.options.case_insensitive) - .multi_line(self.options.multi_line) - .dot_matches_new_line(self.options.dot_matches_new_line) - .swap_greed(self.options.swap_greed) - .ignore_whitespace(self.options.ignore_whitespace) - .unicode(self.options.unicode) - .utf8(self.only_utf8) - .nest_limit(self.options.nest_limit) - .build(); - let expr = - parser.parse(pat).map_err(|e| Error::Syntax(e.to_string()))?; - let props = expr.properties(); - // This used to just check whether the HIR matched valid UTF-8 - // or not, but in regex-syntax 0.7, we changed our definition of - // "matches valid UTF-8" to exclude zero-width matches. And in - // particular, previously, we considered WordAsciiNegate (that - // is '(?-u:\B)') to be capable of matching invalid UTF-8. Our - // matcher engines were built under this assumption and fixing - // them is not worth it with the imminent plan to switch over to - // regex-automata. So for now, we retain the previous behavior by - // just explicitly treating the presence of a negated ASCII word - // boundary as forcing use to use a byte oriented automaton. - bytes = bytes - || !props.is_utf8() - || props.look_set().contains(Look::WordAsciiNegate); - - if cfg!(feature = "perf-literal") { - if !props.look_set_prefix().contains(Look::Start) - && props.look_set().contains(Look::Start) - { - // Partial anchors unfortunately make it hard to use - // prefixes, so disable them. 
- prefixes = None; - } else if is_set - && props.look_set_prefix_any().contains(Look::Start) - { - // Regex sets with anchors do not go well with literal - // optimizations. - prefixes = None; - } else if props.look_set_prefix_any().contains_word() { - // The new literal extractor ignores look-around while - // the old one refused to extract prefixes from regexes - // that began with a \b. These old creaky regex internals - // can't deal with it, so we drop it. - prefixes = None; - } else if props.look_set_prefix_any().contains(Look::StartLF) { - // Similar to the reasoning for word boundaries, this old - // regex engine can't handle literal prefixes with '(?m:^)' - // at the beginning of a regex. - prefixes = None; - } - - if !props.look_set_suffix().contains(Look::End) - && props.look_set().contains(Look::End) - { - // Partial anchors unfortunately make it hard to use - // suffixes, so disable them. - suffixes = None; - } else if is_set - && props.look_set_suffix_any().contains(Look::End) - { - // Regex sets with anchors do not go well with literal - // optimizations. - suffixes = None; - } else if props.look_set_suffix_any().contains_word() { - // See the prefix case for reasoning here. - suffixes = None; - } else if props.look_set_suffix_any().contains(Look::EndLF) { - // See the prefix case for reasoning here. - suffixes = None; - } - - let (mut pres, mut suffs) = - if prefixes.is_none() && suffixes.is_none() { - (literal::Seq::infinite(), literal::Seq::infinite()) - } else { - literal_analysis(&expr) - }; - // These old creaky regex internals can't handle cases where - // the literal sequences are exact but there are look-around - // assertions. So we make sure the sequences are inexact if - // there are look-around assertions anywhere. This forces the - // regex engines to run instead of assuming that a literal - // match implies an overall match. 
- if !props.look_set().is_empty() { - pres.make_inexact(); - suffs.make_inexact(); - } - prefixes = prefixes.and_then(|mut prefixes| { - prefixes.union(&mut pres); - Some(prefixes) - }); - suffixes = suffixes.and_then(|mut suffixes| { - suffixes.union(&mut suffs); - Some(suffixes) - }); - } - exprs.push(expr); - } - Ok(Parsed { - exprs, - prefixes: prefixes.unwrap_or_else(literal::Seq::empty), - suffixes: suffixes.unwrap_or_else(literal::Seq::empty), - bytes, - }) - } - - /// Build an executor that can run a regular expression. - pub fn build(self) -> Result { - // Special case when we have no patterns to compile. - // This can happen when compiling a regex set. - if self.options.pats.is_empty() { - let ro = Arc::new(ExecReadOnly { - res: vec![], - nfa: Program::new(), - dfa: Program::new(), - dfa_reverse: Program::new(), - suffixes: LiteralSearcher::empty(), - #[cfg(feature = "perf-literal")] - ac: None, - match_type: MatchType::Nothing, - }); - let pool = ExecReadOnly::new_pool(&ro); - return Ok(Exec { ro, pool }); - } - let parsed = self.parse()?; - let mut nfa = Compiler::new() - .size_limit(self.options.size_limit) - .bytes(self.bytes || parsed.bytes) - .only_utf8(self.only_utf8) - .compile(&parsed.exprs)?; - let mut dfa = Compiler::new() - .size_limit(self.options.size_limit) - .dfa(true) - .only_utf8(self.only_utf8) - .compile(&parsed.exprs)?; - let mut dfa_reverse = Compiler::new() - .size_limit(self.options.size_limit) - .dfa(true) - .only_utf8(self.only_utf8) - .reverse(true) - .compile(&parsed.exprs)?; - - #[cfg(feature = "perf-literal")] - let ac = self.build_aho_corasick(&parsed); - nfa.prefixes = LiteralSearcher::prefixes(parsed.prefixes); - dfa.prefixes = nfa.prefixes.clone(); - dfa.dfa_size_limit = self.options.dfa_size_limit; - dfa_reverse.dfa_size_limit = self.options.dfa_size_limit; - - let mut ro = ExecReadOnly { - res: self.options.pats, - nfa, - dfa, - dfa_reverse, - suffixes: LiteralSearcher::suffixes(parsed.suffixes), - #[cfg(feature = 
"perf-literal")] - ac, - match_type: MatchType::Nothing, - }; - ro.match_type = ro.choose_match_type(self.match_type); - - let ro = Arc::new(ro); - let pool = ExecReadOnly::new_pool(&ro); - Ok(Exec { ro, pool }) - } - - #[cfg(feature = "perf-literal")] - fn build_aho_corasick(&self, parsed: &Parsed) -> Option { - if parsed.exprs.len() != 1 { - return None; - } - let lits = match alternation_literals(&parsed.exprs[0]) { - None => return None, - Some(lits) => lits, - }; - // If we have a small number of literals, then let Teddy handle - // things (see literal/mod.rs). - if lits.len() <= 32 { - return None; - } - Some( - AhoCorasick::builder() - .match_kind(MatchKind::LeftmostFirst) - .build(&lits) - // This should never happen because we'd long exceed the - // compilation limit for regexes first. - .expect("AC automaton too big"), - ) - } -} - -impl<'c> RegularExpression for ExecNoSyncStr<'c> { - type Text = str; - - fn slots_len(&self) -> usize { - self.0.slots_len() - } - - fn next_after_empty(&self, text: &str, i: usize) -> usize { - next_utf8(text.as_bytes(), i) - } - - #[cfg_attr(feature = "perf-inline", inline(always))] - fn shortest_match_at(&self, text: &str, start: usize) -> Option { - self.0.shortest_match_at(text.as_bytes(), start) - } - - #[cfg_attr(feature = "perf-inline", inline(always))] - fn is_match_at(&self, text: &str, start: usize) -> bool { - self.0.is_match_at(text.as_bytes(), start) - } - - #[cfg_attr(feature = "perf-inline", inline(always))] - fn find_at(&self, text: &str, start: usize) -> Option<(usize, usize)> { - self.0.find_at(text.as_bytes(), start) - } - - #[cfg_attr(feature = "perf-inline", inline(always))] - fn captures_read_at( - &self, - locs: &mut Locations, - text: &str, - start: usize, - ) -> Option<(usize, usize)> { - self.0.captures_read_at(locs, text.as_bytes(), start) - } -} - -impl<'c> RegularExpression for ExecNoSync<'c> { - type Text = [u8]; - - /// Returns the number of capture slots in the regular expression. 
(There - /// are two slots for every capture group, corresponding to possibly empty - /// start and end locations of the capture.) - fn slots_len(&self) -> usize { - self.ro.nfa.captures.len() * 2 - } - - fn next_after_empty(&self, _text: &[u8], i: usize) -> usize { - i + 1 - } - - /// Returns the end of a match location, possibly occurring before the - /// end location of the correct leftmost-first match. - #[cfg_attr(feature = "perf-inline", inline(always))] - fn shortest_match_at(&self, text: &[u8], start: usize) -> Option { - if !self.is_anchor_end_match(text) { - return None; - } - match self.ro.match_type { - #[cfg(feature = "perf-literal")] - MatchType::Literal(ty) => { - self.find_literals(ty, text, start).map(|(_, e)| e) - } - #[cfg(feature = "perf-dfa")] - MatchType::Dfa | MatchType::DfaMany => { - match self.shortest_dfa(text, start) { - dfa::Result::Match(end) => Some(end), - dfa::Result::NoMatch(_) => None, - dfa::Result::Quit => self.shortest_nfa(text, start), - } - } - #[cfg(feature = "perf-dfa")] - MatchType::DfaAnchoredReverse => { - match dfa::Fsm::reverse( - &self.ro.dfa_reverse, - self.cache.value(), - true, - &text[start..], - text.len() - start, - ) { - dfa::Result::Match(_) => Some(text.len()), - dfa::Result::NoMatch(_) => None, - dfa::Result::Quit => self.shortest_nfa(text, start), - } - } - #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))] - MatchType::DfaSuffix => { - match self.shortest_dfa_reverse_suffix(text, start) { - dfa::Result::Match(e) => Some(e), - dfa::Result::NoMatch(_) => None, - dfa::Result::Quit => self.shortest_nfa(text, start), - } - } - MatchType::Nfa(ty) => self.shortest_nfa_type(ty, text, start), - MatchType::Nothing => None, - } - } - - /// Returns true if and only if the regex matches text. - /// - /// For single regular expressions, this is equivalent to calling - /// shortest_match(...).is_some(). 
- #[cfg_attr(feature = "perf-inline", inline(always))] - fn is_match_at(&self, text: &[u8], start: usize) -> bool { - if !self.is_anchor_end_match(text) { - return false; - } - // We need to do this dance because shortest_match relies on the NFA - // filling in captures[1], but a RegexSet has no captures. In other - // words, a RegexSet can't (currently) use shortest_match. ---AG - match self.ro.match_type { - #[cfg(feature = "perf-literal")] - MatchType::Literal(ty) => { - self.find_literals(ty, text, start).is_some() - } - #[cfg(feature = "perf-dfa")] - MatchType::Dfa | MatchType::DfaMany => { - match self.shortest_dfa(text, start) { - dfa::Result::Match(_) => true, - dfa::Result::NoMatch(_) => false, - dfa::Result::Quit => self.match_nfa(text, start), - } - } - #[cfg(feature = "perf-dfa")] - MatchType::DfaAnchoredReverse => { - match dfa::Fsm::reverse( - &self.ro.dfa_reverse, - self.cache.value(), - true, - &text[start..], - text.len() - start, - ) { - dfa::Result::Match(_) => true, - dfa::Result::NoMatch(_) => false, - dfa::Result::Quit => self.match_nfa(text, start), - } - } - #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))] - MatchType::DfaSuffix => { - match self.shortest_dfa_reverse_suffix(text, start) { - dfa::Result::Match(_) => true, - dfa::Result::NoMatch(_) => false, - dfa::Result::Quit => self.match_nfa(text, start), - } - } - MatchType::Nfa(ty) => self.match_nfa_type(ty, text, start), - MatchType::Nothing => false, - } - } - - /// Finds the start and end location of the leftmost-first match, starting - /// at the given location. 
- #[cfg_attr(feature = "perf-inline", inline(always))] - fn find_at(&self, text: &[u8], start: usize) -> Option<(usize, usize)> { - if !self.is_anchor_end_match(text) { - return None; - } - match self.ro.match_type { - #[cfg(feature = "perf-literal")] - MatchType::Literal(ty) => self.find_literals(ty, text, start), - #[cfg(feature = "perf-dfa")] - MatchType::Dfa => match self.find_dfa_forward(text, start) { - dfa::Result::Match((s, e)) => Some((s, e)), - dfa::Result::NoMatch(_) => None, - dfa::Result::Quit => { - self.find_nfa(MatchNfaType::Auto, text, start) - } - }, - #[cfg(feature = "perf-dfa")] - MatchType::DfaAnchoredReverse => { - match self.find_dfa_anchored_reverse(text, start) { - dfa::Result::Match((s, e)) => Some((s, e)), - dfa::Result::NoMatch(_) => None, - dfa::Result::Quit => { - self.find_nfa(MatchNfaType::Auto, text, start) - } - } - } - #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))] - MatchType::DfaSuffix => { - match self.find_dfa_reverse_suffix(text, start) { - dfa::Result::Match((s, e)) => Some((s, e)), - dfa::Result::NoMatch(_) => None, - dfa::Result::Quit => { - self.find_nfa(MatchNfaType::Auto, text, start) - } - } - } - MatchType::Nfa(ty) => self.find_nfa(ty, text, start), - MatchType::Nothing => None, - #[cfg(feature = "perf-dfa")] - MatchType::DfaMany => { - unreachable!("BUG: RegexSet cannot be used with find") - } - } - } - - /// Finds the start and end location of the leftmost-first match and also - /// fills in all matching capture groups. - /// - /// The number of capture slots given should be equal to the total number - /// of capture slots in the compiled program. - /// - /// Note that the first two slots always correspond to the start and end - /// locations of the overall match. 
- fn captures_read_at( - &self, - locs: &mut Locations, - text: &[u8], - start: usize, - ) -> Option<(usize, usize)> { - let slots = locs.as_slots(); - for slot in slots.iter_mut() { - *slot = None; - } - // If the caller unnecessarily uses this, then we try to save them - // from themselves. - match slots.len() { - 0 => return self.find_at(text, start), - 2 => { - return self.find_at(text, start).map(|(s, e)| { - slots[0] = Some(s); - slots[1] = Some(e); - (s, e) - }); - } - _ => {} // fallthrough - } - if !self.is_anchor_end_match(text) { - return None; - } - match self.ro.match_type { - #[cfg(feature = "perf-literal")] - MatchType::Literal(ty) => { - self.find_literals(ty, text, start).and_then(|(s, e)| { - self.captures_nfa_type( - MatchNfaType::Auto, - slots, - text, - s, - e, - ) - }) - } - #[cfg(feature = "perf-dfa")] - MatchType::Dfa => { - if self.ro.nfa.is_anchored_start { - self.captures_nfa(slots, text, start) - } else { - match self.find_dfa_forward(text, start) { - dfa::Result::Match((s, e)) => self.captures_nfa_type( - MatchNfaType::Auto, - slots, - text, - s, - e, - ), - dfa::Result::NoMatch(_) => None, - dfa::Result::Quit => { - self.captures_nfa(slots, text, start) - } - } - } - } - #[cfg(feature = "perf-dfa")] - MatchType::DfaAnchoredReverse => { - match self.find_dfa_anchored_reverse(text, start) { - dfa::Result::Match((s, e)) => self.captures_nfa_type( - MatchNfaType::Auto, - slots, - text, - s, - e, - ), - dfa::Result::NoMatch(_) => None, - dfa::Result::Quit => self.captures_nfa(slots, text, start), - } - } - #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))] - MatchType::DfaSuffix => { - match self.find_dfa_reverse_suffix(text, start) { - dfa::Result::Match((s, e)) => self.captures_nfa_type( - MatchNfaType::Auto, - slots, - text, - s, - e, - ), - dfa::Result::NoMatch(_) => None, - dfa::Result::Quit => self.captures_nfa(slots, text, start), - } - } - MatchType::Nfa(ty) => { - self.captures_nfa_type(ty, slots, text, start, text.len()) 
- } - MatchType::Nothing => None, - #[cfg(feature = "perf-dfa")] - MatchType::DfaMany => { - unreachable!("BUG: RegexSet cannot be used with captures") - } - } - } -} - -impl<'c> ExecNoSync<'c> { - /// Finds the leftmost-first match using only literal search. - #[cfg(feature = "perf-literal")] - #[cfg_attr(feature = "perf-inline", inline(always))] - fn find_literals( - &self, - ty: MatchLiteralType, - text: &[u8], - start: usize, - ) -> Option<(usize, usize)> { - use self::MatchLiteralType::*; - match ty { - Unanchored => { - let lits = &self.ro.nfa.prefixes; - lits.find(&text[start..]).map(|(s, e)| (start + s, start + e)) - } - AnchoredStart => { - let lits = &self.ro.nfa.prefixes; - if start == 0 || !self.ro.nfa.is_anchored_start { - lits.find_start(&text[start..]) - .map(|(s, e)| (start + s, start + e)) - } else { - None - } - } - AnchoredEnd => { - let lits = &self.ro.suffixes; - lits.find_end(&text[start..]) - .map(|(s, e)| (start + s, start + e)) - } - AhoCorasick => self - .ro - .ac - .as_ref() - .unwrap() - .find(&text[start..]) - .map(|m| (start + m.start(), start + m.end())), - } - } - - /// Finds the leftmost-first match (start and end) using only the DFA. - /// - /// If the result returned indicates that the DFA quit, then another - /// matching engine should be used. - #[cfg(feature = "perf-dfa")] - #[cfg_attr(feature = "perf-inline", inline(always))] - fn find_dfa_forward( - &self, - text: &[u8], - start: usize, - ) -> dfa::Result<(usize, usize)> { - use crate::dfa::Result::*; - let end = match dfa::Fsm::forward( - &self.ro.dfa, - self.cache.value(), - false, - text, - start, - ) { - NoMatch(i) => return NoMatch(i), - Quit => return Quit, - Match(end) if start == end => return Match((start, start)), - Match(end) => end, - }; - // Now run the DFA in reverse to find the start of the match. 
- match dfa::Fsm::reverse( - &self.ro.dfa_reverse, - self.cache.value(), - false, - &text[start..], - end - start, - ) { - Match(s) => Match((start + s, end)), - NoMatch(i) => NoMatch(i), - Quit => Quit, - } - } - - /// Finds the leftmost-first match (start and end) using only the DFA, - /// but assumes the regex is anchored at the end and therefore starts at - /// the end of the regex and matches in reverse. - /// - /// If the result returned indicates that the DFA quit, then another - /// matching engine should be used. - #[cfg(feature = "perf-dfa")] - #[cfg_attr(feature = "perf-inline", inline(always))] - fn find_dfa_anchored_reverse( - &self, - text: &[u8], - start: usize, - ) -> dfa::Result<(usize, usize)> { - use crate::dfa::Result::*; - match dfa::Fsm::reverse( - &self.ro.dfa_reverse, - self.cache.value(), - false, - &text[start..], - text.len() - start, - ) { - Match(s) => Match((start + s, text.len())), - NoMatch(i) => NoMatch(i), - Quit => Quit, - } - } - - /// Finds the end of the shortest match using only the DFA. - #[cfg(feature = "perf-dfa")] - #[cfg_attr(feature = "perf-inline", inline(always))] - fn shortest_dfa(&self, text: &[u8], start: usize) -> dfa::Result { - dfa::Fsm::forward(&self.ro.dfa, self.cache.value(), true, text, start) - } - - /// Finds the end of the shortest match using only the DFA by scanning for - /// suffix literals. - #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))] - #[cfg_attr(feature = "perf-inline", inline(always))] - fn shortest_dfa_reverse_suffix( - &self, - text: &[u8], - start: usize, - ) -> dfa::Result { - match self.exec_dfa_reverse_suffix(text, start) { - None => self.shortest_dfa(text, start), - Some(r) => r.map(|(_, end)| end), - } - } - - /// Finds the end of the shortest match using only the DFA by scanning for - /// suffix literals. It also reports the start of the match. - /// - /// Note that if None is returned, then the optimization gave up to avoid - /// worst case quadratic behavior. 
A forward scanning DFA should be tried - /// next. - /// - /// If a match is returned and the full leftmost-first match is desired, - /// then a forward scan starting from the beginning of the match must be - /// done. - /// - /// If the result returned indicates that the DFA quit, then another - /// matching engine should be used. - #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))] - #[cfg_attr(feature = "perf-inline", inline(always))] - fn exec_dfa_reverse_suffix( - &self, - text: &[u8], - original_start: usize, - ) -> Option> { - use crate::dfa::Result::*; - - let lcs = self.ro.suffixes.lcs(); - debug_assert!(lcs.len() >= 1); - let mut start = original_start; - let mut end = start; - let mut last_literal = start; - while end <= text.len() { - last_literal += match lcs.find(&text[last_literal..]) { - None => return Some(NoMatch(text.len())), - Some(i) => i, - }; - end = last_literal + lcs.len(); - match dfa::Fsm::reverse( - &self.ro.dfa_reverse, - self.cache.value(), - false, - &text[start..end], - end - start, - ) { - Match(0) | NoMatch(0) => return None, - Match(i) => return Some(Match((start + i, end))), - NoMatch(i) => { - start += i; - last_literal += 1; - continue; - } - Quit => return Some(Quit), - }; - } - Some(NoMatch(text.len())) - } - - /// Finds the leftmost-first match (start and end) using only the DFA - /// by scanning for suffix literals. - /// - /// If the result returned indicates that the DFA quit, then another - /// matching engine should be used. - #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))] - #[cfg_attr(feature = "perf-inline", inline(always))] - fn find_dfa_reverse_suffix( - &self, - text: &[u8], - start: usize, - ) -> dfa::Result<(usize, usize)> { - use crate::dfa::Result::*; - - let match_start = match self.exec_dfa_reverse_suffix(text, start) { - None => return self.find_dfa_forward(text, start), - Some(Match((start, _))) => start, - Some(r) => return r, - }; - // At this point, we've found a match. 
The only way to quit now - // without a match is if the DFA gives up (seems unlikely). - // - // Now run the DFA forwards to find the proper end of the match. - // (The suffix literal match can only indicate the earliest - // possible end location, which may appear before the end of the - // leftmost-first match.) - match dfa::Fsm::forward( - &self.ro.dfa, - self.cache.value(), - false, - text, - match_start, - ) { - NoMatch(_) => panic!("BUG: reverse match implies forward match"), - Quit => Quit, - Match(e) => Match((match_start, e)), - } - } - - /// Executes the NFA engine to return whether there is a match or not. - /// - /// Ideally, we could use shortest_nfa(...).is_some() and get the same - /// performance characteristics, but regex sets don't have captures, which - /// shortest_nfa depends on. - #[cfg(feature = "perf-dfa")] - fn match_nfa(&self, text: &[u8], start: usize) -> bool { - self.match_nfa_type(MatchNfaType::Auto, text, start) - } - - /// Like match_nfa, but allows specification of the type of NFA engine. - fn match_nfa_type( - &self, - ty: MatchNfaType, - text: &[u8], - start: usize, - ) -> bool { - self.exec_nfa( - ty, - &mut [false], - &mut [], - true, - false, - text, - start, - text.len(), - ) - } - - /// Finds the shortest match using an NFA. - #[cfg(feature = "perf-dfa")] - fn shortest_nfa(&self, text: &[u8], start: usize) -> Option { - self.shortest_nfa_type(MatchNfaType::Auto, text, start) - } - - /// Like shortest_nfa, but allows specification of the type of NFA engine. - fn shortest_nfa_type( - &self, - ty: MatchNfaType, - text: &[u8], - start: usize, - ) -> Option { - let mut slots = [None, None]; - if self.exec_nfa( - ty, - &mut [false], - &mut slots, - true, - true, - text, - start, - text.len(), - ) { - slots[1] - } else { - None - } - } - - /// Like find, but executes an NFA engine. 
- fn find_nfa( - &self, - ty: MatchNfaType, - text: &[u8], - start: usize, - ) -> Option<(usize, usize)> { - let mut slots = [None, None]; - if self.exec_nfa( - ty, - &mut [false], - &mut slots, - false, - false, - text, - start, - text.len(), - ) { - match (slots[0], slots[1]) { - (Some(s), Some(e)) => Some((s, e)), - _ => None, - } - } else { - None - } - } - - /// Like find_nfa, but fills in captures. - /// - /// `slots` should have length equal to `2 * nfa.captures.len()`. - #[cfg(feature = "perf-dfa")] - fn captures_nfa( - &self, - slots: &mut [Slot], - text: &[u8], - start: usize, - ) -> Option<(usize, usize)> { - self.captures_nfa_type( - MatchNfaType::Auto, - slots, - text, - start, - text.len(), - ) - } - - /// Like captures_nfa, but allows specification of type of NFA engine. - fn captures_nfa_type( - &self, - ty: MatchNfaType, - slots: &mut [Slot], - text: &[u8], - start: usize, - end: usize, - ) -> Option<(usize, usize)> { - if self.exec_nfa( - ty, - &mut [false], - slots, - false, - false, - text, - start, - end, - ) { - match (slots[0], slots[1]) { - (Some(s), Some(e)) => Some((s, e)), - _ => None, - } - } else { - None - } - } - - fn exec_nfa( - &self, - mut ty: MatchNfaType, - matches: &mut [bool], - slots: &mut [Slot], - quit_after_match: bool, - quit_after_match_with_pos: bool, - text: &[u8], - start: usize, - end: usize, - ) -> bool { - use self::MatchNfaType::*; - if let Auto = ty { - if backtrack::should_exec(self.ro.nfa.len(), text.len()) { - ty = Backtrack; - } else { - ty = PikeVM; - } - } - // The backtracker can't return the shortest match position as it is - // implemented today. So if someone calls `shortest_match` and we need - // to run an NFA, then use the PikeVM. - if quit_after_match_with_pos || ty == PikeVM { - self.exec_pikevm( - matches, - slots, - quit_after_match, - text, - start, - end, - ) - } else { - self.exec_backtrack(matches, slots, text, start, end) - } - } - - /// Always run the NFA algorithm. 
- fn exec_pikevm( - &self, - matches: &mut [bool], - slots: &mut [Slot], - quit_after_match: bool, - text: &[u8], - start: usize, - end: usize, - ) -> bool { - if self.ro.nfa.uses_bytes() { - pikevm::Fsm::exec( - &self.ro.nfa, - self.cache.value(), - matches, - slots, - quit_after_match, - ByteInput::new(text, self.ro.nfa.only_utf8), - start, - end, - ) - } else { - pikevm::Fsm::exec( - &self.ro.nfa, - self.cache.value(), - matches, - slots, - quit_after_match, - CharInput::new(text), - start, - end, - ) - } - } - - /// Always runs the NFA using bounded backtracking. - fn exec_backtrack( - &self, - matches: &mut [bool], - slots: &mut [Slot], - text: &[u8], - start: usize, - end: usize, - ) -> bool { - if self.ro.nfa.uses_bytes() { - backtrack::Bounded::exec( - &self.ro.nfa, - self.cache.value(), - matches, - slots, - ByteInput::new(text, self.ro.nfa.only_utf8), - start, - end, - ) - } else { - backtrack::Bounded::exec( - &self.ro.nfa, - self.cache.value(), - matches, - slots, - CharInput::new(text), - start, - end, - ) - } - } - - /// Finds which regular expressions match the given text. - /// - /// `matches` should have length equal to the number of regexes being - /// searched. - /// - /// This is only useful when one wants to know which regexes in a set - /// match some text. 
- pub fn many_matches_at( - &self, - matches: &mut [bool], - text: &[u8], - start: usize, - ) -> bool { - use self::MatchType::*; - if !self.is_anchor_end_match(text) { - return false; - } - match self.ro.match_type { - #[cfg(feature = "perf-literal")] - Literal(ty) => { - debug_assert_eq!(matches.len(), 1); - matches[0] = self.find_literals(ty, text, start).is_some(); - matches[0] - } - #[cfg(feature = "perf-dfa")] - Dfa | DfaAnchoredReverse | DfaMany => { - match dfa::Fsm::forward_many( - &self.ro.dfa, - self.cache.value(), - matches, - text, - start, - ) { - dfa::Result::Match(_) => true, - dfa::Result::NoMatch(_) => false, - dfa::Result::Quit => self.exec_nfa( - MatchNfaType::Auto, - matches, - &mut [], - false, - false, - text, - start, - text.len(), - ), - } - } - #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))] - DfaSuffix => { - match dfa::Fsm::forward_many( - &self.ro.dfa, - self.cache.value(), - matches, - text, - start, - ) { - dfa::Result::Match(_) => true, - dfa::Result::NoMatch(_) => false, - dfa::Result::Quit => self.exec_nfa( - MatchNfaType::Auto, - matches, - &mut [], - false, - false, - text, - start, - text.len(), - ), - } - } - Nfa(ty) => self.exec_nfa( - ty, - matches, - &mut [], - false, - false, - text, - start, - text.len(), - ), - Nothing => false, - } - } - - #[cfg_attr(feature = "perf-inline", inline(always))] - fn is_anchor_end_match(&self, text: &[u8]) -> bool { - #[cfg(not(feature = "perf-literal"))] - fn imp(_: &ExecReadOnly, _: &[u8]) -> bool { - true - } - - #[cfg(feature = "perf-literal")] - fn imp(ro: &ExecReadOnly, text: &[u8]) -> bool { - // Only do this check if the haystack is big (>1MB). 
- if text.len() > (1 << 20) && ro.nfa.is_anchored_end { - let lcs = ro.suffixes.lcs(); - if lcs.len() >= 1 && !lcs.is_suffix(text) { - return false; - } - } - true - } - - imp(&self.ro, text) - } - - pub fn capture_name_idx(&self) -> &Arc> { - &self.ro.nfa.capture_name_idx - } -} - -impl<'c> ExecNoSyncStr<'c> { - pub fn capture_name_idx(&self) -> &Arc> { - self.0.capture_name_idx() - } -} - -impl Exec { - /// Get a searcher that isn't Sync. - #[cfg_attr(feature = "perf-inline", inline(always))] - pub fn searcher(&self) -> ExecNoSync<'_> { - ExecNoSync { - ro: &self.ro, // a clone is too expensive here! (and not needed) - cache: self.pool.get(), - } - } - - /// Get a searcher that isn't Sync and can match on &str. - #[cfg_attr(feature = "perf-inline", inline(always))] - pub fn searcher_str(&self) -> ExecNoSyncStr<'_> { - ExecNoSyncStr(self.searcher()) - } - - /// Build a Regex from this executor. - pub fn into_regex(self) -> re_unicode::Regex { - re_unicode::Regex::from(self) - } - - /// Build a RegexSet from this executor. - pub fn into_regex_set(self) -> re_set::unicode::RegexSet { - re_set::unicode::RegexSet::from(self) - } - - /// Build a Regex from this executor that can match arbitrary bytes. - pub fn into_byte_regex(self) -> re_bytes::Regex { - re_bytes::Regex::from(self) - } - - /// Build a RegexSet from this executor that can match arbitrary bytes. - pub fn into_byte_regex_set(self) -> re_set::bytes::RegexSet { - re_set::bytes::RegexSet::from(self) - } - - /// The original regular expressions given by the caller that were - /// compiled. - pub fn regex_strings(&self) -> &[String] { - &self.ro.res - } - - /// Return a slice of capture names. - /// - /// Any capture that isn't named is None. - pub fn capture_names(&self) -> &[Option] { - &self.ro.nfa.captures - } - - /// Return a reference to named groups mapping (from group name to - /// group position). 
- pub fn capture_name_idx(&self) -> &Arc> { - &self.ro.nfa.capture_name_idx - } - - /// If the number of capture groups in every match is always the same, then - /// return that number. Otherwise return `None`. - pub fn static_captures_len(&self) -> Option { - self.ro.nfa.static_captures_len - } -} - -impl Clone for Exec { - fn clone(&self) -> Exec { - let pool = ExecReadOnly::new_pool(&self.ro); - Exec { ro: self.ro.clone(), pool } - } -} - -impl ExecReadOnly { - fn choose_match_type(&self, hint: Option) -> MatchType { - if let Some(MatchType::Nfa(_)) = hint { - return hint.unwrap(); - } - // If the NFA is empty, then we'll never match anything. - if self.nfa.insts.is_empty() { - return MatchType::Nothing; - } - if let Some(literalty) = self.choose_literal_match_type() { - return literalty; - } - if let Some(dfaty) = self.choose_dfa_match_type() { - return dfaty; - } - // We're so totally hosed. - MatchType::Nfa(MatchNfaType::Auto) - } - - /// If a plain literal scan can be used, then a corresponding literal - /// search type is returned. - fn choose_literal_match_type(&self) -> Option { - #[cfg(not(feature = "perf-literal"))] - fn imp(_: &ExecReadOnly) -> Option { - None - } - - #[cfg(feature = "perf-literal")] - fn imp(ro: &ExecReadOnly) -> Option { - // If our set of prefixes is complete, then we can use it to find - // a match in lieu of a regex engine. This doesn't quite work well - // in the presence of multiple regexes, so only do it when there's - // one. - // - // TODO(burntsushi): Also, don't try to match literals if the regex - // is partially anchored. We could technically do it, but we'd need - // to create two sets of literals: all of them and then the subset - // that aren't anchored. We would then only search for all of them - // when at the beginning of the input and use the subset in all - // other cases. 
- if ro.res.len() != 1 { - return None; - } - if ro.ac.is_some() { - return Some(MatchType::Literal( - MatchLiteralType::AhoCorasick, - )); - } - if ro.nfa.prefixes.complete() { - return if ro.nfa.is_anchored_start { - Some(MatchType::Literal(MatchLiteralType::AnchoredStart)) - } else { - Some(MatchType::Literal(MatchLiteralType::Unanchored)) - }; - } - if ro.suffixes.complete() { - return if ro.nfa.is_anchored_end { - Some(MatchType::Literal(MatchLiteralType::AnchoredEnd)) - } else { - // This case shouldn't happen. When the regex isn't - // anchored, then complete prefixes should imply complete - // suffixes. - Some(MatchType::Literal(MatchLiteralType::Unanchored)) - }; - } - None - } - - imp(self) - } - - /// If a DFA scan can be used, then choose the appropriate DFA strategy. - fn choose_dfa_match_type(&self) -> Option { - #[cfg(not(feature = "perf-dfa"))] - fn imp(_: &ExecReadOnly) -> Option { - None - } - - #[cfg(feature = "perf-dfa")] - fn imp(ro: &ExecReadOnly) -> Option { - if !dfa::can_exec(&ro.dfa) { - return None; - } - // Regex sets require a slightly specialized path. - if ro.res.len() >= 2 { - return Some(MatchType::DfaMany); - } - // If the regex is anchored at the end but not the start, then - // just match in reverse from the end of the haystack. - if !ro.nfa.is_anchored_start && ro.nfa.is_anchored_end { - return Some(MatchType::DfaAnchoredReverse); - } - #[cfg(feature = "perf-literal")] - { - // If there's a longish suffix literal, then it might be faster - // to look for that first. - if ro.should_suffix_scan() { - return Some(MatchType::DfaSuffix); - } - } - // Fall back to your garden variety forward searching lazy DFA. - Some(MatchType::Dfa) - } - - imp(self) - } - - /// Returns true if the program is amenable to suffix scanning. - /// - /// When this is true, as a heuristic, we assume it is OK to quickly scan - /// for suffix literals and then do a *reverse* DFA match from any matches - /// produced by the literal scan. 
(And then followed by a forward DFA - /// search, since the previously found suffix literal maybe not actually be - /// the end of a match.) - /// - /// This is a bit of a specialized optimization, but can result in pretty - /// big performance wins if 1) there are no prefix literals and 2) the - /// suffix literals are pretty rare in the text. (1) is obviously easy to - /// account for but (2) is harder. As a proxy, we assume that longer - /// strings are generally rarer, so we only enable this optimization when - /// we have a meaty suffix. - #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))] - fn should_suffix_scan(&self) -> bool { - if self.suffixes.is_empty() { - return false; - } - let lcs_len = self.suffixes.lcs().char_len(); - lcs_len >= 3 && lcs_len > self.dfa.prefixes.lcp().char_len() - } - - fn new_pool(ro: &Arc) -> Box> { - let ro = ro.clone(); - Box::new(Pool::new(Box::new(move || { - AssertUnwindSafe(RefCell::new(ProgramCacheInner::new(&ro))) - }))) - } -} - -#[derive(Clone, Copy, Debug)] -enum MatchType { - /// A single or multiple literal search. This is only used when the regex - /// can be decomposed into a literal search. - #[cfg(feature = "perf-literal")] - Literal(MatchLiteralType), - /// A normal DFA search. - #[cfg(feature = "perf-dfa")] - Dfa, - /// A reverse DFA search starting from the end of a haystack. - #[cfg(feature = "perf-dfa")] - DfaAnchoredReverse, - /// A reverse DFA search with suffix literal scanning. - #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))] - DfaSuffix, - /// Use the DFA on two or more regular expressions. - #[cfg(feature = "perf-dfa")] - DfaMany, - /// An NFA variant. - Nfa(MatchNfaType), - /// No match is ever possible, so don't ever try to search. - Nothing, -} - -#[derive(Clone, Copy, Debug)] -#[cfg(feature = "perf-literal")] -enum MatchLiteralType { - /// Match literals anywhere in text. - Unanchored, - /// Match literals only at the start of text. 
- AnchoredStart, - /// Match literals only at the end of text. - AnchoredEnd, - /// Use an Aho-Corasick automaton. This requires `ac` to be Some on - /// ExecReadOnly. - AhoCorasick, -} - -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -enum MatchNfaType { - /// Choose between Backtrack and PikeVM. - Auto, - /// NFA bounded backtracking. - /// - /// (This is only set by tests, since it never makes sense to always want - /// backtracking.) - Backtrack, - /// The Pike VM. - /// - /// (This is only set by tests, since it never makes sense to always want - /// the Pike VM.) - PikeVM, -} - -/// `ProgramCache` maintains reusable allocations for each matching engine -/// available to a particular program. -/// -/// We declare this as unwind safe since it's a cache that's only used for -/// performance purposes. If a panic occurs, it is (or should be) always safe -/// to continue using the same regex object. -pub type ProgramCache = AssertUnwindSafe>; - -#[derive(Debug)] -pub struct ProgramCacheInner { - pub pikevm: pikevm::Cache, - pub backtrack: backtrack::Cache, - #[cfg(feature = "perf-dfa")] - pub dfa: dfa::Cache, - #[cfg(feature = "perf-dfa")] - pub dfa_reverse: dfa::Cache, -} - -impl ProgramCacheInner { - fn new(ro: &ExecReadOnly) -> Self { - ProgramCacheInner { - pikevm: pikevm::Cache::new(&ro.nfa), - backtrack: backtrack::Cache::new(&ro.nfa), - #[cfg(feature = "perf-dfa")] - dfa: dfa::Cache::new(&ro.dfa), - #[cfg(feature = "perf-dfa")] - dfa_reverse: dfa::Cache::new(&ro.dfa_reverse), - } - } -} - -/// Alternation literals checks if the given HIR is a simple alternation of -/// literals, and if so, returns them. Otherwise, this returns None. -#[cfg(feature = "perf-literal")] -fn alternation_literals(expr: &Hir) -> Option>> { - use regex_syntax::hir::{HirKind, Literal}; - - // This is pretty hacky, but basically, if `is_alternation_literal` is - // true, then we can make several assumptions about the structure of our - // HIR. 
This is what justifies the `unreachable!` statements below. - // - // This code should be refactored once we overhaul this crate's - // optimization pipeline, because this is a terribly inflexible way to go - // about things. - - if !expr.properties().is_alternation_literal() { - return None; - } - let alts = match *expr.kind() { - HirKind::Alternation(ref alts) => alts, - _ => return None, // one literal isn't worth it - }; - - let mut lits = vec![]; - for alt in alts { - let mut lit = vec![]; - match *alt.kind() { - HirKind::Literal(Literal(ref bytes)) => { - lit.extend_from_slice(bytes) - } - HirKind::Concat(ref exprs) => { - for e in exprs { - match *e.kind() { - HirKind::Literal(Literal(ref bytes)) => { - lit.extend_from_slice(bytes); - } - _ => unreachable!("expected literal, got {:?}", e), - } - } - } - _ => unreachable!("expected literal or concat, got {:?}", alt), - } - lits.push(lit); - } - Some(lits) -} - -#[cfg(not(feature = "perf-literal"))] -fn literal_analysis(_: &Hir) -> (literal::Seq, literal::Seq) { - (literal::Seq::infinite(), literal::Seq::infinite()) -} - -#[cfg(feature = "perf-literal")] -fn literal_analysis(expr: &Hir) -> (literal::Seq, literal::Seq) { - const ATTEMPTS: [(usize, usize); 3] = [(5, 50), (4, 30), (3, 20)]; - - let mut prefixes = literal::Extractor::new() - .kind(literal::ExtractKind::Prefix) - .extract(expr); - for (keep, limit) in ATTEMPTS { - let len = match prefixes.len() { - None => break, - Some(len) => len, - }; - if len <= limit { - break; - } - prefixes.keep_first_bytes(keep); - prefixes.minimize_by_preference(); - } - - let mut suffixes = literal::Extractor::new() - .kind(literal::ExtractKind::Suffix) - .extract(expr); - for (keep, limit) in ATTEMPTS { - let len = match suffixes.len() { - None => break, - Some(len) => len, - }; - if len <= limit { - break; - } - suffixes.keep_last_bytes(keep); - suffixes.minimize_by_preference(); - } - - (prefixes, suffixes) -} - -#[cfg(test)] -mod test { - #[test] - fn 
uppercut_s_backtracking_bytes_default_bytes_mismatch() { - use crate::internal::ExecBuilder; - - let backtrack_bytes_re = ExecBuilder::new("^S") - .bounded_backtracking() - .only_utf8(false) - .build() - .map(|exec| exec.into_byte_regex()) - .map_err(|err| format!("{}", err)) - .unwrap(); - - let default_bytes_re = ExecBuilder::new("^S") - .only_utf8(false) - .build() - .map(|exec| exec.into_byte_regex()) - .map_err(|err| format!("{}", err)) - .unwrap(); - - let input = vec![83, 83]; - - let s1 = backtrack_bytes_re.split(&input); - let s2 = default_bytes_re.split(&input); - for (chunk1, chunk2) in s1.zip(s2) { - assert_eq!(chunk1, chunk2); - } - } - - #[test] - fn unicode_lit_star_backtracking_utf8bytes_default_utf8bytes_mismatch() { - use crate::internal::ExecBuilder; - - let backtrack_bytes_re = ExecBuilder::new(r"^(?u:\*)") - .bounded_backtracking() - .bytes(true) - .build() - .map(|exec| exec.into_regex()) - .map_err(|err| format!("{}", err)) - .unwrap(); - - let default_bytes_re = ExecBuilder::new(r"^(?u:\*)") - .bytes(true) - .build() - .map(|exec| exec.into_regex()) - .map_err(|err| format!("{}", err)) - .unwrap(); - - let input = "**"; - - let s1 = backtrack_bytes_re.split(input); - let s2 = default_bytes_re.split(input); - for (chunk1, chunk2) in s1.zip(s2) { - assert_eq!(chunk1, chunk2); - } - } -} diff --git a/vendor/regex/src/expand.rs b/vendor/regex/src/expand.rs deleted file mode 100644 index 98fafc9..0000000 --- a/vendor/regex/src/expand.rs +++ /dev/null @@ -1,247 +0,0 @@ -use std::str; - -use crate::find_byte::find_byte; - -use crate::re_bytes; -use crate::re_unicode; - -pub fn expand_str( - caps: &re_unicode::Captures<'_>, - mut replacement: &str, - dst: &mut String, -) { - while !replacement.is_empty() { - match find_byte(b'$', replacement.as_bytes()) { - None => break, - Some(i) => { - dst.push_str(&replacement[..i]); - replacement = &replacement[i..]; - } - } - if replacement.as_bytes().get(1).map_or(false, |&b| b == b'$') { - dst.push_str("$"); 
- replacement = &replacement[2..]; - continue; - } - debug_assert!(!replacement.is_empty()); - let cap_ref = match find_cap_ref(replacement.as_bytes()) { - Some(cap_ref) => cap_ref, - None => { - dst.push_str("$"); - replacement = &replacement[1..]; - continue; - } - }; - replacement = &replacement[cap_ref.end..]; - match cap_ref.cap { - Ref::Number(i) => { - dst.push_str(caps.get(i).map(|m| m.as_str()).unwrap_or("")); - } - Ref::Named(name) => { - dst.push_str( - caps.name(name).map(|m| m.as_str()).unwrap_or(""), - ); - } - } - } - dst.push_str(replacement); -} - -pub fn expand_bytes( - caps: &re_bytes::Captures<'_>, - mut replacement: &[u8], - dst: &mut Vec, -) { - while !replacement.is_empty() { - match find_byte(b'$', replacement) { - None => break, - Some(i) => { - dst.extend(&replacement[..i]); - replacement = &replacement[i..]; - } - } - if replacement.get(1).map_or(false, |&b| b == b'$') { - dst.push(b'$'); - replacement = &replacement[2..]; - continue; - } - debug_assert!(!replacement.is_empty()); - let cap_ref = match find_cap_ref(replacement) { - Some(cap_ref) => cap_ref, - None => { - dst.push(b'$'); - replacement = &replacement[1..]; - continue; - } - }; - replacement = &replacement[cap_ref.end..]; - match cap_ref.cap { - Ref::Number(i) => { - dst.extend(caps.get(i).map(|m| m.as_bytes()).unwrap_or(b"")); - } - Ref::Named(name) => { - dst.extend( - caps.name(name).map(|m| m.as_bytes()).unwrap_or(b""), - ); - } - } - } - dst.extend(replacement); -} - -/// `CaptureRef` represents a reference to a capture group inside some text. -/// The reference is either a capture group name or a number. -/// -/// It is also tagged with the position in the text following the -/// capture reference. -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -struct CaptureRef<'a> { - cap: Ref<'a>, - end: usize, -} - -/// A reference to a capture group in some text. -/// -/// e.g., `$2`, `$foo`, `${foo}`. 
-#[derive(Clone, Copy, Debug, Eq, PartialEq)] -enum Ref<'a> { - Named(&'a str), - Number(usize), -} - -impl<'a> From<&'a str> for Ref<'a> { - fn from(x: &'a str) -> Ref<'a> { - Ref::Named(x) - } -} - -impl From for Ref<'static> { - fn from(x: usize) -> Ref<'static> { - Ref::Number(x) - } -} - -/// Parses a possible reference to a capture group name in the given text, -/// starting at the beginning of `replacement`. -/// -/// If no such valid reference could be found, None is returned. -fn find_cap_ref(replacement: &[u8]) -> Option> { - let mut i = 0; - let rep: &[u8] = replacement; - if rep.len() <= 1 || rep[0] != b'$' { - return None; - } - i += 1; - if rep[i] == b'{' { - return find_cap_ref_braced(rep, i + 1); - } - let mut cap_end = i; - while rep.get(cap_end).copied().map_or(false, is_valid_cap_letter) { - cap_end += 1; - } - if cap_end == i { - return None; - } - // We just verified that the range 0..cap_end is valid ASCII, so it must - // therefore be valid UTF-8. If we really cared, we could avoid this UTF-8 - // check via an unchecked conversion or by parsing the number straight from - // &[u8]. - let cap = - str::from_utf8(&rep[i..cap_end]).expect("valid UTF-8 capture name"); - Some(CaptureRef { - cap: match cap.parse::() { - Ok(i) => Ref::Number(i as usize), - Err(_) => Ref::Named(cap), - }, - end: cap_end, - }) -} - -fn find_cap_ref_braced(rep: &[u8], mut i: usize) -> Option> { - let start = i; - while rep.get(i).map_or(false, |&b| b != b'}') { - i += 1; - } - if !rep.get(i).map_or(false, |&b| b == b'}') { - return None; - } - // When looking at braced names, we don't put any restrictions on the name, - // so it's possible it could be invalid UTF-8. But a capture group name - // can never be invalid UTF-8, so if we have invalid UTF-8, then we can - // safely return None. 
- let cap = match str::from_utf8(&rep[start..i]) { - Err(_) => return None, - Ok(cap) => cap, - }; - Some(CaptureRef { - cap: match cap.parse::() { - Ok(i) => Ref::Number(i as usize), - Err(_) => Ref::Named(cap), - }, - end: i + 1, - }) -} - -/// Returns true if and only if the given byte is allowed in a capture name -/// written in non-brace form. -fn is_valid_cap_letter(b: u8) -> bool { - match b { - b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_' => true, - _ => false, - } -} - -#[cfg(test)] -mod tests { - use super::{find_cap_ref, CaptureRef}; - - macro_rules! find { - ($name:ident, $text:expr) => { - #[test] - fn $name() { - assert_eq!(None, find_cap_ref($text.as_bytes())); - } - }; - ($name:ident, $text:expr, $capref:expr) => { - #[test] - fn $name() { - assert_eq!(Some($capref), find_cap_ref($text.as_bytes())); - } - }; - } - - macro_rules! c { - ($name_or_number:expr, $pos:expr) => { - CaptureRef { cap: $name_or_number.into(), end: $pos } - }; - } - - find!(find_cap_ref1, "$foo", c!("foo", 4)); - find!(find_cap_ref2, "${foo}", c!("foo", 6)); - find!(find_cap_ref3, "$0", c!(0, 2)); - find!(find_cap_ref4, "$5", c!(5, 2)); - find!(find_cap_ref5, "$10", c!(10, 3)); - // See https://github.com/rust-lang/regex/pull/585 - // for more on characters following numbers - find!(find_cap_ref6, "$42a", c!("42a", 4)); - find!(find_cap_ref7, "${42}a", c!(42, 5)); - find!(find_cap_ref8, "${42"); - find!(find_cap_ref9, "${42 "); - find!(find_cap_ref10, " $0 "); - find!(find_cap_ref11, "$"); - find!(find_cap_ref12, " "); - find!(find_cap_ref13, ""); - find!(find_cap_ref14, "$1-$2", c!(1, 2)); - find!(find_cap_ref15, "$1_$2", c!("1_", 3)); - find!(find_cap_ref16, "$x-$y", c!("x", 2)); - find!(find_cap_ref17, "$x_$y", c!("x_", 3)); - find!(find_cap_ref18, "${#}", c!("#", 4)); - find!(find_cap_ref19, "${Z[}", c!("Z[", 5)); - find!(find_cap_ref20, "${¾}", c!("¾", 5)); - find!(find_cap_ref21, "${¾a}", c!("¾a", 6)); - find!(find_cap_ref22, "${a¾}", c!("a¾", 6)); - 
find!(find_cap_ref23, "${☃}", c!("☃", 6)); - find!(find_cap_ref24, "${a☃}", c!("a☃", 7)); - find!(find_cap_ref25, "${☃a}", c!("☃a", 7)); - find!(find_cap_ref26, "${名字}", c!("名字", 9)); -} diff --git a/vendor/regex/src/find_byte.rs b/vendor/regex/src/find_byte.rs index e95f72a..9c6915d 100644 --- a/vendor/regex/src/find_byte.rs +++ b/vendor/regex/src/find_byte.rs @@ -2,7 +2,7 @@ /// /// If the perf-literal feature is enabled, then this uses the super optimized /// memchr crate. Otherwise, it uses the naive byte-at-a-time implementation. -pub fn find_byte(needle: u8, haystack: &[u8]) -> Option { +pub(crate) fn find_byte(needle: u8, haystack: &[u8]) -> Option { #[cfg(not(feature = "perf-literal"))] fn imp(needle: u8, haystack: &[u8]) -> Option { haystack.iter().position(|&b| b == needle) @@ -10,8 +10,7 @@ pub fn find_byte(needle: u8, haystack: &[u8]) -> Option { #[cfg(feature = "perf-literal")] fn imp(needle: u8, haystack: &[u8]) -> Option { - use memchr::memchr; - memchr(needle, haystack) + memchr::memchr(needle, haystack) } imp(needle, haystack) diff --git a/vendor/regex/src/freqs.rs b/vendor/regex/src/freqs.rs deleted file mode 100644 index fcffa95..0000000 --- a/vendor/regex/src/freqs.rs +++ /dev/null @@ -1,261 +0,0 @@ -// NOTE: The following code was generated by "scripts/frequencies.py", do not -// edit directly - -pub const BYTE_FREQUENCIES: [u8; 256] = [ - 55, // '\x00' - 52, // '\x01' - 51, // '\x02' - 50, // '\x03' - 49, // '\x04' - 48, // '\x05' - 47, // '\x06' - 46, // '\x07' - 45, // '\x08' - 103, // '\t' - 242, // '\n' - 66, // '\x0b' - 67, // '\x0c' - 229, // '\r' - 44, // '\x0e' - 43, // '\x0f' - 42, // '\x10' - 41, // '\x11' - 40, // '\x12' - 39, // '\x13' - 38, // '\x14' - 37, // '\x15' - 36, // '\x16' - 35, // '\x17' - 34, // '\x18' - 33, // '\x19' - 56, // '\x1a' - 32, // '\x1b' - 31, // '\x1c' - 30, // '\x1d' - 29, // '\x1e' - 28, // '\x1f' - 255, // ' ' - 148, // '!' 
- 164, // '"' - 149, // '#' - 136, // '$' - 160, // '%' - 155, // '&' - 173, // "'" - 221, // '(' - 222, // ')' - 134, // '*' - 122, // '+' - 232, // ',' - 202, // '-' - 215, // '.' - 224, // '/' - 208, // '0' - 220, // '1' - 204, // '2' - 187, // '3' - 183, // '4' - 179, // '5' - 177, // '6' - 168, // '7' - 178, // '8' - 200, // '9' - 226, // ':' - 195, // ';' - 154, // '<' - 184, // '=' - 174, // '>' - 126, // '?' - 120, // '@' - 191, // 'A' - 157, // 'B' - 194, // 'C' - 170, // 'D' - 189, // 'E' - 162, // 'F' - 161, // 'G' - 150, // 'H' - 193, // 'I' - 142, // 'J' - 137, // 'K' - 171, // 'L' - 176, // 'M' - 185, // 'N' - 167, // 'O' - 186, // 'P' - 112, // 'Q' - 175, // 'R' - 192, // 'S' - 188, // 'T' - 156, // 'U' - 140, // 'V' - 143, // 'W' - 123, // 'X' - 133, // 'Y' - 128, // 'Z' - 147, // '[' - 138, // '\\' - 146, // ']' - 114, // '^' - 223, // '_' - 151, // '`' - 249, // 'a' - 216, // 'b' - 238, // 'c' - 236, // 'd' - 253, // 'e' - 227, // 'f' - 218, // 'g' - 230, // 'h' - 247, // 'i' - 135, // 'j' - 180, // 'k' - 241, // 'l' - 233, // 'm' - 246, // 'n' - 244, // 'o' - 231, // 'p' - 139, // 'q' - 245, // 'r' - 243, // 's' - 251, // 't' - 235, // 'u' - 201, // 'v' - 196, // 'w' - 240, // 'x' - 214, // 'y' - 152, // 'z' - 182, // '{' - 205, // '|' - 181, // '}' - 127, // '~' - 27, // '\x7f' - 212, // '\x80' - 211, // '\x81' - 210, // '\x82' - 213, // '\x83' - 228, // '\x84' - 197, // '\x85' - 169, // '\x86' - 159, // '\x87' - 131, // '\x88' - 172, // '\x89' - 105, // '\x8a' - 80, // '\x8b' - 98, // '\x8c' - 96, // '\x8d' - 97, // '\x8e' - 81, // '\x8f' - 207, // '\x90' - 145, // '\x91' - 116, // '\x92' - 115, // '\x93' - 144, // '\x94' - 130, // '\x95' - 153, // '\x96' - 121, // '\x97' - 107, // '\x98' - 132, // '\x99' - 109, // '\x9a' - 110, // '\x9b' - 124, // '\x9c' - 111, // '\x9d' - 82, // '\x9e' - 108, // '\x9f' - 118, // '\xa0' - 141, // '¡' - 113, // '¢' - 129, // '£' - 119, // '¤' - 125, // '¥' - 165, // '¦' - 117, // '§' - 92, // '¨' - 106, // '©' 
- 83, // 'ª' - 72, // '«' - 99, // '¬' - 93, // '\xad' - 65, // '®' - 79, // '¯' - 166, // '°' - 237, // '±' - 163, // '²' - 199, // '³' - 190, // '´' - 225, // 'µ' - 209, // '¶' - 203, // '·' - 198, // '¸' - 217, // '¹' - 219, // 'º' - 206, // '»' - 234, // '¼' - 248, // '½' - 158, // '¾' - 239, // '¿' - 255, // 'À' - 255, // 'Á' - 255, // 'Â' - 255, // 'Ã' - 255, // 'Ä' - 255, // 'Å' - 255, // 'Æ' - 255, // 'Ç' - 255, // 'È' - 255, // 'É' - 255, // 'Ê' - 255, // 'Ë' - 255, // 'Ì' - 255, // 'Í' - 255, // 'Î' - 255, // 'Ï' - 255, // 'Ð' - 255, // 'Ñ' - 255, // 'Ò' - 255, // 'Ó' - 255, // 'Ô' - 255, // 'Õ' - 255, // 'Ö' - 255, // '×' - 255, // 'Ø' - 255, // 'Ù' - 255, // 'Ú' - 255, // 'Û' - 255, // 'Ü' - 255, // 'Ý' - 255, // 'Þ' - 255, // 'ß' - 255, // 'à' - 255, // 'á' - 255, // 'â' - 255, // 'ã' - 255, // 'ä' - 255, // 'å' - 255, // 'æ' - 255, // 'ç' - 255, // 'è' - 255, // 'é' - 255, // 'ê' - 255, // 'ë' - 255, // 'ì' - 255, // 'í' - 255, // 'î' - 255, // 'ï' - 255, // 'ð' - 255, // 'ñ' - 255, // 'ò' - 255, // 'ó' - 255, // 'ô' - 255, // 'õ' - 255, // 'ö' - 255, // '÷' - 255, // 'ø' - 255, // 'ù' - 255, // 'ú' - 255, // 'û' - 255, // 'ü' - 255, // 'ý' - 255, // 'þ' - 255, // 'ÿ' -]; diff --git a/vendor/regex/src/input.rs b/vendor/regex/src/input.rs deleted file mode 100644 index df6c3e0..0000000 --- a/vendor/regex/src/input.rs +++ /dev/null @@ -1,432 +0,0 @@ -use std::char; -use std::cmp::Ordering; -use std::fmt; -use std::ops; -use std::u32; - -use crate::literal::LiteralSearcher; -use crate::prog::InstEmptyLook; -use crate::utf8::{decode_last_utf8, decode_utf8}; - -/// Represents a location in the input. -#[derive(Clone, Copy, Debug)] -pub struct InputAt { - pos: usize, - c: Char, - byte: Option, - len: usize, -} - -impl InputAt { - /// Returns true iff this position is at the beginning of the input. - pub fn is_start(&self) -> bool { - self.pos == 0 - } - - /// Returns true iff this position is past the end of the input. 
- pub fn is_end(&self) -> bool { - self.c.is_none() && self.byte.is_none() - } - - /// Returns the character at this position. - /// - /// If this position is just before or after the input, then an absent - /// character is returned. - pub fn char(&self) -> Char { - self.c - } - - /// Returns the byte at this position. - pub fn byte(&self) -> Option { - self.byte - } - - /// Returns the UTF-8 width of the character at this position. - pub fn len(&self) -> usize { - self.len - } - - /// Returns whether the UTF-8 width of the character at this position - /// is zero. - pub fn is_empty(&self) -> bool { - self.len == 0 - } - - /// Returns the byte offset of this position. - pub fn pos(&self) -> usize { - self.pos - } - - /// Returns the byte offset of the next position in the input. - pub fn next_pos(&self) -> usize { - self.pos + self.len - } -} - -/// An abstraction over input used in the matching engines. -pub trait Input: fmt::Debug { - /// Return an encoding of the position at byte offset `i`. - fn at(&self, i: usize) -> InputAt; - - /// Return the Unicode character occurring next to `at`. - /// - /// If no such character could be decoded, then `Char` is absent. - fn next_char(&self, at: InputAt) -> Char; - - /// Return the Unicode character occurring previous to `at`. - /// - /// If no such character could be decoded, then `Char` is absent. - fn previous_char(&self, at: InputAt) -> Char; - - /// Return true if the given empty width instruction matches at the - /// input position given. - fn is_empty_match(&self, at: InputAt, empty: &InstEmptyLook) -> bool; - - /// Scan the input for a matching prefix. - fn prefix_at( - &self, - prefixes: &LiteralSearcher, - at: InputAt, - ) -> Option; - - /// The number of bytes in the input. - fn len(&self) -> usize; - - /// Whether the input is empty. - fn is_empty(&self) -> bool { - self.len() == 0 - } - - /// Return the given input as a sequence of bytes. 
- fn as_bytes(&self) -> &[u8]; -} - -impl<'a, T: Input> Input for &'a T { - fn at(&self, i: usize) -> InputAt { - (**self).at(i) - } - - fn next_char(&self, at: InputAt) -> Char { - (**self).next_char(at) - } - - fn previous_char(&self, at: InputAt) -> Char { - (**self).previous_char(at) - } - - fn is_empty_match(&self, at: InputAt, empty: &InstEmptyLook) -> bool { - (**self).is_empty_match(at, empty) - } - - fn prefix_at( - &self, - prefixes: &LiteralSearcher, - at: InputAt, - ) -> Option { - (**self).prefix_at(prefixes, at) - } - - fn len(&self) -> usize { - (**self).len() - } - - fn as_bytes(&self) -> &[u8] { - (**self).as_bytes() - } -} - -/// An input reader over characters. -#[derive(Clone, Copy, Debug)] -pub struct CharInput<'t>(&'t [u8]); - -impl<'t> CharInput<'t> { - /// Return a new character input reader for the given string. - pub fn new(s: &'t [u8]) -> CharInput<'t> { - CharInput(s) - } -} - -impl<'t> ops::Deref for CharInput<'t> { - type Target = [u8]; - - fn deref(&self) -> &[u8] { - self.0 - } -} - -impl<'t> Input for CharInput<'t> { - fn at(&self, i: usize) -> InputAt { - if i >= self.len() { - InputAt { pos: self.len(), c: None.into(), byte: None, len: 0 } - } else { - let c = decode_utf8(&self[i..]).map(|(c, _)| c).into(); - InputAt { pos: i, c, byte: None, len: c.len_utf8() } - } - } - - fn next_char(&self, at: InputAt) -> Char { - at.char() - } - - fn previous_char(&self, at: InputAt) -> Char { - decode_last_utf8(&self[..at.pos()]).map(|(c, _)| c).into() - } - - fn is_empty_match(&self, at: InputAt, empty: &InstEmptyLook) -> bool { - use crate::prog::EmptyLook::*; - match empty.look { - StartLine => { - let c = self.previous_char(at); - at.pos() == 0 || c == '\n' - } - EndLine => { - let c = self.next_char(at); - at.pos() == self.len() || c == '\n' - } - StartText => at.pos() == 0, - EndText => at.pos() == self.len(), - WordBoundary => { - let (c1, c2) = (self.previous_char(at), self.next_char(at)); - c1.is_word_char() != c2.is_word_char() - } 
- NotWordBoundary => { - let (c1, c2) = (self.previous_char(at), self.next_char(at)); - c1.is_word_char() == c2.is_word_char() - } - WordBoundaryAscii => { - let (c1, c2) = (self.previous_char(at), self.next_char(at)); - c1.is_word_byte() != c2.is_word_byte() - } - NotWordBoundaryAscii => { - let (c1, c2) = (self.previous_char(at), self.next_char(at)); - c1.is_word_byte() == c2.is_word_byte() - } - } - } - - fn prefix_at( - &self, - prefixes: &LiteralSearcher, - at: InputAt, - ) -> Option { - prefixes.find(&self[at.pos()..]).map(|(s, _)| self.at(at.pos() + s)) - } - - fn len(&self) -> usize { - self.0.len() - } - - fn as_bytes(&self) -> &[u8] { - self.0 - } -} - -/// An input reader over bytes. -#[derive(Clone, Copy, Debug)] -pub struct ByteInput<'t> { - text: &'t [u8], - only_utf8: bool, -} - -impl<'t> ByteInput<'t> { - /// Return a new byte-based input reader for the given string. - pub fn new(text: &'t [u8], only_utf8: bool) -> ByteInput<'t> { - ByteInput { text, only_utf8 } - } -} - -impl<'t> ops::Deref for ByteInput<'t> { - type Target = [u8]; - - fn deref(&self) -> &[u8] { - self.text - } -} - -impl<'t> Input for ByteInput<'t> { - fn at(&self, i: usize) -> InputAt { - if i >= self.len() { - InputAt { pos: self.len(), c: None.into(), byte: None, len: 0 } - } else { - InputAt { - pos: i, - c: None.into(), - byte: self.get(i).cloned(), - len: 1, - } - } - } - - fn next_char(&self, at: InputAt) -> Char { - decode_utf8(&self[at.pos()..]).map(|(c, _)| c).into() - } - - fn previous_char(&self, at: InputAt) -> Char { - decode_last_utf8(&self[..at.pos()]).map(|(c, _)| c).into() - } - - fn is_empty_match(&self, at: InputAt, empty: &InstEmptyLook) -> bool { - use crate::prog::EmptyLook::*; - match empty.look { - StartLine => { - let c = self.previous_char(at); - at.pos() == 0 || c == '\n' - } - EndLine => { - let c = self.next_char(at); - at.pos() == self.len() || c == '\n' - } - StartText => at.pos() == 0, - EndText => at.pos() == self.len(), - WordBoundary => { - let 
(c1, c2) = (self.previous_char(at), self.next_char(at)); - c1.is_word_char() != c2.is_word_char() - } - NotWordBoundary => { - let (c1, c2) = (self.previous_char(at), self.next_char(at)); - c1.is_word_char() == c2.is_word_char() - } - WordBoundaryAscii => { - let (c1, c2) = (self.previous_char(at), self.next_char(at)); - if self.only_utf8 { - // If we must match UTF-8, then we can't match word - // boundaries at invalid UTF-8. - if c1.is_none() && !at.is_start() { - return false; - } - if c2.is_none() && !at.is_end() { - return false; - } - } - c1.is_word_byte() != c2.is_word_byte() - } - NotWordBoundaryAscii => { - let (c1, c2) = (self.previous_char(at), self.next_char(at)); - if self.only_utf8 { - // If we must match UTF-8, then we can't match word - // boundaries at invalid UTF-8. - if c1.is_none() && !at.is_start() { - return false; - } - if c2.is_none() && !at.is_end() { - return false; - } - } - c1.is_word_byte() == c2.is_word_byte() - } - } - } - - fn prefix_at( - &self, - prefixes: &LiteralSearcher, - at: InputAt, - ) -> Option { - prefixes.find(&self[at.pos()..]).map(|(s, _)| self.at(at.pos() + s)) - } - - fn len(&self) -> usize { - self.text.len() - } - - fn as_bytes(&self) -> &[u8] { - self.text - } -} - -/// An inline representation of `Option`. -/// -/// This eliminates the need to do case analysis on `Option` to determine -/// ordinality with other characters. -/// -/// (The `Option` is not related to encoding. Instead, it is used in the -/// matching engines to represent the beginning and ending boundaries of the -/// search text.) -#[derive(Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)] -pub struct Char(u32); - -impl fmt::Debug for Char { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match char::from_u32(self.0) { - None => write!(f, "Empty"), - Some(c) => write!(f, "{:?}", c), - } - } -} - -impl Char { - /// Returns true iff the character is absent. 
- #[inline] - pub fn is_none(self) -> bool { - self.0 == u32::MAX - } - - /// Returns the length of the character's UTF-8 encoding. - /// - /// If the character is absent, then `1` is returned. - #[inline] - pub fn len_utf8(self) -> usize { - char::from_u32(self.0).map_or(1, |c| c.len_utf8()) - } - - /// Returns true iff the character is a word character. - /// - /// If the character is absent, then false is returned. - pub fn is_word_char(self) -> bool { - // is_word_character can panic if the Unicode data for \w isn't - // available. However, our compiler ensures that if a Unicode word - // boundary is used, then the data must also be available. If it isn't, - // then the compiler returns an error. - char::from_u32(self.0).map_or(false, regex_syntax::is_word_character) - } - - /// Returns true iff the byte is a word byte. - /// - /// If the byte is absent, then false is returned. - pub fn is_word_byte(self) -> bool { - match char::from_u32(self.0) { - Some(c) if c <= '\u{7F}' => regex_syntax::is_word_byte(c as u8), - None | Some(_) => false, - } - } -} - -impl From for Char { - fn from(c: char) -> Char { - Char(c as u32) - } -} - -impl From> for Char { - fn from(c: Option) -> Char { - c.map_or(Char(u32::MAX), |c| c.into()) - } -} - -impl PartialEq for Char { - #[inline] - fn eq(&self, other: &char) -> bool { - self.0 == *other as u32 - } -} - -impl PartialEq for char { - #[inline] - fn eq(&self, other: &Char) -> bool { - *self as u32 == other.0 - } -} - -impl PartialOrd for Char { - #[inline] - fn partial_cmp(&self, other: &char) -> Option { - self.0.partial_cmp(&(*other as u32)) - } -} - -impl PartialOrd for char { - #[inline] - fn partial_cmp(&self, other: &Char) -> Option { - (*self as u32).partial_cmp(&other.0) - } -} diff --git a/vendor/regex/src/lib.rs b/vendor/regex/src/lib.rs index 82c1b77..6dbd3c2 100644 --- a/vendor/regex/src/lib.rs +++ b/vendor/regex/src/lib.rs @@ -1,146 +1,371 @@ /*! 
-This crate provides a library for parsing, compiling, and executing regular -expressions. Its syntax is similar to Perl-style regular expressions, but lacks -a few features like look around and backreferences. In exchange, all searches -execute in linear time with respect to the size of the regular expression and -search text. +This crate provides routines for searching strings for matches of a [regular +expression] (aka "regex"). The regex syntax supported by this crate is similar +to other regex engines, but it lacks several features that are not known how to +implement efficiently. This includes, but is not limited to, look-around and +backreferences. In exchange, all regex searches in this crate have worst case +`O(m * n)` time complexity, where `m` is proportional to the size of the regex +and `n` is proportional to the size of the string being searched. -This crate's documentation provides some simple examples, describes -[Unicode support](#unicode) and exhaustively lists the -[supported syntax](#syntax). +[regular expression]: https://en.wikipedia.org/wiki/Regular_expression -For more specific details on the API for regular expressions, please see the -documentation for the [`Regex`](struct.Regex.html) type. +If you just want API documentation, then skip to the [`Regex`] type. Otherwise, +here's a quick example showing one way of parsing the output of a grep-like +program: + +```rust +use regex::Regex; + +let re = Regex::new(r"(?m)^([^:]+):([0-9]+):(.+)$").unwrap(); +let hay = "\ +path/to/foo:54:Blue Harvest +path/to/bar:90:Something, Something, Something, Dark Side +path/to/baz:3:It's a Trap! 
+"; + +let mut results = vec![]; +for (_, [path, lineno, line]) in re.captures_iter(hay).map(|c| c.extract()) { + results.push((path, lineno.parse::()?, line)); +} +assert_eq!(results, vec![ + ("path/to/foo", 54, "Blue Harvest"), + ("path/to/bar", 90, "Something, Something, Something, Dark Side"), + ("path/to/baz", 3, "It's a Trap!"), +]); +# Ok::<(), Box>(()) +``` + +# Overview + +The primary type in this crate is a [`Regex`]. Its most important methods are +as follows: + +* [`Regex::new`] compiles a regex using the default configuration. A +[`RegexBuilder`] permits setting a non-default configuration. (For example, +case insensitive matching, verbose mode and others.) +* [`Regex::is_match`] reports whether a match exists in a particular haystack. +* [`Regex::find`] reports the byte offsets of a match in a haystack, if one +exists. [`Regex::find_iter`] returns an iterator over all such matches. +* [`Regex::captures`] returns a [`Captures`], which reports both the byte +offsets of a match in a haystack and the byte offsets of each matching capture +group from the regex in the haystack. +[`Regex::captures_iter`] returns an iterator over all such matches. + +There is also a [`RegexSet`], which permits searching for multiple regex +patterns simultaneously in a single search. However, it currently only reports +which patterns match and *not* the byte offsets of a match. + +Otherwise, this top-level crate documentation is organized as follows: + +* [Usage](#usage) shows how to add the `regex` crate to your Rust project. +* [Examples](#examples) provides a limited selection of regex search examples. +* [Performance](#performance) provides a brief summary of how to optimize regex +searching speed. +* [Unicode](#unicode) discusses support for non-ASCII patterns. +* [Syntax](#syntax) enumerates the specific regex syntax supported by this +crate. +* [Untrusted input](#untrusted-input) discusses how this crate deals with regex +patterns or haystacks that are untrusted. 
+* [Crate features](#crate-features) documents the Cargo features that can be +enabled or disabled for this crate. +* [Other crates](#other-crates) links to other crates in the `regex` family. # Usage -This crate is [on crates.io](https://crates.io/crates/regex) and can be +The `regex` crate is [on crates.io](https://crates.io/crates/regex) and can be used by adding `regex` to your dependencies in your project's `Cargo.toml`. +Or more simply, just run `cargo add regex`. + +Here is a complete example that creates a new Rust project, adds a dependency +on `regex`, creates the source code for a regex search and then runs the +program. + +First, create the project in a new directory: -```toml -[dependencies] -regex = "1" +```text +$ mkdir regex-example +$ cd regex-example +$ cargo init ``` -# Example: find a date +Second, add a dependency on `regex`: -General use of regular expressions in this package involves compiling an -expression and then using it to search, split or replace text. For example, -to confirm that some text resembles a date: +```text +$ cargo add regex +``` + +Third, edit `src/main.rs`. Delete what's there and replace it with this: + +``` +use regex::Regex; + +fn main() { + let re = Regex::new(r"Hello (?\w+)!").unwrap(); + let Some(caps) = re.captures("Hello Murphy!") else { + println!("no match!"); + return; + }; + println!("The name is: {}", &caps["name"]); +} +``` + +Fourth, run it with `cargo run`: + +```text +$ cargo run + Compiling memchr v2.5.0 + Compiling regex-syntax v0.7.1 + Compiling aho-corasick v1.0.1 + Compiling regex v1.8.1 + Compiling regex-example v0.1.0 (/tmp/regex-example) + Finished dev [unoptimized + debuginfo] target(s) in 4.22s + Running `target/debug/regex-example` +The name is: Murphy +``` + +The first time you run the program will show more output like above. But +subsequent runs shouldn't have to re-compile the dependencies. 
+ +# Examples + +This section provides a few examples, in tutorial style, showing how to +search a haystack with a regex. There are more examples throughout the API +documentation. + +Before starting though, it's worth defining a few terms: + +* A **regex** is a Rust value whose type is `Regex`. We use `re` as a +variable name for a regex. +* A **pattern** is the string that is used to build a regex. We use `pat` as +a variable name for a pattern. +* A **haystack** is the string that is searched by a regex. We use `hay` as a +variable name for a haystack. + +Sometimes the words "regex" and "pattern" are used interchangeably. + +General use of regular expressions in this crate proceeds by compiling a +**pattern** into a **regex**, and then using that regex to search, split or +replace parts of a **haystack**. + +### Example: find a middle initial + +We'll start off with a very simple example: a regex that looks for a specific +name but uses a wildcard to match a middle initial. Our pattern serves as +something like a template that will match a particular name with *any* middle +initial. ```rust use regex::Regex; -let re = Regex::new(r"^\d{4}-\d{2}-\d{2}$").unwrap(); -assert!(re.is_match("2014-01-01")); + +// We use 'unwrap()' here because it would be a bug in our program if the +// pattern failed to compile to a regex. Panicking in the presence of a bug +// is okay. +let re = Regex::new(r"Homer (.)\. Simpson").unwrap(); +let hay = "Homer J. Simpson"; +let Some(caps) = re.captures(hay) else { return }; +assert_eq!("J", &caps[1]); ``` -Notice the use of the `^` and `$` anchors. In this crate, every expression -is executed with an implicit `.*?` at the beginning and end, which allows -it to match anywhere in the text. Anchors can be used to ensure that the -full text matches an expression. 
+There are a few things worth noticing here in our first example: -This example also demonstrates the utility of -[raw strings](https://doc.rust-lang.org/stable/reference/tokens.html#raw-string-literals) -in Rust, which -are just like regular strings except they are prefixed with an `r` and do -not process any escape sequences. For example, `"\\d"` is the same -expression as `r"\d"`. +* The `.` is a special pattern meta character that means "match any single +character except for new lines." (More precisely, in this crate, it means +"match any UTF-8 encoding of any Unicode scalar value other than `\n`.") +* We can match an actual `.` literally by escaping it, i.e., `\.`. +* We use Rust's [raw strings] to avoid needing to deal with escape sequences in +both the regex pattern syntax and in Rust's string literal syntax. If we didn't +use raw strings here, we would have had to use `\\.` to match a literal `.` +character. That is, `r"\."` and `"\\."` are equivalent patterns. +* We put our wildcard `.` instruction in parentheses. These parentheses have a +special meaning that says, "make whatever part of the haystack matches within +these parentheses available as a capturing group." After finding a match, we +access this capture group with `&caps[1]`. -# Example: Avoid compiling the same regex in a loop +[raw strings]: https://doc.rust-lang.org/stable/reference/tokens.html#raw-string-literals -It is an anti-pattern to compile the same regular expression in a loop -since compilation is typically expensive. (It takes anywhere from a few -microseconds to a few **milliseconds** depending on the size of the -regex.) Not only is compilation itself expensive, but this also prevents -optimizations that reuse allocations internally to the matching engines. +Otherwise, we execute a search using `re.captures(hay)` and return from our +function if no match occurred. 
We then reference the middle initial by asking +for the part of the haystack that matched the capture group indexed at `1`. +(The capture group at index 0 is implicit and always corresponds to the entire +match. In this case, that's `Homer J. Simpson`.) -In Rust, it can sometimes be a pain to pass regular expressions around if -they're used from inside a helper function. Instead, we recommend using the -[`lazy_static`](https://crates.io/crates/lazy_static) crate to ensure that -regular expressions are compiled exactly once. +### Example: named capture groups -For example: +Continuing from our middle initial example above, we can tweak the pattern +slightly to give a name to the group that matches the middle initial: ```rust -use lazy_static::lazy_static; use regex::Regex; -fn some_helper_function(text: &str) -> bool { - lazy_static! { - static ref RE: Regex = Regex::new("...").unwrap(); - } - RE.is_match(text) -} +// Note that (?P.) is a different way to spell the same thing. +let re = Regex::new(r"Homer (?.)\. Simpson").unwrap(); +let hay = "Homer J. Simpson"; +let Some(caps) = re.captures(hay) else { return }; +assert_eq!("J", &caps["middle"]); +``` + +Giving a name to a group can be useful when there are multiple groups in +a pattern. It makes the code referring to those groups a bit easier to +understand. + +### Example: validating a particular date format + +This examples shows how to confirm whether a haystack, in its entirety, matches +a particular date format: + +```rust +use regex::Regex; -fn main() {} +let re = Regex::new(r"^\d{4}-\d{2}-\d{2}$").unwrap(); +assert!(re.is_match("2010-03-14")); ``` -Specifically, in this example, the regex will be compiled when it is used for -the first time. On subsequent uses, it will reuse the previous compilation. +Notice the use of the `^` and `$` anchors. In this crate, every regex search is +run with an implicit `(?s:.)*?` at the beginning of its pattern, which allows +the regex to match anywhere in a haystack. 
Anchors, as above, can be used to +ensure that the full haystack matches a pattern. + +This crate is also Unicode aware by default, which means that `\d` might match +more than you might expect it to. For example: + +```rust +use regex::Regex; + +let re = Regex::new(r"^\d{4}-\d{2}-\d{2}$").unwrap(); +assert!(re.is_match("𝟚𝟘𝟙𝟘-𝟘𝟛-𝟙𝟜")); +``` + +To only match an ASCII decimal digit, all of the following are equivalent: -# Example: iterating over capture groups +* `[0-9]` +* `(?-u:\d)` +* `[[:digit:]]` +* `[\d&&\p{ascii}]` -This crate provides convenient iterators for matching an expression -repeatedly against a search string to find successive non-overlapping -matches. For example, to find all dates in a string and be able to access -them by their component pieces: +### Example: finding dates in a haystack + +In the previous example, we showed how one might validate that a haystack, +in its entirety, corresponded to a particular date format. But what if we wanted +to extract all things that look like dates in a specific format from a haystack? +To do this, we can use an iterator API to find all matches (notice that we've +removed the anchors and switched to looking for ASCII-only digits): ```rust -# use regex::Regex; -# fn main() { -let re = Regex::new(r"(\d{4})-(\d{2})-(\d{2})").unwrap(); -let text = "2012-03-14, 2013-01-01 and 2014-07-05"; -for cap in re.captures_iter(text) { - println!("Month: {} Day: {} Year: {}", &cap[2], &cap[3], &cap[1]); -} -// Output: -// Month: 03 Day: 14 Year: 2012 -// Month: 01 Day: 01 Year: 2013 -// Month: 07 Day: 05 Year: 2014 -# } +use regex::Regex; + +let re = Regex::new(r"[0-9]{4}-[0-9]{2}-[0-9]{2}").unwrap(); +let hay = "What do 1865-04-14, 1881-07-02, 1901-09-06 and 1963-11-22 have in common?"; +// 'm' is a 'Match', and 'as_str()' returns the matching part of the haystack. 
+let dates: Vec<&str> = re.find_iter(hay).map(|m| m.as_str()).collect(); +assert_eq!(dates, vec![ + "1865-04-14", + "1881-07-02", + "1901-09-06", + "1963-11-22", +]); +``` + +We can also iterate over [`Captures`] values instead of [`Match`] values, and +that in turn permits accessing each component of the date via capturing groups: + +```rust +use regex::Regex; + +let re = Regex::new(r"(?[0-9]{4})-(?[0-9]{2})-(?[0-9]{2})").unwrap(); +let hay = "What do 1865-04-14, 1881-07-02, 1901-09-06 and 1963-11-22 have in common?"; +// 'm' is a 'Match', and 'as_str()' returns the matching part of the haystack. +let dates: Vec<(&str, &str, &str)> = re.captures_iter(hay).map(|caps| { + // The unwraps are okay because every capture group must match if the whole + // regex matches, and in this context, we know we have a match. + // + // Note that we use `caps.name("y").unwrap().as_str()` instead of + // `&caps["y"]` because the lifetime of the former is the same as the + // lifetime of `hay` above, but the lifetime of the latter is tied to the + // lifetime of `caps` due to how the `Index` trait is defined. + let year = caps.name("y").unwrap().as_str(); + let month = caps.name("m").unwrap().as_str(); + let day = caps.name("d").unwrap().as_str(); + (year, month, day) +}).collect(); +assert_eq!(dates, vec![ + ("1865", "04", "14"), + ("1881", "07", "02"), + ("1901", "09", "06"), + ("1963", "11", "22"), +]); ``` -Notice that the year is in the capture group indexed at `1`. This is -because the *entire match* is stored in the capture group at index `0`. 
+### Example: simpler capture group extraction -# Example: replacement with named capture groups +One can use [`Captures::extract`] to make the code from the previous example a +bit simpler in this case: + +```rust +use regex::Regex; + +let re = Regex::new(r"([0-9]{4})-([0-9]{2})-([0-9]{2})").unwrap(); +let hay = "What do 1865-04-14, 1881-07-02, 1901-09-06 and 1963-11-22 have in common?"; +let dates: Vec<(&str, &str, &str)> = re.captures_iter(hay).map(|caps| { + let (_, [year, month, day]) = caps.extract(); + (year, month, day) +}).collect(); +assert_eq!(dates, vec![ + ("1865", "04", "14"), + ("1881", "07", "02"), + ("1901", "09", "06"), + ("1963", "11", "22"), +]); +``` + +`Captures::extract` works by ensuring that the number of matching groups match +the number of groups requested via the `[year, month, day]` syntax. If they do, +then the substrings for each corresponding capture group are automatically +returned in an appropriately sized array. Rust's syntax for pattern matching +arrays does the rest. + +### Example: replacement with named capture groups Building on the previous example, perhaps we'd like to rearrange the date -formats. This can be done with text replacement. But to make the code -clearer, we can *name* our capture groups and use those names as variables -in our replacement text: +formats. This can be done by finding each match and replacing it with +something different. 
The [`Regex::replace_all`] routine provides a convenient +way to do this, including by supporting references to named groups in the +replacement string: ```rust -# use regex::Regex; -# fn main() { -let re = Regex::new(r"(?P\d{4})-(?P\d{2})-(?P\d{2})").unwrap(); -let before = "2012-03-14, 2013-01-01 and 2014-07-05"; +use regex::Regex; + +let re = Regex::new(r"(?\d{4})-(?\d{2})-(?\d{2})").unwrap(); +let before = "1973-01-05, 1975-08-25 and 1980-10-18"; let after = re.replace_all(before, "$m/$d/$y"); -assert_eq!(after, "03/14/2012, 01/01/2013 and 07/05/2014"); -# } +assert_eq!(after, "01/05/1973, 08/25/1975 and 10/18/1980"); ``` -The `replace` methods are actually polymorphic in the replacement, which +The replace methods are actually polymorphic in the replacement, which provides more flexibility than is seen here. (See the documentation for -`Regex::replace` for more details.) +[`Regex::replace`] for more details.) + +### Example: verbose mode -Note that if your regex gets complicated, you can use the `x` flag to -enable insignificant whitespace mode, which also lets you write comments: +When your regex gets complicated, you might consider using something other +than regex. But if you stick with regex, you can use the `x` flag to enable +insignificant whitespace mode or "verbose mode." In this mode, whitespace +is treated as insignificant and one may write comments. This may make your +patterns easier to comprehend. 
```rust -# use regex::Regex; -# fn main() { +use regex::Regex; + let re = Regex::new(r"(?x) - (?P\d{4}) # the year + (?P\d{4}) # the year, including all Unicode digits - - (?P\d{2}) # the month + (?P\d{2}) # the month, including all Unicode digits - - (?P\d{2}) # the day + (?P\d{2}) # the day, including all Unicode digits ").unwrap(); -let before = "2012-03-14, 2013-01-01 and 2014-07-05"; + +let before = "1973-01-05, 1975-08-25 and 1980-10-18"; let after = re.replace_all(before, "$m/$d/$y"); -assert_eq!(after, "03/14/2012, 01/01/2013 and 07/05/2014"); -# } +assert_eq!(after, "01/05/1973, 08/25/1975 and 10/18/1980"); ``` If you wish to match against whitespace in this mode, you can still use `\s`, @@ -148,10 +373,10 @@ If you wish to match against whitespace in this mode, you can still use `\s`, directly with `\ `, use its hex character code `\x20` or temporarily disable the `x` flag, e.g., `(?-x: )`. -# Example: match multiple regular expressions simultaneously +### Example: match multiple regular expressions simultaneously -This demonstrates how to use a `RegexSet` to match multiple (possibly -overlapping) regular expressions in a single scan of the search text: +This demonstrates how to use a [`RegexSet`] to match multiple (possibly +overlapping) regexes in a single scan of a haystack: ```rust use regex::RegexSet; @@ -166,7 +391,8 @@ let set = RegexSet::new(&[ r"foobar", ]).unwrap(); -// Iterate over and collect all of the matches. +// Iterate over and collect all of the matches. Each match corresponds to the +// ID of the matching pattern. let matches: Vec<_> = set.matches("foobar").into_iter().collect(); assert_eq!(matches, vec![0, 2, 3, 4, 6]); @@ -176,96 +402,225 @@ assert!(!matches.matched(5)); assert!(matches.matched(6)); ``` -# Pay for what you use +# Performance + +This section briefly discusses a few concerns regarding the speed and resource +usage of regexes. 
-With respect to searching text with a regular expression, there are three -questions that can be asked: +### Only ask for what you need -1. Does the text match this expression? -2. If so, where does it match? -3. Where did the capturing groups match? +When running a search with a regex, there are generally three different types +of information one can ask for: + +1. Does a regex match in a haystack? +2. Where does a regex match in a haystack? +3. Where do each of the capturing groups match in a haystack? Generally speaking, this crate could provide a function to answer only #3, which would subsume #1 and #2 automatically. However, it can be significantly more expensive to compute the location of capturing group matches, so it's best not to do it if you don't need to. -Therefore, only use what you need. For example, don't use `find` if you -only need to test if an expression matches a string. (Use `is_match` -instead.) +Therefore, only ask for what you need. For example, don't use [`Regex::find`] +if you only need to test if a regex matches a haystack. Use [`Regex::is_match`] +instead. + +### Unicode can impact memory usage and search speed + +This crate has first class support for Unicode and it is **enabled by default**. +In many cases, the extra memory required to support it will be negligible and +it typically won't impact search speed. But it can in some cases. + +With respect to memory usage, the impact of Unicode principally manifests +through the use of Unicode character classes. Unicode character classes +tend to be quite large. For example, `\w` by default matches around 140,000 +distinct codepoints. This requires additional memory, and tends to slow down +regex compilation. While a `\w` here and there is unlikely to be noticed, +writing `\w{100}` will for example result in quite a large regex by default. 
+Indeed, `\w` is considerably larger than its ASCII-only version, so if your +requirements are satisfied by ASCII, it's probably a good idea to stick to +ASCII classes. The ASCII-only version of `\w` can be spelled in a number of +ways. All of the following are equivalent: + +* `[0-9A-Za-z_]` +* `(?-u:\w)` +* `[[:word:]]` +* `[\w&&\p{ascii}]` + +With respect to search speed, Unicode tends to be handled pretty well, even when +using large Unicode character classes. However, some of the faster internal +regex engines cannot handle a Unicode aware word boundary assertion. So if you +don't need Unicode-aware word boundary assertions, you might consider using +`(?-u:\b)` instead of `\b`, where the former uses an ASCII-only definition of +a word character. + +### Literals might accelerate searches + +This crate tends to be quite good at recognizing literals in a regex pattern +and using them to accelerate a search. If it is at all possible to include +some kind of literal in your pattern, then it might make search substantially +faster. For example, in the regex `\w+@\w+`, the engine will look for +occurrences of `@` and then try a reverse match for `\w+` to find the start +position. + +### Avoid re-compiling regexes, especially in a loop + +It is an anti-pattern to compile the same pattern in a loop since regex +compilation is typically expensive. (It takes anywhere from a few microseconds +to a few **milliseconds** depending on the size of the pattern.) Not only is +compilation itself expensive, but this also prevents optimizations that reuse +allocations internally to the regex engine. + +In Rust, it can sometimes be a pain to pass regexes around if they're used from +inside a helper function. Instead, we recommend using crates like [`once_cell`] +and [`lazy_static`] to ensure that patterns are compiled exactly once. 
+ +[`once_cell`]: https://crates.io/crates/once_cell +[`lazy_static`]: https://crates.io/crates/lazy_static + +This example shows how to use `once_cell`: -# Unicode +```rust +use { + once_cell::sync::Lazy, + regex::Regex, +}; -This implementation executes regular expressions **only** on valid UTF-8 -while exposing match locations as byte indices into the search string. (To -relax this restriction, use the [`bytes`](bytes/index.html) sub-module.) -Conceptually, the regex engine works by matching a haystack as if it were a -sequence of Unicode scalar values. +fn some_helper_function(haystack: &str) -> bool { + static RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"...").unwrap()); + RE.is_match(haystack) +} + +fn main() { + assert!(some_helper_function("abc")); + assert!(!some_helper_function("ac")); +} +``` + +Specifically, in this example, the regex will be compiled when it is used for +the first time. On subsequent uses, it will reuse the previously built `Regex`. +Notice how one can define the `Regex` locally to a specific function. + +### Sharing a regex across threads can result in contention + +While a single `Regex` can be freely used from multiple threads simultaneously, +there is a small synchronization cost that must be paid. Generally speaking, +one shouldn't expect to observe this unless the principal task in each thread +is searching with the regex *and* most searches are on short haystacks. In this +case, internal contention on shared resources can spike and increase latency, +which in turn may slow down each individual search. + +One can work around this by cloning each `Regex` before sending it to another +thread. The cloned regexes will still share the same internal read-only portion +of its compiled state (it's reference counted), but each thread will get +optimized access to the mutable space that is used to run a search. In general, +there is no additional cost in memory to doing this. 
The only cost is the added +code complexity required to explicitly clone the regex. (If you share the same +`Regex` across multiple threads, each thread still gets its own mutable space, +but accessing that space is slower.) -Only simple case folding is supported. Namely, when matching -case-insensitively, the characters are first mapped using the "simple" case -folding rules defined by Unicode. +# Unicode -Regular expressions themselves are **only** interpreted as a sequence of -Unicode scalar values. This means you can use Unicode characters directly -in your expression: +This section discusses what kind of Unicode support this regex library has. +Before showing some examples, we'll summarize the relevant points: + +* This crate almost fully implements "Basic Unicode Support" (Level 1) as +specified by the [Unicode Technical Standard #18][UTS18]. The full details +of what is supported are documented in [UNICODE.md] in the root of the regex +crate repository. There is virtually no support for "Extended Unicode Support" +(Level 2) from UTS#18. +* The top-level [`Regex`] runs searches *as if* iterating over each of the +codepoints in the haystack. That is, the fundamental atom of matching is a +single codepoint. +* [`bytes::Regex`], in contrast, permits disabling Unicode mode for part or all +of your pattern in all cases. When Unicode mode is disabled, then a search is +run *as if* iterating over each byte in the haystack. That is, the fundamental +atom of matching is a single byte. (A top-level `Regex` also permits disabling +Unicode and thus matching *as if* it were one byte at a time, but only when +doing so wouldn't permit matching invalid UTF-8.) +* When Unicode mode is enabled (the default), `.` will match an entire Unicode +scalar value, even when it is encoded using multiple bytes. When Unicode mode +is disabled (e.g., `(?-u:.)`), then `.` will match a single byte in all cases. +* The character classes `\w`, `\d` and `\s` are all Unicode-aware by default. 
+Use `(?-u:\w)`, `(?-u:\d)` and `(?-u:\s)` to get their ASCII-only definitions. +* Similarly, `\b` and `\B` use a Unicode definition of a "word" character. +To get ASCII-only word boundaries, use `(?-u:\b)` and `(?-u:\B)`. This also +applies to the special word boundary assertions. (That is, `\b{start}`, +`\b{end}`, `\b{start-half}`, `\b{end-half}`.) +* `^` and `$` are **not** Unicode-aware in multi-line mode. Namely, they only +recognize `\n` (assuming CRLF mode is not enabled) and not any of the other +forms of line terminators defined by Unicode. +* Case insensitive searching is Unicode-aware and uses simple case folding. +* Unicode general categories, scripts and many boolean properties are available +by default via the `\p{property name}` syntax. +* In all cases, matches are reported using byte offsets. Or more precisely, +UTF-8 code unit offsets. This permits constant time indexing and slicing of the +haystack. + +[UTS18]: https://unicode.org/reports/tr18/ +[UNICODE.md]: https://github.com/rust-lang/regex/blob/master/UNICODE.md + +Patterns themselves are **only** interpreted as a sequence of Unicode scalar +values. This means you can use Unicode characters directly in your pattern: ```rust -# use regex::Regex; -# fn main() { +use regex::Regex; + let re = Regex::new(r"(?i)Δ+").unwrap(); -let mat = re.find("ΔδΔ").unwrap(); -assert_eq!((mat.start(), mat.end()), (0, 6)); -# } +let m = re.find("ΔδΔ").unwrap(); +assert_eq!((0, 6), (m.start(), m.end())); +// alternatively: +assert_eq!(0..6, m.range()); ``` -Most features of the regular expressions in this crate are Unicode aware. Here -are some examples: - -* `.` will match any valid UTF-8 encoded Unicode scalar value except for `\n`. - (To also match `\n`, enable the `s` flag, e.g., `(?s:.)`.) -* `\w`, `\d` and `\s` are Unicode aware. For example, `\s` will match all forms - of whitespace categorized by Unicode. -* `\b` matches a Unicode word boundary. 
-* Negated character classes like `[^a]` match all Unicode scalar values except - for `a`. -* `^` and `$` are **not** Unicode aware in multi-line mode. Namely, they only - recognize `\n` and not any of the other forms of line terminators defined - by Unicode. - -Unicode general categories, scripts, script extensions, ages and a smattering -of boolean properties are available as character classes. For example, you can -match a sequence of numerals, Greek or Cherokee letters: +As noted above, Unicode general categories, scripts, script extensions, ages +and a smattering of boolean properties are available as character classes. For +example, you can match a sequence of numerals, Greek or Cherokee letters: ```rust -# use regex::Regex; -# fn main() { +use regex::Regex; + let re = Regex::new(r"[\pN\p{Greek}\p{Cherokee}]+").unwrap(); -let mat = re.find("abcΔᎠβⅠᏴγδⅡxyz").unwrap(); -assert_eq!((mat.start(), mat.end()), (3, 23)); -# } +let m = re.find("abcΔᎠβⅠᏴγδⅡxyz").unwrap(); +assert_eq!(3..23, m.range()); ``` -For a more detailed breakdown of Unicode support with respect to -[UTS#18](https://unicode.org/reports/tr18/), -please see the -[UNICODE](https://github.com/rust-lang/regex/blob/master/UNICODE.md) -document in the root of the regex repository. +While not specific to Unicode, this library also supports character class set +operations. Namely, one can nest character classes arbitrarily and perform set +operations on them. Those set operations are union (the default), intersection, +difference and symmetric difference. These set operations tend to be most +useful with Unicode character classes. 
For example, to match any codepoint +that is both in the `Greek` script and in the `Letter` general category: + +```rust +use regex::Regex; + +let re = Regex::new(r"[\p{Greek}&&\pL]+").unwrap(); +let subs: Vec<&str> = re.find_iter("ΔδΔ𐅌ΔδΔ").map(|m| m.as_str()).collect(); +assert_eq!(subs, vec!["ΔδΔ", "ΔδΔ"]); -# Opt out of Unicode support +// If we just match on Greek, then all codepoints would match! +let re = Regex::new(r"\p{Greek}+").unwrap(); +let subs: Vec<&str> = re.find_iter("ΔδΔ𐅌ΔδΔ").map(|m| m.as_str()).collect(); +assert_eq!(subs, vec!["ΔδΔ𐅌ΔδΔ"]); +``` -The `bytes` sub-module provides a `Regex` type that can be used to match -on `&[u8]`. By default, text is interpreted as UTF-8 just like it is with -the main `Regex` type. However, this behavior can be disabled by turning -off the `u` flag, even if doing so could result in matching invalid UTF-8. -For example, when the `u` flag is disabled, `.` will match any byte instead -of any Unicode scalar value. +### Opt out of Unicode support + +The [`bytes::Regex`] type can be used to search `&[u8]` haystacks. By +default, haystacks are conventionally treated as UTF-8 just like it is with the +main `Regex` type. However, this behavior can be disabled by turning off the +`u` flag, even if doing so could result in matching invalid UTF-8. For example, +when the `u` flag is disabled, `.` will match any byte instead of any Unicode +scalar value. Disabling the `u` flag is also possible with the standard `&str`-based `Regex` type, but it is only allowed where the UTF-8 invariant is maintained. For example, `(?-u:\w)` is an ASCII-only `\w` character class and is legal in an -`&str`-based `Regex`, but `(?-u:\xFF)` will attempt to match the raw byte -`\xFF`, which is invalid UTF-8 and therefore is illegal in `&str`-based +`&str`-based `Regex`, but `(?-u:\W)` will attempt to match *any byte* that +isn't in `(?-u:\w)`, which in turn includes bytes that are invalid UTF-8. 
+Similarly, `(?-u:\xFF)` will attempt to match the raw byte `\xFF` (instead of +`U+00FF`), which is invalid UTF-8 and therefore is illegal in `&str`-based regexes. Finally, since Unicode support requires bundling large Unicode data @@ -281,10 +636,11 @@ The syntax supported in this crate is documented below. Note that the regular expression parser and abstract syntax are exposed in a separate crate, [`regex-syntax`](https://docs.rs/regex-syntax). -## Matching one character +### Matching one character