diff --git a/ci/check-profiling.sh b/ci/check-profiling.sh index 207efa770..c69d109e1 100755 --- a/ci/check-profiling.sh +++ b/ci/check-profiling.sh @@ -137,7 +137,7 @@ test -f results/eprintln-Test-helloworld-Check-Full test ! -s results/eprintln-Test-helloworld-Check-Full # llvm-lines. `Debug` not `Check` because it doesn't support `Check` profiles. -# Including both `helloworld` and `regex-1.5.5` benchmarks, as they exercise the +# Including both `helloworld` and `regex-automata-0.4.8` benchmarks, as they exercise the # zero dependency and the greater than zero dependency cases, respectively, the # latter of which has broken before. RUST_BACKTRACE=1 RUST_LOG=raw_cargo_messages=trace,collector=debug,rust_sysroot=debug \ @@ -146,12 +146,12 @@ RUST_BACKTRACE=1 RUST_LOG=raw_cargo_messages=trace,collector=debug,rust_sysroot= --id Test \ --profiles Debug \ --cargo $bindir/cargo \ - --include helloworld,regex-1.5.5 \ + --include helloworld,regex-automata-0.4.8 \ --scenarios Full test -f results/ll-Test-helloworld-Debug-Full grep -q "Lines.*Copies.*Function name" results/ll-Test-helloworld-Debug-Full -test -f results/ll-Test-regex-1.5.5-Debug-Full -grep -q "Lines.*Copies.*Function name" results/ll-Test-regex-1.5.5-Debug-Full +test -f results/ll-Test-regex-automata-0.4.8-Debug-Full +grep -q "Lines.*Copies.*Function name" results/ll-Test-regex-automata-0.4.8-Debug-Full # llvm-ir. `Debug` not `Check` because it works better that way. RUST_BACKTRACE=1 RUST_LOG=raw_cargo_messages=trace,collector=debug,rust_sysroot=debug \ diff --git a/collector/README.md b/collector/README.md index 64531511d..48e9acc62 100644 --- a/collector/README.md +++ b/collector/README.md @@ -235,14 +235,14 @@ Finally, while most of the options you can pass to the collector are supported, the profilers used in the `profile_local` command are not. In Windows, the only currently supported profiler is the `self-profiler`. -As a complete example, let's run just the `regex-1.5.5` benchmark in the `Debug` +As a complete example, let's run just the `regex-automata-0.4.8` benchmark in the `Debug` profile with self-profiling results available: ```pwsh $env:XPERF="C:\Program Files (x86)\Windows Kits\10\Windows Performance Toolkit\xperf.exe" $env:TRACELOG="C:\Program Files (x86)\Windows Kits\10\bin\10.0.19041.0\x64\tracelog.exe" -.\target\release\collector.exe bench_local $env:RUST_ORIGINAL --id Original --profiles Debug --include regex-1.5.5 --self-profile -.\target\release\collector.exe bench_local $env:RUST_MODIFIED --id Modified --profiles Debug --include regex-1.5.5 --self-profile +.\target\release\collector.exe bench_local $env:RUST_ORIGINAL --id Original --profiles Debug --include regex-automata-0.4.8 --self-profile +.\target\release\collector.exe bench_local $env:RUST_MODIFIED --id Modified --profiles Debug --include regex-automata-0.4.8 --self-profile .\target\release\site.exe .\results.db ``` diff --git a/collector/compile-benchmarks/README.md b/collector/compile-benchmarks/README.md index 03fa760c6..a28ee0ff6 100644 --- a/collector/compile-benchmarks/README.md +++ b/collector/compile-benchmarks/README.md @@ -37,7 +37,6 @@ They mostly consist of real-world crates. types, constants, and functions, but relatively little normal code. Stresses the parser. A very widely-used crate. - **nalgebra-0.33.0**: A linear algebra library. It exercises the new trait solver in different ways than the old solver. -- **regex-1.5.5**: A regular expression parser. Used by many Rust programs. 
- **regex-automata-0.4.8**: A regular expression matching engine. Used by `regex`, which is used by many Rust programs. - **ripgrep-13.0.0**: A line-oriented search tool. A widely-used utility, and a diff --git a/collector/compile-benchmarks/REUSE.toml b/collector/compile-benchmarks/REUSE.toml index 9523f4767..1f7b966a9 100644 --- a/collector/compile-benchmarks/REUSE.toml +++ b/collector/compile-benchmarks/REUSE.toml @@ -180,11 +180,6 @@ path = "regex/**" SPDX-FileCopyrightText = "The Rust Project Developers (see https://thanks.rust-lang.org)" SPDX-License-Identifier = "MIT OR Apache-2.0" -[[annotations]] -path = "regex-1.5.5/**" -SPDX-FileCopyrightText = "The Rust Project Developers (see https://thanks.rust-lang.org)" -SPDX-License-Identifier = "MIT OR Apache-2.0" - [[annotations]] path = "regex-automata-0.4.8/**" SPDX-FileCopyrightText = "The Rust Project Developers (see https://thanks.rust-lang.org)" diff --git a/collector/compile-benchmarks/regex-1.5.5/.cargo_vcs_info.json b/collector/compile-benchmarks/regex-1.5.5/.cargo_vcs_info.json deleted file mode 100644 index 1bd1d5b3d..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/.cargo_vcs_info.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "git": { - "sha1": "d130381b150756ba7e5940efdc6ebdf47f4febc0" - }, - "path_in_vcs": "" -} \ No newline at end of file diff --git a/collector/compile-benchmarks/regex-1.5.5/.gitignore b/collector/compile-benchmarks/regex-1.5.5/.gitignore deleted file mode 100644 index a4bdfcbaf..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/.gitignore +++ /dev/null @@ -1,7 +0,0 @@ -target -bench-log -.*.swp -wiki -tags -examples/debug.rs -tmp/ diff --git a/collector/compile-benchmarks/regex-1.5.5/0-compile-one.patch b/collector/compile-benchmarks/regex-1.5.5/0-compile-one.patch deleted file mode 100644 index 506fb72cc..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/0-compile-one.patch +++ /dev/null @@ -1,13 +0,0 @@ -diff --git a/src/compile.rs b/src/compile.rs -index 9db743f..ef1948e 100644 ---- a/src/compile.rs -+++ b/src/compile.rs -@@ -137,6 +137,8 @@ impl Compiler { - } - - fn compile_one(mut self, expr: &Hir) -> result::Result { -+ {} // @030 -+ - // If we're compiling a forward DFA and we aren't anchored, then - // add a `.*?` before the first capture group. - // Other matching engines handle this by baking the logic into the diff --git a/collector/compile-benchmarks/regex-1.5.5/1-is-valid-cap-letter.patch b/collector/compile-benchmarks/regex-1.5.5/1-is-valid-cap-letter.patch deleted file mode 100644 index 86dd177b4..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/1-is-valid-cap-letter.patch +++ /dev/null @@ -1,13 +0,0 @@ -diff --git a/src/expand.rs b/src/expand.rs -index 9bea703..3b6ae94 100644 ---- a/src/expand.rs -+++ b/src/expand.rs -@@ -128,6 +128,7 @@ fn find_cap_ref(replacement: &[u8]) -> Option> { - } - - /// Returns true if and only if the given byte is allowed in a capture name. 
- fn is_valid_cap_letter(b: &u8) -> bool { -+ { } - match *b { - b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_' => true, - _ => false, diff --git a/collector/compile-benchmarks/regex-1.5.5/2-Compiler-new.patch b/collector/compile-benchmarks/regex-1.5.5/2-Compiler-new.patch deleted file mode 100644 index 957d7054c..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/2-Compiler-new.patch +++ /dev/null @@ -1,12 +0,0 @@ -diff --git a/src/compile.rs b/src/compile.rs -index 9db743f..fb812ae 100644 ---- a/src/compile.rs -+++ b/src/compile.rs -@@ -54,6 +54,7 @@ impl Compiler { - /// - /// Various options can be set before calling `compile` on an expression. - pub fn new() -> Self { -+ {} - Compiler { - insts: vec![], - compiled: Program::new(), diff --git a/collector/compile-benchmarks/regex-1.5.5/3-reverse.patch b/collector/compile-benchmarks/regex-1.5.5/3-reverse.patch deleted file mode 100644 index 4386adfea..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/3-reverse.patch +++ /dev/null @@ -1,12 +0,0 @@ -diff --git a/src/compile.rs b/src/compile.rs -index 9db743f..4e56c2d 100644 ---- a/src/compile.rs -+++ b/src/compile.rs -@@ -114,6 +114,7 @@ impl Compiler { - /// When set, the machine returned is suitable for matching text in - /// reverse. In particular, all concatenations are flipped. - pub fn reverse(mut self, yes: bool) -> Self { -+ {} - self.compiled.is_reverse = yes; - self - } diff --git a/collector/compile-benchmarks/regex-1.5.5/4-byte-frequencies.patch b/collector/compile-benchmarks/regex-1.5.5/4-byte-frequencies.patch deleted file mode 100644 index 172b35d10..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/4-byte-frequencies.patch +++ /dev/null @@ -1,13 +0,0 @@ -diff --git a/src/freqs.rs b/src/freqs.rs -index 92bafc1..6eb5799 100644 ---- a/src/freqs.rs -+++ b/src/freqs.rs -@@ -2,7 +2,7 @@ - // edit directly - - pub const BYTE_FREQUENCIES: [u8; 256] = [ -- 55, // '\x00' -+ 54+1, // '\x00' - 52, // '\x01' - 51, // '\x02' - 50, // '\x03' diff --git a/collector/compile-benchmarks/regex-1.5.5/5-Job.patch b/collector/compile-benchmarks/regex-1.5.5/5-Job.patch deleted file mode 100644 index 05623d11a..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/5-Job.patch +++ /dev/null @@ -1,14 +0,0 @@ -diff --git a/src/backtrack.rs b/src/backtrack.rs -index 3c06254..4b72fd4 100644 ---- a/src/backtrack.rs -+++ b/src/backtrack.rs -@@ -82,8 +82,8 @@ impl Cache { - /// stack to do it. - #[derive(Clone, Copy, Debug)] - enum Job { -- Inst { ip: InstPtr, at: InputAt }, - SaveRestore { slot: usize, old_pos: Option }, -+ Inst { ip: InstPtr, at: InputAt }, - } - - impl<'a, 'm, 'r, 's, I: Input> Bounded<'a, 'm, 'r, 's, I> { diff --git a/collector/compile-benchmarks/regex-1.5.5/6-println.patch b/collector/compile-benchmarks/regex-1.5.5/6-println.patch deleted file mode 100644 index bb926f8ff..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/6-println.patch +++ /dev/null @@ -1,12 +0,0 @@ -diff --git a/src/re_set.rs b/src/re_set.rs -index 95c4306..eff56b0 100644 ---- a/src/re_set.rs -+++ b/src/re_set.rs -@@ -216,6 +216,7 @@ pub struct SetMatches { - impl SetMatches { - /// Whether this set contains any matches. 
- pub fn matched_any(&self) -> bool { -+ println!("testing"); - self.matched_any - } - diff --git a/collector/compile-benchmarks/regex-1.5.5/CHANGELOG.md b/collector/compile-benchmarks/regex-1.5.5/CHANGELOG.md deleted file mode 100644 index 71d19633d..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/CHANGELOG.md +++ /dev/null @@ -1,1021 +0,0 @@ -1.5.4 (2021-05-06) -================== -This release fixes another compilation failure when building regex. This time, -the fix is for when the `pattern` feature is enabled, which only works on -nightly Rust. CI has been updated to test this case. - -* [BUG #772](https://github.com/rust-lang/regex/pull/772): - Fix build when `pattern` feature is enabled. - - -1.5.3 (2021-05-01) -================== -This releases fixes a bug when building regex with only the `unicode-perl` -feature. It turns out that while CI was building this configuration, it wasn't -actually failing the overall build on a failed compilation. - -* [BUG #769](https://github.com/rust-lang/regex/issues/769): - Fix build in `regex-syntax` when only the `unicode-perl` feature is enabled. - - -1.5.2 (2021-05-01) -================== -This release fixes a performance bug when Unicode word boundaries are used. -Namely, for certain regexes on certain inputs, it's possible for the lazy DFA -to stop searching (causing a fallback to a slower engine) when it doesn't -actually need to. - -[PR #768](https://github.com/rust-lang/regex/pull/768) fixes the bug, which was -originally reported in -[ripgrep#1860](https://github.com/BurntSushi/ripgrep/issues/1860). - - -1.5.1 (2021-04-30) -================== -This is a patch release that fixes a compilation error when the `perf-literal` -feature is not enabled. - - -1.5.0 (2021-04-30) -================== -This release primarily updates to Rust 2018 (finally) and bumps the MSRV to -Rust 1.41 (from Rust 1.28). Rust 1.41 was chosen because it's still reasonably -old, and is what's in Debian stable at the time of writing. - -This release also drops this crate's own bespoke substring search algorithms -in favor of a new -[`memmem` implementation provided by the `memchr` crate](https://docs.rs/memchr/2.4.0/memchr/memmem/index.html). -This will change the performance profile of some regexes, sometimes getting a -little worse, and hopefully more frequently, getting a lot better. Please -report any serious performance regressions if you find them. - - -1.4.6 (2021-04-22) -================== -This is a small patch release that fixes the compiler's size check on how much -heap memory a regex uses. Previously, the compiler did not account for the -heap usage of Unicode character classes. Now it does. It's possible that this -may make some regexes fail to compile that previously did compile. If that -happens, please file an issue. - -* [BUG OSS-fuzz#33579](https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=33579): - Some regexes can use more heap memory than one would expect. - - -1.4.5 (2021-03-14) -================== -This is a small patch release that fixes a regression in the size of a `Regex` -in the 1.4.4 release. Prior to 1.4.4, a `Regex` was 552 bytes. In the 1.4.4 -release, it was 856 bytes due to internal changes. In this release, a `Regex` -is now 16 bytes. In general, the size of a `Regex` was never something that was -on my radar, but this increased size in the 1.4.4 release seems to have crossed -a threshold and resulted in stack overflows in some programs. 
- -* [BUG #750](https://github.com/rust-lang/regex/pull/750): - Fixes stack overflows seemingly caused by a large `Regex` size by decreasing - its size. - - -1.4.4 (2021-03-11) -================== -This is a small patch release that contains some bug fixes. Notably, it also -drops the `thread_local` (and `lazy_static`, via transitivity) dependencies. - -Bug fixes: - -* [BUG #362](https://github.com/rust-lang/regex/pull/362): - Memory leaks caused by an internal caching strategy should now be fixed. -* [BUG #576](https://github.com/rust-lang/regex/pull/576): - All regex types now implement `UnwindSafe` and `RefUnwindSafe`. -* [BUG #728](https://github.com/rust-lang/regex/pull/749): - Add missing `Replacer` impls for `Vec`, `String`, `Cow`, etc. - - -1.4.3 (2021-01-08) -================== -This is a small patch release that adds some missing standard trait -implementations for some types in the public API. - -Bug fixes: - -* [BUG #734](https://github.com/rust-lang/regex/pull/734): - Add `FusedIterator` and `ExactSizeIterator` impls to iterator types. -* [BUG #735](https://github.com/rust-lang/regex/pull/735): - Add missing `Debug` impls to public API types. - - -1.4.2 (2020-11-01) -================== -This is a small bug fix release that bans `\P{any}`. We previously banned empty -classes like `[^\w\W]`, but missed the `\P{any}` case. In the future, we hope -to permit empty classes. - -* [BUG #722](https://github.com/rust-lang/regex/issues/722): - Ban `\P{any}` to avoid a panic in the regex compiler. Found by OSS-Fuzz. - - -1.4.1 (2020-10-13) -================== -This is a small bug fix release that makes `\p{cf}` work. Previously, it would -report "property not found" even though `cf` is a valid abbreviation for the -`Format` general category. - -* [BUG #719](https://github.com/rust-lang/regex/issues/719): - Fixes bug that prevented `\p{cf}` from working. - - -1.4.0 (2020-10-11) -================== -This releases has a few minor documentation fixes as well as some very minor -API additions. The MSRV remains at Rust 1.28 for now, but this is intended to -increase to at least Rust 1.41.1 soon. - -This release also adds support for OSS-Fuzz. Kudos to -[@DavidKorczynski](https://github.com/DavidKorczynski) -for doing the heavy lifting for that! - -New features: - -* [FEATURE #649](https://github.com/rust-lang/regex/issues/649): - Support `[`, `]` and `.` in capture group names. -* [FEATURE #687](https://github.com/rust-lang/regex/issues/687): - Add `is_empty` predicate to `RegexSet`. -* [FEATURE #689](https://github.com/rust-lang/regex/issues/689): - Implement `Clone` for `SubCaptureMatches`. -* [FEATURE #715](https://github.com/rust-lang/regex/issues/715): - Add `empty` constructor to `RegexSet` for convenience. - -Bug fixes: - -* [BUG #694](https://github.com/rust-lang/regex/issues/694): - Fix doc example for `Replacer::replace_append`. -* [BUG #698](https://github.com/rust-lang/regex/issues/698): - Clarify docs for `s` flag when using a `bytes::Regex`. -* [BUG #711](https://github.com/rust-lang/regex/issues/711): - Clarify `is_match` docs to indicate that it can match anywhere in string. - - -1.3.9 (2020-05-28) -================== -This release fixes a MSRV (Minimum Support Rust Version) regression in the -1.3.8 release. Namely, while 1.3.8 compiles on Rust 1.28, it actually does not -compile on other Rust versions, such as Rust 1.39. - -Bug fixes: - -* [BUG #685](https://github.com/rust-lang/regex/issues/685): - Remove use of `doc_comment` crate, which cannot be used before Rust 1.43. 
- - -1.3.8 (2020-05-28) -================== -This release contains a couple of important bug fixes driven -by better support for empty-subexpressions in regexes. For -example, regexes like `b|` are now allowed. Major thanks to -[@sliquister](https://github.com/sliquister) for implementing support for this -in [#677](https://github.com/rust-lang/regex/pull/677). - -Bug fixes: - -* [BUG #523](https://github.com/rust-lang/regex/pull/523): - Add note to documentation that spaces can be escaped in `x` mode. -* [BUG #524](https://github.com/rust-lang/regex/issues/524): - Add support for empty sub-expressions, including empty alternations. -* [BUG #659](https://github.com/rust-lang/regex/issues/659): - Fix match bug caused by an empty sub-expression miscompilation. - - -1.3.7 (2020-04-17) -================== -This release contains a small bug fix that fixes how `regex` forwards crate -features to `regex-syntax`. In particular, this will reduce recompilations in -some cases. - -Bug fixes: - -* [BUG #665](https://github.com/rust-lang/regex/pull/665): - Fix feature forwarding to `regex-syntax`. - - -1.3.6 (2020-03-24) -================== -This release contains a sizable (~30%) performance improvement when compiling -some kinds of large regular expressions. - -Performance improvements: - -* [PERF #657](https://github.com/rust-lang/regex/pull/657): - Improvement performance of compiling large regular expressions. - - -1.3.5 (2020-03-12) -================== -This release updates this crate to Unicode 13. - -New features: - -* [FEATURE #653](https://github.com/rust-lang/regex/pull/653): - Update `regex-syntax` to Unicode 13. - - -1.3.4 (2020-01-30) -================== -This is a small bug fix release that fixes a bug related to the scoping of -flags in a regex. Namely, before this fix, a regex like `((?i)a)b)` would -match `aB` despite the fact that `b` should not be matched case insensitively. - -Bug fixes: - -* [BUG #640](https://github.com/rust-lang/regex/issues/640): - Fix bug related to the scoping of flags in a regex. - - -1.3.3 (2020-01-09) -================== -This is a small maintenance release that upgrades the dependency on -`thread_local` from `0.3` to `1.0`. The minimum supported Rust version remains -at Rust 1.28. - - -1.3.2 (2020-01-09) -================== -This is a small maintenance release with some house cleaning and bug fixes. - -New features: - -* [FEATURE #631](https://github.com/rust-lang/regex/issues/631): - Add a `Match::range` method an a `From for Range` impl. - -Bug fixes: - -* [BUG #521](https://github.com/rust-lang/regex/issues/521): - Corrects `/-/.splitn("a", 2)` to return `["a"]` instead of `["a", ""]`. -* [BUG #594](https://github.com/rust-lang/regex/pull/594): - Improve error reporting when writing `\p\`. -* [BUG #627](https://github.com/rust-lang/regex/issues/627): - Corrects `/-/.split("a-")` to return `["a", ""]` instead of `["a"]`. -* [BUG #633](https://github.com/rust-lang/regex/pull/633): - Squash deprecation warnings for the `std::error::Error::description` method. - - -1.3.1 (2019-09-04) -================== -This is a maintenance release with no changes in order to try to work-around -a [docs.rs/Cargo issue](https://github.com/rust-lang/docs.rs/issues/400). - - -1.3.0 (2019-09-03) -================== -This release adds a plethora of new crate features that permit users of regex -to shrink its size considerably, in exchange for giving up either functionality -(such as Unicode support) or runtime performance. 
When all such features are -disabled, the dependency tree for `regex` shrinks to exactly 1 crate -(`regex-syntax`). More information about the new crate features can be -[found in the docs](https://docs.rs/regex/*/#crate-features). - -Note that while this is a new minor version release, the minimum supported -Rust version for this crate remains at `1.28.0`. - -New features: - -* [FEATURE #474](https://github.com/rust-lang/regex/issues/474): - The `use_std` feature has been deprecated in favor of the `std` feature. - The `use_std` feature will be removed in regex 2. Until then, `use_std` will - remain as an alias for the `std` feature. -* [FEATURE #583](https://github.com/rust-lang/regex/issues/583): - Add a substantial number of crate features shrinking `regex`. - - -1.2.1 (2019-08-03) -================== -This release does a bit of house cleaning. Namely: - -* This repository is now using rustfmt. -* License headers have been removed from all files, in following suit with the - Rust project. -* Teddy has been removed from the `regex` crate, and is now part of the - `aho-corasick` crate. - [See `aho-corasick`'s new `packed` sub-module for details](https://docs.rs/aho-corasick/0.7.6/aho_corasick/packed/index.html). -* The `utf8-ranges` crate has been deprecated, with its functionality moving - into the - [`utf8` sub-module of `regex-syntax`](https://docs.rs/regex-syntax/0.6.11/regex_syntax/utf8/index.html). -* The `ucd-util` dependency has been dropped, in favor of implementing what - little we need inside of `regex-syntax` itself. - -In general, this is part of an ongoing (long term) effort to make optimizations -in the regex engine easier to reason about. The current code is too convoluted -and thus it is very easy to introduce new bugs. This simplification effort is -the primary motivation behind re-working the `aho-corasick` crate to not only -bundle algorithms like Teddy, but to also provide regex-like match semantics -automatically. - -Moving forward, the plan is to join up with the `bstr` and `regex-automata` -crates, with the former providing more sophisticated substring search -algorithms (thereby deleting existing code in `regex`) and the latter providing -ahead-of-time compiled DFAs for cases where they are inexpensive to compute. - - -1.2.0 (2019-07-20) -================== -This release updates regex's minimum supported Rust version to 1.28, which was -release almost 1 year ago. This release also updates regex's Unicode data -tables to 12.1.0. - - -1.1.9 (2019-07-06) -================== -This release contains a bug fix that caused regex's tests to fail, due to a -dependency on an unreleased behavior in regex-syntax. - -* [BUG #593](https://github.com/rust-lang/regex/issues/593): - Move an integration-style test on error messages into regex-syntax. - - -1.1.8 (2019-07-04) -================== -This release contains a few small internal refactorings. One of which fixes -an instance of undefined behavior in a part of the SIMD code. - -Bug fixes: - -* [BUG #545](https://github.com/rust-lang/regex/issues/545): - Improves error messages when a repetition operator is used without a number. -* [BUG #588](https://github.com/rust-lang/regex/issues/588): - Removes use of a repr(Rust) union used for type punning in the Teddy matcher. -* [BUG #591](https://github.com/rust-lang/regex/issues/591): - Update docs for running benchmarks and improve failure modes. - - -1.1.7 (2019-06-09) -================== -This release fixes up a few warnings as a result of recent deprecations. 
- - -1.1.6 (2019-04-16) -================== -This release fixes a regression introduced by a bug fix (for -[BUG #557](https://github.com/rust-lang/regex/issues/557)) which could cause -the regex engine to enter an infinite loop. This bug was originally -[reported against ripgrep](https://github.com/BurntSushi/ripgrep/issues/1247). - - -1.1.5 (2019-04-01) -================== -This release fixes a bug in regex's dependency specification where it requires -a newer version of regex-syntax, but this wasn't communicated correctly in the -Cargo.toml. This would have been caught by a minimal version check, but this -check was disabled because the `rand` crate itself advertises incorrect -dependency specifications. - -Bug fixes: - -* [BUG #570](https://github.com/rust-lang/regex/pull/570): - Fix regex-syntax minimal version. - - -1.1.4 (2019-03-31) -================== -This release fixes a backwards compatibility regression where Regex was no -longer UnwindSafe. This was caused by the upgrade to aho-corasick 0.7, whose -AhoCorasick type was itself not UnwindSafe. This has been fixed in aho-corasick -0.7.4, which we now require. - -Bug fixes: - -* [BUG #568](https://github.com/rust-lang/regex/pull/568): - Fix an API regression where Regex was no longer UnwindSafe. - - -1.1.3 (2019-03-30) -================== -This releases fixes a few bugs and adds a performance improvement when a regex -is a simple alternation of literals. - -Performance improvements: - -* [OPT #566](https://github.com/rust-lang/regex/pull/566): - Upgrades `aho-corasick` to 0.7 and uses it for `foo|bar|...|quux` regexes. - -Bug fixes: - -* [BUG #527](https://github.com/rust-lang/regex/issues/527): - Fix a bug where the parser would panic on patterns like `((?x))`. -* [BUG #555](https://github.com/rust-lang/regex/issues/555): - Fix a bug where the parser would panic on patterns like `(?m){1,1}`. -* [BUG #557](https://github.com/rust-lang/regex/issues/557): - Fix a bug where captures could lead to an incorrect match. - - -1.1.2 (2019-02-27) -================== -This release fixes a bug found in the fix introduced in 1.1.1. - -Bug fixes: - -* [BUG edf45e6f](https://github.com/rust-lang/regex/commit/edf45e6f): - Fix bug introduced in reverse suffix literal matcher in the 1.1.1 release. - - -1.1.1 (2019-02-27) -================== -This is a small release with one fix for a bug caused by literal optimizations. - -Bug fixes: - -* [BUG 661bf53d](https://github.com/rust-lang/regex/commit/661bf53d): - Fixes a bug in the reverse suffix literal optimization. This was originally - reported - [against ripgrep](https://github.com/BurntSushi/ripgrep/issues/1203). - - -1.1.0 (2018-11-30) -================== -This is a small release with a couple small enhancements. This release also -increases the minimal supported Rust version (MSRV) to 1.24.1 (from 1.20.0). In -accordance with this crate's MSRV policy, this release bumps the minor version -number. - -Performance improvements: - -* [OPT #511](https://github.com/rust-lang/regex/pull/511), - [OPT #540](https://github.com/rust-lang/regex/pull/540): - Improve lazy DFA construction for large regex sets. - -New features: - -* [FEATURE #538](https://github.com/rust-lang/regex/pull/538): - Add Emoji and "break" Unicode properties. See [UNICODE.md](UNICODE.md). - -Bug fixes: - -* [BUG #530](https://github.com/rust-lang/regex/pull/530): - Add Unicode license (for data tables). -* Various typo/doc fixups. - - -1.0.6 (2018-11-06) -================== -This is a small release. 
- -Performance improvements: - -* [OPT #513](https://github.com/rust-lang/regex/pull/513): - Improve performance of compiling large Unicode classes by 8-10%. - -Bug fixes: - -* [BUG #533](https://github.com/rust-lang/regex/issues/533): - Fix definition of `[[:blank:]]` class that regressed in `regex-syntax 0.5`. - - -1.0.5 (2018-09-06) -================== -This is a small release with an API enhancement. - -New features: - -* [FEATURE #509](https://github.com/rust-lang/regex/pull/509): - Generalize impls of the `Replacer` trait. - - -1.0.4 (2018-08-25) -================== -This is a small release that bumps the quickcheck dependency. - - -1.0.3 (2018-08-24) -================== -This is a small bug fix release. - -Bug fixes: - -* [BUG #504](https://github.com/rust-lang/regex/pull/504): - Fix for Cargo's "minimal version" support. -* [BUG 1e39165f](https://github.com/rust-lang/regex/commit/1e39165f): - Fix doc examples for byte regexes. - - -1.0.2 (2018-07-18) -================== -This release exposes some new lower level APIs on `Regex` that permit -amortizing allocation and controlling the location at which a search is -performed in a more granular way. Most users of the regex crate will not -need or want to use these APIs. - -New features: - -* [FEATURE #493](https://github.com/rust-lang/regex/pull/493): - Add a few lower level APIs for amortizing allocation and more fine grained - searching. - -Bug fixes: - -* [BUG 3981d2ad](https://github.com/rust-lang/regex/commit/3981d2ad): - Correct outdated documentation on `RegexBuilder::dot_matches_new_line`. -* [BUG 7ebe4ae0](https://github.com/rust-lang/regex/commit/7ebe4ae0): - Correct outdated documentation on `Parser::allow_invalid_utf8` in the - `regex-syntax` crate. -* [BUG 24c7770b](https://github.com/rust-lang/regex/commit/24c7770b): - Fix a bug in the HIR printer where it wouldn't correctly escape meta - characters in character classes. - - -1.0.1 (2018-06-19) -================== -This release upgrades regex's Unicode tables to Unicode 11, and enables SIMD -optimizations automatically on Rust stable (1.27 or newer). - -New features: - -* [FEATURE #486](https://github.com/rust-lang/regex/pull/486): - Implement `size_hint` on `RegexSet` match iterators. -* [FEATURE #488](https://github.com/rust-lang/regex/pull/488): - Update Unicode tables for Unicode 11. -* [FEATURE #490](https://github.com/rust-lang/regex/pull/490): - SIMD optimizations are now enabled automatically in Rust stable, for versions - 1.27 and up. No compilation flags or features need to be set. CPU support - SIMD is detected automatically at runtime. - -Bug fixes: - -* [BUG #482](https://github.com/rust-lang/regex/pull/482): - Present a better compilation error when the `use_std` feature isn't used. - - -1.0.0 (2018-05-01) -================== -This release marks the 1.0 release of regex. - -While this release includes some breaking changes, most users of older versions -of the regex library should be able to migrate to 1.0 by simply bumping the -version number. The important changes are as follows: - -* We adopt Rust 1.20 as the new minimum supported version of Rust for regex. - We also tentativley adopt a policy that permits bumping the minimum supported - version of Rust in minor version releases of regex, but no patch releases. - That is, with respect to semver, we do not strictly consider bumping the - minimum version of Rust to be a breaking change, but adopt a conservative - stance as a compromise. -* Octal syntax in regular expressions has been disabled by default. 
This - permits better error messages that inform users that backreferences aren't - available. Octal syntax can be re-enabled via the corresponding option on - `RegexBuilder`. -* `(?-u:\B)` is no longer allowed in Unicode regexes since it can match at - invalid UTF-8 code unit boundaries. `(?-u:\b)` is still allowed in Unicode - regexes. -* The `From` impl has been removed. This formally removes - the public dependency on `regex-syntax`. -* A new feature, `use_std`, has been added and enabled by default. Disabling - the feature will result in a compilation error. In the future, this may - permit us to support `no_std` environments (w/ `alloc`) in a backwards - compatible way. - -For more information and discussion, please see -[1.0 release tracking issue](https://github.com/rust-lang/regex/issues/457). - - -0.2.11 (2018-05-01) -=================== -This release primarily contains bug fixes. Some of them resolve bugs where -the parser could panic. - -New features: - -* [FEATURE #459](https://github.com/rust-lang/regex/pull/459): - Include C++'s standard regex library and Boost's regex library in the - benchmark harness. We now include D/libphobos, C++/std, C++/boost, Oniguruma, - PCRE1, PCRE2, RE2 and Tcl in the harness. - -Bug fixes: - -* [BUG #445](https://github.com/rust-lang/regex/issues/445): - Clarify order of indices returned by RegexSet match iterator. -* [BUG #461](https://github.com/rust-lang/regex/issues/461): - Improve error messages for invalid regexes like `[\d-a]`. -* [BUG #464](https://github.com/rust-lang/regex/issues/464): - Fix a bug in the error message pretty printer that could cause a panic when - a regex contained a literal `\n` character. -* [BUG #465](https://github.com/rust-lang/regex/issues/465): - Fix a panic in the parser that was caused by applying a repetition operator - to `(?flags)`. -* [BUG #466](https://github.com/rust-lang/regex/issues/466): - Fix a bug where `\pC` was not recognized as an alias for `\p{Other}`. -* [BUG #470](https://github.com/rust-lang/regex/pull/470): - Fix a bug where literal searches did more work than necessary for anchored - regexes. - - -0.2.10 (2018-03-16) -=================== -This release primarily updates the regex crate to changes made in `std::arch` -on nightly Rust. - -New features: - -* [FEATURE #458](https://github.com/rust-lang/regex/pull/458): - The `Hir` type in `regex-syntax` now has a printer. - - -0.2.9 (2018-03-12) -================== -This release introduces a new nightly only feature, `unstable`, which enables -SIMD optimizations for certain types of regexes. No additional compile time -options are necessary, and the regex crate will automatically choose the -best CPU features at run time. As a result, the `simd` (nightly only) crate -dependency has been dropped. - -New features: - -* [FEATURE #456](https://github.com/rust-lang/regex/pull/456): - The regex crate now includes AVX2 optimizations in addition to the extant - SSSE3 optimization. - -Bug fixes: - -* [BUG #455](https://github.com/rust-lang/regex/pull/455): - Fix a bug where `(?x)[ / - ]` failed to parse. - - -0.2.8 (2018-03-12) -================== -Bug gixes: - -* [BUG #454](https://github.com/rust-lang/regex/pull/454): - Fix a bug in the nest limit checker being too aggressive. - - -0.2.7 (2018-03-07) -================== -This release includes a ground-up rewrite of the regex-syntax crate, which has -been in development for over a year. - -New features: - -* Error messages for invalid regexes have been greatly improved. 
You get these - automatically; you don't need to do anything. In addition to better - formatting, error messages will now explicitly call out the use of look - around. When regex 1.0 is released, this will happen for backreferences as - well. -* Full support for intersection, difference and symmetric difference of - character classes. These can be used via the `&&`, `--` and `~~` binary - operators within classes. -* A Unicode Level 1 conformat implementation of `\p{..}` character classes. - Things like `\p{scx:Hira}`, `\p{age:3.2}` or `\p{Changes_When_Casefolded}` - now work. All property name and value aliases are supported, and properties - are selected via loose matching. e.g., `\p{Greek}` is the same as - `\p{G r E e K}`. -* A new `UNICODE.md` document has been added to this repository that - exhaustively documents support for UTS#18. -* Empty sub-expressions are now permitted in most places. That is, `()+` is - now a valid regex. -* Almost everything in regex-syntax now uses constant stack space, even when - performing anaylsis that requires structural induction. This reduces the risk - of a user provided regular expression causing a stack overflow. -* [FEATURE #174](https://github.com/rust-lang/regex/issues/174): - The `Ast` type in `regex-syntax` now contains span information. -* [FEATURE #424](https://github.com/rust-lang/regex/issues/424): - Support `\u`, `\u{...}`, `\U` and `\U{...}` syntax for specifying code points - in a regular expression. -* [FEATURE #449](https://github.com/rust-lang/regex/pull/449): - Add a `Replace::by_ref` adapter for use of a replacer without consuming it. - -Bug fixes: - -* [BUG #446](https://github.com/rust-lang/regex/issues/446): - We re-enable the Boyer-Moore literal matcher. - - -0.2.6 (2018-02-08) -================== -Bug fixes: - -* [BUG #446](https://github.com/rust-lang/regex/issues/446): - Fixes a bug in the new Boyer-Moore searcher that results in a match failure. - We fix this bug by temporarily disabling Boyer-Moore. - - -0.2.5 (2017-12-30) -================== -Bug fixes: - -* [BUG #437](https://github.com/rust-lang/regex/issues/437): - Fixes a bug in the new Boyer-Moore searcher that results in a panic. - - -0.2.4 (2017-12-30) -================== -New features: - -* [FEATURE #348](https://github.com/rust-lang/regex/pull/348): - Improve performance for capture searches on anchored regex. - (Contributed by @ethanpailes. Nice work!) -* [FEATURE #419](https://github.com/rust-lang/regex/pull/419): - Expand literal searching to include Tuned Boyer-Moore in some cases. - (Contributed by @ethanpailes. Nice work!) - -Bug fixes: - -* [BUG](https://github.com/rust-lang/regex/pull/436): - The regex compiler plugin has been removed. -* [BUG](https://github.com/rust-lang/regex/pull/436): - `simd` has been bumped to `0.2.1`, which fixes a Rust nightly build error. -* [BUG](https://github.com/rust-lang/regex/pull/436): - Bring the benchmark harness up to date. - - -0.2.3 (2017-11-30) -================== -New features: - -* [FEATURE #374](https://github.com/rust-lang/regex/pull/374): - Add `impl From for &str`. -* [FEATURE #380](https://github.com/rust-lang/regex/pull/380): - Derive `Clone` and `PartialEq` on `Error`. -* [FEATURE #400](https://github.com/rust-lang/regex/pull/400): - Update to Unicode 10. - -Bug fixes: - -* [BUG #375](https://github.com/rust-lang/regex/issues/375): - Fix a bug that prevented the bounded backtracker from terminating. 
-* [BUG #393](https://github.com/rust-lang/regex/issues/393), - [BUG #394](https://github.com/rust-lang/regex/issues/394): - Fix bug with `replace` methods for empty matches. - - -0.2.2 (2017-05-21) -================== -New features: - -* [FEATURE #341](https://github.com/rust-lang/regex/issues/341): - Support nested character classes and intersection operation. - For example, `[\p{Greek}&&\pL]` matches greek letters and - `[[0-9]&&[^4]]` matches every decimal digit except `4`. - (Much thanks to @robinst, who contributed this awesome feature.) - -Bug fixes: - -* [BUG #321](https://github.com/rust-lang/regex/issues/321): - Fix bug in literal extraction and UTF-8 decoding. -* [BUG #326](https://github.com/rust-lang/regex/issues/326): - Add documentation tip about the `(?x)` flag. -* [BUG #333](https://github.com/rust-lang/regex/issues/333): - Show additional replacement example using curly braces. -* [BUG #334](https://github.com/rust-lang/regex/issues/334): - Fix bug when resolving captures after a match. -* [BUG #338](https://github.com/rust-lang/regex/issues/338): - Add example that uses `Captures::get` to API documentation. -* [BUG #353](https://github.com/rust-lang/regex/issues/353): - Fix RegexSet bug that caused match failure in some cases. -* [BUG #354](https://github.com/rust-lang/regex/pull/354): - Fix panic in parser when `(?x)` is used. -* [BUG #358](https://github.com/rust-lang/regex/issues/358): - Fix literal optimization bug with RegexSet. -* [BUG #359](https://github.com/rust-lang/regex/issues/359): - Fix example code in README. -* [BUG #365](https://github.com/rust-lang/regex/pull/365): - Fix bug in `rure_captures_len` in the C binding. -* [BUG #367](https://github.com/rust-lang/regex/issues/367): - Fix byte class bug that caused a panic. - - -0.2.1 -===== -One major bug with `replace_all` has been fixed along with a couple of other -touchups. - -* [BUG #312](https://github.com/rust-lang/regex/issues/312): - Fix documentation for `NoExpand` to reference correct lifetime parameter. -* [BUG #314](https://github.com/rust-lang/regex/issues/314): - Fix a bug with `replace_all` when replacing a match with the empty string. -* [BUG #316](https://github.com/rust-lang/regex/issues/316): - Note a missing breaking change from the `0.2.0` CHANGELOG entry. - (`RegexBuilder::compile` was renamed to `RegexBuilder::build`.) -* [BUG #324](https://github.com/rust-lang/regex/issues/324): - Compiling `regex` should only require one version of `memchr` crate. - - -0.2.0 -===== -This is a new major release of the regex crate, and is an implementation of the -[regex 1.0 RFC](https://github.com/rust-lang/rfcs/blob/master/text/1620-regex-1.0.md). -We are releasing a `0.2` first, and if there are no major problems, we will -release a `1.0` shortly. For `0.2`, the minimum *supported* Rust version is -1.12. - -There are a number of **breaking changes** in `0.2`. They are split into two -types. The first type correspond to breaking changes in regular expression -syntax. The second type correspond to breaking changes in the API. - -Breaking changes for regex syntax: - -* POSIX character classes now require double bracketing. Previously, the regex - `[:upper:]` would parse as the `upper` POSIX character class. Now it parses - as the character class containing the characters `:upper:`. The fix to this - change is to use `[[:upper:]]` instead. Note that variants like - `[[:upper:][:blank:]]` continue to work. -* The character `[` must always be escaped inside a character class. 
-* The characters `&`, `-` and `~` must be escaped if any one of them are - repeated consecutively. For example, `[&]`, `[\&]`, `[\&\&]`, `[&-&]` are all - equivalent while `[&&]` is illegal. (The motivation for this and the prior - change is to provide a backwards compatible path for adding character class - set notation.) -* A `bytes::Regex` now has Unicode mode enabled by default (like the main - `Regex` type). This means regexes compiled with `bytes::Regex::new` that - don't have the Unicode flag set should add `(?-u)` to recover the original - behavior. - -Breaking changes for the regex API: - -* `find` and `find_iter` now **return `Match` values instead of - `(usize, usize)`.** `Match` values have `start` and `end` methods, which - return the match offsets. `Match` values also have an `as_str` method, - which returns the text of the match itself. -* The `Captures` type now only provides a single iterator over all capturing - matches, which should replace uses of `iter` and `iter_pos`. Uses of - `iter_named` should use the `capture_names` method on `Regex`. -* The `at` method on the `Captures` type has been renamed to `get`, and it - now returns a `Match`. Similarly, the `name` method on `Captures` now returns - a `Match`. -* The `replace` methods now return `Cow` values. The `Cow::Borrowed` variant - is returned when no replacements are made. -* The `Replacer` trait has been completely overhauled. This should only - impact clients that implement this trait explicitly. Standard uses of - the `replace` methods should continue to work unchanged. If you implement - the `Replacer` trait, please consult the new documentation. -* The `quote` free function has been renamed to `escape`. -* The `Regex::with_size_limit` method has been removed. It is replaced by - `RegexBuilder::size_limit`. -* The `RegexBuilder` type has switched from owned `self` method receivers to - `&mut self` method receivers. Most uses will continue to work unchanged, but - some code may require naming an intermediate variable to hold the builder. -* The `compile` method on `RegexBuilder` has been renamed to `build`. -* The free `is_match` function has been removed. It is replaced by compiling - a `Regex` and calling its `is_match` method. -* The `PartialEq` and `Eq` impls on `Regex` have been dropped. If you relied - on these impls, the fix is to define a wrapper type around `Regex`, impl - `Deref` on it and provide the necessary impls. -* The `is_empty` method on `Captures` has been removed. This always returns - `false`, so its use is superfluous. -* The `Syntax` variant of the `Error` type now contains a string instead of - a `regex_syntax::Error`. If you were examining syntax errors more closely, - you'll need to explicitly use the `regex_syntax` crate to re-parse the regex. -* The `InvalidSet` variant of the `Error` type has been removed since it is - no longer used. -* Most of the iterator types have been renamed to match conventions. If you - were using these iterator types explicitly, please consult the documentation - for its new name. For example, `RegexSplits` has been renamed to `Split`. - -A number of bugs have been fixed: - -* [BUG #151](https://github.com/rust-lang/regex/issues/151): - The `Replacer` trait has been changed to permit the caller to control - allocation. -* [BUG #165](https://github.com/rust-lang/regex/issues/165): - Remove the free `is_match` function. -* [BUG #166](https://github.com/rust-lang/regex/issues/166): - Expose more knobs (available in `0.1`) and remove `with_size_limit`. 
-* [BUG #168](https://github.com/rust-lang/regex/issues/168): - Iterators produced by `Captures` now have the correct lifetime parameters. -* [BUG #175](https://github.com/rust-lang/regex/issues/175): - Fix a corner case in the parsing of POSIX character classes. -* [BUG #178](https://github.com/rust-lang/regex/issues/178): - Drop the `PartialEq` and `Eq` impls on `Regex`. -* [BUG #179](https://github.com/rust-lang/regex/issues/179): - Remove `is_empty` from `Captures` since it always returns false. -* [BUG #276](https://github.com/rust-lang/regex/issues/276): - Position of named capture can now be retrieved from a `Captures`. -* [BUG #296](https://github.com/rust-lang/regex/issues/296): - Remove winapi/kernel32-sys dependency on UNIX. -* [BUG #307](https://github.com/rust-lang/regex/issues/307): - Fix error on emscripten. - - -0.1.80 -====== -* [PR #292](https://github.com/rust-lang/regex/pull/292): - Fixes bug #291, which was introduced by PR #290. - -0.1.79 -====== -* Require regex-syntax 0.3.8. - -0.1.78 -====== -* [PR #290](https://github.com/rust-lang/regex/pull/290): - Fixes bug #289, which caused some regexes with a certain combination - of literals to match incorrectly. - -0.1.77 -====== -* [PR #281](https://github.com/rust-lang/regex/pull/281): - Fixes bug #280 by disabling all literal optimizations when a pattern - is partially anchored. - -0.1.76 -====== -* Tweak criteria for using the Teddy literal matcher. - -0.1.75 -====== -* [PR #275](https://github.com/rust-lang/regex/pull/275): - Improves match verification performance in the Teddy SIMD searcher. -* [PR #278](https://github.com/rust-lang/regex/pull/278): - Replaces slow substring loop in the Teddy SIMD searcher with Aho-Corasick. -* Implemented DoubleEndedIterator on regex set match iterators. - -0.1.74 -====== -* Release regex-syntax 0.3.5 with a minor bug fix. -* Fix bug #272. -* Fix bug #277. -* [PR #270](https://github.com/rust-lang/regex/pull/270): - Fixes bugs #264, #268 and an unreported where the DFA cache size could be - drastically under estimated in some cases (leading to high unexpected memory - usage). - -0.1.73 -====== -* Release `regex-syntax 0.3.4`. -* Bump `regex-syntax` dependency version for `regex` to `0.3.4`. - -0.1.72 -====== -* [PR #262](https://github.com/rust-lang/regex/pull/262): - Fixes a number of small bugs caught by fuzz testing (AFL). - -0.1.71 -====== -* [PR #236](https://github.com/rust-lang/regex/pull/236): - Fix a bug in how suffix literals were extracted, which could lead - to invalid match behavior in some cases. - -0.1.70 -====== -* [PR #231](https://github.com/rust-lang/regex/pull/231): - Add SIMD accelerated multiple pattern search. -* [PR #228](https://github.com/rust-lang/regex/pull/228): - Reintroduce the reverse suffix literal optimization. -* [PR #226](https://github.com/rust-lang/regex/pull/226): - Implements NFA state compression in the lazy DFA. -* [PR #223](https://github.com/rust-lang/regex/pull/223): - A fully anchored RegexSet can now short-circuit. - -0.1.69 -====== -* [PR #216](https://github.com/rust-lang/regex/pull/216): - Tweak the threshold for running backtracking. -* [PR #217](https://github.com/rust-lang/regex/pull/217): - Add upper limit (from the DFA) to capture search (for the NFA). -* [PR #218](https://github.com/rust-lang/regex/pull/218): - Add rure, a C API. - -0.1.68 -====== -* [PR #210](https://github.com/rust-lang/regex/pull/210): - Fixed a performance bug in `bytes::Regex::replace` where `extend` was used - instead of `extend_from_slice`. 
-* [PR #211](https://github.com/rust-lang/regex/pull/211): - Fixed a bug in the handling of word boundaries in the DFA. -* [PR #213](https://github.com/rust-lang/pull/213): - Added RE2 and Tcl to the benchmark harness. Also added a CLI utility from - running regexes using any of the following regex engines: PCRE1, PCRE2, - Oniguruma, RE2, Tcl and of course Rust's own regexes. - -0.1.67 -====== -* [PR #201](https://github.com/rust-lang/regex/pull/201): - Fix undefined behavior in the `regex!` compiler plugin macro. -* [PR #205](https://github.com/rust-lang/regex/pull/205): - More improvements to DFA performance. Competitive with RE2. See PR for - benchmarks. -* [PR #209](https://github.com/rust-lang/regex/pull/209): - Release 0.1.66 was semver incompatible since it required a newer version - of Rust than previous releases. This PR fixes that. (And `0.1.66` was - yanked.) - -0.1.66 -====== -* Speculative support for Unicode word boundaries was added to the DFA. This - should remove the last common case that disqualified use of the DFA. -* An optimization that scanned for suffix literals and then matched the regular - expression in reverse was removed because it had worst case quadratic time - complexity. It was replaced with a more limited optimization where, given any - regex of the form `re$`, it will be matched in reverse from the end of the - haystack. -* [PR #202](https://github.com/rust-lang/regex/pull/202): - The inner loop of the DFA was heavily optimized to improve cache locality - and reduce the overall number of instructions run on each iteration. This - represents the first use of `unsafe` in `regex` (to elide bounds checks). -* [PR #200](https://github.com/rust-lang/regex/pull/200): - Use of the `mempool` crate (which used thread local storage) was replaced - with a faster version of a similar API in @Amanieu's `thread_local` crate. - It should reduce contention when using a regex from multiple threads - simultaneously. -* PCRE2 JIT benchmarks were added. A benchmark comparison can be found - [here](https://gist.github.com/anonymous/14683c01993e91689f7206a18675901b). - (Includes a comparison with PCRE1's JIT and Oniguruma.) -* A bug where word boundaries weren't being matched correctly in the DFA was - fixed. This only affected use of `bytes::Regex`. -* [#160](https://github.com/rust-lang/regex/issues/160): - `Captures` now has a `Debug` impl. diff --git a/collector/compile-benchmarks/regex-1.5.5/Cargo.lock b/collector/compile-benchmarks/regex-1.5.5/Cargo.lock deleted file mode 100644 index cc51538bf..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/Cargo.lock +++ /dev/null @@ -1,98 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. 
-version = 3 - -[[package]] -name = "aho-corasick" -version = "0.7.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f" -dependencies = [ - "memchr", -] - -[[package]] -name = "cfg-if" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" - -[[package]] -name = "getrandom" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c9495705279e7140bf035dde1f6e750c162df8b625267cd52cc44e0b156732c8" -dependencies = [ - "cfg-if", - "libc", - "wasi", -] - -[[package]] -name = "lazy_static" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" - -[[package]] -name = "libc" -version = "0.2.94" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18794a8ad5b29321f790b55d93dfba91e125cb1a9edbd4f8e3150acc771c1a5e" - -[[package]] -name = "memchr" -version = "2.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b16bd47d9e329435e309c58469fe0791c2d0d1ba96ec0954152a5ae2b04387dc" - -[[package]] -name = "quickcheck" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "588f6378e4dd99458b60ec275b4477add41ce4fa9f64dcba6f15adccb19b50d6" -dependencies = [ - "rand", -] - -[[package]] -name = "rand" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ef9e7e66b4468674bfcb0c81af8b7fa0bb154fa9f28eb840da5c447baeb8d7e" -dependencies = [ - "rand_core", -] - -[[package]] -name = "rand_core" -version = "0.6.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34cf66eb183df1c5876e2dcf6b13d57340741e8dc255b48e40a26de954d06ae7" -dependencies = [ - "getrandom", -] - -[[package]] -name = "regex" -version = "1.5.5" -dependencies = [ - "aho-corasick", - "lazy_static", - "memchr", - "quickcheck", - "rand", - "regex-syntax", -] - -[[package]] -name = "regex-syntax" -version = "0.6.25" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" - -[[package]] -name = "wasi" -version = "0.10.2+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" diff --git a/collector/compile-benchmarks/regex-1.5.5/Cargo.toml b/collector/compile-benchmarks/regex-1.5.5/Cargo.toml deleted file mode 100644 index 98ded2636..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/Cargo.toml +++ /dev/null @@ -1,151 +0,0 @@ -# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO -# -# When uploading crates to the registry Cargo will automatically -# "normalize" Cargo.toml files for maximal compatibility -# with all versions of Cargo and also rewrite `path` dependencies -# to registry (e.g., crates.io) dependencies. -# -# If you are reading this file be aware that the original Cargo.toml -# will likely look very different (and much more reasonable). -# See Cargo.toml.orig for the original contents. - -[package] -edition = "2018" -name = "regex" -version = "1.5.5" -authors = ["The Rust Project Developers"] -exclude = [ - "/scripts/*", - "/.github/*", -] -autotests = false -description = """ -An implementation of regular expressions for Rust. 
This implementation uses -finite automata and guarantees linear time matching on all inputs. -""" -homepage = "https://github.com/rust-lang/regex" -documentation = "https://docs.rs/regex" -readme = "README.md" -categories = ["text-processing"] -license = "MIT OR Apache-2.0" -repository = "https://github.com/rust-lang/regex" - -[profile.bench] -debug = true - -[profile.release] -debug = true - -[profile.test] -debug = true - -[lib] -doctest = false -bench = false - -[[test]] -name = "default" -path = "tests/test_default.rs" - -[[test]] -name = "default-bytes" -path = "tests/test_default_bytes.rs" - -[[test]] -name = "nfa" -path = "tests/test_nfa.rs" - -[[test]] -name = "nfa-utf8bytes" -path = "tests/test_nfa_utf8bytes.rs" - -[[test]] -name = "nfa-bytes" -path = "tests/test_nfa_bytes.rs" - -[[test]] -name = "backtrack" -path = "tests/test_backtrack.rs" - -[[test]] -name = "backtrack-utf8bytes" -path = "tests/test_backtrack_utf8bytes.rs" - -[[test]] -name = "backtrack-bytes" -path = "tests/test_backtrack_bytes.rs" - -[[test]] -name = "crates-regex" -path = "tests/test_crates_regex.rs" - -[dependencies.aho-corasick] -version = "0.7.18" -optional = true - -[dependencies.memchr] -version = "2.4.0" -optional = true - -[dependencies.regex-syntax] -version = "0.6.25" -default-features = false - -[dev-dependencies.lazy_static] -version = "1" - -[dev-dependencies.quickcheck] -version = "1.0.3" -default-features = false - -[dev-dependencies.rand] -version = "0.8.3" -features = [ - "getrandom", - "small_rng", -] -default-features = false - -[features] -default = [ - "std", - "perf", - "unicode", - "regex-syntax/default", -] -pattern = [] -perf = [ - "perf-cache", - "perf-dfa", - "perf-inline", - "perf-literal", -] -perf-cache = [] -perf-dfa = [] -perf-inline = [] -perf-literal = [ - "aho-corasick", - "memchr", -] -std = [] -unicode = [ - "unicode-age", - "unicode-bool", - "unicode-case", - "unicode-gencat", - "unicode-perl", - "unicode-script", - "unicode-segment", - "regex-syntax/unicode", -] -unicode-age = ["regex-syntax/unicode-age"] -unicode-bool = ["regex-syntax/unicode-bool"] -unicode-case = ["regex-syntax/unicode-case"] -unicode-gencat = ["regex-syntax/unicode-gencat"] -unicode-perl = ["regex-syntax/unicode-perl"] -unicode-script = ["regex-syntax/unicode-script"] -unicode-segment = ["regex-syntax/unicode-segment"] -unstable = ["pattern"] -use_std = ["std"] - -[workspace] \ No newline at end of file diff --git a/collector/compile-benchmarks/regex-1.5.5/Cargo.toml.orig b/collector/compile-benchmarks/regex-1.5.5/Cargo.toml.orig deleted file mode 100644 index 6f6ebd4d5..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/Cargo.toml.orig +++ /dev/null @@ -1,194 +0,0 @@ -[package] -name = "regex" -version = "1.5.5" #:version -authors = ["The Rust Project Developers"] -license = "MIT OR Apache-2.0" -readme = "README.md" -repository = "https://github.com/rust-lang/regex" -documentation = "https://docs.rs/regex" -homepage = "https://github.com/rust-lang/regex" -description = """ -An implementation of regular expressions for Rust. This implementation uses -finite automata and guarantees linear time matching on all inputs. -""" -categories = ["text-processing"] -autotests = false -exclude = ["/scripts/*", "/.github/*"] -edition = "2018" - -[workspace] -members = [ - "bench", "regex-capi", "regex-debug", "regex-syntax", -] - -[lib] -# There are no benchmarks in the library code itself -bench = false -# Doc tests fail when some features aren't present. 
The easiest way to work -# around this is to disable automatic doc testing, but explicitly test them -# with `cargo test --doc`. -doctest = false - -# Features are documented in the "Crate features" section of the crate docs: -# https://docs.rs/regex/*/#crate-features -[features] -default = ["std", "perf", "unicode", "regex-syntax/default"] - -# ECOSYSTEM FEATURES - -# The 'std' feature permits the regex crate to use the standard library. This -# is intended to support future use cases where the regex crate may be able -# to compile without std, and instead just rely on 'core' and 'alloc' (for -# example). Currently, this isn't supported, and removing the 'std' feature -# will prevent regex from compiling. -std = [] -# The 'use_std' feature is DEPRECATED. It will be removed in regex 2. Until -# then, it is an alias for the 'std' feature. -use_std = ["std"] - - -# PERFORMANCE FEATURES - -# Enables all performance features. -perf = ["perf-cache", "perf-dfa", "perf-inline", "perf-literal"] -# Enables fast caching. (If disabled, caching is still used, but is slower.) -# Currently, this feature has no effect. It used to remove the thread_local -# dependency and use a slower internal cache, but now the default cache has -# been improved and thread_local is no longer a dependency at all. -perf-cache = [] -# Enables use of a lazy DFA when possible. -perf-dfa = [] -# Enables aggressive use of inlining. -perf-inline = [] -# Enables literal optimizations. -perf-literal = ["aho-corasick", "memchr"] - - -# UNICODE DATA FEATURES - -# Enables all Unicode features. This expands if new Unicode features are added. -unicode = [ - "unicode-age", - "unicode-bool", - "unicode-case", - "unicode-gencat", - "unicode-perl", - "unicode-script", - "unicode-segment", - "regex-syntax/unicode", -] -# Enables use of the `Age` property, e.g., `\p{Age:3.0}`. -unicode-age = ["regex-syntax/unicode-age"] -# Enables use of a smattering of boolean properties, e.g., `\p{Emoji}`. -unicode-bool = ["regex-syntax/unicode-bool"] -# Enables Unicode-aware case insensitive matching, e.g., `(?i)β`. -unicode-case = ["regex-syntax/unicode-case"] -# Enables Unicode general categories, e.g., `\p{Letter}` or `\pL`. -unicode-gencat = ["regex-syntax/unicode-gencat"] -# Enables Unicode-aware Perl classes corresponding to `\w`, `\s` and `\d`. -unicode-perl = ["regex-syntax/unicode-perl"] -# Enables Unicode scripts and script extensions, e.g., `\p{Greek}`. -unicode-script = ["regex-syntax/unicode-script"] -# Enables Unicode segmentation properties, e.g., `\p{gcb=Extend}`. -unicode-segment = ["regex-syntax/unicode-segment"] - - -# UNSTABLE FEATURES (requires Rust nightly) - -# A blanket feature that governs whether unstable features are enabled or not. -# Unstable features are disabled by default, and typically rely on unstable -# features in rustc itself. -unstable = ["pattern"] - -# Enable to use the unstable pattern traits defined in std. This is enabled -# by default if the unstable feature is enabled. -pattern = [] - -# For very fast prefix literal matching. -[dependencies.aho-corasick] -version = "0.7.18" -optional = true - -# For skipping along search text quickly when a leading byte is known. -[dependencies.memchr] -version = "2.4.0" -optional = true - -# For parsing regular expressions. -[dependencies.regex-syntax] -path = "regex-syntax" -version = "0.6.25" -default-features = false - -[dev-dependencies] -# For examples. -lazy_static = "1" -# For property based tests. 
-quickcheck = { version = "1.0.3", default-features = false } -# For generating random test data. -rand = { version = "0.8.3", default-features = false, features = ["getrandom", "small_rng"] } -# To check README's example -# TODO: Re-enable this once the MSRV is 1.43 or greater. -# See: https://github.com/rust-lang/regex/issues/684 -# See: https://github.com/rust-lang/regex/issues/685 -# doc-comment = "0.3" - -# Run the test suite on the default behavior of Regex::new. -# This includes a mish mash of NFAs and DFAs, which are chosen automatically -# based on the regex. We test both of the NFA implementations by forcing their -# usage with the test definitions below. (We can't test the DFA implementations -# in the same way since they can't be used for every regex tested.) -[[test]] -path = "tests/test_default.rs" -name = "default" - -# The same as the default tests, but run on bytes::Regex. -[[test]] -path = "tests/test_default_bytes.rs" -name = "default-bytes" - -# Run the test suite on the NFA algorithm over Unicode codepoints. -[[test]] -path = "tests/test_nfa.rs" -name = "nfa" - -# Run the test suite on the NFA algorithm over bytes that match UTF-8 only. -[[test]] -path = "tests/test_nfa_utf8bytes.rs" -name = "nfa-utf8bytes" - -# Run the test suite on the NFA algorithm over arbitrary bytes. -[[test]] -path = "tests/test_nfa_bytes.rs" -name = "nfa-bytes" - -# Run the test suite on the backtracking engine over Unicode codepoints. -[[test]] -path = "tests/test_backtrack.rs" -name = "backtrack" - -# Run the test suite on the backtracking engine over bytes that match UTF-8 -# only. -[[test]] -path = "tests/test_backtrack_utf8bytes.rs" -name = "backtrack-utf8bytes" - -# Run the test suite on the backtracking engine over arbitrary bytes. -[[test]] -path = "tests/test_backtrack_bytes.rs" -name = "backtrack-bytes" - -# Run all backends against each regex found on crates.io and make sure -# that they all do the same thing. -[[test]] -path = "tests/test_crates_regex.rs" -name = "crates-regex" - -[profile.release] -debug = true - -[profile.bench] -debug = true - -[profile.test] -debug = true diff --git a/collector/compile-benchmarks/regex-1.5.5/HACKING.md b/collector/compile-benchmarks/regex-1.5.5/HACKING.md deleted file mode 100644 index 34af5b517..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/HACKING.md +++ /dev/null @@ -1,341 +0,0 @@ -Your friendly guide to hacking and navigating the regex library. - -This guide assumes familiarity with Rust and Cargo, and at least a perusal of -the user facing documentation for this crate. - -If you're looking for background on the implementation in this library, then -you can do no better than Russ Cox's article series on implementing regular -expressions using finite automata: https://swtch.com/~rsc/regexp/ - - -## Architecture overview - -As you probably already know, this library executes regular expressions using -finite automata. In particular, a design goal is to make searching linear -with respect to both the regular expression and the text being searched. -Meeting that design goal on its own is not so hard and can be done with an -implementation of the Pike VM (similar to Thompson's construction, but supports -capturing groups), as described in: https://swtch.com/~rsc/regexp/regexp2.html ---- This library contains such an implementation in src/pikevm.rs. - -Making it fast is harder. One of the key problems with the Pike VM is that it -can be in more than one state at any point in time, and must shuffle capture -positions between them. 
The Pike VM also spends a lot of time following the -same epsilon transitions over and over again. We can employ one trick to -speed up the Pike VM: extract one or more literal prefixes from the regular -expression and execute specialized code to quickly find matches of those -prefixes in the search text. The Pike VM can then be avoided for most the -search, and instead only executed when a prefix is found. The code to find -prefixes is in the regex-syntax crate (in this repository). The code to search -for literals is in src/literals.rs. When more than one literal prefix is found, -we fall back to an Aho-Corasick DFA using the aho-corasick crate. For one -literal, we use a variant of the Boyer-Moore algorithm. Both Aho-Corasick and -Boyer-Moore use `memchr` when appropriate. The Boyer-Moore variant in this -library also uses elementary frequency analysis to choose the right byte to run -`memchr` with. - -Of course, detecting prefix literals can only take us so far. Not all regular -expressions have literal prefixes. To remedy this, we try another approach -to executing the Pike VM: backtracking, whose implementation can be found in -src/backtrack.rs. One reason why backtracking can be faster is that it avoids -excessive shuffling of capture groups. Of course, backtracking is susceptible -to exponential runtimes, so we keep track of every state we've visited to make -sure we never visit it again. This guarantees linear time execution, but we -pay for it with the memory required to track visited states. Because of the -memory requirement, we only use this engine on small search strings *and* small -regular expressions. - -Lastly, the real workhorse of this library is the "lazy" DFA in src/dfa.rs. -It is distinct from the Pike VM in that the DFA is explicitly represented in -memory and is only ever in one state at a time. It is said to be "lazy" because -the DFA is computed as text is searched, where each byte in the search text -results in at most one new DFA state. It is made fast by caching states. DFAs -are susceptible to exponential state blow up (where the worst case is computing -a new state for every input byte, regardless of what's in the state cache). To -avoid using a lot of memory, the lazy DFA uses a bounded cache. Once the cache -is full, it is wiped and state computation starts over again. If the cache is -wiped too frequently, then the DFA gives up and searching falls back to one of -the aforementioned algorithms. - -All of the above matching engines expose precisely the same matching semantics. -This is indeed tested. (See the section below about testing.) - -The following sub-sections describe the rest of the library and how each of the -matching engines are actually used. - -### Parsing - -Regular expressions are parsed using the regex-syntax crate, which is -maintained in this repository. The regex-syntax crate defines an abstract -syntax and provides very detailed error messages when a parse error is -encountered. Parsing is done in a separate crate so that others may benefit -from its existence, and because it is relatively divorced from the rest of the -regex library. - -The regex-syntax crate also provides sophisticated support for extracting -prefix and suffix literals from regular expressions. - -### Compilation - -The compiler is in src/compile.rs. The input to the compiler is some abstract -syntax for a regular expression and the output is a sequence of opcodes that -matching engines use to execute a search. 
(One can think of matching engines as -mini virtual machines.) The sequence of opcodes is a particular encoding of a -non-deterministic finite automaton. In particular, the opcodes explicitly rely -on epsilon transitions. - -Consider a simple regular expression like `a|b`. Its compiled form looks like -this: - - 000 Save(0) - 001 Split(2, 3) - 002 'a' (goto: 4) - 003 'b' - 004 Save(1) - 005 Match - -The first column is the instruction pointer and the second column is the -instruction. Save instructions indicate that the current position in the input -should be stored in a captured location. Split instructions represent a binary -branch in the program (i.e., epsilon transitions). The instructions `'a'` and -`'b'` indicate that the literal bytes `'a'` or `'b'` should match. - -In older versions of this library, the compilation looked like this: - - 000 Save(0) - 001 Split(2, 3) - 002 'a' - 003 Jump(5) - 004 'b' - 005 Save(1) - 006 Match - -In particular, empty instructions that merely served to move execution from one -point in the program to another were removed. Instead, every instruction has a -`goto` pointer embedded into it. This resulted in a small performance boost for -the Pike VM, because it was one fewer epsilon transition that it had to follow. - -There exist more instructions and they are defined and documented in -src/prog.rs. - -Compilation has several knobs and a few unfortunately complicated invariants. -Namely, the output of compilation can be one of two types of programs: a -program that executes on Unicode scalar values or a program that executes -on raw bytes. In the former case, the matching engine is responsible for -performing UTF-8 decoding and executing instructions using Unicode codepoints. -In the latter case, the program handles UTF-8 decoding implicitly, so that the -matching engine can execute on raw bytes. All matching engines can execute -either Unicode or byte based programs except for the lazy DFA, which requires -byte based programs. In general, both representations were kept because (1) the -lazy DFA requires byte based programs so that states can be encoded in a memory -efficient manner and (2) the Pike VM benefits greatly from inlining Unicode -character classes into fewer instructions as it results in fewer epsilon -transitions. - -N.B. UTF-8 decoding is built into the compiled program by making use of the -utf8-ranges crate. The compiler in this library factors out common suffixes to -reduce the size of huge character classes (e.g., `\pL`). - -A regrettable consequence of this split in instruction sets is we generally -need to compile two programs; one for NFA execution and one for the lazy DFA. - -In fact, it is worse than that: the lazy DFA is not capable of finding the -starting location of a match in a single scan, and must instead execute a -backwards search after finding the end location. To execute a backwards search, -we must have compiled the regular expression *in reverse*. - -This means that every compilation of a regular expression generally results in -three distinct programs. It would be possible to lazily compile the Unicode -program, since it is never needed if (1) the regular expression uses no word -boundary assertions and (2) the caller never asks for sub-capture locations. - -### Execution - -At the time of writing, there are four matching engines in this library: - -1. The Pike VM (supports captures). -2. Bounded backtracking (supports captures). -3. Literal substring or multi-substring search. -4. 
Lazy DFA (no support for Unicode word boundary assertions). - -Only the first two matching engines are capable of executing every regular -expression program. They also happen to be the slowest, which means we need -some logic that (1) knows various facts about the regular expression and (2) -knows what the caller wants. Using this information, we can determine which -engine (or engines) to use. - -The logic for choosing which engine to execute is in src/exec.rs and is -documented on the Exec type. Exec values contain regular expression Programs -(defined in src/prog.rs), which contain all the necessary tidbits for actually -executing a regular expression on search text. - -For the most part, the execution logic is straight-forward and follows the -limitations of each engine described above pretty faithfully. The hairiest -part of src/exec.rs by far is the execution of the lazy DFA, since it requires -a forwards and backwards search, and then falls back to either the Pike VM or -backtracking if the caller requested capture locations. - -The Exec type also contains mutable scratch space for each type of matching -engine. This scratch space is used during search (for example, for the lazy -DFA, it contains compiled states that are reused on subsequent searches). - -### Programs - -A regular expression program is essentially a sequence of opcodes produced by -the compiler plus various facts about the regular expression (such as whether -it is anchored, its capture names, etc.). - -### The regex! macro - -The `regex!` macro no longer exists. It was developed in a bygone era as a -compiler plugin during the infancy of the regex crate. Back then, then only -matching engine in the crate was the Pike VM. The `regex!` macro was, itself, -also a Pike VM. The only advantages it offered over the dynamic Pike VM that -was built at runtime were the following: - - 1. Syntax checking was done at compile time. Your Rust program wouldn't - compile if your regex didn't compile. - 2. Reduction of overhead that was proportional to the size of the regex. - For the most part, this overhead consisted of heap allocation, which - was nearly eliminated in the compiler plugin. - -The main takeaway here is that the compiler plugin was a marginally faster -version of a slow regex engine. As the regex crate evolved, it grew other regex -engines (DFA, bounded backtracker) and sophisticated literal optimizations. -The regex macro didn't keep pace, and it therefore became (dramatically) slower -than the dynamic engines. The only reason left to use it was for the compile -time guarantee that your regex is correct. Fortunately, Clippy (the Rust lint -tool) has a lint that checks your regular expression validity, which mostly -replaces that use case. - -Additionally, the regex compiler plugin stopped receiving maintenance. Nobody -complained. At that point, it seemed prudent to just remove it. - -Will a compiler plugin be brought back? The future is murky, but there is -definitely an opportunity there to build something that is faster than the -dynamic engines in some cases. But it will be challenging! As of now, there -are no plans to work on this. - - -## Testing - -A key aspect of any mature regex library is its test suite. A subset of the -tests in this library come from Glenn Fowler's AT&T test suite (its online -presence seems gone at the time of writing). The source of the test suite is -located in src/testdata. The scripts/regex-match-tests.py takes the test suite -in src/testdata and generates tests/matches.rs. 
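Before going further into the test layout, here is a deliberately simplified, hypothetical sketch of the engine choice described in the Execution section above. The type names, fields, and threshold below are invented for illustration only; the real logic in src/exec.rs handles many more cases.

```rust
// Hypothetical illustration only: these are not the actual types in
// src/exec.rs, and the threshold below is invented.
enum CaptureEngine {
    BoundedBacktracker,
    PikeVm,
}

/// Can the lazy DFA be used to find match boundaries for this program?
fn lazy_dfa_usable(has_unicode_word_boundary: bool) -> bool {
    // The lazy DFA gives up in the presence of Unicode word boundaries.
    !has_unicode_word_boundary
}

/// Which NFA simulation should fill in capture positions once the match
/// boundaries are known?
fn capture_engine(program_len: usize, input_len: usize) -> CaptureEngine {
    // Backtracking must remember every (instruction, position) pair it
    // visits, so it is only used when both the regex and the input are small.
    const BACKTRACK_LIMIT: usize = 256 * 1024;

    if program_len * input_len <= BACKTRACK_LIMIT {
        CaptureEngine::BoundedBacktracker
    } else {
        CaptureEngine::PikeVm
    }
}

fn main() {
    assert!(lazy_dfa_usable(false));
    assert!(matches!(capture_engine(500, 200), CaptureEngine::BoundedBacktracker));
}
```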
- -There are also many other manually crafted tests and regression tests in -tests/tests.rs. Some of these tests were taken from RE2. - -The biggest source of complexity in the tests is related to answering this -question: how can we reuse the tests to check all of our matching engines? One -approach would have been to encode every test into some kind of format (like -the AT&T test suite) and code generate tests for each matching engine. The -approach we use in this library is to create a Cargo.toml entry point for each -matching engine we want to test. The entry points are: - -* `tests/test_default.rs` - tests `Regex::new` -* `tests/test_default_bytes.rs` - tests `bytes::Regex::new` -* `tests/test_nfa.rs` - tests `Regex::new`, forced to use the NFA - algorithm on every regex. -* `tests/test_nfa_bytes.rs` - tests `Regex::new`, forced to use the NFA - algorithm on every regex and use *arbitrary* byte based programs. -* `tests/test_nfa_utf8bytes.rs` - tests `Regex::new`, forced to use the NFA - algorithm on every regex and use *UTF-8* byte based programs. -* `tests/test_backtrack.rs` - tests `Regex::new`, forced to use - backtracking on every regex. -* `tests/test_backtrack_bytes.rs` - tests `Regex::new`, forced to use - backtracking on every regex and use *arbitrary* byte based programs. -* `tests/test_backtrack_utf8bytes.rs` - tests `Regex::new`, forced to use - backtracking on every regex and use *UTF-8* byte based programs. -* `tests/test_crates_regex.rs` - tests to make sure that all of the - backends behave in the same way against a number of quickcheck - generated random inputs. These tests need to be enabled through - the `RUST_REGEX_RANDOM_TEST` environment variable (see - below). - -The lazy DFA and pure literal engines are absent from this list because -they cannot be used on every regular expression. Instead, we rely on -`tests/test_dynamic.rs` to test the lazy DFA and literal engines when possible. - -Since the tests are repeated several times, and because `cargo test` runs all -entry points, it can take a while to compile everything. To reduce compile -times slightly, try using `cargo test --test default`, which will only use the -`tests/test_default.rs` entry point. - -The random testing takes quite a while, so it is not enabled by default. -In order to run the random testing you can set the -`RUST_REGEX_RANDOM_TEST` environment variable to anything before -invoking `cargo test`. Note that this variable is inspected at compile -time, so if the tests don't seem to be running, you may need to run -`cargo clean`. - -## Benchmarking - -The benchmarking in this crate is made up of many micro-benchmarks. Currently, -there are two primary sets of benchmarks: the benchmarks that were adopted -at this library's inception (in `bench/src/misc.rs`) and a newer set of -benchmarks meant to test various optimizations. Specifically, the latter set -contain some analysis and are in `bench/src/sherlock.rs`. Also, the latter -set are all executed on the same lengthy input whereas the former benchmarks -are executed on strings of varying length. - -There is also a smattering of benchmarks for parsing and compilation. - -Benchmarks are in a separate crate so that its dependencies can be managed -separately from the main regex crate. - -Benchmarking follows a similarly wonky setup as tests. 
There are multiple entry -points: - -* `bench_rust.rs` - benchmarks `Regex::new` -* `bench_rust_bytes.rs` benchmarks `bytes::Regex::new` -* `bench_pcre.rs` - benchmarks PCRE -* `bench_onig.rs` - benchmarks Oniguruma - -The PCRE and Oniguruma benchmarks exist as a comparison point to a mature -regular expression library. In general, this regex library compares favorably -(there are even a few benchmarks that PCRE simply runs too slowly on or -outright can't execute at all). I would love to add other regular expression -library benchmarks (especially RE2). - -If you're hacking on one of the matching engines and just want to see -benchmarks, then all you need to run is: - - $ (cd bench && ./run rust) - -If you want to compare your results with older benchmarks, then try: - - $ (cd bench && ./run rust | tee old) - $ ... make it faster - $ (cd bench && ./run rust | tee new) - $ cargo benchcmp old new --improvements - -The `cargo-benchcmp` utility is available here: -https://github.com/BurntSushi/cargo-benchcmp - -The `./bench/run` utility can run benchmarks for PCRE and Oniguruma too. See -`./bench/bench --help`. - -## Dev Docs - -When digging your teeth into the codebase for the first time, the -crate documentation can be a great resource. By default `rustdoc` -will strip out all documentation of private crate members in an -effort to help consumers of the crate focus on the *interface* -without having to concern themselves with the *implementation*. -Normally this is a great thing, but if you want to start hacking -on regex internals it is not what you want. Many of the private members -of this crate are well documented with rustdoc style comments, and -it would be a shame to miss out on the opportunity that presents. -You can generate the private docs with: - -``` -$ rustdoc --crate-name docs src/lib.rs -o target/doc -L target/debug/deps --no-defaults --passes collapse-docs --passes unindent-comments -``` - -Then just point your browser at `target/doc/regex/index.html`. - -See https://github.com/rust-lang/rust/issues/15347 for more info -about generating developer docs for internal use. diff --git a/collector/compile-benchmarks/regex-1.5.5/LICENSE-APACHE b/collector/compile-benchmarks/regex-1.5.5/LICENSE-APACHE deleted file mode 100644 index 16fe87b06..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/LICENSE-APACHE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - -TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - -1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. 
- - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - -2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - -3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - -4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - -5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - -6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - -7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - -8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - -9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - -END OF TERMS AND CONDITIONS - -APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - -Copyright [yyyy] [name of copyright owner] - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
diff --git a/collector/compile-benchmarks/regex-1.5.5/LICENSE-MIT b/collector/compile-benchmarks/regex-1.5.5/LICENSE-MIT deleted file mode 100644 index 39d4bdb5a..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/LICENSE-MIT +++ /dev/null @@ -1,25 +0,0 @@ -Copyright (c) 2014 The Rust Project Developers - -Permission is hereby granted, free of charge, to any -person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the -Software without restriction, including without -limitation the rights to use, copy, modify, merge, -publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software -is furnished to do so, subject to the following -conditions: - -The above copyright notice and this permission notice -shall be included in all copies or substantial portions -of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF -ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A -PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT -SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR -IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. diff --git a/collector/compile-benchmarks/regex-1.5.5/PERFORMANCE.md b/collector/compile-benchmarks/regex-1.5.5/PERFORMANCE.md deleted file mode 100644 index 8cd0d9c71..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/PERFORMANCE.md +++ /dev/null @@ -1,277 +0,0 @@ -Your friendly guide to understanding the performance characteristics of this -crate. - -This guide assumes some familiarity with the public API of this crate, which -can be found here: https://docs.rs/regex - -## Theory vs. Practice - -One of the design goals of this crate is to provide worst case linear time -behavior with respect to the text searched using finite state automata. This -means that, *in theory*, the performance of this crate is much better than most -regex implementations, which typically use backtracking which has worst case -exponential time. - -For example, try opening a Python interpreter and typing this: - - >>> import re - >>> re.search('(a*)*c', 'a' * 30).span() - -I'll wait. - -At some point, you'll figure out that it won't terminate any time soon. ^C it. - -The promise of this crate is that *this pathological behavior can't happen*. - -With that said, just because we have protected ourselves against worst case -exponential behavior doesn't mean we are immune from large constant factors -or places where the current regex engine isn't quite optimal. This guide will -detail those cases and provide guidance on how to avoid them, among other -bits of general advice. - -## Thou Shalt Not Compile Regular Expressions In A Loop - -**Advice**: Use `lazy_static` to amortize the cost of `Regex` compilation. - -Don't do it unless you really don't mind paying for it. Compiling a regular -expression in this crate is quite expensive. It is conceivable that it may get -faster some day, but I wouldn't hold out hope for, say, an order of magnitude -improvement. In particular, compilation can take any where from a few dozen -microseconds to a few dozen milliseconds. Yes, milliseconds. Unicode character -classes, in particular, have the largest impact on compilation performance. At -the time of writing, for example, `\pL{100}` takes around 44ms to compile. 
This -is because `\pL` corresponds to every letter in Unicode and compilation must -turn it into a proper automaton that decodes a subset of UTF-8 which -corresponds to those letters. Compilation also spends some cycles shrinking the -size of the automaton. - -This means that in order to realize efficient regex matching, one must -*amortize the cost of compilation*. Trivially, if a call to `is_match` is -inside a loop, then make sure your call to `Regex::new` is *outside* that loop. - -In many programming languages, regular expressions can be conveniently defined -and compiled in a global scope, and code can reach out and use them as if -they were global static variables. In Rust, there is really no concept of -life-before-main, and therefore, one cannot utter this: - - static MY_REGEX: Regex = Regex::new("...").unwrap(); - -Unfortunately, this would seem to imply that one must pass `Regex` objects -around to everywhere they are used, which can be especially painful depending -on how your program is structured. Thankfully, the -[`lazy_static`](https://crates.io/crates/lazy_static) -crate provides an answer that works well: - - use lazy_static::lazy_static; - use regex::Regex; - - fn some_helper_function(text: &str) -> bool { - lazy_static! { - static ref MY_REGEX: Regex = Regex::new("...").unwrap(); - } - MY_REGEX.is_match(text) - } - -In other words, the `lazy_static!` macro enables us to define a `Regex` *as if* -it were a global static value. What is actually happening under the covers is -that the code inside the macro (i.e., `Regex::new(...)`) is run on *first use* -of `MY_REGEX` via a `Deref` impl. The implementation is admittedly magical, but -it's self contained and everything works exactly as you expect. In particular, -`MY_REGEX` can be used from multiple threads without wrapping it in an `Arc` or -a `Mutex`. On that note... - -## Using a regex from multiple threads - -**Advice**: The performance impact from using a `Regex` from multiple threads -is likely negligible. If necessary, clone the `Regex` so that each thread gets -its own copy. Cloning a regex does not incur any additional memory overhead -than what would be used by using a `Regex` from multiple threads -simultaneously. *Its only cost is ergonomics.* - -It is supported and encouraged to define your regexes using `lazy_static!` as -if they were global static values, and then use them to search text from -multiple threads simultaneously. - -One might imagine that this is possible because a `Regex` represents a -*compiled* program, so that any allocation or mutation is already done, and is -therefore read-only. Unfortunately, this is not true. Each type of search -strategy in this crate requires some kind of mutable scratch space to use -*during search*. For example, when executing a DFA, its states are computed -lazily and reused on subsequent searches. Those states go into that mutable -scratch space. - -The mutable scratch space is an implementation detail, and in general, its -mutation should not be observable from users of this crate. Therefore, it uses -interior mutability. This implies that `Regex` can either only be used from one -thread, or it must do some sort of synchronization. Either choice is -reasonable, but this crate chooses the latter, in particular because it is -ergonomic and makes use with `lazy_static!` straight forward. - -Synchronization implies *some* amount of overhead. When a `Regex` is used from -a single thread, this overhead is negligible. 
When a `Regex` is used from -multiple threads simultaneously, it is possible for the overhead of -synchronization from contention to impact performance. The specific cases where -contention may happen is if you are calling any of these methods repeatedly -from multiple threads simultaneously: - -* shortest_match -* is_match -* find -* captures - -In particular, every invocation of one of these methods must synchronize with -other threads to retrieve its mutable scratch space before searching can start. -If, however, you are using one of these methods: - -* find_iter -* captures_iter - -Then you may not suffer from contention since the cost of synchronization is -amortized on *construction of the iterator*. That is, the mutable scratch space -is obtained when the iterator is created and retained throughout its lifetime. - -## Only ask for what you need - -**Advice**: Prefer in this order: `is_match`, `find`, `captures`. - -There are three primary search methods on a `Regex`: - -* is_match -* find -* captures - -In general, these are ordered from fastest to slowest. - -`is_match` is fastest because it doesn't actually need to find the start or the -end of the leftmost-first match. It can quit immediately after it knows there -is a match. For example, given the regex `a+` and the haystack, `aaaaa`, the -search will quit after examining the first byte. - -In contrast, `find` must return both the start and end location of the -leftmost-first match. It can use the DFA matcher for this, but must run it -forwards once to find the end of the match *and then run it backwards* to find -the start of the match. The two scans and the cost of finding the real end of -the leftmost-first match make this more expensive than `is_match`. - -`captures` is the most expensive of them all because it must do what `find` -does, and then run either the bounded backtracker or the Pike VM to fill in the -capture group locations. Both of these are simulations of an NFA, which must -spend a lot of time shuffling states around. The DFA limits the performance hit -somewhat by restricting the amount of text that must be searched via an NFA -simulation. - -One other method not mentioned is `shortest_match`. This method has precisely -the same performance characteristics as `is_match`, except it will return the -end location of when it discovered a match. For example, given the regex `a+` -and the haystack `aaaaa`, `shortest_match` may return `1` as opposed to `5`, -the latter of which being the correct end location of the leftmost-first match. - -## Literals in your regex may make it faster - -**Advice**: Literals can reduce the work that the regex engine needs to do. Use -them if you can, especially as prefixes. - -In particular, if your regex starts with a prefix literal, the prefix is -quickly searched before entering the (much slower) regex engine. For example, -given the regex `foo\w+`, the literal `foo` will be searched for using -Boyer-Moore. If there's no match, then no regex engine is ever used. Only when -there's a match is the regex engine invoked at the location of the match, which -effectively permits the regex engine to skip large portions of a haystack. -If a regex is comprised entirely of literals (possibly more than one), then -it's possible that the regex engine can be avoided entirely even when there's a -match. - -When one literal is found, Boyer-Moore is used. When multiple literals are -found, then an optimized version of Aho-Corasick is used. 
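For instance, the `foo\w+` example above carries the prefix literal `foo`: a haystack with no `foo` in it can be rejected by the literal scan alone, and the regex engine only runs where the prefix is found. A minimal usage sketch (the haystack contents are illustrative choices):

```rust
use regex::Regex;

fn main() {
    // `foo\w+` starts with the literal `foo`, which is detected at compile
    // time and searched for with a fast substring scan.
    let re = Regex::new(r"foo\w+").unwrap();

    // No `foo` anywhere: the literal scan alone rejects this haystack and the
    // regex engine never runs.
    assert!(!re.is_match("bar bar bar"));

    // The regex engine is only invoked at the position where `foo` is found.
    assert_eq!(re.find("xxxx foobar").unwrap().as_str(), "foobar");
}
```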
- -This optimization is in particular extended quite a bit in this crate. Here are -a few examples of regexes that get literal prefixes detected: - -* `(foo|bar)` detects `foo` and `bar` -* `(a|b)c` detects `ac` and `bc` -* `[ab]foo[yz]` detects `afooy`, `afooz`, `bfooy` and `bfooz` -* `a?b` detects `a` and `b` -* `a*b` detects `a` and `b` -* `(ab){3,6}` detects `ababab` - -Literals in anchored regexes can also be used for detecting non-matches very -quickly. For example, `^foo\w+` and `\w+foo$` may be able to detect a non-match -just by examining the first (or last) three bytes of the haystack. - -## Unicode word boundaries may prevent the DFA from being used - -**Advice**: In most cases, `\b` should work well. If not, use `(?-u:\b)` -instead of `\b` if you care about consistent performance more than correctness. - -It's a sad state of the current implementation. At the moment, the DFA will try -to interpret Unicode word boundaries as if they were ASCII word boundaries. -If the DFA comes across any non-ASCII byte, it will quit and fall back to an -alternative matching engine that can handle Unicode word boundaries correctly. -The alternate matching engine is generally quite a bit slower (perhaps by an -order of magnitude). If necessary, this can be ameliorated in two ways. - -The first way is to add some number of literal prefixes to your regular -expression. Even though the DFA may not be used, specialized routines will -still kick in to find prefix literals quickly, which limits how much work the -NFA simulation will need to do. - -The second way is to give up on Unicode and use an ASCII word boundary instead. -One can use an ASCII word boundary by disabling Unicode support. That is, -instead of using `\b`, use `(?-u:\b)`. Namely, given the regex `\b.+\b`, it -can be transformed into a regex that uses the DFA with `(?-u:\b).+(?-u:\b)`. It -is important to limit the scope of disabling the `u` flag, since it might lead -to a syntax error if the regex could match arbitrary bytes. For example, if one -wrote `(?-u)\b.+\b`, then a syntax error would be returned because `.` matches -any *byte* when the Unicode flag is disabled. - -The second way isn't appreciably different than just using a Unicode word -boundary in the first place, since the DFA will speculatively interpret it as -an ASCII word boundary anyway. The key difference is that if an ASCII word -boundary is used explicitly, then the DFA won't quit in the presence of -non-ASCII UTF-8 bytes. This results in giving up correctness in exchange for -more consistent performance. - -N.B. When using `bytes::Regex`, Unicode support is disabled by default, so one -can simply write `\b` to get an ASCII word boundary. - -## Excessive counting can lead to exponential state blow up in the DFA - -**Advice**: Don't write regexes that cause DFA state blow up if you care about -match performance. - -Wait, didn't I say that this crate guards against exponential worst cases? -Well, it turns out that the process of converting an NFA to a DFA can lead to -an exponential blow up in the number of states. This crate specifically guards -against exponential blow up by doing two things: - -1. The DFA is computed lazily. That is, a state in the DFA only exists in - memory if it is visited. In particular, the lazy DFA guarantees that *at - most* one state is created for every byte of input. This, on its own, - guarantees linear time complexity. -2. 
Of course, creating a new state for *every* byte of input means that search - will go incredibly slow because of very large constant factors. On top of - that, creating a state for every byte in a large haystack could result in - exorbitant memory usage. To ameliorate this, the DFA bounds the number of - states it can store. Once it reaches its limit, it flushes its cache. This - prevents reuse of states that it already computed. If the cache is flushed - too frequently, then the DFA will give up and execution will fall back to - one of the NFA simulations. - -In effect, this crate will detect exponential state blow up and fall back to -a search routine with fixed memory requirements. This does, however, mean that -searching will be much slower than one might expect. Regexes that rely on -counting in particular are strong aggravators of this behavior. For example, -matching `[01]*1[01]{20}$` against a random sequence of `0`s and `1`s. - -In the future, it may be possible to increase the bound that the DFA uses, -which would allow the caller to choose how much memory they're willing to -spend. - -## Resist the temptation to "optimize" regexes - -**Advice**: This ain't a backtracking engine. - -An entire book was written on how to optimize Perl-style regular expressions. -Most of those techniques are not applicable for this library. For example, -there is no problem with using non-greedy matching or having lots of -alternations in your regex. diff --git a/collector/compile-benchmarks/regex-1.5.5/README.md b/collector/compile-benchmarks/regex-1.5.5/README.md deleted file mode 100644 index 9acd5bb4a..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/README.md +++ /dev/null @@ -1,250 +0,0 @@ -regex -===== -A Rust library for parsing, compiling, and executing regular expressions. Its -syntax is similar to Perl-style regular expressions, but lacks a few features -like look around and backreferences. In exchange, all searches execute in -linear time with respect to the size of the regular expression and search text. -Much of the syntax and implementation is inspired -by [RE2](https://github.com/google/re2). - -[![Build status](https://github.com/rust-lang/regex/workflows/ci/badge.svg)](https://github.com/rust-lang/regex/actions) -[![Crates.io](https://img.shields.io/crates/v/regex.svg)](https://crates.io/crates/regex) -[![Rust](https://img.shields.io/badge/rust-1.41.1%2B-blue.svg?maxAge=3600)](https://github.com/rust-lang/regex) - -### Documentation - -[Module documentation with examples](https://docs.rs/regex). -The module documentation also includes a comprehensive description of the -syntax supported. - -Documentation with examples for the various matching functions and iterators -can be found on the -[`Regex` type](https://docs.rs/regex/*/regex/struct.Regex.html). 
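As a quick demonstration of the linear-time claim above, the pathological `(a*)*c` pattern (the same one used in the Python example earlier) compiles and fails to match promptly, since no backtracking is involved; the input length here is an arbitrary choice:

```rust
use regex::Regex;

fn main() {
    // A pattern that triggers catastrophic backtracking in backtracking
    // engines; with finite automata the search stays linear.
    let re = Regex::new(r"(a*)*c").unwrap();
    let haystack = "a".repeat(30);

    // There is no `c`, so there is no match, and the answer comes back
    // promptly instead of hanging.
    assert!(!re.is_match(&haystack));
}
```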
- -### Usage - -Add this to your `Cargo.toml`: - -```toml -[dependencies] -regex = "1.5" -``` - -Here's a simple example that matches a date in YYYY-MM-DD format and prints the -year, month and day: - -```rust -use regex::Regex; - -fn main() { - let re = Regex::new(r"(?x) -(?P\d{4}) # the year -- -(?P\d{2}) # the month -- -(?P\d{2}) # the day -").unwrap(); - let caps = re.captures("2010-03-14").unwrap(); - - assert_eq!("2010", &caps["year"]); - assert_eq!("03", &caps["month"]); - assert_eq!("14", &caps["day"]); -} -``` - -If you have lots of dates in text that you'd like to iterate over, then it's -easy to adapt the above example with an iterator: - -```rust -use regex::Regex; - -const TO_SEARCH: &'static str = " -On 2010-03-14, foo happened. On 2014-10-14, bar happened. -"; - -fn main() { - let re = Regex::new(r"(\d{4})-(\d{2})-(\d{2})").unwrap(); - - for caps in re.captures_iter(TO_SEARCH) { - // Note that all of the unwraps are actually OK for this regex - // because the only way for the regex to match is if all of the - // capture groups match. This is not true in general though! - println!("year: {}, month: {}, day: {}", - caps.get(1).unwrap().as_str(), - caps.get(2).unwrap().as_str(), - caps.get(3).unwrap().as_str()); - } -} -``` - -This example outputs: - -```text -year: 2010, month: 03, day: 14 -year: 2014, month: 10, day: 14 -``` - -### Usage: Avoid compiling the same regex in a loop - -It is an anti-pattern to compile the same regular expression in a loop since -compilation is typically expensive. (It takes anywhere from a few microseconds -to a few **milliseconds** depending on the size of the regex.) Not only is -compilation itself expensive, but this also prevents optimizations that reuse -allocations internally to the matching engines. - -In Rust, it can sometimes be a pain to pass regular expressions around if -they're used from inside a helper function. Instead, we recommend using the -[`lazy_static`](https://crates.io/crates/lazy_static) crate to ensure that -regular expressions are compiled exactly once. - -For example: - -```rust,ignore -use regex::Regex; - -fn some_helper_function(text: &str) -> bool { - lazy_static! { - static ref RE: Regex = Regex::new("...").unwrap(); - } - RE.is_match(text) -} -``` - -Specifically, in this example, the regex will be compiled when it is used for -the first time. On subsequent uses, it will reuse the previous compilation. - -### Usage: match regular expressions on `&[u8]` - -The main API of this crate (`regex::Regex`) requires the caller to pass a -`&str` for searching. In Rust, an `&str` is required to be valid UTF-8, which -means the main API can't be used for searching arbitrary bytes. - -To match on arbitrary bytes, use the `regex::bytes::Regex` API. The API -is identical to the main API, except that it takes an `&[u8]` to search -on instead of an `&str`. By default, `.` will match any *byte* using -`regex::bytes::Regex`, while `.` will match any *UTF-8 encoded Unicode scalar -value* using the main API. - -This example shows how to find all null-terminated strings in a slice of bytes: - -```rust -use regex::bytes::Regex; - -let re = Regex::new(r"(?P[^\x00]+)\x00").unwrap(); -let text = b"foo\x00bar\x00baz\x00"; - -// Extract all of the strings without the null terminator from each match. -// The unwrap is OK here since a match requires the `cstr` capture to match. 
-let cstrs: Vec<&[u8]> = - re.captures_iter(text) - .map(|c| c.name("cstr").unwrap().as_bytes()) - .collect(); -assert_eq!(vec![&b"foo"[..], &b"bar"[..], &b"baz"[..]], cstrs); -``` - -Notice here that the `[^\x00]+` will match any *byte* except for `NUL`. When -using the main API, `[^\x00]+` would instead match any valid UTF-8 sequence -except for `NUL`. - -### Usage: match multiple regular expressions simultaneously - -This demonstrates how to use a `RegexSet` to match multiple (possibly -overlapping) regular expressions in a single scan of the search text: - -```rust -use regex::RegexSet; - -let set = RegexSet::new(&[ - r"\w+", - r"\d+", - r"\pL+", - r"foo", - r"bar", - r"barfoo", - r"foobar", -]).unwrap(); - -// Iterate over and collect all of the matches. -let matches: Vec<_> = set.matches("foobar").into_iter().collect(); -assert_eq!(matches, vec![0, 2, 3, 4, 6]); - -// You can also test whether a particular regex matched: -let matches = set.matches("foobar"); -assert!(!matches.matched(5)); -assert!(matches.matched(6)); -``` - -### Usage: enable SIMD optimizations - -SIMD optimizations are enabled automatically on Rust stable 1.27 and newer. -For nightly versions of Rust, this requires a recent version with the SIMD -features stabilized. - - -### Usage: a regular expression parser - -This repository contains a crate that provides a well tested regular expression -parser, abstract syntax and a high-level intermediate representation for -convenient analysis. It provides no facilities for compilation or execution. -This may be useful if you're implementing your own regex engine or otherwise -need to do analysis on the syntax of a regular expression. It is otherwise not -recommended for general use. - -[Documentation `regex-syntax`.](https://docs.rs/regex-syntax) - - -### Crate features - -This crate comes with several features that permit tweaking the trade off -between binary size, compilation time and runtime performance. Users of this -crate can selectively disable Unicode tables, or choose from a variety of -optimizations performed by this crate to disable. - -When all of these features are disabled, runtime match performance may be much -worse, but if you're matching on short strings, or if high performance isn't -necessary, then such a configuration is perfectly serviceable. To disable -all such features, use the following `Cargo.toml` dependency configuration: - -```toml -[dependencies.regex] -version = "1.3" -default-features = false -# regex currently requires the standard library, you must re-enable it. -features = ["std"] -``` - -This will reduce the dependency tree of `regex` down to a single crate -(`regex-syntax`). - -The full set of features one can disable are -[in the "Crate features" section of the documentation](https://docs.rs/regex/*/#crate-features). - - -### Minimum Rust version policy - -This crate's minimum supported `rustc` version is `1.41.1`. - -The current **tentative** policy is that the minimum Rust version required -to use this crate can be increased in minor version updates. For example, if -regex 1.0 requires Rust 1.20.0, then regex 1.0.z for all values of `z` will -also require Rust 1.20.0 or newer. However, regex 1.y for `y > 0` may require a -newer minimum version of Rust. - -In general, this crate will be conservative with respect to the minimum -supported version of Rust. 
- - -### License - -This project is licensed under either of - - * Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or - https://www.apache.org/licenses/LICENSE-2.0) - * MIT license ([LICENSE-MIT](LICENSE-MIT) or - https://opensource.org/licenses/MIT) - -at your option. - -The data in `regex-syntax/src/unicode_tables/` is licensed under the Unicode -License Agreement -([LICENSE-UNICODE](https://www.unicode.org/copyright.html#License)). diff --git a/collector/compile-benchmarks/regex-1.5.5/UNICODE.md b/collector/compile-benchmarks/regex-1.5.5/UNICODE.md deleted file mode 100644 index df7d21ed9..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/UNICODE.md +++ /dev/null @@ -1,259 +0,0 @@ -# Unicode conformance - -This document describes the regex crate's conformance to Unicode's -[UTS#18](https://unicode.org/reports/tr18/) -report, which lays out 3 levels of support: Basic, Extended and Tailored. - -Full support for Level 1 ("Basic Unicode Support") is provided with two -exceptions: - -1. Line boundaries are not Unicode aware. Namely, only the `\n` - (`END OF LINE`) character is recognized as a line boundary. -2. The compatibility properties specified by - [RL1.2a](https://unicode.org/reports/tr18/#RL1.2a) - are ASCII-only definitions. - -Little to no support is provided for either Level 2 or Level 3. For the most -part, this is because the features are either complex/hard to implement, or at -the very least, very difficult to implement without sacrificing performance. -For example, tackling canonical equivalence such that matching worked as one -would expect regardless of normalization form would be a significant -undertaking. This is at least partially a result of the fact that this regex -engine is based on finite automata, which admits less flexibility normally -associated with backtracking implementations. - - -## RL1.1 Hex Notation - -[UTS#18 RL1.1](https://unicode.org/reports/tr18/#Hex_notation) - -Hex Notation refers to the ability to specify a Unicode code point in a regular -expression via its hexadecimal code point representation. This is useful in -environments that have poor Unicode font rendering or if you need to express a -code point that is not normally displayable. All forms of hexadecimal notation -are supported - - \x7F hex character code (exactly two digits) - \x{10FFFF} any hex character code corresponding to a Unicode code point - \u007F hex character code (exactly four digits) - \u{7F} any hex character code corresponding to a Unicode code point - \U0000007F hex character code (exactly eight digits) - \U{7F} any hex character code corresponding to a Unicode code point - -Briefly, the `\x{...}`, `\u{...}` and `\U{...}` are all exactly equivalent ways -of expressing hexadecimal code points. Any number of digits can be written -within the brackets. In contrast, `\xNN`, `\uNNNN`, `\UNNNNNNNN` are all -fixed-width variants of the same idea. - -Note that when Unicode mode is disabled, any non-ASCII Unicode codepoint is -banned. Additionally, the `\xNN` syntax represents arbitrary bytes when Unicode -mode is disabled. That is, the regex `\xFF` matches the Unicode codepoint -U+00FF (encoded as `\xC3\xBF` in UTF-8) while the regex `(?-u)\xFF` matches -the literal byte `\xFF`. - - -## RL1.2 Properties - -[UTS#18 RL1.2](https://unicode.org/reports/tr18/#Categories) - -Full support for Unicode property syntax is provided. Unicode properties -provide a convenient way to construct character classes of groups of code -points specified by Unicode. 
The regex crate does not provide exhaustive -support, but covers a useful subset. In particular: - -* [General categories](https://unicode.org/reports/tr18/#General_Category_Property) -* [Scripts and Script Extensions](https://unicode.org/reports/tr18/#Script_Property) -* [Age](https://unicode.org/reports/tr18/#Age) -* A smattering of boolean properties, including all of those specified by - [RL1.2](https://unicode.org/reports/tr18/#RL1.2) explicitly. - -In all cases, property name and value abbreviations are supported, and all -names/values are matched loosely without regard for case, whitespace or -underscores. Property name aliases can be found in Unicode's -[`PropertyAliases.txt`](https://www.unicode.org/Public/UCD/latest/ucd/PropertyAliases.txt) -file, while property value aliases can be found in Unicode's -[`PropertyValueAliases.txt`](https://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt) -file. - -The syntax supported is also consistent with the UTS#18 recommendation: - -* `\p{Greek}` selects the `Greek` script. Equivalent expressions follow: - `\p{sc:Greek}`, `\p{Script:Greek}`, `\p{Sc=Greek}`, `\p{script=Greek}`, - `\P{sc!=Greek}`. Similarly for `General_Category` (or `gc` for short) and - `Script_Extensions` (or `scx` for short). -* `\p{age:3.2}` selects all code points in Unicode 3.2. -* `\p{Alphabetic}` selects the "alphabetic" property and can be abbreviated - via `\p{alpha}` (for example). -* Single letter variants for properties with single letter abbreviations. - For example, `\p{Letter}` can be equivalently written as `\pL`. - -The following is a list of all properties supported by the regex crate (starred -properties correspond to properties required by RL1.2): - -* `General_Category` \* (including `Any`, `ASCII` and `Assigned`) -* `Script` \* -* `Script_Extensions` \* -* `Age` -* `ASCII_Hex_Digit` -* `Alphabetic` \* -* `Bidi_Control` -* `Case_Ignorable` -* `Cased` -* `Changes_When_Casefolded` -* `Changes_When_Casemapped` -* `Changes_When_Lowercased` -* `Changes_When_Titlecased` -* `Changes_When_Uppercased` -* `Dash` -* `Default_Ignorable_Code_Point` \* -* `Deprecated` -* `Diacritic` -* `Emoji` -* `Emoji_Presentation` -* `Emoji_Modifier` -* `Emoji_Modifier_Base` -* `Emoji_Component` -* `Extended_Pictographic` -* `Extender` -* `Grapheme_Base` -* `Grapheme_Cluster_Break` -* `Grapheme_Extend` -* `Hex_Digit` -* `IDS_Binary_Operator` -* `IDS_Trinary_Operator` -* `ID_Continue` -* `ID_Start` -* `Join_Control` -* `Logical_Order_Exception` -* `Lowercase` \* -* `Math` -* `Noncharacter_Code_Point` \* -* `Pattern_Syntax` -* `Pattern_White_Space` -* `Prepended_Concatenation_Mark` -* `Quotation_Mark` -* `Radical` -* `Regional_Indicator` -* `Sentence_Break` -* `Sentence_Terminal` -* `Soft_Dotted` -* `Terminal_Punctuation` -* `Unified_Ideograph` -* `Uppercase` \* -* `Variation_Selector` -* `White_Space` \* -* `Word_Break` -* `XID_Continue` -* `XID_Start` - - -## RL1.2a Compatibility Properties - -[UTS#18 RL1.2a](https://unicode.org/reports/tr18/#RL1.2a) - -The regex crate only provides ASCII definitions of the -[compatibility properties documented in UTS#18 Annex C](https://unicode.org/reports/tr18/#Compatibility_Properties) -(sans the `\X` class, for matching grapheme clusters, which isn't provided -at all). This is because it seems to be consistent with most other regular -expression engines, and in particular, because these are often referred to as -"ASCII" or "POSIX" character classes. - -Note that the `\w`, `\s` and `\d` character classes **are** Unicode aware. 
-
-Their traditional ASCII definition can be used by disabling Unicode. That is,
-`[[:word:]]` and `(?-u)\w` are equivalent.
-
-
-## RL1.3 Subtraction and Intersection
-
-[UTS#18 RL1.3](https://unicode.org/reports/tr18/#Subtraction_and_Intersection)
-
-The regex crate provides full support for nested character classes, along with
-union, intersection (`&&`), difference (`--`) and symmetric difference (`~~`)
-operations on arbitrary character classes.
-
-For example, to match all non-ASCII letters, you could use either
-`[\p{Letter}--\p{Ascii}]` (difference) or `[\p{Letter}&&[^\p{Ascii}]]`
-(intersecting the negation).
-
-
-## RL1.4 Simple Word Boundaries
-
-[UTS#18 RL1.4](https://unicode.org/reports/tr18/#Simple_Word_Boundaries)
-
-The regex crate provides basic Unicode aware word boundary assertions. A word
-boundary assertion can be written as `\b`, or `\B` as its negation. A word
-boundary assertion corresponds to a zero-width match, where its adjacent
-characters correspond to word and non-word, or non-word and word characters.
-
-Conformance in this case chooses to define word character in the same way that
-the `\w` character class is defined: a code point that is a member of one of
-the following classes:
-
-* `\p{Alphabetic}`
-* `\p{Join_Control}`
-* `\p{gc:Mark}`
-* `\p{gc:Decimal_Number}`
-* `\p{gc:Connector_Punctuation}`
-
-In particular, this differs slightly from the
-[prescription given in RL1.4](https://unicode.org/reports/tr18/#Simple_Word_Boundaries)
-but is permissible according to
-[UTS#18 Annex C](https://unicode.org/reports/tr18/#Compatibility_Properties).
-Namely, it is convenient and simpler to have `\w` and `\b` be in sync with
-one another.
-
-Finally, Unicode word boundaries can be disabled, which will cause ASCII word
-boundaries to be used instead. That is, `\b` is a Unicode word boundary while
-`(?-u)\b` is an ASCII-only word boundary. This can occasionally be beneficial
-if performance is important, since the implementation of Unicode word
-boundaries is currently sub-optimal on non-ASCII text.
-
-
-## RL1.5 Simple Loose Matches
-
-[UTS#18 RL1.5](https://unicode.org/reports/tr18/#Simple_Loose_Matches)
-
-The regex crate provides full support for case insensitive matching in
-accordance with RL1.5. That is, it uses the "simple" case folding mapping. The
-"simple" mapping was chosen because of a key convenient property: every
-"simple" mapping is a mapping from exactly one code point to exactly one other
-code point. This makes case insensitive matching of character classes, for
-example, straight-forward to implement.
-
-When case insensitive mode is enabled (e.g., `(?i)[a]` is equivalent to `a|A`),
-then all character classes are case folded as well.
-
-
-## RL1.6 Line Boundaries
-
-[UTS#18 RL1.6](https://unicode.org/reports/tr18/#Line_Boundaries)
-
-The regex crate only provides support for recognizing the `\n` (`END OF LINE`)
-character as a line boundary. This choice was made mostly for implementation
-convenience, and to avoid performance cliffs that Unicode word boundaries are
-subject to.
-
-Ideally, it would be nice to at least support `\r\n` as a line boundary as
-well, and in theory, this could be done efficiently.
-
-
-## RL1.7 Code Points
-
-[UTS#18 RL1.7](https://unicode.org/reports/tr18/#Supplementary_Characters)
-
-The regex crate provides full support for Unicode code point matching. Namely,
-the fundamental atom of any match is always a single code point.
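-
-As a minimal sketch of what this means in practice (using the main
-`regex::Regex` API with its default Unicode mode), a match always covers whole
-code points, even when a single code point is encoded as multiple UTF-8 bytes:
-
-```rust
-use regex::Regex;
-
-fn main() {
-    // U+2192 (→) is one code point encoded as three UTF-8 bytes. `.` matches
-    // it as a single unit, and the reported range spans all three bytes.
-    let re = Regex::new(r"^.$").unwrap();
-    let m = re.find("→").unwrap();
-    assert_eq!((0, 3), (m.start(), m.end()));
-}
-```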
-
-Given Rust's strong ties to UTF-8, the following guarantees are also provided:
-
-* All matches are reported on valid UTF-8 code unit boundaries. That is, any
-  match range returned by the public regex API is guaranteed to successfully
-  slice the string that was searched.
-* By consequence of the above, it is impossible to match surrogate code points.
-  No support for UTF-16 is provided, so this is never necessary.
-
-Note that when Unicode mode is disabled, the fundamental atom of matching is
-no longer a code point but a single byte. When Unicode mode is disabled, many
-Unicode features are disabled as well. For example, `(?-u)\pL` is not a valid
-regex but `\pL(?-u)\xFF` (matches any Unicode `Letter` followed by the literal
-byte `\xFF`) is.
diff --git a/collector/compile-benchmarks/regex-1.5.5/examples/regexdna-input.txt b/collector/compile-benchmarks/regex-1.5.5/examples/regexdna-input.txt
deleted file mode 100644
index fb2326339..000000000
--- a/collector/compile-benchmarks/regex-1.5.5/examples/regexdna-input.txt
+++ /dev/null
@@ -1,1671 +0,0 @@
->ONE Homo sapiens alu
-GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGA
-TCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACT
-AAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAG
-GCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCG
-CCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGT
-GGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCA
-GGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAA
-TTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAG
-AATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCA
-GCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGT
-AATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACC
-AGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTG
-GTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACC
-CGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAG
-AGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTT
-TGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACA
-TGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCT
-GTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGG
-TTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGT
-CTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGG
-CGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCG
-TCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTA
-CTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCG
-AGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCG
-GGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACC
-TGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAA
-TACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGA
-GGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACT
-GCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTC
-ACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGT
-TCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGC
-CGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCG
-CTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTG
-GGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCC
-CAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCT
-GGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGC
-GCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGA
-GGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGA
-GACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGA
-GGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTG -AAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAAT -CCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCA -GTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAA -AAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGC -GGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCT -ACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGG -GAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATC -GCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGC -GGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGG -TCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAA -AAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAG -GAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACT -CCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCC -TGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAG -ACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGC -GTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGA -ACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGA -CAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCA -CTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCA -ACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCG -CCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGG -AGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTC -CGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCG -AGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACC -CCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAG -CTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAG -CCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGG -CCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATC -ACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAA -AAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGC -TGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCC -ACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGG -CTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGG -AGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATT -AGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAA -TCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGC -CTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAA -TCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAG -CCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGT -GGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCG -GGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAG -CGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTG -GGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATG -GTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGT -AATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTT -GCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCT -CAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCG -GGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTC -TCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACT -CGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAG -ATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGG -CGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTG -AGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATA -CAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGG -CAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGC -ACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCAC 
-GCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTC -GAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCG -GGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCT -TGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGG -CGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCA -GCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGG -CCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGC -GCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGG -CGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGA -CTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGG -CCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAA -ACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCC -CAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGT -GAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAA -AGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGG -ATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTAC -TAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGA -GGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGC -GCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGG -TGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTC -AGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAA -ATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGA -GAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC -AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTG -TAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGAC -CAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGT -GGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAAC -CCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACA -GAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACT -TTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAAC -ATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCC -TGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAG -GTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCG -TCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAG -GCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCC -GTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCT -ACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCC -GAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCC -GGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCAC -CTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAA -ATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTG -AGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCAC -TGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCT -CACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAG -TTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAG -CCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATC -GCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCT -GGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATC -CCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCC -TGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGG -CGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG -AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCG -AGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGG -AGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGT -GAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAA -TCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGC -AGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCA 
-AAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGG -CGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTC -TACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCG -GGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGAT -CGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCG -CGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAG -GTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACA -AAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCA -GGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCAC -TCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGC -CTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGA -GACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGG -CGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTG -AACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCG -ACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGC -ACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCC -AACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGC -GCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCG -GAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACT -CCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCC -GAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAAC -CCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA -GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGA -GCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAG -GCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGAT -CACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTA -AAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGG -CTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGC -CACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTG -GCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAG -GAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAAT -TAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGA -ATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAG -CCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTA -ATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCA -GCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGG -TGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCC -GGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGA -GCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTT -GGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACAT -GGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTG -TAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGT -TGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTC -TCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGC -GGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGT -CTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTAC -TCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGA -GATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGG -GCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCT -GAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT -ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAG -GCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTG -CACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCA -CGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTT -CGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCC -GGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGC -TTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGG 
-GCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCC -AGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTG -GCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCG -CGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAG -GCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAG -ACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAG -GCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGA -AACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATC -CCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAG -TGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAA -AAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCG -GATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTA -CTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGG -AGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCG -CGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCG -GTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGT -CAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAA -AATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGG -AGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTC -CAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCT -GTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA -CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCG -TGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAA -CCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGAC -AGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCAC -TTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAA -CATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGC -CTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGA -GGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCC -GTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGA -GGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCC -CGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGC -TACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGC -CGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGC -CGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCA -CCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAA -AATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCT -GAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCA -CTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGC -TCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGA -GTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTA -GCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAAT -CGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCC -TGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAAT -CCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGC -CTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTG -GCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGG -GAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGC -GAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG -GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGG -TGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTA -ATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTG -CAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTC -AAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGG -GCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCT -CTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTC -GGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGA 
-TCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGC -GCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGA -GGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATAC -AAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGC -AGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCA -CTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACG -CCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCG -AGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGG -GCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTT -GAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGC -GACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAG -CACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGC -CAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCG -CGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGC -GGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGAC -TCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGC -CGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAA -CCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCC -AGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTG -AGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA -GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGA -TCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACT -AAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAG -GCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCG -CCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGT -GGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCA -GGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAA -TTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAG -AATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCA -GCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGT -AATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACC -AGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTG -GTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACC -CGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAG -AGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTT -TGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACA -TGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCT -GTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGG -TTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGT -CTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGG -CGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCG -TCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTA -CTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCG -AGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCG -GGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACC -TGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAA -TACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGA -GGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACT -GCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTC -ACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGT -TCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGC -CGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCG -CTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTG -GGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCC -CAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCT -GGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGC -GCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGA 
-GGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGA -GACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGA -GGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTG -AAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAAT -CCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCA -GTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAA -AAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGC -GGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCT -ACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGG -GAGGCTGAGGCAGGAGAATC ->TWO IUB ambiguity codes -cttBtatcatatgctaKggNcataaaSatgtaaaDcDRtBggDtctttataattcBgtcg -tactDtDagcctatttSVHtHttKtgtHMaSattgWaHKHttttagacatWatgtRgaaa -NtactMcSMtYtcMgRtacttctWBacgaaatatagScDtttgaagacacatagtVgYgt -cattHWtMMWcStgttaggKtSgaYaaccWStcgBttgcgaMttBYatcWtgacaYcaga -gtaBDtRacttttcWatMttDBcatWtatcttactaBgaYtcttgttttttttYaaScYa -HgtgttNtSatcMtcVaaaStccRcctDaataataStcYtRDSaMtDttgttSagtRRca -tttHatSttMtWgtcgtatSSagactYaaattcaMtWatttaSgYttaRgKaRtccactt -tattRggaMcDaWaWagttttgacatgttctacaaaRaatataataaMttcgDacgaSSt -acaStYRctVaNMtMgtaggcKatcttttattaaaaagVWaHKYagtttttatttaacct -tacgtVtcVaattVMBcttaMtttaStgacttagattWWacVtgWYagWVRctDattBYt -gtttaagaagattattgacVatMaacattVctgtBSgaVtgWWggaKHaatKWcBScSWa -accRVacacaaactaccScattRatatKVtactatatttHttaagtttSKtRtacaaagt -RDttcaaaaWgcacatWaDgtDKacgaacaattacaRNWaatHtttStgttattaaMtgt -tgDcgtMgcatBtgcttcgcgaDWgagctgcgaggggVtaaScNatttacttaatgacag -cccccacatYScaMgtaggtYaNgttctgaMaacNaMRaacaaacaKctacatagYWctg -ttWaaataaaataRattagHacacaagcgKatacBttRttaagtatttccgatctHSaat -actcNttMaagtattMtgRtgaMgcataatHcMtaBSaRattagttgatHtMttaaKagg -YtaaBataSaVatactWtataVWgKgttaaaacagtgcgRatatacatVtHRtVYataSa -KtWaStVcNKHKttactatccctcatgWHatWaRcttactaggatctataDtDHBttata -aaaHgtacVtagaYttYaKcctattcttcttaataNDaaggaaaDYgcggctaaWSctBa -aNtgctggMBaKctaMVKagBaactaWaDaMaccYVtNtaHtVWtKgRtcaaNtYaNacg -gtttNattgVtttctgtBaWgtaattcaagtcaVWtactNggattctttaYtaaagccgc -tcttagHVggaYtgtNcDaVagctctctKgacgtatagYcctRYHDtgBattDaaDgccK -tcHaaStttMcctagtattgcRgWBaVatHaaaataYtgtttagMDMRtaataaggatMt -ttctWgtNtgtgaaaaMaatatRtttMtDgHHtgtcattttcWattRSHcVagaagtacg -ggtaKVattKYagactNaatgtttgKMMgYNtcccgSKttctaStatatNVataYHgtNa -BKRgNacaactgatttcctttaNcgatttctctataScaHtataRagtcRVttacDSDtt -aRtSatacHgtSKacYagttMHtWataggatgactNtatSaNctataVtttRNKtgRacc -tttYtatgttactttttcctttaaacatacaHactMacacggtWataMtBVacRaSaatc -cgtaBVttccagccBcttaRKtgtgcctttttRtgtcagcRttKtaaacKtaaatctcac -aattgcaNtSBaaccgggttattaaBcKatDagttactcttcattVtttHaaggctKKga -tacatcBggScagtVcacattttgaHaDSgHatRMaHWggtatatRgccDttcgtatcga -aacaHtaagttaRatgaVacttagattVKtaaYttaaatcaNatccRttRRaMScNaaaD -gttVHWgtcHaaHgacVaWtgttScactaagSgttatcttagggDtaccagWattWtRtg -ttHWHacgattBtgVcaYatcggttgagKcWtKKcaVtgaYgWctgYggVctgtHgaNcV -taBtWaaYatcDRaaRtSctgaHaYRttagatMatgcatttNattaDttaattgttctaa -ccctcccctagaWBtttHtBccttagaVaatMcBHagaVcWcagBVttcBtaYMccagat -gaaaaHctctaacgttagNWRtcggattNatcRaNHttcagtKttttgWatWttcSaNgg -gaWtactKKMaacatKatacNattgctWtatctaVgagctatgtRaHtYcWcttagccaa -tYttWttaWSSttaHcaaaaagVacVgtaVaRMgattaVcDactttcHHggHRtgNcctt -tYatcatKgctcctctatVcaaaaKaaaagtatatctgMtWtaaaacaStttMtcgactt -taSatcgDataaactaaacaagtaaVctaggaSccaatMVtaaSKNVattttgHccatca -cBVctgcaVatVttRtactgtVcaattHgtaaattaaattttYtatattaaRSgYtgBag -aHSBDgtagcacRHtYcBgtcacttacactaYcgctWtattgSHtSatcataaatataHt -cgtYaaMNgBaatttaRgaMaatatttBtttaaaHHKaatctgatWatYaacttMctctt -ttVctagctDaaagtaVaKaKRtaacBgtatccaaccactHHaagaagaaggaNaaatBW -attccgStaMSaMatBttgcatgRSacgttVVtaaDMtcSgVatWcaSatcttttVatag 
-ttactttacgatcaccNtaDVgSRcgVcgtgaacgaNtaNatatagtHtMgtHcMtagaa -attBgtataRaaaacaYKgtRccYtatgaagtaataKgtaaMttgaaRVatgcagaKStc -tHNaaatctBBtcttaYaBWHgtVtgacagcaRcataWctcaBcYacYgatDgtDHccta -aagacYRcaggattHaYgtKtaatgcVcaataMYacccatatcacgWDBtgaatcBaata -cKcttRaRtgatgaBDacggtaattaaYtataStgVHDtDctgactcaaatKtacaatgc -gYatBtRaDatHaactgtttatatDttttaaaKVccYcaaccNcBcgHaaVcattHctcg -attaaatBtatgcaaaaatYMctSactHatacgaWacattacMBgHttcgaatVaaaaca -BatatVtctgaaaaWtctRacgBMaatSgRgtgtcgactatcRtattaScctaStagKga -DcWgtYtDDWKRgRtHatRtggtcgaHgggcgtattaMgtcagccaBggWVcWctVaaat -tcgNaatcKWagcNaHtgaaaSaaagctcYctttRVtaaaatNtataaccKtaRgtttaM -tgtKaBtRtNaggaSattHatatWactcagtgtactaKctatttgRYYatKatgtccgtR -tttttatttaatatVgKtttgtatgtNtataRatWYNgtRtHggtaaKaYtKSDcatcKg -taaYatcSRctaVtSMWtVtRWHatttagataDtVggacagVcgKWagBgatBtaaagNc -aRtagcataBggactaacacRctKgttaatcctHgDgttKHHagttgttaatgHBtatHc -DaagtVaBaRccctVgtgDtacRHSctaagagcggWYaBtSaKtHBtaaactYacgNKBa -VYgtaacttagtVttcttaatgtBtatMtMtttaattaatBWccatRtttcatagVgMMt -agctStKctaMactacDNYgKYHgaWcgaHgagattacVgtttgtRaSttaWaVgataat -gtgtYtaStattattMtNgWtgttKaccaatagNYttattcgtatHcWtctaaaNVYKKt -tWtggcDtcgaagtNcagatacgcattaagaccWctgcagcttggNSgaNcHggatgtVt -catNtRaaBNcHVagagaaBtaaSggDaatWaatRccaVgggStctDaacataKttKatt -tggacYtattcSatcttagcaatgaVBMcttDattctYaaRgatgcattttNgVHtKcYR -aatRKctgtaaacRatVSagctgtWacBtKVatctgttttKcgtctaaDcaagtatcSat -aWVgcKKataWaYttcccSaatgaaaacccWgcRctWatNcWtBRttYaattataaNgac -acaatagtttVNtataNaYtaatRaVWKtBatKagtaatataDaNaaaaataMtaagaaS -tccBcaatNgaataWtHaNactgtcDtRcYaaVaaaaaDgtttRatctatgHtgttKtga -aNSgatactttcgagWaaatctKaaDaRttgtggKKagcDgataaattgSaacWaVtaNM -acKtcaDaaatttctRaaVcagNacaScRBatatctRatcctaNatWgRtcDcSaWSgtt -RtKaRtMtKaatgttBHcYaaBtgatSgaSWaScMgatNtctcctatttctYtatMatMt -RRtSaattaMtagaaaaStcgVgRttSVaScagtgDtttatcatcatacRcatatDctta -tcatVRtttataaHtattcYtcaaaatactttgVctagtaaYttagatagtSYacKaaac -gaaKtaaatagataatSatatgaaatSgKtaatVtttatcctgKHaatHattagaaccgt -YaaHactRcggSBNgtgctaaBagBttgtRttaaattYtVRaaaattgtaatVatttctc -ttcatgBcVgtgKgaHaaatattYatagWacNctgaaMcgaattStagWaSgtaaKagtt -ttaagaDgatKcctgtaHtcatggKttVDatcaaggtYcgccagNgtgcVttttagagat -gctaccacggggtNttttaSHaNtatNcctcatSaaVgtactgBHtagcaYggYVKNgta -KBcRttgaWatgaatVtagtcgattYgatgtaatttacDacSctgctaaaStttaWMagD -aaatcaVYctccgggcgaVtaaWtStaKMgDtttcaaMtVgBaatccagNaaatcYRMBg -gttWtaaScKttMWtYataRaDBMaDataatHBcacDaaKDactaMgagttDattaHatH -taYatDtattDcRNStgaatattSDttggtattaaNSYacttcDMgYgBatWtaMagact -VWttctttgYMaYaacRgHWaattgRtaagcattctMKVStatactacHVtatgatcBtV -NataaBttYtSttacKgggWgYDtgaVtYgatDaacattYgatggtRDaVDttNactaSa -MtgNttaacaaSaBStcDctaccacagacgcaHatMataWKYtaYattMcaMtgSttDag -cHacgatcaHttYaKHggagttccgatYcaatgatRaVRcaagatcagtatggScctata -ttaNtagcgacgtgKaaWaactSgagtMYtcttccaKtStaacggMtaagNttattatcg -tctaRcactctctDtaacWYtgaYaSaagaWtNtatttRacatgNaatgttattgWDDcN -aHcctgaaHacSgaataaRaataMHttatMtgaSDSKatatHHaNtacagtccaYatWtc -actaactatKDacSaStcggataHgYatagKtaatKagStaNgtatactatggRHacttg -tattatgtDVagDVaRctacMYattDgtttYgtctatggtKaRSttRccRtaaccttaga -gRatagSaaMaacgcaNtatgaaatcaRaagataatagatactcHaaYKBctccaagaRa -BaStNagataggcgaatgaMtagaatgtcaKttaaatgtaWcaBttaatRcggtgNcaca -aKtttScRtWtgcatagtttWYaagBttDKgcctttatMggNttattBtctagVtacata -aaYttacacaaRttcYtWttgHcaYYtaMgBaBatctNgcDtNttacgacDcgataaSat -YaSttWtcctatKaatgcagHaVaacgctgcatDtgttaSataaaaYSNttatagtaNYt -aDaaaNtggggacttaBggcHgcgtNtaaMcctggtVtaKcgNacNtatVaSWctWtgaW -cggNaBagctctgaYataMgaagatBSttctatacttgtgtKtaattttRagtDtacata -tatatgatNHVgBMtKtaKaNttDHaagatactHaccHtcatttaaagttVaMcNgHata -tKtaNtgYMccttatcaaNagctggacStttcNtggcaVtattactHaSttatgNMVatt 
-MMDtMactattattgWMSgtHBttStStgatatRaDaagattttctatMtaaaaaggtac -taaVttaSacNaatactgMttgacHaHRttgMacaaaatagttaatatWKRgacDgaRta -tatttattatcYttaWtgtBRtWatgHaaattHataagtVaDtWaVaWtgStcgtMSgaS -RgMKtaaataVacataatgtaSaatttagtcgaaHtaKaatgcacatcggRaggSKctDc -agtcSttcccStYtccRtctctYtcaaKcgagtaMttttcRaYDttgttatctaatcata -NctctgctatcaMatactataggDaHaaSttMtaDtcNatataattctMcStaaBYtaNa -gatgtaatHagagSttgWHVcttatKaYgDctcttggtgttMcRaVgSgggtagacaata -aDtaattSaDaNaHaBctattgNtaccaaRgaVtKNtaaYggHtaKKgHcatctWtctDt -ttctttggSDtNtaStagttataaacaattgcaBaBWggHgcaaaBtYgctaatgaaatW -cDcttHtcMtWWattBHatcatcaaatctKMagtDNatttWaBtHaaaNgMttaaStagt -tctctaatDtcRVaYttgttMtRtgtcaSaaYVgSWDRtaatagctcagDgcWWaaaBaa -RaBctgVgggNgDWStNaNBKcBctaaKtttDcttBaaggBttgaccatgaaaNgttttt -tttatctatgttataccaaDRaaSagtaVtDtcaWatBtacattaWacttaSgtattggD -gKaaatScaattacgWcagKHaaccaYcRcaRttaDttRtttHgaHVggcttBaRgtccc -tDatKaVtKtcRgYtaKttacgtatBtStaagcaattaagaRgBagSaattccSWYttta -ttVaataNctgHgttaaNBgcVYgtRtcccagWNaaaacaDNaBcaaaaRVtcWMgBagM -tttattacgDacttBtactatcattggaaatVccggttRttcatagttVYcatYaSHaHc -ttaaagcNWaHataaaRWtctVtRYtagHtaaaYMataHYtNBctNtKaatattStgaMc -BtRgctaKtgcScSttDgYatcVtggaaKtaagatWccHccgKYctaNNctacaWctttt -gcRtgtVcgaKttcMRHgctaHtVaataaDtatgKDcttatBtDttggNtacttttMtga -acRattaaNagaactcaaaBBVtcDtcgaStaDctgaaaSgttMaDtcgttcaccaaaag -gWtcKcgSMtcDtatgtttStaaBtatagDcatYatWtaaaBacaKgcaDatgRggaaYc -taRtccagattDaWtttggacBaVcHtHtaacDacYgtaatataMagaatgHMatcttat -acgtatttttatattacHactgttataMgStYaattYaccaattgagtcaaattaYtgta -tcatgMcaDcgggtcttDtKgcatgWRtataatatRacacNRBttcHtBgcRttgtgcgt -catacMtttBctatctBaatcattMttMYgattaaVYatgDaatVagtattDacaacDMa -tcMtHcccataagatgBggaccattVWtRtSacatgctcaaggggYtttDtaaNgNtaaB -atggaatgtctRtaBgBtcNYatatNRtagaacMgagSaSDDSaDcctRagtVWSHtVSR -ggaacaBVaccgtttaStagaacaMtactccagtttVctaaRaaHttNcttagcaattta -ttaatRtaaaatctaacDaBttggSagagctacHtaaRWgattcaaBtctRtSHaNtgta -cattVcaHaNaagtataccacaWtaRtaaVKgMYaWgttaKggKMtKcgWatcaDatYtK -SttgtacgaccNctSaattcDcatcttcaaaDKttacHtggttHggRRaRcaWacaMtBW -VHSHgaaMcKattgtaRWttScNattBBatYtaNRgcggaagacHSaattRtttcYgacc -BRccMacccKgatgaacttcgDgHcaaaaaRtatatDtatYVtttttHgSHaSaatagct -NYtaHYaVYttattNtttgaaaYtaKttWtctaNtgagaaaNctNDctaaHgttagDcRt -tatagccBaacgcaRBtRctRtggtaMYYttWtgataatcgaataattattataVaaaaa -ttacNRVYcaaMacNatRttcKatMctgaagactaattataaYgcKcaSYaatMNctcaa -cgtgatttttBacNtgatDccaattattKWWcattttatatatgatBcDtaaaagttgaa -VtaHtaHHtBtataRBgtgDtaataMttRtDgDcttattNtggtctatctaaBcatctaR -atgNacWtaatgaagtcMNaacNgHttatactaWgcNtaStaRgttaaHacccgaYStac -aaaatWggaYaWgaattattcMaactcBKaaaRVNcaNRDcYcgaBctKaacaaaaaSgc -tccYBBHYaVagaatagaaaacagYtctVccaMtcgtttVatcaatttDRtgWctagtac -RttMctgtDctttcKtWttttataaatgVttgBKtgtKWDaWagMtaaagaaattDVtag -gttacatcatttatgtcgMHaVcttaBtVRtcgtaYgBRHatttHgaBcKaYWaatcNSc -tagtaaaaatttacaatcactSWacgtaatgKttWattagttttNaggtctcaagtcact -attcttctaagKggaataMgtttcataagataaaaatagattatDgcBVHWgaBKttDgc -atRHaagcaYcRaattattatgtMatatattgHDtcaDtcaaaHctStattaatHaccga -cNattgatatattttgtgtDtRatagSacaMtcRtcattcccgacacSattgttKaWatt -NHcaacttccgtttSRtgtctgDcgctcaaMagVtBctBMcMcWtgtaacgactctcttR -ggRKSttgYtYatDccagttDgaKccacgVatWcataVaaagaataMgtgataaKYaaat -cHDaacgataYctRtcYatcgcaMgtNttaBttttgatttaRtStgcaacaaaataccVg -aaDgtVgDcStctatatttattaaaaRKDatagaaagaKaaYYcaYSgKStctccSttac -agtcNactttDVttagaaagMHttRaNcSaRaMgBttattggtttaRMggatggcKDgWR -tNaataataWKKacttcKWaaagNaBttaBatMHtccattaacttccccYtcBcYRtaga -ttaagctaaYBDttaNtgaaaccHcaRMtKtaaHMcNBttaNaNcVcgVttWNtDaBatg -ataaVtcWKcttRggWatcattgaRagHgaattNtatttctctattaattaatgaDaaMa -tacgttgggcHaYVaaNaDDttHtcaaHtcVVDgBVagcMacgtgttaaBRNtatRtcag 
-taagaggtttaagacaVaaggttaWatctccgtVtaDtcDatttccVatgtacNtttccg -tHttatKgScBatgtVgHtYcWagcaKtaMYaaHgtaattaSaHcgcagtWNaatNccNN -YcacgVaagaRacttctcattcccRtgtgtaattagcSttaaStWaMtctNNcSMacatt -ataaactaDgtatWgtagtttaagaaaattgtagtNagtcaataaatttgatMMYactaa -tatcggBWDtVcYttcDHtVttatacYaRgaMaacaStaatcRttttVtagaDtcacWat -ttWtgaaaagaaagNRacDtttStVatBaDNtaactatatcBSMcccaSttccggaMatg -attaaWatKMaBaBatttgataNctgttKtVaagtcagScgaaaDggaWgtgttttKtWt -atttHaatgtagttcactaaKMagttSYBtKtaYgaactcagagRtatagtVtatcaaaW -YagcgNtaDagtacNSaaYDgatBgtcgataacYDtaaactacagWDcYKaagtttatta -gcatcgagttKcatDaattgattatDtcagRtWSKtcgNtMaaaaacaMttKcaWcaaSV -MaaaccagMVtaMaDtMaHaBgaacataBBVtaatVYaNSWcSgNtDNaaKacacBttta -tKtgtttcaaHaMctcagtaacgtcgYtactDcgcctaNgagagcYgatattttaaattt -ccattttacatttDaaRctattttWctttacgtDatYtttcagacgcaaVttagtaaKaa -aRtgVtccataBggacttatttgtttaWNtgttVWtaWNVDaattgtatttBaagcBtaa -BttaaVatcHcaVgacattccNggtcgacKttaaaRtagRtctWagaYggtgMtataatM -tgaaRttattttgWcttNtDRRgMDKacagaaaaggaaaRStcccagtYccVattaNaaK -StNWtgacaVtagaagcttSaaDtcacaacgDYacWDYtgtttKatcVtgcMaDaSKStV -cgtagaaWaKaagtttcHaHgMgMtctataagBtKaaaKKcactggagRRttaagaBaaN -atVVcgRcKSttDaactagtSttSattgttgaaRYatggttVttaataaHttccaagDtg -atNWtaagHtgcYtaactRgcaatgMgtgtRaatRaNaacHKtagactactggaatttcg -ccataacgMctRgatgttaccctaHgtgWaYcactcacYaattcttaBtgacttaaacct -gYgaWatgBttcttVttcgttWttMcNYgtaaaatctYgMgaaattacNgaHgaacDVVM -tttggtHtctaaRgtacagacgHtVtaBMNBgattagcttaRcttacaHcRctgttcaaD -BggttKaacatgKtttYataVaNattccgMcgcgtagtRaVVaattaKaatggttRgaMc -agtatcWBttNtHagctaatctagaaNaaacaYBctatcgcVctBtgcaaagDgttVtga -HtactSNYtaaNccatgtgDacgaVtDcgKaRtacDcttgctaagggcagMDagggtBWR -tttSgccttttttaacgtcHctaVtVDtagatcaNMaVtcVacatHctDWNaataRgcgt -aVHaggtaaaaSgtttMtattDgBtctgatSgtRagagYtctSaKWaataMgattRKtaa -catttYcgtaacacattRWtBtcggtaaatMtaaacBatttctKagtcDtttgcBtKYYB -aKttctVttgttaDtgattttcttccacttgSaaacggaaaNDaattcYNNaWcgaaYat -tttMgcBtcatRtgtaaagatgaWtgaccaYBHgaatagataVVtHtttVgYBtMctaMt -cctgaDcYttgtccaaaRNtacagcMctKaaaggatttacatgtttaaWSaYaKttBtag -DacactagctMtttNaKtctttcNcSattNacttggaacaatDagtattRtgSHaataat -gccVgacccgatactatccctgtRctttgagaSgatcatatcgDcagWaaHSgctYYWta -tHttggttctttatVattatcgactaagtgtagcatVgtgHMtttgtttcgttaKattcM -atttgtttWcaaStNatgtHcaaaDtaagBaKBtRgaBgDtSagtatMtaacYaatYtVc -KatgtgcaacVaaaatactKcRgtaYtgtNgBBNcKtcttaccttKgaRaYcaNKtactt -tgagSBtgtRagaNgcaaaNcacagtVtttHWatgttaNatBgtttaatNgVtctgaata -tcaRtattcttttttttRaaKcRStctcggDgKagattaMaaaKtcaHacttaataataK -taRgDtKVBttttcgtKaggHHcatgttagHggttNctcgtatKKagVagRaaaggaaBt -NatttVKcRttaHctaHtcaaatgtaggHccaBataNaNaggttgcWaatctgatYcaaa -HaatWtaVgaaBttagtaagaKKtaaaKtRHatMaDBtBctagcatWtatttgWttVaaa -ScMNattRactttgtYtttaaaagtaagtMtaMaSttMBtatgaBtttaKtgaatgagYg -tNNacMtcNRacMMHcttWtgtRtctttaacaacattattcYaMagBaacYttMatcttK -cRMtgMNccattaRttNatHaHNaSaaHMacacaVaatacaKaSttHatattMtVatWga -ttttttaYctttKttHgScWaacgHtttcaVaaMgaacagNatcgttaacaaaaagtaca -HBNaattgttKtcttVttaaBtctgctacgBgcWtttcaggacacatMgacatcccagcg -gMgaVKaBattgacttaatgacacacaaaaaatRKaaBctacgtRaDcgtagcVBaacDS -BHaaaaSacatatacagacRNatcttNaaVtaaaataHattagtaaaaSWccgtatWatg -gDttaactattgcccatcttHaSgYataBttBaactattBtcHtgatcaataSttaBtat -KSHYttWggtcYtttBttaataccRgVatStaHaKagaatNtagRMNgtcttYaaSaact -cagDSgagaaYtMttDtMRVgWKWtgMaKtKaDttttgactatacataatcNtatNaHat -tVagacgYgatatatttttgtStWaaatctWaMgagaRttRatacgStgattcttaagaD -taWccaaatRcagcagaaNKagtaaDggcgccBtYtagSBMtactaaataMataBSacRM -gDgattMMgtcHtcaYDtRaDaacggttDaggcMtttatgttaNctaattaVacgaaMMt -aatDccSgtattgaRtWWaccaccgagtactMcgVNgctDctaMScatagcgtcaactat -acRacgHRttgctatttaatgaattataYKttgtaagWgtYttgcHgMtaMattWaWVta 
-RgcttgYgttBHtYataSccStBtgtagMgtDtggcVaaSBaatagDttgBgtctttctc -attttaNagtHKtaMWcYactVcgcgtatMVtttRacVagDaatcttgctBBcRDgcaac -KttgatSKtYtagBMagaRtcgBattHcBWcaactgatttaatttWDccatttatcgagS -KaWttataHactaHMttaatHtggaHtHagaatgtKtaaRactgtttMatacgatcaagD -gatKaDctataMggtHDtggHacctttRtatcttYattttgacttgaaSaataaatYcgB -aaaaccgNatVBttMacHaKaataagtatKgtcaagactcttaHttcggaattgttDtct -aaccHttttWaaatgaaatataaaWattccYDtKtaaaacggtgaggWVtctattagtga -ctattaagtMgtttaagcatttgSgaaatatccHaaggMaaaattttcWtatKctagDtY -tMcctagagHcactttactatacaaacattaacttaHatcVMYattYgVgtMttaaRtga -aataaDatcaHgtHHatKcDYaatcttMtNcgatYatgSaMaNtcttKcWataScKggta -tcttacgcttWaaagNatgMgHtctttNtaacVtgttcMaaRatccggggactcMtttaY -MtcWRgNctgNccKatcttgYDcMgattNYaRagatHaaHgKctcataRDttacatBatc -cattgDWttatttaWgtcggagaaaaatacaatacSNtgggtttccttacSMaagBatta -caMaNcactMttatgaRBacYcYtcaaaWtagctSaacttWgDMHgaggatgBVgcHaDt -ggaactttggtcNatNgtaKaBcccaNtaagttBaacagtatacDYttcctNgWgcgSMc -acatStctHatgRcNcgtacacaatRttMggaNKKggataaaSaYcMVcMgtaMaHtgat -tYMatYcggtcttcctHtcDccgtgRatcattgcgccgatatMaaYaataaYSggatagc -gcBtNtaaaScaKgttBgagVagttaKagagtatVaactaSacWactSaKatWccaKaaa -atBKgaaKtDMattttgtaaatcRctMatcaaMagMttDgVatggMaaWgttcgaWatga -aatttgRtYtattaWHKcRgctacatKttctaccaaHttRatctaYattaaWatVNccat -NgagtcKttKataStRaatatattcctRWatDctVagttYDgSBaatYgttttgtVaatt -taatagcagMatRaacttBctattgtMagagattaaactaMatVtHtaaatctRgaaaaa -aaatttWacaacaYccYDSaattMatgaccKtaBKWBattgtcaagcHKaagttMMtaat -ttcKcMagNaaKagattggMagaggtaatttYacatcWaaDgatMgKHacMacgcVaaca -DtaDatatYggttBcgtatgWgaSatttgtagaHYRVacaRtctHaaRtatgaactaata -tctSSBgggaaHMWtcaagatKgagtDaSatagttgattVRatNtctMtcSaagaSHaat -aNataataRaaRgattctttaataaagWaRHcYgcatgtWRcttgaaggaMcaataBRaa -ccagStaaacNtttcaatataYtaatatgHaDgcStcWttaacctaRgtYaRtataKtgM -ttttatgactaaaatttacYatcccRWtttHRtattaaatgtttatatttgttYaatMca -RcSVaaDatcgtaYMcatgtagacatgaaattgRtcaaYaaYtRBatKacttataccaNa -aattVaBtctggacaagKaaYaaatatWtMtatcYaaVNtcgHaactBaagKcHgtctac -aatWtaDtSgtaHcataHtactgataNctRgttMtDcDttatHtcgtacatcccaggStt -aBgtcacacWtccNMcNatMVaVgtccDYStatMaccDatggYaRKaaagataRatttHK -tSaaatDgataaacttaHgttgVBtcttVttHgDacgaKatgtatatNYataactctSat -atatattgcHRRYttStggaactHgttttYtttaWtatMcttttctatctDtagVHYgMR -BgtHttcctaatYRttKtaagatggaVRataKDctaMtKBNtMtHNtWtttYcVtattMc -gRaacMcctNSctcatttaaagDcaHtYccSgatgcaatYaaaaDcttcgtaWtaattct -cgttttScttggtaatctttYgtctaactKataHacctMctcttacHtKataacacagcN -RatgKatttttSaaatRYcgDttaMRcgaaattactMtgcgtaagcgttatBtttttaat -taagtNacatHgttcRgacKcBBtVgatKttcgaBaatactDRgtRtgaNacWtcacYtt -aaKcgttctHaKttaNaMgWgWaggtctRgaKgWttSttBtDcNtgtttacaaatYcDRt -gVtgcctattcNtctaaaDMNttttNtggctgagaVctDaacVtWccaagtaacacaNct -gaScattccDHcVBatcgatgtMtaatBgHaatDctMYgagaatgYWKcctaatNaStHa -aaKccgHgcgtYaaYtattgtStgtgcaaRtattaKatattagaWVtcaMtBagttatta -gNaWHcVgcaattttDcMtgtaRHVYtHtctgtaaaaHVtMKacatcgNaatttMatatg -ttgttactagWYtaRacgataKagYNKcattataNaRtgaacKaYgcaaYYacaNccHat -MatDcNgtHttRaWttagaaDcaaaaaatagggtKDtStaDaRtaVtHWKNtgtattVct -SVgRgataDaRaWataBgaagaaKtaataaYgDcaStaNgtaDaaggtattHaRaWMYaY -aWtggttHYgagVtgtgcttttcaaDKcagVcgttagacNaaWtagtaataDttctggtt -VcatcataaagtgKaaaNaMtaBBaattaatWaattgctHaVKaSgDaaVKaHtatatat -HatcatSBagNgHtatcHYMHgttDgtaHtBttWatcgtttaRaattgStKgSKNWKatc -agDtctcagatttctRtYtBatBgHHtKaWtgYBgacVVWaKtacKcDttKMaKaVcggt -gttataagaataaHaatattagtataatMHgttYgaRttagtaRtcaaVatacggtcMcg -agtaaRttacWgactKRYataaaagSattYaWgagatYagKagatgSaagKgttaatMgg -tataatgttWYttatgagaaacctNVataatHcccKtDctcctaatactggctHggaSag -gRtKHaWaattcgSatMatttagaggcYtctaMcgctcataSatatgRagacNaaDagga -VBagaYttKtacNaKgtSYtagttggaWcatcWttaatctatgaVtcgtgtMtatcaYcg 
-tRccaaYgDctgcMgtgtWgacWtgataacacgcgctBtgttaKtYDtatDcatcagKaV -MctaatcttgVcaaRgcRMtDcgattaHttcaNatgaatMtactacVgtRgatggaWttt -actaaKatgagSaaKggtaNtactVaYtaaKRagaacccacaMtaaMtKtatBcttgtaa -WBtMctaataaVcDaaYtcRHBtcgttNtaaHatttBNgRStVDattBatVtaagttaYa -tVattaagaBcacggtSgtVtatttaRattgatgtaHDKgcaatattKtggcctatgaWD -KRYcggattgRctatNgatacaatMNttctgtcRBYRaaaHctNYattcHtaWcaattct -BtMKtVgYataatMgYtcagcttMDataVtggRtKtgaatgccNcRttcaMtRgattaac -attRcagcctHtWMtgtDRagaKaBtgDttYaaaaKatKgatctVaaYaacWcgcatagB -VtaNtRtYRaggBaaBtgKgttacataagagcatgtRattccacttaccatRaaatgWgD -aMHaYVgVtaSctatcgKaatatattaDgacccYagtgtaYNaaatKcagtBRgagtcca -tgKgaaaccBgaagBtgSttWtacgatWHaYatcgatttRaaNRgcaNaKVacaNtDgat -tgHVaatcDaagcgtatgcNttaDataatcSataaKcaataaHWataBtttatBtcaKtK -tatagttaDgSaYctacaRatNtaWctSaatatttYaKaKtaccWtatcRagacttaYtt -VcKgSDcgagaagatccHtaattctSttatggtKYgtMaHagVaBRatttctgtRgtcta -tgggtaHKgtHacHtSYacgtacacHatacKaaBaVaccaDtatcSaataaHaagagaat -ScagactataaRttagcaaVcaHataKgDacatWccccaagcaBgagWatctaYttgaaa -tctVNcYtttWagHcgcgcDcVaaatgttKcHtNtcaatagtgtNRaactttttcaatgg -WgBcgDtgVgtttctacMtaaataaaRggaaacWaHttaRtNtgctaaRRtVBctYtVta -tDcattDtgaccYatagatYRKatNYKttNgcctagtaWtgaactaMVaacctgaStttc -tgaKVtaaVaRKDttVtVctaDNtataaaDtccccaagtWtcgatcactDgYaBcatcct -MtVtacDaaBtYtMaKNatNtcaNacgDatYcatcgcaRatWBgaacWttKttagYtaat -tcggttgSWttttDWctttacYtatatWtcatDtMgtBttgRtVDggttaacYtacgtac -atgaattgaaWcttMStaDgtatattgaDtcRBcattSgaaVBRgagccaaKtttcDgcg -aSMtatgWattaKttWtgDBMaggBBttBaatWttRtgcNtHcgttttHtKtcWtagHSt -aacagttgatatBtaWSaWggtaataaMttaKacDaatactcBttcaatatHttcBaaSa -aatYggtaRtatNtHcaatcaHtagVtgtattataNggaMtcttHtNagctaaaggtaga -YctMattNaMVNtcKtactBKcaHHcBttaSagaKacataYgctaKaYgttYcgacWVtt -WtSagcaacatcccHaccKtcttaacgaKttcacKtNtacHtatatRtaaatacactaBt -ttgaHaRttggttWtatYagcatYDatcggagagcWBataagRtacctataRKgtBgatg -aDatataSttagBaHtaatNtaDWcWtgtaattacagKttcNtMagtattaNgtctcgtc -ctcttBaHaKcKccgtRcaaYagSattaagtKataDatatatagtcDtaacaWHcaKttD -gaaRcgtgYttgtcatatNtatttttatggccHtgDtYHtWgttatYaacaattcaWtat -NgctcaaaSttRgctaatcaaatNatcgtttaBtNNVtgttataagcaaagattBacgtD -atttNatttaaaDcBgtaSKgacgtagataatttcHMVNttgttBtDtgtaWKaaRMcKM -tHtaVtagataWctccNNaSWtVaHatctcMgggDgtNHtDaDttatatVWttgttattt -aacctttcacaaggaSaDcggttttttatatVtctgVtaacaStDVaKactaMtttaSNa -gtgaaattaNacttSKctattcctctaSagKcaVttaagNaVcttaVaaRNaHaaHttat -gtHttgtgatMccaggtaDcgaccgtWgtWMtttaHcRtattgScctatttKtaaccaag -tYagaHgtWcHaatgccKNRtttagtMYSgaDatctgtgaWDtccMNcgHgcaaacNDaa -aRaStDWtcaaaaHKtaNBctagBtgtattaactaattttVctagaatggcWSatMaccc -ttHttaSgSgtgMRcatRVKtatctgaaaccDNatYgaaVHNgatMgHRtacttaaaRta -tStRtDtatDttYatattHggaBcttHgcgattgaKcKtttcRataMtcgaVttWacatN -catacctRataDDatVaWNcggttgaHtgtMacVtttaBHtgagVttMaataattatgtt -cttagtttgtgcDtSatttgBtcaacHattaaBagVWcgcaSYttMgcttacYKtVtatc -aYaKctgBatgcgggcYcaaaaacgNtctagKBtattatctttKtaVttatagtaYtRag -NtaYataaVtgaatatcHgcaaRataHtacacatgtaNtgtcgYatWMatttgaactacR -ctaWtWtatacaatctBatatgYtaagtatgtgtatSttactVatcttYtaBcKgRaSgg -RaaaaatgcagtaaaWgtaRgcgataatcBaataccgtatttttccatcNHtatWYgatH -SaaaDHttgctgtccHtggggcctaataatttttctatattYWtcattBtgBRcVttaVM -RSgctaatMagtYtttaaaaatBRtcBttcaaVtaacagctccSaaSttKNtHtKYcagc -agaaaccccRtttttaaDcDtaStatccaagcgctHtatcttaDRYgatDHtWcaaaBcW -gKWHttHataagHacgMNKttMKHccaYcatMVaacgttaKgYcaVaaBtacgcaacttt -MctaaHaatgtBatgagaSatgtatgSRgHgWaVWgataaatatttccKagVgataattW -aHNcYggaaatgctHtKtaDtctaaagtMaatVDVactWtSaaWaaMtaHtaSKtcBRaN -cttStggtBttacNagcatagRgtKtgcgaacaacBcgKaatgataagatgaaaattgta -ctgcgggtccHHWHaaNacaBttNKtKtcaaBatatgctaHNgtKcDWgtttatNgVDHg -accaacWctKaaggHttgaRgYaatHcaBacaatgagcaaattactgtaVaaYaDtagat 
-tgagNKggtggtgKtWKaatacagDRtatRaMRtgattDggtcaaYRtatttNtagaDtc -acaaSDctDtataatcgtactaHttatacaatYaacaaHttHatHtgcgatRRttNgcat -SVtacWWgaaggagtatVMaVaaattScDDKNcaYBYaDatHgtctatBagcaacaagaa -tgagaaRcataaKNaRtBDatcaaacgcattttttaaBtcSgtacaRggatgtMNaattg -gatatWtgagtattaaaVctgcaYMtatgatttttYgaHtgtcttaagWBttHttgtctt -attDtcgtatWtataataSgctaHagcDVcNtaatcaagtaBDaWaDgtttagYctaNcc -DtaKtaHcttaataacccaRKtacaVaatNgcWRaMgaattatgaBaaagattVYaHMDc -aDHtcRcgYtcttaaaWaaaVKgatacRtttRRKYgaatacaWVacVcRtatMacaBtac -tggMataaattttHggNagSctacHgtBagcgtcgtgattNtttgatSaaggMttctttc -ttNtYNagBtaaacaaatttMgaccttacataattgYtcgacBtVMctgStgMDtagtaR -ctHtatgttcatatVRNWataDKatWcgaaaaagttaaaagcacgHNacgtaatctttMR -tgacttttDacctataaacgaaatatgattagaactccSYtaBctttaataacWgaaaYa -tagatgWttcatKtNgatttttcaagHtaYgaaRaDaagtaggagcttatVtagtctttc -attaaaatcgKtattaRttacagVaDatgcatVgattgggtctttHVtagKaaRBtaHta -aggccccaaaaKatggtttaMWgtBtaaacttcactttKHtcgatctccctaYaBacMgt -cttBaBaNgcgaaacaatctagtHccHtKttcRtRVttccVctttcatacYagMVtMcag -aMaaacaataBctgYtaatRaaagattaaccatVRatHtaRagcgcaBcgDttStttttc -VtttaDtKgcaaWaaaaatSccMcVatgtKgtaKgcgatatgtagtSaaaDttatacaaa -catYaRRcVRHctKtcgacKttaaVctaDaatgttMggRcWaacttttHaDaKaDaBctg -taggcgtttaHBccatccattcNHtDaYtaataMttacggctNVaacDattgatatttta -cVttSaattacaaRtataNDgacVtgaacataVRttttaDtcaaacataYDBtttaatBa -DtttYDaDaMccMttNBttatatgagaaMgaNtattHccNataattcaHagtgaaggDga -tgtatatatgYatgaStcataaBStWacgtcccataRMaaDattggttaaattcMKtctM -acaBSactcggaatDDgatDgcWctaacaccgggaVcacWKVacggtaNatatacctMta -tgatagtgcaKagggVaDtgtaacttggagtcKatatcgMcttRaMagcattaBRaStct -YSggaHYtacaactMBaagDcaBDRaaacMYacaHaattagcattaaaHgcgctaaggSc -cKtgaaKtNaBtatDDcKBSaVtgatVYaagVtctSgMctacgttaacWaaattctSgtD -actaaStaaattgcagBBRVctaatatacctNttMcRggctttMttagacRaHcaBaacV -KgaataHttttMgYgattcYaNRgttMgcVaaacaVVcDHaatttgKtMYgtatBtVVct -WgVtatHtacaaHttcacgatagcagtaaNattBatatatttcVgaDagcggttMaagtc -ScHagaaatgcYNggcgtttttMtStggtRatctacttaaatVVtBacttHNttttaRca -aatcacagHgagagtMgatcSWaNRacagDtatactaaDKaSRtgattctccatSaaRtt -aaYctacacNtaRtaactggatgaccYtacactttaattaattgattYgttcagDtNKtt -agDttaaaaaaaBtttaaNaYWKMBaaaacVcBMtatWtgBatatgaacVtattMtYatM -NYDKNcKgDttDaVtaaaatgggatttctgtaaatWtctcWgtVVagtcgRgacttcccc -taDcacagcRcagagtgtWSatgtacatgttaaSttgtaaHcgatgggMagtgaacttat -RtttaVcaccaWaMgtactaatSSaHtcMgaaYtatcgaaggYgggcgtgaNDtgttMNg -aNDMtaattcgVttttaacatgVatgtWVMatatcaKgaaattcaBcctccWcttgaaWH -tWgHtcgNWgaRgctcBgSgaattgcaaHtgattgtgNagtDttHHgBttaaWcaaWagc -aSaHHtaaaVctRaaMagtaDaatHtDMtcVaWMtagSagcttHSattaacaaagtRacM -tRtctgttagcMtcaBatVKtKtKacgagaSNatSactgtatatcBctgagVtYactgta -aattaaaggcYgDHgtaacatSRDatMMccHatKgttaacgactKtgKagtcttcaaHRV -tccttKgtSataatttacaactggatDNgaacttcaRtVaagDcaWatcBctctHYatHa -DaaatttagYatSatccaWtttagaaatVaacBatHcatcgtacaatatcgcNYRcaata -YaRaYtgattVttgaatgaVaactcRcaNStgtgtattMtgaggtNttBaDRcgaaaagc -tNgBcWaWgtSaDcVtgVaatMKBtttcgtttctaaHctaaagYactgMtatBDtcStga -ccgtSDattYaataHctgggaYYttcggttaWaatctggtRagWMaDagtaacBccacta -cgHWMKaatgatWatcctgHcaBaSctVtcMtgtDttacctaVgatYcWaDRaaaaRtag -atcgaMagtggaRaWctctgMgcWttaagKBRtaaDaaWtctgtaagYMttactaHtaat -cttcataacggcacBtSgcgttNHtgtHccatgttttaaagtatcgaKtMttVcataYBB -aKtaMVaVgtattNDSataHcagtWMtaggtaSaaKgttgBtVtttgttatcatKcgHac -acRtctHatNVagSBgatgHtgaRaSgttRcctaacaaattDNttgacctaaYtBgaaaa -tagttattactcttttgatgtNNtVtgtatMgtcttRttcatttgatgacacttcHSaaa -ccaWWDtWagtaRDDVNacVaRatgttBccttaatHtgtaaacStcVNtcacaSRttcYa -gacagaMMttttgMcNttBcgWBtactgVtaRttctccaaYHBtaaagaBattaYacgat -ttacatctgtaaMKaRYtttttactaaVatWgctBtttDVttctggcDaHaggDaagtcg -aWcaagtagtWttHtgKtVataStccaMcWcaagataagatcactctHatgtcYgaKcat 
-cagatactaagNSStHcctRRNtattgtccttagttagMVgtatagactaactctVcaat -MctgtttgtgttgccttatWgtaBVtttctggMcaaKgDWtcgtaaYStgSactatttHg -atctgKagtagBtVacRaagRtMctatgggcaaaKaaaatacttcHctaRtgtDcttDat -taggaaatttcYHaRaaBttaatggcacKtgctHVcaDcaaaVDaaaVcgMttgtNagcg -taDWgtcgttaatDgKgagcSatatcSHtagtagttggtgtHaWtaHKtatagctgtVga -ttaBVaatgaataagtaatVatSttaHctttKtttgtagttaccttaatcgtagtcctgB -cgactatttVcMacHaaaggaatgDatggKtaHtgStatattaaSagctWcctccRtata -BaDYcgttgcNaagaggatRaaaYtaWgNtSMcaatttactaacatttaaWttHtatBat -tgtcgacaatNgattgcNgtMaaaKaBDattHacttggtRtttaYaacgVactBtaBaKt -gBttatgVttgtVttcaatcWcNctDBaaBgaDHacBttattNtgtDtatttVSaaacag -gatgcRatSgtaSaNtgBatagttcHBgcBBaaattaHgtDattatDaKaatBaaYaaMa -ataaataKtttYtagtBgMatNcatgtttgaNagtgttgtgKaNaSagtttgaSMaYBca -aaacDStagttVacaaaaactaaWttBaagtctgtgcgtMgtaattctcctacctcaNtt -taaccaaaaVtBcacataacaccccBcWMtatVtggaatgaWtcaaWaaaaaaaaWtDta -atatRcctDWtcctaccMtVVatKttaWaaKaaatataaagScHBagaggBaSMtaWaVt -atattactSaaaKNaactatNatccttgaYctattcaaaVgatttYHcRagattttaSat -aggttattcVtaaagaKgtattattKtRttNcggcRgtgtgtWYtaacHgKatKgatYta -cYagDtWcHBDctctgRaYKaYagcactKcacSaRtBttttBHKcMtNtcBatttatttt -tgSatVgaaagaWtcDtagDatatgMacaacRgatatatgtttgtKtNRaatatNatgYc -aHtgHataacKtgagtagtaacYttaNccaaatHcacaacaVDtagtaYtccagcattNt -acKtBtactaaagaBatVtKaaHBctgStgtBgtatgaSNtgDataaccctgtagcaBgt -gatcttaDataStgaMaccaSBBgWagtacKcgattgaDgNNaaaacacagtSatBacKD -gcgtataBKcatacactaSaatYtYcDaactHttcatRtttaatcaattataRtttgtaa -gMcgNttcatcBtYBagtNWNMtSHcattcRctttttRWgaKacKttgggagBcgttcgc -MaWHtaatactgtctctatttataVgtttaBScttttaBMaNaatMacactYtBMggtHa -cMagtaRtctgcatttaHtcaaaatttgagKtgNtactBacaHtcgtatttctMaSRagc -agttaatgtNtaaattgagagWcKtaNttagVtacgatttgaatttcgRtgtWcVatcgt -taaDVctgtttBWgaccagaaagtcSgtVtatagaBccttttcctaaattgHtatcggRa -ttttcaaggcYSKaagWaWtRactaaaacccBatMtttBaatYtaagaactSttcgaaSc -aatagtattgaccaagtgttttctaacatgtttNVaatcaaagagaaaNattaaRtttta -VaaaccgcaggNMtatattVctcaagaggaacgBgtttaacaagttcKcYaatatactaa -ccBaaaSggttcNtattctagttRtBacgScVctcaatttaatYtaaaaaaatgSaatga -tagaMBRatgRcMcgttgaWHtcaVYgaatYtaatctttYttatRaWtctgBtDcgatNa -tcKaBaDgatgtaNatWKctccgatattaacattNaaacDatgBgttctgtDtaaaMggt -gaBaSHataacgccSctaBtttaRBtcNHcDatcDcctagagtcRtaBgWttDRVHagat -tYatgtatcWtaHtttYcattWtaaagtctNgtStggRNcgcggagSSaaagaaaatYcH -DtcgctttaatgYcKBVSgtattRaYBaDaaatBgtatgaHtaaRaRgcaSWNtagatHa -acttNctBtcaccatctMcatattccaSatttgcgaDagDgtatYtaaaVDtaagtttWV -aagtagYatRttaagDcNgacKBcScagHtattatcDaDactaaaaaYgHttBcgaDttg -gataaaKSRcBMaBcgaBSttcWtgNBatRaccgattcatttataacggHVtaattcaca -agagVttaaRaatVVRKcgWtVgacctgDgYaaHaWtctttcacMagggatVgactagMa -aataKaaNWagKatagNaaWtaaaatttgaattttatttgctaaVgaHatBatcaaBWcB -gttcMatcgBaaNgttcgSNaggSaRtttgHtRtattaNttcDcatSaVttttcgaaaaa -ttgHatctaRaggSaNatMDaaatDcacgattttagaHgHaWtYgattaatHNSttatMS -gggNtcKtYatRggtttgtMWVtttaYtagcagBagHaYagttatatggtBacYcattaR -SataBatMtttaaatctHcaaaSaaaagttNSaaWcWRccRtKaagtBWtcaaattSttM -tattggaaaccttaacgttBtWatttatatWcDaatagattcctScacctaagggRaaYt -aNaatgVtBcttaaBaacaMVaaattatStYgRcctgtactatcMcVKatttcgSgatRH -MaaaHtagtaaHtVgcaaataatatcgKKtgccaatBNgaaWcVttgagttaKatagttc -aggKDatDtattgaKaVcaKtaataDataataHSaHcattagttaatRVYcNaHtaRcaa -ggtNHcgtcaaccaBaaagYtHWaaaRcKgaYaaDttgcWYtataRgaatatgtYtgcKt -aNttWacatYHctRaDtYtattcBttttatcSataYaYgttWaRagcacHMgtttHtYtt -YaatcggtatStttcgtRSattaaDaKMaatatactaNBaWgctacacYtgaYVgtgHta -aaRaaRgHtagtWattataaaSDaaWtgMattatcgaaaagtaYRSaWtSgNtBgagcRY -aMDtactaacttaWgtatctagacaagNtattHggataatYttYatcataDcgHgttBtt -ctttVttgccgaaWtaaaacgKgtatctaaaaaNtccDtaDatBMaMggaatNKtatBaa -atVtccRaHtaSacataHattgtttKVYattcataVaattWtcgtgMttcttKtgtctaa 
-cVtatctatatBRataactcgKatStatattcatHHRttKtccaacgtgggtgRgtgaMt -attattggctatcgtgacMtRcBDtcttgtactaatRHttttaagatcgVMDStattatY -BtttDttgtBtNttgRcMtYtgBacHaWaBaatDKctaagtgaaactaatgRaaKgatcc -aagNaaaatattaggWNtaagtatacttttKcgtcggSYtcttgRctataYcttatataa -agtatattaatttataVaacacaDHatctatttttKYVatHRactttaBHccaWagtact -BtcacgaVgcgttRtttttttSVgtSagtBaaattctgaHgactcttgMcattttagVta -agaattHctHtcaDaaNtaacRggWatagttcgtSttgaDatcNgNagctagDgatcNtt -KgttgtaDtctttRaaYStRatDtgMggactSttaDtagSaVtBDttgtDgccatcacaM -attaaaMtNacaVcgSWcVaaDatcaHaatgaattaMtatccVtctBtaattgtWattat -BRcWcaatgNNtactWYtDaKttaaatcactcagtRaaRgatggtKgcgccaaHgaggat -StattYcaNMtcaBttacttatgagDaNtaMgaaWtgtttcttctaHtMNgttatctaWW -atMtBtaaatagDVatgtBYtatcggcttaagacMRtaHScgatatYgRDtcattatSDa -HggaaataNgaWSRRaaaBaatagBattaDctttgHWNttacaataaaaaaatacggttt -gHgVtaHtWMttNtBtctagtMcgKMgHgYtataHaNagWtcaacYattaataYRgtaWK -gaBctataaccgatttaHaNBRaRaMtccggtNgacMtctcatttgcaattcWgMactta -caaDaaNtactWatVtttagccttMaatcagVaagtctVaaDaBtattaattaYtNaYtg -gattaKtaKctYaMtattYgatattataatKtVgDcttatatNBtcgttgtStttttMag -aggttaHYSttcKgtcKtDNtataagttataagSgttatDtRttattgttttSNggRtca -aKMNatgaatattgtBWtaMacctgggYgaSgaagYataagattacgagaatBtggtRcV -HtgYggaDgaYaKagWagctatagacgaaHgtWaNgacttHRatVaWacKYtgRVNgVcS -gRWctacatcKSactctgWYtBggtataagcttNRttVtgRcaWaaatDMatYattaact -ttcgaagRatSctgccttgcRKaccHtttSNVagtagHagBagttagaccaRtataBcca -taatSHatRtcHagacBWatagcaMtacaRtgtgaaBatctKRtScttccaNaatcNgta -atatWtcaMgactctBtWtaaNactHaaaaRctcgcatggctMcaaNtcagaaaaacaca -gtggggWttRttagtaagaVctVMtcgaatcttcMaaaHcaHBttcgattatgtcaDagc -YRtBtYcgacMgtDcagcgaNgttaataatagcagKYYtcgtaBtYctMaRtaRtDagaa -aacacatgYaBttgattattcgaaNttBctSataaMataWRgaHtttccgtDgaYtatgg -tDgHKgMtatttVtMtVagttaRatMattRagataaccctKctMtSttgaHagtcStcta -tttccSagatgttccacgaggYNttHRacgattcDatatDcataaaatBBttatcgaHtN -HaaatatDNaggctgaNcaaggagttBttMgRagVatBcRtaWgatgBtSgaKtcgHttt -gaatcaaDaHttcSBgHcagtVaaSttDcagccgttNBtgttHagYtattctttRWaaVt -SttcatatKaaRaaaNacaVtVctMtSDtDtRHRcgtaatgctcttaaatSacacaatcg -HattcaWcttaaaatHaaatcNctWttaNMcMtaKctVtcctaagYgatgatcYaaaRac -tctaRDaYagtaacgtDgaggaaatctcaaacatcaScttcKttNtaccatNtaNataca -tttHaaDHgcaDatMWaaBttcRggctMaagctVYcacgatcaDttatYtaatcKatWat -caatVYtNagatttgattgaYttttYgacttVtcKaRagaaaHVgDtaMatKYagagttN -atWttaccNtYtcDWgSatgaRgtMatgKtcgacaagWtacttaagtcgKtgatccttNc -ttatagMatHVggtagcgHctatagccctYttggtaattKNaacgaaYatatVctaataM -aaaYtgVtcKaYtaataacagaatHcacVagatYWHttagaaSMaatWtYtgtaaagNaa -acaVgaWtcacNWgataNttcaSagctMDaRttgNactaccgataMaaatgtttattDtc -aagacgctDHYYatggttcaagccNctccttcMctttagacBtaaWtaWVHggaaaaNat -ttaDtDtgctaaHHtMtatNtMtagtcatttgcaaaRatacagRHtatDNtgtDgaatVg -tVNtcaaatYBMaaaagcaKgtgatgatMgWWMaHttttMgMagatDtataaattaacca -actMtacataaattgRataatacgBtKtaataattRgtatDagDtcRDacctatRcagag -cSHatNtcaScNtttggacNtaaggaccgtgKNttgttNcttgaaRgYgRtNtcagttBc -ttttcHtKtgcttYaaNgYagtaaatgaatggWaMattBHtatctatSgtcYtgcHtaat -tHgaaMtHcagaaSatggtatgccaHBtYtcNattWtgtNgctttaggtttgtWatNtgH -tgcDttactttttttgcNtactKtWRaVcttcatagtgSNKaNccgaataaBttataata -YtSagctttaaatSttggctaaKSaatRccgWHgagDttaaatcatgagMtcgagtVtaD -ggaBtatttgDacataaacgtagYRagBWtgDStKDgatgaagttcattatttaKWcata -aatWRgatataRgttRacaaNKttNtKagaaYaStaactScattattaacgatttaaatg -DtaattagatHgaYataaactatggggatVHtgccgtNgatNYcaStRtagaccacWcaM -tatRagHgVactYtWHtcttcatgatWgagaKggagtatgaWtDtVtNaNtcgYYgtaaa -ctttaDtBactagtaDctatagtaatatttatatataacgHaaaRagKattSagttYtSt ->THREE Homo sapiens frequency -agagagacgatgaaaattaatcgtcaatacgctggcgaacactgagggggacccaatgct -cttctcggtctaaaaaggaatgtgtcagaaattggtcagttcaaaagtagaccggatctt 
-tgcggagaacaattcacggaacgtagcgttgggaaatatcctttctaccacacatcggat -tttcgccctctcccattatttattgtgttctcacatagaattattgtttagacatccctc -gttgtatggagagttgcccgagcgtaaaggcataatccatataccgccgggtgagtgacc -tgaaattgtttttagttgggatttcgctatggattagcttacacgaagagattctaatgg -tactataggataattataatgctgcgtggcgcagtacaccgttacaaacgtcgttcgcat -atgtggctaacacggtgaaaatacctacatcgtatttgcaatttcggtcgtttcatagag -cgcattgaattactcaaaaattatatatgttgattatttgattagactgcgtggaaagaa -ggggtactcaagccatttgtaaaagctgcatctcgcttaagtttgagagcttacattagt -ctatttcagtcttctaggaaatgtctgtgtgagtggttgtcgtccataggtcactggcat -atgcgattcatgacatgctaaactaagaaagtagattactattaccggcatgcctaatgc -gattgcactgctatgaaggtgcggacgtcgcgcccatgtagccctgataataccaatact -tacatttggtcagcaattctgacattatacctagcacccataaatttactcagacttgag -gacaggctcttggagtcgatcttctgtttgtatgcatgtgatcatatagatgaataagcg -atgcgactagttagggcatagtatagatctgtgtatacagttcagctgaacgtccgcgag -tggaagtacagctgagatctatcctaaaatgcaaccatatcgttcacacatgatatgaac -ccagggggaaacattgagttcagttaaattggcagcgaatcccccaagaagaaggcggag -tgacgttgaacgggcttatggtttttcagtacttcctccgtataagttgagcgaaatgta -aacagaataatcgttgtgttaacaacattaaaatcgcggaatatgatgagaatacacagt -gtgagcatttcacttgtaaaatatctttggtagaacttactttgctttaaatatgttaaa -ccgatctaataatctacaaaacggtagattttgcctagcacattgcgtccttctctattc -agatagaggcaatactcagaaggttttatccaaagcactgtgttgactaacctaagtttt -agtctaataatcatgattgattataggtgccgtggactacatgactcgtccacaaataat -acttagcagatcagcaattggccaagcacccgacttttatttaatggttgtgcaatagtc -cagattcgtattcgggactctttcaaataatagtttcctggcatctaagtaagaaaagct -cataaggaagcgatattatgacacgctcttccgccgctgttttgaaacttgagtattgct -cgtccgaaattgagggtcacttcaaaatttactgagaagacgaagatcgactaaagttaa -aatgctagtccacagttggtcaagttgaattcatccacgagttatatagctattttaatt -tatagtcgagtgtacaaaaaacatccacaataagatttatcttagaataacaacccccgt -atcatcgaaatcctccgttatggcctgactcctcgagcttatagcatttgtgctggcgct -cttgccaggaacttgctcgcgaggtggtgacgagtgagatgatcagtttcattatgatga -tacgattttatcgcgactagttaatcatcatagcaagtaaaatttgaattatgtcattat -catgctccattaacaggttatttaattgatactgacgaaattttttcacaatgggttttc -tagaatttaatatcagtaattgaagccttcataggggtcctactagtatcctacacgacg -caggtccgcagtatcctggagggacgtgttactgattaaaagggtcaaaggaatgaaggc -tcacaatgttacctgcttcaccatagtgagccgatgagttttacattagtactaaatccc -aaatcatactttacgatgaggcttgctagcgctaaagagaatacatacaccaccacatag -aattgttagcgatgatatcaaatagactcctggaagtgtcagggggaaactgttcaatat -ttcgtccacaggactgaccaggcatggaaaagactgacgttggaaactataccatctcac -gcccgacgcttcactaattgatgatccaaaaaatatagcccggattcctgattagcaaag -ggttcacagagaaagatattatcgacgtatatcccaaaaaacagacgtaatgtgcatctt -cgaatcgggatgaatacttgtatcataaaaatgtgacctctagtatacaggttaatgtta -gtgatacacaatactcgtgggccatgggttctcaaataaaatgtaatattgcgtcgatca -ctcacccacgtatttggtctaattatgttttatttagtgacaatccaatagataaccggt -cctattaagggctatatttttagcgaccacgcgtttaaacaaaggattgtatgtagatgg -taccagtttaattgccagtgggcaatcctaagcaaaatgagattctatcctaaagtttgg -gcttgatataagatttcggatgtatgggttttataatcgttggagagctcaatcatgagc -taatacatggatttcgctacctcaccgagagaccttgcatgaagaattctaaccaaaagt -ttaataggccggattggattgagttaattaagaccttgttcagtcatagtaaaaaccctt -aaattttaccgattgacaaagtgagcagtcgcaataccctatgcgaaacgcctcgatagt -gactaggtatacaaggtttttgagttcctttgaaatagttaactaatttaaaattaatta -acgacatggaaatcacagaacctaatgctttgtaggagttatttatgctgtttactgcct -ctacaaccctaataaagcagtcctaagaatgaaacgcatcttttagttcagaaagtggta -tccagggtggtcaatttaataaattcaacatcgggtctcaggatattcggtcatataatt -tattaagggctcttcgagtcttactctgagtgaaattggaaacagtcatccttttcgttg -tgaggcatcttacaccgctatcgatatacaatgcattccaccgcggtgtcccgtacacaa -ggaaacttgttaccttggggatataagaaaactcacacgtctcattattaaactgagtac -aatttttgcacgagaaagtaatgcaatacaatatgatgaaagccagctaatgaaaaggga 
-tggaacgcacctcggatctgttgcactggattaaaatccgattatttttaaaaatattca -gtgctagagcatatcaggtctacttttttatctggtatgtaaagcccacggagcgatagt -gagatccttacgactcaacgaaaagttataacataactcccgttagccaaagcccaatcc -cgattactgccctaccctaacgtctgccatctaaatatcgaacttgttatgatcaatgtg -actacctcccaccctttccccttcatttgttccactggggataagctagcgttttcagaa -tcaatgcaataagaatagccaattgtctcacttcatcagagctcttggcaattccaggcg -ctacgtggttctggaatatattcatttttcaaatagtaatacgtttagtgttgctattgt -ctacacgtttggatattacgttatgtgagcggacatcaatagttgtctaactctttagta -agccagagatagcactcttagcgaatggataccatcttccataagtttagttaatagtcc -gaaacaactgcttcgagcatatttgaacctccttgtaggcaaatagcctcttcaaagcaa -tcttactaatagatagagtttgttttaagggactactagaaatgggacaatcttaatagt -atgacctaaactgacatttaaagatatatccaggtggcaagcataaagatcattgcgcca -cctccaccgtgggattacttatcagtcgatatcctatatgctaagtttgcgacggcagaa -tacaaactaagctgagttgatgctaaccttacctatgataccccattggaccggttaaca -gccctacttattccaaataaaagaacttttatgctgtagaagctattatagtgatgcctg -gtaacttcagtatattaaaatgacacacatacgccatatagagctcctggaactttgaat -aatgagcgaacttcgaagttgaagagcaagaaaccatatgtcacggttgcctaaagcccg -gtaaccagacatgtgctatcattgatcattatcgaggttttcataaccttgacccattat -cggctgtgcgcggacaagtacttaaatcactagtttcttcacctgcttatcggtaagaaa -taaggttggcaaagaatcgcataagacggacgtagagccgcagcgttgtgcgagtccagg -tgcatgcgcagcaataggattttaaattttgttccatttttaatttagccgtaaggatgt -ccgtaaatgattgaaaattggattcaatctttgggcctatgctactggaacctgatcgac -aaaatttcaaacatacgttaactccgaaagaccgtatttttgcggctagaatagtcagtc -gcttggagccatataccttaccacttaaacgacgtgctcctgtagttgaaatataaacag -aacacaaagactaccgatcatatcaactgaagatctttgtaactttgaggcgaagcaccc -tcttcgagacaactaagagtaaagtaccgggcgccgcaaggagtcgattgggaccctaaa -tcttgacgaattgctaagaggctcagagctaccactgtaatttctctagagcccataata -aatgaacgatacatccgtaggtagcacctaagggattataatggaagccaaatgcagtta -ataatattatatactggcgtacacgattcgacggatctctcacatagtgattcacgaccc -ccccctttgattgacacagcgtcagcattttgcaagaacgatcttctgcatagggtgcgc -caccgtaaggatgacgtcgaagctacaactgggtataatttaccatgcttccctgatgct -gagtgcaatacactaagaatgagtttttaccccatatcaccagtatttgttctgttattg -cgaagaaatggctatgctgagttggcgactaaagtcacccatcctttttattaggtaacc -ccctcccttaaactaactgatttgctggagctgccctgcatacatatactttatcattta -tggacgtccgtgacgcttattatccaccatagtcgatatgctacacggattcattaatgg -atcgtaggagtttaagttatatttactaagatcggtctcggctactatcccgccttaccc -ggcgctatttacggccatttttaatatattgacggtaattattcctatggtttcgaccgc -acgtccttggacaagaaagaatggcaaaaaaaatgtaaaagaaaaaaaatattgagtccc -taccatcatataaaaaatatgtgatgagtaacttgacgaaatgttagtggttattaaaga -ctatctattacaccttttgttttctgtcgtagtatattaaagtctagaagccttacagga -aaatcagggttatacagccgatactccgcagcatgaatcatcgaggaggtgtcctaccat -cgcgccttgtaatcttgtctgtgtatactgtatttagaccttttatacaaagtaaatatc -tcggctttatgtgattgggaggggcctactcaaacatgatgacttgacctaataatcact -gtgcgggcgtcttatgactagctattccttgaaatccaccaccaaatggttaatatgtaa -aaactttgacgatgaaacaaggtgaatgtgtagttactttgtgtaattagctgcgtcgag -cattgcttgtaaaaccgtcaatcgcacacgttacttccataaaatttctacgaatacacc -cttcttaaaaaaaacgtaggaattcacgagtttaacaaacgataactgtataaagtggaa -gtccgaagaaagcagatgcccgaactactcgaagatgtttcgttttcttaaccatagggg -cttcttaatggcccactacgcacattttgttcaagcccgagagggacatccccattacgg -gagtattactaaaactgttccgtaatacgttcagcaagggatgaaaaaggccactgctca -agttattgacgtgggagtattacatcggaagcctgaatcccacactatgatggtctgtac -aggcctagggactgcgtctagacggtattaccggcttctaatcatacgatcgtgagtctt -aacgggaagtaaggctcacacctaccccaaaccatttatctatgtaagtataaaattgtg -cgtaagtgttcaaagtggacaataaagacgtggcaaaaacccccgcacataagccgcttt -agatttcacaaataccaatgcggttaaaaacatccttgagtcgtacatacaccatactcg -cgttaaacggatataacagaagataataaatccggatgtggagtcggtgtaactatagaa -agccaagtgaaataatgcttaccagtcatttagctatacggctttcatttcatgtcaaga 
-gggtggagtttgacctgtacagttgatatatcaccgatacttagaactcacctaaagcta -aaattgctcgcagcgtgtaatccgcatattacaaacaatagatgggattcattatacata -agacacgatgatctgctttttcaggttgcgagatgttgcctatcgtcaatcgagtcctgc -cttacaccacttaaacaaaagtattgacagggaacctattttcgaggtattatatagtcc -agcttgaatatcaatttgacagttaacctagtgaaaatcagtaagaggaaatacgccaca -ttctccagtgaaattctacgggttatcgtctagtccaactatcaattataactcacgaga -tataagtaaattctcgtacttggcctgatttttattatactttggatccttagtaaacag -gaagggagaaaccttcaacgaaaaacactggattttgttttactctcaaagctcttatat -gacggaaataccctgtcaagtcttaactttattactagactaatgaaatgggcttggggt -ggccagaatcatagtacaatttagcggatacactattcggactttcctatcggctgtctg -gttggataagtatggggactaataggctagacatacctatacttaaactatacaggcgtc -atctatctctgcaactttggagttccctgatgttctcccgccctttgggttcacatcttc -tataccgacacccctaataacgattagtttgtgggttagagtaaattaatacggttaata -ttaatgtatcgttgaaaagctggtgtcgccaataaggtaaccggctaggcagagtatatg -tcacgaagtataactaccctaatgataagctgtaggaataaaattaatgctgtctctaag -cgaagagatatttccgactctgttttaatgacgaatctcattacttctgacttgcaaatg -ttcaatatggcacggtttcacggcacctttgtgacgcatataatgaacttagaagattat -aacgacggaactttatatgataatccgttacgattaaagaatctgttaaatatcataatg -gcattcagttctagaccgtgcatcatggtaaacttactttctctgcatggcgacatacat -ttcgctattcaaattcgcgtgtggttacacccactcgcacctttggaatattaagagaag -atgatcagaaaatccattcgctcaatttttctgacgtacgtctaatttatcctaggagac -aaatcgttttatgtctctcacatttttgaagaaaggttcgagagacaatactcaggtcct -gaactgctagaagatactcggtggagcgtggcaacaatgaaaaactcgtgacataaatga -atgatacttttccaagttcagttaagtgaatatgtttaacatacccggcttttcgatctt -aagctgacgctggacgtgcgagtaatgtcagtctcttacatacactagtgactccaagtt -tcgtcaaaaacgccccctcccttctcgagcccactcacgctatgtattgacgcgaacttg -ttcgggatcagacttttcaggagttcggtcgcgtgtccctatgtgctaatatataagtta -gatcgcattagatgctaatctgaatacttatagacgaccttcaacgagaacgggtaccac -cttgaggctagagttaggtgtgaaacgacaggtagggacatataaaatttgagtgcggct -ttagttaagggtttaattacctactcaaacatcacgctcgcgcccttcgtacgtaatcga -ccatctagaggctaaggggactgtactaggtagtgattaatgatatcctagacgcacgtg -ccttagatcttcagactctgatggtccgcgatcaccgtaattgtagtcctccaactcgat -cactttgttggcgtcaaagaaattacgatatctaaatacttataatacaataaccaagga -tgagaatgactcatcgcgttggagttatattgcttgaagttctatggaatgaaagcacgt -tatctgccgtcccaatatctccagtgagctaattcattggacggtccactttgatcaatc -cccgaggagatgttcggacactttagtctgtaacacttagcgttgagaccacgaacaatt -gattactcagtcttgaaggtgttttccaaagttcattttaaataagactacgataggcct -ttcctattgatataaactacccggctctgttgttcgtgtgagtcgtacttctctgtgttt -ttctgattatagcaagattcgattcttagtgtaaacagcgatttttatttgacccgtcaa -tgagaagcgcataggatctaagcaaaattatcaagttgtgccacaaggtaagatctttcc -agttattgcaggtaggatgtatcccacgttgatagtatgaggtctgacgtcaactgtcta -ggagagttgaccgcgtgcgggtacaccggatttgcatcgatgttgagaacgcagaactcc -cactgtcgtggcggcgttcctgatatttagcaagaggcgttgataaagccctcatcatct -agatctcgacctcatctgccctcttgctccatcattttctacacagactactttcctatc -tacgttagtataattgctttctatcttagtatcatttagagcttctccgtcaacaggttc -gtgctattaaagttagtacgaaagggacaacttgtagcaacgcatttaatcggttttcga -ctacttcgcacaaaatcagataaagaagtttgtcattctattagacattgaattgcgcaa -ttgacttgtaccacttatgatcgaacactgaatcaagactgtgattaactaaaatagaca -agccactatatcaactaataaaaacgcccctggtggtcgaacatagttgactacaggata -attaattggactggagccattacattctctacaatcgtatcacttcccaagtagacaact -ttgaccttgtagtttcatgtacaaaaaaatgctttcgcaggagcacattggtagttcaat -agtttcatgggaacctcttgagccgtcttctgtgggtgtgttcggatagtaggtactgat -aaagtcgtgtcgctttcgatgagagggaattcaccggaaaacaccttggttaacaggata -gtctatgtaaacttcgagacatgtttaagagttaccagcttaatccacggtgctctacta -gtatcatcagctgtcttgcctcgcctagaaatatgcattctatcgttatcctatcaacgg -ttgccgtactgagcagccttattgtggaagagtaatatataaatgtagtcttgtctttac -gaagcagacgtaagtaataatgacttggaataccaaaactaaacatagtggattatcata 
-ctcaagaactctccagataaataacagtttttacgatacgtcaccaatgagcttaaagat -taggatcctcaaaactgatacaaacgctaattcatttgttattggatccagtatcagtta -aactgaatggagtgaagattgtagaatgttgttctggcctcgcatggggtctaggtgata -tacaatttctcatacttacacggtagtggaaatctgattctagcttcgtagctgactata -ctcaaggaaccactgctcaaggtaggagactagttccgaccctacagtcaaagtggccga -agcttaaactatagactagttgttaaatgctgatttcaagatatcatctatatacagttt -ggacaattatgtgtgcgaaactaaaattcatgctattcagatggatttcacttatgcctt -agaaacagatattgcccgagctcaatcaacagttttagccggaaacaatcgaagcatagg -gacaatgtatcttttcctaaattgccatgtgcagatttctgagtgtcacgaagcgcataa -tagaatcttgtgttgcctcaactcgttgaaaagtttaaaacaatcgcagcagtctttttg -gggtctactgtgtgtttgcaaaataactgaaagaaacgcttgaacaactctgaagtagct -cgagtactcattaaagtgtaacacattagtgaatatcggccaatgaaccaaacgcttccc -ggtacgctatctctctcatcgggaggcgatgtgcaggttatctacgaaagcatcccttta -cgttgagagtgtcgatgcatgaacctcattgtaacaatagcccagcaaattctcatacgt -gcctcagggtccgggcgtactcctccatggaagggcgcgcatctagtgttataccaactc -gctttttaactactatgctgtagttctacaggcatagtggccagtattttctaacttctc -tggatagatgctctcactcctcatccatcacggcttcagtttacgtcttacttgcttgtt -cagcaacggatggaggcattaagtatcttcactgttccctaaaattgctgttcaatatca -aagtaaggacgatacagggaaagctcaagcacactcattgaatactgccccagttgcaac -ctcacttaatctgacaaaaataatgactactctaagtgttgcggaagcagtctcttccac -gagcttgtctgtatcacttcgtataggcatgtaactcgatagacacgaacaccgagtgag -aaactatattcttgcttccgtgtgtgtgacaccaggtaattgatgcggatataagctgga -gatcactcacgcccacacaaggcgctgctacctctttattccaatgtgtaagaatttgct -aacttcatttctagaccgcagctttgcggtcataatttcacggtacggacccttgggtta -gagacttgataacacacttcgcagtttccaccgcgcacatgttttagtggcttctaacat -agaatttttgttgtgacataaagagtgcgtgggagacttgcccgaccgttaagccataat -caattgaaagccccgtgagtcacatctaattggttgtactgcgcatttagctatccttta -gctgactcgaagagattcgattcctaatataggttaattagatggctgccgcgcgaagta -aaacgtgaaaaacgtagtgcgcagatctgcataactcgcgcttaattacttatgagtagt -tccaagttcgctacgttatgagagagattggaattaagcaaatatgttttatggtgattt -tgggatgagaaggactgctaagtacggctactaaacaaatttctaaaaccgccatctacc -ttatcttggagacatttaagttgtatatgtcactagtctagcttttgtctgtgggacgcg -ttctcggaatgagggaaatgcaagagccgattcatcaaatgcttatctaagaaagtagtg -gactattacaccaagcacgaatgccagggaactgctttcttgctcaggacctcgcgacaa -ggtaccccgcataagtcctagaattacatttggtcagcaatgctgacatttgaccgtgaa -aacataattttaatcagaaggcagctcacccgcttgctctagatcttatctttgtatgaa -tgtcagaatttactgcaatatccgttccgaatagtgagggcttagtatagttctctgtat -acaggtcacatcaaactccccctgtcctagtacagctctgagctttaattaattgcatac -atttccttcaatcatcagatgaaaacaccgcgaatcatgctcttctcgtatagggcaaga -gaagcaacaaacaactagcccgactcacgttcatccgccgtatccttgttcagttcttac -tccgtattaggtcagcgaaatctaatcagaataatcggtcgcgtatcaaaattaaaatcc -cgcttgaggttgacaattaaaacgctgagcagttatcggctattagatagtggggtgaaa -gtaattggctggaattatgttaaaacgtgatattaagctaaaatacgctacttgttgccg -acctaattcagtcattcgatattcagttagagccaagaataacaagcttgtataaattga -acggggtgcactaaacgatgtgttactctaatattcagcttggagtatacctgaaggcga -attcatgtatcggccaataataagacgttgaagatcacaatttggactagcaaaagaagg -tgatttatgcgtggggattgagtccactgtacgagtacggtctctggaaaattataggtt -cagggaatataaggaagtaaagataattaccaagagatttttggtatcgctatgacccag -aggtgttctaacgtctgttttgatccgcagaatttctgcctcaatgcatatttgacggac -ttgaactagagcctctaaagttaaatggcgacgcaactgttcctaaacttcaattattac -tactctttttttcctagggtattgtagaggccagtggacaaaataaatcaaatttaagat -gtttcggacattaacatcccccgtagcatagaaatcatcagttatccaatctctcatcga -gcttttacaatttctgctggcgctatggacagcatatgccgcgagacctccgcaagactc -acttgatcactgtaagtatcttcattagaggttagagcctatagttaagctgctgaccta -gtaaaattggtattttctaattttattgctcaagttaaaggttagtgaagggataatgac -gttatttttgaacaatgggttgtattcaattttatatcacgaatggaacccttcattccc -ggcataatactagacgacacgaacaagctccgatctatcagccaggcacgtgttaaggtt 
-taattccggcaaaccaatgaagcatcaaaaggtgacctgatgcaacttagggtcacgatg -agtttttcaggactacttattacctattaataagttaacatgagccttcataccccgtaa -gacaatacatactccaccaattagaattctgagccatcttatctttttgtatcatcgaag -ggtatggccgaataggttaattagttactcctaacgtctctacaggcatgcatttgacgc -accttcgaaaatagtcaatctctcgccacacgcgtctagtatgcagcatcaaaaatatag -tccacggtttccggattaccaaacgcggcaaagagaaacattgtatcgacggagataact -taatacagaaggaaggggcatcttcgaatacggatgaataattctatctgtttattctga -catcttgttttcaggttaatcttacgcattcaaatgacgcctgccccatgcgtgcgcaat -tattttctaatattgacgagagcaatctcactccttttgggtctatttatgttttattga -ggcacaagcctatacagaacaggtactattaaggccgtgagtgtgagactcaaaccgtgg -aaacaaaggatgggttgttcttggtacaagttttagtgcatgtgggcaatccttaccaaa -atcagatgctatccttaactttgggctgcatttaagatggcggttggaggcctgtgagaa -tcctgcgtgtcatctttaatgaccgaattcatccatgtagattcagatcacacactcatt -ccttgatgttgtctaaacaaaagttgttgtggacgcattggagggagttaagtaacaact -tgggatcgcatacttataaaaattatatgttaaactttcacaaacgctgaagtccaaagt -aactagcccaaacgcctcgagagtcactaggtattaatggtgtttgagttcctgtgaaat -agtgttcgaaggtaaaatttatgtaccaaatcgaaagaacacttaataaggcttgcttgc -acggaggtatgatgtttactgactctacaaccctaattttccagtacgtacattcattcc -aataggttagttctcaaagtgctatacaggctcctcaattgatgatatgcttcagccgct -ctatggatattagctcattttatttaggaagcccgcttagaggcttactatgagggaaat -gccaaaatgtcatacttttcggtgtgtcccatatgacaccgctttacatagaatttgaat -taaaacgcgctctcccgttcactaccatacttggtaccgtgcgcatattacatatagata -taggatcattttttaaagctgtactaggtttgatcgacaatcttatgctatactatatga -tgtaaccctcataatcaataccgatcgtacgatcctagcataggtggcaagcgattttat -gccgattattgtgttaaatagtctgtgagtgtgattatcagggctacgttggtagagggg -ttgtatagacctcgcacacattgtgacatacttaacaatatacgaaaactgatataataa -atccccttacccaaacaccaatcccgttgaatcaactaccataacgtctcccatataaat -tgcctacttgtttgcataaatctgaatacataacaccattgcaccttcttgtgttccaat -cccgttaagattgccttgtcagatgatatgcaagaacaatagcatttgctagcaattatt -aacagctcttcgaattgcctccacataacgcgggagggtatattttaatttggcaaatac -taagtactgttggcgtcatatgctattaacggttggatattaagttatgtcagccgtaag -caagagtgggcgaaatattttgttacccagtgagagcactcttagagtttggatacaata -ggccatatgttgacttaagaggacgtaactacgccgtacaccattgttcaaccgacttct -tggcaaatagaatcgtattagcaatcttaagaatagagacacgttcgtgttagggtatac -tacaaatccgaaaatcttaagaggatcacctaaactgaaatttatacatatttcaacgtg -gatagatttaacataattcagccacctccaacctgggagtaattttcagtagatttacta -gatgattagtggcccaacgcacttgactatataagatctggggatcctaacctgacctat -gagacaaaattggaaacgttaacagcccttatgtgtacaaagaaaagtaagttgttgctg -ttcaacagatgatagtcatgacgcgtaacttcactatagtaaattgaaacaaatacgcaa -tttagacagaatggtacggtcatgaatgacagtaattcgaagtgctagaccaacttaaaa -taggtaaacgtgcccgaaaccccccttaacagaaagctgctatcatggtgcagtatcgac -gtgttcagaaacttgtaacttttgagcaggtccgagcacatggaagtatatcacgtgttt -ctgaaccggcttatccctaagatatatccgtcgcaaactttcgatttagtcccacgtaga -gcccaagcgttgtgcgactccacgtgcatgcccagaaatacgagtttaaatttggttaca -tggttaattttgaccgaagcatcgcactttatgattgataattggattcaatatgtcgcc -ctatgcgaatgcaacatgatccacaatttggctataagacgtttaatccgtatcacactt -tgtttgcggctagtatagtaacgcccgtgcaccaagagtcagtaacaattataagtactc -cgcaggtacttcaaatataaaaactaatcaaacacgacccatatgatcatctgaagatat -ttggaactttctcgacaaccaccctcgtactcaatacttacactaatcgacaggcacacg -caacgtgtacagtcgcaccatattgagtcaagatttgcttagtggcgatgagcgtacacg -cttatttctctagtcacaattagttatctacgagacatcacgagggagcaaataagcgat -gttatggctacacataggcacgtatgaatatgatataagccagttaaacagtcgaaccat -cgagcaaattctcatgcaccaacccacacgttgaggcacaaagagtaagctgtttgaatg -taacttcttctgctgagcgggccccaacgtaaggatcaactagaagagaaaactcggtat -tagtttaaatgcgtcacggagcatgagtgcatttcactaagaatgtctgtgtaaccaata -taacatctatttgttatctgattgcctacttatggctttgcggtcgtggcgactaatgtc -tccaatccttttgaggtcggtaccaactccctttaaattacgctgtgcaggctcatgcac 
-tgcatacatatacggtagcaggtagggacctcacgcacccttattataatcaatagtagt -tatcagtcaacgaggcaggaatgctgaggtcgaggtgttggtatattttctatgtgccgt -ctaggcgactatcacgcattaccaggcgagatttaagccaattttgaatatagtcaacgt -aatttttactatgggttccaccgaaacgccttgcacaactaagaatcccataaaatatcg -atatcaaataaaagattgtgtcaataccttcatatatattttttcggttgactaacgtga -actaaggttaggggttttgtatgtctatataggaaacagtttcttttctgtcctacttta -gtaaagtcttcaagccttactccaaaatcacggtgattaagccgttactcagcagcatga -ttctgcctgctcgggtcctaaaatccagccttgtaagagtcgctgtgtattagctaggga -gacctttgttaaaaaggatatatcgcggcgggatgtgagtgcgtggcgcatactcaatct -tcagctcgtgtcattataatatctctcccccacgcttttcactagatatgccgtgtaagc -aaacaccttatgcttaatttcgaaaatattggtacttgaaaaaagctgtaggggtactta -atgtctggtaggagatcaggagagaattgagtgtaaaaccgtaaagccctcacctgactt -catgtaaatggcttagaagactccatgatttaataaatactacgaaggaaagactggatc -taaagataactctagtaaggccaactcccttcaatgctgttgccagttataatccaagag -ctgtccttttctgaaccatagcggcttctgaagcgaactagaagcaaagttggttctagc -cagacagccacataccctgtacgggtgtattactaaaactggtccggtattagttcacca -agggaggaattaggcaaaggatctaggtatgcaagtcggagtattacatccctaccctga -atccatcaataggttcctctgtactggccttcgcaatgagtattcaaggttgtacagccg -tataataataagatagtgactatgaacgggaagtaacccgctcaccttccccaaaacatt -gttatatctaagtattaaagtctgccgtagtgttaatactcgaaaataaacaactggcaa -attacaccgcacttaagccgcttttgatttatatttttccaatgcgcttttaaaaataat -tcagtcctacatactaattaagacccttaaacggagatatcacaagttaagttttaacca -tctcgactaggtggaactatagatacccaactcaatttatcattacctgtaatgttccta -gaaggattgcatttcatgtcaagacggtggagtttcacagcgaaacttcagtgtgaacag -attctgagaaatcacctaaacctattagtcagagcacccggttagaaccagttgtcaaaa -aatagagcggttgcatgagacagaagtaacgatgagatccgttgtaacgttgagacatct -ggcctatcgtcaatacagtcctcccttaaaaatatttttaaatactaggcaaacccaaca -taggttagtcctatgtgatacgccacatggtatatcattttgtaacgttacctagggata -atcaggaagtggaattacgcaaaagtagacagtgaaatgcttagggttatagtctagtcc -aaagataaaggataaagcacgtcagagaactatattagccgaatgggaatcattgttagg -agactgtggatcatgtctaaaaagcaacgcagaaacagtcatcgaaaaaatctcgttttt -gtttgaatctaaaagagctttgatgaccgatagtacctgtatactagttactgtattacg -tgtctaatgatttcggattggggtccccagaatcagacgtcattgtagacgattcaagtt -taccaatttaatttcccagctctccttggagaactatcgccaataattgcagtcactttc -cttttctgaaacgataaagccgtcagagttctctgcaacgttggacttacctgaggttct -aacccactttcggttctaatagtagttaacgacacaacgaataacctttactgtggggct -ttcacgatattttttcgcttattattaatggttacgtcataagctggtgtccaaattaag -gttaccggcttcgcagagtagttgtatccaagtataacttccctaatcataagatcgagg -tagaaaattaatgctgtctctaaccgaacagatatgtcccactatgtggtatggacgttg -ctaattacttctgaagggaaattggtcattatggatacgtgtctaccatcaggtcggacg -cagatatggttctgtcttcagttgatccaccgttctttataggataataactgacgatta -aagattatggtaaatagattaagccaattctcttcttgtcagtgaagcatccttaactga -cttgctctgcagcccctcatacatttagctattcaaagtaccggctcgtttcaaactctc -ccacctttggaagaggttgtcaacttgataagtatatcatttacagcattttttcggacg -tacctctaatgtttcattgcagaaaattagttttttctatcgcacattttgcaagtaacg -ttagagacacaattatctgcgaatgaactgctagatctgacgaccgggagcctcgcaaat -atcaaaaaagactgacatatatcaaggagtcgttgacaagtgctggtaagtcaattggtt -tatctgtcccggcgtttcgatcttaagctgaccatgcacggcagagtaatgtcactctcg -ttcttacaagtctgtctccaagggtcggcaaaaaagacccctccattctcgagcccactc -acgatatgtagggacgacaacttgtgcggcttatgaattgtctggactgcgggcgagggt -ccatatctccgaagttagaagggacatacctttagatgataagatcaattcttattgacg -aaattcatccacaacggggaacaacttcaccctagacttacgtctgaaaagacacctagc -gtcttataaaaggtcagtgccccgtttcgtaaggctggaattacctacgcaaacttaaac -ctcgcgcccttccttacgtatcgacaagatagaggctatcgcgaatgtactacggaggca -tgaatcatatactagaaccaagtgcctgtgatattaacaagatgatccgacgcgagcacc -gtaattctaggcataaaactccagcaatttgggggccgaaaacaaatgacgttagctaat -taattatatgacatgatcaaaggaggtcaatcacgcatcgagttcgacgtatattcattg 
-aacttcgtgcgtttgaaagaaacttttatgaaggcaaaattgatcctgtctcctatttca -tgcgtacctcctagttgataattccccgagcagtggttaggacacttttgtcggtatcaa -gttccggtctcaaaacgtaaaattctgtaatctgtatggatggtctgtgaattagttaat -ttttatgaagtcgtcgagacgcagttcctattgatttattctaaacggagatgtgcttcg -tgggactcggaagtagatctgtgtttatgattattgctactttagatgctgactgttaac -tccgtgttgtttttcaaccgtatatcacaaccgaattggatagaacctatagtttcaagt -tctgccacaaggtatcatatttacagttagtgctggttgcttctttcaaacgtggtgagt -ttgtgctatcacgtcaacggtagagctcagtggaccgagtgcgcgttcaaccctgttcca -gagagggtgtgatagcacatataccacgctcgtcgaggcgttcatgatagtttgcaagag -ccggtgttaaacacatattattattgttatccaactaatcggacctatgcataaagcatt -gtctaaacagaataattgcctatatacggtagttttagtgatttatatcttagtatcagt -tagagcttcgaactcttcaggttcctcatatttaacgttcttcgaaagcgaaaacttcta -caaacgaatgtaagcggttttccaagtagtacctataaatcacagaaagatctgtctcag -tatagttgaaatggtattcagctagtgacgtgtaccaattatcatagttcactcaagcaa -gacgctcattaacgaatatagacaagacactatatcatataataaaaaagaacatggtgc -tcgaacatagttgaattcaccatattgaaggggaatgctgacatgtaattcgctactaga -cgatcaattccctacttgtcaaagttgaactggtacgttcttggaattaaatatgattgc -gctggaccaaattgcgacttcttgagtttcagggcaaacgattgagccggaggatgtccg -tctcttacctttcttgcttatgataaacgacggtccctgtacatcactgggaattctcag -caaaaataattgggtaaatcgagactcgatgtattcggccacaaaggtgttagacgttaa -agattattcaacggggcgataataggatcataaccggtatgcaagcgcattgaaagagcc -atgagatccttatccgataaacgctgcacggtatgtgcagccttattgtcgatcacgaat -ttataaatgtagtctgggctgtaagttgaagacctaagttataatgaagtgcaataccaa -atcgattcatagtggattatcagactcaagatatctcctgataaattacagttgttaaga -tacggataaaatgagatttaagattagcagcctctaatctgtttcaatcccgttggaatg -tggtatgcgatcaaggttaagttaaaatcaagcctgtcttcagtcttgattcttgttctg -ccatcgcatgcggtctacgtgagttaatatgtagcttacgttctagcttgtgctaatctg -agtatagattcgtagaggaatattatcaagcttccacgcctcaacgtacgtgtattggtc -acacaagacactaaaagtggaagtagcgtaaactatagtctagttgttaaatgctcagtt -cttgttatattcgatatactcttggctaatttatgtctgagtatataaaattaatgatat -taacttgcatttcacggatcccttagaaaaagattttgaccgagcgcattataaacggtt -acaccgaatcaatagaagcatacccaatagctttctttgaatttattgcctgcgcaactt -ggctgactctctagatccgaataattctatatggtcgtgacgaaactagttcattactgt -ttaaaatgccaacatgtcttttgggccgataatggctctttgcaaaattactcaatgata -cgattgatcaaagcggtagttgctagtggtagcatgtaagtctatcaaatgtctgattat -ccgaaaatcttccaaaagagtccacgtaccatatctatctcatagcgacgcgaggggaac -cttatctaactatcattccatttaccgggtgactctcgatgcaggatccgattgggataa -attgcccagaaatggctcattcctgactaagggtaaggccgttctcagcaagggaacccc -gcgaatctaggcttataccatctagattgttaactacttgcctgtagttctacagccata -ctggacagttgtttctaaatgatcgggattcatgctagcactcctctgaatgcaccgcgt -aagtttaactattacgtccgtgggcagataaggatggaggctgtatgtatcttaactgtt -acctaatatggctggtaattatcaaagtaaggaccttaatgccatagcgctagcaatcgc -tttgtatactgaccatgtgccaacctctcttaatctgtaaaatataatgtcttagctaac -tgtggacgatcatgtctctgcctagagcttcgctgtatcaattcctatagccagcgtact -agtgacacaacaacaccgtgtgagaaaagatattagtccttacgtctgtctctctacagc -ttattgatgaggattgaacatggacatatagctccccctcaaaagcagatgctacctctt -tattccattctcgaacatttgccgaacttaatttcgacaaacctgaggtcacgtcttaat -ttatcggtaacgtcacgtccctttgagactggataaatatattaccaggggccaacgagc -aattgttggaggcgcttctataatacaaggtgtcttgtcaaagaaagacggcgtgcgtct -cgtgcaactcacttaaccaatattaatgtgaaacccccctctctcacatcttatgcggtg -tactgccctggtacatttcctgtacaggactccaacagtgtagattcctaagatagctgt -tggagttgcctcacgccagatcgaaaaactgaataaactagtgagctgagctgcagaaat -accgcttaattacttatgactagttcaaagggacctacgtgatgtcagacattgcaagga -agaaattaggtttgtgcgtcattttggctggactagcactccttacttcccctactattc -aaatgtcgtaaacagcatgagacaggatcgtgctgacatttaaggtctattgggaacgag -gctacctttggtcgcgcgctcgcgttctccgaatgaccgaaatgcatgagcacagtatgc -aattgcttatagatctaaggtctggtcgttgaaaccaagcacgtaggcctgggaaatcag 
-ttcttcctcagcaactacacaaaagcgtccaagcattagtacttgtagtaaatgtccgaa -cctatgcgctcatttgaaagtcaaaaaatatttttaagcagtaggcacctaacccgattc -ctctacttagtagctttctttgattctcagaattgactgcaatatcactgcacaattctg -tgccattactagacttctctgtattaacgtctcatcttactaacactcgcctaggacaca -tctgagagtgaagtatttcaatacatttactgaaatcttcagttctaaaatccccgaata -aggctcttatcggtttggccaacacaagaaaaaaacttcttgcaccactcaccttcatac -gcaggagcctggggaacttagtaataactatttcggcagacaaagcttataacaagttgc -cggcgcgtataatatttaaaagaccccttgagctgctcaattaaaacgctcacctggtat -aggctattagatagtgccgtcttagtaaggggcgggaattatcggataaactgatatttt -gataaaataaccgacttgttcacgacataagtcactaaggagattttatctttctccaaa -gtatatcttccttggataatttcaaagcgctgcaatttaagttctgttactagtttatgc -tgctgggaggtgaccggaaggcgtagtaatctagaggcaaattataagaagttcatcata -tcattttcgactacaaaaacaaggtgttgtatgccggcgcattgtgtaaactggacgagt -accctagatggaaaattatacgttaagccaagatttcgatgtaatgataattacctacac -atttttgctatccataggaacaagagctgttctataggctcgtggcatacgaacatttgc -tgccgctatgaatattggaagctcttcaactacagactctattcttaattgccgtcgaaa -atgggccgaatcggctattattaatactcggtttttccgaggggattgttgtcgacagtc -gtaattattattaatattgatgttggtgaggtcatttaaatacaaccttgcagacaatga -ataagggatccaatctctcatactccttttacaattgctcatgcccctatgcaaacctta -tgccgccacacctccgcaactctctcttctgaactgtaagtagcttcattactggtttga -gactatactgaagctgatgacattctaaaatggctattttcgaatgtgattcataatgtt -tatcgtttgggatggcagaatcacgttatttttgatatagcccgggtattctattgtata -gaacgtatgctacaagtcattccccgaagaagactagaagtaaacaacatgcgaccatcg -ttaagccacgcaaggctgtagctttatttcccgataacctatcttccataaatagcggac -agcaggatactgacgctcaacatcagtggttatggtctaatttttaacttttaataaggt -aacttcagcaggcatacacagtaactctttaatttataatcaaattagaagtctgacact -tcttatatttttctatcatccaacgcgatcgcccattagcttattgtgttactaataacg -tatctaaaccaatccttttcaagctactgcctatattgtcaatatatacaaacaacagga -tagtaggctgcttaaaaaatattgtcaaccgtgtacgctttacaatacccggaaatcaca -aactttgtagacaacgagtgaaatttatacactacgaagggccagcgtacaagacccatg -aattaggcgatatgtttattctgacatattggtttatccttaatctgtcgctgtaaaatg -aagccgcccccatccctgcgaattttttttcgaagattcacgactgaaatataaatacgt -ttggctatatttatgttggagggaggcaatagcctttactgttaaccgaagatttagcca -gtgagtgtgacactaaaacactggaataaatgcaggcgttcttctgggtaaaaggtttag -tcaatctcgcctataagttcatatagctctggatataattatctggcccatgcatttatc -atggcgcttggtgccctgtgtgaagccggcctctcatattgaaggtccgaagtattccat -gtacattaagatcactctctcattcatgcatcttggcttaacaaatctggttgtccaagc -tttccaggcacgtatggtacaaattcggatcgaatacttataaaaatgatatgttaaact -gtctaaaacgctcatctacaaagtaaagtgcactaaccaatagagtctcaagaccgtgta -atgctggtgcactgaatgtgtaatacggttagaagggattagttatgttacaaatccatt -gaaaacttaagaagcattgcgtgctcggagggtgcatcttttatcaagagactaacatta -ttttcaacgacgtacatgctttacaatagggtacttatcaaacgccgagaaacgcgccta -tagtgatgttatgattatgacccgatatccattggaccgaattttatgtaggttcccagc -gtactcgcgtaatatctcggtattgccataatgtaatacttgtcggtctctcccagatga -aaaagcgttacagagtatttcaatgaaaaacagcgcgcaacgtcaatacctttaggggta -acggccgctgatttcatatagatatacgataagttggtatagctctactaggtggcatcc -acaatcgttgcatttactatagctggttacaatcataatctataccgttccttacatact -accatagcgggatagcgtttttttgccgttgattgggtttaagaggatgtcagtctcatt -atatccgattcggtgggagagccgttgttttcaaatcgcacactttgtgacataatgtac -aagataacaaaactgatataagatataaactgtcaatatcaccttgacacttgaatcaaa -gtaaattaactcgcaaatataatttgactaattgggtgcagatttctcaattaataaaaa -aatggcaccggatgggcttacaagccccttatcattcacttgtatcatgatttccaagaa -caatagaatttgctagcaagtatgaacagagattcgaattgcatccacagtacgccggag -cgtttattttaatgtggatatgacgatgtactgttggcggcatttgctagtaaccggtcc -ttatttacgtagcgcacacgtaagcatgtctgggagaaatatggtggtacaatctcagag -aaagattacagtttggtttaaataggacttatcgggtcggaagtggaacttaataagcag -tacacaattgggcaacagacgtcttgcctattacaataggattacaatgcgttagatttc 
-agacacgttcgtgtttggctattcgtcaattccctaaatagttagacgatcaactattat -caaagtgattctttgttcatcctccattcatgtaacagatggcacactacgcataacgcc -gaggaattttaacgagatttaagagagcagttcgggcacaacccacttgactttataaca -gctcggcagcataaacggtaatatgtgacaaatttccaaacgttataagaacgtatgtgt -acttagaaaactaagtggttcatgttcaacagatgtgacgcagcaagcctaacttatcta -ttggttttgctataaaagaacaaagttacacagaatcctaagggcttgtttcacacttat -gcctagtgcttcaccatcttaaaatagcgaaaccggcacgaatcaaaccttaaaacaatg -cgcagatattggtgatggtgactccgggtatgataatggtaactgttgaccagcgcccac -ctcatcgaagtatagaaagtggttaggataaggatgagaccgaacttatttccggccata -actttagattttctacctagtacacaacatcagggcggacacgaaaccgccatcacatca -tataccaggtttaatttgcttaatgggggaagtgtcaacgaaccttcgaactttagcagg -catatggccattatatatggccccagagcagaatgctacagcagacaaaatttggattta -tgtagtttaatacctatcaaacttggtgtgaccatacttgtctaacgacagtgcacaaag -tgtaagttacaattattactactcagcagcttctgcaatgataaaatcttatcatacacg -tcacatatgataatatctacttagggggaacgggctccacaacctacatagtactcaata -cttacactattcgacaggcacaccaaacctgtacagtcccaaaagattgagtcaactttg -cagtactgcagatcacagtaatagcttagttagcgagtcaaaattagttttctacgagac -tgcacgaccgtgcaaatttccgatgtgttggctacaaatagcaacgtatgaatttgtttg -aagccacgtaaactgtacaaccttagagataagtctcaggctactaaaaacacgttgtgg -cactaacaggatcatggttgattcttacttattcggctgaccggcccaataagtaacctt -caactagaacagaataatcgggagtagtttaattcagtcaaggtgcaggtctcattgtaa -ctaacaagctctgtgtaaccaagttaaaatcgttttcttagcggattccctacttatgga -tttgagctcgtccacaatattcgatacaagaagtttgtggtccgtaacaacgaaatttta -attacgctgtgcagcctcatccaaggaattaatagaaggttgatggtaggctccgaacgc -tccatgattataatcaagtggactgtgcagtaaacgaggaaggtatcctgacgtcgtggt -gttcgtttttgttatttgtgccctatacgagtagataaaccatgaacagcacagtgtgaa -cccatggttgattttaggctaccttatttttaatttccgttacacagaaacgaattccac -aactaacatgccattaatttttcgatatcttataaaagatggtcgaaattcattcattta -ttttttttcggttctcgaaagtcaactaagctgtcgcgttttgtttctctttagaggtaa -aagtggctttgatctcctacgtttggatactagtcaaccattactccatttgatccgtga -gtatcacctgtctaacatccagcattatgactcctcggcgaagaaaagacacacttctta -gagtcgatgtgtattagctagggacacagttgtttaatacgatagtgagcccagggaggg -cagtgcgtcccccagtagatttattcagctagtgtaagtataagatatctcacccacgag -gttcaagtgatatgcagtcttagaataatacttatcctgaatttcgatattatgggtact -tcaataatccgctagcgctactttatgtctcgttggacagcaggacacatggcagtctta -aacactaaagacatcacctgaatgaatgtaatgggattacaagaatcaatgaggtattat -atacgacgtaggaaactctggatatatacagtaatctagttacgccatcgcacttcattc -ctctggaaacttagaagacatcagctgtacgtggaggaaccagacccccgtatgtagcca -aatagaaccaaagttgcttatacaaacacacccaatgacaatggaccgctggagttcgta -aactcggaacgtagtactgcacaaacccagcatttagcaataggagctacgtatgcaact -cccacgtggtaataccttcaagctatcaatatataggtgcctagctaatcgcattcgcaa -gcagtattcaagcttgtaaaccagtataataattacagaggctctatgaaacccaacttt -ccagctaaaagtcccaattaaatggttatttcgtacttttaaagtcgcccgttctgttat -tacgcgaattgattctactccaaaattaaacacaaattatcaaccgtttcatttatattt -gtcaatgcagctgtttaaaataaggctctactaaattataattaagacacttattaccag -atttctctagttaagtttgaaccagctcgactaccgcgaaagatacattcccttctctat -ttttcagttcatctatgggtcagagaagcattgaatttattctattcaccctcgtcgttc -acagcgaatcgtcagtgtgatcagtgtatgagaaatatcctaaaccgtttagtcagacca -cacgcttagaacaagtggtctaaaaagactgccctggaaggagtaagaagtatacagctg -atccggtgtatccttcagtcatctgccctatactaattacacgacgcaaggaaaaatagg -tttattttctaggcaaacccttcataggtgactccgatgtgttacgaatcatgcttgaga -atgtgctatcgttaccgacggataataacgatctccaatgaaccaaatgtagaatgtcta -ttgattacccttttactattcgacttagagataggagatagaacctcagtgtactttttt -agccgaatgggaatctttgggaggtgaatggccataaggtcgtaaatccaaccctcttaa -agtcttccatattatatcgttgttcgtggaatcgataacagatttgttgacccatagtaa -atgtatactagtttatgttgtaagtgtagattgttttccgattgccgtccaaactttatg -tcgtaattgtagaccagtaaagttgaccaaggtaagtgcccagcgatcctgcgagatcga 
-tcgccaatttttccagtcactgtaagtgtaggtttagataaagccgtatgagttatatca -taagggcctcggaaagcagcttcgaaccaaagttcccttataatagtagtttaactataa -aagtatatactggtctgtcgccctttcacgatttgttttaccggtttatgaagcgttacg -tcattagagcggctccaatttaaggttaacggcttccatgtgtagttgtatacaaggata -acttaaagtatctgttcagcgagctagttaagttatcctcgatagaacacaactcagagg -tcccaagatcgggtttgcaacttgctaatttattctcaaggcaaattgggaattatcgat -acctgtataccataaggtcgctcgatgtgatgcttatgtcttctggtgatcctaccttag -ttagtgctgattaacggaacattaatgtttatcgttttgagatttagccaattctctgat -tctaactcaagatgccttatctgacgtgctatgcagcccctaagtattttacattgtaat -aggacacgctcctttaaaactcgccaaaaggtcgttgtggttctctactggttaactata -taatttacagctttgttgagctagttcctctttggtttaagtcctcaatattagttggtt -cgagcgataagttggctagttaccttagtcactatattagatccgaatgttatgcttcat -ctgaagaccgccaccctccaaaatttcttttaagactcacttattgcaaggtgtaggtga -attcggctcgtttctcaagtggtgtatctgtacacgagtttccatattttcatcaacagc -caccgcacacttatgtcactctaggtattaaaagtcgctctacaaggggacgcaattaag -aaacagacatgctagtcaaaaataaacatagcgaggcaccactaattcggccgcttatca -atgggatgctctgcgcgagacgcgccagagctcagtagttagttcggacatacatttact -tcagatgatcaattagttttctacaaatgcttactctaccccgaaaaaagtcaccagact -cttacgtctctttagtatccttccgtcttatataaggtcagtcccccgtttcggtaccct -ggaatttactaagaataatgaaacagcccccaaggacgtacgtttacaaatgatagacca -gatcgcctagcttattccgacgcatgttgcatagaattgaaccaacggaatgtgagagta -actagatgagccgaccacagcacccgtttgcgtcgcagaatacgcctgatagttcggcca -cgaaatcatatgtcctttgagtattaagtatttgtaatgatcaatcgagctcaagcaagc -ttacacttcctcggatattcagggaacttagtgcctttgaaagatacgttgatcaacgaa -aaattgataatggctcatatggaatgcctacctcatagtgctgaattaacacagcactgc -ggacctaacttttcgaggtttcaagttcacgtctcaaaacctaataggctggaatatgta -gggatcctcggtgaatttgtgattgggtttgttgtagtactgaccaagtgaatattcttt -ttttctaaaagcagatctgctgccgggcactacgaaggagatctctgtgtatcattattg -cttcttgacatgatgactcttaaatcactgtgggtgtgcaaaacgatagcacaacccaat -tcgatagtacatattgttgatacttcgcactaaaccgttcatatttaaaggttgtgctcc -ttccttcgttaaatactggtgacttggtcctatctactattagctagacctctggggaac -cacgcccccgtaaaacctgtgcaagagagggggtcatacatcttagacatcgcgcctcca -ccagggaagcattgggtgattgaccaggtgtgtaacaaatatgattattcttatactaat -attagcaaagatgcataatgatttgtattaaatgtataattgaattgataagggtctttt -agtcagtgatagagtagtataaggtagacattagaactcttaaccggacgcagatttttc -ggtcttagtaagccaattagtcgacaaaacaaggtaagagcggttactagtagtacctat -aatgcactgaatcttcggtcgaagtatagttctaatgctatgcagattgtgacggcgaca -aatgttcagacttatatcatgaaacaagctcttgtaagtattgacaaatgaaaagattga -atatttttaaatacaaaatgcgcctacttattaggggaattaaccagattgaaggccaat -cctcacatgtaatgagataatagacgataaatgaaattcttgtaatagttgaactgctac -gtgatgggtattatatatgattgagatcctccaattgccgacgtcttgtcttgatgccca -aaagattgtcaacgaggagctccctcgcgtacctgtcgtccgtatcataaacgacgcgac -atgtacagcactccgaagtataagcaataataatgcgggtaatccagactagatcttttc -ggactcaatgcggtttcacggtaaacatgattaataccggagagtagtcgagcttatcag -cgatgcaagcgaattcattgtgccaggagatacgttgcagataaaaccggcaacgtatgt -caacaagttttggcgatctcgttgtttgtattcgacgaggcgcgggaacttcaagaacta -tcgtatattcaagtccattaccttttagtttcagactggtggagctgactaaagttatat -catcattttgtacactggtttagttaacgataatttcagatttaacatgaccagacgata -atcgctgtatatccagttggaatgtggtttgccagaaaggttaacttataatcaagcctc -tcttcagtcttgattcgtcgtatcccatccattgcgctatacctcagtgtatttggagct -gtagttataccgtgtgctaagatcagtagacatgacgagagcaatattatctaccttaca -agcatcaacggacgtctagtcggaacaaaagactctaaaactcgaacttcaggttaatat -actatagttctgtattcagcagttattcttatattcgatattatcttgcctattggatgt -ctgactttagtatattaatcatagtatctgccatgtaaaggtgccagtactaaatctgtt -tcacagtgcgaattataaacggttacaaccattaaagacaacaagaccctatagctttat -ttgaattttgtcaatgcgcaacttggagctcgcgatacatcccaattagtctatagggtc -gggacgattctacggcatttctggttataatgacaacatggattgtggcccgagaatcgc 
-tctttcattaattaagcaatcattacagtcttataagcgctacttccgagtggtagcagg -taactcgatataaggtcgcatgagccgaatagcttaaaaaacaggccaccgaacattgat -agagaataccgaccacagcgcaacctttgattactttcattaaattgtacggctcactcg -acatcaagcttaagattgcgataatgtgaactcaaatggatcagtactgaagaaccgtaa -cccacttcgcagaaagcgtacccagagaagatacgctgttacaatatacagggtgaaatt -attgcctgttcttcgtaaccatttcgccaaacttggttagaaatgatagccattcatgat -agaaataagctgaatgataccagtatctttaactatgtagtcagggggaagataacgatg -gtccatgtatgtttctgatatgtgacagtattggccgcgtaatttgctaacgaagctact -taatgcctttgagcttcatatagatttctttaatcaaaatcggcaaaaagatagtatgag -ctataatatatgctagtagagaactctggaccatcatctatatgaatactgattcgagcg -tgcaattactttagcctgcgtactactgactctacaaaacactctgagataagtttgtag -tcagtaagtcgctctctataaaccttttggatgaccattgtacagccacttatagatccc -aataaatagcacaggagacagagtttttcaatgctcgatcatttgccgatagtattttcg -tctaacctcagggcacctattatttgatacctaacctaacggccctttcacaatggagaa -atatatgacatcgggacaaacacaaatggtgggtggccaggagatatgacatggtggcgt -ctctaagaaacacggactccctctaggcaaactcacgtaaccaattttaatgtcaaacaa -aacgctcgaaaagattttgccgtgtaatgacctggtacattgactggtcaggaatacatc -actgtagttgccgtagtgtcctgttggtgttccatcaagacacatcgtataacgcaattt -acgacggacatcagatcaagttatacagattatttaagtatcacgtgtgcattgggacat -aagggatctcacacatgccttggaacatttttgctttgtgccgctttttcgctgcactac -caatccttacttaccagtatattcaaaggtcgttaacagaatgagaaaggttagggctct -aagttatcgtcgattgggatagacgagacatttgcgagcgccctccacggatacgaatct -cccatatcaatgtgaactggatgctatgcagtttagttcttacgtctcctagtggtaaaa -atcaaagtagcactcgcatagcagttattcagaacctaatacacaaaaccgtcaaacatt -ttctaattctaggtatgggccgatcataggagctaaggtgaaactcataaatgttttgtt -agatctagcatcctaaaaagatgcatatactgagtagctggcgtgcattctctcaattgt -atcctttttaactgaactagtcggtcccatttcgtgactgagatctattaaccgataaga -ttaataacactcgcattcgtatcagctcagagtgaagtttttcaataatttgactgatat -attaacttctaaaataaccctttaagcctcggatccgtttcccaatcacatcaaaaattc -ttattccaactatctacggattaacaacgtgcatggggatcgtagtaagaacttgttccg -atcactttgagtatatcaagttgacggcccggttattattgaatagaaacattcacctgc -taaattaaataccgcacatcggatacccgatttcagagggccgtcttactaagggcaggc -tttgttcggtttaactgagatgttcattattttacagtatgcttcaactaatatgtaacg -aaggacagtggatctgtctccatagtagatcttcagtcgtgaatttcataccgctcctat -ttaagttcgcgttcgagttgttgatcatggcacgtgaaagcaacccctagtattctagac -gaaaattttttctagttcatctgataatttgccaattcaaaaacaaccgctggtttcccg -gcgcattctctaaaatggaagtcgaacctagagccattatttgtcggtaacccatgagtt -ccttcttttcagaagttaatacactgtggtcctatacagaggaaaaacagcggttatata -cgatcgtggcataacaacattggatcaagatagcaatttggctacctattctaattctca -ctagattcggtattccactacaatatcggcagattaggattggatgaataatcggtgttt -aagtccggttgcgtctccaatctcctaatttttattaatattgatcttggtgacctattg -taaataaaaacttcaagactttgaataacggtgaaaagatagaagactcatttgaaaatg -gatcatccacagatccaaacattagcaagacactaatccccaactagctattctgatcgc -gatcgtgctgcagtactcctgtcacaatagtctgttcatgatctaattctttttgggctt -tgttcgatggtgattcagaatctttatccggtcgcttccctgtagctactttgtggggat -attgcccggggattatagggttgagatcgtttcctaaaagtatttaaaccaagtagactt -caactaaactacatcagaacatcgtgaagacaccatacgcggtacctttatttaccgata -acatttcttcaagaaataccggtaagcagcataatgaccctaaacagctcggggtatcgt -cgtagttttaaattttatttaggttactgctcaaggaataaaaactaactatttaattta -taataatattacaaggctcacactgattagatttgtctataagacttcgcgatcccccat -taccggattgtcttaagaataaactagataaaccatgcattttctagataaggcctttag -tctaattagatacaaaaaacacgatagttgcatccttaatttattgtgtcaaacctggaa -ccttttaattacccgcaaatcactttatgtcgagactacctctgaaatttattatctacc -taccgcatgaggacttgaaccatcttgtaggagttatgtttattagctaagattcgttta -tcctgtagcggtccatgtatattcaacaagcaaaaagcactcagaattgtttttagttga -gtcaagactgatatataaataagtttccctagttttttcgtggtgggacgatattgaatt -gaatcttaaccgaagagtttcccactctgtcgcacaataatacacgccaatatttccagc 
-cctgcttatgccttaatcggttactcaatctcccattgaagttcattttgatctgcatag -aagtttcgggcccagccttttttctgccaccttcctccaagctctgtagacgcactctaa -gattgatgctcacatgtattaattctacattaacataaatatataagtcatgcatcttcg -agtaaaatatctggttctccaacatgtcctggcacgtatcgttataatgcccatacatgt -agtattaaaatgattgggttaactggatattaagatcatcgaaattgtaaagtcaaatta -acaatactgtctcaagaccgtgtattcctcgtgctcggaagggctattacgcttacttcc -gttttggtatcttaatatgactttcaaaaattaagttgcagtgagtcctacctgcgtgca -tcggttagcaagagtataaaagttgtttaaacgaactacttgctttacaataccggtcgt -atatatcgccgtgaatccagaagattgtcttctttggattatcaaccgagatcctgtgga -ccgatgttttgggaccttcacagaggactccaggtagagctcgcttttgcattaatctaa -gaattgtacctctctaaaagatctaaaacagtgaatgtgtatttcatggaaaaacacaga -gaaacgtaaattactttaggccgaaaggcacatgagttattatacatatacgagatggtg -gtatacatcgaattcggggcatacactatagttgcattgtatttagctgctttaaataat -atgatattaccttccttacataagacattaccggcataccctggttttcaacttgtgggg -ctttttgacgatcgcactctcatttgatccgagtagggcggtgacccctgcttttcaaat -acaaaaatttcgctatgaaggtaatagattacttttcgctgttatgatagaaacggtaaa -tttaaaattgaaacttctagaaaagtaaagtaacgagaaatgattttgtgaataatgcgg -tcatgattgcgcaagtaagaaaaaaaggcaaaaggatgcgcggaatagaaacttatcagt -cacgggtatcttgatttcattcttcttgtcaattgccgacataggatgaaatcagattcc -aatgcaatacacagtaacccccacccttgattgtaatgtcgatttgaagttgtacgcgtc -gacgaagtggatagtatacgggccttttgtacggtgcgatcaactatgaatctcggcgag -ttagatggtcgtacaatctcacacatagaggtcacttgcctgtaatgacgaattttcggc -taggtactcgaactttattagaagtaaaaatgtgggcaaaagaaggattccattttacaa -gacgattacaatgagttacatgtctctcaacgtagtctttccctagtagtctttgaacta -tttaggtactccagaaaattttagcaaagggtttctgtgtgaatccgccattcatgttta -tgatggaacaataagaataacgccctcgtatgttatcgacagtgaagtcagcagttcggc -caaaaacatattcaatttagtacagatccccagaagttaagctaagtgctctaaaatggc -ctaaacggttatcaaagtaggtctaattactatactaacgggtgcatcgtaataactgct -gtcgatgcaacactatatgatagtgtcgttttgctatatatgtacaatgtgacaaagaag -ccttagcgattcttgcaaacttaggacttcggattctcaatcttaaatgtccgaaaacgc -aaagattcaaaaatttaatctatgagcagatatgcctgatggtgactacgcgtatgttaa -ggctaaatgttgacaaccgcacacataatcgaactattgatagtcgggagcataaccagg -tgaacgtactttgttcacgacatttattgacatgttctaaatacgtctcaaaatcacggc -gcactagaaaacgcaatcaaatcattgtcctggtttaagggccgtaatgccggtagtgtc -aaacttcatgagaactttagctggcttttggccagtatttagggaccaagagcactagcc -ttaagctgaatattttgccatttatctactgttataactttaaaacttggtggcaccaga -cttgtcgatacacacgcatcaatctgtaacgtaaaaggtttactaagaacaagcgtagga -attgagtttatattatatttaaactaaaagatgatattagcttctgagggcgatagggct -ccaaatcataaagaggaatatattattacacgattagaaacccacaacatacctcgaatc -gcccaaaagtttgacgaaacttggcagtactccacatctcagtaatacagttgggagagt -ctcaaatgttgttttattactcaatgaaccaccctcataatttcactgctgttccattaa -atttgcaaacgatcatttgctttgaagaaacgtaaaatcgacaaaattacagataagtag -atgcataataaaaaaaactgctcgctataacacgatcatcgtgcattcttacttaggagc -atcacccgcacaataacgtaccttaaactacaacactattagaccgagtactgtaattca -cgaaagctcaagctcgcattgtaaagaacttgctctctcgtaaaatgtgataatagtttg -cggagaggattcaattattttccattgcacctactccactagattcgataaaagaaggtg -gtcctcccttaaaaagaaatgttaagtaacatcggaaccataagcaaagcatgtaagtga -accgtcatccttccctaagaaacataaaggtttttaataatgtcgactgtgaactataac -tgcatcctttcctgacctactccggttccttgttgttatttctgaacgagaccagtagat -aaacaatgtaaaccacagtgggtaccaatggtgcatgtgacgctaccgttgttttaagtg -cccgtacaaacataagaagtcataatcttacttgaaattaattttgccttttattttttt -tcaggctcgaaattaatgatttgttttttttgaccttctagttacgctaatatgcggtcg -cctgtggtttctattgagtcctataacgggatgggatctaatacgtttggttactagtaa -acaaggtataaatttgataccggagtatcaactgtataacatcaagctttatgactcata -cgcgaagtaatgacacaaggctttcaggagatcgcgagtacagagccactaaggggtgta -ttacgatagtgacaccaccgagcgcactcactccccaagtagatttatgatcctacgcta -agtattagatatataaccaaagaggttctagtcagtgcaactcttagaataataattagc 
-cggttttgcctttttaggcctaatgcaatattcagctagcccttatgtatctcgcgttcc -acagcaccactcatggcacgcgtttaaactaatcaaatataatctatgaatgttatgcca -gtacttgaataaatcaggttttttataagtccttgcatactctcgttatatactgttaga -gtcttaccccatagaaattctttcatctgcaaacttagaagaattctcagctacggggag -cataaagtccccaggatgttgacaaatacaacaaatgtggcttatacaaacactccatat -gaaaatcgaaccctcgtggtagttttagccgaaccttgtacggataaatccctccatttt -ccaatagcagatacctatcctactacctcgtggtattaaattaaagcttgaaatatagag -ctgcatagcttatccaattcccaagcacgagtctaccgtcgtaaccacgatttgatttac -agacgctagagcaaacccatctttaaacatataagtaaaaattaaagggtgagtgcgtac -gtgtttactagcaacttcgcttattaagacaattgtttataagccataattaaaaacata -tgttcaacaggttcattgatatttgtaattgcacaggtttttaataaggatctacgtaag -tataatgaacaaactttttaccagagttatattctgtactttgaaaatgctcctctaccg -ccttagagactttcaattagattttttgcagttaatctatgcgtaagtgaaccatgcaag -ggatgcgattcaaccgcctcgtgctaaccctatcgtctgtctcataactgtaggtctaat -ataattttcagttttcgaacacataaccctttgaaaatctgctatttaatgtctcacctg -catgcactatcttctatactgctcagaacggctatacgtcactatgctccaagtgacgat -ttaaacgaagcaaggaataataggtttattttagtgcaaaacaattaagtgcggactacg -tgctctttacaataagccttgtgattgggctataggttaagtcccatattaacgatctcc -aatgtacaaaatcgacaatcgctttgcattacccggttactagtcgaattacagatagct -gttagatactcactctaattttggacaacaatcccaatcttggggtcgtctatcgcctga -agctcgtaaatccttccatcttaaacgattacatattatagacttgttcggggtagagat -atcacagttgtgcaaacattgtaaatcgatactagtttatgttggtagtctagttgcttt -taccattccccgaaaaacttgatctactatttcgacaacagtaaacttgaactaggtaag -tgaaaacagagaatgcctcatagtgccactatttgtccactatatgtaagtgtagcttta -cataatccactatgactgagatcattacggcctaggaaagcagcgtagaaaaaaagggcc -cggatattacgactgtaactataaaactagttactggtagcgcgccatgtatagatttgt -tttaccggttgtggttgcgttaacgaatttcagccgcgaaaattgatccgttaaccagtc -catctcgacttctataaaacgataaagtaaagttgatgttcagcctccttcttatggttg -catcgagagtacactactcagtgggaaatagatcggggttcctacttcagattgtattat -ctaggcaattgccgattgtgccatacctggataaaataagctacctacatgtgatgctta -tctattatcgtcatactaccttagggtgtcctgttgaacgctacattaatctttagccgt -ttgagatgttccaatggataggagtctaacgcatgatgaagtttaggaaggcagagcatc -ccactaagtatgtgacagtgtatttcgaaacgagacgttataaatagaaaaaaggtcctt -ctggttctattctgctgaactattgaatggaaagattggttgacctacgtactatttgct -tgaagtcatcaatttgacggggtgagagacatatggtgcatactttacggactctatatt -ttagatcagaagcttagcagtcttctctacaccccctcacgacataattgcttttaagaa -tctatgtttgattcctctacgggaattcggatccgttcgcatgtgcggtttatctaaacc -aggggacatatgttcagctaaagcatacgaacactttgctaactagacgtatgtatagta -gctataaatcccgacgatatttacaaaaagaaatgagactcaaatatatacatagcgacc -ctacacttattcgcaccctgatctaggcgatcctagcacccacacccgaaagtgagcact -agtgtcttccgtattaaatttactgcagttgagattttagttgtctactaaggattactc -taacccgtaataaggatcaagactcggtactagctttactatcattccctatgtgttttc -ctaactcacaagggtacgtaccagcctatgtaattacaataatgataaagacacaaagga -agtaactttacaaatgagtctccagttacactagcttagtccctcccatcttgctttgaa -gtctaaatacgcaatctctgaggatatacagcagaagaacactcataacgttggagtcca -agaattagactcatagggcccccaacatttaatatgtactgtgagtttgaaggtgttcta -ttgttaattcctgctcttgatacatgacacgtactccgtgtttaaggcttcggactgact -ttctttcataagttgagcaacgaaaatttcagaatcgataagttggattcactaactaat -acggctgattgaaaactccactccggacctatatggtcgacctttatacgtaaccgatat -aaaacttataggctggtatatcgagccttcctagcgcaatttcggatggggtttcttcta -ctactcaacaacggaatagtctttgtttagtaaaccagagctcaggacgcccaatacgta -ggagagcgctgtggagcatgtgtcattatggactggagcactcttaaatcactctgcgtg -tgctaaacgatagatcataacatgtcctgagtaaattttcttgatacgtcgcaatatacc -gttattagttaaacgttctcatccgtcatgcgtgaaatacggctgtcgtgctcagatata -ctattagcgactcatctcgcctaacacgcacacgtataaactcggaatgactgccgctct -tacatattagaaatacagactacaccacggaagcattgggtcattctcaaccgctgtata -aaagatgattagtcttataataagattaccaaagaggcagaatcatgggtagtaaatcta 
-ttattcaagtgattaccgtcgtgtaggcagggagtgaggacgagatggtactcaggacaa -atattaaccggacgaagtggtttacgtcgtactttcactattagtagtaaatacaaggta -acaccggggaatagtactaaatataatgatatctatcttcgggagaacgagtcgtctatt -gctttgaacattctcaaggcgtaaaatgtgctgacttatagcatgatacaaccgattgtt -acttttgtctattcaaaagattgaatagttttttatacaaaagccgcatacttatgacgg -ctagtatacagtttcatcccctagcatcaatgctatggacagtattgaacttataggaaa -ttcttctaatagggcaaatccgtcgtgatgcctattttttttcagtcacatcctcaaatg -gcactagtattgtcgggatcccattaacaggctcaaccacgagctcacgcgaggacatgt -agtccgtatctttaacgaagcgacagcgacagaactcccatggataaccaattataaggc -ccgtaatcctctagacatcgtttaccaataaatccgctttctccgtaatcatgttgaata -ccccagagtagtccagatgataaccgatgaaacacaagtctttctcaatgcacttacggt -gaacttattaccgccaacgtagctcatcaaggttgcgacatctagttgtgtgtttgcgac -gagcccagcgaacttcatcaactttcgtatattcaacgccttgtaattttactttaagac -gcctggtgatgtagattcttagataatcagtttgttatcggctgtactttaccataattt -cacaggtttcaggtcaagaagattatagctgtatatacagttccatgctcggtgcacaga -aacgtgatcggataataatcaatcgcttatgtcgtctttaggcgtatccaatacatgccc -cgataccgcagtgtatttcgacatgtaggtataccgtcgcatttgagctcgagtcaggac -gtcagctagattagattccttaatagaatataccgacctctagtccgaactaaactatag -ataacgccaacttcaggttaattgtctagtcgtctgtttgcagatgggattcttagatga -gtgagtatcggccatattggttcgagcactttagtttttgatgcataggatatgcaatgt -atagctgaaagtactttatctgtttcaaactcacattgattaaaccggtaaacctttaaa -gactacaagaaaatattcagtgagggcaattttgtcaatcacaatcttccagctagagat -acttcacaatttgtcttgaggctacgcaacattagacggattttcgcgttttattgaaat -aatcgaggggcccaagagtatccatagttcattttgtaagatttctttacaggcttatta -cagcttcttcagactcctacatgcttacgagttatatgctagcatgtgaacaatagatta -atatacaggaaaacgtacattgagagagatgaccctacacagcgcaaccgttgagtactt -tcattaaagggtaacgctctcgagacagcatccttaagatggccttattgtcaaatcatt -tgcagaagtacgcaagatccctaaccaacgtagaagaatccctacaaacacatgagacgc -ggtgaaaatagacagggtgttagtattcaatcttcggagtatcaatttcgccaatcttgg -tgagaaagcataccctttcttcagagaaagaagatcaatcataacactatctttaacgag -gtacgcacgcgcatcattacctgcctccatggatctttaggatagcggaaagtattggca -gcgtattgtgatttcgttcctactttatcaatttcacattcatatacatgtcttttatca -aaatcgccaataagataggatgagctatattagatgctagtagagttcgcgccaacatca -tcgataggaatactcaggacagcgtgataggacttttcaatccctaatactctctataat -tataactctctcttaagtttggaggcagtaacgcgctctatataatcagtttgctgcacc -attcttcagcctctgatacatacaaataaattccacagcagtaagagggtttaattgaga -catcttgggaacttaggattttactctaacatcaccgaaacgattattggataccgtacc -taaacgaactttctcaaggcagtaatataggacatccgcaataacacaaatgctgcctcc -ccaggagttatgtcttcctggaggctatatcttacacccactcactataggcaaactaaa -gtttaaatgttgattgtctaaaaaaaagatagataagagttggccggcgtagcacatgcg -aaagtgaatcgtaagctataattctctggacttgaagttctgtcctgttcctctgcaaga -aacaaacttcctttaaagctatttacgacgcacatctcagcaagttataaacatgttgga -agtttctagtcggaattcccaaagaacggatctatctaatgcattcctacatttttcctg -tctgccgatggtgccatcctattcaaagaatttcttaaaagtagattaaatgggactttt -aacaatgagtaaccttacgcctctaagggttcctcgagtgccatacaccagtcaggtccg -agccacatacacggagaacattctaacatagcattctcaactcgatcatttgcaggttac -ttctttcctatcctagtgctaaaaatcatacttgcaatcccatagcacggattaagaacc -taagaaacaattcagtaaaacatgttcgaattcttggtatgggaacatcattgcagctat -ggtctaacgcattaatgtttgggtacatcttccatcatataaacaggaagagtctgacga -cagggagtgcttgcgatcatgtctatcattgtgaaatcaaattgtagctcacatgtcgtc -tatgagagcgtgtatccgataagatttagaaaaatagaagtcgtataagatctcactgaa -cttttgaatgaatgtgaagcatatatgatctgctttaataaaactttatccataggatac -gtttccaaatcaattcaataattattagtcaaaatagataaggatgaacaacctgaaggc -cgatcggacgtagaaagtggtcccatcactttgagttgatattgttgaaccacacgttat -tatggttttcaaacagtctcaggatattgtatatacagataatccgataccagttgtctg -acgcccctcttacgtaccccaccctttgtgacgtttaaagcagttgttcagtattttaaa -ctaggcggcaactaatttggaaagaagcacagtggatatgtctaaattcttgttattcag 
-gcctgaatttaatacaccgcatagttaacttcgcggtagagttgttcatcatgcctcctc -taagctaccacttctatgatacaccaatagttgttctacggaatctgataattggccaag -tcataaacttccgctgcgttcaacccccttgctcgaatatccaactcgaaaagacagcct -tttggtgtccggaacaaatcagttacttcttttctgatgttaattctctgtggtcagata -cagaccaaaaactccgcggatttaccatcctccaagaacaaatttgcatcaacatagcat -tttggctacatattctaagtctcaatagtttaggttttcaactacattatcccaacatta -ggattggaggaataatagctgggtaagtccccttgcgtctacaatcgactattttttatg -aatatgcttctgccgcacctatggttattaaaaaagtcatgactttgaagaaccctgaaa -agatagatgaatcaggtgtaatggcagcagccaaagagcatataattagcaacactctaa -gaacattatagatatgatgatagcgatcgtcatgatgttatccggtcacaatagtagctt -catcagctaattcgttttgccagtggtgacttgcgctggaagaatcgttatacggtccct -tccctcttgatacggtgggggcttattcaaccgcgtggattgggttgtcatacttgcatt -aaacgatgtaaaccatctagtagtcaactatactaaatcacaaaatagtgatcaatacat -acccgcttcatggttttaaccatttaattgattaaagatattccgctaagaaccattatc -tacctaaactgatcgccgtatcctagtagtttgaaatttgatgtaccgtaatgatcaacg -aagtaaaacgttatattgtatgtagaataataggtcttggagctaaatgatgtgattggt -agtgaagacttacccttacaactttaccggtttctcggaagaatatactagagaatcaat -gcatgggctacataagcactttagtctaatgagataaaaaatacacgagtcttccatcat -gaattttttgtcgaaaaactcgaacctggtaatttaaaccatatatctttatgtcgtcaa -taactctcatatgttttatataacttcccaatcacgacttgtaactgcttgttcgactga -gctgtttgagctatgaggccgggatccggttgagctacatctatttgctacaagaaaaat -gaaagcacatttgttgggagttctggctacactcatagagaaataagtggcccgagtggg -tgcggcctgcctccatattcaagtgtatcttaaaccaagtggttccaacgctcgcgctaa -agaattaaagcctttatttcctccacggagtagcccgtaatccggttcgaaagagaccat -tgaagttaattttcatatccagtgaagtttaggcacaagcatgtgttctgccacatgcct -caaagcgctcttcaaccaagatatgattcatcctaacttcgatgaatgcgtctgtaacat -aaatatagaaggaatgattcggcgagttaattttcgccttctccaacatggcatccctac -gttcgttataaggaccatacatgtaggttttaaaggtttgcggttaatcgatatttacat -catagaaattctatagtcaaatttacaagactctagatactcactcgttgcagccggcta -ggaagcgctttgtaccttacttcccttttcgttgcgtaatatgaatttcatatagtaagt -tcaaggcactcatacctccgtgaagagggtagatagactattaaagttgtttaatagtac -gtattgatggaaatgacccgtaggagatttaccactcaatccacaagattcgctgctgtg -cattatcaaaacagtgcatgtcgaaacatgggttgggtccttcaaacacgaatccaggta -gagatacctttgcaattttt diff --git a/collector/compile-benchmarks/regex-1.5.5/examples/regexdna-output.txt b/collector/compile-benchmarks/regex-1.5.5/examples/regexdna-output.txt deleted file mode 100644 index d36baa5be..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/examples/regexdna-output.txt +++ /dev/null @@ -1,13 +0,0 @@ -agggtaaa|tttaccct 0 -[cgt]gggtaaa|tttaccc[acg] 3 -a[act]ggtaaa|tttacc[agt]t 9 -ag[act]gtaaa|tttac[agt]ct 8 -agg[act]taaa|ttta[agt]cct 10 -aggg[acg]aaa|ttt[cgt]ccct 3 -agggt[cgt]aa|tt[acg]accct 4 -agggta[cgt]a|t[acg]taccct 3 -agggtaa[cgt]|[acg]ttaccct 5 - -101745 -100000 -133640 diff --git a/collector/compile-benchmarks/regex-1.5.5/examples/shootout-regex-dna-bytes.rs b/collector/compile-benchmarks/regex-1.5.5/examples/shootout-regex-dna-bytes.rs deleted file mode 100644 index 773fd9ba8..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/examples/shootout-regex-dna-bytes.rs +++ /dev/null @@ -1,68 +0,0 @@ -// The Computer Language Benchmarks Game -// https://benchmarksgame-team.pages.debian.net/benchmarksgame/ -// -// contributed by the Rust Project Developers -// contributed by TeXitoi -// contributed by BurntSushi - -use std::io::{self, Read}; -use std::sync::Arc; -use std::thread; - -macro_rules! 
regex { - ($re:expr) => { - ::regex::bytes::Regex::new($re).unwrap() - }; -} - -fn main() { - let mut seq = Vec::with_capacity(51 * (1 << 20)); - io::stdin().read_to_end(&mut seq).unwrap(); - let ilen = seq.len(); - - seq = regex!(">[^\n]*\n|\n").replace_all(&seq, &b""[..]).into_owned(); - let clen = seq.len(); - let seq_arc = Arc::new(seq.clone()); - - let variants = vec![ - regex!("agggtaaa|tttaccct"), - regex!("[cgt]gggtaaa|tttaccc[acg]"), - regex!("a[act]ggtaaa|tttacc[agt]t"), - regex!("ag[act]gtaaa|tttac[agt]ct"), - regex!("agg[act]taaa|ttta[agt]cct"), - regex!("aggg[acg]aaa|ttt[cgt]ccct"), - regex!("agggt[cgt]aa|tt[acg]accct"), - regex!("agggta[cgt]a|t[acg]taccct"), - regex!("agggtaa[cgt]|[acg]ttaccct"), - ]; - let mut counts = vec![]; - for variant in variants { - let seq = seq_arc.clone(); - let restr = variant.to_string(); - let future = thread::spawn(move || variant.find_iter(&seq).count()); - counts.push((restr, future)); - } - - let substs = vec![ - (regex!("B"), &b"(c|g|t)"[..]), - (regex!("D"), &b"(a|g|t)"[..]), - (regex!("H"), &b"(a|c|t)"[..]), - (regex!("K"), &b"(g|t)"[..]), - (regex!("M"), &b"(a|c)"[..]), - (regex!("N"), &b"(a|c|g|t)"[..]), - (regex!("R"), &b"(a|g)"[..]), - (regex!("S"), &b"(c|g)"[..]), - (regex!("V"), &b"(a|c|g)"[..]), - (regex!("W"), &b"(a|t)"[..]), - (regex!("Y"), &b"(c|t)"[..]), - ]; - let mut seq = seq; - for (re, replacement) in substs { - seq = re.replace_all(&seq, replacement).into_owned(); - } - - for (variant, count) in counts { - println!("{} {}", variant, count.join().unwrap()); - } - println!("\n{}\n{}\n{}", ilen, clen, seq.len()); -} diff --git a/collector/compile-benchmarks/regex-1.5.5/examples/shootout-regex-dna-cheat.rs b/collector/compile-benchmarks/regex-1.5.5/examples/shootout-regex-dna-cheat.rs deleted file mode 100644 index 1bde7ab1f..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/examples/shootout-regex-dna-cheat.rs +++ /dev/null @@ -1,90 +0,0 @@ -// The Computer Language Benchmarks Game -// https://benchmarksgame-team.pages.debian.net/benchmarksgame/ -// -// contributed by the Rust Project Developers -// contributed by TeXitoi -// contributed by BurntSushi - -// This technically solves the problem posed in the `regex-dna` benchmark, but -// it cheats by combining all of the replacements into a single regex and -// replacing them with a single linear scan. i.e., it re-implements -// `replace_all`. As a result, this is around 25% faster. ---AG - -use std::io::{self, Read}; -use std::sync::Arc; -use std::thread; - -macro_rules! 
regex { - ($re:expr) => { - ::regex::Regex::new($re).unwrap() - }; -} - -fn main() { - let mut seq = String::with_capacity(50 * (1 << 20)); - io::stdin().read_to_string(&mut seq).unwrap(); - let ilen = seq.len(); - - seq = regex!(">[^\n]*\n|\n").replace_all(&seq, "").into_owned(); - let clen = seq.len(); - let seq_arc = Arc::new(seq.clone()); - - let variants = vec![ - regex!("agggtaaa|tttaccct"), - regex!("[cgt]gggtaaa|tttaccc[acg]"), - regex!("a[act]ggtaaa|tttacc[agt]t"), - regex!("ag[act]gtaaa|tttac[agt]ct"), - regex!("agg[act]taaa|ttta[agt]cct"), - regex!("aggg[acg]aaa|ttt[cgt]ccct"), - regex!("agggt[cgt]aa|tt[acg]accct"), - regex!("agggta[cgt]a|t[acg]taccct"), - regex!("agggtaa[cgt]|[acg]ttaccct"), - ]; - let mut counts = vec![]; - for variant in variants { - let seq = seq_arc.clone(); - let restr = variant.to_string(); - let future = thread::spawn(move || variant.find_iter(&seq).count()); - counts.push((restr, future)); - } - - let substs = vec![ - (b'B', "(c|g|t)"), - (b'D', "(a|g|t)"), - (b'H', "(a|c|t)"), - (b'K', "(g|t)"), - (b'M', "(a|c)"), - (b'N', "(a|c|g|t)"), - (b'R', "(a|g)"), - (b'S', "(c|g)"), - (b'V', "(a|c|g)"), - (b'W', "(a|t)"), - (b'Y', "(c|t)"), - ]; // combined into one regex in `replace_all` - let seq = replace_all(&seq, substs); - - for (variant, count) in counts { - println!("{} {}", variant, count.join().unwrap()); - } - println!("\n{}\n{}\n{}", ilen, clen, seq.len()); -} - -fn replace_all(text: &str, substs: Vec<(u8, &str)>) -> String { - let mut replacements = vec![""; 256]; - let mut alternates = vec![]; - for (re, replacement) in substs { - replacements[re as usize] = replacement; - alternates.push((re as char).to_string()); - } - - let re = regex!(&alternates.join("|")); - let mut new = String::with_capacity(text.len()); - let mut last_match = 0; - for m in re.find_iter(text) { - new.push_str(&text[last_match..m.start()]); - new.push_str(replacements[text.as_bytes()[m.start()] as usize]); - last_match = m.end(); - } - new.push_str(&text[last_match..]); - new -} diff --git a/collector/compile-benchmarks/regex-1.5.5/examples/shootout-regex-dna-replace.rs b/collector/compile-benchmarks/regex-1.5.5/examples/shootout-regex-dna-replace.rs deleted file mode 100644 index 20694e06f..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/examples/shootout-regex-dna-replace.rs +++ /dev/null @@ -1,17 +0,0 @@ -use std::io::{self, Read}; - -macro_rules! regex { - ($re:expr) => {{ - use regex::internal::ExecBuilder; - ExecBuilder::new($re).build().unwrap().into_regex() - }}; -} - -fn main() { - let mut seq = String::with_capacity(50 * (1 << 20)); - io::stdin().read_to_string(&mut seq).unwrap(); - let ilen = seq.len(); - - seq = regex!(">[^\n]*\n|\n").replace_all(&seq, "").into_owned(); - println!("original: {}, replaced: {}", ilen, seq.len()); -} diff --git a/collector/compile-benchmarks/regex-1.5.5/examples/shootout-regex-dna-single-cheat.rs b/collector/compile-benchmarks/regex-1.5.5/examples/shootout-regex-dna-single-cheat.rs deleted file mode 100644 index 70a979c6d..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/examples/shootout-regex-dna-single-cheat.rs +++ /dev/null @@ -1,75 +0,0 @@ -// The Computer Language Benchmarks Game -// https://benchmarksgame-team.pages.debian.net/benchmarksgame/ -// -// contributed by the Rust Project Developers -// contributed by TeXitoi -// contributed by BurntSushi - -use std::io::{self, Read}; - -macro_rules! 
regex { - ($re:expr) => { - ::regex::Regex::new($re).unwrap() - }; -} - -fn main() { - let mut seq = String::with_capacity(50 * (1 << 20)); - io::stdin().read_to_string(&mut seq).unwrap(); - let ilen = seq.len(); - - seq = regex!(">[^\n]*\n|\n").replace_all(&seq, "").into_owned(); - let clen = seq.len(); - - let variants = vec![ - regex!("agggtaaa|tttaccct"), - regex!("[cgt]gggtaaa|tttaccc[acg]"), - regex!("a[act]ggtaaa|tttacc[agt]t"), - regex!("ag[act]gtaaa|tttac[agt]ct"), - regex!("agg[act]taaa|ttta[agt]cct"), - regex!("aggg[acg]aaa|ttt[cgt]ccct"), - regex!("agggt[cgt]aa|tt[acg]accct"), - regex!("agggta[cgt]a|t[acg]taccct"), - regex!("agggtaa[cgt]|[acg]ttaccct"), - ]; - for re in variants { - println!("{} {}", re.to_string(), re.find_iter(&seq).count()); - } - - let substs = vec![ - (b'B', "(c|g|t)"), - (b'D', "(a|g|t)"), - (b'H', "(a|c|t)"), - (b'K', "(g|t)"), - (b'M', "(a|c)"), - (b'N', "(a|c|g|t)"), - (b'R', "(a|g)"), - (b'S', "(c|g)"), - (b'V', "(a|c|g)"), - (b'W', "(a|t)"), - (b'Y', "(c|t)"), - ]; // combined into one regex in `replace_all` - let seq = replace_all(&seq, substs); - - println!("\n{}\n{}\n{}", ilen, clen, seq.len()); -} - -fn replace_all(text: &str, substs: Vec<(u8, &str)>) -> String { - let mut replacements = vec![""; 256]; - let mut alternates = vec![]; - for (re, replacement) in substs { - replacements[re as usize] = replacement; - alternates.push((re as char).to_string()); - } - - let re = regex!(&alternates.join("|")); - let mut new = String::with_capacity(text.len()); - let mut last_match = 0; - for m in re.find_iter(text) { - new.push_str(&text[last_match..m.start()]); - new.push_str(replacements[text.as_bytes()[m.start()] as usize]); - last_match = m.end(); - } - new.push_str(&text[last_match..]); - new -} diff --git a/collector/compile-benchmarks/regex-1.5.5/examples/shootout-regex-dna-single.rs b/collector/compile-benchmarks/regex-1.5.5/examples/shootout-regex-dna-single.rs deleted file mode 100644 index b47405960..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/examples/shootout-regex-dna-single.rs +++ /dev/null @@ -1,57 +0,0 @@ -// The Computer Language Benchmarks Game -// https://benchmarksgame-team.pages.debian.net/benchmarksgame/ -// -// contributed by the Rust Project Developers -// contributed by TeXitoi -// contributed by BurntSushi - -use std::io::{self, Read}; - -macro_rules! 
regex { - ($re:expr) => { - ::regex::Regex::new($re).unwrap() - }; -} - -fn main() { - let mut seq = String::with_capacity(50 * (1 << 20)); - io::stdin().read_to_string(&mut seq).unwrap(); - let ilen = seq.len(); - - seq = regex!(">[^\n]*\n|\n").replace_all(&seq, "").into_owned(); - let clen = seq.len(); - - let variants = vec![ - regex!("agggtaaa|tttaccct"), - regex!("[cgt]gggtaaa|tttaccc[acg]"), - regex!("a[act]ggtaaa|tttacc[agt]t"), - regex!("ag[act]gtaaa|tttac[agt]ct"), - regex!("agg[act]taaa|ttta[agt]cct"), - regex!("aggg[acg]aaa|ttt[cgt]ccct"), - regex!("agggt[cgt]aa|tt[acg]accct"), - regex!("agggta[cgt]a|t[acg]taccct"), - regex!("agggtaa[cgt]|[acg]ttaccct"), - ]; - for re in variants { - println!("{} {}", re.to_string(), re.find_iter(&seq).count()); - } - - let substs = vec![ - (regex!("B"), "(c|g|t)"), - (regex!("D"), "(a|g|t)"), - (regex!("H"), "(a|c|t)"), - (regex!("K"), "(g|t)"), - (regex!("M"), "(a|c)"), - (regex!("N"), "(a|c|g|t)"), - (regex!("R"), "(a|g)"), - (regex!("S"), "(c|g)"), - (regex!("V"), "(a|c|g)"), - (regex!("W"), "(a|t)"), - (regex!("Y"), "(c|t)"), - ]; - let mut seq = seq; - for (re, replacement) in substs { - seq = re.replace_all(&seq, replacement).into_owned(); - } - println!("\n{}\n{}\n{}", ilen, clen, seq.len()); -} diff --git a/collector/compile-benchmarks/regex-1.5.5/examples/shootout-regex-dna.rs b/collector/compile-benchmarks/regex-1.5.5/examples/shootout-regex-dna.rs deleted file mode 100644 index b96518e4c..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/examples/shootout-regex-dna.rs +++ /dev/null @@ -1,68 +0,0 @@ -// The Computer Language Benchmarks Game -// https://benchmarksgame-team.pages.debian.net/benchmarksgame/ -// -// contributed by the Rust Project Developers -// contributed by TeXitoi -// contributed by BurntSushi - -use std::io::{self, Read}; -use std::sync::Arc; -use std::thread; - -macro_rules! 
regex { - ($re:expr) => { - ::regex::Regex::new($re).unwrap() - }; -} - -fn main() { - let mut seq = String::with_capacity(51 * (1 << 20)); - io::stdin().read_to_string(&mut seq).unwrap(); - let ilen = seq.len(); - - seq = regex!(">[^\n]*\n|\n").replace_all(&seq, "").into_owned(); - let clen = seq.len(); - let seq_arc = Arc::new(seq.clone()); - - let variants = vec![ - regex!("agggtaaa|tttaccct"), - regex!("[cgt]gggtaaa|tttaccc[acg]"), - regex!("a[act]ggtaaa|tttacc[agt]t"), - regex!("ag[act]gtaaa|tttac[agt]ct"), - regex!("agg[act]taaa|ttta[agt]cct"), - regex!("aggg[acg]aaa|ttt[cgt]ccct"), - regex!("agggt[cgt]aa|tt[acg]accct"), - regex!("agggta[cgt]a|t[acg]taccct"), - regex!("agggtaa[cgt]|[acg]ttaccct"), - ]; - let mut counts = vec![]; - for variant in variants { - let seq = seq_arc.clone(); - let restr = variant.to_string(); - let future = thread::spawn(move || variant.find_iter(&seq).count()); - counts.push((restr, future)); - } - - let substs = vec![ - (regex!("B"), "(c|g|t)"), - (regex!("D"), "(a|g|t)"), - (regex!("H"), "(a|c|t)"), - (regex!("K"), "(g|t)"), - (regex!("M"), "(a|c)"), - (regex!("N"), "(a|c|g|t)"), - (regex!("R"), "(a|g)"), - (regex!("S"), "(c|g)"), - (regex!("V"), "(a|c|g)"), - (regex!("W"), "(a|t)"), - (regex!("Y"), "(c|t)"), - ]; - let mut seq = seq; - for (re, replacement) in substs { - seq = re.replace_all(&seq, replacement).into_owned(); - } - - for (variant, count) in counts { - println!("{} {}", variant, count.join().unwrap()); - } - println!("\n{}\n{}\n{}", ilen, clen, seq.len()); -} diff --git a/collector/compile-benchmarks/regex-1.5.5/perf-config.json b/collector/compile-benchmarks/regex-1.5.5/perf-config.json deleted file mode 100644 index 54fc0dc0d..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/perf-config.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "touch_file": "src/lib.rs", - "category": "primary", - "artifact": "library" -} diff --git a/collector/compile-benchmarks/regex-1.5.5/rustfmt.toml b/collector/compile-benchmarks/regex-1.5.5/rustfmt.toml deleted file mode 100644 index aa37a218b..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/rustfmt.toml +++ /dev/null @@ -1,2 +0,0 @@ -max_width = 79 -use_small_heuristics = "max" diff --git a/collector/compile-benchmarks/regex-1.5.5/src/backtrack.rs b/collector/compile-benchmarks/regex-1.5.5/src/backtrack.rs deleted file mode 100644 index a3d25d662..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/src/backtrack.rs +++ /dev/null @@ -1,288 +0,0 @@ -// This is the backtracking matching engine. It has the same exact capability -// as the full NFA simulation, except it is artificially restricted to small -// regexes on small inputs because of its memory requirements. -// -// In particular, this is a *bounded* backtracking engine. It retains worst -// case linear time by keeping track of the states that it has visited (using a -// bitmap). Namely, once a state is visited, it is never visited again. Since a -// state is keyed by `(instruction index, input index)`, we have that its time -// complexity is `O(mn)` (i.e., linear in the size of the search text). -// -// The backtracking engine can beat out the NFA simulation on small -// regexes/inputs because it doesn't have to keep track of multiple copies of -// the capture groups. In benchmarks, the backtracking engine is roughly twice -// as fast as the full NFA simulation. Note though that its performance doesn't -// scale, even if you're willing to live with the memory requirements. 
Namely, -// the bitset has to be zeroed on each execution, which becomes quite expensive -// on large bitsets. - -use crate::exec::ProgramCache; -use crate::input::{Input, InputAt}; -use crate::prog::{InstPtr, Program}; -use crate::re_trait::Slot; - -type Bits = u32; - -const BIT_SIZE: usize = 32; -const MAX_SIZE_BYTES: usize = 256 * (1 << 10); // 256 KB - -/// Returns true iff the given regex and input should be executed by this -/// engine with reasonable memory usage. -pub fn should_exec(num_insts: usize, text_len: usize) -> bool { - // Total memory usage in bytes is determined by: - // - // ((len(insts) * (len(input) + 1) + bits - 1) / bits) * (size_of(u32)) - // - // The actual limit picked is pretty much a heuristic. - // See: https://github.com/rust-lang/regex/issues/215 - let size = ((num_insts * (text_len + 1) + BIT_SIZE - 1) / BIT_SIZE) * 4; - size <= MAX_SIZE_BYTES -} - -/// A backtracking matching engine. -#[derive(Debug)] -pub struct Bounded<'a, 'm, 'r, 's, I> { - prog: &'r Program, - input: I, - matches: &'m mut [bool], - slots: &'s mut [Slot], - m: &'a mut Cache, -} - -/// Shared cached state between multiple invocations of a backtracking engine -/// in the same thread. -#[derive(Clone, Debug)] -pub struct Cache { - jobs: Vec, - visited: Vec, -} - -impl Cache { - /// Create new empty cache for the backtracking engine. - pub fn new(_prog: &Program) -> Self { - Cache { jobs: vec![], visited: vec![] } - } -} - -/// A job is an explicit unit of stack space in the backtracking engine. -/// -/// The "normal" representation is a single state transition, which corresponds -/// to an NFA state and a character in the input. However, the backtracking -/// engine must keep track of old capture group values. We use the explicit -/// stack to do it. -#[derive(Clone, Copy, Debug)] -enum Job { - Inst { ip: InstPtr, at: InputAt }, - SaveRestore { slot: usize, old_pos: Option }, -} - -impl<'a, 'm, 'r, 's, I: Input> Bounded<'a, 'm, 'r, 's, I> { - /// Execute the backtracking matching engine. - /// - /// If there's a match, `exec` returns `true` and populates the given - /// captures accordingly. - pub fn exec( - prog: &'r Program, - cache: &ProgramCache, - matches: &'m mut [bool], - slots: &'s mut [Slot], - input: I, - start: usize, - end: usize, - ) -> bool { - let mut cache = cache.borrow_mut(); - let cache = &mut cache.backtrack; - let start = input.at(start); - let mut b = Bounded { - prog: prog, - input: input, - matches: matches, - slots: slots, - m: cache, - }; - b.exec_(start, end) - } - - /// Clears the cache such that the backtracking engine can be executed - /// on some input of fixed length. - fn clear(&mut self) { - // Reset the job memory so that we start fresh. - self.m.jobs.clear(); - - // Now we need to clear the bit state set. - // We do this by figuring out how much space we need to keep track - // of the states we've visited. - // Then we reset all existing allocated space to 0. - // Finally, we request more space if we need it. - // - // This is all a little circuitous, but doing this using unchecked - // operations doesn't seem to have a measurable impact on performance. - // (Probably because backtracking is limited to such small - // inputs/regexes in the first place.) 
- let visited_len = - (self.prog.len() * (self.input.len() + 1) + BIT_SIZE - 1) - / BIT_SIZE; - self.m.visited.truncate(visited_len); - for v in &mut self.m.visited { - *v = 0; - } - if visited_len > self.m.visited.len() { - let len = self.m.visited.len(); - self.m.visited.reserve_exact(visited_len - len); - for _ in 0..(visited_len - len) { - self.m.visited.push(0); - } - } - } - - /// Start backtracking at the given position in the input, but also look - /// for literal prefixes. - fn exec_(&mut self, mut at: InputAt, end: usize) -> bool { - self.clear(); - // If this is an anchored regex at the beginning of the input, then - // we're either already done or we only need to try backtracking once. - if self.prog.is_anchored_start { - return if !at.is_start() { false } else { self.backtrack(at) }; - } - let mut matched = false; - loop { - if !self.prog.prefixes.is_empty() { - at = match self.input.prefix_at(&self.prog.prefixes, at) { - None => break, - Some(at) => at, - }; - } - matched = self.backtrack(at) || matched; - if matched && self.prog.matches.len() == 1 { - return true; - } - if at.pos() >= end { - break; - } - at = self.input.at(at.next_pos()); - } - matched - } - - /// The main backtracking loop starting at the given input position. - fn backtrack(&mut self, start: InputAt) -> bool { - // N.B. We use an explicit stack to avoid recursion. - // To avoid excessive pushing and popping, most transitions are handled - // in the `step` helper function, which only pushes to the stack when - // there's a capture or a branch. - let mut matched = false; - self.m.jobs.push(Job::Inst { ip: 0, at: start }); - while let Some(job) = self.m.jobs.pop() { - match job { - Job::Inst { ip, at } => { - if self.step(ip, at) { - // Only quit if we're matching one regex. - // If we're matching a regex set, then mush on and - // try to find other matches (if we want them). - if self.prog.matches.len() == 1 { - return true; - } - matched = true; - } - } - Job::SaveRestore { slot, old_pos } => { - if slot < self.slots.len() { - self.slots[slot] = old_pos; - } - } - } - } - matched - } - - fn step(&mut self, mut ip: InstPtr, mut at: InputAt) -> bool { - use crate::prog::Inst::*; - loop { - // This loop is an optimization to avoid constantly pushing/popping - // from the stack. Namely, if we're pushing a job only to run it - // next, avoid the push and just mutate `ip` (and possibly `at`) - // in place. - if self.has_visited(ip, at) { - return false; - } - match self.prog[ip] { - Match(slot) => { - if slot < self.matches.len() { - self.matches[slot] = true; - } - return true; - } - Save(ref inst) => { - if let Some(&old_pos) = self.slots.get(inst.slot) { - // If this path doesn't work out, then we save the old - // capture index (if one exists) in an alternate - // job. If the next path fails, then the alternate - // job is popped and the old capture index is restored. 
- self.m.jobs.push(Job::SaveRestore { - slot: inst.slot, - old_pos: old_pos, - }); - self.slots[inst.slot] = Some(at.pos()); - } - ip = inst.goto; - } - Split(ref inst) => { - self.m.jobs.push(Job::Inst { ip: inst.goto2, at: at }); - ip = inst.goto1; - } - EmptyLook(ref inst) => { - if self.input.is_empty_match(at, inst) { - ip = inst.goto; - } else { - return false; - } - } - Char(ref inst) => { - if inst.c == at.char() { - ip = inst.goto; - at = self.input.at(at.next_pos()); - } else { - return false; - } - } - Ranges(ref inst) => { - if inst.matches(at.char()) { - ip = inst.goto; - at = self.input.at(at.next_pos()); - } else { - return false; - } - } - Bytes(ref inst) => { - if let Some(b) = at.byte() { - if inst.matches(b) { - ip = inst.goto; - at = self.input.at(at.next_pos()); - continue; - } - } - return false; - } - } - } - } - - fn has_visited(&mut self, ip: InstPtr, at: InputAt) -> bool { - let k = ip * (self.input.len() + 1) + at.pos(); - let k1 = k / BIT_SIZE; - let k2 = usize_to_u32(1 << (k & (BIT_SIZE - 1))); - if self.m.visited[k1] & k2 == 0 { - self.m.visited[k1] |= k2; - false - } else { - true - } - } -} - -fn usize_to_u32(n: usize) -> u32 { - if (n as u64) > (::std::u32::MAX as u64) { - panic!("BUG: {} is too big to fit into u32", n) - } - n as u32 -} diff --git a/collector/compile-benchmarks/regex-1.5.5/src/compile.rs b/collector/compile-benchmarks/regex-1.5.5/src/compile.rs deleted file mode 100644 index 069f445c8..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/src/compile.rs +++ /dev/null @@ -1,1267 +0,0 @@ -use std::collections::HashMap; -use std::fmt; -use std::iter; -use std::result; -use std::sync::Arc; - -use regex_syntax::hir::{self, Hir}; -use regex_syntax::is_word_byte; -use regex_syntax::utf8::{Utf8Range, Utf8Sequence, Utf8Sequences}; - -use crate::prog::{ - EmptyLook, Inst, InstBytes, InstChar, InstEmptyLook, InstPtr, InstRanges, - InstSave, InstSplit, Program, -}; - -use crate::Error; - -type Result = result::Result; -type ResultOrEmpty = result::Result, Error>; - -#[derive(Debug)] -struct Patch { - hole: Hole, - entry: InstPtr, -} - -/// A compiler translates a regular expression AST to a sequence of -/// instructions. The sequence of instructions represents an NFA. -// `Compiler` is only public via the `internal` module, so avoid deriving -// `Debug`. -#[allow(missing_debug_implementations)] -pub struct Compiler { - insts: Vec, - compiled: Program, - capture_name_idx: HashMap, - num_exprs: usize, - size_limit: usize, - suffix_cache: SuffixCache, - utf8_seqs: Option, - byte_classes: ByteClassSet, - // This keeps track of extra bytes allocated while compiling the regex - // program. Currently, this corresponds to two things. First is the heap - // memory allocated by Unicode character classes ('InstRanges'). Second is - // a "fake" amount of memory used by empty sub-expressions, so that enough - // empty sub-expressions will ultimately trigger the compiler to bail - // because of a size limit restriction. (That empty sub-expressions don't - // add to heap memory usage is more-or-less an implementation detail.) In - // the second case, if we don't bail, then an excessively large repetition - // on an empty sub-expression can result in the compiler using a very large - // amount of CPU time. - extra_inst_bytes: usize, -} - -impl Compiler { - /// Create a new regular expression compiler. - /// - /// Various options can be set before calling `compile` on an expression. 
- pub fn new() -> Self { - Compiler { - insts: vec![], - compiled: Program::new(), - capture_name_idx: HashMap::new(), - num_exprs: 0, - size_limit: 10 * (1 << 20), - suffix_cache: SuffixCache::new(1000), - utf8_seqs: Some(Utf8Sequences::new('\x00', '\x00')), - byte_classes: ByteClassSet::new(), - extra_inst_bytes: 0, - } - } - - /// The size of the resulting program is limited by size_limit. If - /// the program approximately exceeds the given size (in bytes), then - /// compilation will stop and return an error. - pub fn size_limit(mut self, size_limit: usize) -> Self { - self.size_limit = size_limit; - self - } - - /// If bytes is true, then the program is compiled as a byte based - /// automaton, which incorporates UTF-8 decoding into the machine. If it's - /// false, then the automaton is Unicode scalar value based, e.g., an - /// engine utilizing such an automaton is responsible for UTF-8 decoding. - /// - /// The specific invariant is that when returning a byte based machine, - /// the neither the `Char` nor `Ranges` instructions are produced. - /// Conversely, when producing a Unicode scalar value machine, the `Bytes` - /// instruction is never produced. - /// - /// Note that `dfa(true)` implies `bytes(true)`. - pub fn bytes(mut self, yes: bool) -> Self { - self.compiled.is_bytes = yes; - self - } - - /// When disabled, the program compiled may match arbitrary bytes. - /// - /// When enabled (the default), all compiled programs exclusively match - /// valid UTF-8 bytes. - pub fn only_utf8(mut self, yes: bool) -> Self { - self.compiled.only_utf8 = yes; - self - } - - /// When set, the machine returned is suitable for use in the DFA matching - /// engine. - /// - /// In particular, this ensures that if the regex is not anchored in the - /// beginning, then a preceding `.*?` is included in the program. (The NFA - /// based engines handle the preceding `.*?` explicitly, which is difficult - /// or impossible in the DFA engine.) - pub fn dfa(mut self, yes: bool) -> Self { - self.compiled.is_dfa = yes; - self - } - - /// When set, the machine returned is suitable for matching text in - /// reverse. In particular, all concatenations are flipped. - pub fn reverse(mut self, yes: bool) -> Self { - self.compiled.is_reverse = yes; - self - } - - /// Compile a regular expression given its AST. - /// - /// The compiler is guaranteed to succeed unless the program exceeds the - /// specified size limit. If the size limit is exceeded, then compilation - /// stops and returns an error. - pub fn compile(mut self, exprs: &[Hir]) -> result::Result { - debug_assert!(!exprs.is_empty()); - self.num_exprs = exprs.len(); - if exprs.len() == 1 { - self.compile_one(&exprs[0]) - } else { - self.compile_many(exprs) - } - } - - fn compile_one(mut self, expr: &Hir) -> result::Result { - // If we're compiling a forward DFA and we aren't anchored, then - // add a `.*?` before the first capture group. - // Other matching engines handle this by baking the logic into the - // matching engine itself. 
- let mut dotstar_patch = Patch { hole: Hole::None, entry: 0 }; - self.compiled.is_anchored_start = expr.is_anchored_start(); - self.compiled.is_anchored_end = expr.is_anchored_end(); - if self.compiled.needs_dotstar() { - dotstar_patch = self.c_dotstar()?; - self.compiled.start = dotstar_patch.entry; - } - self.compiled.captures = vec![None]; - let patch = self.c_capture(0, expr)?.unwrap_or(self.next_inst()); - if self.compiled.needs_dotstar() { - self.fill(dotstar_patch.hole, patch.entry); - } else { - self.compiled.start = patch.entry; - } - self.fill_to_next(patch.hole); - self.compiled.matches = vec![self.insts.len()]; - self.push_compiled(Inst::Match(0)); - self.compile_finish() - } - - fn compile_many( - mut self, - exprs: &[Hir], - ) -> result::Result { - debug_assert!(exprs.len() > 1); - - self.compiled.is_anchored_start = - exprs.iter().all(|e| e.is_anchored_start()); - self.compiled.is_anchored_end = - exprs.iter().all(|e| e.is_anchored_end()); - let mut dotstar_patch = Patch { hole: Hole::None, entry: 0 }; - if self.compiled.needs_dotstar() { - dotstar_patch = self.c_dotstar()?; - self.compiled.start = dotstar_patch.entry; - } else { - self.compiled.start = 0; // first instruction is always split - } - self.fill_to_next(dotstar_patch.hole); - - let mut prev_hole = Hole::None; - for (i, expr) in exprs[0..exprs.len() - 1].iter().enumerate() { - self.fill_to_next(prev_hole); - let split = self.push_split_hole(); - let Patch { hole, entry } = - self.c_capture(0, expr)?.unwrap_or(self.next_inst()); - self.fill_to_next(hole); - self.compiled.matches.push(self.insts.len()); - self.push_compiled(Inst::Match(i)); - prev_hole = self.fill_split(split, Some(entry), None); - } - let i = exprs.len() - 1; - let Patch { hole, entry } = - self.c_capture(0, &exprs[i])?.unwrap_or(self.next_inst()); - self.fill(prev_hole, entry); - self.fill_to_next(hole); - self.compiled.matches.push(self.insts.len()); - self.push_compiled(Inst::Match(i)); - self.compile_finish() - } - - fn compile_finish(mut self) -> result::Result { - self.compiled.insts = - self.insts.into_iter().map(|inst| inst.unwrap()).collect(); - self.compiled.byte_classes = self.byte_classes.byte_classes(); - self.compiled.capture_name_idx = Arc::new(self.capture_name_idx); - Ok(self.compiled) - } - - /// Compile expr into self.insts, returning a patch on success, - /// or an error if we run out of memory. - /// - /// All of the c_* methods of the compiler share the contract outlined - /// here. - /// - /// The main thing that a c_* method does is mutate `self.insts` - /// to add a list of mostly compiled instructions required to execute - /// the given expression. `self.insts` contains MaybeInsts rather than - /// Insts because there is some backpatching required. - /// - /// The `Patch` value returned by each c_* method provides metadata - /// about the compiled instructions emitted to `self.insts`. The - /// `entry` member of the patch refers to the first instruction - /// (the entry point), while the `hole` member contains zero or - /// more offsets to partial instructions that need to be backpatched. - /// The c_* routine can't know where its list of instructions are going to - /// jump to after execution, so it is up to the caller to patch - /// these jumps to point to the right place. So compiling some - /// expression, e, we would end up with a situation that looked like: - /// - /// ```text - /// self.insts = [ ..., i1, i2, ..., iexit1, ..., iexitn, ...] 
- /// ^ ^ ^ - /// | \ / - /// entry \ / - /// hole - /// ``` - /// - /// To compile two expressions, e1 and e2, concatenated together we - /// would do: - /// - /// ```ignore - /// let patch1 = self.c(e1); - /// let patch2 = self.c(e2); - /// ``` - /// - /// while leaves us with a situation that looks like - /// - /// ```text - /// self.insts = [ ..., i1, ..., iexit1, ..., i2, ..., iexit2 ] - /// ^ ^ ^ ^ - /// | | | | - /// entry1 hole1 entry2 hole2 - /// ``` - /// - /// Then to merge the two patches together into one we would backpatch - /// hole1 with entry2 and return a new patch that enters at entry1 - /// and has hole2 for a hole. In fact, if you look at the c_concat - /// method you will see that it does exactly this, though it handles - /// a list of expressions rather than just the two that we use for - /// an example. - /// - /// Ok(None) is returned when an expression is compiled to no - /// instruction, and so no patch.entry value makes sense. - fn c(&mut self, expr: &Hir) -> ResultOrEmpty { - use crate::prog; - use regex_syntax::hir::HirKind::*; - - self.check_size()?; - match *expr.kind() { - Empty => self.c_empty(), - Literal(hir::Literal::Unicode(c)) => self.c_char(c), - Literal(hir::Literal::Byte(b)) => { - assert!(self.compiled.uses_bytes()); - self.c_byte(b) - } - Class(hir::Class::Unicode(ref cls)) => self.c_class(cls.ranges()), - Class(hir::Class::Bytes(ref cls)) => { - if self.compiled.uses_bytes() { - self.c_class_bytes(cls.ranges()) - } else { - assert!(cls.is_all_ascii()); - let mut char_ranges = vec![]; - for r in cls.iter() { - let (s, e) = (r.start() as char, r.end() as char); - char_ranges.push(hir::ClassUnicodeRange::new(s, e)); - } - self.c_class(&char_ranges) - } - } - Anchor(hir::Anchor::StartLine) if self.compiled.is_reverse => { - self.byte_classes.set_range(b'\n', b'\n'); - self.c_empty_look(prog::EmptyLook::EndLine) - } - Anchor(hir::Anchor::StartLine) => { - self.byte_classes.set_range(b'\n', b'\n'); - self.c_empty_look(prog::EmptyLook::StartLine) - } - Anchor(hir::Anchor::EndLine) if self.compiled.is_reverse => { - self.byte_classes.set_range(b'\n', b'\n'); - self.c_empty_look(prog::EmptyLook::StartLine) - } - Anchor(hir::Anchor::EndLine) => { - self.byte_classes.set_range(b'\n', b'\n'); - self.c_empty_look(prog::EmptyLook::EndLine) - } - Anchor(hir::Anchor::StartText) if self.compiled.is_reverse => { - self.c_empty_look(prog::EmptyLook::EndText) - } - Anchor(hir::Anchor::StartText) => { - self.c_empty_look(prog::EmptyLook::StartText) - } - Anchor(hir::Anchor::EndText) if self.compiled.is_reverse => { - self.c_empty_look(prog::EmptyLook::StartText) - } - Anchor(hir::Anchor::EndText) => { - self.c_empty_look(prog::EmptyLook::EndText) - } - WordBoundary(hir::WordBoundary::Unicode) => { - if !cfg!(feature = "unicode-perl") { - return Err(Error::Syntax( - "Unicode word boundaries are unavailable when \ - the unicode-perl feature is disabled" - .to_string(), - )); - } - self.compiled.has_unicode_word_boundary = true; - self.byte_classes.set_word_boundary(); - // We also make sure that all ASCII bytes are in a different - // class from non-ASCII bytes. Otherwise, it's possible for - // ASCII bytes to get lumped into the same class as non-ASCII - // bytes. This in turn may cause the lazy DFA to falsely start - // when it sees an ASCII byte that maps to a byte class with - // non-ASCII bytes. This ensures that never happens. 
- self.byte_classes.set_range(0, 0x7F); - self.c_empty_look(prog::EmptyLook::WordBoundary) - } - WordBoundary(hir::WordBoundary::UnicodeNegate) => { - if !cfg!(feature = "unicode-perl") { - return Err(Error::Syntax( - "Unicode word boundaries are unavailable when \ - the unicode-perl feature is disabled" - .to_string(), - )); - } - self.compiled.has_unicode_word_boundary = true; - self.byte_classes.set_word_boundary(); - // See comments above for why we set the ASCII range here. - self.byte_classes.set_range(0, 0x7F); - self.c_empty_look(prog::EmptyLook::NotWordBoundary) - } - WordBoundary(hir::WordBoundary::Ascii) => { - self.byte_classes.set_word_boundary(); - self.c_empty_look(prog::EmptyLook::WordBoundaryAscii) - } - WordBoundary(hir::WordBoundary::AsciiNegate) => { - self.byte_classes.set_word_boundary(); - self.c_empty_look(prog::EmptyLook::NotWordBoundaryAscii) - } - Group(ref g) => match g.kind { - hir::GroupKind::NonCapturing => self.c(&g.hir), - hir::GroupKind::CaptureIndex(index) => { - if index as usize >= self.compiled.captures.len() { - self.compiled.captures.push(None); - } - self.c_capture(2 * index as usize, &g.hir) - } - hir::GroupKind::CaptureName { index, ref name } => { - if index as usize >= self.compiled.captures.len() { - let n = name.to_string(); - self.compiled.captures.push(Some(n.clone())); - self.capture_name_idx.insert(n, index as usize); - } - self.c_capture(2 * index as usize, &g.hir) - } - }, - Concat(ref es) => { - if self.compiled.is_reverse { - self.c_concat(es.iter().rev()) - } else { - self.c_concat(es) - } - } - Alternation(ref es) => self.c_alternate(&**es), - Repetition(ref rep) => self.c_repeat(rep), - } - } - - fn c_empty(&mut self) -> ResultOrEmpty { - // See: https://github.com/rust-lang/regex/security/advisories/GHSA-m5pq-gvj9-9vr8 - // See: CVE-2022-24713 - // - // Since 'empty' sub-expressions don't increase the size of - // the actual compiled object, we "fake" an increase in its - // size so that our 'check_size_limit' routine will eventually - // stop compilation if there are too many empty sub-expressions - // (e.g., via a large repetition). - self.extra_inst_bytes += std::mem::size_of::(); - Ok(None) - } - - fn c_capture(&mut self, first_slot: usize, expr: &Hir) -> ResultOrEmpty { - if self.num_exprs > 1 || self.compiled.is_dfa { - // Don't ever compile Save instructions for regex sets because - // they are never used. They are also never used in DFA programs - // because DFAs can't handle captures. - self.c(expr) - } else { - let entry = self.insts.len(); - let hole = self.push_hole(InstHole::Save { slot: first_slot }); - let patch = self.c(expr)?.unwrap_or(self.next_inst()); - self.fill(hole, patch.entry); - self.fill_to_next(patch.hole); - let hole = self.push_hole(InstHole::Save { slot: first_slot + 1 }); - Ok(Some(Patch { hole: hole, entry: entry })) - } - } - - fn c_dotstar(&mut self) -> Result { - Ok(if !self.compiled.only_utf8() { - self.c(&Hir::repetition(hir::Repetition { - kind: hir::RepetitionKind::ZeroOrMore, - greedy: false, - hir: Box::new(Hir::any(true)), - }))? - .unwrap() - } else { - self.c(&Hir::repetition(hir::Repetition { - kind: hir::RepetitionKind::ZeroOrMore, - greedy: false, - hir: Box::new(Hir::any(false)), - }))? 
- .unwrap() - }) - } - - fn c_char(&mut self, c: char) -> ResultOrEmpty { - if self.compiled.uses_bytes() { - if c.is_ascii() { - let b = c as u8; - let hole = - self.push_hole(InstHole::Bytes { start: b, end: b }); - self.byte_classes.set_range(b, b); - Ok(Some(Patch { hole, entry: self.insts.len() - 1 })) - } else { - self.c_class(&[hir::ClassUnicodeRange::new(c, c)]) - } - } else { - let hole = self.push_hole(InstHole::Char { c: c }); - Ok(Some(Patch { hole, entry: self.insts.len() - 1 })) - } - } - - fn c_class(&mut self, ranges: &[hir::ClassUnicodeRange]) -> ResultOrEmpty { - use std::mem::size_of; - - assert!(!ranges.is_empty()); - if self.compiled.uses_bytes() { - Ok(Some(CompileClass { c: self, ranges: ranges }.compile()?)) - } else { - let ranges: Vec<(char, char)> = - ranges.iter().map(|r| (r.start(), r.end())).collect(); - let hole = if ranges.len() == 1 && ranges[0].0 == ranges[0].1 { - self.push_hole(InstHole::Char { c: ranges[0].0 }) - } else { - self.extra_inst_bytes += - ranges.len() * (size_of::() * 2); - self.push_hole(InstHole::Ranges { ranges: ranges }) - }; - Ok(Some(Patch { hole: hole, entry: self.insts.len() - 1 })) - } - } - - fn c_byte(&mut self, b: u8) -> ResultOrEmpty { - self.c_class_bytes(&[hir::ClassBytesRange::new(b, b)]) - } - - fn c_class_bytes( - &mut self, - ranges: &[hir::ClassBytesRange], - ) -> ResultOrEmpty { - debug_assert!(!ranges.is_empty()); - - let first_split_entry = self.insts.len(); - let mut holes = vec![]; - let mut prev_hole = Hole::None; - for r in &ranges[0..ranges.len() - 1] { - self.fill_to_next(prev_hole); - let split = self.push_split_hole(); - let next = self.insts.len(); - self.byte_classes.set_range(r.start(), r.end()); - holes.push(self.push_hole(InstHole::Bytes { - start: r.start(), - end: r.end(), - })); - prev_hole = self.fill_split(split, Some(next), None); - } - let next = self.insts.len(); - let r = &ranges[ranges.len() - 1]; - self.byte_classes.set_range(r.start(), r.end()); - holes.push( - self.push_hole(InstHole::Bytes { start: r.start(), end: r.end() }), - ); - self.fill(prev_hole, next); - Ok(Some(Patch { hole: Hole::Many(holes), entry: first_split_entry })) - } - - fn c_empty_look(&mut self, look: EmptyLook) -> ResultOrEmpty { - let hole = self.push_hole(InstHole::EmptyLook { look: look }); - Ok(Some(Patch { hole: hole, entry: self.insts.len() - 1 })) - } - - fn c_concat<'a, I>(&mut self, exprs: I) -> ResultOrEmpty - where - I: IntoIterator, - { - let mut exprs = exprs.into_iter(); - let Patch { mut hole, entry } = loop { - match exprs.next() { - None => return self.c_empty(), - Some(e) => { - if let Some(p) = self.c(e)? { - break p; - } - } - } - }; - for e in exprs { - if let Some(p) = self.c(e)? { - self.fill(hole, p.entry); - hole = p.hole; - } - } - Ok(Some(Patch { hole: hole, entry: entry })) - } - - fn c_alternate(&mut self, exprs: &[Hir]) -> ResultOrEmpty { - debug_assert!( - exprs.len() >= 2, - "alternates must have at least 2 exprs" - ); - - // Initial entry point is always the first split. - let first_split_entry = self.insts.len(); - - // Save up all of the holes from each alternate. They will all get - // patched to point to the same location. - let mut holes = vec![]; - - // true indicates that the hole is a split where we want to fill - // the second branch. 
- let mut prev_hole = (Hole::None, false); - for e in &exprs[0..exprs.len() - 1] { - if prev_hole.1 { - let next = self.insts.len(); - self.fill_split(prev_hole.0, None, Some(next)); - } else { - self.fill_to_next(prev_hole.0); - } - let split = self.push_split_hole(); - if let Some(Patch { hole, entry }) = self.c(e)? { - holes.push(hole); - prev_hole = (self.fill_split(split, Some(entry), None), false); - } else { - let (split1, split2) = split.dup_one(); - holes.push(split1); - prev_hole = (split2, true); - } - } - if let Some(Patch { hole, entry }) = self.c(&exprs[exprs.len() - 1])? { - holes.push(hole); - if prev_hole.1 { - self.fill_split(prev_hole.0, None, Some(entry)); - } else { - self.fill(prev_hole.0, entry); - } - } else { - // We ignore prev_hole.1. When it's true, it means we have two - // empty branches both pushing prev_hole.0 into holes, so both - // branches will go to the same place anyway. - holes.push(prev_hole.0); - } - Ok(Some(Patch { hole: Hole::Many(holes), entry: first_split_entry })) - } - - fn c_repeat(&mut self, rep: &hir::Repetition) -> ResultOrEmpty { - use regex_syntax::hir::RepetitionKind::*; - match rep.kind { - ZeroOrOne => self.c_repeat_zero_or_one(&rep.hir, rep.greedy), - ZeroOrMore => self.c_repeat_zero_or_more(&rep.hir, rep.greedy), - OneOrMore => self.c_repeat_one_or_more(&rep.hir, rep.greedy), - Range(hir::RepetitionRange::Exactly(min_max)) => { - self.c_repeat_range(&rep.hir, rep.greedy, min_max, min_max) - } - Range(hir::RepetitionRange::AtLeast(min)) => { - self.c_repeat_range_min_or_more(&rep.hir, rep.greedy, min) - } - Range(hir::RepetitionRange::Bounded(min, max)) => { - self.c_repeat_range(&rep.hir, rep.greedy, min, max) - } - } - } - - fn c_repeat_zero_or_one( - &mut self, - expr: &Hir, - greedy: bool, - ) -> ResultOrEmpty { - let split_entry = self.insts.len(); - let split = self.push_split_hole(); - let Patch { hole: hole_rep, entry: entry_rep } = match self.c(expr)? { - Some(p) => p, - None => return self.pop_split_hole(), - }; - let split_hole = if greedy { - self.fill_split(split, Some(entry_rep), None) - } else { - self.fill_split(split, None, Some(entry_rep)) - }; - let holes = vec![hole_rep, split_hole]; - Ok(Some(Patch { hole: Hole::Many(holes), entry: split_entry })) - } - - fn c_repeat_zero_or_more( - &mut self, - expr: &Hir, - greedy: bool, - ) -> ResultOrEmpty { - let split_entry = self.insts.len(); - let split = self.push_split_hole(); - let Patch { hole: hole_rep, entry: entry_rep } = match self.c(expr)? { - Some(p) => p, - None => return self.pop_split_hole(), - }; - - self.fill(hole_rep, split_entry); - let split_hole = if greedy { - self.fill_split(split, Some(entry_rep), None) - } else { - self.fill_split(split, None, Some(entry_rep)) - }; - Ok(Some(Patch { hole: split_hole, entry: split_entry })) - } - - fn c_repeat_one_or_more( - &mut self, - expr: &Hir, - greedy: bool, - ) -> ResultOrEmpty { - let Patch { hole: hole_rep, entry: entry_rep } = match self.c(expr)? 
{ - Some(p) => p, - None => return Ok(None), - }; - self.fill_to_next(hole_rep); - let split = self.push_split_hole(); - - let split_hole = if greedy { - self.fill_split(split, Some(entry_rep), None) - } else { - self.fill_split(split, None, Some(entry_rep)) - }; - Ok(Some(Patch { hole: split_hole, entry: entry_rep })) - } - - fn c_repeat_range_min_or_more( - &mut self, - expr: &Hir, - greedy: bool, - min: u32, - ) -> ResultOrEmpty { - let min = u32_to_usize(min); - // Using next_inst() is ok, because we can't return it (concat would - // have to return Some(_) while c_repeat_range_min_or_more returns - // None). - let patch_concat = self - .c_concat(iter::repeat(expr).take(min))? - .unwrap_or(self.next_inst()); - if let Some(patch_rep) = self.c_repeat_zero_or_more(expr, greedy)? { - self.fill(patch_concat.hole, patch_rep.entry); - Ok(Some(Patch { hole: patch_rep.hole, entry: patch_concat.entry })) - } else { - Ok(None) - } - } - - fn c_repeat_range( - &mut self, - expr: &Hir, - greedy: bool, - min: u32, - max: u32, - ) -> ResultOrEmpty { - let (min, max) = (u32_to_usize(min), u32_to_usize(max)); - debug_assert!(min <= max); - let patch_concat = self.c_concat(iter::repeat(expr).take(min))?; - if min == max { - return Ok(patch_concat); - } - // Same reasoning as in c_repeat_range_min_or_more (we know that min < - // max at this point). - let patch_concat = patch_concat.unwrap_or(self.next_inst()); - let initial_entry = patch_concat.entry; - // It is much simpler to compile, e.g., `a{2,5}` as: - // - // aaa?a?a? - // - // But you end up with a sequence of instructions like this: - // - // 0: 'a' - // 1: 'a', - // 2: split(3, 4) - // 3: 'a' - // 4: split(5, 6) - // 5: 'a' - // 6: split(7, 8) - // 7: 'a' - // 8: MATCH - // - // This is *incredibly* inefficient because the splits end - // up forming a chain, which has to be resolved everything a - // transition is followed. - let mut holes = vec![]; - let mut prev_hole = patch_concat.hole; - for _ in min..max { - self.fill_to_next(prev_hole); - let split = self.push_split_hole(); - let Patch { hole, entry } = match self.c(expr)? { - Some(p) => p, - None => return self.pop_split_hole(), - }; - prev_hole = hole; - if greedy { - holes.push(self.fill_split(split, Some(entry), None)); - } else { - holes.push(self.fill_split(split, None, Some(entry))); - } - } - holes.push(prev_hole); - Ok(Some(Patch { hole: Hole::Many(holes), entry: initial_entry })) - } - - /// Can be used as a default value for the c_* functions when the call to - /// c_function is followed by inserting at least one instruction that is - /// always executed after the ones written by the c* function. 
- fn next_inst(&self) -> Patch { - Patch { hole: Hole::None, entry: self.insts.len() } - } - - fn fill(&mut self, hole: Hole, goto: InstPtr) { - match hole { - Hole::None => {} - Hole::One(pc) => { - self.insts[pc].fill(goto); - } - Hole::Many(holes) => { - for hole in holes { - self.fill(hole, goto); - } - } - } - } - - fn fill_to_next(&mut self, hole: Hole) { - let next = self.insts.len(); - self.fill(hole, next); - } - - fn fill_split( - &mut self, - hole: Hole, - goto1: Option, - goto2: Option, - ) -> Hole { - match hole { - Hole::None => Hole::None, - Hole::One(pc) => match (goto1, goto2) { - (Some(goto1), Some(goto2)) => { - self.insts[pc].fill_split(goto1, goto2); - Hole::None - } - (Some(goto1), None) => { - self.insts[pc].half_fill_split_goto1(goto1); - Hole::One(pc) - } - (None, Some(goto2)) => { - self.insts[pc].half_fill_split_goto2(goto2); - Hole::One(pc) - } - (None, None) => unreachable!( - "at least one of the split \ - holes must be filled" - ), - }, - Hole::Many(holes) => { - let mut new_holes = vec![]; - for hole in holes { - new_holes.push(self.fill_split(hole, goto1, goto2)); - } - if new_holes.is_empty() { - Hole::None - } else if new_holes.len() == 1 { - new_holes.pop().unwrap() - } else { - Hole::Many(new_holes) - } - } - } - } - - fn push_compiled(&mut self, inst: Inst) { - self.insts.push(MaybeInst::Compiled(inst)); - } - - fn push_hole(&mut self, inst: InstHole) -> Hole { - let hole = self.insts.len(); - self.insts.push(MaybeInst::Uncompiled(inst)); - Hole::One(hole) - } - - fn push_split_hole(&mut self) -> Hole { - let hole = self.insts.len(); - self.insts.push(MaybeInst::Split); - Hole::One(hole) - } - - fn pop_split_hole(&mut self) -> ResultOrEmpty { - self.insts.pop(); - Ok(None) - } - - fn check_size(&self) -> result::Result<(), Error> { - use std::mem::size_of; - - let size = - self.extra_inst_bytes + (self.insts.len() * size_of::()); - if size > self.size_limit { - Err(Error::CompiledTooBig(self.size_limit)) - } else { - Ok(()) - } - } -} - -#[derive(Debug)] -enum Hole { - None, - One(InstPtr), - Many(Vec), -} - -impl Hole { - fn dup_one(self) -> (Self, Self) { - match self { - Hole::One(pc) => (Hole::One(pc), Hole::One(pc)), - Hole::None | Hole::Many(_) => { - unreachable!("must be called on single hole") - } - } - } -} - -#[derive(Clone, Debug)] -enum MaybeInst { - Compiled(Inst), - Uncompiled(InstHole), - Split, - Split1(InstPtr), - Split2(InstPtr), -} - -impl MaybeInst { - fn fill(&mut self, goto: InstPtr) { - let maybeinst = match *self { - MaybeInst::Split => MaybeInst::Split1(goto), - MaybeInst::Uncompiled(ref inst) => { - MaybeInst::Compiled(inst.fill(goto)) - } - MaybeInst::Split1(goto1) => { - MaybeInst::Compiled(Inst::Split(InstSplit { - goto1: goto1, - goto2: goto, - })) - } - MaybeInst::Split2(goto2) => { - MaybeInst::Compiled(Inst::Split(InstSplit { - goto1: goto, - goto2: goto2, - })) - } - _ => unreachable!( - "not all instructions were compiled! 
\ - found uncompiled instruction: {:?}", - self - ), - }; - *self = maybeinst; - } - - fn fill_split(&mut self, goto1: InstPtr, goto2: InstPtr) { - let filled = match *self { - MaybeInst::Split => { - Inst::Split(InstSplit { goto1: goto1, goto2: goto2 }) - } - _ => unreachable!( - "must be called on Split instruction, \ - instead it was called on: {:?}", - self - ), - }; - *self = MaybeInst::Compiled(filled); - } - - fn half_fill_split_goto1(&mut self, goto1: InstPtr) { - let half_filled = match *self { - MaybeInst::Split => goto1, - _ => unreachable!( - "must be called on Split instruction, \ - instead it was called on: {:?}", - self - ), - }; - *self = MaybeInst::Split1(half_filled); - } - - fn half_fill_split_goto2(&mut self, goto2: InstPtr) { - let half_filled = match *self { - MaybeInst::Split => goto2, - _ => unreachable!( - "must be called on Split instruction, \ - instead it was called on: {:?}", - self - ), - }; - *self = MaybeInst::Split2(half_filled); - } - - fn unwrap(self) -> Inst { - match self { - MaybeInst::Compiled(inst) => inst, - _ => unreachable!( - "must be called on a compiled instruction, \ - instead it was called on: {:?}", - self - ), - } - } -} - -#[derive(Clone, Debug)] -enum InstHole { - Save { slot: usize }, - EmptyLook { look: EmptyLook }, - Char { c: char }, - Ranges { ranges: Vec<(char, char)> }, - Bytes { start: u8, end: u8 }, -} - -impl InstHole { - fn fill(&self, goto: InstPtr) -> Inst { - match *self { - InstHole::Save { slot } => { - Inst::Save(InstSave { goto: goto, slot: slot }) - } - InstHole::EmptyLook { look } => { - Inst::EmptyLook(InstEmptyLook { goto: goto, look: look }) - } - InstHole::Char { c } => Inst::Char(InstChar { goto: goto, c: c }), - InstHole::Ranges { ref ranges } => Inst::Ranges(InstRanges { - goto: goto, - ranges: ranges.clone().into_boxed_slice(), - }), - InstHole::Bytes { start, end } => { - Inst::Bytes(InstBytes { goto: goto, start: start, end: end }) - } - } - } -} - -struct CompileClass<'a, 'b> { - c: &'a mut Compiler, - ranges: &'b [hir::ClassUnicodeRange], -} - -impl<'a, 'b> CompileClass<'a, 'b> { - fn compile(mut self) -> Result { - let mut holes = vec![]; - let mut initial_entry = None; - let mut last_split = Hole::None; - let mut utf8_seqs = self.c.utf8_seqs.take().unwrap(); - self.c.suffix_cache.clear(); - - for (i, range) in self.ranges.iter().enumerate() { - let is_last_range = i + 1 == self.ranges.len(); - utf8_seqs.reset(range.start(), range.end()); - let mut it = (&mut utf8_seqs).peekable(); - loop { - let utf8_seq = match it.next() { - None => break, - Some(utf8_seq) => utf8_seq, - }; - if is_last_range && it.peek().is_none() { - let Patch { hole, entry } = self.c_utf8_seq(&utf8_seq)?; - holes.push(hole); - self.c.fill(last_split, entry); - last_split = Hole::None; - if initial_entry.is_none() { - initial_entry = Some(entry); - } - } else { - if initial_entry.is_none() { - initial_entry = Some(self.c.insts.len()); - } - self.c.fill_to_next(last_split); - last_split = self.c.push_split_hole(); - let Patch { hole, entry } = self.c_utf8_seq(&utf8_seq)?; - holes.push(hole); - last_split = - self.c.fill_split(last_split, Some(entry), None); - } - } - } - self.c.utf8_seqs = Some(utf8_seqs); - Ok(Patch { hole: Hole::Many(holes), entry: initial_entry.unwrap() }) - } - - fn c_utf8_seq(&mut self, seq: &Utf8Sequence) -> Result { - if self.c.compiled.is_reverse { - self.c_utf8_seq_(seq) - } else { - self.c_utf8_seq_(seq.into_iter().rev()) - } - } - - fn c_utf8_seq_<'r, I>(&mut self, seq: I) -> Result - where - I: IntoIterator, 
- { - // The initial instruction for each UTF-8 sequence should be the same. - let mut from_inst = ::std::usize::MAX; - let mut last_hole = Hole::None; - for byte_range in seq { - let key = SuffixCacheKey { - from_inst: from_inst, - start: byte_range.start, - end: byte_range.end, - }; - { - let pc = self.c.insts.len(); - if let Some(cached_pc) = self.c.suffix_cache.get(key, pc) { - from_inst = cached_pc; - continue; - } - } - self.c.byte_classes.set_range(byte_range.start, byte_range.end); - if from_inst == ::std::usize::MAX { - last_hole = self.c.push_hole(InstHole::Bytes { - start: byte_range.start, - end: byte_range.end, - }); - } else { - self.c.push_compiled(Inst::Bytes(InstBytes { - goto: from_inst, - start: byte_range.start, - end: byte_range.end, - })); - } - from_inst = self.c.insts.len().checked_sub(1).unwrap(); - debug_assert!(from_inst < ::std::usize::MAX); - } - debug_assert!(from_inst < ::std::usize::MAX); - Ok(Patch { hole: last_hole, entry: from_inst }) - } -} - -/// `SuffixCache` is a simple bounded hash map for caching suffix entries in -/// UTF-8 automata. For example, consider the Unicode range \u{0}-\u{FFFF}. -/// The set of byte ranges looks like this: -/// -/// [0-7F] -/// [C2-DF][80-BF] -/// [E0][A0-BF][80-BF] -/// [E1-EC][80-BF][80-BF] -/// [ED][80-9F][80-BF] -/// [EE-EF][80-BF][80-BF] -/// -/// Each line above translates to one alternate in the compiled regex program. -/// However, all but one of the alternates end in the same suffix, which is -/// a waste of an instruction. The suffix cache facilitates reusing them across -/// alternates. -/// -/// Note that a HashMap could be trivially used for this, but we don't need its -/// overhead. Some small bounded space (LRU style) is more than enough. -/// -/// This uses similar idea to [`SparseSet`](../sparse/struct.SparseSet.html), -/// except it uses hashes as original indices and then compares full keys for -/// validation against `dense` array. 
-#[derive(Debug)] -struct SuffixCache { - sparse: Box<[usize]>, - dense: Vec, -} - -#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] -struct SuffixCacheEntry { - key: SuffixCacheKey, - pc: InstPtr, -} - -#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] -struct SuffixCacheKey { - from_inst: InstPtr, - start: u8, - end: u8, -} - -impl SuffixCache { - fn new(size: usize) -> Self { - SuffixCache { - sparse: vec![0usize; size].into(), - dense: Vec::with_capacity(size), - } - } - - fn get(&mut self, key: SuffixCacheKey, pc: InstPtr) -> Option { - let hash = self.hash(&key); - let pos = &mut self.sparse[hash]; - if let Some(entry) = self.dense.get(*pos) { - if entry.key == key { - return Some(entry.pc); - } - } - *pos = self.dense.len(); - self.dense.push(SuffixCacheEntry { key: key, pc: pc }); - None - } - - fn clear(&mut self) { - self.dense.clear(); - } - - fn hash(&self, suffix: &SuffixCacheKey) -> usize { - // Basic FNV-1a hash as described: - // https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function - const FNV_PRIME: u64 = 1099511628211; - let mut h = 14695981039346656037; - h = (h ^ (suffix.from_inst as u64)).wrapping_mul(FNV_PRIME); - h = (h ^ (suffix.start as u64)).wrapping_mul(FNV_PRIME); - h = (h ^ (suffix.end as u64)).wrapping_mul(FNV_PRIME); - (h as usize) % self.sparse.len() - } -} - -struct ByteClassSet([bool; 256]); - -impl ByteClassSet { - fn new() -> Self { - ByteClassSet([false; 256]) - } - - fn set_range(&mut self, start: u8, end: u8) { - debug_assert!(start <= end); - if start > 0 { - self.0[start as usize - 1] = true; - } - self.0[end as usize] = true; - } - - fn set_word_boundary(&mut self) { - // We need to mark all ranges of bytes whose pairs result in - // evaluating \b differently. - let iswb = is_word_byte; - let mut b1: u16 = 0; - let mut b2: u16; - while b1 <= 255 { - b2 = b1 + 1; - while b2 <= 255 && iswb(b1 as u8) == iswb(b2 as u8) { - b2 += 1; - } - self.set_range(b1 as u8, (b2 - 1) as u8); - b1 = b2; - } - } - - fn byte_classes(&self) -> Vec { - // N.B. If you're debugging the DFA, it's useful to simply return - // `(0..256).collect()`, which effectively removes the byte classes - // and makes the transitions easier to read. - // (0usize..256).map(|x| x as u8).collect() - let mut byte_classes = vec![0; 256]; - let mut class = 0u8; - let mut i = 0; - loop { - byte_classes[i] = class as u8; - if i >= 255 { - break; - } - if self.0[i] { - class = class.checked_add(1).unwrap(); - } - i += 1; - } - byte_classes - } -} - -impl fmt::Debug for ByteClassSet { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_tuple("ByteClassSet").field(&&self.0[..]).finish() - } -} - -fn u32_to_usize(n: u32) -> usize { - // In case usize is less than 32 bits, we need to guard against overflow. - // On most platforms this compiles to nothing. - // TODO Use `std::convert::TryFrom` once it's stable. 
- if (n as u64) > (::std::usize::MAX as u64) { - panic!("BUG: {} is too big to be pointer sized", n) - } - n as usize -} - -#[cfg(test)] -mod tests { - use super::ByteClassSet; - - #[test] - fn byte_classes() { - let mut set = ByteClassSet::new(); - set.set_range(b'a', b'z'); - let classes = set.byte_classes(); - assert_eq!(classes[0], 0); - assert_eq!(classes[1], 0); - assert_eq!(classes[2], 0); - assert_eq!(classes[b'a' as usize - 1], 0); - assert_eq!(classes[b'a' as usize], 1); - assert_eq!(classes[b'm' as usize], 1); - assert_eq!(classes[b'z' as usize], 1); - assert_eq!(classes[b'z' as usize + 1], 2); - assert_eq!(classes[254], 2); - assert_eq!(classes[255], 2); - - let mut set = ByteClassSet::new(); - set.set_range(0, 2); - set.set_range(4, 6); - let classes = set.byte_classes(); - assert_eq!(classes[0], 0); - assert_eq!(classes[1], 0); - assert_eq!(classes[2], 0); - assert_eq!(classes[3], 1); - assert_eq!(classes[4], 2); - assert_eq!(classes[5], 2); - assert_eq!(classes[6], 2); - assert_eq!(classes[7], 3); - assert_eq!(classes[255], 3); - } - - #[test] - fn full_byte_classes() { - let mut set = ByteClassSet::new(); - for i in 0..256u16 { - set.set_range(i as u8, i as u8); - } - assert_eq!(set.byte_classes().len(), 256); - } -} diff --git a/collector/compile-benchmarks/regex-1.5.5/src/dfa.rs b/collector/compile-benchmarks/regex-1.5.5/src/dfa.rs deleted file mode 100644 index 4aee8039c..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/src/dfa.rs +++ /dev/null @@ -1,1949 +0,0 @@ -/*! -The DFA matching engine. - -A DFA provides faster matching because the engine is in exactly one state at -any point in time. In the NFA, there may be multiple active states, and -considerable CPU cycles are spent shuffling them around. In finite automata -speak, the DFA follows epsilon transitions in the regex far less than the NFA. - -A DFA is a classic trade off between time and space. The NFA is slower, but -its memory requirements are typically small and predictable. The DFA is faster, -but given the right regex and the right input, the number of states in the -DFA can grow exponentially. To mitigate this space problem, we do two things: - -1. We implement an *online* DFA. That is, the DFA is constructed from the NFA - during a search. When a new state is computed, it is stored in a cache so - that it may be reused. An important consequence of this implementation - is that states that are never reached for a particular input are never - computed. (This is impossible in an "offline" DFA which needs to compute - all possible states up front.) -2. If the cache gets too big, we wipe it and continue matching. - -In pathological cases, a new state can be created for every byte of input. -(e.g., The regex `(a|b)*a(a|b){20}` on a long sequence of a's and b's.) -In this case, performance regresses to slightly slower than the full NFA -simulation, in large part because the cache becomes useless. If the cache -is wiped too frequently, the DFA quits and control falls back to one of the -NFA simulations. - -Because of the "lazy" nature of this DFA, the inner matching loop is -considerably more complex than one might expect out of a DFA. A number of -tricks are employed to make it fast. Tread carefully. - -N.B. While this implementation is heavily commented, Russ Cox's series of -articles on regexes is strongly recommended: https://swtch.com/~rsc/regexp/ -(As is the DFA implementation in RE2, which heavily influenced this -implementation.) 
-*/ - -use std::collections::HashMap; -use std::fmt; -use std::iter::repeat; -use std::mem; -use std::sync::Arc; - -use crate::exec::ProgramCache; -use crate::prog::{Inst, Program}; -use crate::sparse::SparseSet; - -/// Return true if and only if the given program can be executed by a DFA. -/// -/// Generally, a DFA is always possible. A pathological case where it is not -/// possible is if the number of NFA states exceeds `u32::MAX`, in which case, -/// this function will return false. -/// -/// This function will also return false if the given program has any Unicode -/// instructions (Char or Ranges) since the DFA operates on bytes only. -pub fn can_exec(insts: &Program) -> bool { - use crate::prog::Inst::*; - // If for some reason we manage to allocate a regex program with more - // than i32::MAX instructions, then we can't execute the DFA because we - // use 32 bit instruction pointer deltas for memory savings. - // If i32::MAX is the largest positive delta, - // then -i32::MAX == i32::MIN + 1 is the largest negative delta, - // and we are OK to use 32 bits. - if insts.dfa_size_limit == 0 || insts.len() > ::std::i32::MAX as usize { - return false; - } - for inst in insts { - match *inst { - Char(_) | Ranges(_) => return false, - EmptyLook(_) | Match(_) | Save(_) | Split(_) | Bytes(_) => {} - } - } - true -} - -/// A reusable cache of DFA states. -/// -/// This cache is reused between multiple invocations of the same regex -/// program. (It is not shared simultaneously between threads. If there is -/// contention, then new caches are created.) -#[derive(Debug)] -pub struct Cache { - /// Group persistent DFA related cache state together. The sparse sets - /// listed below are used as scratch space while computing uncached states. - inner: CacheInner, - /// qcur and qnext are ordered sets with constant time - /// addition/membership/clearing-whole-set and linear time iteration. They - /// are used to manage the sets of NFA states in DFA states when computing - /// cached DFA states. In particular, the order of the NFA states matters - /// for leftmost-first style matching. Namely, when computing a cached - /// state, the set of NFA states stops growing as soon as the first Match - /// instruction is observed. - qcur: SparseSet, - qnext: SparseSet, -} - -/// `CacheInner` is logically just a part of Cache, but groups together fields -/// that aren't passed as function parameters throughout search. (This split -/// is mostly an artifact of the borrow checker. It is happily paid.) -#[derive(Debug)] -struct CacheInner { - /// A cache of pre-compiled DFA states, keyed by the set of NFA states - /// and the set of empty-width flags set at the byte in the input when the - /// state was observed. - /// - /// A StatePtr is effectively a `*State`, but to avoid various inconvenient - /// things, we just pass indexes around manually. The performance impact of - /// this is probably an instruction or two in the inner loop. However, on - /// 64 bit, each StatePtr is half the size of a *State. - compiled: StateMap, - /// The transition table. - /// - /// The transition table is laid out in row-major order, where states are - /// rows and the transitions for each state are columns. At a high level, - /// given state `s` and byte `b`, the next state can be found at index - /// `s * 256 + b`. - /// - /// This is, of course, a lie. A StatePtr is actually a pointer to the - /// *start* of a row in this table. 
When indexing in the DFA's inner loop, - /// this removes the need to multiply the StatePtr by the stride. Yes, it - /// matters. This reduces the number of states we can store, but: the - /// stride is rarely 256 since we define transitions in terms of - /// *equivalence classes* of bytes. Each class corresponds to a set of - /// bytes that never discriminate a distinct path through the DFA from each - /// other. - trans: Transitions, - /// A set of cached start states, which are limited to the number of - /// permutations of flags set just before the initial byte of input. (The - /// index into this vec is a `EmptyFlags`.) - /// - /// N.B. A start state can be "dead" (i.e., no possible match), so we - /// represent it with a StatePtr. - start_states: Vec, - /// Stack scratch space used to follow epsilon transitions in the NFA. - /// (This permits us to avoid recursion.) - /// - /// The maximum stack size is the number of NFA states. - stack: Vec, - /// The total number of times this cache has been flushed by the DFA - /// because of space constraints. - flush_count: u64, - /// The total heap size of the DFA's cache. We use this to determine when - /// we should flush the cache. - size: usize, - /// Scratch space used when building instruction pointer lists for new - /// states. This helps amortize allocation. - insts_scratch_space: Vec, -} - -/// The transition table. -/// -/// It is laid out in row-major order, with states as rows and byte class -/// transitions as columns. -/// -/// The transition table is responsible for producing valid `StatePtrs`. A -/// `StatePtr` points to the start of a particular row in this table. When -/// indexing to find the next state this allows us to avoid a multiplication -/// when computing an index into the table. -#[derive(Clone)] -struct Transitions { - /// The table. - table: Vec, - /// The stride. - num_byte_classes: usize, -} - -/// Fsm encapsulates the actual execution of the DFA. -#[derive(Debug)] -pub struct Fsm<'a> { - /// prog contains the NFA instruction opcodes. DFA execution uses either - /// the `dfa` instructions or the `dfa_reverse` instructions from - /// `exec::ExecReadOnly`. (It never uses `ExecReadOnly.nfa`, which may have - /// Unicode opcodes that cannot be executed by the DFA.) - prog: &'a Program, - /// The start state. We record it here because the pointer may change - /// when the cache is wiped. - start: StatePtr, - /// The current position in the input. - at: usize, - /// Should we quit after seeing the first match? e.g., When the caller - /// uses `is_match` or `shortest_match`. - quit_after_match: bool, - /// The last state that matched. - /// - /// When no match has occurred, this is set to STATE_UNKNOWN. - /// - /// This is only useful when matching regex sets. The last match state - /// is useful because it contains all of the match instructions seen, - /// thereby allowing us to enumerate which regexes in the set matched. - last_match_si: StatePtr, - /// The input position of the last cache flush. We use this to determine - /// if we're thrashing in the cache too often. If so, the DFA quits so - /// that we can fall back to the NFA algorithm. - last_cache_flush: usize, - /// All cached DFA information that is persisted between searches. - cache: &'a mut CacheInner, -} - -/// The result of running the DFA. -/// -/// Generally, the result is either a match or not a match, but sometimes the -/// DFA runs too slowly because the cache size is too small. 
In that case, it -/// gives up with the intent of falling back to the NFA algorithm. -/// -/// The DFA can also give up if it runs out of room to create new states, or if -/// it sees non-ASCII bytes in the presence of a Unicode word boundary. -#[derive(Clone, Debug)] -pub enum Result { - Match(T), - NoMatch(usize), - Quit, -} - -impl Result { - /// Returns true if this result corresponds to a match. - pub fn is_match(&self) -> bool { - match *self { - Result::Match(_) => true, - Result::NoMatch(_) | Result::Quit => false, - } - } - - /// Maps the given function onto T and returns the result. - /// - /// If this isn't a match, then this is a no-op. - #[cfg(feature = "perf-literal")] - pub fn map U>(self, mut f: F) -> Result { - match self { - Result::Match(t) => Result::Match(f(t)), - Result::NoMatch(x) => Result::NoMatch(x), - Result::Quit => Result::Quit, - } - } - - /// Sets the non-match position. - /// - /// If this isn't a non-match, then this is a no-op. - fn set_non_match(self, at: usize) -> Result { - match self { - Result::NoMatch(_) => Result::NoMatch(at), - r => r, - } - } -} - -/// `State` is a DFA state. It contains an ordered set of NFA states (not -/// necessarily complete) and a smattering of flags. -/// -/// The flags are packed into the first byte of data. -/// -/// States don't carry their transitions. Instead, transitions are stored in -/// a single row-major table. -/// -/// Delta encoding is used to store the instruction pointers. -/// The first instruction pointer is stored directly starting -/// at data[1], and each following pointer is stored as an offset -/// to the previous one. If a delta is in the range -127..127, -/// it is packed into a single byte; Otherwise the byte 128 (-128 as an i8) -/// is coded as a flag, followed by 4 bytes encoding the delta. -#[derive(Clone, Eq, Hash, PartialEq)] -struct State { - data: Arc<[u8]>, -} - -/// `InstPtr` is a 32 bit pointer into a sequence of opcodes (i.e., it indexes -/// an NFA state). -/// -/// Throughout this library, this is usually set to `usize`, but we force a -/// `u32` here for the DFA to save on space. -type InstPtr = u32; - -/// Adds ip to data using delta encoding with respect to prev. -/// -/// After completion, `data` will contain `ip` and `prev` will be set to `ip`. -fn push_inst_ptr(data: &mut Vec, prev: &mut InstPtr, ip: InstPtr) { - let delta = (ip as i32) - (*prev as i32); - write_vari32(data, delta); - *prev = ip; -} - -struct InstPtrs<'a> { - base: usize, - data: &'a [u8], -} - -impl<'a> Iterator for InstPtrs<'a> { - type Item = usize; - - fn next(&mut self) -> Option { - if self.data.is_empty() { - return None; - } - let (delta, nread) = read_vari32(self.data); - let base = self.base as i32 + delta; - debug_assert!(base >= 0); - debug_assert!(nread > 0); - self.data = &self.data[nread..]; - self.base = base as usize; - Some(self.base) - } -} - -impl State { - fn flags(&self) -> StateFlags { - StateFlags(self.data[0]) - } - - fn inst_ptrs(&self) -> InstPtrs<'_> { - InstPtrs { base: 0, data: &self.data[1..] } - } -} - -/// `StatePtr` is a 32 bit pointer to the start of a row in the transition -/// table. -/// -/// It has many special values. There are two types of special values: -/// sentinels and flags. -/// -/// Sentinels corresponds to special states that carry some kind of -/// significance. There are three such states: unknown, dead and quit states. -/// -/// Unknown states are states that haven't been computed yet. 
They indicate -/// that a transition should be filled in that points to either an existing -/// cached state or a new state altogether. In general, an unknown state means -/// "follow the NFA's epsilon transitions." -/// -/// Dead states are states that can never lead to a match, no matter what -/// subsequent input is observed. This means that the DFA should quit -/// immediately and return the longest match it has found thus far. -/// -/// Quit states are states that imply the DFA is not capable of matching the -/// regex correctly. Currently, this is only used when a Unicode word boundary -/// exists in the regex *and* a non-ASCII byte is observed. -/// -/// The other type of state pointer is a state pointer with special flag bits. -/// There are two flags: a start flag and a match flag. The lower bits of both -/// kinds always contain a "valid" `StatePtr` (indicated by the `STATE_MAX` -/// mask). -/// -/// The start flag means that the state is a start state, and therefore may be -/// subject to special prefix scanning optimizations. -/// -/// The match flag means that the state is a match state, and therefore the -/// current position in the input (while searching) should be recorded. -/// -/// The above exists mostly in the service of making the inner loop fast. -/// In particular, the inner *inner* loop looks something like this: -/// -/// ```ignore -/// while state <= STATE_MAX and i < len(text): -/// state = state.next[i] -/// ``` -/// -/// This is nice because it lets us execute a lazy DFA as if it were an -/// entirely offline DFA (i.e., with very few instructions). The loop will -/// quit only when we need to examine a case that needs special attention. -type StatePtr = u32; - -/// An unknown state means that the state has not been computed yet, and that -/// the only way to progress is to compute it. -const STATE_UNKNOWN: StatePtr = 1 << 31; - -/// A dead state means that the state has been computed and it is known that -/// once it is entered, no future match can ever occur. -const STATE_DEAD: StatePtr = STATE_UNKNOWN + 1; - -/// A quit state means that the DFA came across some input that it doesn't -/// know how to process correctly. The DFA should quit and another matching -/// engine should be run in its place. -const STATE_QUIT: StatePtr = STATE_DEAD + 1; - -/// A start state is a state that the DFA can start in. -/// -/// Note that start states have their lower bits set to a state pointer. -const STATE_START: StatePtr = 1 << 30; - -/// A match state means that the regex has successfully matched. -/// -/// Note that match states have their lower bits set to a state pointer. -const STATE_MATCH: StatePtr = 1 << 29; - -/// The maximum state pointer. This is useful to mask out the "valid" state -/// pointer from a state with the "start" or "match" bits set. -/// -/// It doesn't make sense to use this with unknown, dead or quit state -/// pointers, since those pointers are sentinels and never have their lower -/// bits set to anything meaningful. -const STATE_MAX: StatePtr = STATE_MATCH - 1; - -/// Byte is a u8 in spirit, but a u16 in practice so that we can represent the -/// special EOF sentinel value. -#[derive(Copy, Clone, Debug)] -struct Byte(u16); - -/// A set of flags for zero-width assertions. -#[derive(Clone, Copy, Eq, Debug, Default, Hash, PartialEq)] -struct EmptyFlags { - start: bool, - end: bool, - start_line: bool, - end_line: bool, - word_boundary: bool, - not_word_boundary: bool, -} - -/// A set of flags describing various configurations of a DFA state. 
This is -/// represented by a `u8` so that it is compact. -#[derive(Clone, Copy, Eq, Default, Hash, PartialEq)] -struct StateFlags(u8); - -impl Cache { - /// Create new empty cache for the DFA engine. - pub fn new(prog: &Program) -> Self { - // We add 1 to account for the special EOF byte. - let num_byte_classes = (prog.byte_classes[255] as usize + 1) + 1; - let starts = vec![STATE_UNKNOWN; 256]; - let mut cache = Cache { - inner: CacheInner { - compiled: StateMap::new(num_byte_classes), - trans: Transitions::new(num_byte_classes), - start_states: starts, - stack: vec![], - flush_count: 0, - size: 0, - insts_scratch_space: vec![], - }, - qcur: SparseSet::new(prog.insts.len()), - qnext: SparseSet::new(prog.insts.len()), - }; - cache.inner.reset_size(); - cache - } -} - -impl CacheInner { - /// Resets the cache size to account for fixed costs, such as the program - /// and stack sizes. - fn reset_size(&mut self) { - self.size = (self.start_states.len() * mem::size_of::()) - + (self.stack.len() * mem::size_of::()); - } -} - -impl<'a> Fsm<'a> { - #[cfg_attr(feature = "perf-inline", inline(always))] - pub fn forward( - prog: &'a Program, - cache: &ProgramCache, - quit_after_match: bool, - text: &[u8], - at: usize, - ) -> Result { - let mut cache = cache.borrow_mut(); - let cache = &mut cache.dfa; - let mut dfa = Fsm { - prog: prog, - start: 0, // filled in below - at: at, - quit_after_match: quit_after_match, - last_match_si: STATE_UNKNOWN, - last_cache_flush: at, - cache: &mut cache.inner, - }; - let (empty_flags, state_flags) = dfa.start_flags(text, at); - dfa.start = - match dfa.start_state(&mut cache.qcur, empty_flags, state_flags) { - None => return Result::Quit, - Some(STATE_DEAD) => return Result::NoMatch(at), - Some(si) => si, - }; - debug_assert!(dfa.start != STATE_UNKNOWN); - dfa.exec_at(&mut cache.qcur, &mut cache.qnext, text) - } - - #[cfg_attr(feature = "perf-inline", inline(always))] - pub fn reverse( - prog: &'a Program, - cache: &ProgramCache, - quit_after_match: bool, - text: &[u8], - at: usize, - ) -> Result { - let mut cache = cache.borrow_mut(); - let cache = &mut cache.dfa_reverse; - let mut dfa = Fsm { - prog: prog, - start: 0, // filled in below - at: at, - quit_after_match: quit_after_match, - last_match_si: STATE_UNKNOWN, - last_cache_flush: at, - cache: &mut cache.inner, - }; - let (empty_flags, state_flags) = dfa.start_flags_reverse(text, at); - dfa.start = - match dfa.start_state(&mut cache.qcur, empty_flags, state_flags) { - None => return Result::Quit, - Some(STATE_DEAD) => return Result::NoMatch(at), - Some(si) => si, - }; - debug_assert!(dfa.start != STATE_UNKNOWN); - dfa.exec_at_reverse(&mut cache.qcur, &mut cache.qnext, text) - } - - #[cfg_attr(feature = "perf-inline", inline(always))] - pub fn forward_many( - prog: &'a Program, - cache: &ProgramCache, - matches: &mut [bool], - text: &[u8], - at: usize, - ) -> Result { - debug_assert!(matches.len() == prog.matches.len()); - let mut cache = cache.borrow_mut(); - let cache = &mut cache.dfa; - let mut dfa = Fsm { - prog: prog, - start: 0, // filled in below - at: at, - quit_after_match: false, - last_match_si: STATE_UNKNOWN, - last_cache_flush: at, - cache: &mut cache.inner, - }; - let (empty_flags, state_flags) = dfa.start_flags(text, at); - dfa.start = - match dfa.start_state(&mut cache.qcur, empty_flags, state_flags) { - None => return Result::Quit, - Some(STATE_DEAD) => return Result::NoMatch(at), - Some(si) => si, - }; - debug_assert!(dfa.start != STATE_UNKNOWN); - let result = dfa.exec_at(&mut cache.qcur, 
&mut cache.qnext, text); - if result.is_match() { - if matches.len() == 1 { - matches[0] = true; - } else { - debug_assert!(dfa.last_match_si != STATE_UNKNOWN); - debug_assert!(dfa.last_match_si != STATE_DEAD); - for ip in dfa.state(dfa.last_match_si).inst_ptrs() { - if let Inst::Match(slot) = dfa.prog[ip] { - matches[slot] = true; - } - } - } - } - result - } - - /// Executes the DFA on a forward NFA. - /// - /// {qcur,qnext} are scratch ordered sets which may be non-empty. - #[cfg_attr(feature = "perf-inline", inline(always))] - fn exec_at( - &mut self, - qcur: &mut SparseSet, - qnext: &mut SparseSet, - text: &[u8], - ) -> Result { - // For the most part, the DFA is basically: - // - // last_match = null - // while current_byte != EOF: - // si = current_state.next[current_byte] - // if si is match - // last_match = si - // return last_match - // - // However, we need to deal with a few things: - // - // 1. This is an *online* DFA, so the current state's next list - // may not point to anywhere yet, so we must go out and compute - // them. (They are then cached into the current state's next list - // to avoid re-computation.) - // 2. If we come across a state that is known to be dead (i.e., never - // leads to a match), then we can quit early. - // 3. If the caller just wants to know if a match occurs, then we - // can quit as soon as we know we have a match. (Full leftmost - // first semantics require continuing on.) - // 4. If we're in the start state, then we can use a pre-computed set - // of prefix literals to skip quickly along the input. - // 5. After the input is exhausted, we run the DFA on one symbol - // that stands for EOF. This is useful for handling empty width - // assertions. - // 6. We can't actually do state.next[byte]. Instead, we have to do - // state.next[byte_classes[byte]], which permits us to keep the - // 'next' list very small. - // - // Since there's a bunch of extra stuff we need to consider, we do some - // pretty hairy tricks to get the inner loop to run as fast as - // possible. - debug_assert!(!self.prog.is_reverse); - - // The last match is the currently known ending match position. It is - // reported as an index to the most recent byte that resulted in a - // transition to a match state and is always stored in capture slot `1` - // when searching forwards. Its maximum value is `text.len()`. - let mut result = Result::NoMatch(self.at); - let (mut prev_si, mut next_si) = (self.start, self.start); - let mut at = self.at; - while at < text.len() { - // This is the real inner loop. We take advantage of special bits - // set in the state pointer to determine whether a state is in the - // "common" case or not. Specifically, the common case is a - // non-match non-start non-dead state that has already been - // computed. So long as we remain in the common case, this inner - // loop will chew through the input. - // - // We also unroll the loop 4 times to amortize the cost of checking - // whether we've consumed the entire input. We are also careful - // to make sure that `prev_si` always represents the previous state - // and `next_si` always represents the next state after the loop - // exits, even if it isn't always true inside the loop. - while next_si <= STATE_MAX && at < text.len() { - // Argument for safety is in the definition of next_si. 
- prev_si = unsafe { self.next_si(next_si, text, at) }; - at += 1; - if prev_si > STATE_MAX || at + 2 >= text.len() { - mem::swap(&mut prev_si, &mut next_si); - break; - } - next_si = unsafe { self.next_si(prev_si, text, at) }; - at += 1; - if next_si > STATE_MAX { - break; - } - prev_si = unsafe { self.next_si(next_si, text, at) }; - at += 1; - if prev_si > STATE_MAX { - mem::swap(&mut prev_si, &mut next_si); - break; - } - next_si = unsafe { self.next_si(prev_si, text, at) }; - at += 1; - } - if next_si & STATE_MATCH > 0 { - // A match state is outside of the common case because it needs - // special case analysis. In particular, we need to record the - // last position as having matched and possibly quit the DFA if - // we don't need to keep matching. - next_si &= !STATE_MATCH; - result = Result::Match(at - 1); - if self.quit_after_match { - return result; - } - self.last_match_si = next_si; - prev_si = next_si; - - // This permits short-circuiting when matching a regex set. - // In particular, if this DFA state contains only match states, - // then it's impossible to extend the set of matches since - // match states are final. Therefore, we can quit. - if self.prog.matches.len() > 1 { - let state = self.state(next_si); - let just_matches = - state.inst_ptrs().all(|ip| self.prog[ip].is_match()); - if just_matches { - return result; - } - } - - // Another inner loop! If the DFA stays in this particular - // match state, then we can rip through all of the input - // very quickly, and only recording the match location once - // we've left this particular state. - let cur = at; - while (next_si & !STATE_MATCH) == prev_si - && at + 2 < text.len() - { - // Argument for safety is in the definition of next_si. - next_si = unsafe { - self.next_si(next_si & !STATE_MATCH, text, at) - }; - at += 1; - } - if at > cur { - result = Result::Match(at - 2); - } - } else if next_si & STATE_START > 0 { - // A start state isn't in the common case because we may - // want to do quick prefix scanning. If the program doesn't - // have a detected prefix, then start states are actually - // considered common and this case is never reached. - debug_assert!(self.has_prefix()); - next_si &= !STATE_START; - prev_si = next_si; - at = match self.prefix_at(text, at) { - None => return Result::NoMatch(text.len()), - Some(i) => i, - }; - } else if next_si >= STATE_UNKNOWN { - if next_si == STATE_QUIT { - return Result::Quit; - } - // Finally, this corresponds to the case where the transition - // entered a state that can never lead to a match or a state - // that hasn't been computed yet. The latter being the "slow" - // path. - let byte = Byte::byte(text[at - 1]); - // We no longer care about the special bits in the state - // pointer. - prev_si &= STATE_MAX; - // Record where we are. This is used to track progress for - // determining whether we should quit if we've flushed the - // cache too much. - self.at = at; - next_si = match self.next_state(qcur, qnext, prev_si, byte) { - None => return Result::Quit, - Some(STATE_DEAD) => return result.set_non_match(at), - Some(si) => si, - }; - debug_assert!(next_si != STATE_UNKNOWN); - if next_si & STATE_MATCH > 0 { - next_si &= !STATE_MATCH; - result = Result::Match(at - 1); - if self.quit_after_match { - return result; - } - self.last_match_si = next_si; - } - prev_si = next_si; - } else { - prev_si = next_si; - } - } - - // Run the DFA once more on the special EOF sentinel value. 
- // We don't care about the special bits in the state pointer any more, - // so get rid of them. - prev_si &= STATE_MAX; - prev_si = match self.next_state(qcur, qnext, prev_si, Byte::eof()) { - None => return Result::Quit, - Some(STATE_DEAD) => return result.set_non_match(text.len()), - Some(si) => si & !STATE_START, - }; - debug_assert!(prev_si != STATE_UNKNOWN); - if prev_si & STATE_MATCH > 0 { - prev_si &= !STATE_MATCH; - self.last_match_si = prev_si; - result = Result::Match(text.len()); - } - result - } - - /// Executes the DFA on a reverse NFA. - #[cfg_attr(feature = "perf-inline", inline(always))] - fn exec_at_reverse( - &mut self, - qcur: &mut SparseSet, - qnext: &mut SparseSet, - text: &[u8], - ) -> Result { - // The comments in `exec_at` above mostly apply here too. The main - // difference is that we move backwards over the input and we look for - // the longest possible match instead of the leftmost-first match. - // - // N.B. The code duplication here is regrettable. Efforts to improve - // it without sacrificing performance are welcome. ---AG - debug_assert!(self.prog.is_reverse); - let mut result = Result::NoMatch(self.at); - let (mut prev_si, mut next_si) = (self.start, self.start); - let mut at = self.at; - while at > 0 { - while next_si <= STATE_MAX && at > 0 { - // Argument for safety is in the definition of next_si. - at -= 1; - prev_si = unsafe { self.next_si(next_si, text, at) }; - if prev_si > STATE_MAX || at <= 4 { - mem::swap(&mut prev_si, &mut next_si); - break; - } - at -= 1; - next_si = unsafe { self.next_si(prev_si, text, at) }; - if next_si > STATE_MAX { - break; - } - at -= 1; - prev_si = unsafe { self.next_si(next_si, text, at) }; - if prev_si > STATE_MAX { - mem::swap(&mut prev_si, &mut next_si); - break; - } - at -= 1; - next_si = unsafe { self.next_si(prev_si, text, at) }; - } - if next_si & STATE_MATCH > 0 { - next_si &= !STATE_MATCH; - result = Result::Match(at + 1); - if self.quit_after_match { - return result; - } - self.last_match_si = next_si; - prev_si = next_si; - let cur = at; - while (next_si & !STATE_MATCH) == prev_si && at >= 2 { - // Argument for safety is in the definition of next_si. - at -= 1; - next_si = unsafe { - self.next_si(next_si & !STATE_MATCH, text, at) - }; - } - if at < cur { - result = Result::Match(at + 2); - } - } else if next_si >= STATE_UNKNOWN { - if next_si == STATE_QUIT { - return Result::Quit; - } - let byte = Byte::byte(text[at]); - prev_si &= STATE_MAX; - self.at = at; - next_si = match self.next_state(qcur, qnext, prev_si, byte) { - None => return Result::Quit, - Some(STATE_DEAD) => return result.set_non_match(at), - Some(si) => si, - }; - debug_assert!(next_si != STATE_UNKNOWN); - if next_si & STATE_MATCH > 0 { - next_si &= !STATE_MATCH; - result = Result::Match(at + 1); - if self.quit_after_match { - return result; - } - self.last_match_si = next_si; - } - prev_si = next_si; - } else { - prev_si = next_si; - } - } - - // Run the DFA once more on the special EOF sentinel value. - prev_si = match self.next_state(qcur, qnext, prev_si, Byte::eof()) { - None => return Result::Quit, - Some(STATE_DEAD) => return result.set_non_match(0), - Some(si) => si, - }; - debug_assert!(prev_si != STATE_UNKNOWN); - if prev_si & STATE_MATCH > 0 { - prev_si &= !STATE_MATCH; - self.last_match_si = prev_si; - result = Result::Match(0); - } - result - } - - /// next_si transitions to the next state, where the transition input - /// corresponds to text[i]. - /// - /// This elides bounds checks, and is therefore not safe. 
- #[cfg_attr(feature = "perf-inline", inline(always))] - unsafe fn next_si(&self, si: StatePtr, text: &[u8], i: usize) -> StatePtr { - // What is the argument for safety here? - // We have three unchecked accesses that could possibly violate safety: - // - // 1. The given byte of input (`text[i]`). - // 2. The class of the byte of input (`classes[text[i]]`). - // 3. The transition for the class (`trans[si + cls]`). - // - // (1) is only safe when calling next_si is guarded by - // `i < text.len()`. - // - // (2) is the easiest case to guarantee since `text[i]` is always a - // `u8` and `self.prog.byte_classes` always has length `u8::MAX`. - // (See `ByteClassSet.byte_classes` in `compile.rs`.) - // - // (3) is only safe if (1)+(2) are safe. Namely, the transitions - // of every state are defined to have length equal to the number of - // byte classes in the program. Therefore, a valid class leads to a - // valid transition. (All possible transitions are valid lookups, even - // if it points to a state that hasn't been computed yet.) (3) also - // relies on `si` being correct, but StatePtrs should only ever be - // retrieved from the transition table, which ensures they are correct. - debug_assert!(i < text.len()); - let b = *text.get_unchecked(i); - debug_assert!((b as usize) < self.prog.byte_classes.len()); - let cls = *self.prog.byte_classes.get_unchecked(b as usize); - self.cache.trans.next_unchecked(si, cls as usize) - } - - /// Computes the next state given the current state and the current input - /// byte (which may be EOF). - /// - /// If STATE_DEAD is returned, then there is no valid state transition. - /// This implies that no permutation of future input can lead to a match - /// state. - /// - /// STATE_UNKNOWN can never be returned. - fn exec_byte( - &mut self, - qcur: &mut SparseSet, - qnext: &mut SparseSet, - mut si: StatePtr, - b: Byte, - ) -> Option { - use crate::prog::Inst::*; - - // Initialize a queue with the current DFA state's NFA states. - qcur.clear(); - for ip in self.state(si).inst_ptrs() { - qcur.insert(ip); - } - - // Before inspecting the current byte, we may need to also inspect - // whether the position immediately preceding the current byte - // satisfies the empty assertions found in the current state. - // - // We only need to do this step if there are any empty assertions in - // the current state. - let is_word_last = self.state(si).flags().is_word(); - let is_word = b.is_ascii_word(); - if self.state(si).flags().has_empty() { - // Compute the flags immediately preceding the current byte. - // This means we only care about the "end" or "end line" flags. - // (The "start" flags are computed immediately following the - // current byte and are handled below.) - let mut flags = EmptyFlags::default(); - if b.is_eof() { - flags.end = true; - flags.end_line = true; - } else if b.as_byte().map_or(false, |b| b == b'\n') { - flags.end_line = true; - } - if is_word_last == is_word { - flags.not_word_boundary = true; - } else { - flags.word_boundary = true; - } - // Now follow epsilon transitions from every NFA state, but make - // sure we only follow transitions that satisfy our flags. - qnext.clear(); - for &ip in &*qcur { - self.follow_epsilons(usize_to_u32(ip), qnext, flags); - } - mem::swap(qcur, qnext); - } - - // Now we set flags for immediately after the current byte. Since start - // states are processed separately, and are the only states that can - // have the StartText flag set, we therefore only need to worry about - // the StartLine flag here. 
- // - // We do also keep track of whether this DFA state contains a NFA state - // that is a matching state. This is precisely how we delay the DFA - // matching by one byte in order to process the special EOF sentinel - // byte. Namely, if this DFA state containing a matching NFA state, - // then it is the *next* DFA state that is marked as a match. - let mut empty_flags = EmptyFlags::default(); - let mut state_flags = StateFlags::default(); - empty_flags.start_line = b.as_byte().map_or(false, |b| b == b'\n'); - if b.is_ascii_word() { - state_flags.set_word(); - } - // Now follow all epsilon transitions again, but only after consuming - // the current byte. - qnext.clear(); - for &ip in &*qcur { - match self.prog[ip as usize] { - // These states never happen in a byte-based program. - Char(_) | Ranges(_) => unreachable!(), - // These states are handled when following epsilon transitions. - Save(_) | Split(_) | EmptyLook(_) => {} - Match(_) => { - state_flags.set_match(); - if !self.continue_past_first_match() { - break; - } else if self.prog.matches.len() > 1 - && !qnext.contains(ip as usize) - { - // If we are continuing on to find other matches, - // then keep a record of the match states we've seen. - qnext.insert(ip); - } - } - Bytes(ref inst) => { - if b.as_byte().map_or(false, |b| inst.matches(b)) { - self.follow_epsilons( - inst.goto as InstPtr, - qnext, - empty_flags, - ); - } - } - } - } - - let cache = if b.is_eof() && self.prog.matches.len() > 1 { - // If we're processing the last byte of the input and we're - // matching a regex set, then make the next state contain the - // previous states transitions. We do this so that the main - // matching loop can extract all of the match instructions. - mem::swap(qcur, qnext); - // And don't cache this state because it's totally bunk. - false - } else { - true - }; - - // We've now built up the set of NFA states that ought to comprise the - // next DFA state, so try to find it in the cache, and if it doesn't - // exist, cache it. - // - // N.B. We pass `&mut si` here because the cache may clear itself if - // it has gotten too full. When that happens, the location of the - // current state may change. - let mut next = - match self.cached_state(qnext, state_flags, Some(&mut si)) { - None => return None, - Some(next) => next, - }; - if (self.start & !STATE_START) == next { - // Start states can never be match states since all matches are - // delayed by one byte. - debug_assert!(!self.state(next).flags().is_match()); - next = self.start_ptr(next); - } - if next <= STATE_MAX && self.state(next).flags().is_match() { - next |= STATE_MATCH; - } - debug_assert!(next != STATE_UNKNOWN); - // And now store our state in the current state's next list. - if cache { - let cls = self.byte_class(b); - self.cache.trans.set_next(si, cls, next); - } - Some(next) - } - - /// Follows the epsilon transitions starting at (and including) `ip`. The - /// resulting states are inserted into the ordered set `q`. - /// - /// Conditional epsilon transitions (i.e., empty width assertions) are only - /// followed if they are satisfied by the given flags, which should - /// represent the flags set at the current location in the input. - /// - /// If the current location corresponds to the empty string, then only the - /// end line and/or end text flags may be set. If the current location - /// corresponds to a real byte in the input, then only the start line - /// and/or start text flags may be set. 
- /// - /// As an exception to the above, when finding the initial state, any of - /// the above flags may be set: - /// - /// If matching starts at the beginning of the input, then start text and - /// start line should be set. If the input is empty, then end text and end - /// line should also be set. - /// - /// If matching starts after the beginning of the input, then only start - /// line should be set if the preceding byte is `\n`. End line should never - /// be set in this case. (Even if the following byte is a `\n`, it will - /// be handled in a subsequent DFA state.) - fn follow_epsilons( - &mut self, - ip: InstPtr, - q: &mut SparseSet, - flags: EmptyFlags, - ) { - use crate::prog::EmptyLook::*; - use crate::prog::Inst::*; - - // We need to traverse the NFA to follow epsilon transitions, so avoid - // recursion with an explicit stack. - self.cache.stack.push(ip); - while let Some(mut ip) = self.cache.stack.pop() { - // Try to munch through as many states as possible without - // pushes/pops to the stack. - loop { - // Don't visit states we've already added. - if q.contains(ip as usize) { - break; - } - q.insert(ip as usize); - match self.prog[ip as usize] { - Char(_) | Ranges(_) => unreachable!(), - Match(_) | Bytes(_) => { - break; - } - EmptyLook(ref inst) => { - // Only follow empty assertion states if our flags - // satisfy the assertion. - match inst.look { - StartLine if flags.start_line => { - ip = inst.goto as InstPtr; - } - EndLine if flags.end_line => { - ip = inst.goto as InstPtr; - } - StartText if flags.start => { - ip = inst.goto as InstPtr; - } - EndText if flags.end => { - ip = inst.goto as InstPtr; - } - WordBoundaryAscii if flags.word_boundary => { - ip = inst.goto as InstPtr; - } - NotWordBoundaryAscii - if flags.not_word_boundary => - { - ip = inst.goto as InstPtr; - } - WordBoundary if flags.word_boundary => { - ip = inst.goto as InstPtr; - } - NotWordBoundary if flags.not_word_boundary => { - ip = inst.goto as InstPtr; - } - StartLine | EndLine | StartText | EndText - | WordBoundaryAscii | NotWordBoundaryAscii - | WordBoundary | NotWordBoundary => { - break; - } - } - } - Save(ref inst) => { - ip = inst.goto as InstPtr; - } - Split(ref inst) => { - self.cache.stack.push(inst.goto2 as InstPtr); - ip = inst.goto1 as InstPtr; - } - } - } - } - } - - /// Find a previously computed state matching the given set of instructions - /// and is_match bool. - /// - /// The given set of instructions should represent a single state in the - /// NFA along with all states reachable without consuming any input. - /// - /// The is_match bool should be true if and only if the preceding DFA state - /// contains an NFA matching state. The cached state produced here will - /// then signify a match. (This enables us to delay a match by one byte, - /// in order to account for the EOF sentinel byte.) - /// - /// If the cache is full, then it is wiped before caching a new state. - /// - /// The current state should be specified if it exists, since it will need - /// to be preserved if the cache clears itself. (Start states are - /// always saved, so they should not be passed here.) It takes a mutable - /// pointer to the index because if the cache is cleared, the state's - /// location may change. - fn cached_state( - &mut self, - q: &SparseSet, - mut state_flags: StateFlags, - current_state: Option<&mut StatePtr>, - ) -> Option { - // If we couldn't come up with a non-empty key to represent this state, - // then it is dead and can never lead to a match. 
- // - // Note that inst_flags represent the set of empty width assertions - // in q. We use this as an optimization in exec_byte to determine when - // we should follow epsilon transitions at the empty string preceding - // the current byte. - let key = match self.cached_state_key(q, &mut state_flags) { - None => return Some(STATE_DEAD), - Some(v) => v, - }; - // In the cache? Cool. Done. - if let Some(si) = self.cache.compiled.get_ptr(&key) { - return Some(si); - } - // If the cache has gotten too big, wipe it. - if self.approximate_size() > self.prog.dfa_size_limit - && !self.clear_cache_and_save(current_state) - { - // Ooops. DFA is giving up. - return None; - } - // Allocate room for our state and add it. - self.add_state(key) - } - - /// Produces a key suitable for describing a state in the DFA cache. - /// - /// The key invariant here is that equivalent keys are produced for any two - /// sets of ordered NFA states (and toggling of whether the previous NFA - /// states contain a match state) that do not discriminate a match for any - /// input. - /// - /// Specifically, q should be an ordered set of NFA states and is_match - /// should be true if and only if the previous NFA states contained a match - /// state. - fn cached_state_key( - &mut self, - q: &SparseSet, - state_flags: &mut StateFlags, - ) -> Option { - use crate::prog::Inst::*; - - // We need to build up enough information to recognize pre-built states - // in the DFA. Generally speaking, this includes every instruction - // except for those which are purely epsilon transitions, e.g., the - // Save and Split instructions. - // - // Empty width assertions are also epsilon transitions, but since they - // are conditional, we need to make them part of a state's key in the - // cache. - - let mut insts = - mem::replace(&mut self.cache.insts_scratch_space, vec![]); - insts.clear(); - // Reserve 1 byte for flags. - insts.push(0); - - let mut prev = 0; - for &ip in q { - let ip = usize_to_u32(ip); - match self.prog[ip as usize] { - Char(_) | Ranges(_) => unreachable!(), - Save(_) | Split(_) => {} - Bytes(_) => push_inst_ptr(&mut insts, &mut prev, ip), - EmptyLook(_) => { - state_flags.set_empty(); - push_inst_ptr(&mut insts, &mut prev, ip) - } - Match(_) => { - push_inst_ptr(&mut insts, &mut prev, ip); - if !self.continue_past_first_match() { - break; - } - } - } - } - // If we couldn't transition to any other instructions and we didn't - // see a match when expanding NFA states previously, then this is a - // dead state and no amount of additional input can transition out - // of this state. - let opt_state = if insts.len() == 1 && !state_flags.is_match() { - None - } else { - let StateFlags(f) = *state_flags; - insts[0] = f; - Some(State { data: Arc::from(&*insts) }) - }; - self.cache.insts_scratch_space = insts; - opt_state - } - - /// Clears the cache, but saves and restores current_state if it is not - /// none. - /// - /// The current state must be provided here in case its location in the - /// cache changes. - /// - /// This returns false if the cache is not cleared and the DFA should - /// give up. - fn clear_cache_and_save( - &mut self, - current_state: Option<&mut StatePtr>, - ) -> bool { - if self.cache.compiled.is_empty() { - // Nothing to clear... 
- return true; - } - match current_state { - None => self.clear_cache(), - Some(si) => { - let cur = self.state(*si).clone(); - if !self.clear_cache() { - return false; - } - // The unwrap is OK because we just cleared the cache and - // therefore know that the next state pointer won't exceed - // STATE_MAX. - *si = self.restore_state(cur).unwrap(); - true - } - } - } - - /// Wipes the state cache, but saves and restores the current start state. - /// - /// This returns false if the cache is not cleared and the DFA should - /// give up. - fn clear_cache(&mut self) -> bool { - // Bail out of the DFA if we're moving too "slowly." - // A heuristic from RE2: assume the DFA is too slow if it is processing - // 10 or fewer bytes per state. - // Additionally, we permit the cache to be flushed a few times before - // caling it quits. - let nstates = self.cache.compiled.len(); - if self.cache.flush_count >= 3 - && self.at >= self.last_cache_flush - && (self.at - self.last_cache_flush) <= 10 * nstates - { - return false; - } - // Update statistics tracking cache flushes. - self.last_cache_flush = self.at; - self.cache.flush_count += 1; - - // OK, actually flush the cache. - let start = self.state(self.start & !STATE_START).clone(); - let last_match = if self.last_match_si <= STATE_MAX { - Some(self.state(self.last_match_si).clone()) - } else { - None - }; - self.cache.reset_size(); - self.cache.trans.clear(); - self.cache.compiled.clear(); - for s in &mut self.cache.start_states { - *s = STATE_UNKNOWN; - } - // The unwraps are OK because we just cleared the cache and therefore - // know that the next state pointer won't exceed STATE_MAX. - let start_ptr = self.restore_state(start).unwrap(); - self.start = self.start_ptr(start_ptr); - if let Some(last_match) = last_match { - self.last_match_si = self.restore_state(last_match).unwrap(); - } - true - } - - /// Restores the given state back into the cache, and returns a pointer - /// to it. - fn restore_state(&mut self, state: State) -> Option { - // If we've already stored this state, just return a pointer to it. - // None will be the wiser. - if let Some(si) = self.cache.compiled.get_ptr(&state) { - return Some(si); - } - self.add_state(state) - } - - /// Returns the next state given the current state si and current byte - /// b. {qcur,qnext} are used as scratch space for storing ordered NFA - /// states. - /// - /// This tries to fetch the next state from the cache, but if that fails, - /// it computes the next state, caches it and returns a pointer to it. - /// - /// The pointer can be to a real state, or it can be STATE_DEAD. - /// STATE_UNKNOWN cannot be returned. - /// - /// None is returned if a new state could not be allocated (i.e., the DFA - /// ran out of space and thinks it's running too slowly). - fn next_state( - &mut self, - qcur: &mut SparseSet, - qnext: &mut SparseSet, - si: StatePtr, - b: Byte, - ) -> Option { - if si == STATE_DEAD { - return Some(STATE_DEAD); - } - match self.cache.trans.next(si, self.byte_class(b)) { - STATE_UNKNOWN => self.exec_byte(qcur, qnext, si, b), - STATE_QUIT => None, - nsi => Some(nsi), - } - } - - /// Computes and returns the start state, where searching begins at - /// position `at` in `text`. If the state has already been computed, - /// then it is pulled from the cache. If the state hasn't been cached, - /// then it is computed, cached and a pointer to it is returned. - /// - /// This may return STATE_DEAD but never STATE_UNKNOWN. 
- #[cfg_attr(feature = "perf-inline", inline(always))] - fn start_state( - &mut self, - q: &mut SparseSet, - empty_flags: EmptyFlags, - state_flags: StateFlags, - ) -> Option { - // Compute an index into our cache of start states based on the set - // of empty/state flags set at the current position in the input. We - // don't use every flag since not all flags matter. For example, since - // matches are delayed by one byte, start states can never be match - // states. - let flagi = { - (((empty_flags.start as u8) << 0) - | ((empty_flags.end as u8) << 1) - | ((empty_flags.start_line as u8) << 2) - | ((empty_flags.end_line as u8) << 3) - | ((empty_flags.word_boundary as u8) << 4) - | ((empty_flags.not_word_boundary as u8) << 5) - | ((state_flags.is_word() as u8) << 6)) as usize - }; - match self.cache.start_states[flagi] { - STATE_UNKNOWN => {} - si => return Some(si), - } - q.clear(); - let start = usize_to_u32(self.prog.start); - self.follow_epsilons(start, q, empty_flags); - // Start states can never be match states because we delay every match - // by one byte. Given an empty string and an empty match, the match - // won't actually occur until the DFA processes the special EOF - // sentinel byte. - let sp = match self.cached_state(q, state_flags, None) { - None => return None, - Some(sp) => self.start_ptr(sp), - }; - self.cache.start_states[flagi] = sp; - Some(sp) - } - - /// Computes the set of starting flags for the given position in text. - /// - /// This should only be used when executing the DFA forwards over the - /// input. - fn start_flags(&self, text: &[u8], at: usize) -> (EmptyFlags, StateFlags) { - let mut empty_flags = EmptyFlags::default(); - let mut state_flags = StateFlags::default(); - empty_flags.start = at == 0; - empty_flags.end = text.is_empty(); - empty_flags.start_line = at == 0 || text[at - 1] == b'\n'; - empty_flags.end_line = text.is_empty(); - - let is_word_last = at > 0 && Byte::byte(text[at - 1]).is_ascii_word(); - let is_word = at < text.len() && Byte::byte(text[at]).is_ascii_word(); - if is_word_last { - state_flags.set_word(); - } - if is_word == is_word_last { - empty_flags.not_word_boundary = true; - } else { - empty_flags.word_boundary = true; - } - (empty_flags, state_flags) - } - - /// Computes the set of starting flags for the given position in text. - /// - /// This should only be used when executing the DFA in reverse over the - /// input. - fn start_flags_reverse( - &self, - text: &[u8], - at: usize, - ) -> (EmptyFlags, StateFlags) { - let mut empty_flags = EmptyFlags::default(); - let mut state_flags = StateFlags::default(); - empty_flags.start = at == text.len(); - empty_flags.end = text.is_empty(); - empty_flags.start_line = at == text.len() || text[at] == b'\n'; - empty_flags.end_line = text.is_empty(); - - let is_word_last = - at < text.len() && Byte::byte(text[at]).is_ascii_word(); - let is_word = at > 0 && Byte::byte(text[at - 1]).is_ascii_word(); - if is_word_last { - state_flags.set_word(); - } - if is_word == is_word_last { - empty_flags.not_word_boundary = true; - } else { - empty_flags.word_boundary = true; - } - (empty_flags, state_flags) - } - - /// Returns a reference to a State given a pointer to it. - fn state(&self, si: StatePtr) -> &State { - self.cache.compiled.get_state(si).unwrap() - } - - /// Adds the given state to the DFA. - /// - /// This allocates room for transitions out of this state in - /// self.cache.trans. The transitions can be set with the returned - /// StatePtr. 
- /// - /// If None is returned, then the state limit was reached and the DFA - /// should quit. - fn add_state(&mut self, state: State) -> Option { - // This will fail if the next state pointer exceeds STATE_PTR. In - // practice, the cache limit will prevent us from ever getting here, - // but maybe callers will set the cache size to something ridiculous... - let si = match self.cache.trans.add() { - None => return None, - Some(si) => si, - }; - // If the program has a Unicode word boundary, then set any transitions - // for non-ASCII bytes to STATE_QUIT. If the DFA stumbles over such a - // transition, then it will quit and an alternative matching engine - // will take over. - if self.prog.has_unicode_word_boundary { - for b in 128..256 { - let cls = self.byte_class(Byte::byte(b as u8)); - self.cache.trans.set_next(si, cls, STATE_QUIT); - } - } - // Finally, put our actual state on to our heap of states and index it - // so we can find it later. - self.cache.size += self.cache.trans.state_heap_size() - + state.data.len() - + (2 * mem::size_of::()) - + mem::size_of::(); - self.cache.compiled.insert(state, si); - // Transition table and set of states and map should all be in sync. - debug_assert!( - self.cache.compiled.len() == self.cache.trans.num_states() - ); - Some(si) - } - - /// Quickly finds the next occurrence of any literal prefixes in the regex. - /// If there are no literal prefixes, then the current position is - /// returned. If there are literal prefixes and one could not be found, - /// then None is returned. - /// - /// This should only be called when the DFA is in a start state. - fn prefix_at(&self, text: &[u8], at: usize) -> Option { - self.prog.prefixes.find(&text[at..]).map(|(s, _)| at + s) - } - - /// Returns the number of byte classes required to discriminate transitions - /// in each state. - /// - /// invariant: num_byte_classes() == len(State.next) - fn num_byte_classes(&self) -> usize { - // We add 1 to account for the special EOF byte. - (self.prog.byte_classes[255] as usize + 1) + 1 - } - - /// Given an input byte or the special EOF sentinel, return its - /// corresponding byte class. - #[cfg_attr(feature = "perf-inline", inline(always))] - fn byte_class(&self, b: Byte) -> usize { - match b.as_byte() { - None => self.num_byte_classes() - 1, - Some(b) => self.u8_class(b), - } - } - - /// Like byte_class, but explicitly for u8s. - #[cfg_attr(feature = "perf-inline", inline(always))] - fn u8_class(&self, b: u8) -> usize { - self.prog.byte_classes[b as usize] as usize - } - - /// Returns true if the DFA should continue searching past the first match. - /// - /// Leftmost first semantics in the DFA are preserved by not following NFA - /// transitions after the first match is seen. - /// - /// On occasion, we want to avoid leftmost first semantics to find either - /// the longest match (for reverse search) or all possible matches (for - /// regex sets). - fn continue_past_first_match(&self) -> bool { - self.prog.is_reverse || self.prog.matches.len() > 1 - } - - /// Returns true if there is a prefix we can quickly search for. - fn has_prefix(&self) -> bool { - !self.prog.is_reverse - && !self.prog.prefixes.is_empty() - && !self.prog.is_anchored_start - } - - /// Sets the STATE_START bit in the given state pointer if and only if - /// we have a prefix to scan for. - /// - /// If there's no prefix, then it's a waste to treat the start state - /// specially. 
- fn start_ptr(&self, si: StatePtr) -> StatePtr { - if self.has_prefix() { - si | STATE_START - } else { - si - } - } - - /// Approximate size returns the approximate heap space currently used by - /// the DFA. It is used to determine whether the DFA's state cache needs to - /// be wiped. Namely, it is possible that for certain regexes on certain - /// inputs, a new state could be created for every byte of input. (This is - /// bad for memory use, so we bound it with a cache.) - fn approximate_size(&self) -> usize { - self.cache.size + self.prog.approximate_size() - } -} - -/// An abstraction for representing a map of states. The map supports two -/// different ways of state lookup. One is fast constant time access via a -/// state pointer. The other is a hashmap lookup based on the DFA's -/// constituent NFA states. -/// -/// A DFA state internally uses an Arc such that we only need to store the -/// set of NFA states on the heap once, even though we support looking up -/// states by two different means. A more natural way to express this might -/// use raw pointers, but an Arc is safe and effectively achieves the same -/// thing. -#[derive(Debug)] -struct StateMap { - /// The keys are not actually static but rely on always pointing to a - /// buffer in `states` which will never be moved except when clearing - /// the map or on drop, in which case the keys of this map will be - /// removed before - map: HashMap, - /// Our set of states. Note that `StatePtr / num_byte_classes` indexes - /// this Vec rather than just a `StatePtr`. - states: Vec, - /// The number of byte classes in the DFA. Used to index `states`. - num_byte_classes: usize, -} - -impl StateMap { - fn new(num_byte_classes: usize) -> StateMap { - StateMap { - map: HashMap::new(), - states: vec![], - num_byte_classes: num_byte_classes, - } - } - - fn len(&self) -> usize { - self.states.len() - } - - fn is_empty(&self) -> bool { - self.states.is_empty() - } - - fn get_ptr(&self, state: &State) -> Option { - self.map.get(state).cloned() - } - - fn get_state(&self, si: StatePtr) -> Option<&State> { - self.states.get(si as usize / self.num_byte_classes) - } - - fn insert(&mut self, state: State, si: StatePtr) { - self.map.insert(state.clone(), si); - self.states.push(state); - } - - fn clear(&mut self) { - self.map.clear(); - self.states.clear(); - } -} - -impl Transitions { - /// Create a new transition table. - /// - /// The number of byte classes corresponds to the stride. Every state will - /// have `num_byte_classes` slots for transitions. - fn new(num_byte_classes: usize) -> Transitions { - Transitions { table: vec![], num_byte_classes: num_byte_classes } - } - - /// Returns the total number of states currently in this table. - fn num_states(&self) -> usize { - self.table.len() / self.num_byte_classes - } - - /// Allocates room for one additional state and returns a pointer to it. - /// - /// If there's no more room, None is returned. - fn add(&mut self) -> Option { - let si = self.table.len(); - if si > STATE_MAX as usize { - return None; - } - self.table.extend(repeat(STATE_UNKNOWN).take(self.num_byte_classes)); - Some(usize_to_u32(si)) - } - - /// Clears the table of all states. - fn clear(&mut self) { - self.table.clear(); - } - - /// Sets the transition from (si, cls) to next. - fn set_next(&mut self, si: StatePtr, cls: usize, next: StatePtr) { - self.table[si as usize + cls] = next; - } - - /// Returns the transition corresponding to (si, cls). 
- fn next(&self, si: StatePtr, cls: usize) -> StatePtr { - self.table[si as usize + cls] - } - - /// The heap size, in bytes, of a single state in the transition table. - fn state_heap_size(&self) -> usize { - self.num_byte_classes * mem::size_of::() - } - - /// Like `next`, but uses unchecked access and is therefore not safe. - unsafe fn next_unchecked(&self, si: StatePtr, cls: usize) -> StatePtr { - debug_assert!((si as usize) < self.table.len()); - debug_assert!(cls < self.num_byte_classes); - *self.table.get_unchecked(si as usize + cls) - } -} - -impl StateFlags { - fn is_match(&self) -> bool { - self.0 & 0b0000000_1 > 0 - } - - fn set_match(&mut self) { - self.0 |= 0b0000000_1; - } - - fn is_word(&self) -> bool { - self.0 & 0b000000_1_0 > 0 - } - - fn set_word(&mut self) { - self.0 |= 0b000000_1_0; - } - - fn has_empty(&self) -> bool { - self.0 & 0b00000_1_00 > 0 - } - - fn set_empty(&mut self) { - self.0 |= 0b00000_1_00; - } -} - -impl Byte { - fn byte(b: u8) -> Self { - Byte(b as u16) - } - fn eof() -> Self { - Byte(256) - } - fn is_eof(&self) -> bool { - self.0 == 256 - } - - fn is_ascii_word(&self) -> bool { - let b = match self.as_byte() { - None => return false, - Some(b) => b, - }; - match b { - b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'_' => true, - _ => false, - } - } - - fn as_byte(&self) -> Option { - if self.is_eof() { - None - } else { - Some(self.0 as u8) - } - } -} - -impl fmt::Debug for State { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let ips: Vec = self.inst_ptrs().collect(); - f.debug_struct("State") - .field("flags", &self.flags()) - .field("insts", &ips) - .finish() - } -} - -impl fmt::Debug for Transitions { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let mut fmtd = f.debug_map(); - for si in 0..self.num_states() { - let s = si * self.num_byte_classes; - let e = s + self.num_byte_classes; - fmtd.entry(&si.to_string(), &TransitionsRow(&self.table[s..e])); - } - fmtd.finish() - } -} - -struct TransitionsRow<'a>(&'a [StatePtr]); - -impl<'a> fmt::Debug for TransitionsRow<'a> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let mut fmtd = f.debug_map(); - for (b, si) in self.0.iter().enumerate() { - match *si { - STATE_UNKNOWN => {} - STATE_DEAD => { - fmtd.entry(&vb(b as usize), &"DEAD"); - } - si => { - fmtd.entry(&vb(b as usize), &si.to_string()); - } - } - } - fmtd.finish() - } -} - -impl fmt::Debug for StateFlags { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("StateFlags") - .field("is_match", &self.is_match()) - .field("is_word", &self.is_word()) - .field("has_empty", &self.has_empty()) - .finish() - } -} - -/// Helper function for formatting a byte as a nice-to-read escaped string. 
-fn vb(b: usize) -> String { - use std::ascii::escape_default; - - if b > ::std::u8::MAX as usize { - "EOF".to_owned() - } else { - let escaped = escape_default(b as u8).collect::>(); - String::from_utf8_lossy(&escaped).into_owned() - } -} - -fn usize_to_u32(n: usize) -> u32 { - if (n as u64) > (::std::u32::MAX as u64) { - panic!("BUG: {} is too big to fit into u32", n) - } - n as u32 -} - -#[allow(dead_code)] // useful for debugging -fn show_state_ptr(si: StatePtr) -> String { - let mut s = format!("{:?}", si & STATE_MAX); - if si == STATE_UNKNOWN { - s = format!("{} (unknown)", s); - } - if si == STATE_DEAD { - s = format!("{} (dead)", s); - } - if si == STATE_QUIT { - s = format!("{} (quit)", s); - } - if si & STATE_START > 0 { - s = format!("{} (start)", s); - } - if si & STATE_MATCH > 0 { - s = format!("{} (match)", s); - } - s -} - -/// https://developers.google.com/protocol-buffers/docs/encoding#varints -fn write_vari32(data: &mut Vec, n: i32) { - let mut un = (n as u32) << 1; - if n < 0 { - un = !un; - } - write_varu32(data, un) -} - -/// https://developers.google.com/protocol-buffers/docs/encoding#varints -fn read_vari32(data: &[u8]) -> (i32, usize) { - let (un, i) = read_varu32(data); - let mut n = (un >> 1) as i32; - if un & 1 != 0 { - n = !n; - } - (n, i) -} - -/// https://developers.google.com/protocol-buffers/docs/encoding#varints -fn write_varu32(data: &mut Vec, mut n: u32) { - while n >= 0b1000_0000 { - data.push((n as u8) | 0b1000_0000); - n >>= 7; - } - data.push(n as u8); -} - -/// https://developers.google.com/protocol-buffers/docs/encoding#varints -fn read_varu32(data: &[u8]) -> (u32, usize) { - let mut n: u32 = 0; - let mut shift: u32 = 0; - for (i, &b) in data.iter().enumerate() { - if b < 0b1000_0000 { - return (n | ((b as u32) << shift), i + 1); - } - n |= ((b as u32) & 0b0111_1111) << shift; - shift += 7; - } - (0, 0) -} - -#[cfg(test)] -mod tests { - - use super::{ - push_inst_ptr, read_vari32, read_varu32, write_vari32, write_varu32, - State, StateFlags, - }; - use quickcheck::{quickcheck, Gen, QuickCheck}; - use std::sync::Arc; - - #[test] - fn prop_state_encode_decode() { - fn p(mut ips: Vec, flags: u8) -> bool { - // It looks like our encoding scheme can't handle instruction - // pointers at or above 2**31. We should fix that, but it seems - // unlikely to occur in real code due to the amount of memory - // required for such a state machine. So for now, we just clamp - // our test data. 
- for ip in &mut ips { - if *ip >= 1 << 31 { - *ip = (1 << 31) - 1; - } - } - let mut data = vec![flags]; - let mut prev = 0; - for &ip in ips.iter() { - push_inst_ptr(&mut data, &mut prev, ip); - } - let state = State { data: Arc::from(&data[..]) }; - - let expected: Vec = - ips.into_iter().map(|ip| ip as usize).collect(); - let got: Vec = state.inst_ptrs().collect(); - expected == got && state.flags() == StateFlags(flags) - } - QuickCheck::new() - .gen(Gen::new(10_000)) - .quickcheck(p as fn(Vec, u8) -> bool); - } - - #[test] - fn prop_read_write_u32() { - fn p(n: u32) -> bool { - let mut buf = vec![]; - write_varu32(&mut buf, n); - let (got, nread) = read_varu32(&buf); - nread == buf.len() && got == n - } - quickcheck(p as fn(u32) -> bool); - } - - #[test] - fn prop_read_write_i32() { - fn p(n: i32) -> bool { - let mut buf = vec![]; - write_vari32(&mut buf, n); - let (got, nread) = read_vari32(&buf); - nread == buf.len() && got == n - } - quickcheck(p as fn(i32) -> bool); - } -} diff --git a/collector/compile-benchmarks/regex-1.5.5/src/error.rs b/collector/compile-benchmarks/regex-1.5.5/src/error.rs deleted file mode 100644 index 3e0ec7521..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/src/error.rs +++ /dev/null @@ -1,71 +0,0 @@ -use std::fmt; -use std::iter::repeat; - -/// An error that occurred during parsing or compiling a regular expression. -#[derive(Clone, PartialEq)] -pub enum Error { - /// A syntax error. - Syntax(String), - /// The compiled program exceeded the set size limit. - /// The argument is the size limit imposed. - CompiledTooBig(usize), - /// Hints that destructuring should not be exhaustive. - /// - /// This enum may grow additional variants, so this makes sure clients - /// don't count on exhaustive matching. (Otherwise, adding a new variant - /// could break existing code.) - #[doc(hidden)] - __Nonexhaustive, -} - -impl ::std::error::Error for Error { - // TODO: Remove this method entirely on the next breaking semver release. - #[allow(deprecated)] - fn description(&self) -> &str { - match *self { - Error::Syntax(ref err) => err, - Error::CompiledTooBig(_) => "compiled program too big", - Error::__Nonexhaustive => unreachable!(), - } - } -} - -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match *self { - Error::Syntax(ref err) => err.fmt(f), - Error::CompiledTooBig(limit) => write!( - f, - "Compiled regex exceeds size limit of {} bytes.", - limit - ), - Error::__Nonexhaustive => unreachable!(), - } - } -} - -// We implement our own Debug implementation so that we show nicer syntax -// errors when people use `Regex::new(...).unwrap()`. It's a little weird, -// but the `Syntax` variant is already storing a `String` anyway, so we might -// as well format it nicely. 
-impl fmt::Debug for Error { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match *self { - Error::Syntax(ref err) => { - let hr: String = repeat('~').take(79).collect(); - writeln!(f, "Syntax(")?; - writeln!(f, "{}", hr)?; - writeln!(f, "{}", err)?; - writeln!(f, "{}", hr)?; - write!(f, ")")?; - Ok(()) - } - Error::CompiledTooBig(limit) => { - f.debug_tuple("CompiledTooBig").field(&limit).finish() - } - Error::__Nonexhaustive => { - f.debug_tuple("__Nonexhaustive").finish() - } - } - } -} diff --git a/collector/compile-benchmarks/regex-1.5.5/src/exec.rs b/collector/compile-benchmarks/regex-1.5.5/src/exec.rs deleted file mode 100644 index d5fad1c0e..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/src/exec.rs +++ /dev/null @@ -1,1655 +0,0 @@ -use std::cell::RefCell; -use std::collections::HashMap; -use std::panic::AssertUnwindSafe; -use std::sync::Arc; - -#[cfg(feature = "perf-literal")] -use aho_corasick::{AhoCorasick, AhoCorasickBuilder, MatchKind}; -use regex_syntax::hir::literal::Literals; -use regex_syntax::hir::Hir; -use regex_syntax::ParserBuilder; - -use crate::backtrack; -use crate::compile::Compiler; -#[cfg(feature = "perf-dfa")] -use crate::dfa; -use crate::error::Error; -use crate::input::{ByteInput, CharInput}; -use crate::literal::LiteralSearcher; -use crate::pikevm; -use crate::pool::{Pool, PoolGuard}; -use crate::prog::Program; -use crate::re_builder::RegexOptions; -use crate::re_bytes; -use crate::re_set; -use crate::re_trait::{Locations, RegularExpression, Slot}; -use crate::re_unicode; -use crate::utf8::next_utf8; - -/// `Exec` manages the execution of a regular expression. -/// -/// In particular, this manages the various compiled forms of a single regular -/// expression and the choice of which matching engine to use to execute a -/// regular expression. -#[derive(Debug)] -pub struct Exec { - /// All read only state. - ro: Arc, - /// A pool of reusable values for the various matching engines. - /// - /// Note that boxing this value is not strictly necessary, but it is an - /// easy way to ensure that T does not bloat the stack sized used by a pool - /// in the case where T is big. And this turns out to be the case at the - /// time of writing for regex's use of this pool. At the time of writing, - /// the size of a Regex on the stack is 856 bytes. Boxing this value - /// reduces that size to 16 bytes. - pool: Box>, -} - -/// `ExecNoSync` is like `Exec`, except it embeds a reference to a cache. This -/// means it is no longer Sync, but we can now avoid the overhead of -/// synchronization to fetch the cache. -#[derive(Debug)] -pub struct ExecNoSync<'c> { - /// All read only state. - ro: &'c Arc, - /// Caches for the various matching engines. - cache: PoolGuard<'c, ProgramCache>, -} - -/// `ExecNoSyncStr` is like `ExecNoSync`, but matches on &str instead of &[u8]. -#[derive(Debug)] -pub struct ExecNoSyncStr<'c>(ExecNoSync<'c>); - -/// `ExecReadOnly` comprises all read only state for a regex. Namely, all such -/// state is determined at compile time and never changes during search. -#[derive(Debug)] -struct ExecReadOnly { - /// The original regular expressions given by the caller to compile. - res: Vec, - /// A compiled program that is used in the NFA simulation and backtracking. - /// It can be byte-based or Unicode codepoint based. - /// - /// N.B. It is not possibly to make this byte-based from the public API. - /// It is only used for testing byte based programs in the NFA simulations. 
- nfa: Program, - /// A compiled byte based program for DFA execution. This is only used - /// if a DFA can be executed. (Currently, only word boundary assertions are - /// not supported.) Note that this program contains an embedded `.*?` - /// preceding the first capture group, unless the regex is anchored at the - /// beginning. - dfa: Program, - /// The same as above, except the program is reversed (and there is no - /// preceding `.*?`). This is used by the DFA to find the starting location - /// of matches. - dfa_reverse: Program, - /// A set of suffix literals extracted from the regex. - /// - /// Prefix literals are stored on the `Program`, since they are used inside - /// the matching engines. - suffixes: LiteralSearcher, - /// An Aho-Corasick automaton with leftmost-first match semantics. - /// - /// This is only set when the entire regex is a simple unanchored - /// alternation of literals. We could probably use it more circumstances, - /// but this is already hacky enough in this architecture. - /// - /// N.B. We use u32 as a state ID representation under the assumption that - /// if we were to exhaust the ID space, we probably would have long - /// surpassed the compilation size limit. - #[cfg(feature = "perf-literal")] - ac: Option>, - /// match_type encodes as much upfront knowledge about how we're going to - /// execute a search as possible. - match_type: MatchType, -} - -/// Facilitates the construction of an executor by exposing various knobs -/// to control how a regex is executed and what kinds of resources it's -/// permitted to use. -// `ExecBuilder` is only public via the `internal` module, so avoid deriving -// `Debug`. -#[allow(missing_debug_implementations)] -pub struct ExecBuilder { - options: RegexOptions, - match_type: Option, - bytes: bool, - only_utf8: bool, -} - -/// Parsed represents a set of parsed regular expressions and their detected -/// literals. -struct Parsed { - exprs: Vec, - prefixes: Literals, - suffixes: Literals, - bytes: bool, -} - -impl ExecBuilder { - /// Create a regex execution builder. - /// - /// This uses default settings for everything except the regex itself, - /// which must be provided. Further knobs can be set by calling methods, - /// and then finally, `build` to actually create the executor. - pub fn new(re: &str) -> Self { - Self::new_many(&[re]) - } - - /// Like new, but compiles the union of the given regular expressions. - /// - /// Note that when compiling 2 or more regular expressions, capture groups - /// are completely unsupported. (This means both `find` and `captures` - /// won't work.) - pub fn new_many(res: I) -> Self - where - S: AsRef, - I: IntoIterator, - { - let mut opts = RegexOptions::default(); - opts.pats = res.into_iter().map(|s| s.as_ref().to_owned()).collect(); - Self::new_options(opts) - } - - /// Create a regex execution builder. - pub fn new_options(opts: RegexOptions) -> Self { - ExecBuilder { - options: opts, - match_type: None, - bytes: false, - only_utf8: true, - } - } - - /// Set the matching engine to be automatically determined. - /// - /// This is the default state and will apply whatever optimizations are - /// possible, such as running a DFA. - /// - /// This overrides whatever was previously set via the `nfa` or - /// `bounded_backtracking` methods. - pub fn automatic(mut self) -> Self { - self.match_type = None; - self - } - - /// Sets the matching engine to use the NFA algorithm no matter what - /// optimizations are possible. 
- /// - /// This overrides whatever was previously set via the `automatic` or - /// `bounded_backtracking` methods. - pub fn nfa(mut self) -> Self { - self.match_type = Some(MatchType::Nfa(MatchNfaType::PikeVM)); - self - } - - /// Sets the matching engine to use a bounded backtracking engine no - /// matter what optimizations are possible. - /// - /// One must use this with care, since the bounded backtracking engine - /// uses memory proportion to `len(regex) * len(text)`. - /// - /// This overrides whatever was previously set via the `automatic` or - /// `nfa` methods. - pub fn bounded_backtracking(mut self) -> Self { - self.match_type = Some(MatchType::Nfa(MatchNfaType::Backtrack)); - self - } - - /// Compiles byte based programs for use with the NFA matching engines. - /// - /// By default, the NFA engines match on Unicode scalar values. They can - /// be made to use byte based programs instead. In general, the byte based - /// programs are slower because of a less efficient encoding of character - /// classes. - /// - /// Note that this does not impact DFA matching engines, which always - /// execute on bytes. - pub fn bytes(mut self, yes: bool) -> Self { - self.bytes = yes; - self - } - - /// When disabled, the program compiled may match arbitrary bytes. - /// - /// When enabled (the default), all compiled programs exclusively match - /// valid UTF-8 bytes. - pub fn only_utf8(mut self, yes: bool) -> Self { - self.only_utf8 = yes; - self - } - - /// Set the Unicode flag. - pub fn unicode(mut self, yes: bool) -> Self { - self.options.unicode = yes; - self - } - - /// Parse the current set of patterns into their AST and extract literals. - fn parse(&self) -> Result { - let mut exprs = Vec::with_capacity(self.options.pats.len()); - let mut prefixes = Some(Literals::empty()); - let mut suffixes = Some(Literals::empty()); - let mut bytes = false; - let is_set = self.options.pats.len() > 1; - // If we're compiling a regex set and that set has any anchored - // expressions, then disable all literal optimizations. - for pat in &self.options.pats { - let mut parser = ParserBuilder::new() - .octal(self.options.octal) - .case_insensitive(self.options.case_insensitive) - .multi_line(self.options.multi_line) - .dot_matches_new_line(self.options.dot_matches_new_line) - .swap_greed(self.options.swap_greed) - .ignore_whitespace(self.options.ignore_whitespace) - .unicode(self.options.unicode) - .allow_invalid_utf8(!self.only_utf8) - .nest_limit(self.options.nest_limit) - .build(); - let expr = - parser.parse(pat).map_err(|e| Error::Syntax(e.to_string()))?; - bytes = bytes || !expr.is_always_utf8(); - - if cfg!(feature = "perf-literal") { - if !expr.is_anchored_start() && expr.is_any_anchored_start() { - // Partial anchors unfortunately make it hard to use - // prefixes, so disable them. - prefixes = None; - } else if is_set && expr.is_anchored_start() { - // Regex sets with anchors do not go well with literal - // optimizations. - prefixes = None; - } - prefixes = prefixes.and_then(|mut prefixes| { - if !prefixes.union_prefixes(&expr) { - None - } else { - Some(prefixes) - } - }); - - if !expr.is_anchored_end() && expr.is_any_anchored_end() { - // Partial anchors unfortunately make it hard to use - // suffixes, so disable them. - suffixes = None; - } else if is_set && expr.is_anchored_end() { - // Regex sets with anchors do not go well with literal - // optimizations. 
- suffixes = None; - } - suffixes = suffixes.and_then(|mut suffixes| { - if !suffixes.union_suffixes(&expr) { - None - } else { - Some(suffixes) - } - }); - } - exprs.push(expr); - } - Ok(Parsed { - exprs: exprs, - prefixes: prefixes.unwrap_or_else(Literals::empty), - suffixes: suffixes.unwrap_or_else(Literals::empty), - bytes: bytes, - }) - } - - /// Build an executor that can run a regular expression. - pub fn build(self) -> Result { - // Special case when we have no patterns to compile. - // This can happen when compiling a regex set. - if self.options.pats.is_empty() { - let ro = Arc::new(ExecReadOnly { - res: vec![], - nfa: Program::new(), - dfa: Program::new(), - dfa_reverse: Program::new(), - suffixes: LiteralSearcher::empty(), - #[cfg(feature = "perf-literal")] - ac: None, - match_type: MatchType::Nothing, - }); - let pool = ExecReadOnly::new_pool(&ro); - return Ok(Exec { ro: ro, pool }); - } - let parsed = self.parse()?; - let mut nfa = Compiler::new() - .size_limit(self.options.size_limit) - .bytes(self.bytes || parsed.bytes) - .only_utf8(self.only_utf8) - .compile(&parsed.exprs)?; - let mut dfa = Compiler::new() - .size_limit(self.options.size_limit) - .dfa(true) - .only_utf8(self.only_utf8) - .compile(&parsed.exprs)?; - let mut dfa_reverse = Compiler::new() - .size_limit(self.options.size_limit) - .dfa(true) - .only_utf8(self.only_utf8) - .reverse(true) - .compile(&parsed.exprs)?; - - #[cfg(feature = "perf-literal")] - let ac = self.build_aho_corasick(&parsed); - nfa.prefixes = LiteralSearcher::prefixes(parsed.prefixes); - dfa.prefixes = nfa.prefixes.clone(); - dfa.dfa_size_limit = self.options.dfa_size_limit; - dfa_reverse.dfa_size_limit = self.options.dfa_size_limit; - - let mut ro = ExecReadOnly { - res: self.options.pats, - nfa: nfa, - dfa: dfa, - dfa_reverse: dfa_reverse, - suffixes: LiteralSearcher::suffixes(parsed.suffixes), - #[cfg(feature = "perf-literal")] - ac: ac, - match_type: MatchType::Nothing, - }; - ro.match_type = ro.choose_match_type(self.match_type); - - let ro = Arc::new(ro); - let pool = ExecReadOnly::new_pool(&ro); - Ok(Exec { ro, pool }) - } - - #[cfg(feature = "perf-literal")] - fn build_aho_corasick(&self, parsed: &Parsed) -> Option> { - if parsed.exprs.len() != 1 { - return None; - } - let lits = match alternation_literals(&parsed.exprs[0]) { - None => return None, - Some(lits) => lits, - }; - // If we have a small number of literals, then let Teddy handle - // things (see literal/mod.rs). - if lits.len() <= 32 { - return None; - } - Some( - AhoCorasickBuilder::new() - .match_kind(MatchKind::LeftmostFirst) - .auto_configure(&lits) - .build_with_size::(&lits) - // This should never happen because we'd long exceed the - // compilation limit for regexes first. 
- .expect("AC automaton too big"), - ) - } -} - -impl<'c> RegularExpression for ExecNoSyncStr<'c> { - type Text = str; - - fn slots_len(&self) -> usize { - self.0.slots_len() - } - - fn next_after_empty(&self, text: &str, i: usize) -> usize { - next_utf8(text.as_bytes(), i) - } - - #[cfg_attr(feature = "perf-inline", inline(always))] - fn shortest_match_at(&self, text: &str, start: usize) -> Option { - self.0.shortest_match_at(text.as_bytes(), start) - } - - #[cfg_attr(feature = "perf-inline", inline(always))] - fn is_match_at(&self, text: &str, start: usize) -> bool { - self.0.is_match_at(text.as_bytes(), start) - } - - #[cfg_attr(feature = "perf-inline", inline(always))] - fn find_at(&self, text: &str, start: usize) -> Option<(usize, usize)> { - self.0.find_at(text.as_bytes(), start) - } - - #[cfg_attr(feature = "perf-inline", inline(always))] - fn captures_read_at( - &self, - locs: &mut Locations, - text: &str, - start: usize, - ) -> Option<(usize, usize)> { - self.0.captures_read_at(locs, text.as_bytes(), start) - } -} - -impl<'c> RegularExpression for ExecNoSync<'c> { - type Text = [u8]; - - /// Returns the number of capture slots in the regular expression. (There - /// are two slots for every capture group, corresponding to possibly empty - /// start and end locations of the capture.) - fn slots_len(&self) -> usize { - self.ro.nfa.captures.len() * 2 - } - - fn next_after_empty(&self, _text: &[u8], i: usize) -> usize { - i + 1 - } - - /// Returns the end of a match location, possibly occurring before the - /// end location of the correct leftmost-first match. - #[cfg_attr(feature = "perf-inline", inline(always))] - fn shortest_match_at(&self, text: &[u8], start: usize) -> Option { - if !self.is_anchor_end_match(text) { - return None; - } - match self.ro.match_type { - #[cfg(feature = "perf-literal")] - MatchType::Literal(ty) => { - self.find_literals(ty, text, start).map(|(_, e)| e) - } - #[cfg(feature = "perf-dfa")] - MatchType::Dfa | MatchType::DfaMany => { - match self.shortest_dfa(text, start) { - dfa::Result::Match(end) => Some(end), - dfa::Result::NoMatch(_) => None, - dfa::Result::Quit => self.shortest_nfa(text, start), - } - } - #[cfg(feature = "perf-dfa")] - MatchType::DfaAnchoredReverse => { - match dfa::Fsm::reverse( - &self.ro.dfa_reverse, - self.cache.value(), - true, - &text[start..], - text.len(), - ) { - dfa::Result::Match(_) => Some(text.len()), - dfa::Result::NoMatch(_) => None, - dfa::Result::Quit => self.shortest_nfa(text, start), - } - } - #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))] - MatchType::DfaSuffix => { - match self.shortest_dfa_reverse_suffix(text, start) { - dfa::Result::Match(e) => Some(e), - dfa::Result::NoMatch(_) => None, - dfa::Result::Quit => self.shortest_nfa(text, start), - } - } - MatchType::Nfa(ty) => self.shortest_nfa_type(ty, text, start), - MatchType::Nothing => None, - } - } - - /// Returns true if and only if the regex matches text. - /// - /// For single regular expressions, this is equivalent to calling - /// shortest_match(...).is_some(). - #[cfg_attr(feature = "perf-inline", inline(always))] - fn is_match_at(&self, text: &[u8], start: usize) -> bool { - if !self.is_anchor_end_match(text) { - return false; - } - // We need to do this dance because shortest_match relies on the NFA - // filling in captures[1], but a RegexSet has no captures. In other - // words, a RegexSet can't (currently) use shortest_match. 
---AG - match self.ro.match_type { - #[cfg(feature = "perf-literal")] - MatchType::Literal(ty) => { - self.find_literals(ty, text, start).is_some() - } - #[cfg(feature = "perf-dfa")] - MatchType::Dfa | MatchType::DfaMany => { - match self.shortest_dfa(text, start) { - dfa::Result::Match(_) => true, - dfa::Result::NoMatch(_) => false, - dfa::Result::Quit => self.match_nfa(text, start), - } - } - #[cfg(feature = "perf-dfa")] - MatchType::DfaAnchoredReverse => { - match dfa::Fsm::reverse( - &self.ro.dfa_reverse, - self.cache.value(), - true, - &text[start..], - text.len(), - ) { - dfa::Result::Match(_) => true, - dfa::Result::NoMatch(_) => false, - dfa::Result::Quit => self.match_nfa(text, start), - } - } - #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))] - MatchType::DfaSuffix => { - match self.shortest_dfa_reverse_suffix(text, start) { - dfa::Result::Match(_) => true, - dfa::Result::NoMatch(_) => false, - dfa::Result::Quit => self.match_nfa(text, start), - } - } - MatchType::Nfa(ty) => self.match_nfa_type(ty, text, start), - MatchType::Nothing => false, - } - } - - /// Finds the start and end location of the leftmost-first match, starting - /// at the given location. - #[cfg_attr(feature = "perf-inline", inline(always))] - fn find_at(&self, text: &[u8], start: usize) -> Option<(usize, usize)> { - if !self.is_anchor_end_match(text) { - return None; - } - match self.ro.match_type { - #[cfg(feature = "perf-literal")] - MatchType::Literal(ty) => self.find_literals(ty, text, start), - #[cfg(feature = "perf-dfa")] - MatchType::Dfa => match self.find_dfa_forward(text, start) { - dfa::Result::Match((s, e)) => Some((s, e)), - dfa::Result::NoMatch(_) => None, - dfa::Result::Quit => { - self.find_nfa(MatchNfaType::Auto, text, start) - } - }, - #[cfg(feature = "perf-dfa")] - MatchType::DfaAnchoredReverse => { - match self.find_dfa_anchored_reverse(text, start) { - dfa::Result::Match((s, e)) => Some((s, e)), - dfa::Result::NoMatch(_) => None, - dfa::Result::Quit => { - self.find_nfa(MatchNfaType::Auto, text, start) - } - } - } - #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))] - MatchType::DfaSuffix => { - match self.find_dfa_reverse_suffix(text, start) { - dfa::Result::Match((s, e)) => Some((s, e)), - dfa::Result::NoMatch(_) => None, - dfa::Result::Quit => { - self.find_nfa(MatchNfaType::Auto, text, start) - } - } - } - MatchType::Nfa(ty) => self.find_nfa(ty, text, start), - MatchType::Nothing => None, - #[cfg(feature = "perf-dfa")] - MatchType::DfaMany => { - unreachable!("BUG: RegexSet cannot be used with find") - } - } - } - - /// Finds the start and end location of the leftmost-first match and also - /// fills in all matching capture groups. - /// - /// The number of capture slots given should be equal to the total number - /// of capture slots in the compiled program. - /// - /// Note that the first two slots always correspond to the start and end - /// locations of the overall match. - fn captures_read_at( - &self, - locs: &mut Locations, - text: &[u8], - start: usize, - ) -> Option<(usize, usize)> { - let slots = locs.as_slots(); - for slot in slots.iter_mut() { - *slot = None; - } - // If the caller unnecessarily uses this, then we try to save them - // from themselves. 
- match slots.len() { - 0 => return self.find_at(text, start), - 2 => { - return self.find_at(text, start).map(|(s, e)| { - slots[0] = Some(s); - slots[1] = Some(e); - (s, e) - }); - } - _ => {} // fallthrough - } - if !self.is_anchor_end_match(text) { - return None; - } - match self.ro.match_type { - #[cfg(feature = "perf-literal")] - MatchType::Literal(ty) => { - self.find_literals(ty, text, start).and_then(|(s, e)| { - self.captures_nfa_type( - MatchNfaType::Auto, - slots, - text, - s, - e, - ) - }) - } - #[cfg(feature = "perf-dfa")] - MatchType::Dfa => { - if self.ro.nfa.is_anchored_start { - self.captures_nfa(slots, text, start) - } else { - match self.find_dfa_forward(text, start) { - dfa::Result::Match((s, e)) => self.captures_nfa_type( - MatchNfaType::Auto, - slots, - text, - s, - e, - ), - dfa::Result::NoMatch(_) => None, - dfa::Result::Quit => { - self.captures_nfa(slots, text, start) - } - } - } - } - #[cfg(feature = "perf-dfa")] - MatchType::DfaAnchoredReverse => { - match self.find_dfa_anchored_reverse(text, start) { - dfa::Result::Match((s, e)) => self.captures_nfa_type( - MatchNfaType::Auto, - slots, - text, - s, - e, - ), - dfa::Result::NoMatch(_) => None, - dfa::Result::Quit => self.captures_nfa(slots, text, start), - } - } - #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))] - MatchType::DfaSuffix => { - match self.find_dfa_reverse_suffix(text, start) { - dfa::Result::Match((s, e)) => self.captures_nfa_type( - MatchNfaType::Auto, - slots, - text, - s, - e, - ), - dfa::Result::NoMatch(_) => None, - dfa::Result::Quit => self.captures_nfa(slots, text, start), - } - } - MatchType::Nfa(ty) => { - self.captures_nfa_type(ty, slots, text, start, text.len()) - } - MatchType::Nothing => None, - #[cfg(feature = "perf-dfa")] - MatchType::DfaMany => { - unreachable!("BUG: RegexSet cannot be used with captures") - } - } - } -} - -impl<'c> ExecNoSync<'c> { - /// Finds the leftmost-first match using only literal search. - #[cfg(feature = "perf-literal")] - #[cfg_attr(feature = "perf-inline", inline(always))] - fn find_literals( - &self, - ty: MatchLiteralType, - text: &[u8], - start: usize, - ) -> Option<(usize, usize)> { - use self::MatchLiteralType::*; - match ty { - Unanchored => { - let lits = &self.ro.nfa.prefixes; - lits.find(&text[start..]).map(|(s, e)| (start + s, start + e)) - } - AnchoredStart => { - let lits = &self.ro.nfa.prefixes; - if start == 0 || !self.ro.nfa.is_anchored_start { - lits.find_start(&text[start..]) - .map(|(s, e)| (start + s, start + e)) - } else { - None - } - } - AnchoredEnd => { - let lits = &self.ro.suffixes; - lits.find_end(&text[start..]) - .map(|(s, e)| (start + s, start + e)) - } - AhoCorasick => self - .ro - .ac - .as_ref() - .unwrap() - .find(&text[start..]) - .map(|m| (start + m.start(), start + m.end())), - } - } - - /// Finds the leftmost-first match (start and end) using only the DFA. - /// - /// If the result returned indicates that the DFA quit, then another - /// matching engine should be used. - #[cfg(feature = "perf-dfa")] - #[cfg_attr(feature = "perf-inline", inline(always))] - fn find_dfa_forward( - &self, - text: &[u8], - start: usize, - ) -> dfa::Result<(usize, usize)> { - use crate::dfa::Result::*; - let end = match dfa::Fsm::forward( - &self.ro.dfa, - self.cache.value(), - false, - text, - start, - ) { - NoMatch(i) => return NoMatch(i), - Quit => return Quit, - Match(end) if start == end => return Match((start, start)), - Match(end) => end, - }; - // Now run the DFA in reverse to find the start of the match. 
- match dfa::Fsm::reverse( - &self.ro.dfa_reverse, - self.cache.value(), - false, - &text[start..], - end - start, - ) { - Match(s) => Match((start + s, end)), - NoMatch(i) => NoMatch(i), - Quit => Quit, - } - } - - /// Finds the leftmost-first match (start and end) using only the DFA, - /// but assumes the regex is anchored at the end and therefore starts at - /// the end of the regex and matches in reverse. - /// - /// If the result returned indicates that the DFA quit, then another - /// matching engine should be used. - #[cfg(feature = "perf-dfa")] - #[cfg_attr(feature = "perf-inline", inline(always))] - fn find_dfa_anchored_reverse( - &self, - text: &[u8], - start: usize, - ) -> dfa::Result<(usize, usize)> { - use crate::dfa::Result::*; - match dfa::Fsm::reverse( - &self.ro.dfa_reverse, - self.cache.value(), - false, - &text[start..], - text.len() - start, - ) { - Match(s) => Match((start + s, text.len())), - NoMatch(i) => NoMatch(i), - Quit => Quit, - } - } - - /// Finds the end of the shortest match using only the DFA. - #[cfg(feature = "perf-dfa")] - #[cfg_attr(feature = "perf-inline", inline(always))] - fn shortest_dfa(&self, text: &[u8], start: usize) -> dfa::Result { - dfa::Fsm::forward(&self.ro.dfa, self.cache.value(), true, text, start) - } - - /// Finds the end of the shortest match using only the DFA by scanning for - /// suffix literals. - #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))] - #[cfg_attr(feature = "perf-inline", inline(always))] - fn shortest_dfa_reverse_suffix( - &self, - text: &[u8], - start: usize, - ) -> dfa::Result { - match self.exec_dfa_reverse_suffix(text, start) { - None => self.shortest_dfa(text, start), - Some(r) => r.map(|(_, end)| end), - } - } - - /// Finds the end of the shortest match using only the DFA by scanning for - /// suffix literals. It also reports the start of the match. - /// - /// Note that if None is returned, then the optimization gave up to avoid - /// worst case quadratic behavior. A forward scanning DFA should be tried - /// next. - /// - /// If a match is returned and the full leftmost-first match is desired, - /// then a forward scan starting from the beginning of the match must be - /// done. - /// - /// If the result returned indicates that the DFA quit, then another - /// matching engine should be used. - #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))] - #[cfg_attr(feature = "perf-inline", inline(always))] - fn exec_dfa_reverse_suffix( - &self, - text: &[u8], - original_start: usize, - ) -> Option> { - use crate::dfa::Result::*; - - let lcs = self.ro.suffixes.lcs(); - debug_assert!(lcs.len() >= 1); - let mut start = original_start; - let mut end = start; - let mut last_literal = start; - while end <= text.len() { - last_literal += match lcs.find(&text[last_literal..]) { - None => return Some(NoMatch(text.len())), - Some(i) => i, - }; - end = last_literal + lcs.len(); - match dfa::Fsm::reverse( - &self.ro.dfa_reverse, - self.cache.value(), - false, - &text[start..end], - end - start, - ) { - Match(0) | NoMatch(0) => return None, - Match(i) => return Some(Match((start + i, end))), - NoMatch(i) => { - start += i; - last_literal += 1; - continue; - } - Quit => return Some(Quit), - }; - } - Some(NoMatch(text.len())) - } - - /// Finds the leftmost-first match (start and end) using only the DFA - /// by scanning for suffix literals. - /// - /// If the result returned indicates that the DFA quit, then another - /// matching engine should be used. 
- #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))] - #[cfg_attr(feature = "perf-inline", inline(always))] - fn find_dfa_reverse_suffix( - &self, - text: &[u8], - start: usize, - ) -> dfa::Result<(usize, usize)> { - use crate::dfa::Result::*; - - let match_start = match self.exec_dfa_reverse_suffix(text, start) { - None => return self.find_dfa_forward(text, start), - Some(Match((start, _))) => start, - Some(r) => return r, - }; - // At this point, we've found a match. The only way to quit now - // without a match is if the DFA gives up (seems unlikely). - // - // Now run the DFA forwards to find the proper end of the match. - // (The suffix literal match can only indicate the earliest - // possible end location, which may appear before the end of the - // leftmost-first match.) - match dfa::Fsm::forward( - &self.ro.dfa, - self.cache.value(), - false, - text, - match_start, - ) { - NoMatch(_) => panic!("BUG: reverse match implies forward match"), - Quit => Quit, - Match(e) => Match((match_start, e)), - } - } - - /// Executes the NFA engine to return whether there is a match or not. - /// - /// Ideally, we could use shortest_nfa(...).is_some() and get the same - /// performance characteristics, but regex sets don't have captures, which - /// shortest_nfa depends on. - #[cfg(feature = "perf-dfa")] - fn match_nfa(&self, text: &[u8], start: usize) -> bool { - self.match_nfa_type(MatchNfaType::Auto, text, start) - } - - /// Like match_nfa, but allows specification of the type of NFA engine. - fn match_nfa_type( - &self, - ty: MatchNfaType, - text: &[u8], - start: usize, - ) -> bool { - self.exec_nfa( - ty, - &mut [false], - &mut [], - true, - false, - text, - start, - text.len(), - ) - } - - /// Finds the shortest match using an NFA. - #[cfg(feature = "perf-dfa")] - fn shortest_nfa(&self, text: &[u8], start: usize) -> Option { - self.shortest_nfa_type(MatchNfaType::Auto, text, start) - } - - /// Like shortest_nfa, but allows specification of the type of NFA engine. - fn shortest_nfa_type( - &self, - ty: MatchNfaType, - text: &[u8], - start: usize, - ) -> Option { - let mut slots = [None, None]; - if self.exec_nfa( - ty, - &mut [false], - &mut slots, - true, - true, - text, - start, - text.len(), - ) { - slots[1] - } else { - None - } - } - - /// Like find, but executes an NFA engine. - fn find_nfa( - &self, - ty: MatchNfaType, - text: &[u8], - start: usize, - ) -> Option<(usize, usize)> { - let mut slots = [None, None]; - if self.exec_nfa( - ty, - &mut [false], - &mut slots, - false, - false, - text, - start, - text.len(), - ) { - match (slots[0], slots[1]) { - (Some(s), Some(e)) => Some((s, e)), - _ => None, - } - } else { - None - } - } - - /// Like find_nfa, but fills in captures. - /// - /// `slots` should have length equal to `2 * nfa.captures.len()`. - #[cfg(feature = "perf-dfa")] - fn captures_nfa( - &self, - slots: &mut [Slot], - text: &[u8], - start: usize, - ) -> Option<(usize, usize)> { - self.captures_nfa_type( - MatchNfaType::Auto, - slots, - text, - start, - text.len(), - ) - } - - /// Like captures_nfa, but allows specification of type of NFA engine. 
- fn captures_nfa_type( - &self, - ty: MatchNfaType, - slots: &mut [Slot], - text: &[u8], - start: usize, - end: usize, - ) -> Option<(usize, usize)> { - if self.exec_nfa( - ty, - &mut [false], - slots, - false, - false, - text, - start, - end, - ) { - match (slots[0], slots[1]) { - (Some(s), Some(e)) => Some((s, e)), - _ => None, - } - } else { - None - } - } - - fn exec_nfa( - &self, - mut ty: MatchNfaType, - matches: &mut [bool], - slots: &mut [Slot], - quit_after_match: bool, - quit_after_match_with_pos: bool, - text: &[u8], - start: usize, - end: usize, - ) -> bool { - use self::MatchNfaType::*; - if let Auto = ty { - if backtrack::should_exec(self.ro.nfa.len(), text.len()) { - ty = Backtrack; - } else { - ty = PikeVM; - } - } - // The backtracker can't return the shortest match position as it is - // implemented today. So if someone calls `shortest_match` and we need - // to run an NFA, then use the PikeVM. - if quit_after_match_with_pos || ty == PikeVM { - self.exec_pikevm( - matches, - slots, - quit_after_match, - text, - start, - end, - ) - } else { - self.exec_backtrack(matches, slots, text, start, end) - } - } - - /// Always run the NFA algorithm. - fn exec_pikevm( - &self, - matches: &mut [bool], - slots: &mut [Slot], - quit_after_match: bool, - text: &[u8], - start: usize, - end: usize, - ) -> bool { - if self.ro.nfa.uses_bytes() { - pikevm::Fsm::exec( - &self.ro.nfa, - self.cache.value(), - matches, - slots, - quit_after_match, - ByteInput::new(text, self.ro.nfa.only_utf8), - start, - end, - ) - } else { - pikevm::Fsm::exec( - &self.ro.nfa, - self.cache.value(), - matches, - slots, - quit_after_match, - CharInput::new(text), - start, - end, - ) - } - } - - /// Always runs the NFA using bounded backtracking. - fn exec_backtrack( - &self, - matches: &mut [bool], - slots: &mut [Slot], - text: &[u8], - start: usize, - end: usize, - ) -> bool { - if self.ro.nfa.uses_bytes() { - backtrack::Bounded::exec( - &self.ro.nfa, - self.cache.value(), - matches, - slots, - ByteInput::new(text, self.ro.nfa.only_utf8), - start, - end, - ) - } else { - backtrack::Bounded::exec( - &self.ro.nfa, - self.cache.value(), - matches, - slots, - CharInput::new(text), - start, - end, - ) - } - } - - /// Finds which regular expressions match the given text. - /// - /// `matches` should have length equal to the number of regexes being - /// searched. - /// - /// This is only useful when one wants to know which regexes in a set - /// match some text. 
- pub fn many_matches_at( - &self, - matches: &mut [bool], - text: &[u8], - start: usize, - ) -> bool { - use self::MatchType::*; - if !self.is_anchor_end_match(text) { - return false; - } - match self.ro.match_type { - #[cfg(feature = "perf-literal")] - Literal(ty) => { - debug_assert_eq!(matches.len(), 1); - matches[0] = self.find_literals(ty, text, start).is_some(); - matches[0] - } - #[cfg(feature = "perf-dfa")] - Dfa | DfaAnchoredReverse | DfaMany => { - match dfa::Fsm::forward_many( - &self.ro.dfa, - self.cache.value(), - matches, - text, - start, - ) { - dfa::Result::Match(_) => true, - dfa::Result::NoMatch(_) => false, - dfa::Result::Quit => self.exec_nfa( - MatchNfaType::Auto, - matches, - &mut [], - false, - false, - text, - start, - text.len(), - ), - } - } - #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))] - DfaSuffix => { - match dfa::Fsm::forward_many( - &self.ro.dfa, - self.cache.value(), - matches, - text, - start, - ) { - dfa::Result::Match(_) => true, - dfa::Result::NoMatch(_) => false, - dfa::Result::Quit => self.exec_nfa( - MatchNfaType::Auto, - matches, - &mut [], - false, - false, - text, - start, - text.len(), - ), - } - } - Nfa(ty) => self.exec_nfa( - ty, - matches, - &mut [], - false, - false, - text, - start, - text.len(), - ), - Nothing => false, - } - } - - #[cfg_attr(feature = "perf-inline", inline(always))] - fn is_anchor_end_match(&self, text: &[u8]) -> bool { - #[cfg(not(feature = "perf-literal"))] - fn imp(_: &ExecReadOnly, _: &[u8]) -> bool { - true - } - - #[cfg(feature = "perf-literal")] - fn imp(ro: &ExecReadOnly, text: &[u8]) -> bool { - // Only do this check if the haystack is big (>1MB). - if text.len() > (1 << 20) && ro.nfa.is_anchored_end { - let lcs = ro.suffixes.lcs(); - if lcs.len() >= 1 && !lcs.is_suffix(text) { - return false; - } - } - true - } - - imp(&self.ro, text) - } - - pub fn capture_name_idx(&self) -> &Arc> { - &self.ro.nfa.capture_name_idx - } -} - -impl<'c> ExecNoSyncStr<'c> { - pub fn capture_name_idx(&self) -> &Arc> { - self.0.capture_name_idx() - } -} - -impl Exec { - /// Get a searcher that isn't Sync. - #[cfg_attr(feature = "perf-inline", inline(always))] - pub fn searcher(&self) -> ExecNoSync<'_> { - ExecNoSync { - ro: &self.ro, // a clone is too expensive here! (and not needed) - cache: self.pool.get(), - } - } - - /// Get a searcher that isn't Sync and can match on &str. - #[cfg_attr(feature = "perf-inline", inline(always))] - pub fn searcher_str(&self) -> ExecNoSyncStr<'_> { - ExecNoSyncStr(self.searcher()) - } - - /// Build a Regex from this executor. - pub fn into_regex(self) -> re_unicode::Regex { - re_unicode::Regex::from(self) - } - - /// Build a RegexSet from this executor. - pub fn into_regex_set(self) -> re_set::unicode::RegexSet { - re_set::unicode::RegexSet::from(self) - } - - /// Build a Regex from this executor that can match arbitrary bytes. - pub fn into_byte_regex(self) -> re_bytes::Regex { - re_bytes::Regex::from(self) - } - - /// Build a RegexSet from this executor that can match arbitrary bytes. - pub fn into_byte_regex_set(self) -> re_set::bytes::RegexSet { - re_set::bytes::RegexSet::from(self) - } - - /// The original regular expressions given by the caller that were - /// compiled. - pub fn regex_strings(&self) -> &[String] { - &self.ro.res - } - - /// Return a slice of capture names. - /// - /// Any capture that isn't named is None. 
- pub fn capture_names(&self) -> &[Option] { - &self.ro.nfa.captures - } - - /// Return a reference to named groups mapping (from group name to - /// group position). - pub fn capture_name_idx(&self) -> &Arc> { - &self.ro.nfa.capture_name_idx - } -} - -impl Clone for Exec { - fn clone(&self) -> Exec { - let pool = ExecReadOnly::new_pool(&self.ro); - Exec { ro: self.ro.clone(), pool } - } -} - -impl ExecReadOnly { - fn choose_match_type(&self, hint: Option) -> MatchType { - if let Some(MatchType::Nfa(_)) = hint { - return hint.unwrap(); - } - // If the NFA is empty, then we'll never match anything. - if self.nfa.insts.is_empty() { - return MatchType::Nothing; - } - if let Some(literalty) = self.choose_literal_match_type() { - return literalty; - } - if let Some(dfaty) = self.choose_dfa_match_type() { - return dfaty; - } - // We're so totally hosed. - MatchType::Nfa(MatchNfaType::Auto) - } - - /// If a plain literal scan can be used, then a corresponding literal - /// search type is returned. - fn choose_literal_match_type(&self) -> Option { - #[cfg(not(feature = "perf-literal"))] - fn imp(_: &ExecReadOnly) -> Option { - None - } - - #[cfg(feature = "perf-literal")] - fn imp(ro: &ExecReadOnly) -> Option { - // If our set of prefixes is complete, then we can use it to find - // a match in lieu of a regex engine. This doesn't quite work well - // in the presence of multiple regexes, so only do it when there's - // one. - // - // TODO(burntsushi): Also, don't try to match literals if the regex - // is partially anchored. We could technically do it, but we'd need - // to create two sets of literals: all of them and then the subset - // that aren't anchored. We would then only search for all of them - // when at the beginning of the input and use the subset in all - // other cases. - if ro.res.len() != 1 { - return None; - } - if ro.ac.is_some() { - return Some(MatchType::Literal( - MatchLiteralType::AhoCorasick, - )); - } - if ro.nfa.prefixes.complete() { - return if ro.nfa.is_anchored_start { - Some(MatchType::Literal(MatchLiteralType::AnchoredStart)) - } else { - Some(MatchType::Literal(MatchLiteralType::Unanchored)) - }; - } - if ro.suffixes.complete() { - return if ro.nfa.is_anchored_end { - Some(MatchType::Literal(MatchLiteralType::AnchoredEnd)) - } else { - // This case shouldn't happen. When the regex isn't - // anchored, then complete prefixes should imply complete - // suffixes. - Some(MatchType::Literal(MatchLiteralType::Unanchored)) - }; - } - None - } - - imp(self) - } - - /// If a DFA scan can be used, then choose the appropriate DFA strategy. - fn choose_dfa_match_type(&self) -> Option { - #[cfg(not(feature = "perf-dfa"))] - fn imp(_: &ExecReadOnly) -> Option { - None - } - - #[cfg(feature = "perf-dfa")] - fn imp(ro: &ExecReadOnly) -> Option { - if !dfa::can_exec(&ro.dfa) { - return None; - } - // Regex sets require a slightly specialized path. - if ro.res.len() >= 2 { - return Some(MatchType::DfaMany); - } - // If the regex is anchored at the end but not the start, then - // just match in reverse from the end of the haystack. - if !ro.nfa.is_anchored_start && ro.nfa.is_anchored_end { - return Some(MatchType::DfaAnchoredReverse); - } - #[cfg(feature = "perf-literal")] - { - // If there's a longish suffix literal, then it might be faster - // to look for that first. - if ro.should_suffix_scan() { - return Some(MatchType::DfaSuffix); - } - } - // Fall back to your garden variety forward searching lazy DFA. 
- Some(MatchType::Dfa) - } - - imp(self) - } - - /// Returns true if the program is amenable to suffix scanning. - /// - /// When this is true, as a heuristic, we assume it is OK to quickly scan - /// for suffix literals and then do a *reverse* DFA match from any matches - /// produced by the literal scan. (And then followed by a forward DFA - /// search, since the previously found suffix literal maybe not actually be - /// the end of a match.) - /// - /// This is a bit of a specialized optimization, but can result in pretty - /// big performance wins if 1) there are no prefix literals and 2) the - /// suffix literals are pretty rare in the text. (1) is obviously easy to - /// account for but (2) is harder. As a proxy, we assume that longer - /// strings are generally rarer, so we only enable this optimization when - /// we have a meaty suffix. - #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))] - fn should_suffix_scan(&self) -> bool { - if self.suffixes.is_empty() { - return false; - } - let lcs_len = self.suffixes.lcs().char_len(); - lcs_len >= 3 && lcs_len > self.dfa.prefixes.lcp().char_len() - } - - fn new_pool(ro: &Arc) -> Box> { - let ro = ro.clone(); - Box::new(Pool::new(Box::new(move || { - AssertUnwindSafe(RefCell::new(ProgramCacheInner::new(&ro))) - }))) - } -} - -#[derive(Clone, Copy, Debug)] -enum MatchType { - /// A single or multiple literal search. This is only used when the regex - /// can be decomposed into a literal search. - #[cfg(feature = "perf-literal")] - Literal(MatchLiteralType), - /// A normal DFA search. - #[cfg(feature = "perf-dfa")] - Dfa, - /// A reverse DFA search starting from the end of a haystack. - #[cfg(feature = "perf-dfa")] - DfaAnchoredReverse, - /// A reverse DFA search with suffix literal scanning. - #[cfg(all(feature = "perf-dfa", feature = "perf-literal"))] - DfaSuffix, - /// Use the DFA on two or more regular expressions. - #[cfg(feature = "perf-dfa")] - DfaMany, - /// An NFA variant. - Nfa(MatchNfaType), - /// No match is ever possible, so don't ever try to search. - Nothing, -} - -#[derive(Clone, Copy, Debug)] -#[cfg(feature = "perf-literal")] -enum MatchLiteralType { - /// Match literals anywhere in text. - Unanchored, - /// Match literals only at the start of text. - AnchoredStart, - /// Match literals only at the end of text. - AnchoredEnd, - /// Use an Aho-Corasick automaton. This requires `ac` to be Some on - /// ExecReadOnly. - AhoCorasick, -} - -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -enum MatchNfaType { - /// Choose between Backtrack and PikeVM. - Auto, - /// NFA bounded backtracking. - /// - /// (This is only set by tests, since it never makes sense to always want - /// backtracking.) - Backtrack, - /// The Pike VM. - /// - /// (This is only set by tests, since it never makes sense to always want - /// the Pike VM.) - PikeVM, -} - -/// `ProgramCache` maintains reusable allocations for each matching engine -/// available to a particular program. -/// -/// We declare this as unwind safe since it's a cache that's only used for -/// performance purposes. If a panic occurs, it is (or should be) always safe -/// to continue using the same regex object. 
-pub type ProgramCache = AssertUnwindSafe>; - -#[derive(Debug)] -pub struct ProgramCacheInner { - pub pikevm: pikevm::Cache, - pub backtrack: backtrack::Cache, - #[cfg(feature = "perf-dfa")] - pub dfa: dfa::Cache, - #[cfg(feature = "perf-dfa")] - pub dfa_reverse: dfa::Cache, -} - -impl ProgramCacheInner { - fn new(ro: &ExecReadOnly) -> Self { - ProgramCacheInner { - pikevm: pikevm::Cache::new(&ro.nfa), - backtrack: backtrack::Cache::new(&ro.nfa), - #[cfg(feature = "perf-dfa")] - dfa: dfa::Cache::new(&ro.dfa), - #[cfg(feature = "perf-dfa")] - dfa_reverse: dfa::Cache::new(&ro.dfa_reverse), - } - } -} - -/// Alternation literals checks if the given HIR is a simple alternation of -/// literals, and if so, returns them. Otherwise, this returns None. -#[cfg(feature = "perf-literal")] -fn alternation_literals(expr: &Hir) -> Option>> { - use regex_syntax::hir::{HirKind, Literal}; - - // This is pretty hacky, but basically, if `is_alternation_literal` is - // true, then we can make several assumptions about the structure of our - // HIR. This is what justifies the `unreachable!` statements below. - // - // This code should be refactored once we overhaul this crate's - // optimization pipeline, because this is a terribly inflexible way to go - // about things. - - if !expr.is_alternation_literal() { - return None; - } - let alts = match *expr.kind() { - HirKind::Alternation(ref alts) => alts, - _ => return None, // one literal isn't worth it - }; - - let extendlit = |lit: &Literal, dst: &mut Vec| match *lit { - Literal::Unicode(c) => { - let mut buf = [0; 4]; - dst.extend_from_slice(c.encode_utf8(&mut buf).as_bytes()); - } - Literal::Byte(b) => { - dst.push(b); - } - }; - - let mut lits = vec![]; - for alt in alts { - let mut lit = vec![]; - match *alt.kind() { - HirKind::Literal(ref x) => extendlit(x, &mut lit), - HirKind::Concat(ref exprs) => { - for e in exprs { - match *e.kind() { - HirKind::Literal(ref x) => extendlit(x, &mut lit), - _ => unreachable!("expected literal, got {:?}", e), - } - } - } - _ => unreachable!("expected literal or concat, got {:?}", alt), - } - lits.push(lit); - } - Some(lits) -} - -#[cfg(test)] -mod test { - #[test] - fn uppercut_s_backtracking_bytes_default_bytes_mismatch() { - use crate::internal::ExecBuilder; - - let backtrack_bytes_re = ExecBuilder::new("^S") - .bounded_backtracking() - .only_utf8(false) - .build() - .map(|exec| exec.into_byte_regex()) - .map_err(|err| format!("{}", err)) - .unwrap(); - - let default_bytes_re = ExecBuilder::new("^S") - .only_utf8(false) - .build() - .map(|exec| exec.into_byte_regex()) - .map_err(|err| format!("{}", err)) - .unwrap(); - - let input = vec![83, 83]; - - let s1 = backtrack_bytes_re.split(&input); - let s2 = default_bytes_re.split(&input); - for (chunk1, chunk2) in s1.zip(s2) { - assert_eq!(chunk1, chunk2); - } - } - - #[test] - fn unicode_lit_star_backtracking_utf8bytes_default_utf8bytes_mismatch() { - use crate::internal::ExecBuilder; - - let backtrack_bytes_re = ExecBuilder::new(r"^(?u:\*)") - .bounded_backtracking() - .bytes(true) - .build() - .map(|exec| exec.into_regex()) - .map_err(|err| format!("{}", err)) - .unwrap(); - - let default_bytes_re = ExecBuilder::new(r"^(?u:\*)") - .bytes(true) - .build() - .map(|exec| exec.into_regex()) - .map_err(|err| format!("{}", err)) - .unwrap(); - - let input = "**"; - - let s1 = backtrack_bytes_re.split(input); - let s2 = default_bytes_re.split(input); - for (chunk1, chunk2) in s1.zip(s2) { - assert_eq!(chunk1, chunk2); - } - } -} diff --git 
a/collector/compile-benchmarks/regex-1.5.5/src/expand.rs b/collector/compile-benchmarks/regex-1.5.5/src/expand.rs deleted file mode 100644 index fd9c2d05d..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/src/expand.rs +++ /dev/null @@ -1,239 +0,0 @@ -use std::str; - -use crate::find_byte::find_byte; - -use crate::re_bytes; -use crate::re_unicode; - -pub fn expand_str( - caps: &re_unicode::Captures<'_>, - mut replacement: &str, - dst: &mut String, -) { - while !replacement.is_empty() { - match find_byte(b'$', replacement.as_bytes()) { - None => break, - Some(i) => { - dst.push_str(&replacement[..i]); - replacement = &replacement[i..]; - } - } - if replacement.as_bytes().get(1).map_or(false, |&b| b == b'$') { - dst.push_str("$"); - replacement = &replacement[2..]; - continue; - } - debug_assert!(!replacement.is_empty()); - let cap_ref = match find_cap_ref(replacement.as_bytes()) { - Some(cap_ref) => cap_ref, - None => { - dst.push_str("$"); - replacement = &replacement[1..]; - continue; - } - }; - replacement = &replacement[cap_ref.end..]; - match cap_ref.cap { - Ref::Number(i) => { - dst.push_str(caps.get(i).map(|m| m.as_str()).unwrap_or("")); - } - Ref::Named(name) => { - dst.push_str( - caps.name(name).map(|m| m.as_str()).unwrap_or(""), - ); - } - } - } - dst.push_str(replacement); -} - -pub fn expand_bytes( - caps: &re_bytes::Captures<'_>, - mut replacement: &[u8], - dst: &mut Vec, -) { - while !replacement.is_empty() { - match find_byte(b'$', replacement) { - None => break, - Some(i) => { - dst.extend(&replacement[..i]); - replacement = &replacement[i..]; - } - } - if replacement.get(1).map_or(false, |&b| b == b'$') { - dst.push(b'$'); - replacement = &replacement[2..]; - continue; - } - debug_assert!(!replacement.is_empty()); - let cap_ref = match find_cap_ref(replacement) { - Some(cap_ref) => cap_ref, - None => { - dst.push(b'$'); - replacement = &replacement[1..]; - continue; - } - }; - replacement = &replacement[cap_ref.end..]; - match cap_ref.cap { - Ref::Number(i) => { - dst.extend(caps.get(i).map(|m| m.as_bytes()).unwrap_or(b"")); - } - Ref::Named(name) => { - dst.extend( - caps.name(name).map(|m| m.as_bytes()).unwrap_or(b""), - ); - } - } - } - dst.extend(replacement); -} - -/// `CaptureRef` represents a reference to a capture group inside some text. -/// The reference is either a capture group name or a number. -/// -/// It is also tagged with the position in the text following the -/// capture reference. -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -struct CaptureRef<'a> { - cap: Ref<'a>, - end: usize, -} - -/// A reference to a capture group in some text. -/// -/// e.g., `$2`, `$foo`, `${foo}`. -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -enum Ref<'a> { - Named(&'a str), - Number(usize), -} - -impl<'a> From<&'a str> for Ref<'a> { - fn from(x: &'a str) -> Ref<'a> { - Ref::Named(x) - } -} - -impl From for Ref<'static> { - fn from(x: usize) -> Ref<'static> { - Ref::Number(x) - } -} - -/// Parses a possible reference to a capture group name in the given text, -/// starting at the beginning of `replacement`. -/// -/// If no such valid reference could be found, None is returned. 
-fn find_cap_ref(replacement: &[u8]) -> Option> { - let mut i = 0; - let rep: &[u8] = replacement.as_ref(); - if rep.len() <= 1 || rep[0] != b'$' { - return None; - } - i += 1; - if rep[i] == b'{' { - return find_cap_ref_braced(rep, i + 1); - } - let mut cap_end = i; - while rep.get(cap_end).map_or(false, is_valid_cap_letter) { - cap_end += 1; - } - if cap_end == i { - return None; - } - // We just verified that the range 0..cap_end is valid ASCII, so it must - // therefore be valid UTF-8. If we really cared, we could avoid this UTF-8 - // check via an unchecked conversion or by parsing the number straight from - // &[u8]. - let cap = - str::from_utf8(&rep[i..cap_end]).expect("valid UTF-8 capture name"); - Some(CaptureRef { - cap: match cap.parse::() { - Ok(i) => Ref::Number(i as usize), - Err(_) => Ref::Named(cap), - }, - end: cap_end, - }) -} - -fn find_cap_ref_braced(rep: &[u8], mut i: usize) -> Option> { - let start = i; - while rep.get(i).map_or(false, |&b| b != b'}') { - i += 1; - } - if !rep.get(i).map_or(false, |&b| b == b'}') { - return None; - } - // When looking at braced names, we don't put any restrictions on the name, - // so it's possible it could be invalid UTF-8. But a capture group name - // can never be invalid UTF-8, so if we have invalid UTF-8, then we can - // safely return None. - let cap = match str::from_utf8(&rep[start..i]) { - Err(_) => return None, - Ok(cap) => cap, - }; - Some(CaptureRef { - cap: match cap.parse::() { - Ok(i) => Ref::Number(i as usize), - Err(_) => Ref::Named(cap), - }, - end: i + 1, - }) -} - -/// Returns true if and only if the given byte is allowed in a capture name. -fn is_valid_cap_letter(b: &u8) -> bool { - match *b { - b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_' => true, - _ => false, - } -} - -#[cfg(test)] -mod tests { - use super::{find_cap_ref, CaptureRef}; - - macro_rules! find { - ($name:ident, $text:expr) => { - #[test] - fn $name() { - assert_eq!(None, find_cap_ref($text.as_bytes())); - } - }; - ($name:ident, $text:expr, $capref:expr) => { - #[test] - fn $name() { - assert_eq!(Some($capref), find_cap_ref($text.as_bytes())); - } - }; - } - - macro_rules! c { - ($name_or_number:expr, $pos:expr) => { - CaptureRef { cap: $name_or_number.into(), end: $pos } - }; - } - - find!(find_cap_ref1, "$foo", c!("foo", 4)); - find!(find_cap_ref2, "${foo}", c!("foo", 6)); - find!(find_cap_ref3, "$0", c!(0, 2)); - find!(find_cap_ref4, "$5", c!(5, 2)); - find!(find_cap_ref5, "$10", c!(10, 3)); - // See https://github.com/rust-lang/regex/pull/585 - // for more on characters following numbers - find!(find_cap_ref6, "$42a", c!("42a", 4)); - find!(find_cap_ref7, "${42}a", c!(42, 5)); - find!(find_cap_ref8, "${42"); - find!(find_cap_ref9, "${42 "); - find!(find_cap_ref10, " $0 "); - find!(find_cap_ref11, "$"); - find!(find_cap_ref12, " "); - find!(find_cap_ref13, ""); - find!(find_cap_ref14, "$1-$2", c!(1, 2)); - find!(find_cap_ref15, "$1_$2", c!("1_", 3)); - find!(find_cap_ref16, "$x-$y", c!("x", 2)); - find!(find_cap_ref17, "$x_$y", c!("x_", 3)); - find!(find_cap_ref18, "${#}", c!("#", 4)); - find!(find_cap_ref19, "${Z[}", c!("Z[", 5)); -} diff --git a/collector/compile-benchmarks/regex-1.5.5/src/find_byte.rs b/collector/compile-benchmarks/regex-1.5.5/src/find_byte.rs deleted file mode 100644 index e95f72afb..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/src/find_byte.rs +++ /dev/null @@ -1,18 +0,0 @@ -/// Searches for the given needle in the given haystack. 
-/// -/// If the perf-literal feature is enabled, then this uses the super optimized -/// memchr crate. Otherwise, it uses the naive byte-at-a-time implementation. -pub fn find_byte(needle: u8, haystack: &[u8]) -> Option { - #[cfg(not(feature = "perf-literal"))] - fn imp(needle: u8, haystack: &[u8]) -> Option { - haystack.iter().position(|&b| b == needle) - } - - #[cfg(feature = "perf-literal")] - fn imp(needle: u8, haystack: &[u8]) -> Option { - use memchr::memchr; - memchr(needle, haystack) - } - - imp(needle, haystack) -} diff --git a/collector/compile-benchmarks/regex-1.5.5/src/freqs.rs b/collector/compile-benchmarks/regex-1.5.5/src/freqs.rs deleted file mode 100644 index fcffa95fb..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/src/freqs.rs +++ /dev/null @@ -1,261 +0,0 @@ -// NOTE: The following code was generated by "scripts/frequencies.py", do not -// edit directly - -pub const BYTE_FREQUENCIES: [u8; 256] = [ - 55, // '\x00' - 52, // '\x01' - 51, // '\x02' - 50, // '\x03' - 49, // '\x04' - 48, // '\x05' - 47, // '\x06' - 46, // '\x07' - 45, // '\x08' - 103, // '\t' - 242, // '\n' - 66, // '\x0b' - 67, // '\x0c' - 229, // '\r' - 44, // '\x0e' - 43, // '\x0f' - 42, // '\x10' - 41, // '\x11' - 40, // '\x12' - 39, // '\x13' - 38, // '\x14' - 37, // '\x15' - 36, // '\x16' - 35, // '\x17' - 34, // '\x18' - 33, // '\x19' - 56, // '\x1a' - 32, // '\x1b' - 31, // '\x1c' - 30, // '\x1d' - 29, // '\x1e' - 28, // '\x1f' - 255, // ' ' - 148, // '!' - 164, // '"' - 149, // '#' - 136, // '$' - 160, // '%' - 155, // '&' - 173, // "'" - 221, // '(' - 222, // ')' - 134, // '*' - 122, // '+' - 232, // ',' - 202, // '-' - 215, // '.' - 224, // '/' - 208, // '0' - 220, // '1' - 204, // '2' - 187, // '3' - 183, // '4' - 179, // '5' - 177, // '6' - 168, // '7' - 178, // '8' - 200, // '9' - 226, // ':' - 195, // ';' - 154, // '<' - 184, // '=' - 174, // '>' - 126, // '?' 
- 120, // '@' - 191, // 'A' - 157, // 'B' - 194, // 'C' - 170, // 'D' - 189, // 'E' - 162, // 'F' - 161, // 'G' - 150, // 'H' - 193, // 'I' - 142, // 'J' - 137, // 'K' - 171, // 'L' - 176, // 'M' - 185, // 'N' - 167, // 'O' - 186, // 'P' - 112, // 'Q' - 175, // 'R' - 192, // 'S' - 188, // 'T' - 156, // 'U' - 140, // 'V' - 143, // 'W' - 123, // 'X' - 133, // 'Y' - 128, // 'Z' - 147, // '[' - 138, // '\\' - 146, // ']' - 114, // '^' - 223, // '_' - 151, // '`' - 249, // 'a' - 216, // 'b' - 238, // 'c' - 236, // 'd' - 253, // 'e' - 227, // 'f' - 218, // 'g' - 230, // 'h' - 247, // 'i' - 135, // 'j' - 180, // 'k' - 241, // 'l' - 233, // 'm' - 246, // 'n' - 244, // 'o' - 231, // 'p' - 139, // 'q' - 245, // 'r' - 243, // 's' - 251, // 't' - 235, // 'u' - 201, // 'v' - 196, // 'w' - 240, // 'x' - 214, // 'y' - 152, // 'z' - 182, // '{' - 205, // '|' - 181, // '}' - 127, // '~' - 27, // '\x7f' - 212, // '\x80' - 211, // '\x81' - 210, // '\x82' - 213, // '\x83' - 228, // '\x84' - 197, // '\x85' - 169, // '\x86' - 159, // '\x87' - 131, // '\x88' - 172, // '\x89' - 105, // '\x8a' - 80, // '\x8b' - 98, // '\x8c' - 96, // '\x8d' - 97, // '\x8e' - 81, // '\x8f' - 207, // '\x90' - 145, // '\x91' - 116, // '\x92' - 115, // '\x93' - 144, // '\x94' - 130, // '\x95' - 153, // '\x96' - 121, // '\x97' - 107, // '\x98' - 132, // '\x99' - 109, // '\x9a' - 110, // '\x9b' - 124, // '\x9c' - 111, // '\x9d' - 82, // '\x9e' - 108, // '\x9f' - 118, // '\xa0' - 141, // '¡' - 113, // '¢' - 129, // '£' - 119, // '¤' - 125, // '¥' - 165, // '¦' - 117, // '§' - 92, // '¨' - 106, // '©' - 83, // 'ª' - 72, // '«' - 99, // '¬' - 93, // '\xad' - 65, // '®' - 79, // '¯' - 166, // '°' - 237, // '±' - 163, // '²' - 199, // '³' - 190, // '´' - 225, // 'µ' - 209, // '¶' - 203, // '·' - 198, // '¸' - 217, // '¹' - 219, // 'º' - 206, // '»' - 234, // '¼' - 248, // '½' - 158, // '¾' - 239, // '¿' - 255, // 'À' - 255, // 'Á' - 255, // 'Â' - 255, // 'Ã' - 255, // 'Ä' - 255, // 'Å' - 255, // 'Æ' - 255, // 'Ç' - 255, // 'È' - 255, // 'É' - 255, // 'Ê' - 255, // 'Ë' - 255, // 'Ì' - 255, // 'Í' - 255, // 'Î' - 255, // 'Ï' - 255, // 'Ð' - 255, // 'Ñ' - 255, // 'Ò' - 255, // 'Ó' - 255, // 'Ô' - 255, // 'Õ' - 255, // 'Ö' - 255, // '×' - 255, // 'Ø' - 255, // 'Ù' - 255, // 'Ú' - 255, // 'Û' - 255, // 'Ü' - 255, // 'Ý' - 255, // 'Þ' - 255, // 'ß' - 255, // 'à' - 255, // 'á' - 255, // 'â' - 255, // 'ã' - 255, // 'ä' - 255, // 'å' - 255, // 'æ' - 255, // 'ç' - 255, // 'è' - 255, // 'é' - 255, // 'ê' - 255, // 'ë' - 255, // 'ì' - 255, // 'í' - 255, // 'î' - 255, // 'ï' - 255, // 'ð' - 255, // 'ñ' - 255, // 'ò' - 255, // 'ó' - 255, // 'ô' - 255, // 'õ' - 255, // 'ö' - 255, // '÷' - 255, // 'ø' - 255, // 'ù' - 255, // 'ú' - 255, // 'û' - 255, // 'ü' - 255, // 'ý' - 255, // 'þ' - 255, // 'ÿ' -]; diff --git a/collector/compile-benchmarks/regex-1.5.5/src/input.rs b/collector/compile-benchmarks/regex-1.5.5/src/input.rs deleted file mode 100644 index 5d50ee340..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/src/input.rs +++ /dev/null @@ -1,432 +0,0 @@ -use std::char; -use std::cmp::Ordering; -use std::fmt; -use std::ops; -use std::u32; - -use crate::literal::LiteralSearcher; -use crate::prog::InstEmptyLook; -use crate::utf8::{decode_last_utf8, decode_utf8}; - -/// Represents a location in the input. -#[derive(Clone, Copy, Debug)] -pub struct InputAt { - pos: usize, - c: Char, - byte: Option, - len: usize, -} - -impl InputAt { - /// Returns true iff this position is at the beginning of the input. 
- pub fn is_start(&self) -> bool { - self.pos == 0 - } - - /// Returns true iff this position is past the end of the input. - pub fn is_end(&self) -> bool { - self.c.is_none() && self.byte.is_none() - } - - /// Returns the character at this position. - /// - /// If this position is just before or after the input, then an absent - /// character is returned. - pub fn char(&self) -> Char { - self.c - } - - /// Returns the byte at this position. - pub fn byte(&self) -> Option { - self.byte - } - - /// Returns the UTF-8 width of the character at this position. - pub fn len(&self) -> usize { - self.len - } - - /// Returns whether the UTF-8 width of the character at this position - /// is zero. - pub fn is_empty(&self) -> bool { - self.len == 0 - } - - /// Returns the byte offset of this position. - pub fn pos(&self) -> usize { - self.pos - } - - /// Returns the byte offset of the next position in the input. - pub fn next_pos(&self) -> usize { - self.pos + self.len - } -} - -/// An abstraction over input used in the matching engines. -pub trait Input: fmt::Debug { - /// Return an encoding of the position at byte offset `i`. - fn at(&self, i: usize) -> InputAt; - - /// Return the Unicode character occurring next to `at`. - /// - /// If no such character could be decoded, then `Char` is absent. - fn next_char(&self, at: InputAt) -> Char; - - /// Return the Unicode character occurring previous to `at`. - /// - /// If no such character could be decoded, then `Char` is absent. - fn previous_char(&self, at: InputAt) -> Char; - - /// Return true if the given empty width instruction matches at the - /// input position given. - fn is_empty_match(&self, at: InputAt, empty: &InstEmptyLook) -> bool; - - /// Scan the input for a matching prefix. - fn prefix_at( - &self, - prefixes: &LiteralSearcher, - at: InputAt, - ) -> Option; - - /// The number of bytes in the input. - fn len(&self) -> usize; - - /// Whether the input is empty. - fn is_empty(&self) -> bool { - self.len() == 0 - } - - /// Return the given input as a sequence of bytes. - fn as_bytes(&self) -> &[u8]; -} - -impl<'a, T: Input> Input for &'a T { - fn at(&self, i: usize) -> InputAt { - (**self).at(i) - } - - fn next_char(&self, at: InputAt) -> Char { - (**self).next_char(at) - } - - fn previous_char(&self, at: InputAt) -> Char { - (**self).previous_char(at) - } - - fn is_empty_match(&self, at: InputAt, empty: &InstEmptyLook) -> bool { - (**self).is_empty_match(at, empty) - } - - fn prefix_at( - &self, - prefixes: &LiteralSearcher, - at: InputAt, - ) -> Option { - (**self).prefix_at(prefixes, at) - } - - fn len(&self) -> usize { - (**self).len() - } - - fn as_bytes(&self) -> &[u8] { - (**self).as_bytes() - } -} - -/// An input reader over characters. -#[derive(Clone, Copy, Debug)] -pub struct CharInput<'t>(&'t [u8]); - -impl<'t> CharInput<'t> { - /// Return a new character input reader for the given string. 
- pub fn new(s: &'t [u8]) -> CharInput<'t> { - CharInput(s) - } -} - -impl<'t> ops::Deref for CharInput<'t> { - type Target = [u8]; - - fn deref(&self) -> &[u8] { - self.0 - } -} - -impl<'t> Input for CharInput<'t> { - fn at(&self, i: usize) -> InputAt { - if i >= self.len() { - InputAt { pos: self.len(), c: None.into(), byte: None, len: 0 } - } else { - let c = decode_utf8(&self[i..]).map(|(c, _)| c).into(); - InputAt { pos: i, c: c, byte: None, len: c.len_utf8() } - } - } - - fn next_char(&self, at: InputAt) -> Char { - at.char() - } - - fn previous_char(&self, at: InputAt) -> Char { - decode_last_utf8(&self[..at.pos()]).map(|(c, _)| c).into() - } - - fn is_empty_match(&self, at: InputAt, empty: &InstEmptyLook) -> bool { - use crate::prog::EmptyLook::*; - match empty.look { - StartLine => { - let c = self.previous_char(at); - at.pos() == 0 || c == '\n' - } - EndLine => { - let c = self.next_char(at); - at.pos() == self.len() || c == '\n' - } - StartText => at.pos() == 0, - EndText => at.pos() == self.len(), - WordBoundary => { - let (c1, c2) = (self.previous_char(at), self.next_char(at)); - c1.is_word_char() != c2.is_word_char() - } - NotWordBoundary => { - let (c1, c2) = (self.previous_char(at), self.next_char(at)); - c1.is_word_char() == c2.is_word_char() - } - WordBoundaryAscii => { - let (c1, c2) = (self.previous_char(at), self.next_char(at)); - c1.is_word_byte() != c2.is_word_byte() - } - NotWordBoundaryAscii => { - let (c1, c2) = (self.previous_char(at), self.next_char(at)); - c1.is_word_byte() == c2.is_word_byte() - } - } - } - - fn prefix_at( - &self, - prefixes: &LiteralSearcher, - at: InputAt, - ) -> Option { - prefixes.find(&self[at.pos()..]).map(|(s, _)| self.at(at.pos() + s)) - } - - fn len(&self) -> usize { - self.0.len() - } - - fn as_bytes(&self) -> &[u8] { - self.0 - } -} - -/// An input reader over bytes. -#[derive(Clone, Copy, Debug)] -pub struct ByteInput<'t> { - text: &'t [u8], - only_utf8: bool, -} - -impl<'t> ByteInput<'t> { - /// Return a new byte-based input reader for the given string. 
- pub fn new(text: &'t [u8], only_utf8: bool) -> ByteInput<'t> { - ByteInput { text: text, only_utf8: only_utf8 } - } -} - -impl<'t> ops::Deref for ByteInput<'t> { - type Target = [u8]; - - fn deref(&self) -> &[u8] { - self.text - } -} - -impl<'t> Input for ByteInput<'t> { - fn at(&self, i: usize) -> InputAt { - if i >= self.len() { - InputAt { pos: self.len(), c: None.into(), byte: None, len: 0 } - } else { - InputAt { - pos: i, - c: None.into(), - byte: self.get(i).cloned(), - len: 1, - } - } - } - - fn next_char(&self, at: InputAt) -> Char { - decode_utf8(&self[at.pos()..]).map(|(c, _)| c).into() - } - - fn previous_char(&self, at: InputAt) -> Char { - decode_last_utf8(&self[..at.pos()]).map(|(c, _)| c).into() - } - - fn is_empty_match(&self, at: InputAt, empty: &InstEmptyLook) -> bool { - use crate::prog::EmptyLook::*; - match empty.look { - StartLine => { - let c = self.previous_char(at); - at.pos() == 0 || c == '\n' - } - EndLine => { - let c = self.next_char(at); - at.pos() == self.len() || c == '\n' - } - StartText => at.pos() == 0, - EndText => at.pos() == self.len(), - WordBoundary => { - let (c1, c2) = (self.previous_char(at), self.next_char(at)); - c1.is_word_char() != c2.is_word_char() - } - NotWordBoundary => { - let (c1, c2) = (self.previous_char(at), self.next_char(at)); - c1.is_word_char() == c2.is_word_char() - } - WordBoundaryAscii => { - let (c1, c2) = (self.previous_char(at), self.next_char(at)); - if self.only_utf8 { - // If we must match UTF-8, then we can't match word - // boundaries at invalid UTF-8. - if c1.is_none() && !at.is_start() { - return false; - } - if c2.is_none() && !at.is_end() { - return false; - } - } - c1.is_word_byte() != c2.is_word_byte() - } - NotWordBoundaryAscii => { - let (c1, c2) = (self.previous_char(at), self.next_char(at)); - if self.only_utf8 { - // If we must match UTF-8, then we can't match word - // boundaries at invalid UTF-8. - if c1.is_none() && !at.is_start() { - return false; - } - if c2.is_none() && !at.is_end() { - return false; - } - } - c1.is_word_byte() == c2.is_word_byte() - } - } - } - - fn prefix_at( - &self, - prefixes: &LiteralSearcher, - at: InputAt, - ) -> Option { - prefixes.find(&self[at.pos()..]).map(|(s, _)| self.at(at.pos() + s)) - } - - fn len(&self) -> usize { - self.text.len() - } - - fn as_bytes(&self) -> &[u8] { - self.text - } -} - -/// An inline representation of `Option`. -/// -/// This eliminates the need to do case analysis on `Option` to determine -/// ordinality with other characters. -/// -/// (The `Option` is not related to encoding. Instead, it is used in the -/// matching engines to represent the beginning and ending boundaries of the -/// search text.) -#[derive(Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)] -pub struct Char(u32); - -impl fmt::Debug for Char { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match char::from_u32(self.0) { - None => write!(f, "Empty"), - Some(c) => write!(f, "{:?}", c), - } - } -} - -impl Char { - /// Returns true iff the character is absent. - #[inline] - pub fn is_none(self) -> bool { - self.0 == u32::MAX - } - - /// Returns the length of the character's UTF-8 encoding. - /// - /// If the character is absent, then `1` is returned. - #[inline] - pub fn len_utf8(self) -> usize { - char::from_u32(self.0).map_or(1, |c| c.len_utf8()) - } - - /// Returns true iff the character is a word character. - /// - /// If the character is absent, then false is returned. 
- pub fn is_word_char(self) -> bool { - // is_word_character can panic if the Unicode data for \w isn't - // available. However, our compiler ensures that if a Unicode word - // boundary is used, then the data must also be available. If it isn't, - // then the compiler returns an error. - char::from_u32(self.0).map_or(false, regex_syntax::is_word_character) - } - - /// Returns true iff the byte is a word byte. - /// - /// If the byte is absent, then false is returned. - pub fn is_word_byte(self) -> bool { - match char::from_u32(self.0) { - Some(c) if c <= '\u{7F}' => regex_syntax::is_word_byte(c as u8), - None | Some(_) => false, - } - } -} - -impl From for Char { - fn from(c: char) -> Char { - Char(c as u32) - } -} - -impl From> for Char { - fn from(c: Option) -> Char { - c.map_or(Char(u32::MAX), |c| c.into()) - } -} - -impl PartialEq for Char { - #[inline] - fn eq(&self, other: &char) -> bool { - self.0 == *other as u32 - } -} - -impl PartialEq for char { - #[inline] - fn eq(&self, other: &Char) -> bool { - *self as u32 == other.0 - } -} - -impl PartialOrd for Char { - #[inline] - fn partial_cmp(&self, other: &char) -> Option { - self.0.partial_cmp(&(*other as u32)) - } -} - -impl PartialOrd for char { - #[inline] - fn partial_cmp(&self, other: &Char) -> Option { - (*self as u32).partial_cmp(&other.0) - } -} diff --git a/collector/compile-benchmarks/regex-1.5.5/src/lib.rs b/collector/compile-benchmarks/regex-1.5.5/src/lib.rs deleted file mode 100644 index 7f2dec815..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/src/lib.rs +++ /dev/null @@ -1,767 +0,0 @@ -/*! -This crate provides a library for parsing, compiling, and executing regular -expressions. Its syntax is similar to Perl-style regular expressions, but lacks -a few features like look around and backreferences. In exchange, all searches -execute in linear time with respect to the size of the regular expression and -search text. - -This crate's documentation provides some simple examples, describes -[Unicode support](#unicode) and exhaustively lists the -[supported syntax](#syntax). - -For more specific details on the API for regular expressions, please see the -documentation for the [`Regex`](struct.Regex.html) type. - -# Usage - -This crate is [on crates.io](https://crates.io/crates/regex) and can be -used by adding `regex` to your dependencies in your project's `Cargo.toml`. - -```toml -[dependencies] -regex = "1" -``` - -# Example: find a date - -General use of regular expressions in this package involves compiling an -expression and then using it to search, split or replace text. For example, -to confirm that some text resembles a date: - -```rust -use regex::Regex; -let re = Regex::new(r"^\d{4}-\d{2}-\d{2}$").unwrap(); -assert!(re.is_match("2014-01-01")); -``` - -Notice the use of the `^` and `$` anchors. In this crate, every expression -is executed with an implicit `.*?` at the beginning and end, which allows -it to match anywhere in the text. Anchors can be used to ensure that the -full text matches an expression. - -This example also demonstrates the utility of -[raw strings](https://doc.rust-lang.org/stable/reference/tokens.html#raw-string-literals) -in Rust, which -are just like regular strings except they are prefixed with an `r` and do -not process any escape sequences. For example, `"\\d"` is the same -expression as `r"\d"`. - -# Example: Avoid compiling the same regex in a loop - -It is an anti-pattern to compile the same regular expression in a loop -since compilation is typically expensive. 
(It takes anywhere from a few -microseconds to a few **milliseconds** depending on the size of the -regex.) Not only is compilation itself expensive, but this also prevents -optimizations that reuse allocations internally to the matching engines. - -In Rust, it can sometimes be a pain to pass regular expressions around if -they're used from inside a helper function. Instead, we recommend using the -[`lazy_static`](https://crates.io/crates/lazy_static) crate to ensure that -regular expressions are compiled exactly once. - -For example: - -```rust -use lazy_static::lazy_static; -use regex::Regex; - -fn some_helper_function(text: &str) -> bool { - lazy_static! { - static ref RE: Regex = Regex::new("...").unwrap(); - } - RE.is_match(text) -} - -fn main() {} -``` - -Specifically, in this example, the regex will be compiled when it is used for -the first time. On subsequent uses, it will reuse the previous compilation. - -# Example: iterating over capture groups - -This crate provides convenient iterators for matching an expression -repeatedly against a search string to find successive non-overlapping -matches. For example, to find all dates in a string and be able to access -them by their component pieces: - -```rust -# use regex::Regex; -# fn main() { -let re = Regex::new(r"(\d{4})-(\d{2})-(\d{2})").unwrap(); -let text = "2012-03-14, 2013-01-01 and 2014-07-05"; -for cap in re.captures_iter(text) { - println!("Month: {} Day: {} Year: {}", &cap[2], &cap[3], &cap[1]); -} -// Output: -// Month: 03 Day: 14 Year: 2012 -// Month: 01 Day: 01 Year: 2013 -// Month: 07 Day: 05 Year: 2014 -# } -``` - -Notice that the year is in the capture group indexed at `1`. This is -because the *entire match* is stored in the capture group at index `0`. - -# Example: replacement with named capture groups - -Building on the previous example, perhaps we'd like to rearrange the date -formats. This can be done with text replacement. But to make the code -clearer, we can *name* our capture groups and use those names as variables -in our replacement text: - -```rust -# use regex::Regex; -# fn main() { -let re = Regex::new(r"(?P\d{4})-(?P\d{2})-(?P\d{2})").unwrap(); -let before = "2012-03-14, 2013-01-01 and 2014-07-05"; -let after = re.replace_all(before, "$m/$d/$y"); -assert_eq!(after, "03/14/2012, 01/01/2013 and 07/05/2014"); -# } -``` - -The `replace` methods are actually polymorphic in the replacement, which -provides more flexibility than is seen here. (See the documentation for -`Regex::replace` for more details.) - -Note that if your regex gets complicated, you can use the `x` flag to -enable insignificant whitespace mode, which also lets you write comments: - -```rust -# use regex::Regex; -# fn main() { -let re = Regex::new(r"(?x) - (?P\d{4}) # the year - - - (?P\d{2}) # the month - - - (?P\d{2}) # the day -").unwrap(); -let before = "2012-03-14, 2013-01-01 and 2014-07-05"; -let after = re.replace_all(before, "$m/$d/$y"); -assert_eq!(after, "03/14/2012, 01/01/2013 and 07/05/2014"); -# } -``` - -If you wish to match against whitespace in this mode, you can still use `\s`, -`\n`, `\t`, etc. For escaping a single space character, you can escape it -directly with `\ `, use its hex character code `\x20` or temporarily disable -the `x` flag, e.g., `(?-x: )`. 
- -# Example: match multiple regular expressions simultaneously - -This demonstrates how to use a `RegexSet` to match multiple (possibly -overlapping) regular expressions in a single scan of the search text: - -```rust -use regex::RegexSet; - -let set = RegexSet::new(&[ - r"\w+", - r"\d+", - r"\pL+", - r"foo", - r"bar", - r"barfoo", - r"foobar", -]).unwrap(); - -// Iterate over and collect all of the matches. -let matches: Vec<_> = set.matches("foobar").into_iter().collect(); -assert_eq!(matches, vec![0, 2, 3, 4, 6]); - -// You can also test whether a particular regex matched: -let matches = set.matches("foobar"); -assert!(!matches.matched(5)); -assert!(matches.matched(6)); -``` - -# Pay for what you use - -With respect to searching text with a regular expression, there are three -questions that can be asked: - -1. Does the text match this expression? -2. If so, where does it match? -3. Where did the capturing groups match? - -Generally speaking, this crate could provide a function to answer only #3, -which would subsume #1 and #2 automatically. However, it can be significantly -more expensive to compute the location of capturing group matches, so it's best -not to do it if you don't need to. - -Therefore, only use what you need. For example, don't use `find` if you -only need to test if an expression matches a string. (Use `is_match` -instead.) - -# Unicode - -This implementation executes regular expressions **only** on valid UTF-8 -while exposing match locations as byte indices into the search string. (To -relax this restriction, use the [`bytes`](bytes/index.html) sub-module.) - -Only simple case folding is supported. Namely, when matching -case-insensitively, the characters are first mapped using the "simple" case -folding rules defined by Unicode. - -Regular expressions themselves are **only** interpreted as a sequence of -Unicode scalar values. This means you can use Unicode characters directly -in your expression: - -```rust -# use regex::Regex; -# fn main() { -let re = Regex::new(r"(?i)Δ+").unwrap(); -let mat = re.find("ΔδΔ").unwrap(); -assert_eq!((mat.start(), mat.end()), (0, 6)); -# } -``` - -Most features of the regular expressions in this crate are Unicode aware. Here -are some examples: - -* `.` will match any valid UTF-8 encoded Unicode scalar value except for `\n`. - (To also match `\n`, enable the `s` flag, e.g., `(?s:.)`.) -* `\w`, `\d` and `\s` are Unicode aware. For example, `\s` will match all forms - of whitespace categorized by Unicode. -* `\b` matches a Unicode word boundary. -* Negated character classes like `[^a]` match all Unicode scalar values except - for `a`. -* `^` and `$` are **not** Unicode aware in multi-line mode. Namely, they only - recognize `\n` and not any of the other forms of line terminators defined - by Unicode. - -Unicode general categories, scripts, script extensions, ages and a smattering -of boolean properties are available as character classes. For example, you can -match a sequence of numerals, Greek or Cherokee letters: - -```rust -# use regex::Regex; -# fn main() { -let re = Regex::new(r"[\pN\p{Greek}\p{Cherokee}]+").unwrap(); -let mat = re.find("abcΔᎠβⅠᏴγδⅡxyz").unwrap(); -assert_eq!((mat.start(), mat.end()), (3, 23)); -# } -``` - -For a more detailed breakdown of Unicode support with respect to -[UTS#18](https://unicode.org/reports/tr18/), -please see the -[UNICODE](https://github.com/rust-lang/regex/blob/master/UNICODE.md) -document in the root of the regex repository. 
- -# Opt out of Unicode support - -The `bytes` sub-module provides a `Regex` type that can be used to match -on `&[u8]`. By default, text is interpreted as UTF-8 just like it is with -the main `Regex` type. However, this behavior can be disabled by turning -off the `u` flag, even if doing so could result in matching invalid UTF-8. -For example, when the `u` flag is disabled, `.` will match any byte instead -of any Unicode scalar value. - -Disabling the `u` flag is also possible with the standard `&str`-based `Regex` -type, but it is only allowed where the UTF-8 invariant is maintained. For -example, `(?-u:\w)` is an ASCII-only `\w` character class and is legal in an -`&str`-based `Regex`, but `(?-u:\xFF)` will attempt to match the raw byte -`\xFF`, which is invalid UTF-8 and therefore is illegal in `&str`-based -regexes. - -Finally, since Unicode support requires bundling large Unicode data -tables, this crate exposes knobs to disable the compilation of those -data tables, which can be useful for shrinking binary size and reducing -compilation times. For details on how to do that, see the section on [crate -features](#crate-features). - -# Syntax - -The syntax supported in this crate is documented below. - -Note that the regular expression parser and abstract syntax are exposed in -a separate crate, [`regex-syntax`](https://docs.rs/regex-syntax). - -## Matching one character - -
-.             any character except new line (includes new line with s flag)
-\d            digit (\p{Nd})
-\D            not digit
-\pN           One-letter name Unicode character class
-\p{Greek}     Unicode character class (general category or script)
-\PN           Negated one-letter name Unicode character class
-\P{Greek}     negated Unicode character class (general category or script)
-</pre>
-
-### Character classes
-
-<pre class="rust">
-[xyz]         A character class matching either x, y or z (union).
-[^xyz]        A character class matching any character except x, y and z.
-[a-z]         A character class matching any character in range a-z.
-[[:alpha:]]   ASCII character class ([A-Za-z])
-[[:^alpha:]]  Negated ASCII character class ([^A-Za-z])
-[x[^xyz]]     Nested/grouping character class (matching any character except y and z)
-[a-y&&xyz]    Intersection (matching x or y)
-[0-9&&[^4]]   Subtraction using intersection and negation (matching 0-9 except 4)
-[0-9--4]      Direct subtraction (matching 0-9 except 4)
-[a-g~~b-h]    Symmetric difference (matching `a` and `h` only)
-[\[\]]        Escaping in character classes (matching [ or ])
-</pre>
-
-Any named character class may appear inside a bracketed `[...]` character
-class. For example, `[\p{Greek}[:digit:]]` matches any Greek or ASCII
-digit. `[\p{Greek}&&\pL]` matches Greek letters.
-
-Precedence in character classes, from most binding to least:
-
-1. Ranges: `a-cd` == `[a-c]d`
-2. Union: `ab&&bc` == `[ab]&&[bc]`
-3. Intersection: `^a-z&&b` == `^[a-z&&b]`
-4. Negation
-
-## Composites
-
-<pre class="rust">
-xy    concatenation (x followed by y)
-x|y   alternation (x or y, prefer x)
-</pre>
-
-## Repetitions
-
-<pre class="rust">
-x*        zero or more of x (greedy)
-x+        one or more of x (greedy)
-x?        zero or one of x (greedy)
-x*?       zero or more of x (ungreedy/lazy)
-x+?       one or more of x (ungreedy/lazy)
-x??       zero or one of x (ungreedy/lazy)
-x{n,m}    at least n x and at most m x (greedy)
-x{n,}     at least n x (greedy)
-x{n}      exactly n x
-x{n,m}?   at least n x and at most m x (ungreedy/lazy)
-x{n,}?    at least n x (ungreedy/lazy)
-x{n}?     exactly n x
-</pre>
-
-## Empty matches
-
-<pre class="rust">
-^     the beginning of text (or start-of-line with multi-line mode)
-$     the end of text (or end-of-line with multi-line mode)
-\A    only the beginning of text (even with multi-line mode enabled)
-\z    only the end of text (even with multi-line mode enabled)
-\b    a Unicode word boundary (\w on one side and \W, \A, or \z on other)
-\B    not a Unicode word boundary
-</pre>
-
-## Grouping and flags
-
-<pre class="rust">
-(exp)          numbered capture group (indexed by opening parenthesis)
-(?P<name>exp)  named (also numbered) capture group (allowed chars: [_0-9a-zA-Z.\[\]])
-(?:exp)        non-capturing group
-(?flags)       set flags within current group
-(?flags:exp)   set flags for exp (non-capturing)
-</pre>
-
-Flags are each a single character. For example, `(?x)` sets the flag `x`
-and `(?-x)` clears the flag `x`. Multiple flags can be set or cleared at
-the same time: `(?xy)` sets both the `x` and `y` flags and `(?x-y)` sets
-the `x` flag and clears the `y` flag.
-
-All flags are by default disabled unless stated otherwise. They are:
-
-<pre class="rust">
-i     case-insensitive: letters match both upper and lower case
-m     multi-line mode: ^ and $ match begin/end of line
-s     allow . to match \n
-U     swap the meaning of x* and x*?
-u     Unicode support (enabled by default)
-x     ignore whitespace and allow line comments (starting with `#`)
-</pre>
- -Flags can be toggled within a pattern. Here's an example that matches -case-insensitively for the first part but case-sensitively for the second part: - -```rust -# use regex::Regex; -# fn main() { -let re = Regex::new(r"(?i)a+(?-i)b+").unwrap(); -let cap = re.captures("AaAaAbbBBBb").unwrap(); -assert_eq!(&cap[0], "AaAaAbb"); -# } -``` - -Notice that the `a+` matches either `a` or `A`, but the `b+` only matches -`b`. - -Multi-line mode means `^` and `$` no longer match just at the beginning/end of -the input, but at the beginning/end of lines: - -``` -# use regex::Regex; -let re = Regex::new(r"(?m)^line \d+").unwrap(); -let m = re.find("line one\nline 2\n").unwrap(); -assert_eq!(m.as_str(), "line 2"); -``` - -Note that `^` matches after new lines, even at the end of input: - -``` -# use regex::Regex; -let re = Regex::new(r"(?m)^").unwrap(); -let m = re.find_iter("test\n").last().unwrap(); -assert_eq!((m.start(), m.end()), (5, 5)); -``` - -Here is an example that uses an ASCII word boundary instead of a Unicode -word boundary: - -```rust -# use regex::Regex; -# fn main() { -let re = Regex::new(r"(?-u:\b).+(?-u:\b)").unwrap(); -let cap = re.captures("$$abc$$").unwrap(); -assert_eq!(&cap[0], "abc"); -# } -``` - -## Escape sequences - -
-\*          literal *, works for any punctuation character: \.+*?()|[]{}^$
-\a          bell (\x07)
-\f          form feed (\x0C)
-\t          horizontal tab
-\n          new line
-\r          carriage return
-\v          vertical tab (\x0B)
-\123        octal character code (up to three digits) (when enabled)
-\x7F        hex character code (exactly two digits)
-\x{10FFFF}  any hex character code corresponding to a Unicode code point
-\u007F      hex character code (exactly four digits)
-\u{7F}      any hex character code corresponding to a Unicode code point
-\U0000007F  hex character code (exactly eight digits)
-\U{7F}      any hex character code corresponding to a Unicode code point
-</pre>
-
-## Perl character classes (Unicode friendly)
-
-These classes are based on the definitions provided in
-[UTS#18](https://www.unicode.org/reports/tr18/#Compatibility_Properties):
-
-<pre class="rust">
-\d     digit (\p{Nd})
-\D     not digit
-\s     whitespace (\p{White_Space})
-\S     not whitespace
-\w     word character (\p{Alphabetic} + \p{M} + \d + \p{Pc} + \p{Join_Control})
-\W     not word character
-</pre>
-
-## ASCII character classes
-
-<pre class="rust">
-[[:alnum:]]    alphanumeric ([0-9A-Za-z])
-[[:alpha:]]    alphabetic ([A-Za-z])
-[[:ascii:]]    ASCII ([\x00-\x7F])
-[[:blank:]]    blank ([\t ])
-[[:cntrl:]]    control ([\x00-\x1F\x7F])
-[[:digit:]]    digits ([0-9])
-[[:graph:]]    graphical ([!-~])
-[[:lower:]]    lower case ([a-z])
-[[:print:]]    printable ([ -~])
-[[:punct:]]    punctuation ([!-/:-@\[-`{-~])
-[[:space:]]    whitespace ([\t\n\v\f\r ])
-[[:upper:]]    upper case ([A-Z])
-[[:word:]]     word characters ([0-9A-Za-z_])
-[[:xdigit:]]   hex digit ([0-9A-Fa-f])
-</pre>
- -# Crate features - -By default, this crate tries pretty hard to make regex matching both as fast -as possible and as correct as it can be, within reason. This means that there -is a lot of code dedicated to performance, the handling of Unicode data and the -Unicode data itself. Overall, this leads to more dependencies, larger binaries -and longer compile times. This trade off may not be appropriate in all cases, -and indeed, even when all Unicode and performance features are disabled, one -is still left with a perfectly serviceable regex engine that will work well -in many cases. - -This crate exposes a number of features for controlling that trade off. Some -of these features are strictly performance oriented, such that disabling them -won't result in a loss of functionality, but may result in worse performance. -Other features, such as the ones controlling the presence or absence of Unicode -data, can result in a loss of functionality. For example, if one disables the -`unicode-case` feature (described below), then compiling the regex `(?i)a` -will fail since Unicode case insensitivity is enabled by default. Instead, -callers must use `(?i-u)a` instead to disable Unicode case folding. Stated -differently, enabling or disabling any of the features below can only add or -subtract from the total set of valid regular expressions. Enabling or disabling -a feature will never modify the match semantics of a regular expression. - -All features below are enabled by default. - -### Ecosystem features - -* **std** - - When enabled, this will cause `regex` to use the standard library. Currently, - disabling this feature will always result in a compilation error. It is - intended to add `alloc`-only support to regex in the future. - -### Performance features - -* **perf** - - Enables all performance related features. This feature is enabled by default - and will always cover all features that improve performance, even if more - are added in the future. -* **perf-dfa** - - Enables the use of a lazy DFA for matching. The lazy DFA is used to compile - portions of a regex to a very fast DFA on an as-needed basis. This can - result in substantial speedups, usually by an order of magnitude on large - haystacks. The lazy DFA does not bring in any new dependencies, but it can - make compile times longer. -* **perf-inline** - - Enables the use of aggressive inlining inside match routines. This reduces - the overhead of each match. The aggressive inlining, however, increases - compile times and binary size. -* **perf-literal** - - Enables the use of literal optimizations for speeding up matches. In some - cases, literal optimizations can result in speedups of _several_ orders of - magnitude. Disabling this drops the `aho-corasick` and `memchr` dependencies. -* **perf-cache** - - This feature used to enable a faster internal cache at the cost of using - additional dependencies, but this is no longer an option. A fast internal - cache is now used unconditionally with no additional dependencies. This may - change in the future. - -### Unicode features - -* **unicode** - - Enables all Unicode features. This feature is enabled by default, and will - always cover all Unicode features, even if more are added in the future. -* **unicode-age** - - Provide the data for the - [Unicode `Age` property](https://www.unicode.org/reports/tr44/tr44-24.html#Character_Age). 
- This makes it possible to use classes like `\p{Age:6.0}` to refer to all - codepoints first introduced in Unicode 6.0 -* **unicode-bool** - - Provide the data for numerous Unicode boolean properties. The full list - is not included here, but contains properties like `Alphabetic`, `Emoji`, - `Lowercase`, `Math`, `Uppercase` and `White_Space`. -* **unicode-case** - - Provide the data for case insensitive matching using - [Unicode's "simple loose matches" specification](https://www.unicode.org/reports/tr18/#Simple_Loose_Matches). -* **unicode-gencat** - - Provide the data for - [Unicode general categories](https://www.unicode.org/reports/tr44/tr44-24.html#General_Category_Values). - This includes, but is not limited to, `Decimal_Number`, `Letter`, - `Math_Symbol`, `Number` and `Punctuation`. -* **unicode-perl** - - Provide the data for supporting the Unicode-aware Perl character classes, - corresponding to `\w`, `\s` and `\d`. This is also necessary for using - Unicode-aware word boundary assertions. Note that if this feature is - disabled, the `\s` and `\d` character classes are still available if the - `unicode-bool` and `unicode-gencat` features are enabled, respectively. -* **unicode-script** - - Provide the data for - [Unicode scripts and script extensions](https://www.unicode.org/reports/tr24/). - This includes, but is not limited to, `Arabic`, `Cyrillic`, `Hebrew`, - `Latin` and `Thai`. -* **unicode-segment** - - Provide the data necessary to provide the properties used to implement the - [Unicode text segmentation algorithms](https://www.unicode.org/reports/tr29/). - This enables using classes like `\p{gcb=Extend}`, `\p{wb=Katakana}` and - `\p{sb=ATerm}`. - - -# Untrusted input - -This crate can handle both untrusted regular expressions and untrusted -search text. - -Untrusted regular expressions are handled by capping the size of a compiled -regular expression. -(See [`RegexBuilder::size_limit`](struct.RegexBuilder.html#method.size_limit).) -Without this, it would be trivial for an attacker to exhaust your system's -memory with expressions like `a{100}{100}{100}`. - -Untrusted search text is allowed because the matching engine(s) in this -crate have time complexity `O(mn)` (with `m ~ regex` and `n ~ search -text`), which means there's no way to cause exponential blow-up like with -some other regular expression engines. (We pay for this by disallowing -features like arbitrary look-ahead and backreferences.) - -When a DFA is used, pathological cases with exponential state blow-up are -avoided by constructing the DFA lazily or in an "online" manner. Therefore, -at most one new state can be created for each byte of input. This satisfies -our time complexity guarantees, but can lead to memory growth -proportional to the size of the input. As a stopgap, the DFA is only -allowed to store a fixed number of states. When the limit is reached, its -states are wiped and continues on, possibly duplicating previous work. If -the limit is reached too frequently, it gives up and hands control off to -another matching engine with fixed memory requirements. -(The DFA size limit can also be tweaked. See -[`RegexBuilder::dfa_size_limit`](struct.RegexBuilder.html#method.dfa_size_limit).) 
-*/ - -#![deny(missing_docs)] -#![cfg_attr(feature = "pattern", feature(pattern))] -#![warn(missing_debug_implementations)] - -#[cfg(not(feature = "std"))] -compile_error!("`std` feature is currently required to build this crate"); - -// To check README's example -// TODO: Re-enable this once the MSRV is 1.43 or greater. -// See: https://github.com/rust-lang/regex/issues/684 -// See: https://github.com/rust-lang/regex/issues/685 -// #[cfg(doctest)] -// doc_comment::doctest!("../README.md"); - -#[cfg(feature = "std")] -pub use crate::error::Error; -#[cfg(feature = "std")] -pub use crate::re_builder::set_unicode::*; -#[cfg(feature = "std")] -pub use crate::re_builder::unicode::*; -#[cfg(feature = "std")] -pub use crate::re_set::unicode::*; -#[cfg(feature = "std")] -#[cfg(feature = "std")] -pub use crate::re_unicode::{ - escape, CaptureLocations, CaptureMatches, CaptureNames, Captures, - Locations, Match, Matches, NoExpand, Regex, Replacer, ReplacerRef, Split, - SplitN, SubCaptureMatches, -}; - -/** -Match regular expressions on arbitrary bytes. - -This module provides a nearly identical API to the one found in the -top-level of this crate. There are two important differences: - -1. Matching is done on `&[u8]` instead of `&str`. Additionally, `Vec` -is used where `String` would have been used. -2. Unicode support can be disabled even when disabling it would result in -matching invalid UTF-8 bytes. - -# Example: match null terminated string - -This shows how to find all null-terminated strings in a slice of bytes: - -```rust -# use regex::bytes::Regex; -let re = Regex::new(r"(?-u)(?P[^\x00]+)\x00").unwrap(); -let text = b"foo\x00bar\x00baz\x00"; - -// Extract all of the strings without the null terminator from each match. -// The unwrap is OK here since a match requires the `cstr` capture to match. -let cstrs: Vec<&[u8]> = - re.captures_iter(text) - .map(|c| c.name("cstr").unwrap().as_bytes()) - .collect(); -assert_eq!(vec![&b"foo"[..], &b"bar"[..], &b"baz"[..]], cstrs); -``` - -# Example: selectively enable Unicode support - -This shows how to match an arbitrary byte pattern followed by a UTF-8 encoded -string (e.g., to extract a title from a Matroska file): - -```rust -# use std::str; -# use regex::bytes::Regex; -let re = Regex::new( - r"(?-u)\x7b\xa9(?:[\x80-\xfe]|[\x40-\xff].)(?u:(.*))" -).unwrap(); -let text = b"\x12\xd0\x3b\x5f\x7b\xa9\x85\xe2\x98\x83\x80\x98\x54\x76\x68\x65"; -let caps = re.captures(text).unwrap(); - -// Notice that despite the `.*` at the end, it will only match valid UTF-8 -// because Unicode mode was enabled with the `u` flag. Without the `u` flag, -// the `.*` would match the rest of the bytes. -let mat = caps.get(1).unwrap(); -assert_eq!((7, 10), (mat.start(), mat.end())); - -// If there was a match, Unicode mode guarantees that `title` is valid UTF-8. -let title = str::from_utf8(&caps[1]).unwrap(); -assert_eq!("☃", title); -``` - -In general, if the Unicode flag is enabled in a capture group and that capture -is part of the overall match, then the capture is *guaranteed* to be valid -UTF-8. - -# Syntax - -The supported syntax is pretty much the same as the syntax for Unicode -regular expressions with a few changes that make sense for matching arbitrary -bytes: - -1. The `u` flag can be disabled even when disabling it might cause the regex to -match invalid UTF-8. When the `u` flag is disabled, the regex is said to be in -"ASCII compatible" mode. -2. In ASCII compatible mode, neither Unicode scalar values nor Unicode -character classes are allowed. -3. 
In ASCII compatible mode, Perl character classes (`\w`, `\d` and `\s`) -revert to their typical ASCII definition. `\w` maps to `[[:word:]]`, `\d` maps -to `[[:digit:]]` and `\s` maps to `[[:space:]]`. -4. In ASCII compatible mode, word boundaries use the ASCII compatible `\w` to -determine whether a byte is a word byte or not. -5. Hexadecimal notation can be used to specify arbitrary bytes instead of -Unicode codepoints. For example, in ASCII compatible mode, `\xFF` matches the -literal byte `\xFF`, while in Unicode mode, `\xFF` is a Unicode codepoint that -matches its UTF-8 encoding of `\xC3\xBF`. Similarly for octal notation when -enabled. -6. In ASCII compatible mode, `.` matches any *byte* except for `\n`. When the -`s` flag is additionally enabled, `.` matches any byte. - -# Performance - -In general, one should expect performance on `&[u8]` to be roughly similar to -performance on `&str`. -*/ -#[cfg(feature = "std")] -pub mod bytes { - pub use crate::re_builder::bytes::*; - pub use crate::re_builder::set_bytes::*; - pub use crate::re_bytes::*; - pub use crate::re_set::bytes::*; -} - -mod backtrack; -mod compile; -#[cfg(feature = "perf-dfa")] -mod dfa; -mod error; -mod exec; -mod expand; -mod find_byte; -mod input; -mod literal; -#[cfg(feature = "pattern")] -mod pattern; -mod pikevm; -mod pool; -mod prog; -mod re_builder; -mod re_bytes; -mod re_set; -mod re_trait; -mod re_unicode; -mod sparse; -mod utf8; - -/// The `internal` module exists to support suspicious activity, such as -/// testing different matching engines and supporting the `regex-debug` CLI -/// utility. -#[doc(hidden)] -#[cfg(feature = "std")] -pub mod internal { - pub use crate::compile::Compiler; - pub use crate::exec::{Exec, ExecBuilder}; - pub use crate::input::{Char, CharInput, Input, InputAt}; - pub use crate::literal::LiteralSearcher; - pub use crate::prog::{EmptyLook, Inst, InstRanges, Program}; -} diff --git a/collector/compile-benchmarks/regex-1.5.5/src/literal/imp.rs b/collector/compile-benchmarks/regex-1.5.5/src/literal/imp.rs deleted file mode 100644 index 82f050a0d..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/src/literal/imp.rs +++ /dev/null @@ -1,402 +0,0 @@ -use std::mem; - -use aho_corasick::{self, packed, AhoCorasick, AhoCorasickBuilder}; -use memchr::{memchr, memchr2, memchr3, memmem}; -use regex_syntax::hir::literal::{Literal, Literals}; - -/// A prefix extracted from a compiled regular expression. -/// -/// A regex prefix is a set of literal strings that *must* be matched at the -/// beginning of a regex in order for the entire regex to match. Similarly -/// for a regex suffix. -#[derive(Clone, Debug)] -pub struct LiteralSearcher { - complete: bool, - lcp: Memmem, - lcs: Memmem, - matcher: Matcher, -} - -#[derive(Clone, Debug)] -enum Matcher { - /// No literals. (Never advances through the input.) - Empty, - /// A set of four or more single byte literals. - Bytes(SingleByteSet), - /// A single substring, using vector accelerated routines when available. - Memmem(Memmem), - /// An Aho-Corasick automaton. - AC { ac: AhoCorasick, lits: Vec }, - /// A packed multiple substring searcher, using SIMD. - /// - /// Note that Aho-Corasick will actually use this packed searcher - /// internally automatically, however, there is some overhead associated - /// with going through the Aho-Corasick machinery. So using the packed - /// searcher directly results in some gains. 
- Packed { s: packed::Searcher, lits: Vec }, -} - -impl LiteralSearcher { - /// Returns a matcher that never matches and never advances the input. - pub fn empty() -> Self { - Self::new(Literals::empty(), Matcher::Empty) - } - - /// Returns a matcher for literal prefixes from the given set. - pub fn prefixes(lits: Literals) -> Self { - let matcher = Matcher::prefixes(&lits); - Self::new(lits, matcher) - } - - /// Returns a matcher for literal suffixes from the given set. - pub fn suffixes(lits: Literals) -> Self { - let matcher = Matcher::suffixes(&lits); - Self::new(lits, matcher) - } - - fn new(lits: Literals, matcher: Matcher) -> Self { - let complete = lits.all_complete(); - LiteralSearcher { - complete: complete, - lcp: Memmem::new(lits.longest_common_prefix()), - lcs: Memmem::new(lits.longest_common_suffix()), - matcher: matcher, - } - } - - /// Returns true if all matches comprise the entire regular expression. - /// - /// This does not necessarily mean that a literal match implies a match - /// of the regular expression. For example, the regular expression `^a` - /// is comprised of a single complete literal `a`, but the regular - /// expression demands that it only match at the beginning of a string. - pub fn complete(&self) -> bool { - self.complete && !self.is_empty() - } - - /// Find the position of a literal in `haystack` if it exists. - #[cfg_attr(feature = "perf-inline", inline(always))] - pub fn find(&self, haystack: &[u8]) -> Option<(usize, usize)> { - use self::Matcher::*; - match self.matcher { - Empty => Some((0, 0)), - Bytes(ref sset) => sset.find(haystack).map(|i| (i, i + 1)), - Memmem(ref s) => s.find(haystack).map(|i| (i, i + s.len())), - AC { ref ac, .. } => { - ac.find(haystack).map(|m| (m.start(), m.end())) - } - Packed { ref s, .. } => { - s.find(haystack).map(|m| (m.start(), m.end())) - } - } - } - - /// Like find, except matches must start at index `0`. - pub fn find_start(&self, haystack: &[u8]) -> Option<(usize, usize)> { - for lit in self.iter() { - if lit.len() > haystack.len() { - continue; - } - if lit == &haystack[0..lit.len()] { - return Some((0, lit.len())); - } - } - None - } - - /// Like find, except matches must end at index `haystack.len()`. - pub fn find_end(&self, haystack: &[u8]) -> Option<(usize, usize)> { - for lit in self.iter() { - if lit.len() > haystack.len() { - continue; - } - if lit == &haystack[haystack.len() - lit.len()..] { - return Some((haystack.len() - lit.len(), haystack.len())); - } - } - None - } - - /// Returns an iterator over all literals to be matched. - pub fn iter(&self) -> LiteralIter<'_> { - match self.matcher { - Matcher::Empty => LiteralIter::Empty, - Matcher::Bytes(ref sset) => LiteralIter::Bytes(&sset.dense), - Matcher::Memmem(ref s) => LiteralIter::Single(&s.finder.needle()), - Matcher::AC { ref lits, .. } => LiteralIter::AC(lits), - Matcher::Packed { ref lits, .. } => LiteralIter::Packed(lits), - } - } - - /// Returns a matcher for the longest common prefix of this matcher. - pub fn lcp(&self) -> &Memmem { - &self.lcp - } - - /// Returns a matcher for the longest common suffix of this matcher. - pub fn lcs(&self) -> &Memmem { - &self.lcs - } - - /// Returns true iff this prefix is empty. - pub fn is_empty(&self) -> bool { - self.len() == 0 - } - - /// Returns the number of prefixes in this machine. - pub fn len(&self) -> usize { - use self::Matcher::*; - match self.matcher { - Empty => 0, - Bytes(ref sset) => sset.dense.len(), - Memmem(_) => 1, - AC { ref ac, .. } => ac.pattern_count(), - Packed { ref lits, .. 
} => lits.len(), - } - } - - /// Return the approximate heap usage of literals in bytes. - pub fn approximate_size(&self) -> usize { - use self::Matcher::*; - match self.matcher { - Empty => 0, - Bytes(ref sset) => sset.approximate_size(), - Memmem(ref single) => single.approximate_size(), - AC { ref ac, .. } => ac.heap_bytes(), - Packed { ref s, .. } => s.heap_bytes(), - } - } -} - -impl Matcher { - fn prefixes(lits: &Literals) -> Self { - let sset = SingleByteSet::prefixes(lits); - Matcher::new(lits, sset) - } - - fn suffixes(lits: &Literals) -> Self { - let sset = SingleByteSet::suffixes(lits); - Matcher::new(lits, sset) - } - - fn new(lits: &Literals, sset: SingleByteSet) -> Self { - if lits.literals().is_empty() { - return Matcher::Empty; - } - if sset.dense.len() >= 26 { - // Avoid trying to match a large number of single bytes. - // This is *very* sensitive to a frequency analysis comparison - // between the bytes in sset and the composition of the haystack. - // No matter the size of sset, if its members all are rare in the - // haystack, then it'd be worth using it. How to tune this... IDK. - // ---AG - return Matcher::Empty; - } - if sset.complete { - return Matcher::Bytes(sset); - } - if lits.literals().len() == 1 { - return Matcher::Memmem(Memmem::new(&lits.literals()[0])); - } - - let pats = lits.literals().to_owned(); - let is_aho_corasick_fast = sset.dense.len() <= 1 && sset.all_ascii; - if lits.literals().len() <= 100 && !is_aho_corasick_fast { - let mut builder = packed::Config::new() - .match_kind(packed::MatchKind::LeftmostFirst) - .builder(); - if let Some(s) = builder.extend(&pats).build() { - return Matcher::Packed { s, lits: pats }; - } - } - let ac = AhoCorasickBuilder::new() - .match_kind(aho_corasick::MatchKind::LeftmostFirst) - .dfa(true) - .build_with_size::(&pats) - .unwrap(); - Matcher::AC { ac, lits: pats } - } -} - -#[derive(Debug)] -pub enum LiteralIter<'a> { - Empty, - Bytes(&'a [u8]), - Single(&'a [u8]), - AC(&'a [Literal]), - Packed(&'a [Literal]), -} - -impl<'a> Iterator for LiteralIter<'a> { - type Item = &'a [u8]; - - fn next(&mut self) -> Option { - match *self { - LiteralIter::Empty => None, - LiteralIter::Bytes(ref mut many) => { - if many.is_empty() { - None - } else { - let next = &many[0..1]; - *many = &many[1..]; - Some(next) - } - } - LiteralIter::Single(ref mut one) => { - if one.is_empty() { - None - } else { - let next = &one[..]; - *one = &[]; - Some(next) - } - } - LiteralIter::AC(ref mut lits) => { - if lits.is_empty() { - None - } else { - let next = &lits[0]; - *lits = &lits[1..]; - Some(&**next) - } - } - LiteralIter::Packed(ref mut lits) => { - if lits.is_empty() { - None - } else { - let next = &lits[0]; - *lits = &lits[1..]; - Some(&**next) - } - } - } - } -} - -#[derive(Clone, Debug)] -struct SingleByteSet { - sparse: Vec, - dense: Vec, - complete: bool, - all_ascii: bool, -} - -impl SingleByteSet { - fn new() -> SingleByteSet { - SingleByteSet { - sparse: vec![false; 256], - dense: vec![], - complete: true, - all_ascii: true, - } - } - - fn prefixes(lits: &Literals) -> SingleByteSet { - let mut sset = SingleByteSet::new(); - for lit in lits.literals() { - sset.complete = sset.complete && lit.len() == 1; - if let Some(&b) = lit.get(0) { - if !sset.sparse[b as usize] { - if b > 0x7F { - sset.all_ascii = false; - } - sset.dense.push(b); - sset.sparse[b as usize] = true; - } - } - } - sset - } - - fn suffixes(lits: &Literals) -> SingleByteSet { - let mut sset = SingleByteSet::new(); - for lit in lits.literals() { - sset.complete = 
sset.complete && lit.len() == 1; - if let Some(&b) = lit.get(lit.len().checked_sub(1).unwrap()) { - if !sset.sparse[b as usize] { - if b > 0x7F { - sset.all_ascii = false; - } - sset.dense.push(b); - sset.sparse[b as usize] = true; - } - } - } - sset - } - - /// Faster find that special cases certain sizes to use memchr. - #[cfg_attr(feature = "perf-inline", inline(always))] - fn find(&self, text: &[u8]) -> Option { - match self.dense.len() { - 0 => None, - 1 => memchr(self.dense[0], text), - 2 => memchr2(self.dense[0], self.dense[1], text), - 3 => memchr3(self.dense[0], self.dense[1], self.dense[2], text), - _ => self._find(text), - } - } - - /// Generic find that works on any sized set. - fn _find(&self, haystack: &[u8]) -> Option { - for (i, &b) in haystack.iter().enumerate() { - if self.sparse[b as usize] { - return Some(i); - } - } - None - } - - fn approximate_size(&self) -> usize { - (self.dense.len() * mem::size_of::()) - + (self.sparse.len() * mem::size_of::()) - } -} - -/// A simple wrapper around the memchr crate's memmem implementation. -/// -/// The API this exposes mirrors the API of previous substring searchers that -/// this supplanted. -#[derive(Clone, Debug)] -pub struct Memmem { - finder: memmem::Finder<'static>, - char_len: usize, -} - -impl Memmem { - fn new(pat: &[u8]) -> Memmem { - Memmem { - finder: memmem::Finder::new(pat).into_owned(), - char_len: char_len_lossy(pat), - } - } - - #[cfg_attr(feature = "perf-inline", inline(always))] - pub fn find(&self, haystack: &[u8]) -> Option { - self.finder.find(haystack) - } - - #[cfg_attr(feature = "perf-inline", inline(always))] - pub fn is_suffix(&self, text: &[u8]) -> bool { - if text.len() < self.len() { - return false; - } - &text[text.len() - self.len()..] == self.finder.needle() - } - - pub fn len(&self) -> usize { - self.finder.needle().len() - } - - pub fn char_len(&self) -> usize { - self.char_len - } - - fn approximate_size(&self) -> usize { - self.finder.needle().len() * mem::size_of::() - } -} - -fn char_len_lossy(bytes: &[u8]) -> usize { - String::from_utf8_lossy(bytes).chars().count() -} diff --git a/collector/compile-benchmarks/regex-1.5.5/src/literal/mod.rs b/collector/compile-benchmarks/regex-1.5.5/src/literal/mod.rs deleted file mode 100644 index 980f52330..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/src/literal/mod.rs +++ /dev/null @@ -1,55 +0,0 @@ -pub use self::imp::*; - -#[cfg(feature = "perf-literal")] -mod imp; - -#[allow(missing_docs)] -#[cfg(not(feature = "perf-literal"))] -mod imp { - use regex_syntax::hir::literal::Literals; - - #[derive(Clone, Debug)] - pub struct LiteralSearcher(()); - - impl LiteralSearcher { - pub fn empty() -> Self { - LiteralSearcher(()) - } - - pub fn prefixes(_: Literals) -> Self { - LiteralSearcher(()) - } - - pub fn suffixes(_: Literals) -> Self { - LiteralSearcher(()) - } - - pub fn complete(&self) -> bool { - false - } - - pub fn find(&self, _: &[u8]) -> Option<(usize, usize)> { - unreachable!() - } - - pub fn find_start(&self, _: &[u8]) -> Option<(usize, usize)> { - unreachable!() - } - - pub fn find_end(&self, _: &[u8]) -> Option<(usize, usize)> { - unreachable!() - } - - pub fn is_empty(&self) -> bool { - true - } - - pub fn len(&self) -> usize { - 0 - } - - pub fn approximate_size(&self) -> usize { - 0 - } - } -} diff --git a/collector/compile-benchmarks/regex-1.5.5/src/pattern.rs b/collector/compile-benchmarks/regex-1.5.5/src/pattern.rs deleted file mode 100644 index b4ffd8e16..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/src/pattern.rs 
+++ /dev/null @@ -1,63 +0,0 @@ -use std::str::pattern::{Pattern, SearchStep, Searcher}; - -use crate::re_unicode::{Matches, Regex}; - -#[derive(Debug)] -pub struct RegexSearcher<'r, 't> { - haystack: &'t str, - it: Matches<'r, 't>, - last_step_end: usize, - next_match: Option<(usize, usize)>, -} - -impl<'r, 't> Pattern<'t> for &'r Regex { - type Searcher = RegexSearcher<'r, 't>; - - fn into_searcher(self, haystack: &'t str) -> RegexSearcher<'r, 't> { - RegexSearcher { - haystack: haystack, - it: self.find_iter(haystack), - last_step_end: 0, - next_match: None, - } - } -} - -unsafe impl<'r, 't> Searcher<'t> for RegexSearcher<'r, 't> { - #[inline] - fn haystack(&self) -> &'t str { - self.haystack - } - - #[inline] - fn next(&mut self) -> SearchStep { - if let Some((s, e)) = self.next_match { - self.next_match = None; - self.last_step_end = e; - return SearchStep::Match(s, e); - } - match self.it.next() { - None => { - if self.last_step_end < self.haystack().len() { - let last = self.last_step_end; - self.last_step_end = self.haystack().len(); - SearchStep::Reject(last, self.haystack().len()) - } else { - SearchStep::Done - } - } - Some(m) => { - let (s, e) = (m.start(), m.end()); - if s == self.last_step_end { - self.last_step_end = e; - SearchStep::Match(s, e) - } else { - self.next_match = Some((s, e)); - let last = self.last_step_end; - self.last_step_end = s; - SearchStep::Reject(last, s) - } - } - } - } -} diff --git a/collector/compile-benchmarks/regex-1.5.5/src/pikevm.rs b/collector/compile-benchmarks/regex-1.5.5/src/pikevm.rs deleted file mode 100644 index 9a1424086..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/src/pikevm.rs +++ /dev/null @@ -1,360 +0,0 @@ -// This module implements the Pike VM. That is, it guarantees linear time -// search of a regex on any text with memory use proportional to the size of -// the regex. -// -// It is equal in power to the backtracking engine in this crate, except the -// backtracking engine is typically faster on small regexes/texts at the -// expense of a bigger memory footprint. -// -// It can do more than the DFA can (specifically, record capture locations -// and execute Unicode word boundary assertions), but at a slower speed. -// Specifically, the Pike VM executes a DFA implicitly by repeatedly expanding -// epsilon transitions. That is, the Pike VM engine can be in multiple states -// at once where as the DFA is only ever in one state at a time. -// -// Therefore, the Pike VM is generally treated as the fallback when the other -// matching engines either aren't feasible to run or are insufficient. - -use std::mem; - -use crate::exec::ProgramCache; -use crate::input::{Input, InputAt}; -use crate::prog::{InstPtr, Program}; -use crate::re_trait::Slot; -use crate::sparse::SparseSet; - -/// An NFA simulation matching engine. -#[derive(Debug)] -pub struct Fsm<'r, I> { - /// The sequence of opcodes (among other things) that is actually executed. - /// - /// The program may be byte oriented or Unicode codepoint oriented. - prog: &'r Program, - /// An explicit stack used for following epsilon transitions. (This is - /// borrowed from the cache.) - stack: &'r mut Vec, - /// The input to search. - input: I, -} - -/// A cached allocation that can be reused on each execution. -#[derive(Clone, Debug)] -pub struct Cache { - /// A pair of ordered sets for tracking NFA states. - clist: Threads, - nlist: Threads, - /// An explicit stack used for following epsilon transitions. - stack: Vec, -} - -/// An ordered set of NFA states and their captures. 
-#[derive(Clone, Debug)] -struct Threads { - /// An ordered set of opcodes (each opcode is an NFA state). - set: SparseSet, - /// Captures for every NFA state. - /// - /// It is stored in row-major order, where the columns are the capture - /// slots and the rows are the states. - caps: Vec, - /// The number of capture slots stored per thread. (Every capture has - /// two slots.) - slots_per_thread: usize, -} - -/// A representation of an explicit stack frame when following epsilon -/// transitions. This is used to avoid recursion. -#[derive(Clone, Debug)] -enum FollowEpsilon { - /// Follow transitions at the given instruction pointer. - IP(InstPtr), - /// Restore the capture slot with the given position in the input. - Capture { slot: usize, pos: Slot }, -} - -impl Cache { - /// Create a new allocation used by the NFA machine to record execution - /// and captures. - pub fn new(_prog: &Program) -> Self { - Cache { clist: Threads::new(), nlist: Threads::new(), stack: vec![] } - } -} - -impl<'r, I: Input> Fsm<'r, I> { - /// Execute the NFA matching engine. - /// - /// If there's a match, `exec` returns `true` and populates the given - /// captures accordingly. - pub fn exec( - prog: &'r Program, - cache: &ProgramCache, - matches: &mut [bool], - slots: &mut [Slot], - quit_after_match: bool, - input: I, - start: usize, - end: usize, - ) -> bool { - let mut cache = cache.borrow_mut(); - let cache = &mut cache.pikevm; - cache.clist.resize(prog.len(), prog.captures.len()); - cache.nlist.resize(prog.len(), prog.captures.len()); - let at = input.at(start); - Fsm { prog: prog, stack: &mut cache.stack, input: input }.exec_( - &mut cache.clist, - &mut cache.nlist, - matches, - slots, - quit_after_match, - at, - end, - ) - } - - fn exec_( - &mut self, - mut clist: &mut Threads, - mut nlist: &mut Threads, - matches: &mut [bool], - slots: &mut [Slot], - quit_after_match: bool, - mut at: InputAt, - end: usize, - ) -> bool { - let mut matched = false; - let mut all_matched = false; - clist.set.clear(); - nlist.set.clear(); - 'LOOP: loop { - if clist.set.is_empty() { - // Three ways to bail out when our current set of threads is - // empty. - // - // 1. We have a match---so we're done exploring any possible - // alternatives. Time to quit. (We can't do this if we're - // looking for matches for multiple regexes, unless we know - // they all matched.) - // - // 2. If the expression starts with a '^' we can terminate as - // soon as the last thread dies. - if (matched && matches.len() <= 1) - || all_matched - || (!at.is_start() && self.prog.is_anchored_start) - { - break; - } - - // 3. If there's a literal prefix for the program, try to - // jump ahead quickly. If it can't be found, then we can - // bail out early. - if !self.prog.prefixes.is_empty() { - at = match self.input.prefix_at(&self.prog.prefixes, at) { - None => break, - Some(at) => at, - }; - } - } - - // This simulates a preceding '.*?' for every regex by adding - // a state starting at the current position in the input for the - // beginning of the program only if we don't already have a match. - if clist.set.is_empty() - || (!self.prog.is_anchored_start && !all_matched) - { - self.add(&mut clist, slots, 0, at); - } - // The previous call to "add" actually inspects the position just - // before the current character. For stepping through the machine, - // we can to look at the current character, so we advance the - // input. 
- let at_next = self.input.at(at.next_pos()); - for i in 0..clist.set.len() { - let ip = clist.set[i]; - if self.step( - &mut nlist, - matches, - slots, - clist.caps(ip), - ip, - at, - at_next, - ) { - matched = true; - all_matched = all_matched || matches.iter().all(|&b| b); - if quit_after_match { - // If we only care if a match occurs (not its - // position), then we can quit right now. - break 'LOOP; - } - if self.prog.matches.len() == 1 { - // We don't need to check the rest of the threads - // in this set because we've matched something - // ("leftmost-first"). However, we still need to check - // threads in the next set to support things like - // greedy matching. - // - // This is only true on normal regexes. For regex sets, - // we need to mush on to observe other matches. - break; - } - } - } - if at.pos() >= end { - break; - } - at = at_next; - mem::swap(clist, nlist); - nlist.set.clear(); - } - matched - } - - /// Step through the input, one token (byte or codepoint) at a time. - /// - /// nlist is the set of states that will be processed on the next token - /// in the input. - /// - /// caps is the set of captures passed by the caller of the NFA. They are - /// written to only when a match state is visited. - /// - /// thread_caps is the set of captures set for the current NFA state, ip. - /// - /// at and at_next are the current and next positions in the input. at or - /// at_next may be EOF. - fn step( - &mut self, - nlist: &mut Threads, - matches: &mut [bool], - slots: &mut [Slot], - thread_caps: &mut [Option], - ip: usize, - at: InputAt, - at_next: InputAt, - ) -> bool { - use crate::prog::Inst::*; - match self.prog[ip] { - Match(match_slot) => { - if match_slot < matches.len() { - matches[match_slot] = true; - } - for (slot, val) in slots.iter_mut().zip(thread_caps.iter()) { - *slot = *val; - } - true - } - Char(ref inst) => { - if inst.c == at.char() { - self.add(nlist, thread_caps, inst.goto, at_next); - } - false - } - Ranges(ref inst) => { - if inst.matches(at.char()) { - self.add(nlist, thread_caps, inst.goto, at_next); - } - false - } - Bytes(ref inst) => { - if let Some(b) = at.byte() { - if inst.matches(b) { - self.add(nlist, thread_caps, inst.goto, at_next); - } - } - false - } - EmptyLook(_) | Save(_) | Split(_) => false, - } - } - - /// Follows epsilon transitions and adds them for processing to nlist, - /// starting at and including ip. - fn add( - &mut self, - nlist: &mut Threads, - thread_caps: &mut [Option], - ip: usize, - at: InputAt, - ) { - self.stack.push(FollowEpsilon::IP(ip)); - while let Some(frame) = self.stack.pop() { - match frame { - FollowEpsilon::IP(ip) => { - self.add_step(nlist, thread_caps, ip, at); - } - FollowEpsilon::Capture { slot, pos } => { - thread_caps[slot] = pos; - } - } - } - } - - /// A helper function for add that avoids excessive pushing to the stack. - fn add_step( - &mut self, - nlist: &mut Threads, - thread_caps: &mut [Option], - mut ip: usize, - at: InputAt, - ) { - // Instead of pushing and popping to the stack, we mutate ip as we - // traverse the set of states. We only push to the stack when we - // absolutely need recursion (restoring captures or following a - // branch). - use crate::prog::Inst::*; - loop { - // Don't visit states we've already added. 
- if nlist.set.contains(ip) { - return; - } - nlist.set.insert(ip); - match self.prog[ip] { - EmptyLook(ref inst) => { - if self.input.is_empty_match(at, inst) { - ip = inst.goto; - } - } - Save(ref inst) => { - if inst.slot < thread_caps.len() { - self.stack.push(FollowEpsilon::Capture { - slot: inst.slot, - pos: thread_caps[inst.slot], - }); - thread_caps[inst.slot] = Some(at.pos()); - } - ip = inst.goto; - } - Split(ref inst) => { - self.stack.push(FollowEpsilon::IP(inst.goto2)); - ip = inst.goto1; - } - Match(_) | Char(_) | Ranges(_) | Bytes(_) => { - let t = &mut nlist.caps(ip); - for (slot, val) in t.iter_mut().zip(thread_caps.iter()) { - *slot = *val; - } - return; - } - } - } - } -} - -impl Threads { - fn new() -> Self { - Threads { set: SparseSet::new(0), caps: vec![], slots_per_thread: 0 } - } - - fn resize(&mut self, num_insts: usize, ncaps: usize) { - if num_insts == self.set.capacity() { - return; - } - self.slots_per_thread = ncaps * 2; - self.set = SparseSet::new(num_insts); - self.caps = vec![None; self.slots_per_thread * num_insts]; - } - - fn caps(&mut self, pc: usize) -> &mut [Option] { - let i = pc * self.slots_per_thread; - &mut self.caps[i..i + self.slots_per_thread] - } -} diff --git a/collector/compile-benchmarks/regex-1.5.5/src/pool.rs b/collector/compile-benchmarks/regex-1.5.5/src/pool.rs deleted file mode 100644 index 6a6f15b19..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/src/pool.rs +++ /dev/null @@ -1,333 +0,0 @@ -// This module provides a relatively simple thread-safe pool of reusable -// objects. For the most part, it's implemented by a stack represented by a -// Mutex>. It has one small trick: because unlocking a mutex is somewhat -// costly, in the case where a pool is accessed by the first thread that tried -// to get a value, we bypass the mutex. Here are some benchmarks showing the -// difference. -// -// 1) misc::anchored_literal_long_non_match 21 (18571 MB/s) -// 2) misc::anchored_literal_long_non_match 107 (3644 MB/s) -// 3) misc::anchored_literal_long_non_match 45 (8666 MB/s) -// 4) misc::anchored_literal_long_non_match 19 (20526 MB/s) -// -// (1) represents our baseline: the master branch at the time of writing when -// using the 'thread_local' crate to implement the pool below. -// -// (2) represents a naive pool implemented completely via Mutex>. There -// is no special trick for bypassing the mutex. -// -// (3) is the same as (2), except it uses Mutex>>. It is twice as -// fast because a Box is much smaller than the T we use with a Pool in this -// crate. So pushing and popping a Box from a Vec is quite a bit faster -// than for T. -// -// (4) is the same as (3), but with the trick for bypassing the mutex in the -// case of the first-to-get thread. -// -// Why move off of thread_local? Even though (4) is a hair faster than (1) -// above, this was not the main goal. The main goal was to move off of -// thread_local and find a way to *simply* re-capture some of its speed for -// regex's specific case. So again, why move off of it? The *primary* reason is -// because of memory leaks. See https://github.com/rust-lang/regex/issues/362 -// for example. (Why do I want it to be simple? 
Well, I suppose what I mean is, -// "use as much safe code as possible to minimize risk and be as sure as I can -// be that it is correct.") -// -// My guess is that the thread_local design is probably not appropriate for -// regex since its memory usage scales to the number of active threads that -// have used a regex, where as the pool below scales to the number of threads -// that simultaneously use a regex. While neither case permits contraction, -// since we own the pool data structure below, we can add contraction if a -// clear use case pops up in the wild. More pressingly though, it seems that -// there are at least some use case patterns where one might have many threads -// sitting around that might have used a regex at one point. While thread_local -// does try to reuse space previously used by a thread that has since stopped, -// its maximal memory usage still scales with the total number of active -// threads. In contrast, the pool below scales with the total number of threads -// *simultaneously* using the pool. The hope is that this uses less memory -// overall. And if it doesn't, we can hopefully tune it somehow. -// -// It seems that these sort of conditions happen frequently -// in FFI inside of other more "managed" languages. This was -// mentioned in the issue linked above, and also mentioned here: -// https://github.com/BurntSushi/rure-go/issues/3. And in particular, users -// confirm that disabling the use of thread_local resolves the leak. -// -// There were other weaker reasons for moving off of thread_local as well. -// Namely, at the time, I was looking to reduce dependencies. And for something -// like regex, maintenance can be simpler when we own the full dependency tree. - -use std::panic::{RefUnwindSafe, UnwindSafe}; -use std::sync::atomic::{AtomicUsize, Ordering}; -use std::sync::Mutex; - -/// An atomic counter used to allocate thread IDs. -static COUNTER: AtomicUsize = AtomicUsize::new(1); - -thread_local!( - /// A thread local used to assign an ID to a thread. - static THREAD_ID: usize = { - let next = COUNTER.fetch_add(1, Ordering::Relaxed); - // SAFETY: We cannot permit the reuse of thread IDs since reusing a - // thread ID might result in more than one thread "owning" a pool, - // and thus, permit accessing a mutable value from multiple threads - // simultaneously without synchronization. The intent of this panic is - // to be a sanity check. It is not expected that the thread ID space - // will actually be exhausted in practice. - // - // This checks that the counter never wraps around, since atomic - // addition wraps around on overflow. - if next == 0 { - panic!("regex: thread ID allocation space exhausted"); - } - next - }; -); - -/// The type of the function used to create values in a pool when the pool is -/// empty and the caller requests one. -type CreateFn = - Box T + Send + Sync + UnwindSafe + RefUnwindSafe + 'static>; - -/// A simple thread safe pool for reusing values. -/// -/// Getting a value out comes with a guard. When that guard is dropped, the -/// value is automatically put back in the pool. -/// -/// A Pool impls Sync when T is Send (even if it's not Sync). This means -/// that T can use interior mutability. This is possible because a pool is -/// guaranteed to provide a value to exactly one thread at any time. -/// -/// Currently, a pool never contracts in size. Its size is proportional to the -/// number of simultaneous uses. -pub struct Pool { - /// A stack of T values to hand out. 
These are used when a Pool is - /// accessed by a thread that didn't create it. - stack: Mutex>>, - /// A function to create more T values when stack is empty and a caller - /// has requested a T. - create: CreateFn, - /// The ID of the thread that owns this pool. The owner is the thread - /// that makes the first call to 'get'. When the owner calls 'get', it - /// gets 'owner_val' directly instead of returning a T from 'stack'. - /// See comments elsewhere for details, but this is intended to be an - /// optimization for the common case that makes getting a T faster. - /// - /// It is initialized to a value of zero (an impossible thread ID) as a - /// sentinel to indicate that it is unowned. - owner: AtomicUsize, - /// A value to return when the caller is in the same thread that created - /// the Pool. - owner_val: T, -} - -// SAFETY: Since we want to use a Pool from multiple threads simultaneously -// behind an Arc, we need for it to be Sync. In cases where T is sync, Pool -// would be Sync. However, since we use a Pool to store mutable scratch space, -// we wind up using a T that has interior mutability and is thus itself not -// Sync. So what we *really* want is for our Pool to by Sync even when T is -// not Sync (but is at least Send). -// -// The only non-sync aspect of a Pool is its 'owner_val' field, which is used -// to implement faster access to a pool value in the common case of a pool -// being accessed in the same thread in which it was created. The 'stack' field -// is also shared, but a Mutex where T: Send is already Sync. So we only -// need to worry about 'owner_val'. -// -// The key is to guarantee that 'owner_val' can only ever be accessed from one -// thread. In our implementation below, we guarantee this by only returning the -// 'owner_val' when the ID of the current thread matches the ID of the thread -// that created the Pool. Since this can only ever be one thread, it follows -// that only one thread can access 'owner_val' at any point in time. Thus, it -// is safe to declare that Pool is Sync when T is Send. -// -// NOTE: It would also be possible to make the owning thread be the *first* -// thread that tries to get a value out of a Pool. However, the current -// implementation is a little simpler and it's not clear if making the first -// thread (rather than the creating thread) is meaningfully better. -// -// If there is a way to achieve our performance goals using safe code, then -// I would very much welcome a patch. As it stands, the implementation below -// tries to balance safety with performance. The case where a Regex is used -// from multiple threads simultaneously will suffer a bit since getting a cache -// will require unlocking a mutex. -unsafe impl Sync for Pool {} - -impl ::std::fmt::Debug for Pool { - fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result { - f.debug_struct("Pool") - .field("stack", &self.stack) - .field("owner", &self.owner) - .field("owner_val", &self.owner_val) - .finish() - } -} - -/// A guard that is returned when a caller requests a value from the pool. -/// -/// The purpose of the guard is to use RAII to automatically put the value back -/// in the pool once it's dropped. -#[derive(Debug)] -pub struct PoolGuard<'a, T: Send> { - /// The pool that this guard is attached to. - pool: &'a Pool, - /// This is None when the guard represents the special "owned" value. In - /// which case, the value is retrieved from 'pool.owner_val'. - value: Option>, -} - -impl Pool { - /// Create a new pool. 
The given closure is used to create values in the - /// pool when necessary. - pub fn new(create: CreateFn) -> Pool { - let owner = AtomicUsize::new(0); - let owner_val = create(); - Pool { stack: Mutex::new(vec![]), create, owner, owner_val } - } - - /// Get a value from the pool. The caller is guaranteed to have exclusive - /// access to the given value. - /// - /// Note that there is no guarantee provided about which value in the - /// pool is returned. That is, calling get, dropping the guard (causing - /// the value to go back into the pool) and then calling get again is NOT - /// guaranteed to return the same value received in the first get call. - #[cfg_attr(feature = "perf-inline", inline(always))] - pub fn get(&self) -> PoolGuard<'_, T> { - // Our fast path checks if the caller is the thread that "owns" this - // pool. Or stated differently, whether it is the first thread that - // tried to extract a value from the pool. If it is, then we can return - // a T to the caller without going through a mutex. - // - // SAFETY: We must guarantee that only one thread gets access to this - // value. Since a thread is uniquely identified by the THREAD_ID thread - // local, it follows that is the caller's thread ID is equal to the - // owner, then only one thread may receive this value. - let caller = THREAD_ID.with(|id| *id); - let owner = self.owner.load(Ordering::Relaxed); - if caller == owner { - return self.guard_owned(); - } - self.get_slow(caller, owner) - } - - /// This is the "slow" version that goes through a mutex to pop an - /// allocated value off a stack to return to the caller. (Or, if the stack - /// is empty, a new value is created.) - /// - /// If the pool has no owner, then this will set the owner. - #[cold] - fn get_slow(&self, caller: usize, owner: usize) -> PoolGuard<'_, T> { - use std::sync::atomic::Ordering::Relaxed; - - if owner == 0 { - // The sentinel 0 value means this pool is not yet owned. We - // try to atomically set the owner. If we do, then this thread - // becomes the owner and we can return a guard that represents - // the special T for the owner. - let res = self.owner.compare_exchange(0, caller, Relaxed, Relaxed); - if res.is_ok() { - return self.guard_owned(); - } - } - let mut stack = self.stack.lock().unwrap(); - let value = match stack.pop() { - None => Box::new((self.create)()), - Some(value) => value, - }; - self.guard_stack(value) - } - - /// Puts a value back into the pool. Callers don't need to call this. Once - /// the guard that's returned by 'get' is dropped, it is put back into the - /// pool automatically. - fn put(&self, value: Box) { - let mut stack = self.stack.lock().unwrap(); - stack.push(value); - } - - /// Create a guard that represents the special owned T. - fn guard_owned(&self) -> PoolGuard<'_, T> { - PoolGuard { pool: self, value: None } - } - - /// Create a guard that contains a value from the pool's stack. - fn guard_stack(&self, value: Box) -> PoolGuard<'_, T> { - PoolGuard { pool: self, value: Some(value) } - } -} - -impl<'a, T: Send> PoolGuard<'a, T> { - /// Return the underlying value. 
- pub fn value(&self) -> &T { - match self.value { - None => &self.pool.owner_val, - Some(ref v) => &**v, - } - } -} - -impl<'a, T: Send> Drop for PoolGuard<'a, T> { - #[cfg_attr(feature = "perf-inline", inline(always))] - fn drop(&mut self) { - if let Some(value) = self.value.take() { - self.pool.put(value); - } - } -} - -#[cfg(test)] -mod tests { - use std::panic::{RefUnwindSafe, UnwindSafe}; - - use super::*; - - #[test] - fn oibits() { - use crate::exec::ProgramCache; - - fn has_oibits() {} - has_oibits::>(); - } - - // Tests that Pool implements the "single owner" optimization. That is, the - // thread that first accesses the pool gets its own copy, while all other - // threads get distinct copies. - #[test] - fn thread_owner_optimization() { - use std::cell::RefCell; - use std::sync::Arc; - - let pool: Arc>>> = - Arc::new(Pool::new(Box::new(|| RefCell::new(vec!['a'])))); - pool.get().value().borrow_mut().push('x'); - - let pool1 = pool.clone(); - let t1 = std::thread::spawn(move || { - let guard = pool1.get(); - let v = guard.value(); - v.borrow_mut().push('y'); - }); - - let pool2 = pool.clone(); - let t2 = std::thread::spawn(move || { - let guard = pool2.get(); - let v = guard.value(); - v.borrow_mut().push('z'); - }); - - t1.join().unwrap(); - t2.join().unwrap(); - - // If we didn't implement the single owner optimization, then one of - // the threads above is likely to have mutated the [a, x] vec that - // we stuffed in the pool before spawning the threads. But since - // neither thread was first to access the pool, and because of the - // optimization, we should be guaranteed that neither thread mutates - // the special owned pool value. - // - // (Technically this is an implementation detail and not a contract of - // Pool's API.) - assert_eq!(vec!['a', 'x'], *pool.get().value().borrow()); - } -} diff --git a/collector/compile-benchmarks/regex-1.5.5/src/prog.rs b/collector/compile-benchmarks/regex-1.5.5/src/prog.rs deleted file mode 100644 index 475a8112a..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/src/prog.rs +++ /dev/null @@ -1,447 +0,0 @@ -use std::cmp::Ordering; -use std::collections::HashMap; -use std::fmt; -use std::mem; -use std::ops::Deref; -use std::slice; -use std::sync::Arc; - -use crate::input::Char; -use crate::literal::LiteralSearcher; - -/// `InstPtr` represents the index of an instruction in a regex program. -pub type InstPtr = usize; - -/// Program is a sequence of instructions and various facts about thos -/// instructions. -#[derive(Clone)] -pub struct Program { - /// A sequence of instructions that represents an NFA. - pub insts: Vec, - /// Pointers to each Match instruction in the sequence. - /// - /// This is always length 1 unless this program represents a regex set. - pub matches: Vec, - /// The ordered sequence of all capture groups extracted from the AST. - /// Unnamed groups are `None`. - pub captures: Vec>, - /// Pointers to all named capture groups into `captures`. - pub capture_name_idx: Arc>, - /// A pointer to the start instruction. This can vary depending on how - /// the program was compiled. For example, programs for use with the DFA - /// engine have a `.*?` inserted at the beginning of unanchored regular - /// expressions. The actual starting point of the program is after the - /// `.*?`. - pub start: InstPtr, - /// A set of equivalence classes for discriminating bytes in the compiled - /// program. - pub byte_classes: Vec, - /// When true, this program can only match valid UTF-8. 
- pub only_utf8: bool, - /// When true, this program uses byte range instructions instead of Unicode - /// range instructions. - pub is_bytes: bool, - /// When true, the program is compiled for DFA matching. For example, this - /// implies `is_bytes` and also inserts a preceding `.*?` for unanchored - /// regexes. - pub is_dfa: bool, - /// When true, the program matches text in reverse (for use only in the - /// DFA). - pub is_reverse: bool, - /// Whether the regex must match from the start of the input. - pub is_anchored_start: bool, - /// Whether the regex must match at the end of the input. - pub is_anchored_end: bool, - /// Whether this program contains a Unicode word boundary instruction. - pub has_unicode_word_boundary: bool, - /// A possibly empty machine for very quickly matching prefix literals. - pub prefixes: LiteralSearcher, - /// A limit on the size of the cache that the DFA is allowed to use while - /// matching. - /// - /// The cache limit specifies approximately how much space we're willing to - /// give to the state cache. Once the state cache exceeds the size, it is - /// wiped and all states must be re-computed. - /// - /// Note that this value does not impact correctness. It can be set to 0 - /// and the DFA will run just fine. (It will only ever store exactly one - /// state in the cache, and will likely run very slowly, but it will work.) - /// - /// Also note that this limit is *per thread of execution*. That is, - /// if the same regex is used to search text across multiple threads - /// simultaneously, then the DFA cache is not shared. Instead, copies are - /// made. - pub dfa_size_limit: usize, -} - -impl Program { - /// Creates an empty instruction sequence. Fields are given default - /// values. - pub fn new() -> Self { - Program { - insts: vec![], - matches: vec![], - captures: vec![], - capture_name_idx: Arc::new(HashMap::new()), - start: 0, - byte_classes: vec![0; 256], - only_utf8: true, - is_bytes: false, - is_dfa: false, - is_reverse: false, - is_anchored_start: false, - is_anchored_end: false, - has_unicode_word_boundary: false, - prefixes: LiteralSearcher::empty(), - dfa_size_limit: 2 * (1 << 20), - } - } - - /// If pc is an index to a no-op instruction (like Save), then return the - /// next pc that is not a no-op instruction. - pub fn skip(&self, mut pc: usize) -> usize { - loop { - match self[pc] { - Inst::Save(ref i) => pc = i.goto, - _ => return pc, - } - } - } - - /// Return true if and only if an execution engine at instruction `pc` will - /// always lead to a match. - pub fn leads_to_match(&self, pc: usize) -> bool { - if self.matches.len() > 1 { - // If we have a regex set, then we have more than one ending - // state, so leading to one of those states is generally - // meaningless. - return false; - } - match self[self.skip(pc)] { - Inst::Match(_) => true, - _ => false, - } - } - - /// Returns true if the current configuration demands that an implicit - /// `.*?` be prepended to the instruction sequence. - pub fn needs_dotstar(&self) -> bool { - self.is_dfa && !self.is_reverse && !self.is_anchored_start - } - - /// Returns true if this program uses Byte instructions instead of - /// Char/Range instructions. - pub fn uses_bytes(&self) -> bool { - self.is_bytes || self.is_dfa - } - - /// Returns true if this program exclusively matches valid UTF-8 bytes. - /// - /// That is, if an invalid UTF-8 byte is seen, then no match is possible. 
- pub fn only_utf8(&self) -> bool { - self.only_utf8 - } - - /// Return the approximate heap usage of this instruction sequence in - /// bytes. - pub fn approximate_size(&self) -> usize { - // The only instruction that uses heap space is Ranges (for - // Unicode codepoint programs) to store non-overlapping codepoint - // ranges. To keep this operation constant time, we ignore them. - (self.len() * mem::size_of::()) - + (self.matches.len() * mem::size_of::()) - + (self.captures.len() * mem::size_of::>()) - + (self.capture_name_idx.len() - * (mem::size_of::() + mem::size_of::())) - + (self.byte_classes.len() * mem::size_of::()) - + self.prefixes.approximate_size() - } -} - -impl Deref for Program { - type Target = [Inst]; - - #[cfg_attr(feature = "perf-inline", inline(always))] - fn deref(&self) -> &Self::Target { - &*self.insts - } -} - -impl fmt::Debug for Program { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - use self::Inst::*; - - fn with_goto(cur: usize, goto: usize, fmtd: String) -> String { - if goto == cur + 1 { - fmtd - } else { - format!("{} (goto: {})", fmtd, goto) - } - } - - fn visible_byte(b: u8) -> String { - use std::ascii::escape_default; - let escaped = escape_default(b).collect::>(); - String::from_utf8_lossy(&escaped).into_owned() - } - - for (pc, inst) in self.iter().enumerate() { - match *inst { - Match(slot) => write!(f, "{:04} Match({:?})", pc, slot)?, - Save(ref inst) => { - let s = format!("{:04} Save({})", pc, inst.slot); - write!(f, "{}", with_goto(pc, inst.goto, s))?; - } - Split(ref inst) => { - write!( - f, - "{:04} Split({}, {})", - pc, inst.goto1, inst.goto2 - )?; - } - EmptyLook(ref inst) => { - let s = format!("{:?}", inst.look); - write!(f, "{:04} {}", pc, with_goto(pc, inst.goto, s))?; - } - Char(ref inst) => { - let s = format!("{:?}", inst.c); - write!(f, "{:04} {}", pc, with_goto(pc, inst.goto, s))?; - } - Ranges(ref inst) => { - let ranges = inst - .ranges - .iter() - .map(|r| format!("{:?}-{:?}", r.0, r.1)) - .collect::>() - .join(", "); - write!( - f, - "{:04} {}", - pc, - with_goto(pc, inst.goto, ranges) - )?; - } - Bytes(ref inst) => { - let s = format!( - "Bytes({}, {})", - visible_byte(inst.start), - visible_byte(inst.end) - ); - write!(f, "{:04} {}", pc, with_goto(pc, inst.goto, s))?; - } - } - if pc == self.start { - write!(f, " (start)")?; - } - write!(f, "\n")?; - } - Ok(()) - } -} - -impl<'a> IntoIterator for &'a Program { - type Item = &'a Inst; - type IntoIter = slice::Iter<'a, Inst>; - fn into_iter(self) -> Self::IntoIter { - self.iter() - } -} - -/// Inst is an instruction code in a Regex program. -/// -/// Regrettably, a regex program either contains Unicode codepoint -/// instructions (Char and Ranges) or it contains byte instructions (Bytes). -/// A regex program can never contain both. -/// -/// It would be worth investigating splitting this into two distinct types and -/// then figuring out how to make the matching engines polymorphic over those -/// types without sacrificing performance. -/// -/// Other than the benefit of moving invariants into the type system, another -/// benefit is the decreased size. If we remove the `Char` and `Ranges` -/// instructions from the `Inst` enum, then its size shrinks from 32 bytes to -/// 24 bytes. (This is because of the removal of a `Box<[]>` in the `Ranges` -/// variant.) Given that byte based machines are typically much bigger than -/// their Unicode analogues (because they can decode UTF-8 directly), this ends -/// up being a pretty significant savings. 
-#[derive(Clone, Debug)] -pub enum Inst { - /// Match indicates that the program has reached a match state. - /// - /// The number in the match corresponds to the Nth logical regular - /// expression in this program. This index is always 0 for normal regex - /// programs. Values greater than 0 appear when compiling regex sets, and - /// each match instruction gets its own unique value. The value corresponds - /// to the Nth regex in the set. - Match(usize), - /// Save causes the program to save the current location of the input in - /// the slot indicated by InstSave. - Save(InstSave), - /// Split causes the program to diverge to one of two paths in the - /// program, preferring goto1 in InstSplit. - Split(InstSplit), - /// EmptyLook represents a zero-width assertion in a regex program. A - /// zero-width assertion does not consume any of the input text. - EmptyLook(InstEmptyLook), - /// Char requires the regex program to match the character in InstChar at - /// the current position in the input. - Char(InstChar), - /// Ranges requires the regex program to match the character at the current - /// position in the input with one of the ranges specified in InstRanges. - Ranges(InstRanges), - /// Bytes is like Ranges, except it expresses a single byte range. It is - /// used in conjunction with Split instructions to implement multi-byte - /// character classes. - Bytes(InstBytes), -} - -impl Inst { - /// Returns true if and only if this is a match instruction. - pub fn is_match(&self) -> bool { - match *self { - Inst::Match(_) => true, - _ => false, - } - } -} - -/// Representation of the Save instruction. -#[derive(Clone, Debug)] -pub struct InstSave { - /// The next location to execute in the program. - pub goto: InstPtr, - /// The capture slot (there are two slots for every capture in a regex, - /// including the zeroth capture for the entire match). - pub slot: usize, -} - -/// Representation of the Split instruction. -#[derive(Clone, Debug)] -pub struct InstSplit { - /// The first instruction to try. A match resulting from following goto1 - /// has precedence over a match resulting from following goto2. - pub goto1: InstPtr, - /// The second instruction to try. A match resulting from following goto1 - /// has precedence over a match resulting from following goto2. - pub goto2: InstPtr, -} - -/// Representation of the `EmptyLook` instruction. -#[derive(Clone, Debug)] -pub struct InstEmptyLook { - /// The next location to execute in the program if this instruction - /// succeeds. - pub goto: InstPtr, - /// The type of zero-width assertion to check. - pub look: EmptyLook, -} - -/// The set of zero-width match instructions. -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum EmptyLook { - /// Start of line or input. - StartLine, - /// End of line or input. - EndLine, - /// Start of input. - StartText, - /// End of input. - EndText, - /// Word character on one side and non-word character on other. - WordBoundary, - /// Word character on both sides or non-word character on both sides. - NotWordBoundary, - /// ASCII word boundary. - WordBoundaryAscii, - /// Not ASCII word boundary. - NotWordBoundaryAscii, -} - -/// Representation of the Char instruction. -#[derive(Clone, Debug)] -pub struct InstChar { - /// The next location to execute in the program if this instruction - /// succeeds. - pub goto: InstPtr, - /// The character to test. - pub c: char, -} - -/// Representation of the Ranges instruction. 
-#[derive(Clone, Debug)] -pub struct InstRanges { - /// The next location to execute in the program if this instruction - /// succeeds. - pub goto: InstPtr, - /// The set of Unicode scalar value ranges to test. - pub ranges: Box<[(char, char)]>, -} - -impl InstRanges { - /// Tests whether the given input character matches this instruction. - pub fn matches(&self, c: Char) -> bool { - // This speeds up the `match_class_unicode` benchmark by checking - // some common cases quickly without binary search. e.g., Matching - // a Unicode class on predominantly ASCII text. - for r in self.ranges.iter().take(4) { - if c < r.0 { - return false; - } - if c <= r.1 { - return true; - } - } - self.ranges - .binary_search_by(|r| { - if r.1 < c { - Ordering::Less - } else if r.0 > c { - Ordering::Greater - } else { - Ordering::Equal - } - }) - .is_ok() - } - - /// Return the number of distinct characters represented by all of the - /// ranges. - pub fn num_chars(&self) -> usize { - self.ranges - .iter() - .map(|&(s, e)| 1 + (e as u32) - (s as u32)) - .sum::() as usize - } -} - -/// Representation of the Bytes instruction. -#[derive(Clone, Debug)] -pub struct InstBytes { - /// The next location to execute in the program if this instruction - /// succeeds. - pub goto: InstPtr, - /// The start (inclusive) of this byte range. - pub start: u8, - /// The end (inclusive) of this byte range. - pub end: u8, -} - -impl InstBytes { - /// Returns true if and only if the given byte is in this range. - pub fn matches(&self, byte: u8) -> bool { - self.start <= byte && byte <= self.end - } -} - -#[cfg(test)] -mod test { - #[test] - #[cfg(target_pointer_width = "64")] - fn test_size_of_inst() { - use std::mem::size_of; - - use super::Inst; - - assert_eq!(32, size_of::()); - } -} diff --git a/collector/compile-benchmarks/regex-1.5.5/src/re_builder.rs b/collector/compile-benchmarks/regex-1.5.5/src/re_builder.rs deleted file mode 100644 index ee6383690..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/src/re_builder.rs +++ /dev/null @@ -1,421 +0,0 @@ -/// The set of user configurable options for compiling zero or more regexes. -#[derive(Clone, Debug)] -#[allow(missing_docs)] -pub struct RegexOptions { - pub pats: Vec, - pub size_limit: usize, - pub dfa_size_limit: usize, - pub nest_limit: u32, - pub case_insensitive: bool, - pub multi_line: bool, - pub dot_matches_new_line: bool, - pub swap_greed: bool, - pub ignore_whitespace: bool, - pub unicode: bool, - pub octal: bool, -} - -impl Default for RegexOptions { - fn default() -> Self { - RegexOptions { - pats: vec![], - size_limit: 10 * (1 << 20), - dfa_size_limit: 2 * (1 << 20), - nest_limit: 250, - case_insensitive: false, - multi_line: false, - dot_matches_new_line: false, - swap_greed: false, - ignore_whitespace: false, - unicode: true, - octal: false, - } - } -} - -macro_rules! define_builder { - ($name:ident, $regex_mod:ident, $only_utf8:expr) => { - pub mod $name { - use super::RegexOptions; - use crate::error::Error; - use crate::exec::ExecBuilder; - - use crate::$regex_mod::Regex; - - /// A configurable builder for a regular expression. - /// - /// A builder can be used to configure how the regex is built, for example, by - /// setting the default flags (which can be overridden in the expression - /// itself) or setting various limits. - #[derive(Debug)] - pub struct RegexBuilder(RegexOptions); - - impl RegexBuilder { - /// Create a new regular expression builder with the given pattern. 
- /// - /// If the pattern is invalid, then an error will be returned when - /// `build` is called. - pub fn new(pattern: &str) -> RegexBuilder { - let mut builder = RegexBuilder(RegexOptions::default()); - builder.0.pats.push(pattern.to_owned()); - builder - } - - /// Consume the builder and compile the regular expression. - /// - /// Note that calling `as_str` on the resulting `Regex` will produce the - /// pattern given to `new` verbatim. Notably, it will not incorporate any - /// of the flags set on this builder. - pub fn build(&self) -> Result { - ExecBuilder::new_options(self.0.clone()) - .only_utf8($only_utf8) - .build() - .map(Regex::from) - } - - /// Set the value for the case insensitive (`i`) flag. - /// - /// When enabled, letters in the pattern will match both upper case and - /// lower case variants. - pub fn case_insensitive( - &mut self, - yes: bool, - ) -> &mut RegexBuilder { - self.0.case_insensitive = yes; - self - } - - /// Set the value for the multi-line matching (`m`) flag. - /// - /// When enabled, `^` matches the beginning of lines and `$` matches the - /// end of lines. - /// - /// By default, they match beginning/end of the input. - pub fn multi_line(&mut self, yes: bool) -> &mut RegexBuilder { - self.0.multi_line = yes; - self - } - - /// Set the value for the any character (`s`) flag, where in `.` matches - /// anything when `s` is set and matches anything except for new line when - /// it is not set (the default). - /// - /// N.B. "matches anything" means "any byte" when Unicode is disabled and - /// means "any valid UTF-8 encoding of any Unicode scalar value" when - /// Unicode is enabled. - pub fn dot_matches_new_line( - &mut self, - yes: bool, - ) -> &mut RegexBuilder { - self.0.dot_matches_new_line = yes; - self - } - - /// Set the value for the greedy swap (`U`) flag. - /// - /// When enabled, a pattern like `a*` is lazy (tries to find shortest - /// match) and `a*?` is greedy (tries to find longest match). - /// - /// By default, `a*` is greedy and `a*?` is lazy. - pub fn swap_greed(&mut self, yes: bool) -> &mut RegexBuilder { - self.0.swap_greed = yes; - self - } - - /// Set the value for the ignore whitespace (`x`) flag. - /// - /// When enabled, whitespace such as new lines and spaces will be ignored - /// between expressions of the pattern, and `#` can be used to start a - /// comment until the next new line. - pub fn ignore_whitespace( - &mut self, - yes: bool, - ) -> &mut RegexBuilder { - self.0.ignore_whitespace = yes; - self - } - - /// Set the value for the Unicode (`u`) flag. - /// - /// Enabled by default. When disabled, character classes such as `\w` only - /// match ASCII word characters instead of all Unicode word characters. - pub fn unicode(&mut self, yes: bool) -> &mut RegexBuilder { - self.0.unicode = yes; - self - } - - /// Whether to support octal syntax or not. - /// - /// Octal syntax is a little-known way of uttering Unicode codepoints in - /// a regular expression. For example, `a`, `\x61`, `\u0061` and - /// `\141` are all equivalent regular expressions, where the last example - /// shows octal syntax. - /// - /// While supporting octal syntax isn't in and of itself a problem, it does - /// make good error messages harder. That is, in PCRE based regex engines, - /// syntax like `\0` invokes a backreference, which is explicitly - /// unsupported in Rust's regex engine. However, many users expect it to - /// be supported. 
Therefore, when octal support is disabled, the error - /// message will explicitly mention that backreferences aren't supported. - /// - /// Octal syntax is disabled by default. - pub fn octal(&mut self, yes: bool) -> &mut RegexBuilder { - self.0.octal = yes; - self - } - - /// Set the approximate size limit of the compiled regular expression. - /// - /// This roughly corresponds to the number of bytes occupied by a single - /// compiled program. If the program exceeds this number, then a - /// compilation error is returned. - pub fn size_limit( - &mut self, - limit: usize, - ) -> &mut RegexBuilder { - self.0.size_limit = limit; - self - } - - /// Set the approximate size of the cache used by the DFA. - /// - /// This roughly corresponds to the number of bytes that the DFA will - /// use while searching. - /// - /// Note that this is a *per thread* limit. There is no way to set a global - /// limit. In particular, if a regex is used from multiple threads - /// simultaneously, then each thread may use up to the number of bytes - /// specified here. - pub fn dfa_size_limit( - &mut self, - limit: usize, - ) -> &mut RegexBuilder { - self.0.dfa_size_limit = limit; - self - } - - /// Set the nesting limit for this parser. - /// - /// The nesting limit controls how deep the abstract syntax tree is allowed - /// to be. If the AST exceeds the given limit (e.g., with too many nested - /// groups), then an error is returned by the parser. - /// - /// The purpose of this limit is to act as a heuristic to prevent stack - /// overflow for consumers that do structural induction on an `Ast` using - /// explicit recursion. While this crate never does this (instead using - /// constant stack space and moving the call stack to the heap), other - /// crates may. - /// - /// This limit is not checked until the entire Ast is parsed. Therefore, - /// if callers want to put a limit on the amount of heap space used, then - /// they should impose a limit on the length, in bytes, of the concrete - /// pattern string. In particular, this is viable since this parser - /// implementation will limit itself to heap space proportional to the - /// length of the pattern string. - /// - /// Note that a nest limit of `0` will return a nest limit error for most - /// patterns but not all. For example, a nest limit of `0` permits `a` but - /// not `ab`, since `ab` requires a concatenation, which results in a nest - /// depth of `1`. In general, a nest limit is not something that manifests - /// in an obvious way in the concrete syntax, therefore, it should not be - /// used in a granular way. - pub fn nest_limit(&mut self, limit: u32) -> &mut RegexBuilder { - self.0.nest_limit = limit; - self - } - } - } - }; -} - -define_builder!(bytes, re_bytes, false); -define_builder!(unicode, re_unicode, true); - -macro_rules! define_set_builder { - ($name:ident, $regex_mod:ident, $only_utf8:expr) => { - pub mod $name { - use super::RegexOptions; - use crate::error::Error; - use crate::exec::ExecBuilder; - - use crate::re_set::$regex_mod::RegexSet; - - /// A configurable builder for a set of regular expressions. - /// - /// A builder can be used to configure how the regexes are built, for example, - /// by setting the default flags (which can be overridden in the expression - /// itself) or setting various limits. - #[derive(Debug)] - pub struct RegexSetBuilder(RegexOptions); - - impl RegexSetBuilder { - /// Create a new regular expression builder with the given pattern. 
- /// - /// If the pattern is invalid, then an error will be returned when - /// `build` is called. - pub fn new(patterns: I) -> RegexSetBuilder - where - S: AsRef, - I: IntoIterator, - { - let mut builder = RegexSetBuilder(RegexOptions::default()); - for pat in patterns { - builder.0.pats.push(pat.as_ref().to_owned()); - } - builder - } - - /// Consume the builder and compile the regular expressions into a set. - pub fn build(&self) -> Result { - ExecBuilder::new_options(self.0.clone()) - .only_utf8($only_utf8) - .build() - .map(RegexSet::from) - } - - /// Set the value for the case insensitive (`i`) flag. - pub fn case_insensitive( - &mut self, - yes: bool, - ) -> &mut RegexSetBuilder { - self.0.case_insensitive = yes; - self - } - - /// Set the value for the multi-line matching (`m`) flag. - pub fn multi_line( - &mut self, - yes: bool, - ) -> &mut RegexSetBuilder { - self.0.multi_line = yes; - self - } - - /// Set the value for the any character (`s`) flag, where in `.` matches - /// anything when `s` is set and matches anything except for new line when - /// it is not set (the default). - /// - /// N.B. "matches anything" means "any byte" for `regex::bytes::RegexSet` - /// expressions and means "any Unicode scalar value" for `regex::RegexSet` - /// expressions. - pub fn dot_matches_new_line( - &mut self, - yes: bool, - ) -> &mut RegexSetBuilder { - self.0.dot_matches_new_line = yes; - self - } - - /// Set the value for the greedy swap (`U`) flag. - pub fn swap_greed( - &mut self, - yes: bool, - ) -> &mut RegexSetBuilder { - self.0.swap_greed = yes; - self - } - - /// Set the value for the ignore whitespace (`x`) flag. - pub fn ignore_whitespace( - &mut self, - yes: bool, - ) -> &mut RegexSetBuilder { - self.0.ignore_whitespace = yes; - self - } - - /// Set the value for the Unicode (`u`) flag. - pub fn unicode(&mut self, yes: bool) -> &mut RegexSetBuilder { - self.0.unicode = yes; - self - } - - /// Whether to support octal syntax or not. - /// - /// Octal syntax is a little-known way of uttering Unicode codepoints in - /// a regular expression. For example, `a`, `\x61`, `\u0061` and - /// `\141` are all equivalent regular expressions, where the last example - /// shows octal syntax. - /// - /// While supporting octal syntax isn't in and of itself a problem, it does - /// make good error messages harder. That is, in PCRE based regex engines, - /// syntax like `\0` invokes a backreference, which is explicitly - /// unsupported in Rust's regex engine. However, many users expect it to - /// be supported. Therefore, when octal support is disabled, the error - /// message will explicitly mention that backreferences aren't supported. - /// - /// Octal syntax is disabled by default. - pub fn octal(&mut self, yes: bool) -> &mut RegexSetBuilder { - self.0.octal = yes; - self - } - - /// Set the approximate size limit of the compiled regular expression. - /// - /// This roughly corresponds to the number of bytes occupied by a single - /// compiled program. If the program exceeds this number, then a - /// compilation error is returned. - pub fn size_limit( - &mut self, - limit: usize, - ) -> &mut RegexSetBuilder { - self.0.size_limit = limit; - self - } - - /// Set the approximate size of the cache used by the DFA. - /// - /// This roughly corresponds to the number of bytes that the DFA will - /// use while searching. - /// - /// Note that this is a *per thread* limit. There is no way to set a global - /// limit. 
In particular, if a regex is used from multiple threads - /// simultaneously, then each thread may use up to the number of bytes - /// specified here. - pub fn dfa_size_limit( - &mut self, - limit: usize, - ) -> &mut RegexSetBuilder { - self.0.dfa_size_limit = limit; - self - } - - /// Set the nesting limit for this parser. - /// - /// The nesting limit controls how deep the abstract syntax tree is allowed - /// to be. If the AST exceeds the given limit (e.g., with too many nested - /// groups), then an error is returned by the parser. - /// - /// The purpose of this limit is to act as a heuristic to prevent stack - /// overflow for consumers that do structural induction on an `Ast` using - /// explicit recursion. While this crate never does this (instead using - /// constant stack space and moving the call stack to the heap), other - /// crates may. - /// - /// This limit is not checked until the entire Ast is parsed. Therefore, - /// if callers want to put a limit on the amount of heap space used, then - /// they should impose a limit on the length, in bytes, of the concrete - /// pattern string. In particular, this is viable since this parser - /// implementation will limit itself to heap space proportional to the - /// length of the pattern string. - /// - /// Note that a nest limit of `0` will return a nest limit error for most - /// patterns but not all. For example, a nest limit of `0` permits `a` but - /// not `ab`, since `ab` requires a concatenation, which results in a nest - /// depth of `1`. In general, a nest limit is not something that manifests - /// in an obvious way in the concrete syntax, therefore, it should not be - /// used in a granular way. - pub fn nest_limit( - &mut self, - limit: u32, - ) -> &mut RegexSetBuilder { - self.0.nest_limit = limit; - self - } - } - } - }; -} - -define_set_builder!(set_bytes, bytes, false); -define_set_builder!(set_unicode, unicode, true); diff --git a/collector/compile-benchmarks/regex-1.5.5/src/re_bytes.rs b/collector/compile-benchmarks/regex-1.5.5/src/re_bytes.rs deleted file mode 100644 index ae55d6d25..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/src/re_bytes.rs +++ /dev/null @@ -1,1260 +0,0 @@ -use std::borrow::Cow; -use std::collections::HashMap; -use std::fmt; -use std::iter::FusedIterator; -use std::ops::{Index, Range}; -use std::str::FromStr; -use std::sync::Arc; - -use crate::find_byte::find_byte; - -use crate::error::Error; -use crate::exec::{Exec, ExecNoSync}; -use crate::expand::expand_bytes; -use crate::re_builder::bytes::RegexBuilder; -use crate::re_trait::{self, RegularExpression, SubCapturesPosIter}; - -/// Match represents a single match of a regex in a haystack. -/// -/// The lifetime parameter `'t` refers to the lifetime of the matched text. -#[derive(Copy, Clone, Debug, Eq, PartialEq)] -pub struct Match<'t> { - text: &'t [u8], - start: usize, - end: usize, -} - -impl<'t> Match<'t> { - /// Returns the starting byte offset of the match in the haystack. - #[inline] - pub fn start(&self) -> usize { - self.start - } - - /// Returns the ending byte offset of the match in the haystack. - #[inline] - pub fn end(&self) -> usize { - self.end - } - - /// Returns the range over the starting and ending byte offsets of the - /// match in the haystack. - #[inline] - pub fn range(&self) -> Range { - self.start..self.end - } - - /// Returns the matched text. - #[inline] - pub fn as_bytes(&self) -> &'t [u8] { - &self.text[self.range()] - } - - /// Creates a new match from the given haystack and byte offsets. 
- #[inline] - fn new(haystack: &'t [u8], start: usize, end: usize) -> Match<'t> { - Match { text: haystack, start: start, end: end } - } -} - -impl<'t> From> for Range { - fn from(m: Match<'t>) -> Range { - m.range() - } -} - -/// A compiled regular expression for matching arbitrary bytes. -/// -/// It can be used to search, split or replace text. All searching is done with -/// an implicit `.*?` at the beginning and end of an expression. To force an -/// expression to match the whole string (or a prefix or a suffix), you must -/// use an anchor like `^` or `$` (or `\A` and `\z`). -/// -/// Like the `Regex` type in the parent module, matches with this regex return -/// byte offsets into the search text. **Unlike** the parent `Regex` type, -/// these byte offsets may not correspond to UTF-8 sequence boundaries since -/// the regexes in this module can match arbitrary bytes. -#[derive(Clone)] -pub struct Regex(Exec); - -impl fmt::Display for Regex { - /// Shows the original regular expression. - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}", self.as_str()) - } -} - -impl fmt::Debug for Regex { - /// Shows the original regular expression. - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Display::fmt(self, f) - } -} - -/// A constructor for Regex from an Exec. -/// -/// This is hidden because Exec isn't actually part of the public API. -#[doc(hidden)] -impl From for Regex { - fn from(exec: Exec) -> Regex { - Regex(exec) - } -} - -impl FromStr for Regex { - type Err = Error; - - /// Attempts to parse a string into a regular expression - fn from_str(s: &str) -> Result { - Regex::new(s) - } -} - -/// Core regular expression methods. -impl Regex { - /// Compiles a regular expression. Once compiled, it can be used repeatedly - /// to search, split or replace text in a string. - /// - /// If an invalid expression is given, then an error is returned. - pub fn new(re: &str) -> Result { - RegexBuilder::new(re).build() - } - - /// Returns true if and only if there is a match for the regex in the - /// string given. - /// - /// It is recommended to use this method if all you need to do is test - /// a match, since the underlying matching engine may be able to do less - /// work. - /// - /// # Example - /// - /// Test if some text contains at least one word with exactly 13 ASCII word - /// bytes: - /// - /// ```rust - /// # use regex::bytes::Regex; - /// # fn main() { - /// let text = b"I categorically deny having triskaidekaphobia."; - /// assert!(Regex::new(r"\b\w{13}\b").unwrap().is_match(text)); - /// # } - /// ``` - pub fn is_match(&self, text: &[u8]) -> bool { - self.is_match_at(text, 0) - } - - /// Returns the start and end byte range of the leftmost-first match in - /// `text`. If no match exists, then `None` is returned. - /// - /// Note that this should only be used if you want to discover the position - /// of the match. Testing the existence of a match is faster if you use - /// `is_match`. 
- /// - /// # Example - /// - /// Find the start and end location of the first word with exactly 13 - /// ASCII word bytes: - /// - /// ```rust - /// # use regex::bytes::Regex; - /// # fn main() { - /// let text = b"I categorically deny having triskaidekaphobia."; - /// let mat = Regex::new(r"\b\w{13}\b").unwrap().find(text).unwrap(); - /// assert_eq!((mat.start(), mat.end()), (2, 15)); - /// # } - /// ``` - pub fn find<'t>(&self, text: &'t [u8]) -> Option> { - self.find_at(text, 0) - } - - /// Returns an iterator for each successive non-overlapping match in - /// `text`, returning the start and end byte indices with respect to - /// `text`. - /// - /// # Example - /// - /// Find the start and end location of every word with exactly 13 ASCII - /// word bytes: - /// - /// ```rust - /// # use regex::bytes::Regex; - /// # fn main() { - /// let text = b"Retroactively relinquishing remunerations is reprehensible."; - /// for mat in Regex::new(r"\b\w{13}\b").unwrap().find_iter(text) { - /// println!("{:?}", mat); - /// } - /// # } - /// ``` - pub fn find_iter<'r, 't>(&'r self, text: &'t [u8]) -> Matches<'r, 't> { - Matches(self.0.searcher().find_iter(text)) - } - - /// Returns the capture groups corresponding to the leftmost-first - /// match in `text`. Capture group `0` always corresponds to the entire - /// match. If no match is found, then `None` is returned. - /// - /// You should only use `captures` if you need access to the location of - /// capturing group matches. Otherwise, `find` is faster for discovering - /// the location of the overall match. - /// - /// # Examples - /// - /// Say you have some text with movie names and their release years, - /// like "'Citizen Kane' (1941)". It'd be nice if we could search for text - /// looking like that, while also extracting the movie name and its release - /// year separately. - /// - /// ```rust - /// # use regex::bytes::Regex; - /// # fn main() { - /// let re = Regex::new(r"'([^']+)'\s+\((\d{4})\)").unwrap(); - /// let text = b"Not my favorite movie: 'Citizen Kane' (1941)."; - /// let caps = re.captures(text).unwrap(); - /// assert_eq!(caps.get(1).unwrap().as_bytes(), &b"Citizen Kane"[..]); - /// assert_eq!(caps.get(2).unwrap().as_bytes(), &b"1941"[..]); - /// assert_eq!(caps.get(0).unwrap().as_bytes(), &b"'Citizen Kane' (1941)"[..]); - /// // You can also access the groups by index using the Index notation. - /// // Note that this will panic on an invalid index. - /// assert_eq!(&caps[1], b"Citizen Kane"); - /// assert_eq!(&caps[2], b"1941"); - /// assert_eq!(&caps[0], b"'Citizen Kane' (1941)"); - /// # } - /// ``` - /// - /// Note that the full match is at capture group `0`. Each subsequent - /// capture group is indexed by the order of its opening `(`. - /// - /// We can make this example a bit clearer by using *named* capture groups: - /// - /// ```rust - /// # use regex::bytes::Regex; - /// # fn main() { - /// let re = Regex::new(r"'(?P[^']+)'\s+\((?P<year>\d{4})\)") - /// .unwrap(); - /// let text = b"Not my favorite movie: 'Citizen Kane' (1941)."; - /// let caps = re.captures(text).unwrap(); - /// assert_eq!(caps.name("title").unwrap().as_bytes(), b"Citizen Kane"); - /// assert_eq!(caps.name("year").unwrap().as_bytes(), b"1941"); - /// assert_eq!(caps.get(0).unwrap().as_bytes(), &b"'Citizen Kane' (1941)"[..]); - /// // You can also access the groups by name using the Index notation. - /// // Note that this will panic on an invalid group name. 
- /// assert_eq!(&caps["title"], b"Citizen Kane"); - /// assert_eq!(&caps["year"], b"1941"); - /// assert_eq!(&caps[0], b"'Citizen Kane' (1941)"); - /// - /// # } - /// ``` - /// - /// Here we name the capture groups, which we can access with the `name` - /// method or the `Index` notation with a `&str`. Note that the named - /// capture groups are still accessible with `get` or the `Index` notation - /// with a `usize`. - /// - /// The `0`th capture group is always unnamed, so it must always be - /// accessed with `get(0)` or `[0]`. - pub fn captures<'t>(&self, text: &'t [u8]) -> Option<Captures<'t>> { - let mut locs = self.capture_locations(); - self.captures_read_at(&mut locs, text, 0).map(move |_| Captures { - text: text, - locs: locs.0, - named_groups: self.0.capture_name_idx().clone(), - }) - } - - /// Returns an iterator over all the non-overlapping capture groups matched - /// in `text`. This is operationally the same as `find_iter`, except it - /// yields information about capturing group matches. - /// - /// # Example - /// - /// We can use this to find all movie titles and their release years in - /// some text, where the movie is formatted like "'Title' (xxxx)": - /// - /// ```rust - /// # use std::str; use regex::bytes::Regex; - /// # fn main() { - /// let re = Regex::new(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)") - /// .unwrap(); - /// let text = b"'Citizen Kane' (1941), 'The Wizard of Oz' (1939), 'M' (1931)."; - /// for caps in re.captures_iter(text) { - /// let title = str::from_utf8(&caps["title"]).unwrap(); - /// let year = str::from_utf8(&caps["year"]).unwrap(); - /// println!("Movie: {:?}, Released: {:?}", title, year); - /// } - /// // Output: - /// // Movie: Citizen Kane, Released: 1941 - /// // Movie: The Wizard of Oz, Released: 1939 - /// // Movie: M, Released: 1931 - /// # } - /// ``` - pub fn captures_iter<'r, 't>( - &'r self, - text: &'t [u8], - ) -> CaptureMatches<'r, 't> { - CaptureMatches(self.0.searcher().captures_iter(text)) - } - - /// Returns an iterator of substrings of `text` delimited by a match of the - /// regular expression. Namely, each element of the iterator corresponds to - /// text that *isn't* matched by the regular expression. - /// - /// This method will *not* copy the text given. - /// - /// # Example - /// - /// To split a string delimited by arbitrary amounts of spaces or tabs: - /// - /// ```rust - /// # use regex::bytes::Regex; - /// # fn main() { - /// let re = Regex::new(r"[ \t]+").unwrap(); - /// let fields: Vec<&[u8]> = re.split(b"a b \t c\td e").collect(); - /// assert_eq!(fields, vec![ - /// &b"a"[..], &b"b"[..], &b"c"[..], &b"d"[..], &b"e"[..], - /// ]); - /// # } - /// ``` - pub fn split<'r, 't>(&'r self, text: &'t [u8]) -> Split<'r, 't> { - Split { finder: self.find_iter(text), last: 0 } - } - - /// Returns an iterator of at most `limit` substrings of `text` delimited - /// by a match of the regular expression. (A `limit` of `0` will return no - /// substrings.) Namely, each element of the iterator corresponds to text - /// that *isn't* matched by the regular expression. The remainder of the - /// string that is not split will be the last element in the iterator. - /// - /// This method will *not* copy the text given. - /// - /// # Example - /// - /// Get the first two words in some text: - /// - /// ```rust - /// # use regex::bytes::Regex; - /// # fn main() { - /// let re = Regex::new(r"\W+").unwrap(); - /// let fields: Vec<&[u8]> = re.splitn(b"Hey! 
How are you?", 3).collect(); - /// assert_eq!(fields, vec![&b"Hey"[..], &b"How"[..], &b"are you?"[..]]); - /// # } - /// ``` - pub fn splitn<'r, 't>( - &'r self, - text: &'t [u8], - limit: usize, - ) -> SplitN<'r, 't> { - SplitN { splits: self.split(text), n: limit } - } - - /// Replaces the leftmost-first match with the replacement provided. The - /// replacement can be a regular byte string (where `$N` and `$name` are - /// expanded to match capture groups) or a function that takes the matches' - /// `Captures` and returns the replaced byte string. - /// - /// If no match is found, then a copy of the byte string is returned - /// unchanged. - /// - /// # Replacement string syntax - /// - /// All instances of `$name` in the replacement text is replaced with the - /// corresponding capture group `name`. - /// - /// `name` may be an integer corresponding to the index of the - /// capture group (counted by order of opening parenthesis where `0` is the - /// entire match) or it can be a name (consisting of letters, digits or - /// underscores) corresponding to a named capture group. - /// - /// If `name` isn't a valid capture group (whether the name doesn't exist - /// or isn't a valid index), then it is replaced with the empty string. - /// - /// The longest possible name is used. e.g., `$1a` looks up the capture - /// group named `1a` and not the capture group at index `1`. To exert more - /// precise control over the name, use braces, e.g., `${1}a`. - /// - /// To write a literal `$` use `$$`. - /// - /// # Examples - /// - /// Note that this function is polymorphic with respect to the replacement. - /// In typical usage, this can just be a normal byte string: - /// - /// ```rust - /// # use regex::bytes::Regex; - /// # fn main() { - /// let re = Regex::new("[^01]+").unwrap(); - /// assert_eq!(re.replace(b"1078910", &b""[..]), &b"1010"[..]); - /// # } - /// ``` - /// - /// But anything satisfying the `Replacer` trait will work. For example, a - /// closure of type `|&Captures| -> Vec<u8>` provides direct access to the - /// captures corresponding to a match. This allows one to access capturing - /// group matches easily: - /// - /// ```rust - /// # use regex::bytes::Regex; - /// # use regex::bytes::Captures; fn main() { - /// let re = Regex::new(r"([^,\s]+),\s+(\S+)").unwrap(); - /// let result = re.replace(b"Springsteen, Bruce", |caps: &Captures| { - /// let mut replacement = caps[2].to_owned(); - /// replacement.push(b' '); - /// replacement.extend(&caps[1]); - /// replacement - /// }); - /// assert_eq!(result, &b"Bruce Springsteen"[..]); - /// # } - /// ``` - /// - /// But this is a bit cumbersome to use all the time. Instead, a simple - /// syntax is supported that expands `$name` into the corresponding capture - /// group. Here's the last example, but using this expansion technique - /// with named capture groups: - /// - /// ```rust - /// # use regex::bytes::Regex; - /// # fn main() { - /// let re = Regex::new(r"(?P<last>[^,\s]+),\s+(?P<first>\S+)").unwrap(); - /// let result = re.replace(b"Springsteen, Bruce", &b"$first $last"[..]); - /// assert_eq!(result, &b"Bruce Springsteen"[..]); - /// # } - /// ``` - /// - /// Note that using `$2` instead of `$first` or `$1` instead of `$last` - /// would produce the same result. To write a literal `$` use `$$`. - /// - /// Sometimes the replacement string requires use of curly braces to - /// delineate a capture group replacement and surrounding literal text. 
- /// For example, if we wanted to join two words together with an - /// underscore: - /// - /// ```rust - /// # use regex::bytes::Regex; - /// # fn main() { - /// let re = Regex::new(r"(?P<first>\w+)\s+(?P<second>\w+)").unwrap(); - /// let result = re.replace(b"deep fried", &b"${first}_$second"[..]); - /// assert_eq!(result, &b"deep_fried"[..]); - /// # } - /// ``` - /// - /// Without the curly braces, the capture group name `first_` would be - /// used, and since it doesn't exist, it would be replaced with the empty - /// string. - /// - /// Finally, sometimes you just want to replace a literal string with no - /// regard for capturing group expansion. This can be done by wrapping a - /// byte string with `NoExpand`: - /// - /// ```rust - /// # use regex::bytes::Regex; - /// # fn main() { - /// use regex::bytes::NoExpand; - /// - /// let re = Regex::new(r"(?P<last>[^,\s]+),\s+(\S+)").unwrap(); - /// let result = re.replace(b"Springsteen, Bruce", NoExpand(b"$2 $last")); - /// assert_eq!(result, &b"$2 $last"[..]); - /// # } - /// ``` - pub fn replace<'t, R: Replacer>( - &self, - text: &'t [u8], - rep: R, - ) -> Cow<'t, [u8]> { - self.replacen(text, 1, rep) - } - - /// Replaces all non-overlapping matches in `text` with the replacement - /// provided. This is the same as calling `replacen` with `limit` set to - /// `0`. - /// - /// See the documentation for `replace` for details on how to access - /// capturing group matches in the replacement text. - pub fn replace_all<'t, R: Replacer>( - &self, - text: &'t [u8], - rep: R, - ) -> Cow<'t, [u8]> { - self.replacen(text, 0, rep) - } - - /// Replaces at most `limit` non-overlapping matches in `text` with the - /// replacement provided. If `limit` is 0, then all non-overlapping matches - /// are replaced. - /// - /// See the documentation for `replace` for details on how to access - /// capturing group matches in the replacement text. - pub fn replacen<'t, R: Replacer>( - &self, - text: &'t [u8], - limit: usize, - mut rep: R, - ) -> Cow<'t, [u8]> { - if let Some(rep) = rep.no_expansion() { - let mut it = self.find_iter(text).enumerate().peekable(); - if it.peek().is_none() { - return Cow::Borrowed(text); - } - let mut new = Vec::with_capacity(text.len()); - let mut last_match = 0; - for (i, m) in it { - if limit > 0 && i >= limit { - break; - } - new.extend_from_slice(&text[last_match..m.start()]); - new.extend_from_slice(&rep); - last_match = m.end(); - } - new.extend_from_slice(&text[last_match..]); - return Cow::Owned(new); - } - - // The slower path, which we use if the replacement needs access to - // capture groups. - let mut it = self.captures_iter(text).enumerate().peekable(); - if it.peek().is_none() { - return Cow::Borrowed(text); - } - let mut new = Vec::with_capacity(text.len()); - let mut last_match = 0; - for (i, cap) in it { - if limit > 0 && i >= limit { - break; - } - // unwrap on 0 is OK because captures only reports matches - let m = cap.get(0).unwrap(); - new.extend_from_slice(&text[last_match..m.start()]); - rep.replace_append(&cap, &mut new); - last_match = m.end(); - } - new.extend_from_slice(&text[last_match..]); - Cow::Owned(new) - } -} - -/// Advanced or "lower level" search methods. -impl Regex { - /// Returns the end location of a match in the text given. - /// - /// This method may have the same performance characteristics as - /// `is_match`, except it provides an end location for a match. In - /// particular, the location returned *may be shorter* than the proper end - /// of the leftmost-first match. 
- /// - /// # Example - /// - /// Typically, `a+` would match the entire first sequence of `a` in some - /// text, but `shortest_match` can give up as soon as it sees the first - /// `a`. - /// - /// ```rust - /// # use regex::bytes::Regex; - /// # fn main() { - /// let text = b"aaaaa"; - /// let pos = Regex::new(r"a+").unwrap().shortest_match(text); - /// assert_eq!(pos, Some(1)); - /// # } - /// ``` - pub fn shortest_match(&self, text: &[u8]) -> Option<usize> { - self.shortest_match_at(text, 0) - } - - /// Returns the same as shortest_match, but starts the search at the given - /// offset. - /// - /// The significance of the starting point is that it takes the surrounding - /// context into consideration. For example, the `\A` anchor can only - /// match when `start == 0`. - pub fn shortest_match_at( - &self, - text: &[u8], - start: usize, - ) -> Option<usize> { - self.0.searcher().shortest_match_at(text, start) - } - - /// Returns the same as is_match, but starts the search at the given - /// offset. - /// - /// The significance of the starting point is that it takes the surrounding - /// context into consideration. For example, the `\A` anchor can only - /// match when `start == 0`. - pub fn is_match_at(&self, text: &[u8], start: usize) -> bool { - self.shortest_match_at(text, start).is_some() - } - - /// Returns the same as find, but starts the search at the given - /// offset. - /// - /// The significance of the starting point is that it takes the surrounding - /// context into consideration. For example, the `\A` anchor can only - /// match when `start == 0`. - pub fn find_at<'t>( - &self, - text: &'t [u8], - start: usize, - ) -> Option<Match<'t>> { - self.0 - .searcher() - .find_at(text, start) - .map(|(s, e)| Match::new(text, s, e)) - } - - /// This is like `captures`, but uses - /// [`CaptureLocations`](struct.CaptureLocations.html) - /// instead of - /// [`Captures`](struct.Captures.html) in order to amortize allocations. - /// - /// To create a `CaptureLocations` value, use the - /// `Regex::capture_locations` method. - /// - /// This returns the overall match if this was successful, which is always - /// equivalence to the `0`th capture group. - pub fn captures_read<'t>( - &self, - locs: &mut CaptureLocations, - text: &'t [u8], - ) -> Option<Match<'t>> { - self.captures_read_at(locs, text, 0) - } - - /// Returns the same as `captures_read`, but starts the search at the given - /// offset and populates the capture locations given. - /// - /// The significance of the starting point is that it takes the surrounding - /// context into consideration. For example, the `\A` anchor can only - /// match when `start == 0`. - pub fn captures_read_at<'t>( - &self, - locs: &mut CaptureLocations, - text: &'t [u8], - start: usize, - ) -> Option<Match<'t>> { - self.0 - .searcher() - .captures_read_at(&mut locs.0, text, start) - .map(|(s, e)| Match::new(text, s, e)) - } - - /// An undocumented alias for `captures_read_at`. - /// - /// The `regex-capi` crate previously used this routine, so to avoid - /// breaking that crate, we continue to provide the name as an undocumented - /// alias. - #[doc(hidden)] - pub fn read_captures_at<'t>( - &self, - locs: &mut CaptureLocations, - text: &'t [u8], - start: usize, - ) -> Option<Match<'t>> { - self.captures_read_at(locs, text, start) - } -} - -/// Auxiliary methods. -impl Regex { - /// Returns the original string of this regex. - pub fn as_str(&self) -> &str { - &self.0.regex_strings()[0] - } - - /// Returns an iterator over the capture names. 
- pub fn capture_names(&self) -> CaptureNames<'_> { - CaptureNames(self.0.capture_names().iter()) - } - - /// Returns the number of captures. - pub fn captures_len(&self) -> usize { - self.0.capture_names().len() - } - - /// Returns an empty set of capture locations that can be reused in - /// multiple calls to `captures_read` or `captures_read_at`. - pub fn capture_locations(&self) -> CaptureLocations { - CaptureLocations(self.0.searcher().locations()) - } - - /// An alias for `capture_locations` to preserve backward compatibility. - /// - /// The `regex-capi` crate uses this method, so to avoid breaking that - /// crate, we continue to export it as an undocumented API. - #[doc(hidden)] - pub fn locations(&self) -> CaptureLocations { - CaptureLocations(self.0.searcher().locations()) - } -} - -/// An iterator over all non-overlapping matches for a particular string. -/// -/// The iterator yields a tuple of integers corresponding to the start and end -/// of the match. The indices are byte offsets. The iterator stops when no more -/// matches can be found. -/// -/// `'r` is the lifetime of the compiled regular expression and `'t` is the -/// lifetime of the matched byte string. -#[derive(Debug)] -pub struct Matches<'r, 't>(re_trait::Matches<'t, ExecNoSync<'r>>); - -impl<'r, 't> Iterator for Matches<'r, 't> { - type Item = Match<'t>; - - fn next(&mut self) -> Option<Match<'t>> { - let text = self.0.text(); - self.0.next().map(|(s, e)| Match::new(text, s, e)) - } -} - -impl<'r, 't> FusedIterator for Matches<'r, 't> {} - -/// An iterator that yields all non-overlapping capture groups matching a -/// particular regular expression. -/// -/// The iterator stops when no more matches can be found. -/// -/// `'r` is the lifetime of the compiled regular expression and `'t` is the -/// lifetime of the matched byte string. -#[derive(Debug)] -pub struct CaptureMatches<'r, 't>( - re_trait::CaptureMatches<'t, ExecNoSync<'r>>, -); - -impl<'r, 't> Iterator for CaptureMatches<'r, 't> { - type Item = Captures<'t>; - - fn next(&mut self) -> Option<Captures<'t>> { - self.0.next().map(|locs| Captures { - text: self.0.text(), - locs: locs, - named_groups: self.0.regex().capture_name_idx().clone(), - }) - } -} - -impl<'r, 't> FusedIterator for CaptureMatches<'r, 't> {} - -/// Yields all substrings delimited by a regular expression match. -/// -/// `'r` is the lifetime of the compiled regular expression and `'t` is the -/// lifetime of the byte string being split. -#[derive(Debug)] -pub struct Split<'r, 't> { - finder: Matches<'r, 't>, - last: usize, -} - -impl<'r, 't> Iterator for Split<'r, 't> { - type Item = &'t [u8]; - - fn next(&mut self) -> Option<&'t [u8]> { - let text = self.finder.0.text(); - match self.finder.next() { - None => { - if self.last > text.len() { - None - } else { - let s = &text[self.last..]; - self.last = text.len() + 1; // Next call will return None - Some(s) - } - } - Some(m) => { - let matched = &text[self.last..m.start()]; - self.last = m.end(); - Some(matched) - } - } - } -} - -impl<'r, 't> FusedIterator for Split<'r, 't> {} - -/// Yields at most `N` substrings delimited by a regular expression match. -/// -/// The last substring will be whatever remains after splitting. -/// -/// `'r` is the lifetime of the compiled regular expression and `'t` is the -/// lifetime of the byte string being split. 
-#[derive(Debug)] -pub struct SplitN<'r, 't> { - splits: Split<'r, 't>, - n: usize, -} - -impl<'r, 't> Iterator for SplitN<'r, 't> { - type Item = &'t [u8]; - - fn next(&mut self) -> Option<&'t [u8]> { - if self.n == 0 { - return None; - } - - self.n -= 1; - if self.n > 0 { - return self.splits.next(); - } - - let text = self.splits.finder.0.text(); - if self.splits.last > text.len() { - // We've already returned all substrings. - None - } else { - // self.n == 0, so future calls will return None immediately - Some(&text[self.splits.last..]) - } - } - - fn size_hint(&self) -> (usize, Option<usize>) { - (0, Some(self.n)) - } -} - -impl<'r, 't> FusedIterator for SplitN<'r, 't> {} - -/// An iterator over the names of all possible captures. -/// -/// `None` indicates an unnamed capture; the first element (capture 0, the -/// whole matched region) is always unnamed. -/// -/// `'r` is the lifetime of the compiled regular expression. -#[derive(Clone, Debug)] -pub struct CaptureNames<'r>(::std::slice::Iter<'r, Option<String>>); - -impl<'r> Iterator for CaptureNames<'r> { - type Item = Option<&'r str>; - - fn next(&mut self) -> Option<Option<&'r str>> { - self.0 - .next() - .as_ref() - .map(|slot| slot.as_ref().map(|name| name.as_ref())) - } - - fn size_hint(&self) -> (usize, Option<usize>) { - self.0.size_hint() - } - - fn count(self) -> usize { - self.0.count() - } -} - -impl<'r> ExactSizeIterator for CaptureNames<'r> {} - -impl<'r> FusedIterator for CaptureNames<'r> {} - -/// CaptureLocations is a low level representation of the raw offsets of each -/// submatch. -/// -/// You can think of this as a lower level -/// [`Captures`](struct.Captures.html), where this type does not support -/// named capturing groups directly and it does not borrow the text that these -/// offsets were matched on. -/// -/// Primarily, this type is useful when using the lower level `Regex` APIs -/// such as `read_captures`, which permits amortizing the allocation in which -/// capture match locations are stored. -/// -/// In order to build a value of this type, you'll need to call the -/// `capture_locations` method on the `Regex` being used to execute the search. -/// The value returned can then be reused in subsequent searches. -#[derive(Clone, Debug)] -pub struct CaptureLocations(re_trait::Locations); - -/// A type alias for `CaptureLocations` for backwards compatibility. -/// -/// Previously, we exported `CaptureLocations` as `Locations` in an -/// undocumented API. To prevent breaking that code (e.g., in `regex-capi`), -/// we continue re-exporting the same undocumented API. -#[doc(hidden)] -pub type Locations = CaptureLocations; - -impl CaptureLocations { - /// Returns the start and end positions of the Nth capture group. Returns - /// `None` if `i` is not a valid capture group or if the capture group did - /// not match anything. The positions returned are *always* byte indices - /// with respect to the original string matched. - #[inline] - pub fn get(&self, i: usize) -> Option<(usize, usize)> { - self.0.pos(i) - } - - /// Returns the total number of capturing groups. - /// - /// This is always at least `1` since every regex has at least `1` - /// capturing group that corresponds to the entire match. - #[inline] - pub fn len(&self) -> usize { - self.0.len() - } - - /// An alias for the `get` method for backwards compatibility. - /// - /// Previously, we exported `get` as `pos` in an undocumented API. 
To - /// prevent breaking that code (e.g., in `regex-capi`), we continue - /// re-exporting the same undocumented API. - #[doc(hidden)] - #[inline] - pub fn pos(&self, i: usize) -> Option<(usize, usize)> { - self.get(i) - } -} - -/// Captures represents a group of captured byte strings for a single match. -/// -/// The 0th capture always corresponds to the entire match. Each subsequent -/// index corresponds to the next capture group in the regex. If a capture -/// group is named, then the matched byte string is *also* available via the -/// `name` method. (Note that the 0th capture is always unnamed and so must be -/// accessed with the `get` method.) -/// -/// Positions returned from a capture group are always byte indices. -/// -/// `'t` is the lifetime of the matched text. -pub struct Captures<'t> { - text: &'t [u8], - locs: re_trait::Locations, - named_groups: Arc<HashMap<String, usize>>, -} - -impl<'t> Captures<'t> { - /// Returns the match associated with the capture group at index `i`. If - /// `i` does not correspond to a capture group, or if the capture group - /// did not participate in the match, then `None` is returned. - /// - /// # Examples - /// - /// Get the text of the match with a default of an empty string if this - /// group didn't participate in the match: - /// - /// ```rust - /// # use regex::bytes::Regex; - /// let re = Regex::new(r"[a-z]+(?:([0-9]+)|([A-Z]+))").unwrap(); - /// let caps = re.captures(b"abc123").unwrap(); - /// - /// let text1 = caps.get(1).map_or(&b""[..], |m| m.as_bytes()); - /// let text2 = caps.get(2).map_or(&b""[..], |m| m.as_bytes()); - /// assert_eq!(text1, &b"123"[..]); - /// assert_eq!(text2, &b""[..]); - /// ``` - pub fn get(&self, i: usize) -> Option<Match<'t>> { - self.locs.pos(i).map(|(s, e)| Match::new(self.text, s, e)) - } - - /// Returns the match for the capture group named `name`. If `name` isn't a - /// valid capture group or didn't match anything, then `None` is returned. - pub fn name(&self, name: &str) -> Option<Match<'t>> { - self.named_groups.get(name).and_then(|&i| self.get(i)) - } - - /// An iterator that yields all capturing matches in the order in which - /// they appear in the regex. If a particular capture group didn't - /// participate in the match, then `None` is yielded for that capture. - /// - /// The first match always corresponds to the overall match of the regex. - pub fn iter<'c>(&'c self) -> SubCaptureMatches<'c, 't> { - SubCaptureMatches { caps: self, it: self.locs.iter() } - } - - /// Expands all instances of `$name` in `replacement` to the corresponding - /// capture group `name`, and writes them to the `dst` buffer given. - /// - /// `name` may be an integer corresponding to the index of the capture - /// group (counted by order of opening parenthesis where `0` is the - /// entire match) or it can be a name (consisting of letters, digits or - /// underscores) corresponding to a named capture group. - /// - /// If `name` isn't a valid capture group (whether the name doesn't exist - /// or isn't a valid index), then it is replaced with the empty string. - /// - /// The longest possible name consisting of the characters `[_0-9A-Za-z]` - /// is used. e.g., `$1a` looks up the capture group named `1a` and not the - /// capture group at index `1`. To exert more precise control over the - /// name, or to refer to a capture group name that uses characters outside - /// of `[_0-9A-Za-z]`, use braces, e.g., `${1}a` or `${foo[bar].baz}`. When - /// using braces, any sequence of valid UTF-8 bytes is permitted. 
If the - /// sequence does not refer to a capture group name in the corresponding - /// regex, then it is replaced with an empty string. - /// - /// To write a literal `$` use `$$`. - pub fn expand(&self, replacement: &[u8], dst: &mut Vec<u8>) { - expand_bytes(self, replacement, dst) - } - - /// Returns the number of captured groups. - /// - /// This is always at least `1`, since every regex has at least one capture - /// group that corresponds to the full match. - #[inline] - pub fn len(&self) -> usize { - self.locs.len() - } -} - -impl<'t> fmt::Debug for Captures<'t> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_tuple("Captures").field(&CapturesDebug(self)).finish() - } -} - -struct CapturesDebug<'c, 't>(&'c Captures<'t>); - -impl<'c, 't> fmt::Debug for CapturesDebug<'c, 't> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fn escape_bytes(bytes: &[u8]) -> String { - let mut s = String::new(); - for &b in bytes { - s.push_str(&escape_byte(b)); - } - s - } - - fn escape_byte(byte: u8) -> String { - use std::ascii::escape_default; - - let escaped: Vec<u8> = escape_default(byte).collect(); - String::from_utf8_lossy(&escaped).into_owned() - } - - // We'd like to show something nice here, even if it means an - // allocation to build a reverse index. - let slot_to_name: HashMap<&usize, &String> = - self.0.named_groups.iter().map(|(a, b)| (b, a)).collect(); - let mut map = f.debug_map(); - for (slot, m) in self.0.locs.iter().enumerate() { - let m = m.map(|(s, e)| escape_bytes(&self.0.text[s..e])); - if let Some(name) = slot_to_name.get(&slot) { - map.entry(&name, &m); - } else { - map.entry(&slot, &m); - } - } - map.finish() - } -} - -/// Get a group by index. -/// -/// `'t` is the lifetime of the matched text. -/// -/// The text can't outlive the `Captures` object if this method is -/// used, because of how `Index` is defined (normally `a[i]` is part -/// of `a` and can't outlive it); to do that, use `get()` instead. -/// -/// # Panics -/// -/// If there is no group at the given index. -impl<'t> Index<usize> for Captures<'t> { - type Output = [u8]; - - fn index(&self, i: usize) -> &[u8] { - self.get(i) - .map(|m| m.as_bytes()) - .unwrap_or_else(|| panic!("no group at index '{}'", i)) - } -} - -/// Get a group by name. -/// -/// `'t` is the lifetime of the matched text and `'i` is the lifetime -/// of the group name (the index). -/// -/// The text can't outlive the `Captures` object if this method is -/// used, because of how `Index` is defined (normally `a[i]` is part -/// of `a` and can't outlive it); to do that, use `name` instead. -/// -/// # Panics -/// -/// If there is no group named by the given value. -impl<'t, 'i> Index<&'i str> for Captures<'t> { - type Output = [u8]; - - fn index<'a>(&'a self, name: &'i str) -> &'a [u8] { - self.name(name) - .map(|m| m.as_bytes()) - .unwrap_or_else(|| panic!("no group named '{}'", name)) - } -} - -/// An iterator that yields all capturing matches in the order in which they -/// appear in the regex. -/// -/// If a particular capture group didn't participate in the match, then `None` -/// is yielded for that capture. The first match always corresponds to the -/// overall match of the regex. -/// -/// The lifetime `'c` corresponds to the lifetime of the `Captures` value, and -/// the lifetime `'t` corresponds to the originally matched text. 
-#[derive(Clone, Debug)] -pub struct SubCaptureMatches<'c, 't> { - caps: &'c Captures<'t>, - it: SubCapturesPosIter<'c>, -} - -impl<'c, 't> Iterator for SubCaptureMatches<'c, 't> { - type Item = Option<Match<'t>>; - - fn next(&mut self) -> Option<Option<Match<'t>>> { - self.it - .next() - .map(|cap| cap.map(|(s, e)| Match::new(self.caps.text, s, e))) - } -} - -impl<'c, 't> FusedIterator for SubCaptureMatches<'c, 't> {} - -/// Replacer describes types that can be used to replace matches in a byte -/// string. -/// -/// In general, users of this crate shouldn't need to implement this trait, -/// since implementations are already provided for `&[u8]` along with other -/// variants of bytes types and `FnMut(&Captures) -> Vec<u8>` (or any -/// `FnMut(&Captures) -> T` where `T: AsRef<[u8]>`), which covers most use cases. -pub trait Replacer { - /// Appends text to `dst` to replace the current match. - /// - /// The current match is represented by `caps`, which is guaranteed to - /// have a match at capture group `0`. - /// - /// For example, a no-op replacement would be - /// `dst.extend(&caps[0])`. - fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>); - - /// Return a fixed unchanging replacement byte string. - /// - /// When doing replacements, if access to `Captures` is not needed (e.g., - /// the replacement byte string does not need `$` expansion), then it can - /// be beneficial to avoid finding sub-captures. - /// - /// In general, this is called once for every call to `replacen`. - fn no_expansion<'r>(&'r mut self) -> Option<Cow<'r, [u8]>> { - None - } - - /// Return a `Replacer` that borrows and wraps this `Replacer`. - /// - /// This is useful when you want to take a generic `Replacer` (which might - /// not be cloneable) and use it without consuming it, so it can be used - /// more than once. - /// - /// # Example - /// - /// ``` - /// use regex::bytes::{Regex, Replacer}; - /// - /// fn replace_all_twice<R: Replacer>( - /// re: Regex, - /// src: &[u8], - /// mut rep: R, - /// ) -> Vec<u8> { - /// let dst = re.replace_all(src, rep.by_ref()); - /// let dst = re.replace_all(&dst, rep.by_ref()); - /// dst.into_owned() - /// } - /// ``` - fn by_ref<'r>(&'r mut self) -> ReplacerRef<'r, Self> { - ReplacerRef(self) - } -} - -/// By-reference adaptor for a `Replacer` -/// -/// Returned by [`Replacer::by_ref`](trait.Replacer.html#method.by_ref). 
-#[derive(Debug)] -pub struct ReplacerRef<'a, R: ?Sized>(&'a mut R); - -impl<'a, R: Replacer + ?Sized + 'a> Replacer for ReplacerRef<'a, R> { - fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { - self.0.replace_append(caps, dst) - } - fn no_expansion<'r>(&'r mut self) -> Option<Cow<'r, [u8]>> { - self.0.no_expansion() - } -} - -impl<'a> Replacer for &'a [u8] { - fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { - caps.expand(*self, dst); - } - - fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> { - no_expansion(self) - } -} - -impl<'a> Replacer for &'a Vec<u8> { - fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { - caps.expand(*self, dst); - } - - fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> { - no_expansion(self) - } -} - -impl Replacer for Vec<u8> { - fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { - caps.expand(self, dst); - } - - fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> { - no_expansion(self) - } -} - -impl<'a> Replacer for Cow<'a, [u8]> { - fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { - caps.expand(self.as_ref(), dst); - } - - fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> { - no_expansion(self) - } -} - -impl<'a> Replacer for &'a Cow<'a, [u8]> { - fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { - caps.expand(self.as_ref(), dst); - } - - fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> { - no_expansion(self) - } -} - -fn no_expansion<T: AsRef<[u8]>>(t: &T) -> Option<Cow<'_, [u8]>> { - let s = t.as_ref(); - match find_byte(b'$', s) { - Some(_) => None, - None => Some(Cow::Borrowed(s)), - } -} - -impl<F, T> Replacer for F -where - F: FnMut(&Captures<'_>) -> T, - T: AsRef<[u8]>, -{ - fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut Vec<u8>) { - dst.extend_from_slice((*self)(caps).as_ref()); - } -} - -/// `NoExpand` indicates literal byte string replacement. -/// -/// It can be used with `replace` and `replace_all` to do a literal byte string -/// replacement without expanding `$name` to their corresponding capture -/// groups. This can be both convenient (to avoid escaping `$`, for example) -/// and performant (since capture groups don't need to be found). -/// -/// `'t` is the lifetime of the literal text. -#[derive(Clone, Debug)] -pub struct NoExpand<'t>(pub &'t [u8]); - -impl<'t> Replacer for NoExpand<'t> { - fn replace_append(&mut self, _: &Captures<'_>, dst: &mut Vec<u8>) { - dst.extend_from_slice(self.0); - } - - fn no_expansion(&mut self) -> Option<Cow<'_, [u8]>> { - Some(Cow::Borrowed(self.0)) - } -} diff --git a/collector/compile-benchmarks/regex-1.5.5/src/re_set.rs b/collector/compile-benchmarks/regex-1.5.5/src/re_set.rs deleted file mode 100644 index 73d59532e..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/src/re_set.rs +++ /dev/null @@ -1,475 +0,0 @@ -macro_rules! define_set { - ($name:ident, $builder_mod:ident, $text_ty:ty, $as_bytes:expr, - $(#[$doc_regexset_example:meta])* ) => { - pub mod $name { - use std::fmt; - use std::iter; - use std::slice; - use std::vec; - - use crate::error::Error; - use crate::exec::Exec; - use crate::re_builder::$builder_mod::RegexSetBuilder; - use crate::re_trait::RegularExpression; - -/// Match multiple (possibly overlapping) regular expressions in a single scan. -/// -/// A regex set corresponds to the union of two or more regular expressions. -/// That is, a regex set will match text where at least one of its -/// constituent regular expressions matches. 
A regex set as its formulated here -/// provides a touch more power: it will also report *which* regular -/// expressions in the set match. Indeed, this is the key difference between -/// regex sets and a single `Regex` with many alternates, since only one -/// alternate can match at a time. -/// -/// For example, consider regular expressions to match email addresses and -/// domains: `[a-z]+@[a-z]+\.(com|org|net)` and `[a-z]+\.(com|org|net)`. If a -/// regex set is constructed from those regexes, then searching the text -/// `foo@example.com` will report both regexes as matching. Of course, one -/// could accomplish this by compiling each regex on its own and doing two -/// searches over the text. The key advantage of using a regex set is that it -/// will report the matching regexes using a *single pass through the text*. -/// If one has hundreds or thousands of regexes to match repeatedly (like a URL -/// router for a complex web application or a user agent matcher), then a regex -/// set can realize huge performance gains. -/// -/// # Example -/// -/// This shows how the above two regexes (for matching email addresses and -/// domains) might work: -/// -$(#[$doc_regexset_example])* -/// -/// Note that it would be possible to adapt the above example to using `Regex` -/// with an expression like: -/// -/// ```text -/// (?P<email>[a-z]+@(?P<email_domain>[a-z]+[.](com|org|net)))|(?P<domain>[a-z]+[.](com|org|net)) -/// ``` -/// -/// After a match, one could then inspect the capture groups to figure out -/// which alternates matched. The problem is that it is hard to make this -/// approach scale when there are many regexes since the overlap between each -/// alternate isn't always obvious to reason about. -/// -/// # Limitations -/// -/// Regex sets are limited to answering the following two questions: -/// -/// 1. Does any regex in the set match? -/// 2. If so, which regexes in the set match? -/// -/// As with the main `Regex` type, it is cheaper to ask (1) instead of (2) -/// since the matching engines can stop after the first match is found. -/// -/// Other features like finding the location of successive matches or their -/// sub-captures aren't supported. If you need this functionality, the -/// recommended approach is to compile each regex in the set independently and -/// selectively match them based on which regexes in the set matched. -/// -/// # Performance -/// -/// A `RegexSet` has the same performance characteristics as `Regex`. Namely, -/// search takes `O(mn)` time, where `m` is proportional to the size of the -/// regex set and `n` is proportional to the length of the search text. -#[derive(Clone)] -pub struct RegexSet(Exec); - -impl RegexSet { - /// Create a new regex set with the given regular expressions. - /// - /// This takes an iterator of `S`, where `S` is something that can produce - /// a `&str`. If any of the strings in the iterator are not valid regular - /// expressions, then an error is returned. - /// - /// # Example - /// - /// Create a new regex set from an iterator of strings: - /// - /// ```rust - /// # use regex::RegexSet; - /// let set = RegexSet::new(&[r"\w+", r"\d+"]).unwrap(); - /// assert!(set.is_match("foo")); - /// ``` - pub fn new<I, S>(exprs: I) -> Result<RegexSet, Error> - where S: AsRef<str>, I: IntoIterator<Item=S> { - RegexSetBuilder::new(exprs).build() - } - - /// Create a new empty regex set. 
- /// - /// # Example - /// - /// ```rust - /// # use regex::RegexSet; - /// let set = RegexSet::empty(); - /// assert!(set.is_empty()); - /// ``` - pub fn empty() -> RegexSet { - RegexSetBuilder::new(&[""; 0]).build().unwrap() - } - - /// Returns true if and only if one of the regexes in this set matches - /// the text given. - /// - /// This method should be preferred if you only need to test whether any - /// of the regexes in the set should match, but don't care about *which* - /// regexes matched. This is because the underlying matching engine will - /// quit immediately after seeing the first match instead of continuing to - /// find all matches. - /// - /// Note that as with searches using `Regex`, the expression is unanchored - /// by default. That is, if the regex does not start with `^` or `\A`, or - /// end with `$` or `\z`, then it is permitted to match anywhere in the - /// text. - /// - /// # Example - /// - /// Tests whether a set matches some text: - /// - /// ```rust - /// # use regex::RegexSet; - /// let set = RegexSet::new(&[r"\w+", r"\d+"]).unwrap(); - /// assert!(set.is_match("foo")); - /// assert!(!set.is_match("☃")); - /// ``` - pub fn is_match(&self, text: $text_ty) -> bool { - self.is_match_at(text, 0) - } - - /// Returns the same as is_match, but starts the search at the given - /// offset. - /// - /// The significance of the starting point is that it takes the surrounding - /// context into consideration. For example, the `\A` anchor can only - /// match when `start == 0`. - #[doc(hidden)] - pub fn is_match_at(&self, text: $text_ty, start: usize) -> bool { - self.0.searcher().is_match_at($as_bytes(text), start) - } - - /// Returns the set of regular expressions that match in the given text. - /// - /// The set returned contains the index of each regular expression that - /// matches in the given text. The index is in correspondence with the - /// order of regular expressions given to `RegexSet`'s constructor. - /// - /// The set can also be used to iterate over the matched indices. - /// - /// Note that as with searches using `Regex`, the expression is unanchored - /// by default. That is, if the regex does not start with `^` or `\A`, or - /// end with `$` or `\z`, then it is permitted to match anywhere in the - /// text. - /// - /// # Example - /// - /// Tests which regular expressions match the given text: - /// - /// ```rust - /// # use regex::RegexSet; - /// let set = RegexSet::new(&[ - /// r"\w+", - /// r"\d+", - /// r"\pL+", - /// r"foo", - /// r"bar", - /// r"barfoo", - /// r"foobar", - /// ]).unwrap(); - /// let matches: Vec<_> = set.matches("foobar").into_iter().collect(); - /// assert_eq!(matches, vec![0, 2, 3, 4, 6]); - /// - /// // You can also test whether a particular regex matched: - /// let matches = set.matches("foobar"); - /// assert!(!matches.matched(5)); - /// assert!(matches.matched(6)); - /// ``` - pub fn matches(&self, text: $text_ty) -> SetMatches { - let mut matches = vec![false; self.0.regex_strings().len()]; - let any = self.read_matches_at(&mut matches, text, 0); - SetMatches { - matched_any: any, - matches: matches, - } - } - - /// Returns the same as matches, but starts the search at the given - /// offset and stores the matches into the slice given. - /// - /// The significance of the starting point is that it takes the surrounding - /// context into consideration. For example, the `\A` anchor can only - /// match when `start == 0`. - /// - /// `matches` must have a length that is at least the number of regexes - /// in this set. 
- /// - /// This method returns true if and only if at least one member of - /// `matches` is true after executing the set against `text`. - #[doc(hidden)] - pub fn read_matches_at( - &self, - matches: &mut [bool], - text: $text_ty, - start: usize, - ) -> bool { - self.0.searcher().many_matches_at(matches, $as_bytes(text), start) - } - - /// Returns the total number of regular expressions in this set. - pub fn len(&self) -> usize { - self.0.regex_strings().len() - } - - /// Returns `true` if this set contains no regular expressions. - pub fn is_empty(&self) -> bool { - self.0.regex_strings().is_empty() - } - - /// Returns the patterns that this set will match on. - /// - /// This function can be used to determine the pattern for a match. The - /// slice returned has exactly as many patterns givens to this regex set, - /// and the order of the slice is the same as the order of the patterns - /// provided to the set. - /// - /// # Example - /// - /// ```rust - /// # use regex::RegexSet; - /// let set = RegexSet::new(&[ - /// r"\w+", - /// r"\d+", - /// r"\pL+", - /// r"foo", - /// r"bar", - /// r"barfoo", - /// r"foobar", - /// ]).unwrap(); - /// let matches: Vec<_> = set - /// .matches("foobar") - /// .into_iter() - /// .map(|match_idx| &set.patterns()[match_idx]) - /// .collect(); - /// assert_eq!(matches, vec![r"\w+", r"\pL+", r"foo", r"bar", r"foobar"]); - /// ``` - pub fn patterns(&self) -> &[String] { - self.0.regex_strings() - } -} - -/// A set of matches returned by a regex set. -#[derive(Clone, Debug)] -pub struct SetMatches { - matched_any: bool, - matches: Vec<bool>, -} - -impl SetMatches { - /// Whether this set contains any matches. - pub fn matched_any(&self) -> bool { - self.matched_any - } - - /// Whether the regex at the given index matched. - /// - /// The index for a regex is determined by its insertion order upon the - /// initial construction of a `RegexSet`, starting at `0`. - /// - /// # Panics - /// - /// If `regex_index` is greater than or equal to `self.len()`. - pub fn matched(&self, regex_index: usize) -> bool { - self.matches[regex_index] - } - - /// The total number of regexes in the set that created these matches. - pub fn len(&self) -> usize { - self.matches.len() - } - - /// Returns an iterator over indexes in the regex that matched. - /// - /// This will always produces matches in ascending order of index, where - /// the index corresponds to the index of the regex that matched with - /// respect to its position when initially building the set. - pub fn iter(&self) -> SetMatchesIter<'_> { - SetMatchesIter((&*self.matches).into_iter().enumerate()) - } -} - -impl IntoIterator for SetMatches { - type IntoIter = SetMatchesIntoIter; - type Item = usize; - - fn into_iter(self) -> Self::IntoIter { - SetMatchesIntoIter(self.matches.into_iter().enumerate()) - } -} - -impl<'a> IntoIterator for &'a SetMatches { - type IntoIter = SetMatchesIter<'a>; - type Item = usize; - - fn into_iter(self) -> Self::IntoIter { - self.iter() - } -} - -/// An owned iterator over the set of matches from a regex set. -/// -/// This will always produces matches in ascending order of index, where the -/// index corresponds to the index of the regex that matched with respect to -/// its position when initially building the set. 
-#[derive(Debug)] -pub struct SetMatchesIntoIter(iter::Enumerate<vec::IntoIter<bool>>); - -impl Iterator for SetMatchesIntoIter { - type Item = usize; - - fn next(&mut self) -> Option<usize> { - loop { - match self.0.next() { - None => return None, - Some((_, false)) => {} - Some((i, true)) => return Some(i), - } - } - } - - fn size_hint(&self) -> (usize, Option<usize>) { - self.0.size_hint() - } -} - -impl DoubleEndedIterator for SetMatchesIntoIter { - fn next_back(&mut self) -> Option<usize> { - loop { - match self.0.next_back() { - None => return None, - Some((_, false)) => {} - Some((i, true)) => return Some(i), - } - } - } -} - -impl iter::FusedIterator for SetMatchesIntoIter {} - -/// A borrowed iterator over the set of matches from a regex set. -/// -/// The lifetime `'a` refers to the lifetime of a `SetMatches` value. -/// -/// This will always produces matches in ascending order of index, where the -/// index corresponds to the index of the regex that matched with respect to -/// its position when initially building the set. -#[derive(Clone, Debug)] -pub struct SetMatchesIter<'a>(iter::Enumerate<slice::Iter<'a, bool>>); - -impl<'a> Iterator for SetMatchesIter<'a> { - type Item = usize; - - fn next(&mut self) -> Option<usize> { - loop { - match self.0.next() { - None => return None, - Some((_, &false)) => {} - Some((i, &true)) => return Some(i), - } - } - } - - fn size_hint(&self) -> (usize, Option<usize>) { - self.0.size_hint() - } -} - -impl<'a> DoubleEndedIterator for SetMatchesIter<'a> { - fn next_back(&mut self) -> Option<usize> { - loop { - match self.0.next_back() { - None => return None, - Some((_, &false)) => {} - Some((i, &true)) => return Some(i), - } - } - } -} - -impl<'a> iter::FusedIterator for SetMatchesIter<'a> {} - -#[doc(hidden)] -impl From<Exec> for RegexSet { - fn from(exec: Exec) -> Self { - RegexSet(exec) - } -} - -impl fmt::Debug for RegexSet { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "RegexSet({:?})", self.0.regex_strings()) - } -} - -#[allow(dead_code)] fn as_bytes_str(text: &str) -> &[u8] { text.as_bytes() } -#[allow(dead_code)] fn as_bytes_bytes(text: &[u8]) -> &[u8] { text } - } - } -} - -define_set! { - unicode, - set_unicode, - &str, - as_bytes_str, -/// ```rust -/// # use regex::RegexSet; -/// let set = RegexSet::new(&[ -/// r"[a-z]+@[a-z]+\.(com|org|net)", -/// r"[a-z]+\.(com|org|net)", -/// ]).unwrap(); -/// -/// // Ask whether any regexes in the set match. -/// assert!(set.is_match("foo@example.com")); -/// -/// // Identify which regexes in the set match. -/// let matches: Vec<_> = set.matches("foo@example.com").into_iter().collect(); -/// assert_eq!(vec![0, 1], matches); -/// -/// // Try again, but with text that only matches one of the regexes. -/// let matches: Vec<_> = set.matches("example.com").into_iter().collect(); -/// assert_eq!(vec![1], matches); -/// -/// // Try again, but with text that doesn't match any regex in the set. -/// let matches: Vec<_> = set.matches("example").into_iter().collect(); -/// assert!(matches.is_empty()); -/// ``` -} - -define_set! { - bytes, - set_bytes, - &[u8], - as_bytes_bytes, -/// ```rust -/// # use regex::bytes::RegexSet; -/// let set = RegexSet::new(&[ -/// r"[a-z]+@[a-z]+\.(com|org|net)", -/// r"[a-z]+\.(com|org|net)", -/// ]).unwrap(); -/// -/// // Ask whether any regexes in the set match. -/// assert!(set.is_match(b"foo@example.com")); -/// -/// // Identify which regexes in the set match. 
-/// let matches: Vec<_> = set.matches(b"foo@example.com").into_iter().collect(); -/// assert_eq!(vec![0, 1], matches); -/// -/// // Try again, but with text that only matches one of the regexes. -/// let matches: Vec<_> = set.matches(b"example.com").into_iter().collect(); -/// assert_eq!(vec![1], matches); -/// -/// // Try again, but with text that doesn't match any regex in the set. -/// let matches: Vec<_> = set.matches(b"example").into_iter().collect(); -/// assert!(matches.is_empty()); -/// ``` -} diff --git a/collector/compile-benchmarks/regex-1.5.5/src/re_trait.rs b/collector/compile-benchmarks/regex-1.5.5/src/re_trait.rs deleted file mode 100644 index 680aa5459..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/src/re_trait.rs +++ /dev/null @@ -1,283 +0,0 @@ -use std::fmt; -use std::iter::FusedIterator; - -/// Slot is a single saved capture location. Note that there are two slots for -/// every capture in a regular expression (one slot each for the start and end -/// of the capture). -pub type Slot = Option<usize>; - -/// Locations represents the offsets of each capturing group in a regex for -/// a single match. -/// -/// Unlike `Captures`, a `Locations` value only stores offsets. -#[doc(hidden)] -#[derive(Clone, Debug)] -pub struct Locations(Vec<Slot>); - -impl Locations { - /// Returns the start and end positions of the Nth capture group. Returns - /// `None` if `i` is not a valid capture group or if the capture group did - /// not match anything. The positions returned are *always* byte indices - /// with respect to the original string matched. - pub fn pos(&self, i: usize) -> Option<(usize, usize)> { - let (s, e) = (i * 2, i * 2 + 1); - match (self.0.get(s), self.0.get(e)) { - (Some(&Some(s)), Some(&Some(e))) => Some((s, e)), - _ => None, - } - } - - /// Creates an iterator of all the capture group positions in order of - /// appearance in the regular expression. Positions are byte indices - /// in terms of the original string matched. - pub fn iter(&self) -> SubCapturesPosIter<'_> { - SubCapturesPosIter { idx: 0, locs: self } - } - - /// Returns the total number of capturing groups. - /// - /// This is always at least `1` since every regex has at least `1` - /// capturing group that corresponds to the entire match. - pub fn len(&self) -> usize { - self.0.len() / 2 - } - - /// Return the individual slots as a slice. - pub(crate) fn as_slots(&mut self) -> &mut [Slot] { - &mut self.0 - } -} - -/// An iterator over capture group positions for a particular match of a -/// regular expression. -/// -/// Positions are byte indices in terms of the original string matched. -/// -/// `'c` is the lifetime of the captures. -#[derive(Clone, Debug)] -pub struct SubCapturesPosIter<'c> { - idx: usize, - locs: &'c Locations, -} - -impl<'c> Iterator for SubCapturesPosIter<'c> { - type Item = Option<(usize, usize)>; - - fn next(&mut self) -> Option<Option<(usize, usize)>> { - if self.idx >= self.locs.len() { - return None; - } - let x = match self.locs.pos(self.idx) { - None => Some(None), - Some((s, e)) => Some(Some((s, e))), - }; - self.idx += 1; - x - } -} - -impl<'c> FusedIterator for SubCapturesPosIter<'c> {} - -/// `RegularExpression` describes types that can implement regex searching. -/// -/// This trait is my attempt at reducing code duplication and to standardize -/// the internal API. Specific duplication that is avoided are the `find` -/// and `capture` iterators, which are slightly tricky. 
-/// -/// It's not clear whether this trait is worth it, and it also isn't -/// clear whether it's useful as a public trait or not. Methods like -/// `next_after_empty` reak of bad design, but the rest of the methods seem -/// somewhat reasonable. One particular thing this trait would expose would be -/// the ability to start the search of a regex anywhere in a haystack, which -/// isn't possible in the current public API. -pub trait RegularExpression: Sized + fmt::Debug { - /// The type of the haystack. - type Text: ?Sized + fmt::Debug; - - /// The number of capture slots in the compiled regular expression. This is - /// always two times the number of capture groups (two slots per group). - fn slots_len(&self) -> usize; - - /// Allocates fresh space for all capturing groups in this regex. - fn locations(&self) -> Locations { - Locations(vec![None; self.slots_len()]) - } - - /// Returns the position of the next character after `i`. - /// - /// For example, a haystack with type `&[u8]` probably returns `i+1`, - /// whereas a haystack with type `&str` probably returns `i` plus the - /// length of the next UTF-8 sequence. - fn next_after_empty(&self, text: &Self::Text, i: usize) -> usize; - - /// Returns the location of the shortest match. - fn shortest_match_at( - &self, - text: &Self::Text, - start: usize, - ) -> Option<usize>; - - /// Returns whether the regex matches the text given. - fn is_match_at(&self, text: &Self::Text, start: usize) -> bool; - - /// Returns the leftmost-first match location if one exists. - fn find_at( - &self, - text: &Self::Text, - start: usize, - ) -> Option<(usize, usize)>; - - /// Returns the leftmost-first match location if one exists, and also - /// fills in any matching capture slot locations. - fn captures_read_at( - &self, - locs: &mut Locations, - text: &Self::Text, - start: usize, - ) -> Option<(usize, usize)>; - - /// Returns an iterator over all non-overlapping successive leftmost-first - /// matches. - fn find_iter(self, text: &Self::Text) -> Matches<'_, Self> { - Matches { re: self, text: text, last_end: 0, last_match: None } - } - - /// Returns an iterator over all non-overlapping successive leftmost-first - /// matches with captures. - fn captures_iter(self, text: &Self::Text) -> CaptureMatches<'_, Self> { - CaptureMatches(self.find_iter(text)) - } -} - -/// An iterator over all non-overlapping successive leftmost-first matches. -#[derive(Debug)] -pub struct Matches<'t, R> -where - R: RegularExpression, - R::Text: 't, -{ - re: R, - text: &'t R::Text, - last_end: usize, - last_match: Option<usize>, -} - -impl<'t, R> Matches<'t, R> -where - R: RegularExpression, - R::Text: 't, -{ - /// Return the text being searched. - pub fn text(&self) -> &'t R::Text { - self.text - } - - /// Return the underlying regex. - pub fn regex(&self) -> &R { - &self.re - } -} - -impl<'t, R> Iterator for Matches<'t, R> -where - R: RegularExpression, - R::Text: 't + AsRef<[u8]>, -{ - type Item = (usize, usize); - - fn next(&mut self) -> Option<(usize, usize)> { - if self.last_end > self.text.as_ref().len() { - return None; - } - let (s, e) = match self.re.find_at(self.text, self.last_end) { - None => return None, - Some((s, e)) => (s, e), - }; - if s == e { - // This is an empty match. To ensure we make progress, start - // the next search at the smallest possible starting position - // of the next match following this one. - self.last_end = self.re.next_after_empty(self.text, e); - // Don't accept empty matches immediately following a match. 
- // Just move on to the next match. - if Some(e) == self.last_match { - return self.next(); - } - } else { - self.last_end = e; - } - self.last_match = Some(e); - Some((s, e)) - } -} - -impl<'t, R> FusedIterator for Matches<'t, R> -where - R: RegularExpression, - R::Text: 't + AsRef<[u8]>, -{ -} - -/// An iterator over all non-overlapping successive leftmost-first matches with -/// captures. -#[derive(Debug)] -pub struct CaptureMatches<'t, R>(Matches<'t, R>) -where - R: RegularExpression, - R::Text: 't; - -impl<'t, R> CaptureMatches<'t, R> -where - R: RegularExpression, - R::Text: 't, -{ - /// Return the text being searched. - pub fn text(&self) -> &'t R::Text { - self.0.text() - } - - /// Return the underlying regex. - pub fn regex(&self) -> &R { - self.0.regex() - } -} - -impl<'t, R> Iterator for CaptureMatches<'t, R> -where - R: RegularExpression, - R::Text: 't + AsRef<[u8]>, -{ - type Item = Locations; - - fn next(&mut self) -> Option<Locations> { - if self.0.last_end > self.0.text.as_ref().len() { - return None; - } - let mut locs = self.0.re.locations(); - let (s, e) = match self.0.re.captures_read_at( - &mut locs, - self.0.text, - self.0.last_end, - ) { - None => return None, - Some((s, e)) => (s, e), - }; - if s == e { - self.0.last_end = self.0.re.next_after_empty(self.0.text, e); - if Some(e) == self.0.last_match { - return self.next(); - } - } else { - self.0.last_end = e; - } - self.0.last_match = Some(e); - Some(locs) - } -} - -impl<'t, R> FusedIterator for CaptureMatches<'t, R> -where - R: RegularExpression, - R::Text: 't + AsRef<[u8]>, -{ -} diff --git a/collector/compile-benchmarks/regex-1.5.5/src/re_unicode.rs b/collector/compile-benchmarks/regex-1.5.5/src/re_unicode.rs deleted file mode 100644 index e4871a621..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/src/re_unicode.rs +++ /dev/null @@ -1,1301 +0,0 @@ -use std::borrow::Cow; -use std::collections::HashMap; -use std::fmt; -use std::iter::FusedIterator; -use std::ops::{Index, Range}; -use std::str::FromStr; -use std::sync::Arc; - -use crate::find_byte::find_byte; - -use crate::error::Error; -use crate::exec::{Exec, ExecNoSyncStr}; -use crate::expand::expand_str; -use crate::re_builder::unicode::RegexBuilder; -use crate::re_trait::{self, RegularExpression, SubCapturesPosIter}; - -/// Escapes all regular expression meta characters in `text`. -/// -/// The string returned may be safely used as a literal in a regular -/// expression. -pub fn escape(text: &str) -> String { - regex_syntax::escape(text) -} - -/// Match represents a single match of a regex in a haystack. -/// -/// The lifetime parameter `'t` refers to the lifetime of the matched text. -#[derive(Copy, Clone, Debug, Eq, PartialEq)] -pub struct Match<'t> { - text: &'t str, - start: usize, - end: usize, -} - -impl<'t> Match<'t> { - /// Returns the starting byte offset of the match in the haystack. - #[inline] - pub fn start(&self) -> usize { - self.start - } - - /// Returns the ending byte offset of the match in the haystack. - #[inline] - pub fn end(&self) -> usize { - self.end - } - - /// Returns the range over the starting and ending byte offsets of the - /// match in the haystack. - #[inline] - pub fn range(&self) -> Range<usize> { - self.start..self.end - } - - /// Returns the matched text. - #[inline] - pub fn as_str(&self) -> &'t str { - &self.text[self.range()] - } - - /// Creates a new match from the given haystack and byte offsets. 
- #[inline] - fn new(haystack: &'t str, start: usize, end: usize) -> Match<'t> { - Match { text: haystack, start: start, end: end } - } -} - -impl<'t> From<Match<'t>> for &'t str { - fn from(m: Match<'t>) -> &'t str { - m.as_str() - } -} - -impl<'t> From<Match<'t>> for Range<usize> { - fn from(m: Match<'t>) -> Range<usize> { - m.range() - } -} - -/// A compiled regular expression for matching Unicode strings. -/// -/// It is represented as either a sequence of bytecode instructions (dynamic) -/// or as a specialized Rust function (native). It can be used to search, split -/// or replace text. All searching is done with an implicit `.*?` at the -/// beginning and end of an expression. To force an expression to match the -/// whole string (or a prefix or a suffix), you must use an anchor like `^` or -/// `$` (or `\A` and `\z`). -/// -/// While this crate will handle Unicode strings (whether in the regular -/// expression or in the search text), all positions returned are **byte -/// indices**. Every byte index is guaranteed to be at a Unicode code point -/// boundary. -/// -/// The lifetimes `'r` and `'t` in this crate correspond to the lifetime of a -/// compiled regular expression and text to search, respectively. -/// -/// The only methods that allocate new strings are the string replacement -/// methods. All other methods (searching and splitting) return borrowed -/// pointers into the string given. -/// -/// # Examples -/// -/// Find the location of a US phone number: -/// -/// ```rust -/// # use regex::Regex; -/// let re = Regex::new("[0-9]{3}-[0-9]{3}-[0-9]{4}").unwrap(); -/// let mat = re.find("phone: 111-222-3333").unwrap(); -/// assert_eq!((mat.start(), mat.end()), (7, 19)); -/// ``` -/// -/// # Using the `std::str::pattern` methods with `Regex` -/// -/// > **Note**: This section requires that this crate is compiled with the -/// > `pattern` Cargo feature enabled, which **requires nightly Rust**. -/// -/// Since `Regex` implements `Pattern`, you can use regexes with methods -/// defined on `&str`. For example, `is_match`, `find`, `find_iter` -/// and `split` can be replaced with `str::contains`, `str::find`, -/// `str::match_indices` and `str::split`. -/// -/// Here are some examples: -/// -/// ```rust,ignore -/// # use regex::Regex; -/// let re = Regex::new(r"\d+").unwrap(); -/// let haystack = "a111b222c"; -/// -/// assert!(haystack.contains(&re)); -/// assert_eq!(haystack.find(&re), Some(1)); -/// assert_eq!(haystack.match_indices(&re).collect::<Vec<_>>(), -/// vec![(1, 4), (5, 8)]); -/// assert_eq!(haystack.split(&re).collect::<Vec<_>>(), vec!["a", "b", "c"]); -/// ``` -#[derive(Clone)] -pub struct Regex(Exec); - -impl fmt::Display for Regex { - /// Shows the original regular expression. - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}", self.as_str()) - } -} - -impl fmt::Debug for Regex { - /// Shows the original regular expression. - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Display::fmt(self, f) - } -} - -#[doc(hidden)] -impl From<Exec> for Regex { - fn from(exec: Exec) -> Regex { - Regex(exec) - } -} - -impl FromStr for Regex { - type Err = Error; - - /// Attempts to parse a string into a regular expression - fn from_str(s: &str) -> Result<Regex, Error> { - Regex::new(s) - } -} - -/// Core regular expression methods. -impl Regex { - /// Compiles a regular expression. Once compiled, it can be used repeatedly - /// to search, split or replace text in a string. 
- /// - /// If an invalid expression is given, then an error is returned. - pub fn new(re: &str) -> Result<Regex, Error> { - RegexBuilder::new(re).build() - } - - /// Returns true if and only if there is a match for the regex in the - /// string given. - /// - /// It is recommended to use this method if all you need to do is test - /// a match, since the underlying matching engine may be able to do less - /// work. - /// - /// # Example - /// - /// Test if some text contains at least one word with exactly 13 - /// Unicode word characters: - /// - /// ```rust - /// # use regex::Regex; - /// # fn main() { - /// let text = "I categorically deny having triskaidekaphobia."; - /// assert!(Regex::new(r"\b\w{13}\b").unwrap().is_match(text)); - /// # } - /// ``` - pub fn is_match(&self, text: &str) -> bool { - self.is_match_at(text, 0) - } - - /// Returns the start and end byte range of the leftmost-first match in - /// `text`. If no match exists, then `None` is returned. - /// - /// Note that this should only be used if you want to discover the position - /// of the match. Testing the existence of a match is faster if you use - /// `is_match`. - /// - /// # Example - /// - /// Find the start and end location of the first word with exactly 13 - /// Unicode word characters: - /// - /// ```rust - /// # use regex::Regex; - /// # fn main() { - /// let text = "I categorically deny having triskaidekaphobia."; - /// let mat = Regex::new(r"\b\w{13}\b").unwrap().find(text).unwrap(); - /// assert_eq!(mat.start(), 2); - /// assert_eq!(mat.end(), 15); - /// # } - /// ``` - pub fn find<'t>(&self, text: &'t str) -> Option<Match<'t>> { - self.find_at(text, 0) - } - - /// Returns an iterator for each successive non-overlapping match in - /// `text`, returning the start and end byte indices with respect to - /// `text`. - /// - /// # Example - /// - /// Find the start and end location of every word with exactly 13 Unicode - /// word characters: - /// - /// ```rust - /// # use regex::Regex; - /// # fn main() { - /// let text = "Retroactively relinquishing remunerations is reprehensible."; - /// for mat in Regex::new(r"\b\w{13}\b").unwrap().find_iter(text) { - /// println!("{:?}", mat); - /// } - /// # } - /// ``` - pub fn find_iter<'r, 't>(&'r self, text: &'t str) -> Matches<'r, 't> { - Matches(self.0.searcher_str().find_iter(text)) - } - - /// Returns the capture groups corresponding to the leftmost-first - /// match in `text`. Capture group `0` always corresponds to the entire - /// match. If no match is found, then `None` is returned. - /// - /// You should only use `captures` if you need access to the location of - /// capturing group matches. Otherwise, `find` is faster for discovering - /// the location of the overall match. - /// - /// # Examples - /// - /// Say you have some text with movie names and their release years, - /// like "'Citizen Kane' (1941)". It'd be nice if we could search for text - /// looking like that, while also extracting the movie name and its release - /// year separately. - /// - /// ```rust - /// # use regex::Regex; - /// # fn main() { - /// let re = Regex::new(r"'([^']+)'\s+\((\d{4})\)").unwrap(); - /// let text = "Not my favorite movie: 'Citizen Kane' (1941)."; - /// let caps = re.captures(text).unwrap(); - /// assert_eq!(caps.get(1).unwrap().as_str(), "Citizen Kane"); - /// assert_eq!(caps.get(2).unwrap().as_str(), "1941"); - /// assert_eq!(caps.get(0).unwrap().as_str(), "'Citizen Kane' (1941)"); - /// // You can also access the groups by index using the Index notation. 
- /// // Note that this will panic on an invalid index. - /// assert_eq!(&caps[1], "Citizen Kane"); - /// assert_eq!(&caps[2], "1941"); - /// assert_eq!(&caps[0], "'Citizen Kane' (1941)"); - /// # } - /// ``` - /// - /// Note that the full match is at capture group `0`. Each subsequent - /// capture group is indexed by the order of its opening `(`. - /// - /// We can make this example a bit clearer by using *named* capture groups: - /// - /// ```rust - /// # use regex::Regex; - /// # fn main() { - /// let re = Regex::new(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)") - /// .unwrap(); - /// let text = "Not my favorite movie: 'Citizen Kane' (1941)."; - /// let caps = re.captures(text).unwrap(); - /// assert_eq!(caps.name("title").unwrap().as_str(), "Citizen Kane"); - /// assert_eq!(caps.name("year").unwrap().as_str(), "1941"); - /// assert_eq!(caps.get(0).unwrap().as_str(), "'Citizen Kane' (1941)"); - /// // You can also access the groups by name using the Index notation. - /// // Note that this will panic on an invalid group name. - /// assert_eq!(&caps["title"], "Citizen Kane"); - /// assert_eq!(&caps["year"], "1941"); - /// assert_eq!(&caps[0], "'Citizen Kane' (1941)"); - /// - /// # } - /// ``` - /// - /// Here we name the capture groups, which we can access with the `name` - /// method or the `Index` notation with a `&str`. Note that the named - /// capture groups are still accessible with `get` or the `Index` notation - /// with a `usize`. - /// - /// The `0`th capture group is always unnamed, so it must always be - /// accessed with `get(0)` or `[0]`. - pub fn captures<'t>(&self, text: &'t str) -> Option<Captures<'t>> { - let mut locs = self.capture_locations(); - self.captures_read_at(&mut locs, text, 0).map(move |_| Captures { - text: text, - locs: locs.0, - named_groups: self.0.capture_name_idx().clone(), - }) - } - - /// Returns an iterator over all the non-overlapping capture groups matched - /// in `text`. This is operationally the same as `find_iter`, except it - /// yields information about capturing group matches. - /// - /// # Example - /// - /// We can use this to find all movie titles and their release years in - /// some text, where the movie is formatted like "'Title' (xxxx)": - /// - /// ```rust - /// # use regex::Regex; - /// # fn main() { - /// let re = Regex::new(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)") - /// .unwrap(); - /// let text = "'Citizen Kane' (1941), 'The Wizard of Oz' (1939), 'M' (1931)."; - /// for caps in re.captures_iter(text) { - /// println!("Movie: {:?}, Released: {:?}", - /// &caps["title"], &caps["year"]); - /// } - /// // Output: - /// // Movie: Citizen Kane, Released: 1941 - /// // Movie: The Wizard of Oz, Released: 1939 - /// // Movie: M, Released: 1931 - /// # } - /// ``` - pub fn captures_iter<'r, 't>( - &'r self, - text: &'t str, - ) -> CaptureMatches<'r, 't> { - CaptureMatches(self.0.searcher_str().captures_iter(text)) - } - - /// Returns an iterator of substrings of `text` delimited by a match of the - /// regular expression. Namely, each element of the iterator corresponds to - /// text that *isn't* matched by the regular expression. - /// - /// This method will *not* copy the text given. 
- /// - /// # Example - /// - /// To split a string delimited by arbitrary amounts of spaces or tabs: - /// - /// ```rust - /// # use regex::Regex; - /// # fn main() { - /// let re = Regex::new(r"[ \t]+").unwrap(); - /// let fields: Vec<&str> = re.split("a b \t c\td e").collect(); - /// assert_eq!(fields, vec!["a", "b", "c", "d", "e"]); - /// # } - /// ``` - pub fn split<'r, 't>(&'r self, text: &'t str) -> Split<'r, 't> { - Split { finder: self.find_iter(text), last: 0 } - } - - /// Returns an iterator of at most `limit` substrings of `text` delimited - /// by a match of the regular expression. (A `limit` of `0` will return no - /// substrings.) Namely, each element of the iterator corresponds to text - /// that *isn't* matched by the regular expression. The remainder of the - /// string that is not split will be the last element in the iterator. - /// - /// This method will *not* copy the text given. - /// - /// # Example - /// - /// Get the first two words in some text: - /// - /// ```rust - /// # use regex::Regex; - /// # fn main() { - /// let re = Regex::new(r"\W+").unwrap(); - /// let fields: Vec<&str> = re.splitn("Hey! How are you?", 3).collect(); - /// assert_eq!(fields, vec!("Hey", "How", "are you?")); - /// # } - /// ``` - pub fn splitn<'r, 't>( - &'r self, - text: &'t str, - limit: usize, - ) -> SplitN<'r, 't> { - SplitN { splits: self.split(text), n: limit } - } - - /// Replaces the leftmost-first match with the replacement provided. - /// The replacement can be a regular string (where `$N` and `$name` are - /// expanded to match capture groups) or a function that takes the matches' - /// `Captures` and returns the replaced string. - /// - /// If no match is found, then a copy of the string is returned unchanged. - /// - /// # Replacement string syntax - /// - /// All instances of `$name` in the replacement text is replaced with the - /// corresponding capture group `name`. - /// - /// `name` may be an integer corresponding to the index of the - /// capture group (counted by order of opening parenthesis where `0` is the - /// entire match) or it can be a name (consisting of letters, digits or - /// underscores) corresponding to a named capture group. - /// - /// If `name` isn't a valid capture group (whether the name doesn't exist - /// or isn't a valid index), then it is replaced with the empty string. - /// - /// The longest possible name is used. e.g., `$1a` looks up the capture - /// group named `1a` and not the capture group at index `1`. To exert more - /// precise control over the name, use braces, e.g., `${1}a`. - /// - /// To write a literal `$` use `$$`. - /// - /// # Examples - /// - /// Note that this function is polymorphic with respect to the replacement. - /// In typical usage, this can just be a normal string: - /// - /// ```rust - /// # use regex::Regex; - /// # fn main() { - /// let re = Regex::new("[^01]+").unwrap(); - /// assert_eq!(re.replace("1078910", ""), "1010"); - /// # } - /// ``` - /// - /// But anything satisfying the `Replacer` trait will work. For example, - /// a closure of type `|&Captures| -> String` provides direct access to the - /// captures corresponding to a match. 
This allows one to access - /// capturing group matches easily: - /// - /// ```rust - /// # use regex::Regex; - /// # use regex::Captures; fn main() { - /// let re = Regex::new(r"([^,\s]+),\s+(\S+)").unwrap(); - /// let result = re.replace("Springsteen, Bruce", |caps: &Captures| { - /// format!("{} {}", &caps[2], &caps[1]) - /// }); - /// assert_eq!(result, "Bruce Springsteen"); - /// # } - /// ``` - /// - /// But this is a bit cumbersome to use all the time. Instead, a simple - /// syntax is supported that expands `$name` into the corresponding capture - /// group. Here's the last example, but using this expansion technique - /// with named capture groups: - /// - /// ```rust - /// # use regex::Regex; - /// # fn main() { - /// let re = Regex::new(r"(?P<last>[^,\s]+),\s+(?P<first>\S+)").unwrap(); - /// let result = re.replace("Springsteen, Bruce", "$first $last"); - /// assert_eq!(result, "Bruce Springsteen"); - /// # } - /// ``` - /// - /// Note that using `$2` instead of `$first` or `$1` instead of `$last` - /// would produce the same result. To write a literal `$` use `$$`. - /// - /// Sometimes the replacement string requires use of curly braces to - /// delineate a capture group replacement and surrounding literal text. - /// For example, if we wanted to join two words together with an - /// underscore: - /// - /// ```rust - /// # use regex::Regex; - /// # fn main() { - /// let re = Regex::new(r"(?P<first>\w+)\s+(?P<second>\w+)").unwrap(); - /// let result = re.replace("deep fried", "${first}_$second"); - /// assert_eq!(result, "deep_fried"); - /// # } - /// ``` - /// - /// Without the curly braces, the capture group name `first_` would be - /// used, and since it doesn't exist, it would be replaced with the empty - /// string. - /// - /// Finally, sometimes you just want to replace a literal string with no - /// regard for capturing group expansion. This can be done by wrapping a - /// byte string with `NoExpand`: - /// - /// ```rust - /// # use regex::Regex; - /// # fn main() { - /// use regex::NoExpand; - /// - /// let re = Regex::new(r"(?P<last>[^,\s]+),\s+(\S+)").unwrap(); - /// let result = re.replace("Springsteen, Bruce", NoExpand("$2 $last")); - /// assert_eq!(result, "$2 $last"); - /// # } - /// ``` - pub fn replace<'t, R: Replacer>( - &self, - text: &'t str, - rep: R, - ) -> Cow<'t, str> { - self.replacen(text, 1, rep) - } - - /// Replaces all non-overlapping matches in `text` with the replacement - /// provided. This is the same as calling `replacen` with `limit` set to - /// `0`. - /// - /// See the documentation for `replace` for details on how to access - /// capturing group matches in the replacement string. - pub fn replace_all<'t, R: Replacer>( - &self, - text: &'t str, - rep: R, - ) -> Cow<'t, str> { - self.replacen(text, 0, rep) - } - - /// Replaces at most `limit` non-overlapping matches in `text` with the - /// replacement provided. If `limit` is 0, then all non-overlapping matches - /// are replaced. - /// - /// See the documentation for `replace` for details on how to access - /// capturing group matches in the replacement string. - pub fn replacen<'t, R: Replacer>( - &self, - text: &'t str, - limit: usize, - mut rep: R, - ) -> Cow<'t, str> { - // If we know that the replacement doesn't have any capture expansions, - // then we can use the fast path. The fast path can make a tremendous - // difference: - // - // 1) We use `find_iter` instead of `captures_iter`. Not asking for - // captures generally makes the regex engines faster. 
- // 2) We don't need to look up all of the capture groups and do - // replacements inside the replacement string. We just push it - // at each match and be done with it. - if let Some(rep) = rep.no_expansion() { - let mut it = self.find_iter(text).enumerate().peekable(); - if it.peek().is_none() { - return Cow::Borrowed(text); - } - let mut new = String::with_capacity(text.len()); - let mut last_match = 0; - for (i, m) in it { - if limit > 0 && i >= limit { - break; - } - new.push_str(&text[last_match..m.start()]); - new.push_str(&rep); - last_match = m.end(); - } - new.push_str(&text[last_match..]); - return Cow::Owned(new); - } - - // The slower path, which we use if the replacement needs access to - // capture groups. - let mut it = self.captures_iter(text).enumerate().peekable(); - if it.peek().is_none() { - return Cow::Borrowed(text); - } - let mut new = String::with_capacity(text.len()); - let mut last_match = 0; - for (i, cap) in it { - if limit > 0 && i >= limit { - break; - } - // unwrap on 0 is OK because captures only reports matches - let m = cap.get(0).unwrap(); - new.push_str(&text[last_match..m.start()]); - rep.replace_append(&cap, &mut new); - last_match = m.end(); - } - new.push_str(&text[last_match..]); - Cow::Owned(new) - } -} - -/// Advanced or "lower level" search methods. -impl Regex { - /// Returns the end location of a match in the text given. - /// - /// This method may have the same performance characteristics as - /// `is_match`, except it provides an end location for a match. In - /// particular, the location returned *may be shorter* than the proper end - /// of the leftmost-first match. - /// - /// # Example - /// - /// Typically, `a+` would match the entire first sequence of `a` in some - /// text, but `shortest_match` can give up as soon as it sees the first - /// `a`. - /// - /// ```rust - /// # use regex::Regex; - /// # fn main() { - /// let text = "aaaaa"; - /// let pos = Regex::new(r"a+").unwrap().shortest_match(text); - /// assert_eq!(pos, Some(1)); - /// # } - /// ``` - pub fn shortest_match(&self, text: &str) -> Option<usize> { - self.shortest_match_at(text, 0) - } - - /// Returns the same as shortest_match, but starts the search at the given - /// offset. - /// - /// The significance of the starting point is that it takes the surrounding - /// context into consideration. For example, the `\A` anchor can only - /// match when `start == 0`. - pub fn shortest_match_at( - &self, - text: &str, - start: usize, - ) -> Option<usize> { - self.0.searcher_str().shortest_match_at(text, start) - } - - /// Returns the same as is_match, but starts the search at the given - /// offset. - /// - /// The significance of the starting point is that it takes the surrounding - /// context into consideration. For example, the `\A` anchor can only - /// match when `start == 0`. - pub fn is_match_at(&self, text: &str, start: usize) -> bool { - self.shortest_match_at(text, start).is_some() - } - - /// Returns the same as find, but starts the search at the given - /// offset. - /// - /// The significance of the starting point is that it takes the surrounding - /// context into consideration. For example, the `\A` anchor can only - /// match when `start == 0`. 
- pub fn find_at<'t>( - &self, - text: &'t str, - start: usize, - ) -> Option<Match<'t>> { - self.0 - .searcher_str() - .find_at(text, start) - .map(|(s, e)| Match::new(text, s, e)) - } - - /// This is like `captures`, but uses - /// [`CaptureLocations`](struct.CaptureLocations.html) - /// instead of - /// [`Captures`](struct.Captures.html) in order to amortize allocations. - /// - /// To create a `CaptureLocations` value, use the - /// `Regex::capture_locations` method. - /// - /// This returns the overall match if this was successful, which is always - /// equivalence to the `0`th capture group. - pub fn captures_read<'t>( - &self, - locs: &mut CaptureLocations, - text: &'t str, - ) -> Option<Match<'t>> { - self.captures_read_at(locs, text, 0) - } - - /// Returns the same as captures, but starts the search at the given - /// offset and populates the capture locations given. - /// - /// The significance of the starting point is that it takes the surrounding - /// context into consideration. For example, the `\A` anchor can only - /// match when `start == 0`. - pub fn captures_read_at<'t>( - &self, - locs: &mut CaptureLocations, - text: &'t str, - start: usize, - ) -> Option<Match<'t>> { - self.0 - .searcher_str() - .captures_read_at(&mut locs.0, text, start) - .map(|(s, e)| Match::new(text, s, e)) - } - - /// An undocumented alias for `captures_read_at`. - /// - /// The `regex-capi` crate previously used this routine, so to avoid - /// breaking that crate, we continue to provide the name as an undocumented - /// alias. - #[doc(hidden)] - pub fn read_captures_at<'t>( - &self, - locs: &mut CaptureLocations, - text: &'t str, - start: usize, - ) -> Option<Match<'t>> { - self.captures_read_at(locs, text, start) - } -} - -/// Auxiliary methods. -impl Regex { - /// Returns the original string of this regex. - pub fn as_str(&self) -> &str { - &self.0.regex_strings()[0] - } - - /// Returns an iterator over the capture names. - pub fn capture_names(&self) -> CaptureNames<'_> { - CaptureNames(self.0.capture_names().iter()) - } - - /// Returns the number of captures. - pub fn captures_len(&self) -> usize { - self.0.capture_names().len() - } - - /// Returns an empty set of capture locations that can be reused in - /// multiple calls to `captures_read` or `captures_read_at`. - pub fn capture_locations(&self) -> CaptureLocations { - CaptureLocations(self.0.searcher_str().locations()) - } - - /// An alias for `capture_locations` to preserve backward compatibility. - /// - /// The `regex-capi` crate uses this method, so to avoid breaking that - /// crate, we continue to export it as an undocumented API. - #[doc(hidden)] - pub fn locations(&self) -> CaptureLocations { - CaptureLocations(self.0.searcher_str().locations()) - } -} - -/// An iterator over the names of all possible captures. -/// -/// `None` indicates an unnamed capture; the first element (capture 0, the -/// whole matched region) is always unnamed. -/// -/// `'r` is the lifetime of the compiled regular expression. 
-#[derive(Clone, Debug)] -pub struct CaptureNames<'r>(::std::slice::Iter<'r, Option<String>>); - -impl<'r> Iterator for CaptureNames<'r> { - type Item = Option<&'r str>; - - fn next(&mut self) -> Option<Option<&'r str>> { - self.0 - .next() - .as_ref() - .map(|slot| slot.as_ref().map(|name| name.as_ref())) - } - - fn size_hint(&self) -> (usize, Option<usize>) { - self.0.size_hint() - } - - fn count(self) -> usize { - self.0.count() - } -} - -impl<'r> ExactSizeIterator for CaptureNames<'r> {} - -impl<'r> FusedIterator for CaptureNames<'r> {} - -/// Yields all substrings delimited by a regular expression match. -/// -/// `'r` is the lifetime of the compiled regular expression and `'t` is the -/// lifetime of the string being split. -#[derive(Debug)] -pub struct Split<'r, 't> { - finder: Matches<'r, 't>, - last: usize, -} - -impl<'r, 't> Iterator for Split<'r, 't> { - type Item = &'t str; - - fn next(&mut self) -> Option<&'t str> { - let text = self.finder.0.text(); - match self.finder.next() { - None => { - if self.last > text.len() { - None - } else { - let s = &text[self.last..]; - self.last = text.len() + 1; // Next call will return None - Some(s) - } - } - Some(m) => { - let matched = &text[self.last..m.start()]; - self.last = m.end(); - Some(matched) - } - } - } -} - -impl<'r, 't> FusedIterator for Split<'r, 't> {} - -/// Yields at most `N` substrings delimited by a regular expression match. -/// -/// The last substring will be whatever remains after splitting. -/// -/// `'r` is the lifetime of the compiled regular expression and `'t` is the -/// lifetime of the string being split. -#[derive(Debug)] -pub struct SplitN<'r, 't> { - splits: Split<'r, 't>, - n: usize, -} - -impl<'r, 't> Iterator for SplitN<'r, 't> { - type Item = &'t str; - - fn next(&mut self) -> Option<&'t str> { - if self.n == 0 { - return None; - } - - self.n -= 1; - if self.n > 0 { - return self.splits.next(); - } - - let text = self.splits.finder.0.text(); - if self.splits.last > text.len() { - // We've already returned all substrings. - None - } else { - // self.n == 0, so future calls will return None immediately - Some(&text[self.splits.last..]) - } - } - - fn size_hint(&self) -> (usize, Option<usize>) { - (0, Some(self.n)) - } -} - -impl<'r, 't> FusedIterator for SplitN<'r, 't> {} - -/// CaptureLocations is a low level representation of the raw offsets of each -/// submatch. -/// -/// You can think of this as a lower level -/// [`Captures`](struct.Captures.html), where this type does not support -/// named capturing groups directly and it does not borrow the text that these -/// offsets were matched on. -/// -/// Primarily, this type is useful when using the lower level `Regex` APIs -/// such as `read_captures`, which permits amortizing the allocation in which -/// capture match locations are stored. -/// -/// In order to build a value of this type, you'll need to call the -/// `capture_locations` method on the `Regex` being used to execute the search. -/// The value returned can then be reused in subsequent searches. -#[derive(Clone, Debug)] -pub struct CaptureLocations(re_trait::Locations); - -/// A type alias for `CaptureLocations` for backwards compatibility. -/// -/// Previously, we exported `CaptureLocations` as `Locations` in an -/// undocumented API. To prevent breaking that code (e.g., in `regex-capi`), -/// we continue re-exporting the same undocumented API. 
-#[doc(hidden)] -pub type Locations = CaptureLocations; - -impl CaptureLocations { - /// Returns the start and end positions of the Nth capture group. Returns - /// `None` if `i` is not a valid capture group or if the capture group did - /// not match anything. The positions returned are *always* byte indices - /// with respect to the original string matched. - #[inline] - pub fn get(&self, i: usize) -> Option<(usize, usize)> { - self.0.pos(i) - } - - /// Returns the total number of capturing groups. - /// - /// This is always at least `1` since every regex has at least `1` - /// capturing group that corresponds to the entire match. - #[inline] - pub fn len(&self) -> usize { - self.0.len() - } - - /// An alias for the `get` method for backwards compatibility. - /// - /// Previously, we exported `get` as `pos` in an undocumented API. To - /// prevent breaking that code (e.g., in `regex-capi`), we continue - /// re-exporting the same undocumented API. - #[doc(hidden)] - #[inline] - pub fn pos(&self, i: usize) -> Option<(usize, usize)> { - self.get(i) - } -} - -/// Captures represents a group of captured strings for a single match. -/// -/// The 0th capture always corresponds to the entire match. Each subsequent -/// index corresponds to the next capture group in the regex. If a capture -/// group is named, then the matched string is *also* available via the `name` -/// method. (Note that the 0th capture is always unnamed and so must be -/// accessed with the `get` method.) -/// -/// Positions returned from a capture group are always byte indices. -/// -/// `'t` is the lifetime of the matched text. -pub struct Captures<'t> { - text: &'t str, - locs: re_trait::Locations, - named_groups: Arc<HashMap<String, usize>>, -} - -impl<'t> Captures<'t> { - /// Returns the match associated with the capture group at index `i`. If - /// `i` does not correspond to a capture group, or if the capture group - /// did not participate in the match, then `None` is returned. - /// - /// # Examples - /// - /// Get the text of the match with a default of an empty string if this - /// group didn't participate in the match: - /// - /// ```rust - /// # use regex::Regex; - /// let re = Regex::new(r"[a-z]+(?:([0-9]+)|([A-Z]+))").unwrap(); - /// let caps = re.captures("abc123").unwrap(); - /// - /// let text1 = caps.get(1).map_or("", |m| m.as_str()); - /// let text2 = caps.get(2).map_or("", |m| m.as_str()); - /// assert_eq!(text1, "123"); - /// assert_eq!(text2, ""); - /// ``` - pub fn get(&self, i: usize) -> Option<Match<'t>> { - self.locs.pos(i).map(|(s, e)| Match::new(self.text, s, e)) - } - - /// Returns the match for the capture group named `name`. If `name` isn't a - /// valid capture group or didn't match anything, then `None` is returned. - pub fn name(&self, name: &str) -> Option<Match<'t>> { - self.named_groups.get(name).and_then(|&i| self.get(i)) - } - - /// An iterator that yields all capturing matches in the order in which - /// they appear in the regex. If a particular capture group didn't - /// participate in the match, then `None` is yielded for that capture. - /// - /// The first match always corresponds to the overall match of the regex. - pub fn iter<'c>(&'c self) -> SubCaptureMatches<'c, 't> { - SubCaptureMatches { caps: self, it: self.locs.iter() } - } - - /// Expands all instances of `$name` in `replacement` to the corresponding - /// capture group `name`, and writes them to the `dst` buffer given. 
- /// - /// `name` may be an integer corresponding to the index of the capture - /// group (counted by order of opening parenthesis where `0` is the - /// entire match) or it can be a name (consisting of letters, digits or - /// underscores) corresponding to a named capture group. - /// - /// If `name` isn't a valid capture group (whether the name doesn't exist - /// or isn't a valid index), then it is replaced with the empty string. - /// - /// The longest possible name consisting of the characters `[_0-9A-Za-z]` - /// is used. e.g., `$1a` looks up the capture group named `1a` and not the - /// capture group at index `1`. To exert more precise control over the - /// name, or to refer to a capture group name that uses characters outside - /// of `[_0-9A-Za-z]`, use braces, e.g., `${1}a` or `${foo[bar].baz}`. When - /// using braces, any sequence of characters is permitted. If the sequence - /// does not refer to a capture group name in the corresponding regex, then - /// it is replaced with an empty string. - /// - /// To write a literal `$` use `$$`. - pub fn expand(&self, replacement: &str, dst: &mut String) { - expand_str(self, replacement, dst) - } - - /// Returns the number of captured groups. - /// - /// This is always at least `1`, since every regex has at least one capture - /// group that corresponds to the full match. - #[inline] - pub fn len(&self) -> usize { - self.locs.len() - } -} - -impl<'t> fmt::Debug for Captures<'t> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_tuple("Captures").field(&CapturesDebug(self)).finish() - } -} - -struct CapturesDebug<'c, 't>(&'c Captures<'t>); - -impl<'c, 't> fmt::Debug for CapturesDebug<'c, 't> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - // We'd like to show something nice here, even if it means an - // allocation to build a reverse index. - let slot_to_name: HashMap<&usize, &String> = - self.0.named_groups.iter().map(|(a, b)| (b, a)).collect(); - let mut map = f.debug_map(); - for (slot, m) in self.0.locs.iter().enumerate() { - let m = m.map(|(s, e)| &self.0.text[s..e]); - if let Some(name) = slot_to_name.get(&slot) { - map.entry(&name, &m); - } else { - map.entry(&slot, &m); - } - } - map.finish() - } -} - -/// Get a group by index. -/// -/// `'t` is the lifetime of the matched text. -/// -/// The text can't outlive the `Captures` object if this method is -/// used, because of how `Index` is defined (normally `a[i]` is part -/// of `a` and can't outlive it); to do that, use `get()` instead. -/// -/// # Panics -/// -/// If there is no group at the given index. -impl<'t> Index<usize> for Captures<'t> { - type Output = str; - - fn index(&self, i: usize) -> &str { - self.get(i) - .map(|m| m.as_str()) - .unwrap_or_else(|| panic!("no group at index '{}'", i)) - } -} - -/// Get a group by name. -/// -/// `'t` is the lifetime of the matched text and `'i` is the lifetime -/// of the group name (the index). -/// -/// The text can't outlive the `Captures` object if this method is -/// used, because of how `Index` is defined (normally `a[i]` is part -/// of `a` and can't outlive it); to do that, use `name` instead. -/// -/// # Panics -/// -/// If there is no group named by the given value. 
-impl<'t, 'i> Index<&'i str> for Captures<'t> { - type Output = str; - - fn index<'a>(&'a self, name: &'i str) -> &'a str { - self.name(name) - .map(|m| m.as_str()) - .unwrap_or_else(|| panic!("no group named '{}'", name)) - } -} - -/// An iterator that yields all capturing matches in the order in which they -/// appear in the regex. -/// -/// If a particular capture group didn't participate in the match, then `None` -/// is yielded for that capture. The first match always corresponds to the -/// overall match of the regex. -/// -/// The lifetime `'c` corresponds to the lifetime of the `Captures` value, and -/// the lifetime `'t` corresponds to the originally matched text. -#[derive(Clone, Debug)] -pub struct SubCaptureMatches<'c, 't> { - caps: &'c Captures<'t>, - it: SubCapturesPosIter<'c>, -} - -impl<'c, 't> Iterator for SubCaptureMatches<'c, 't> { - type Item = Option<Match<'t>>; - - fn next(&mut self) -> Option<Option<Match<'t>>> { - self.it - .next() - .map(|cap| cap.map(|(s, e)| Match::new(self.caps.text, s, e))) - } -} - -impl<'c, 't> FusedIterator for SubCaptureMatches<'c, 't> {} - -/// An iterator that yields all non-overlapping capture groups matching a -/// particular regular expression. -/// -/// The iterator stops when no more matches can be found. -/// -/// `'r` is the lifetime of the compiled regular expression and `'t` is the -/// lifetime of the matched string. -#[derive(Debug)] -pub struct CaptureMatches<'r, 't>( - re_trait::CaptureMatches<'t, ExecNoSyncStr<'r>>, -); - -impl<'r, 't> Iterator for CaptureMatches<'r, 't> { - type Item = Captures<'t>; - - fn next(&mut self) -> Option<Captures<'t>> { - self.0.next().map(|locs| Captures { - text: self.0.text(), - locs: locs, - named_groups: self.0.regex().capture_name_idx().clone(), - }) - } -} - -impl<'r, 't> FusedIterator for CaptureMatches<'r, 't> {} - -/// An iterator over all non-overlapping matches for a particular string. -/// -/// The iterator yields a `Match` value. The iterator stops when no more -/// matches can be found. -/// -/// `'r` is the lifetime of the compiled regular expression and `'t` is the -/// lifetime of the matched string. -#[derive(Debug)] -pub struct Matches<'r, 't>(re_trait::Matches<'t, ExecNoSyncStr<'r>>); - -impl<'r, 't> Iterator for Matches<'r, 't> { - type Item = Match<'t>; - - fn next(&mut self) -> Option<Match<'t>> { - let text = self.0.text(); - self.0.next().map(|(s, e)| Match::new(text, s, e)) - } -} - -impl<'r, 't> FusedIterator for Matches<'r, 't> {} - -/// Replacer describes types that can be used to replace matches in a string. -/// -/// In general, users of this crate shouldn't need to implement this trait, -/// since implementations are already provided for `&str` along with other -/// variants of string types and `FnMut(&Captures) -> String` (or any -/// `FnMut(&Captures) -> T` where `T: AsRef<str>`), which covers most use cases. -pub trait Replacer { - /// Appends text to `dst` to replace the current match. - /// - /// The current match is represented by `caps`, which is guaranteed to - /// have a match at capture group `0`. - /// - /// For example, a no-op replacement would be - /// `dst.push_str(caps.get(0).unwrap().as_str())`. - fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String); - - /// Return a fixed unchanging replacement string. - /// - /// When doing replacements, if access to `Captures` is not needed (e.g., - /// the replacement byte string does not need `$` expansion), then it can - /// be beneficial to avoid finding sub-captures. 
- /// - /// In general, this is called once for every call to `replacen`. - fn no_expansion<'r>(&'r mut self) -> Option<Cow<'r, str>> { - None - } - - /// Return a `Replacer` that borrows and wraps this `Replacer`. - /// - /// This is useful when you want to take a generic `Replacer` (which might - /// not be cloneable) and use it without consuming it, so it can be used - /// more than once. - /// - /// # Example - /// - /// ``` - /// use regex::{Regex, Replacer}; - /// - /// fn replace_all_twice<R: Replacer>( - /// re: Regex, - /// src: &str, - /// mut rep: R, - /// ) -> String { - /// let dst = re.replace_all(src, rep.by_ref()); - /// let dst = re.replace_all(&dst, rep.by_ref()); - /// dst.into_owned() - /// } - /// ``` - fn by_ref<'r>(&'r mut self) -> ReplacerRef<'r, Self> { - ReplacerRef(self) - } -} - -/// By-reference adaptor for a `Replacer` -/// -/// Returned by [`Replacer::by_ref`](trait.Replacer.html#method.by_ref). -#[derive(Debug)] -pub struct ReplacerRef<'a, R: ?Sized>(&'a mut R); - -impl<'a, R: Replacer + ?Sized + 'a> Replacer for ReplacerRef<'a, R> { - fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { - self.0.replace_append(caps, dst) - } - fn no_expansion(&mut self) -> Option<Cow<'_, str>> { - self.0.no_expansion() - } -} - -impl<'a> Replacer for &'a str { - fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { - caps.expand(*self, dst); - } - - fn no_expansion(&mut self) -> Option<Cow<'_, str>> { - no_expansion(self) - } -} - -impl<'a> Replacer for &'a String { - fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { - self.as_str().replace_append(caps, dst) - } - - fn no_expansion(&mut self) -> Option<Cow<'_, str>> { - no_expansion(self) - } -} - -impl Replacer for String { - fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { - self.as_str().replace_append(caps, dst) - } - - fn no_expansion(&mut self) -> Option<Cow<'_, str>> { - no_expansion(self) - } -} - -impl<'a> Replacer for Cow<'a, str> { - fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { - self.as_ref().replace_append(caps, dst) - } - - fn no_expansion(&mut self) -> Option<Cow<'_, str>> { - no_expansion(self) - } -} - -impl<'a> Replacer for &'a Cow<'a, str> { - fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { - self.as_ref().replace_append(caps, dst) - } - - fn no_expansion(&mut self) -> Option<Cow<'_, str>> { - no_expansion(self) - } -} - -fn no_expansion<T: AsRef<str>>(t: &T) -> Option<Cow<'_, str>> { - let s = t.as_ref(); - match find_byte(b'$', s.as_bytes()) { - Some(_) => None, - None => Some(Cow::Borrowed(s)), - } -} - -impl<F, T> Replacer for F -where - F: FnMut(&Captures<'_>) -> T, - T: AsRef<str>, -{ - fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { - dst.push_str((*self)(caps).as_ref()); - } -} - -/// `NoExpand` indicates literal string replacement. -/// -/// It can be used with `replace` and `replace_all` to do a literal string -/// replacement without expanding `$name` to their corresponding capture -/// groups. This can be both convenient (to avoid escaping `$`, for example) -/// and performant (since capture groups don't need to be found). -/// -/// `'t` is the lifetime of the literal text. 
-#[derive(Clone, Debug)] -pub struct NoExpand<'t>(pub &'t str); - -impl<'t> Replacer for NoExpand<'t> { - fn replace_append(&mut self, _: &Captures<'_>, dst: &mut String) { - dst.push_str(self.0); - } - - fn no_expansion(&mut self) -> Option<Cow<'_, str>> { - Some(Cow::Borrowed(self.0)) - } -} diff --git a/collector/compile-benchmarks/regex-1.5.5/src/sparse.rs b/collector/compile-benchmarks/regex-1.5.5/src/sparse.rs deleted file mode 100644 index 98b726613..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/src/sparse.rs +++ /dev/null @@ -1,84 +0,0 @@ -use std::fmt; -use std::ops::Deref; -use std::slice; - -/// A sparse set used for representing ordered NFA states. -/// -/// This supports constant time addition and membership testing. Clearing an -/// entire set can also be done in constant time. Iteration yields elements -/// in the order in which they were inserted. -/// -/// The data structure is based on: https://research.swtch.com/sparse -/// Note though that we don't actually use uninitialized memory. We generally -/// reuse allocations, so the initial allocation cost is bareable. However, -/// its other properties listed above are extremely useful. -#[derive(Clone)] -pub struct SparseSet { - /// Dense contains the instruction pointers in the order in which they - /// were inserted. - dense: Vec<usize>, - /// Sparse maps instruction pointers to their location in dense. - /// - /// An instruction pointer is in the set if and only if - /// sparse[ip] < dense.len() && ip == dense[sparse[ip]]. - sparse: Box<[usize]>, -} - -impl SparseSet { - pub fn new(size: usize) -> SparseSet { - SparseSet { - dense: Vec::with_capacity(size), - sparse: vec![0; size].into_boxed_slice(), - } - } - - pub fn len(&self) -> usize { - self.dense.len() - } - - pub fn is_empty(&self) -> bool { - self.dense.is_empty() - } - - pub fn capacity(&self) -> usize { - self.dense.capacity() - } - - pub fn insert(&mut self, value: usize) { - let i = self.len(); - assert!(i < self.capacity()); - self.dense.push(value); - self.sparse[value] = i; - } - - pub fn contains(&self, value: usize) -> bool { - let i = self.sparse[value]; - self.dense.get(i) == Some(&value) - } - - pub fn clear(&mut self) { - self.dense.clear(); - } -} - -impl fmt::Debug for SparseSet { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "SparseSet({:?})", self.dense) - } -} - -impl Deref for SparseSet { - type Target = [usize]; - - fn deref(&self) -> &Self::Target { - &self.dense - } -} - -impl<'a> IntoIterator for &'a SparseSet { - type Item = &'a usize; - type IntoIter = slice::Iter<'a, usize>; - fn into_iter(self) -> Self::IntoIter { - self.iter() - } -} diff --git a/collector/compile-benchmarks/regex-1.5.5/src/testdata/LICENSE b/collector/compile-benchmarks/regex-1.5.5/src/testdata/LICENSE deleted file mode 100644 index f47dbf4c4..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/src/testdata/LICENSE +++ /dev/null @@ -1,19 +0,0 @@ -The following license covers testregex.c and all associated test data. 
- -Permission is hereby granted, free of charge, to any person obtaining a -copy of THIS SOFTWARE FILE (the "Software"), to deal in the Software -without restriction, including without limitation the rights to use, -copy, modify, merge, publish, distribute, and/or sell copies of the -Software, and to permit persons to whom the Software is furnished to do -so, subject to the following disclaimer: - -THIS SOFTWARE IS PROVIDED BY AT&T ``AS IS'' AND ANY EXPRESS OR IMPLIED -WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -IN NO EVENT SHALL AT&T BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/collector/compile-benchmarks/regex-1.5.5/src/testdata/README b/collector/compile-benchmarks/regex-1.5.5/src/testdata/README deleted file mode 100644 index 6efc2dad3..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/src/testdata/README +++ /dev/null @@ -1,17 +0,0 @@ -Test data was taken from the Go distribution, which was in turn taken from the -testregex test suite: - - http://www2.research.att.com/~astopen/testregex/testregex.html - -The LICENSE in this directory corresponds to the LICENSE that the data was -released under. - -The tests themselves were modified for RE2/Go. A couple were modified further -by me (Andrew Gallant) (only in repetition.dat) so that RE2/Go would pass them. -(Yes, it seems like RE2/Go includes failing test cases.) This may or may not -have been a bad idea, but I think being consistent with an established Regex -library is worth something. - -Note that these files are read by 'scripts/regex-match-tests.py' and turned -into Rust tests found in 'regex_macros/tests/matches.rs'. 
- diff --git a/collector/compile-benchmarks/regex-1.5.5/src/testdata/basic.dat b/collector/compile-benchmarks/regex-1.5.5/src/testdata/basic.dat deleted file mode 100644 index 632e1bb41..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/src/testdata/basic.dat +++ /dev/null @@ -1,221 +0,0 @@ -NOTE all standard compliant implementations should pass these : 2002-05-31 - -BE abracadabra$ abracadabracadabra (7,18) -BE a...b abababbb (2,7) -BE XXXXXX ..XXXXXX (2,8) -E \) () (1,2) -BE a] a]a (0,2) -B } } (0,1) -E \} } (0,1) -BE \] ] (0,1) -B ] ] (0,1) -E ] ] (0,1) -B { { (0,1) -B } } (0,1) -BE ^a ax (0,1) -BE \^a a^a (1,3) -BE a\^ a^ (0,2) -BE a$ aa (1,2) -BE a\$ a$ (0,2) -BE ^$ NULL (0,0) -E $^ NULL (0,0) -E a($) aa (1,2)(2,2) -E a*(^a) aa (0,1)(0,1) -E (..)*(...)* a (0,0) -E (..)*(...)* abcd (0,4)(2,4) -E (ab|a)(bc|c) abc (0,3)(0,2)(2,3) -E (ab)c|abc abc (0,3)(0,2) -E a{0}b ab (1,2) -E (a*)(b?)(b+)b{3} aaabbbbbbb (0,10)(0,3)(3,4)(4,7) -E (a*)(b{0,1})(b{1,})b{3} aaabbbbbbb (0,10)(0,3)(3,4)(4,7) -E a{9876543210} NULL BADBR -E ((a|a)|a) a (0,1)(0,1)(0,1) -E (a*)(a|aa) aaaa (0,4)(0,3)(3,4) -E a*(a.|aa) aaaa (0,4)(2,4) -E a(b)|c(d)|a(e)f aef (0,3)(?,?)(?,?)(1,2) -E (a|b)?.* b (0,1)(0,1) -E (a|b)c|a(b|c) ac (0,2)(0,1) -E (a|b)c|a(b|c) ab (0,2)(?,?)(1,2) -E (a|b)*c|(a|ab)*c abc (0,3)(1,2) -E (a|b)*c|(a|ab)*c xc (1,2) -E (.a|.b).*|.*(.a|.b) xa (0,2)(0,2) -E a?(ab|ba)ab abab (0,4)(0,2) -E a?(ac{0}b|ba)ab abab (0,4)(0,2) -E ab|abab abbabab (0,2) -E aba|bab|bba baaabbbaba (5,8) -E aba|bab baaabbbaba (6,9) -E (aa|aaa)*|(a|aaaaa) aa (0,2)(0,2) -E (a.|.a.)*|(a|.a...) aa (0,2)(0,2) -E ab|a xabc (1,3) -E ab|a xxabc (2,4) -Ei (?-u)(Ab|cD)* aBcD (0,4)(2,4) -BE [^-] --a (2,3) -BE [a-]* --a (0,3) -BE [a-m-]* --amoma-- (0,4) -E :::1:::0:|:::1:1:0: :::0:::1:::1:::0: (8,17) -E :::1:::0:|:::1:1:1: :::0:::1:::1:::0: (8,17) -{E [[:upper:]] A (0,1) [[<element>]] not supported -E [[:lower:]]+ `az{ (1,3) -E [[:upper:]]+ @AZ[ (1,3) -# No collation in Go -#BE [[-]] [[-]] (2,4) -#BE [[.NIL.]] NULL ECOLLATE -#BE [[=aleph=]] NULL ECOLLATE -} -BE$ \n \n (0,1) -BEn$ \n \n (0,1) -BE$ [^a] \n (0,1) -BE$ \na \na (0,2) -E (a)(b)(c) abc (0,3)(0,1)(1,2)(2,3) -BE xxx xxx (0,3) -E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) feb 6, (0,6) -E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) 2/7 (0,3) -E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) feb 1,Feb 6 (5,11) -E3 ((((((((((((((((((((((((((((((x)))))))))))))))))))))))))))))) x (0,1)(0,1)(0,1) -E3 ((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))* xx (0,2)(1,2)(1,2) -E a?(ab|ba)* ababababababababababababababababababababababababababababababababababababababababa (0,81)(79,81) -E abaa|abbaa|abbbaa|abbbbaa ababbabbbabbbabbbbabbbbaa (18,25) -E abaa|abbaa|abbbaa|abbbbaa ababbabbbabbbabbbbabaa (18,22) -E aaac|aabc|abac|abbc|baac|babc|bbac|bbbc baaabbbabac (7,11) -BE$ .* \x01\x7f (0,2) -E aaaa|bbbb|cccc|ddddd|eeeeee|fffffff|gggg|hhhh|iiiii|jjjjj|kkkkk|llll XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa (53,57) -L aaaa\nbbbb\ncccc\nddddd\neeeeee\nfffffff\ngggg\nhhhh\niiiii\njjjjj\nkkkkk\nllll XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa NOMATCH -E a*a*a*a*a*b aaaaaaaaab (0,10) -BE ^ NULL (0,0) -BE $ NULL (0,0) -BE ^$ NULL (0,0) -BE ^a$ a (0,1) -BE abc abc (0,3) -BE abc xabcy (1,4) -BE abc ababc (2,5) -BE ab*c abc (0,3) -BE ab*bc abc (0,3) -BE ab*bc abbc (0,4) -BE ab*bc abbbbc (0,6) -E ab+bc abbc (0,4) -E ab+bc abbbbc (0,6) -E ab?bc abbc (0,4) -E ab?bc abc (0,3) -E ab?c abc (0,3) -BE ^abc$ abc (0,3) -BE ^abc 
abcc (0,3) -BE abc$ aabc (1,4) -BE ^ abc (0,0) -BE $ abc (3,3) -BE a.c abc (0,3) -BE a.c axc (0,3) -BE a.*c axyzc (0,5) -BE a[bc]d abd (0,3) -BE a[b-d]e ace (0,3) -BE a[b-d] aac (1,3) -BE a[-b] a- (0,2) -BE a[b-] a- (0,2) -BE a] a] (0,2) -BE a[]]b a]b (0,3) -BE a[^bc]d aed (0,3) -BE a[^-b]c adc (0,3) -BE a[^]b]c adc (0,3) -E ab|cd abc (0,2) -E ab|cd abcd (0,2) -E a\(b a(b (0,3) -E a\(*b ab (0,2) -E a\(*b a((b (0,4) -E ((a)) abc (0,1)(0,1)(0,1) -E (a)b(c) abc (0,3)(0,1)(2,3) -E a+b+c aabbabc (4,7) -E a* aaa (0,3) -#E (a*)* - (0,0)(0,0) -E (a*)* - (0,0)(?,?) RE2/Go -E (a*)+ - (0,0)(0,0) -#E (a*|b)* - (0,0)(0,0) -E (a*|b)* - (0,0)(?,?) RE2/Go -E (a+|b)* ab (0,2)(1,2) -E (a+|b)+ ab (0,2)(1,2) -E (a+|b)? ab (0,1)(0,1) -BE [^ab]* cde (0,3) -#E (^)* - (0,0)(0,0) -E (^)* - (0,0)(?,?) RE2/Go -BE a* NULL (0,0) -E ([abc])*d abbbcd (0,6)(4,5) -E ([abc])*bcd abcd (0,4)(0,1) -E a|b|c|d|e e (0,1) -E (a|b|c|d|e)f ef (0,2)(0,1) -#E ((a*|b))* - (0,0)(0,0)(0,0) -E ((a*|b))* - (0,0)(?,?)(?,?) RE2/Go -BE abcd*efg abcdefg (0,7) -BE ab* xabyabbbz (1,3) -BE ab* xayabbbz (1,2) -E (ab|cd)e abcde (2,5)(2,4) -BE [abhgefdc]ij hij (0,3) -E (a|b)c*d abcd (1,4)(1,2) -E (ab|ab*)bc abc (0,3)(0,1) -E a([bc]*)c* abc (0,3)(1,3) -E a([bc]*)(c*d) abcd (0,4)(1,3)(3,4) -E a([bc]+)(c*d) abcd (0,4)(1,3)(3,4) -E a([bc]*)(c+d) abcd (0,4)(1,2)(2,4) -E a[bcd]*dcdcde adcdcde (0,7) -E (ab|a)b*c abc (0,3)(0,2) -E ((a)(b)c)(d) abcd (0,4)(0,3)(0,1)(1,2)(3,4) -BE [A-Za-z_][A-Za-z0-9_]* alpha (0,5) -E ^a(bc+|b[eh])g|.h$ abh (1,3) -E (bc+d$|ef*g.|h?i(j|k)) effgz (0,5)(0,5) -E (bc+d$|ef*g.|h?i(j|k)) ij (0,2)(0,2)(1,2) -E (bc+d$|ef*g.|h?i(j|k)) reffgz (1,6)(1,6) -E (((((((((a))))))))) a (0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1) -BE multiple words multiple words yeah (0,14) -E (.*)c(.*) abcde (0,5)(0,2)(3,5) -BE abcd abcd (0,4) -E a(bc)d abcd (0,4)(1,3) -E a[-]?c ac (0,3) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Qaddafi (0,15)(?,?)(10,12) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mo'ammar Gadhafi (0,16)(?,?)(11,13) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Kaddafi (0,15)(?,?)(10,12) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Qadhafi (0,15)(?,?)(10,12) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Gadafi (0,14)(?,?)(10,11) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mu'ammar Qadafi (0,15)(?,?)(11,12) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moamar Gaddafi (0,14)(?,?)(9,11) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mu'ammar Qadhdhafi (0,18)(?,?)(13,15) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Khaddafi (0,16)(?,?)(11,13) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghaddafy (0,16)(?,?)(11,13) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghadafi (0,15)(?,?)(11,12) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghaddafi (0,16)(?,?)(11,13) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muamar Kaddafi (0,14)(?,?)(9,11) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Quathafi (0,16)(?,?)(11,13) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Gheddafi (0,16)(?,?)(11,13) -E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moammar Khadafy (0,15)(?,?)(11,12) -E M[ou]'?am+[ae]r 
.*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moammar Qudhafi (0,15)(?,?)(10,12) -E a+(b|c)*d+ aabcdd (0,6)(3,4) -E ^.+$ vivi (0,4) -E ^(.+)$ vivi (0,4)(0,4) -E ^([^!.]+).att.com!(.+)$ gryphon.att.com!eby (0,19)(0,7)(16,19) -E ^([^!]+!)?([^!]+)$ bas (0,3)(?,?)(0,3) -E ^([^!]+!)?([^!]+)$ bar!bas (0,7)(0,4)(4,7) -E ^([^!]+!)?([^!]+)$ foo!bas (0,7)(0,4)(4,7) -E ^.+!([^!]+!)([^!]+)$ foo!bar!bas (0,11)(4,8)(8,11) -E ((foo)|(bar))!bas bar!bas (0,7)(0,3)(?,?)(0,3) -E ((foo)|(bar))!bas foo!bar!bas (4,11)(4,7)(?,?)(4,7) -E ((foo)|(bar))!bas foo!bas (0,7)(0,3)(0,3) -E ((foo)|bar)!bas bar!bas (0,7)(0,3) -E ((foo)|bar)!bas foo!bar!bas (4,11)(4,7) -E ((foo)|bar)!bas foo!bas (0,7)(0,3)(0,3) -E (foo|(bar))!bas bar!bas (0,7)(0,3)(0,3) -E (foo|(bar))!bas foo!bar!bas (4,11)(4,7)(4,7) -E (foo|(bar))!bas foo!bas (0,7)(0,3) -E (foo|bar)!bas bar!bas (0,7)(0,3) -E (foo|bar)!bas foo!bar!bas (4,11)(4,7) -E (foo|bar)!bas foo!bas (0,7)(0,3) -E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bar!bas (0,11)(0,11)(?,?)(?,?)(4,8)(8,11) -E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ bas (0,3)(?,?)(0,3) -E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ bar!bas (0,7)(0,4)(4,7) -E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ foo!bar!bas (0,11)(?,?)(?,?)(4,8)(8,11) -E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ foo!bas (0,7)(0,4)(4,7) -E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ bas (0,3)(0,3)(?,?)(0,3) -E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ bar!bas (0,7)(0,7)(0,4)(4,7) -E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bar!bas (0,11)(0,11)(?,?)(?,?)(4,8)(8,11) -E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bas (0,7)(0,7)(0,4)(4,7) -E .*(/XXX).* /XXX (0,4)(0,4) -E .*(\\XXX).* \XXX (0,4)(0,4) -E \\XXX \XXX (0,4) -E .*(/000).* /000 (0,4)(0,4) -E .*(\\000).* \000 (0,4)(0,4) -E \\000 \000 (0,4) diff --git a/collector/compile-benchmarks/regex-1.5.5/src/testdata/nullsubexpr.dat b/collector/compile-benchmarks/regex-1.5.5/src/testdata/nullsubexpr.dat deleted file mode 100644 index 2e18fbb91..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/src/testdata/nullsubexpr.dat +++ /dev/null @@ -1,79 +0,0 @@ -NOTE null subexpression matches : 2002-06-06 - -E (a*)* a (0,1)(0,1) -#E SAME x (0,0)(0,0) -E SAME x (0,0)(?,?) RE2/Go -E SAME aaaaaa (0,6)(0,6) -E SAME aaaaaax (0,6)(0,6) -E (a*)+ a (0,1)(0,1) -E SAME x (0,0)(0,0) -E SAME aaaaaa (0,6)(0,6) -E SAME aaaaaax (0,6)(0,6) -E (a+)* a (0,1)(0,1) -E SAME x (0,0) -E SAME aaaaaa (0,6)(0,6) -E SAME aaaaaax (0,6)(0,6) -E (a+)+ a (0,1)(0,1) -E SAME x NOMATCH -E SAME aaaaaa (0,6)(0,6) -E SAME aaaaaax (0,6)(0,6) - -E ([a]*)* a (0,1)(0,1) -#E SAME x (0,0)(0,0) -E SAME x (0,0)(?,?) RE2/Go -E SAME aaaaaa (0,6)(0,6) -E SAME aaaaaax (0,6)(0,6) -E ([a]*)+ a (0,1)(0,1) -E SAME x (0,0)(0,0) -E SAME aaaaaa (0,6)(0,6) -E SAME aaaaaax (0,6)(0,6) -E ([^b]*)* a (0,1)(0,1) -#E SAME b (0,0)(0,0) -E SAME b (0,0)(?,?) RE2/Go -E SAME aaaaaa (0,6)(0,6) -E SAME aaaaaab (0,6)(0,6) -E ([ab]*)* a (0,1)(0,1) -E SAME aaaaaa (0,6)(0,6) -E SAME ababab (0,6)(0,6) -E SAME bababa (0,6)(0,6) -E SAME b (0,1)(0,1) -E SAME bbbbbb (0,6)(0,6) -E SAME aaaabcde (0,5)(0,5) -E ([^a]*)* b (0,1)(0,1) -E SAME bbbbbb (0,6)(0,6) -#E SAME aaaaaa (0,0)(0,0) -E SAME aaaaaa (0,0)(?,?) RE2/Go -E ([^ab]*)* ccccxx (0,6)(0,6) -#E SAME ababab (0,0)(0,0) -E SAME ababab (0,0)(?,?) RE2/Go - -E ((z)+|a)* zabcde (0,2)(1,2) - -#{E a+? aaaaaa (0,1) no *? +? mimimal match ops -#E (a) aaa (0,1)(0,1) -#E (a*?) aaa (0,0)(0,0) -#E (a)*? aaa (0,0) -#E (a*?)*? 
aaa (0,0) -#} - -B \(a*\)*\(x\) x (0,1)(0,0)(0,1) -B \(a*\)*\(x\) ax (0,2)(0,1)(1,2) -B \(a*\)*\(x\) axa (0,2)(0,1)(1,2) -B \(a*\)*\(x\)\(\1\) x (0,1)(0,0)(0,1)(1,1) -B \(a*\)*\(x\)\(\1\) ax (0,2)(1,1)(1,2)(2,2) -B \(a*\)*\(x\)\(\1\) axa (0,3)(0,1)(1,2)(2,3) -B \(a*\)*\(x\)\(\1\)\(x\) axax (0,4)(0,1)(1,2)(2,3)(3,4) -B \(a*\)*\(x\)\(\1\)\(x\) axxa (0,3)(1,1)(1,2)(2,2)(2,3) - -#E (a*)*(x) x (0,1)(0,0)(0,1) -E (a*)*(x) x (0,1)(?,?)(0,1) RE2/Go -E (a*)*(x) ax (0,2)(0,1)(1,2) -E (a*)*(x) axa (0,2)(0,1)(1,2) - -E (a*)+(x) x (0,1)(0,0)(0,1) -E (a*)+(x) ax (0,2)(0,1)(1,2) -E (a*)+(x) axa (0,2)(0,1)(1,2) - -E (a*){2}(x) x (0,1)(0,0)(0,1) -E (a*){2}(x) ax (0,2)(1,1)(1,2) -E (a*){2}(x) axa (0,2)(1,1)(1,2) diff --git a/collector/compile-benchmarks/regex-1.5.5/src/testdata/repetition.dat b/collector/compile-benchmarks/regex-1.5.5/src/testdata/repetition.dat deleted file mode 100644 index 3bb212118..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/src/testdata/repetition.dat +++ /dev/null @@ -1,163 +0,0 @@ -NOTE implicit vs. explicit repetitions : 2009-02-02 - -# Glenn Fowler <gsf@research.att.com> -# conforming matches (column 4) must match one of the following BREs -# NOMATCH -# (0,.)\((\(.\),\(.\))(?,?)(\2,\3)\)* -# (0,.)\((\(.\),\(.\))(\2,\3)(?,?)\)* -# i.e., each 3-tuple has two identical elements and one (?,?) - -E ((..)|(.)) NULL NOMATCH -E ((..)|(.))((..)|(.)) NULL NOMATCH -E ((..)|(.))((..)|(.))((..)|(.)) NULL NOMATCH - -E ((..)|(.)){1} NULL NOMATCH -E ((..)|(.)){2} NULL NOMATCH -E ((..)|(.)){3} NULL NOMATCH - -E ((..)|(.))* NULL (0,0) - -E ((..)|(.)) a (0,1)(0,1)(?,?)(0,1) -E ((..)|(.))((..)|(.)) a NOMATCH -E ((..)|(.))((..)|(.))((..)|(.)) a NOMATCH - -E ((..)|(.)){1} a (0,1)(0,1)(?,?)(0,1) -E ((..)|(.)){2} a NOMATCH -E ((..)|(.)){3} a NOMATCH - -E ((..)|(.))* a (0,1)(0,1)(?,?)(0,1) - -E ((..)|(.)) aa (0,2)(0,2)(0,2)(?,?) -E ((..)|(.))((..)|(.)) aa (0,2)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2) -E ((..)|(.))((..)|(.))((..)|(.)) aa NOMATCH - -E ((..)|(.)){1} aa (0,2)(0,2)(0,2)(?,?) -E ((..)|(.)){2} aa (0,2)(1,2)(?,?)(1,2) -E ((..)|(.)){3} aa NOMATCH - -E ((..)|(.))* aa (0,2)(0,2)(0,2)(?,?) - -E ((..)|(.)) aaa (0,2)(0,2)(0,2)(?,?) -E ((..)|(.))((..)|(.)) aaa (0,3)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3) -E ((..)|(.))((..)|(.))((..)|(.)) aaa (0,3)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2)(2,3)(?,?)(2,3) - -E ((..)|(.)){1} aaa (0,2)(0,2)(0,2)(?,?) -#E ((..)|(.)){2} aaa (0,3)(2,3)(?,?)(2,3) -E ((..)|(.)){2} aaa (0,3)(2,3)(0,2)(2,3) RE2/Go -E ((..)|(.)){3} aaa (0,3)(2,3)(?,?)(2,3) - -#E ((..)|(.))* aaa (0,3)(2,3)(?,?)(2,3) -E ((..)|(.))* aaa (0,3)(2,3)(0,2)(2,3) RE2/Go - -E ((..)|(.)) aaaa (0,2)(0,2)(0,2)(?,?) -E ((..)|(.))((..)|(.)) aaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?) -E ((..)|(.))((..)|(.))((..)|(.)) aaaa (0,4)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3)(3,4)(?,?)(3,4) - -E ((..)|(.)){1} aaaa (0,2)(0,2)(0,2)(?,?) -E ((..)|(.)){2} aaaa (0,4)(2,4)(2,4)(?,?) -#E ((..)|(.)){3} aaaa (0,4)(3,4)(?,?)(3,4) -E ((..)|(.)){3} aaaa (0,4)(3,4)(0,2)(3,4) RE2/Go - -E ((..)|(.))* aaaa (0,4)(2,4)(2,4)(?,?) - -E ((..)|(.)) aaaaa (0,2)(0,2)(0,2)(?,?) -E ((..)|(.))((..)|(.)) aaaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?) -E ((..)|(.))((..)|(.))((..)|(.)) aaaaa (0,5)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,5)(?,?)(4,5) - -E ((..)|(.)){1} aaaaa (0,2)(0,2)(0,2)(?,?) -E ((..)|(.)){2} aaaaa (0,4)(2,4)(2,4)(?,?) -#E ((..)|(.)){3} aaaaa (0,5)(4,5)(?,?)(4,5) -E ((..)|(.)){3} aaaaa (0,5)(4,5)(2,4)(4,5) RE2/Go - -#E ((..)|(.))* aaaaa (0,5)(4,5)(?,?)(4,5) -E ((..)|(.))* aaaaa (0,5)(4,5)(2,4)(4,5) RE2/Go - -E ((..)|(.)) aaaaaa (0,2)(0,2)(0,2)(?,?) 
-E ((..)|(.))((..)|(.)) aaaaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?) -E ((..)|(.))((..)|(.))((..)|(.)) aaaaaa (0,6)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,6)(4,6)(?,?) - -E ((..)|(.)){1} aaaaaa (0,2)(0,2)(0,2)(?,?) -E ((..)|(.)){2} aaaaaa (0,4)(2,4)(2,4)(?,?) -E ((..)|(.)){3} aaaaaa (0,6)(4,6)(4,6)(?,?) - -E ((..)|(.))* aaaaaa (0,6)(4,6)(4,6)(?,?) - -NOTE additional repetition tests graciously provided by Chris Kuklewicz www.haskell.org 2009-02-02 - -# These test a bug in OS X / FreeBSD / NetBSD, and libtree. -# Linux/GLIBC gets the {8,} and {8,8} wrong. - -:HA#100:E X(.?){0,}Y X1234567Y (0,9)(7,8) -:HA#101:E X(.?){1,}Y X1234567Y (0,9)(7,8) -:HA#102:E X(.?){2,}Y X1234567Y (0,9)(7,8) -:HA#103:E X(.?){3,}Y X1234567Y (0,9)(7,8) -:HA#104:E X(.?){4,}Y X1234567Y (0,9)(7,8) -:HA#105:E X(.?){5,}Y X1234567Y (0,9)(7,8) -:HA#106:E X(.?){6,}Y X1234567Y (0,9)(7,8) -:HA#107:E X(.?){7,}Y X1234567Y (0,9)(7,8) -:HA#108:E X(.?){8,}Y X1234567Y (0,9)(8,8) -#:HA#110:E X(.?){0,8}Y X1234567Y (0,9)(7,8) -:HA#110:E X(.?){0,8}Y X1234567Y (0,9)(8,8) RE2/Go -#:HA#111:E X(.?){1,8}Y X1234567Y (0,9)(7,8) -:HA#111:E X(.?){1,8}Y X1234567Y (0,9)(8,8) RE2/Go -#:HA#112:E X(.?){2,8}Y X1234567Y (0,9)(7,8) -:HA#112:E X(.?){2,8}Y X1234567Y (0,9)(8,8) RE2/Go -#:HA#113:E X(.?){3,8}Y X1234567Y (0,9)(7,8) -:HA#113:E X(.?){3,8}Y X1234567Y (0,9)(8,8) RE2/Go -#:HA#114:E X(.?){4,8}Y X1234567Y (0,9)(7,8) -:HA#114:E X(.?){4,8}Y X1234567Y (0,9)(8,8) RE2/Go -#:HA#115:E X(.?){5,8}Y X1234567Y (0,9)(7,8) -:HA#115:E X(.?){5,8}Y X1234567Y (0,9)(8,8) RE2/Go -#:HA#116:E X(.?){6,8}Y X1234567Y (0,9)(7,8) -:HA#116:E X(.?){6,8}Y X1234567Y (0,9)(8,8) RE2/Go -#:HA#117:E X(.?){7,8}Y X1234567Y (0,9)(7,8) -:HA#117:E X(.?){7,8}Y X1234567Y (0,9)(8,8) RE2/Go -:HA#118:E X(.?){8,8}Y X1234567Y (0,9)(8,8) - -# These test a fixed bug in my regex-tdfa that did not keep the expanded -# form properly grouped, so right association did the wrong thing with -# these ambiguous patterns (crafted just to test my code when I became -# suspicious of my implementation). The first subexpression should use -# "ab" then "a" then "bcd". - -# OS X / FreeBSD / NetBSD badly fail many of these, with impossible -# results like (0,6)(4,5)(6,6). - -:HA#260:E (a|ab|c|bcd){0,}(d*) ababcd (0,1)(0,1)(1,1) -:HA#261:E (a|ab|c|bcd){1,}(d*) ababcd (0,1)(0,1)(1,1) -:HA#262:E (a|ab|c|bcd){2,}(d*) ababcd (0,6)(3,6)(6,6) -:HA#263:E (a|ab|c|bcd){3,}(d*) ababcd (0,6)(3,6)(6,6) -:HA#264:E (a|ab|c|bcd){4,}(d*) ababcd NOMATCH -:HA#265:E (a|ab|c|bcd){0,10}(d*) ababcd (0,1)(0,1)(1,1) -:HA#266:E (a|ab|c|bcd){1,10}(d*) ababcd (0,1)(0,1)(1,1) -:HA#267:E (a|ab|c|bcd){2,10}(d*) ababcd (0,6)(3,6)(6,6) -:HA#268:E (a|ab|c|bcd){3,10}(d*) ababcd (0,6)(3,6)(6,6) -:HA#269:E (a|ab|c|bcd){4,10}(d*) ababcd NOMATCH -:HA#270:E (a|ab|c|bcd)*(d*) ababcd (0,1)(0,1)(1,1) -:HA#271:E (a|ab|c|bcd)+(d*) ababcd (0,1)(0,1)(1,1) - -# The above worked on Linux/GLIBC but the following often fail. 
-# They also trip up OS X / FreeBSD / NetBSD: - -#:HA#280:E (ab|a|c|bcd){0,}(d*) ababcd (0,6)(3,6)(6,6) -:HA#280:E (ab|a|c|bcd){0,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go -#:HA#281:E (ab|a|c|bcd){1,}(d*) ababcd (0,6)(3,6)(6,6) -:HA#281:E (ab|a|c|bcd){1,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go -#:HA#282:E (ab|a|c|bcd){2,}(d*) ababcd (0,6)(3,6)(6,6) -:HA#282:E (ab|a|c|bcd){2,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go -#:HA#283:E (ab|a|c|bcd){3,}(d*) ababcd (0,6)(3,6)(6,6) -:HA#283:E (ab|a|c|bcd){3,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go -:HA#284:E (ab|a|c|bcd){4,}(d*) ababcd NOMATCH -#:HA#285:E (ab|a|c|bcd){0,10}(d*) ababcd (0,6)(3,6)(6,6) -:HA#285:E (ab|a|c|bcd){0,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go -#:HA#286:E (ab|a|c|bcd){1,10}(d*) ababcd (0,6)(3,6)(6,6) -:HA#286:E (ab|a|c|bcd){1,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go -#:HA#287:E (ab|a|c|bcd){2,10}(d*) ababcd (0,6)(3,6)(6,6) -:HA#287:E (ab|a|c|bcd){2,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go -#:HA#288:E (ab|a|c|bcd){3,10}(d*) ababcd (0,6)(3,6)(6,6) -:HA#288:E (ab|a|c|bcd){3,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go -:HA#289:E (ab|a|c|bcd){4,10}(d*) ababcd NOMATCH -#:HA#290:E (ab|a|c|bcd)*(d*) ababcd (0,6)(3,6)(6,6) -:HA#290:E (ab|a|c|bcd)*(d*) ababcd (0,6)(4,5)(5,6) RE2/Go -#:HA#291:E (ab|a|c|bcd)+(d*) ababcd (0,6)(3,6)(6,6) -:HA#291:E (ab|a|c|bcd)+(d*) ababcd (0,6)(4,5)(5,6) RE2/Go diff --git a/collector/compile-benchmarks/regex-1.5.5/src/utf8.rs b/collector/compile-benchmarks/regex-1.5.5/src/utf8.rs deleted file mode 100644 index 6e0608fdb..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/src/utf8.rs +++ /dev/null @@ -1,264 +0,0 @@ -/// A few elementary UTF-8 encoding and decoding functions used by the matching -/// engines. -/// -/// In an ideal world, the matching engines operate on `&str` and we can just -/// lean on the standard library for all our UTF-8 needs. However, to support -/// byte based regexes (that can match on arbitrary bytes which may contain -/// UTF-8), we need to be capable of searching and decoding UTF-8 on a `&[u8]`. -/// The standard library doesn't really recognize this use case, so we have -/// to build it out ourselves. -/// -/// Should this be factored out into a separate crate? It seems independently -/// useful. There are other crates that already exist (e.g., `utf-8`) that have -/// overlapping use cases. Not sure what to do. -use std::char; - -const TAG_CONT: u8 = 0b1000_0000; -const TAG_TWO: u8 = 0b1100_0000; -const TAG_THREE: u8 = 0b1110_0000; -const TAG_FOUR: u8 = 0b1111_0000; - -/// Returns the smallest possible index of the next valid UTF-8 sequence -/// starting after `i`. -pub fn next_utf8(text: &[u8], i: usize) -> usize { - let b = match text.get(i) { - None => return i + 1, - Some(&b) => b, - }; - let inc = if b <= 0x7F { - 1 - } else if b <= 0b110_11111 { - 2 - } else if b <= 0b1110_1111 { - 3 - } else { - 4 - }; - i + inc -} - -/// Decode a single UTF-8 sequence into a single Unicode codepoint from `src`. -/// -/// If no valid UTF-8 sequence could be found, then `None` is returned. -/// Otherwise, the decoded codepoint and the number of bytes read is returned. -/// The number of bytes read (for a valid UTF-8 sequence) is guaranteed to be -/// 1, 2, 3 or 4. -/// -/// Note that a UTF-8 sequence is invalid if it is incorrect UTF-8, encodes a -/// codepoint that is out of range (surrogate codepoints are out of range) or -/// is not the shortest possible UTF-8 sequence for that codepoint. 
-#[inline] -pub fn decode_utf8(src: &[u8]) -> Option<(char, usize)> { - let b0 = match src.get(0) { - None => return None, - Some(&b) if b <= 0x7F => return Some((b as char, 1)), - Some(&b) => b, - }; - match b0 { - 0b110_00000..=0b110_11111 => { - if src.len() < 2 { - return None; - } - let b1 = src[1]; - if 0b11_000000 & b1 != TAG_CONT { - return None; - } - let cp = ((b0 & !TAG_TWO) as u32) << 6 | ((b1 & !TAG_CONT) as u32); - match cp { - 0x80..=0x7FF => char::from_u32(cp).map(|cp| (cp, 2)), - _ => None, - } - } - 0b1110_0000..=0b1110_1111 => { - if src.len() < 3 { - return None; - } - let (b1, b2) = (src[1], src[2]); - if 0b11_000000 & b1 != TAG_CONT { - return None; - } - if 0b11_000000 & b2 != TAG_CONT { - return None; - } - let cp = ((b0 & !TAG_THREE) as u32) << 12 - | ((b1 & !TAG_CONT) as u32) << 6 - | ((b2 & !TAG_CONT) as u32); - match cp { - // char::from_u32 will disallow surrogate codepoints. - 0x800..=0xFFFF => char::from_u32(cp).map(|cp| (cp, 3)), - _ => None, - } - } - 0b11110_000..=0b11110_111 => { - if src.len() < 4 { - return None; - } - let (b1, b2, b3) = (src[1], src[2], src[3]); - if 0b11_000000 & b1 != TAG_CONT { - return None; - } - if 0b11_000000 & b2 != TAG_CONT { - return None; - } - if 0b11_000000 & b3 != TAG_CONT { - return None; - } - let cp = ((b0 & !TAG_FOUR) as u32) << 18 - | ((b1 & !TAG_CONT) as u32) << 12 - | ((b2 & !TAG_CONT) as u32) << 6 - | ((b3 & !TAG_CONT) as u32); - match cp { - 0x10000..=0x10FFFF => char::from_u32(cp).map(|cp| (cp, 4)), - _ => None, - } - } - _ => None, - } -} - -/// Like `decode_utf8`, but decodes the last UTF-8 sequence in `src` instead -/// of the first. -pub fn decode_last_utf8(src: &[u8]) -> Option<(char, usize)> { - if src.is_empty() { - return None; - } - let mut start = src.len() - 1; - if src[start] <= 0x7F { - return Some((src[start] as char, 1)); - } - while start > src.len().saturating_sub(4) { - start -= 1; - if is_start_byte(src[start]) { - break; - } - } - match decode_utf8(&src[start..]) { - None => None, - Some((_, n)) if n < src.len() - start => None, - Some((cp, n)) => Some((cp, n)), - } -} - -fn is_start_byte(b: u8) -> bool { - b & 0b11_000000 != 0b1_0000000 -} - -#[cfg(test)] -mod tests { - use std::str; - - use quickcheck::quickcheck; - - use super::{ - decode_last_utf8, decode_utf8, TAG_CONT, TAG_FOUR, TAG_THREE, TAG_TWO, - }; - - #[test] - fn prop_roundtrip() { - fn p(given_cp: char) -> bool { - let mut tmp = [0; 4]; - let encoded_len = given_cp.encode_utf8(&mut tmp).len(); - let (got_cp, got_len) = decode_utf8(&tmp[..encoded_len]).unwrap(); - encoded_len == got_len && given_cp == got_cp - } - quickcheck(p as fn(char) -> bool) - } - - #[test] - fn prop_roundtrip_last() { - fn p(given_cp: char) -> bool { - let mut tmp = [0; 4]; - let encoded_len = given_cp.encode_utf8(&mut tmp).len(); - let (got_cp, got_len) = - decode_last_utf8(&tmp[..encoded_len]).unwrap(); - encoded_len == got_len && given_cp == got_cp - } - quickcheck(p as fn(char) -> bool) - } - - #[test] - fn prop_encode_matches_std() { - fn p(cp: char) -> bool { - let mut got = [0; 4]; - let n = cp.encode_utf8(&mut got).len(); - let expected = cp.to_string(); - &got[..n] == expected.as_bytes() - } - quickcheck(p as fn(char) -> bool) - } - - #[test] - fn prop_decode_matches_std() { - fn p(given_cp: char) -> bool { - let mut tmp = [0; 4]; - let n = given_cp.encode_utf8(&mut tmp).len(); - let (got_cp, _) = decode_utf8(&tmp[..n]).unwrap(); - let expected_cp = - str::from_utf8(&tmp[..n]).unwrap().chars().next().unwrap(); - got_cp == expected_cp - } - 
quickcheck(p as fn(char) -> bool) - } - - #[test] - fn prop_decode_last_matches_std() { - fn p(given_cp: char) -> bool { - let mut tmp = [0; 4]; - let n = given_cp.encode_utf8(&mut tmp).len(); - let (got_cp, _) = decode_last_utf8(&tmp[..n]).unwrap(); - let expected_cp = str::from_utf8(&tmp[..n]) - .unwrap() - .chars() - .rev() - .next() - .unwrap(); - got_cp == expected_cp - } - quickcheck(p as fn(char) -> bool) - } - - #[test] - fn reject_invalid() { - // Invalid start byte - assert_eq!(decode_utf8(&[0xFF]), None); - // Surrogate pair - assert_eq!(decode_utf8(&[0xED, 0xA0, 0x81]), None); - // Invalid continuation byte. - assert_eq!(decode_utf8(&[0xD4, 0xC2]), None); - // Bad lengths - assert_eq!(decode_utf8(&[0xC3]), None); // 2 bytes - assert_eq!(decode_utf8(&[0xEF, 0xBF]), None); // 3 bytes - assert_eq!(decode_utf8(&[0xF4, 0x8F, 0xBF]), None); // 4 bytes - // Not a minimal UTF-8 sequence - assert_eq!(decode_utf8(&[TAG_TWO, TAG_CONT | b'a']), None); - assert_eq!(decode_utf8(&[TAG_THREE, TAG_CONT, TAG_CONT | b'a']), None); - assert_eq!( - decode_utf8(&[TAG_FOUR, TAG_CONT, TAG_CONT, TAG_CONT | b'a',]), - None - ); - } - - #[test] - fn reject_invalid_last() { - // Invalid start byte - assert_eq!(decode_last_utf8(&[0xFF]), None); - // Surrogate pair - assert_eq!(decode_last_utf8(&[0xED, 0xA0, 0x81]), None); - // Bad lengths - assert_eq!(decode_last_utf8(&[0xC3]), None); // 2 bytes - assert_eq!(decode_last_utf8(&[0xEF, 0xBF]), None); // 3 bytes - assert_eq!(decode_last_utf8(&[0xF4, 0x8F, 0xBF]), None); // 4 bytes - // Not a minimal UTF-8 sequence - assert_eq!(decode_last_utf8(&[TAG_TWO, TAG_CONT | b'a']), None); - assert_eq!( - decode_last_utf8(&[TAG_THREE, TAG_CONT, TAG_CONT | b'a',]), - None - ); - assert_eq!( - decode_last_utf8( - &[TAG_FOUR, TAG_CONT, TAG_CONT, TAG_CONT | b'a',] - ), - None - ); - } -} diff --git a/collector/compile-benchmarks/regex-1.5.5/test b/collector/compile-benchmarks/regex-1.5.5/test deleted file mode 100755 index b10564f12..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/test +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/bash - -set -e - -# This is a convenience script for running a broad swath of tests across -# features. We don't test the complete space, since the complete space is quite -# large. Hopefully once we migrate the test suite to better infrastructure -# (like regex-automata), we'll be able to test more of the space. 
-echo "===== DEFAULT FEATURES ===" -cargo test - -echo "===== DOC TESTS ===" -cargo test --doc - -features=( - "std" - "std unicode" - "std unicode-perl" - "std perf" - "std perf-cache" - "std perf-dfa" - "std perf-inline" - "std perf-literal" -) -for f in "${features[@]}"; do - echo "===== FEATURE: $f (default) ===" - cargo test --test default --no-default-features --features "$f" - echo "===== FEATURE: $f (default-bytes) ===" - cargo test --test default-bytes --no-default-features --features "$f" -done diff --git a/collector/compile-benchmarks/regex-1.5.5/tests/api.rs b/collector/compile-benchmarks/regex-1.5.5/tests/api.rs deleted file mode 100644 index c7250a8a3..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/tests/api.rs +++ /dev/null @@ -1,234 +0,0 @@ -#[test] -fn empty_regex_empty_match() { - let re = regex!(""); - assert_eq!(vec![(0, 0)], findall!(re, "")); -} - -#[test] -fn empty_regex_nonempty_match() { - let re = regex!(""); - assert_eq!(vec![(0, 0), (1, 1), (2, 2), (3, 3)], findall!(re, "abc")); -} - -#[test] -fn one_zero_length_match() { - let re = regex!(r"[0-9]*"); - assert_eq!(vec![(0, 0), (1, 2), (3, 4)], findall!(re, "a1b2")); -} - -#[test] -fn many_zero_length_match() { - let re = regex!(r"[0-9]*"); - assert_eq!( - vec![(0, 0), (1, 2), (3, 3), (4, 4), (5, 6)], - findall!(re, "a1bbb2") - ); -} - -#[test] -fn many_sequential_zero_length_match() { - let re = regex!(r"[0-9]?"); - assert_eq!( - vec![(0, 0), (1, 2), (2, 3), (4, 5), (6, 6)], - findall!(re, "a12b3c") - ); -} - -#[test] -fn quoted_bracket_set() { - let re = regex!(r"([\x{5b}\x{5d}])"); - assert_eq!(vec![(0, 1), (1, 2)], findall!(re, "[]")); - let re = regex!(r"([\[\]])"); - assert_eq!(vec![(0, 1), (1, 2)], findall!(re, "[]")); -} - -#[test] -fn first_range_starts_with_left_bracket() { - let re = regex!(r"([\[-z])"); - assert_eq!(vec![(0, 1), (1, 2)], findall!(re, "[]")); -} - -#[test] -fn range_ends_with_escape() { - let re = regex!(r"([\[-\x{5d}])"); - assert_eq!(vec![(0, 1), (1, 2)], findall!(re, "[]")); -} - -#[test] -fn empty_match_find_iter() { - let re = regex!(r".*?"); - assert_eq!(vec![(0, 0), (1, 1), (2, 2), (3, 3)], findall!(re, "abc")); -} - -#[test] -fn empty_match_captures_iter() { - let re = regex!(r".*?"); - let ms: Vec<_> = re - .captures_iter(text!("abc")) - .map(|c| c.get(0).unwrap()) - .map(|m| (m.start(), m.end())) - .collect(); - assert_eq!(ms, vec![(0, 0), (1, 1), (2, 2), (3, 3)]); -} - -#[test] -fn capture_names() { - let re = regex!(r"(.)(?P<a>.)"); - assert_eq!(3, re.captures_len()); - assert_eq!((3, Some(3)), re.capture_names().size_hint()); - assert_eq!( - vec![None, None, Some("a")], - re.capture_names().collect::<Vec<_>>() - ); -} - -#[test] -fn regex_string() { - assert_eq!(r"[a-zA-Z0-9]+", regex!(r"[a-zA-Z0-9]+").as_str()); - assert_eq!(r"[a-zA-Z0-9]+", &format!("{}", regex!(r"[a-zA-Z0-9]+"))); - assert_eq!(r"[a-zA-Z0-9]+", &format!("{:?}", regex!(r"[a-zA-Z0-9]+"))); -} - -#[test] -fn capture_index() { - let re = regex!(r"^(?P<name>.+)$"); - let cap = re.captures(t!("abc")).unwrap(); - assert_eq!(&cap[0], t!("abc")); - assert_eq!(&cap[1], t!("abc")); - assert_eq!(&cap["name"], t!("abc")); -} - -#[test] -#[should_panic] -#[cfg_attr(all(target_env = "msvc", target_pointer_width = "32"), ignore)] -fn capture_index_panic_usize() { - let re = regex!(r"^(?P<name>.+)$"); - let cap = re.captures(t!("abc")).unwrap(); - let _ = cap[2]; -} - -#[test] -#[should_panic] -#[cfg_attr(all(target_env = "msvc", target_pointer_width = "32"), ignore)] -fn capture_index_panic_name() { - let re = 
regex!(r"^(?P<name>.+)$"); - let cap = re.captures(t!("abc")).unwrap(); - let _ = cap["bad name"]; -} - -#[test] -fn capture_index_lifetime() { - // This is a test of whether the types on `caps["..."]` are general - // enough. If not, this will fail to typecheck. - fn inner(s: &str) -> usize { - let re = regex!(r"(?P<number>[0-9]+)"); - let caps = re.captures(t!(s)).unwrap(); - caps["number"].len() - } - assert_eq!(3, inner("123")); -} - -#[test] -fn capture_misc() { - let re = regex!(r"(.)(?P<a>a)?(.)(?P<b>.)"); - let cap = re.captures(t!("abc")).unwrap(); - - assert_eq!(5, cap.len()); - - assert_eq!((0, 3), { - let m = cap.get(0).unwrap(); - (m.start(), m.end()) - }); - assert_eq!(None, cap.get(2)); - assert_eq!((2, 3), { - let m = cap.get(4).unwrap(); - (m.start(), m.end()) - }); - - assert_eq!(t!("abc"), match_text!(cap.get(0).unwrap())); - assert_eq!(None, cap.get(2)); - assert_eq!(t!("c"), match_text!(cap.get(4).unwrap())); - - assert_eq!(None, cap.name("a")); - assert_eq!(t!("c"), match_text!(cap.name("b").unwrap())); -} - -#[test] -fn sub_capture_matches() { - let re = regex!(r"([a-z])(([a-z])|([0-9]))"); - let cap = re.captures(t!("a5")).unwrap(); - let subs: Vec<_> = cap.iter().collect(); - - assert_eq!(5, subs.len()); - assert!(subs[0].is_some()); - assert!(subs[1].is_some()); - assert!(subs[2].is_some()); - assert!(subs[3].is_none()); - assert!(subs[4].is_some()); - - assert_eq!(t!("a5"), match_text!(subs[0].unwrap())); - assert_eq!(t!("a"), match_text!(subs[1].unwrap())); - assert_eq!(t!("5"), match_text!(subs[2].unwrap())); - assert_eq!(t!("5"), match_text!(subs[4].unwrap())); -} - -expand!(expand1, r"(?-u)(?P<foo>\w+)", "abc", "$foo", "abc"); -expand!(expand2, r"(?-u)(?P<foo>\w+)", "abc", "$0", "abc"); -expand!(expand3, r"(?-u)(?P<foo>\w+)", "abc", "$1", "abc"); -expand!(expand4, r"(?-u)(?P<foo>\w+)", "abc", "$$1", "$1"); -expand!(expand5, r"(?-u)(?P<foo>\w+)", "abc", "$$foo", "$foo"); -expand!(expand6, r"(?-u)(?P<a>\w+)\s+(?P<b>\d+)", "abc 123", "$b$a", "123abc"); -expand!(expand7, r"(?-u)(?P<a>\w+)\s+(?P<b>\d+)", "abc 123", "z$bz$az", "z"); -expand!( - expand8, - r"(?-u)(?P<a>\w+)\s+(?P<b>\d+)", - "abc 123", - ".$b.$a.", - ".123.abc." 
-); -expand!( - expand9, - r"(?-u)(?P<a>\w+)\s+(?P<b>\d+)", - "abc 123", - " $b $a ", - " 123 abc " -); -expand!(expand10, r"(?-u)(?P<a>\w+)\s+(?P<b>\d+)", "abc 123", "$bz$az", ""); - -expand!(expand_name1, r"%(?P<Z>[a-z]+)", "%abc", "$Z%", "abc%"); -expand!(expand_name2, r"\[(?P<Z>[a-z]+)", "[abc", "$Z[", "abc["); -expand!(expand_name3, r"\{(?P<Z>[a-z]+)", "{abc", "$Z{", "abc{"); -expand!(expand_name4, r"\}(?P<Z>[a-z]+)", "}abc", "$Z}", "abc}"); -expand!(expand_name5, r"%([a-z]+)", "%abc", "$1a%", "%"); -expand!(expand_name6, r"%([a-z]+)", "%abc", "${1}a%", "abca%"); -expand!(expand_name7, r"\[(?P<Z[>[a-z]+)", "[abc", "${Z[}[", "abc["); -expand!(expand_name8, r"\[(?P<Z[>[a-z]+)", "[abc", "${foo}[", "["); -expand!(expand_name9, r"\[(?P<Z[>[a-z]+)", "[abc", "${1a}[", "["); -expand!(expand_name10, r"\[(?P<Z[>[a-z]+)", "[abc", "${#}[", "["); -expand!(expand_name11, r"\[(?P<Z[>[a-z]+)", "[abc", "${$$}[", "["); - -split!( - split1, - r"(?-u)\s+", - "a b\nc\td\n\t e", - &[t!("a"), t!("b"), t!("c"), t!("d"), t!("e")] -); -split!( - split2, - r"(?-u)\b", - "a b c", - &[t!(""), t!("a"), t!(" "), t!("b"), t!(" "), t!("c"), t!("")] -); -split!(split3, r"a$", "a", &[t!(""), t!("")]); -split!(split_none, r"-", r"a", &[t!("a")]); -split!(split_trailing_blank, r"-", r"a-", &[t!("a"), t!("")]); -split!(split_trailing_blanks, r"-", r"a--", &[t!("a"), t!(""), t!("")]); -split!(split_empty, r"-", r"", &[t!("")]); - -splitn!(splitn_below_limit, r"-", r"a", 2, &[t!("a")]); -splitn!(splitn_at_limit, r"-", r"a-b", 2, &[t!("a"), t!("b")]); -splitn!(splitn_above_limit, r"-", r"a-b-c", 2, &[t!("a"), t!("b-c")]); -splitn!(splitn_zero_limit, r"-", r"a-b", 0, empty_vec!()); -splitn!(splitn_trailing_blank, r"-", r"a-", 2, &[t!("a"), t!("")]); -splitn!(splitn_trailing_separator, r"-", r"a--", 2, &[t!("a"), t!("-")]); -splitn!(splitn_empty, r"-", r"", 1, &[t!("")]); diff --git a/collector/compile-benchmarks/regex-1.5.5/tests/api_str.rs b/collector/compile-benchmarks/regex-1.5.5/tests/api_str.rs deleted file mode 100644 index 480116da7..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/tests/api_str.rs +++ /dev/null @@ -1,34 +0,0 @@ -// These tests don't really make sense with the bytes API, so we only test them -// on the Unicode API. - -#[test] -fn empty_match_unicode_find_iter() { - // Tests that we still yield byte ranges at valid UTF-8 sequence boundaries - // even when we're susceptible to empty width matches. - let re = regex!(r".*?"); - assert_eq!( - vec![(0, 0), (3, 3), (4, 4), (7, 7), (8, 8)], - findall!(re, "Ⅰ1Ⅱ2") - ); -} - -#[test] -fn empty_match_unicode_captures_iter() { - // Same as empty_match_unicode_find_iter, but tests capture iteration. - let re = regex!(r".*?"); - let ms: Vec<_> = re - .captures_iter(text!("Ⅰ1Ⅱ2")) - .map(|c| c.get(0).unwrap()) - .map(|m| (m.start(), m.end())) - .collect(); - assert_eq!(vec![(0, 0), (3, 3), (4, 4), (7, 7), (8, 8)], ms); -} - -#[test] -fn match_as_str() { - let re = regex!(r"fo+"); - let caps = re.captures("barfoobar").unwrap(); - assert_eq!(caps.get(0).map(|m| m.as_str()), Some("foo")); - assert_eq!(caps.get(0).map(From::from), Some("foo")); - assert_eq!(caps.get(0).map(Into::into), Some("foo")); -} diff --git a/collector/compile-benchmarks/regex-1.5.5/tests/bytes.rs b/collector/compile-benchmarks/regex-1.5.5/tests/bytes.rs deleted file mode 100644 index d05f138ed..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/tests/bytes.rs +++ /dev/null @@ -1,107 +0,0 @@ -// These are tests specifically crafted for regexes that can match arbitrary -// bytes. 
- -// A silly wrapper to make it possible to write and match raw bytes. -struct R<'a>(&'a [u8]); -impl<'a> R<'a> { - fn as_bytes(&self) -> &'a [u8] { - self.0 - } -} - -mat!(word_boundary, r"(?-u) \b", " δ", None); -#[cfg(feature = "unicode-perl")] -mat!(word_boundary_unicode, r" \b", " δ", Some((0, 1))); -mat!(word_not_boundary, r"(?-u) \B", " δ", Some((0, 1))); -#[cfg(feature = "unicode-perl")] -mat!(word_not_boundary_unicode, r" \B", " δ", None); - -mat!(perl_w_ascii, r"(?-u)\w+", "aδ", Some((0, 1))); -#[cfg(feature = "unicode-perl")] -mat!(perl_w_unicode, r"\w+", "aδ", Some((0, 3))); -mat!(perl_d_ascii, r"(?-u)\d+", "1२३9", Some((0, 1))); -#[cfg(feature = "unicode-perl")] -mat!(perl_d_unicode, r"\d+", "1२३9", Some((0, 8))); -mat!(perl_s_ascii, r"(?-u)\s+", " \u{1680}", Some((0, 1))); -#[cfg(feature = "unicode-perl")] -mat!(perl_s_unicode, r"\s+", " \u{1680}", Some((0, 4))); - -// The first `(.+)` matches two Unicode codepoints, but can't match the 5th -// byte, which isn't valid UTF-8. The second (byte based) `(.+)` takes over and -// matches. -mat!( - mixed1, - r"(.+)(?-u)(.+)", - R(b"\xCE\x93\xCE\x94\xFF"), - Some((0, 5)), - Some((0, 4)), - Some((4, 5)) -); - -mat!(case_ascii_one, r"(?i-u)a", "A", Some((0, 1))); -mat!(case_ascii_class, r"(?i-u)[a-z]+", "AaAaA", Some((0, 5))); -#[cfg(feature = "unicode-case")] -mat!(case_unicode, r"(?i)[a-z]+", "aA\u{212A}aA", Some((0, 7))); -mat!(case_not_unicode, r"(?i-u)[a-z]+", "aA\u{212A}aA", Some((0, 2))); - -mat!(negate_unicode, r"[^a]", "δ", Some((0, 2))); -mat!(negate_not_unicode, r"(?-u)[^a]", "δ", Some((0, 1))); - -// This doesn't match in a normal Unicode regex because the implicit preceding -// `.*?` is Unicode aware. -mat!(dotstar_prefix_not_unicode1, r"(?-u)a", R(b"\xFFa"), Some((1, 2))); -mat!(dotstar_prefix_not_unicode2, r"a", R(b"\xFFa"), Some((1, 2))); - -// Have fun with null bytes. -mat!( - null_bytes, - r"(?-u)(?P<cstr>[^\x00]+)\x00", - R(b"foo\x00"), - Some((0, 4)), - Some((0, 3)) -); - -// Test that lookahead operators work properly in the face of invalid UTF-8. 
-// See: https://github.com/rust-lang/regex/issues/277 -matiter!( - invalidutf8_anchor1, - r"(?-u)\xcc?^", - R(b"\x8d#;\x1a\xa4s3\x05foobarX\\\x0f0t\xe4\x9b\xa4"), - (0, 0) -); -matiter!( - invalidutf8_anchor2, - r"(?-u)^\xf7|4\xff\d\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a##########[] d\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a##########\[] #####\x80\S7|$", - R(b"\x8d#;\x1a\xa4s3\x05foobarX\\\x0f0t\xe4\x9b\xa4"), - (22, 22) -); -matiter!( - invalidutf8_anchor3, - r"(?-u)^|ddp\xff\xffdddddlQd@\x80", - R(b"\x8d#;\x1a\xa4s3\x05foobarX\\\x0f0t\xe4\x9b\xa4"), - (0, 0) -); - -// See https://github.com/rust-lang/regex/issues/303 -#[test] -fn negated_full_byte_range() { - assert!(::regex::bytes::Regex::new(r#"(?-u)[^\x00-\xff]"#).is_err()); -} - -matiter!(word_boundary_ascii1, r"(?-u:\B)x(?-u:\B)", "áxβ"); -matiter!( - word_boundary_ascii2, - r"(?-u:\B)", - "0\u{7EF5E}", - (2, 2), - (3, 3), - (4, 4), - (5, 5) -); - -// See: https://github.com/rust-lang/regex/issues/264 -mat!(ascii_boundary_no_capture, r"(?-u)\B", "\u{28f3e}", Some((0, 0))); -mat!(ascii_boundary_capture, r"(?-u)(\B)", "\u{28f3e}", Some((0, 0))); - -// See: https://github.com/rust-lang/regex/issues/271 -mat!(end_not_wb, r"$(?-u:\B)", "\u{5c124}\u{b576c}", Some((8, 8))); diff --git a/collector/compile-benchmarks/regex-1.5.5/tests/consistent.rs b/collector/compile-benchmarks/regex-1.5.5/tests/consistent.rs deleted file mode 100644 index 722f2a51a..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/tests/consistent.rs +++ /dev/null @@ -1,238 +0,0 @@ -use regex::internal::ExecBuilder; - -/// Given a regex, check if all of the backends produce the same -/// results on a number of different inputs. -/// -/// For now this just throws quickcheck at the problem, which -/// is not very good because it only really tests half of the -/// problem space. It is pretty unlikely that a random string -/// will match any given regex, so this will probably just -/// be checking that the different backends fail in the same -/// way. This is still worthwhile to test, but is definitely not -/// the whole story. -/// -/// TODO(ethan): In order to cover the other half of the problem -/// space, we should generate a random matching string by inspecting -/// the AST of the input regex. The right way to do this probably -/// involves adding a custom Arbitrary instance around a couple -/// of newtypes. That way we can respect the quickcheck size hinting -/// and shrinking and whatnot. 
-pub fn backends_are_consistent(re: &str) -> Result<u64, String> { - let standard_backends = vec![ - ( - "bounded_backtracking_re", - ExecBuilder::new(re) - .bounded_backtracking() - .build() - .map(|exec| exec.into_regex()) - .map_err(|err| format!("{}", err))?, - ), - ( - "pikevm_re", - ExecBuilder::new(re) - .nfa() - .build() - .map(|exec| exec.into_regex()) - .map_err(|err| format!("{}", err))?, - ), - ( - "default_re", - ExecBuilder::new(re) - .build() - .map(|exec| exec.into_regex()) - .map_err(|err| format!("{}", err))?, - ), - ]; - - let utf8bytes_backends = vec![ - ( - "bounded_backtracking_utf8bytes_re", - ExecBuilder::new(re) - .bounded_backtracking() - .bytes(true) - .build() - .map(|exec| exec.into_regex()) - .map_err(|err| format!("{}", err))?, - ), - ( - "pikevm_utf8bytes_re", - ExecBuilder::new(re) - .nfa() - .bytes(true) - .build() - .map(|exec| exec.into_regex()) - .map_err(|err| format!("{}", err))?, - ), - ( - "default_utf8bytes_re", - ExecBuilder::new(re) - .bytes(true) - .build() - .map(|exec| exec.into_regex()) - .map_err(|err| format!("{}", err))?, - ), - ]; - - let bytes_backends = vec![ - ( - "bounded_backtracking_bytes_re", - ExecBuilder::new(re) - .bounded_backtracking() - .only_utf8(false) - .build() - .map(|exec| exec.into_byte_regex()) - .map_err(|err| format!("{}", err))?, - ), - ( - "pikevm_bytes_re", - ExecBuilder::new(re) - .nfa() - .only_utf8(false) - .build() - .map(|exec| exec.into_byte_regex()) - .map_err(|err| format!("{}", err))?, - ), - ( - "default_bytes_re", - ExecBuilder::new(re) - .only_utf8(false) - .build() - .map(|exec| exec.into_byte_regex()) - .map_err(|err| format!("{}", err))?, - ), - ]; - - Ok(string_checker::check_backends(&standard_backends)? - + string_checker::check_backends(&utf8bytes_backends)? - + bytes_checker::check_backends(&bytes_backends)?) -} - -// -// A consistency checker parameterized by the input type (&str or &[u8]). -// - -macro_rules! 
checker { - ($module_name:ident, $regex_type:path, $mk_input:expr) => { - mod $module_name { - use quickcheck; - use quickcheck::{Arbitrary, TestResult}; - - pub fn check_backends( - backends: &[(&str, $regex_type)], - ) -> Result<u64, String> { - let mut total_passed = 0; - for regex in backends[1..].iter() { - total_passed += quickcheck_regex_eq(&backends[0], regex)?; - } - - Ok(total_passed) - } - - fn quickcheck_regex_eq( - &(name1, ref re1): &(&str, $regex_type), - &(name2, ref re2): &(&str, $regex_type), - ) -> Result<u64, String> { - quickcheck::QuickCheck::new() - .quicktest(RegexEqualityTest::new( - re1.clone(), - re2.clone(), - )) - .map_err(|err| { - format!( - "{}(/{}/) and {}(/{}/) are inconsistent.\ - QuickCheck Err: {:?}", - name1, re1, name2, re2, err - ) - }) - } - - struct RegexEqualityTest { - re1: $regex_type, - re2: $regex_type, - } - impl RegexEqualityTest { - fn new(re1: $regex_type, re2: $regex_type) -> Self { - RegexEqualityTest { re1: re1, re2: re2 } - } - } - - impl quickcheck::Testable for RegexEqualityTest { - fn result(&self, gen: &mut quickcheck::Gen) -> TestResult { - let input = $mk_input(gen); - let input = &input; - - if self.re1.find(&input) != self.re2.find(input) { - return TestResult::error(format!( - "find mismatch input={:?}", - input - )); - } - - let cap1 = self.re1.captures(input); - let cap2 = self.re2.captures(input); - match (cap1, cap2) { - (None, None) => {} - (Some(cap1), Some(cap2)) => { - for (c1, c2) in cap1.iter().zip(cap2.iter()) { - if c1 != c2 { - return TestResult::error(format!( - "captures mismatch input={:?}", - input - )); - } - } - } - _ => { - return TestResult::error(format!( - "captures mismatch input={:?}", - input - )) - } - } - - let fi1 = self.re1.find_iter(input); - let fi2 = self.re2.find_iter(input); - for (m1, m2) in fi1.zip(fi2) { - if m1 != m2 { - return TestResult::error(format!( - "find_iter mismatch input={:?}", - input - )); - } - } - - let ci1 = self.re1.captures_iter(input); - let ci2 = self.re2.captures_iter(input); - for (cap1, cap2) in ci1.zip(ci2) { - for (c1, c2) in cap1.iter().zip(cap2.iter()) { - if c1 != c2 { - return TestResult::error(format!( - "captures_iter mismatch input={:?}", - input - )); - } - } - } - - let s1 = self.re1.split(input); - let s2 = self.re2.split(input); - for (chunk1, chunk2) in s1.zip(s2) { - if chunk1 != chunk2 { - return TestResult::error(format!( - "split mismatch input={:?}", - input - )); - } - } - - TestResult::from_bool(true) - } - } - } // mod - }; // rule case -} // macro_rules! - -checker!(string_checker, ::regex::Regex, |gen| String::arbitrary(gen)); -checker!(bytes_checker, ::regex::bytes::Regex, |gen| Vec::<u8>::arbitrary( - gen -)); diff --git a/collector/compile-benchmarks/regex-1.5.5/tests/crates_regex.rs b/collector/compile-benchmarks/regex-1.5.5/tests/crates_regex.rs deleted file mode 100644 index 200ec27b2..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/tests/crates_regex.rs +++ /dev/null @@ -1,3287 +0,0 @@ -// DO NOT EDIT. Automatically generated by 'scripts/scrape_crates_io.py' -// on 2018-06-20 09:56:32.820354. 
- -// autoshutdown-0.1.0: r"\s*(\d+)(\w)\s*" -consistent!(autoshutdown_0, r"\s*(\d+)(\w)\s*"); - -// epub-1.1.1: r"/" -consistent!(epub_0, r"/"); - -// rpi-info-0.2.0: "^Revision\t+: ([0-9a-fA-F]+)" -consistent!(rpi_info_0, "^Revision\t+: ([0-9a-fA-F]+)"); - -// rpi-info-0.2.0: "Serial\t+: ([0-9a-fA-F]+)" -consistent!(rpi_info_1, "Serial\t+: ([0-9a-fA-F]+)"); - -// pnet_macros-0.21.0: r"^u([0-9]+)(be|le|he)?$" -consistent!(pnet_macros_0, r"^u([0-9]+)(be|le|he)?$"); - -// iban_validate-1.0.3: r"^[A-Z]{2}\d{2}[A-Z\d]{1,30}$" -consistent!(iban_validate_0, r"^[A-Z]{2}\d{2}[A-Z\d]{1,30}$"); - -// markifier-0.1.0: r".*\[(?P<percent>.+)%.*\].*" -consistent!(markifier_0, r".*\[(?P<percent>.+)%.*\].*"); - -// mallumo-0.3.0: r"(#include) (\S*)(.*)" -consistent!(mallumo_0, r"(#include) (\S*)(.*)"); - -// mallumo-0.3.0: r"(ERROR: \d+:)(\d+)(: )(.+)" -consistent!(mallumo_1, r"(ERROR: \d+:)(\d+)(: )(.+)"); - -// mallumo-0.3.0: r"(\d+\()(\d+)(?:\) : )(.+)" -consistent!(mallumo_2, r"(\d+\()(\d+)(?:\) : )(.+)"); - -// magnet_more-0.0.1: r"(.+?)(\[.*?\])?" -consistent!(magnet_more_0, r"(.+?)(\[.*?\])?"); - -// magnet_app-0.0.1: r":(?P<k>[a-zA-Z_]+)" -consistent!(magnet_app_0, r":(?P<k>[a-zA-Z_]+)"); - -// yubibomb-0.2.0: r"^\d{6}(?:\s*,\s*\d{6})*$" -consistent!(yubibomb_0, r"^\d{6}(?:\s*,\s*\d{6})*$"); - -// multirust-rs-0.0.4: r"[\\/]([^\\/?]+)(\?.*)?$" -consistent!(multirust_rs_0, r"[\\/]([^\\/?]+)(\?.*)?$"); - -// hueclient-0.3.2: "\"[a-z]*\":null" -consistent!(hueclient_0, "\"[a-z]*\":null"); - -// hueclient-0.3.2: ",+" -consistent!(hueclient_1, ",+"); - -// hueclient-0.3.2: ",\\}" -consistent!(hueclient_2, ",\\}"); - -// hueclient-0.3.2: "\\{," -consistent!(hueclient_3, "\\{,"); - -// aerial-0.1.0: r"[a-zA-Z_\$][a-zA-Z_0-9]*" -consistent!(aerial_0, r"[a-zA-Z_\$][a-zA-Z_0-9]*"); - -// aerial-0.1.0: r"thi[sng]+" -consistent!(aerial_1, r"thi[sng]+"); - -// rvue-0.1.0: r"(.+)\s+\((.+?)\)" -consistent!(rvue_0, r"(.+)\s+\((.+?)\)"); - -// rvue-0.1.0: r"([\d\.]+)\s*out\s*of\s*([\d\.]+)" -consistent!(rvue_1, r"([\d\.]+)\s*out\s*of\s*([\d\.]+)"); - -// rvue-0.1.0: r"^([\d\.]+)\s*(?:\(\))?$" -consistent!(rvue_2, r"^([\d\.]+)\s*(?:\(\))?$"); - -// rvue-0.1.0: r"([\d\.]+)\s*Points\s*Possible" -consistent!(rvue_3, r"([\d\.]+)\s*Points\s*Possible"); - -// rvue-0.1.0: r"([\d\.]+)\s*/\s*([\d\.]+)" -consistent!(rvue_4, r"([\d\.]+)\s*/\s*([\d\.]+)"); - -// rvsim-0.1.0: r"_?([_a-z0-9]+)\s*:\s*([_a-z0-9]+)\s*[,)]" -consistent!(rvsim_0, r"_?([_a-z0-9]+)\s*:\s*([_a-z0-9]+)\s*[,)]"); - -// nereon-0.1.4: "(.*[^\\\\])\\{\\}(.*)" -consistent!(nereon_0, "(.*[^\\\\])\\{\\}(.*)"); - -// next_episode-0.3.0: r"((?i)^(.+).s(\d+)e(\d+).*)$" -consistent!(next_episode_0, r"((?i)^(.+).s(\d+)e(\d+).*)$"); - -// migrant_lib-0.19.2: r"[^a-z0-9-]+" -consistent!(migrant_lib_0, r"[^a-z0-9-]+"); - -// migrant_lib-0.19.2: r"[0-9]{14}_[a-z0-9-]+" -consistent!(migrant_lib_1, r"[0-9]{14}_[a-z0-9-]+"); - -// migrant_lib-0.19.2: r"([0-9]{14}_)?[a-z0-9-]+" -consistent!(migrant_lib_2, r"([0-9]{14}_)?[a-z0-9-]+"); - -// minipre-0.2.0: "$_" -consistent!(minipre_0, "$_"); - -// minifier-0.0.13: r">\s+<" -consistent!(minifier_0, r">\s+<"); - -// minifier-0.0.13: r"\s{2,}|[\r\n]" -consistent!(minifier_1, r"\s{2,}|[\r\n]"); - -// minifier-0.0.13: r"<(style|script)[\w|\s].*?>" -consistent!(minifier_2, r"<(style|script)[\w|\s].*?>"); - -// minifier-0.0.13: "<!--(.|\n)*?-->" -consistent!(minifier_3, "<!--(.|\n)*?-->"); - -// minifier-0.0.13: r"<\w.*?>" -consistent!(minifier_4, r"<\w.*?>"); - -// minifier-0.0.13: r" \s+|\s +" -consistent!(minifier_5, r" \s+|\s 
+"); - -// minifier-0.0.13: r"\w\s+\w" -consistent!(minifier_6, r"\w\s+\w"); - -// minifier-0.0.13: r"'\s+>" -consistent!(minifier_7, r"'\s+>"); - -// minifier-0.0.13: r"\d\s+>" -consistent!(minifier_8, r"\d\s+>"); - -// ggp-rs-0.1.2: r"(?P<relation>\([^)]+\))|(?P<prop>[a-zA-Z0-9_]+)" -consistent!(ggp_rs_0, r"(?P<relation>\([^)]+\))|(?P<prop>[a-zA-Z0-9_]+)"); - -// ggp-rs-0.1.2: r"\((.*)\)." -consistent!(ggp_rs_1, r"\((.*)\)."); - -// poe-superfilter-0.2.0: "[A-Za-z0-9_]" -consistent!(poe_superfilter_0, "[A-Za-z0-9_]"); - -// poke-a-mango-0.5.0: r"(\d+)x(\d+)" -consistent!(poke_a_mango_0, r"(\d+)x(\d+)"); - -// pop3-rs-0.1.0: r"(?P<nmsg>\d+) (?P<size>\d+)" -consistent!(pop3_rs_0, r"(?P<nmsg>\d+) (?P<size>\d+)"); - -// pop3-rs-0.1.0: r"(?P<msgid>\d+) (?P<uidl>[\x21-\x7E]{1,70})" -consistent!(pop3_rs_1, r"(?P<msgid>\d+) (?P<uidl>[\x21-\x7E]{1,70})"); - -// pop3-rs-0.1.0: r"(<.*>)\r\n$" -consistent!(pop3_rs_2, r"(<.*>)\r\n$"); - -// pop3-rs-0.1.0: r"^(?P<status>\+OK|-ERR) (?P<statustext>.*)" -consistent!(pop3_rs_3, r"^(?P<status>\+OK|-ERR) (?P<statustext>.*)"); - -// pop3-1.0.6: r"^\.\r\n$" -consistent!(pop3_0, r"^\.\r\n$"); - -// pop3-1.0.6: r"\+OK(.*)" -consistent!(pop3_1, r"\+OK(.*)"); - -// pop3-1.0.6: r"-ERR(.*)" -consistent!(pop3_2, r"-ERR(.*)"); - -// pop3-1.0.6: r"\+OK (\d+) (\d+)\r\n" -consistent!(pop3_3, r"\+OK (\d+) (\d+)\r\n"); - -// pop3-1.0.6: r"(\d+) ([\x21-\x7e]+)\r\n" -consistent!(pop3_4, r"(\d+) ([\x21-\x7e]+)\r\n"); - -// pop3-1.0.6: r"\+OK (\d+) ([\x21-\x7e]+)\r\n" -consistent!(pop3_5, r"\+OK (\d+) ([\x21-\x7e]+)\r\n"); - -// pop3-1.0.6: r"(\d+) (\d+)\r\n" -consistent!(pop3_6, r"(\d+) (\d+)\r\n"); - -// pop3-1.0.6: r"\+OK (\d+) (\d+)\r\n" -consistent!(pop3_7, r"\+OK (\d+) (\d+)\r\n"); - -// polk-1.1.3: "github:(\\w+)/?(\\w+)?" -consistent!(polk_0, "github:(\\w+)/?(\\w+)?"); - -// geochunk-0.1.5: "^[0-9]{5}" -consistent!(geochunk_0, "^[0-9]{5}"); - -// generic-dns-update-1.1.4: r"((?:(?:0|1[\d]{0,2}|2(?:[0-4]\d?|5[0-5]?|[6-9])?|[3-9]\d?)\.){3}(?:0|1[\d]{0,2}|2(?:[0-4]\d?|5[0-5]?|[6-9])?|[3-9]\d?))" -consistent!(generic_dns_update_0, r"((?:(?:0|1[\d]{0,2}|2(?:[0-4]\d?|5[0-5]?|[6-9])?|[3-9]\d?)\.){3}(?:0|1[\d]{0,2}|2(?:[0-4]\d?|5[0-5]?|[6-9])?|[3-9]\d?))"); - -// generic-dns-update-1.1.4: r"((([0-9A-Fa-f]{1,4}:){7}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){6}:[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){5}:([0-9A-Fa-f]{1,4}:)?[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){4}:([0-9A-Fa-f]{1,4}:){0,2}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){3}:([0-9A-Fa-f]{1,4}:){0,3}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){2}:([0-9A-Fa-f]{1,4}:){0,4}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){6}((\d((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\d)\.){3}(\d((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\d))|(([0-9A-Fa-f]{1,4}:){0,5}:((\d((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\d)\.){3}(\d((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\d))|(::([0-9A-Fa-f]{1,4}:){0,5}((\d((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\d)\.){3}(\d((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\d))|([0-9A-Fa-f]{1,4}::([0-9A-Fa-f]{1,4}:){0,5}[0-9A-Fa-f]{1,4})|(::([0-9A-Fa-f]{1,4}:){0,6}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){1,7}:))" -consistent!(generic_dns_update_1, 
r"((([0-9A-Fa-f]{1,4}:){7}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){6}:[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){5}:([0-9A-Fa-f]{1,4}:)?[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){4}:([0-9A-Fa-f]{1,4}:){0,2}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){3}:([0-9A-Fa-f]{1,4}:){0,3}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){2}:([0-9A-Fa-f]{1,4}:){0,4}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){6}((\d((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\d)\.){3}(\d((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\d))|(([0-9A-Fa-f]{1,4}:){0,5}:((\d((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\d)\.){3}(\d((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\d))|(::([0-9A-Fa-f]{1,4}:){0,5}((\d((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\d)\.){3}(\d((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\d))|([0-9A-Fa-f]{1,4}::([0-9A-Fa-f]{1,4}:){0,5}[0-9A-Fa-f]{1,4})|(::([0-9A-Fa-f]{1,4}:){0,6}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){1,7}:))"); - -// generic-dns-update-1.1.4: r"<value><string>([0-9.]*)</string></value>" -consistent!( - generic_dns_update_2, - r"<value><string>([0-9.]*)</string></value>" -); - -// generic-dns-update-1.1.4: r"<int>([0-9]+)</int>" -consistent!(generic_dns_update_3, r"<int>([0-9]+)</int>"); - -// generic-dns-update-1.1.4: r"<int>([0-9]+)</int>" -consistent!(generic_dns_update_4, r"<int>([0-9]+)</int>"); - -// generic-dns-update-1.1.4: r"<boolean>([0-1]*)</boolean>" -consistent!(generic_dns_update_5, r"<boolean>([0-1]*)</boolean>"); - -// generate-nix-pkg-0.3.0: r"(\d*)\.(\d*)\.(\d*)(-(\S*))?" -consistent!(generate_nix_pkg_0, r"(\d*)\.(\d*)\.(\d*)(-(\S*))?"); - -// generate-nix-pkg-0.3.0: r"^(\S*) (\d*)\.(\d*)\.(\d*)(-(\S*))?" -consistent!(generate_nix_pkg_1, r"^(\S*) (\d*)\.(\d*)\.(\d*)(-(\S*))?"); - -// genact-0.6.0: r"arch/([a-z0-9_])+/" -consistent!(genact_0, r"arch/([a-z0-9_])+/"); - -// genact-0.6.0: r"arch/([a-z0-9_])+/" -consistent!(genact_1, r"arch/([a-z0-9_])+/"); - -// cron_rs-0.1.6: r"^\s*((\*(/\d+)?)|[0-9-,/]+)(\s+((\*(/\d+)?)|[0-9-,/]+)){4,5}\s*$" -consistent!( - cron_rs_0, - r"^\s*((\*(/\d+)?)|[0-9-,/]+)(\s+((\*(/\d+)?)|[0-9-,/]+)){4,5}\s*$" -); - -// systemfd-0.3.0: r"^([a-zA-Z]+)::(.+)$" -consistent!(systemfd_0, r"^([a-zA-Z]+)::(.+)$"); - -// symbolic-debuginfo-5.0.2: "__?hidden#\\d+_" -consistent!(symbolic_debuginfo_0, "__?hidden#\\d+_"); - -// symbolic-minidump-5.0.2: r"^Linux ([^ ]+) (.*) \w+(?: GNU/Linux)?$" -consistent!(symbolic_minidump_0, r"^Linux ([^ ]+) (.*) \w+(?: GNU/Linux)?$"); - -// graphql-idl-parser-0.1.1: "^(?u:\\#)(?u:[\t-\r - \u{85}-\u{85}\u{a0}-\u{a0}\u{1680}-\u{1680}\u{2000}-\u{200a}\u{2028}-\u{2029}\u{202f}-\u{202f}\u{205f}-\u{205f}\u{3000}-\u{3000}])*(?u:.)+" -consistent!(graphql_idl_parser_0, "^(?u:\\#)(?u:[\t-\r - \u{85}-\u{85}\u{a0}-\u{a0}\u{1680}-\u{1680}\u{2000}-\u{200a}\u{2028}-\u{2029}\u{202f}-\u{202f}\u{205f}-\u{205f}\u{3000}-\u{3000}])*(?u:.)+"); - -// graphql-idl-parser-0.1.1: "^(?u:=)(?u:[\t-\r - \u{85}-\u{85}\u{a0}-\u{a0}\u{1680}-\u{1680}\u{2000}-\u{200a}\u{2028}-\u{2029}\u{202f}-\u{202f}\u{205f}-\u{205f}\u{3000}-\u{3000}])*(?u:.)+" -consistent!(graphql_idl_parser_1, "^(?u:=)(?u:[\t-\r - \u{85}-\u{85}\u{a0}-\u{a0}\u{1680}-\u{1680}\u{2000}-\u{200a}\u{2028}-\u{2029}\u{202f}-\u{202f}\u{205f}-\u{205f}\u{3000}-\u{3000}])*(?u:.)+"); - -// graphql-idl-parser-0.1.1: "^(?u:[A-Z_-_a-z])(?u:[0-9A-Z_-_a-z])*" -consistent!(graphql_idl_parser_2, "^(?u:[A-Z_-_a-z])(?u:[0-9A-Z_-_a-z])*"); - -// graphql-idl-parser-0.1.1: "^(?u:!)" -consistent!(graphql_idl_parser_3, "^(?u:!)"); - -// graphql-idl-parser-0.1.1: "^(?u:\\()" -consistent!(graphql_idl_parser_4, "^(?u:\\()"); - -// graphql-idl-parser-0.1.1: "^(?u:\\))" 
-consistent!(graphql_idl_parser_5, "^(?u:\\))"); - -// graphql-idl-parser-0.1.1: "^(?u:,)" -consistent!(graphql_idl_parser_6, "^(?u:,)"); - -// graphql-idl-parser-0.1.1: "^(?u::)" -consistent!(graphql_idl_parser_7, "^(?u::)"); - -// graphql-idl-parser-0.1.1: "^(?u:@)" -consistent!(graphql_idl_parser_8, "^(?u:@)"); - -// graphql-idl-parser-0.1.1: "^(?u:\\[)" -consistent!(graphql_idl_parser_9, "^(?u:\\[)"); - -// graphql-idl-parser-0.1.1: "^(?u:\\])" -consistent!(graphql_idl_parser_10, "^(?u:\\])"); - -// graphql-idl-parser-0.1.1: "^(?u:enum)" -consistent!(graphql_idl_parser_11, "^(?u:enum)"); - -// graphql-idl-parser-0.1.1: "^(?u:implements)" -consistent!(graphql_idl_parser_12, "^(?u:implements)"); - -// graphql-idl-parser-0.1.1: "^(?u:input)" -consistent!(graphql_idl_parser_13, "^(?u:input)"); - -// graphql-idl-parser-0.1.1: "^(?u:interface)" -consistent!(graphql_idl_parser_14, "^(?u:interface)"); - -// graphql-idl-parser-0.1.1: "^(?u:scalar)" -consistent!(graphql_idl_parser_15, "^(?u:scalar)"); - -// graphql-idl-parser-0.1.1: "^(?u:type)" -consistent!(graphql_idl_parser_16, "^(?u:type)"); - -// graphql-idl-parser-0.1.1: "^(?u:union)" -consistent!(graphql_idl_parser_17, "^(?u:union)"); - -// graphql-idl-parser-0.1.1: "^(?u:\\{)" -consistent!(graphql_idl_parser_18, "^(?u:\\{)"); - -// graphql-idl-parser-0.1.1: "^(?u:\\})" -consistent!(graphql_idl_parser_19, "^(?u:\\})"); - -// grimoire-0.1.0: r"(?s)/\*(?P<config>.*?)\*/" -consistent!(grimoire_0, r"(?s)/\*(?P<config>.*?)\*/"); - -// phonenumber-0.2.0+8.9.0: r"[\d]+(?:[~\x{2053}\x{223C}\x{FF5E}][\d]+)?" -consistent!(phonenumber_0, r"[\d]+(?:[~\x{2053}\x{223C}\x{FF5E}][\d]+)?"); - -// phonenumber-0.2.0+8.9.0: r"[, \[\]]" -consistent!(phonenumber_1, r"[, \[\]]"); - -// phonenumber-0.2.0+8.9.0: r"[\\/] *x" -consistent!(phonenumber_2, r"[\\/] *x"); - -// phonenumber-0.2.0+8.9.0: r"[[\P{N}&&\P{L}]&&[^#]]+$" -consistent!(phonenumber_3, r"[[\P{N}&&\P{L}]&&[^#]]+$"); - -// phonenumber-0.2.0+8.9.0: r"(?:.*?[A-Za-z]){3}.*" -consistent!(phonenumber_4, r"(?:.*?[A-Za-z]){3}.*"); - -// phonenumber-0.2.0+8.9.0: r"(\D+)" -consistent!(phonenumber_5, r"(\D+)"); - -// phonenumber-0.2.0+8.9.0: r"(\$\d)" -consistent!(phonenumber_6, r"(\$\d)"); - -// phonenumber-0.2.0+8.9.0: r"\(?\$1\)?" 
-consistent!(phonenumber_7, r"\(?\$1\)?"); - -// phone_number-0.1.0: r"\D" -consistent!(phone_number_0, r"\D"); - -// phone_number-0.1.0: r"^0+" -consistent!(phone_number_1, r"^0+"); - -// phone_number-0.1.0: r"^89" -consistent!(phone_number_2, r"^89"); - -// phone_number-0.1.0: r"^8+" -consistent!(phone_number_3, r"^8+"); - -// phile-0.1.4: r"^ *(\^_*\^) *$" -consistent!(phile_0, r"^ *(\^_*\^) *$"); - -// phile-0.1.4: r"^[_\p{XID_Start}]$" -consistent!(phile_1, r"^[_\p{XID_Start}]$"); - -// phile-0.1.4: r"^\p{XID_Continue}$" -consistent!(phile_2, r"^\p{XID_Continue}$"); - -// uritemplate-0.1.2: "%25(?P<hex>[0-9a-fA-F][0-9a-fA-F])" -consistent!(uritemplate_0, "%25(?P<hex>[0-9a-fA-F][0-9a-fA-F])"); - -// urdf-rs-0.4.2: "^package://(\\w+)/" -consistent!(urdf_rs_0, "^package://(\\w+)/"); - -// url-match-0.1.7: r"(?P<key>[?&.])" -consistent!(url_match_0, r"(?P<key>[?&.])"); - -// url-match-0.1.7: r":(?P<key>[a-zA-Z0-9_-]+)" -consistent!(url_match_1, r":(?P<key>[a-zA-Z0-9_-]+)"); - -// tsm-sys-0.1.0: r"hello world" -consistent!(tsm_sys_0, r"hello world"); - -// deb-version-0.1.0: "^(?:(?:(?:\\d+:).+)|(?:[^:]+))$" -consistent!(deb_version_0, "^(?:(?:(?:\\d+:).+)|(?:[^:]+))$"); - -// debcargo-2.1.0: r"^(?i)(a|an|the)\s+" -consistent!(debcargo_0, r"^(?i)(a|an|the)\s+"); - -// debcargo-2.1.0: r"^(?i)(rust\s+)?(implementation|library|tool|crate)\s+(of|to|for)\s+" -consistent!( - debcargo_1, - r"^(?i)(rust\s+)?(implementation|library|tool|crate)\s+(of|to|for)\s+" -); - -// feaders-0.2.0: r"^.*\.h$" -consistent!(feaders_0, r"^.*\.h$"); - -// feaders-0.2.0: r"^.*\.c$" -consistent!(feaders_1, r"^.*\.c$"); - -// feaders-0.2.0: r"^.*\.hpp$" -consistent!(feaders_2, r"^.*\.hpp$"); - -// feaders-0.2.0: r"^.*\.cc$" -consistent!(feaders_3, r"^.*\.cc$"); - -// feaders-0.2.0: r"^.*\.cpp$" -consistent!(feaders_4, r"^.*\.cpp$"); - -// hyperscan-0.1.6: r"CPtr\(\w+\)" -consistent!(hyperscan_0, r"CPtr\(\w+\)"); - -// hyperscan-0.1.6: r"^Version:\s(\d\.\d\.\d)\sFeatures:\s+(\w+)?\sMode:\s(\w+)$" -consistent!( - hyperscan_1, - r"^Version:\s(\d\.\d\.\d)\sFeatures:\s+(\w+)?\sMode:\s(\w+)$" -); - -// hyperscan-0.1.6: r"RawDatabase<Block>\{db: \w+\}" -consistent!(hyperscan_2, r"RawDatabase<Block>\{db: \w+\}"); - -// hyperscan-0.1.6: r"RawSerializedDatabase\{p: \w+, len: \d+\}" -consistent!(hyperscan_3, r"RawSerializedDatabase\{p: \w+, len: \d+\}"); - -// ucd-parse-0.1.1: r"[0-9A-F]+" -consistent!(ucd_parse_0, r"[0-9A-F]+"); - -// afsort-0.2.0: r".*" -consistent!(afsort_0, r".*"); - -// afsort-0.2.0: r".*" -consistent!(afsort_1, r".*"); - -// afsort-0.2.0: r".*" -consistent!(afsort_2, r".*"); - -// afsort-0.2.0: r".*" -consistent!(afsort_3, r".*"); - -// afsort-0.2.0: r".*" -consistent!(afsort_4, r".*"); - -// afsort-0.2.0: r".*" -consistent!(afsort_5, r".*"); - -// afsort-0.2.0: r"^[a-z]+$" -consistent!(afsort_6, r"^[a-z]+$"); - -// afsort-0.2.0: r"^[a-z]+$" -consistent!(afsort_7, r"^[a-z]+$"); - -// tin-summer-1.21.4: r"(\.git|\.pijul|_darcs|\.hg)$" -consistent!(tin_summer_0, r"(\.git|\.pijul|_darcs|\.hg)$"); - -// tin-drummer-1.0.1: r".*?\.(a|la|lo|o|ll|keter|bc|dyn_o|d|rlib|crate|min\.js|hi|dyn_hi|S|jsexe|webapp|js\.externs|ibc|toc|aux|fdb_latexmk|fls|egg-info|whl|js_a|js_hi|jld|ji|js_o|so.*|dump-.*|vmb|crx|orig|elmo|elmi|pyc|mod|p_hi|p_o|prof|tix)$" -consistent!(tin_drummer_0, r".*?\.(a|la|lo|o|ll|keter|bc|dyn_o|d|rlib|crate|min\.js|hi|dyn_hi|S|jsexe|webapp|js\.externs|ibc|toc|aux|fdb_latexmk|fls|egg-info|whl|js_a|js_hi|jld|ji|js_o|so.*|dump-.*|vmb|crx|orig|elmo|elmi|pyc|mod|p_hi|p_o|prof|tix)$"); - -// 
tin-drummer-1.0.1: r".*?\.(stats|conf|h|out|cache.*|dat|pc|info|\.js)$" -consistent!( - tin_drummer_1, - r".*?\.(stats|conf|h|out|cache.*|dat|pc|info|\.js)$" -); - -// tin-drummer-1.0.1: r".*?\.(exe|a|la|o|ll|keter|bc|dyn_o|d|rlib|crate|min\.js|hi|dyn_hi|jsexe|webapp|js\.externs|ibc|toc|aux|fdb_latexmk|fls|egg-info|whl|js_a|js_hi|jld|ji|js_o|so.*|dump-.*|vmb|crx|orig|elmo|elmi|pyc|mod|p_hi|p_o|prof|tix)$" -consistent!(tin_drummer_2, r".*?\.(exe|a|la|o|ll|keter|bc|dyn_o|d|rlib|crate|min\.js|hi|dyn_hi|jsexe|webapp|js\.externs|ibc|toc|aux|fdb_latexmk|fls|egg-info|whl|js_a|js_hi|jld|ji|js_o|so.*|dump-.*|vmb|crx|orig|elmo|elmi|pyc|mod|p_hi|p_o|prof|tix)$"); - -// tin-drummer-1.0.1: r".*?\.(stats|conf|h|out|cache.*|\.js)$" -consistent!(tin_drummer_3, r".*?\.(stats|conf|h|out|cache.*|\.js)$"); - -// tin-drummer-1.0.1: r"(\.git|\.pijul|_darcs|\.hg)$" -consistent!(tin_drummer_4, r"(\.git|\.pijul|_darcs|\.hg)$"); - -// tin-drummer-1.0.1: r".*?\.(dyn_o|out|d|hi|dyn_hi|dump-.*|p_hi|p_o|prof|tix)$" -consistent!( - tin_drummer_5, - r".*?\.(dyn_o|out|d|hi|dyn_hi|dump-.*|p_hi|p_o|prof|tix)$" -); - -// tin-drummer-1.0.1: r".*?\.(ibc)$" -consistent!(tin_drummer_6, r".*?\.(ibc)$"); - -// tin-drummer-1.0.1: r"\.stack-work|dist-newstyle" -consistent!(tin_drummer_7, r"\.stack-work|dist-newstyle"); - -// timmy-0.3.0: r"_NET_WM_PID\(CARDINAL\) = (\d+)" -consistent!(timmy_0, r"_NET_WM_PID\(CARDINAL\) = (\d+)"); - -// timmy-0.3.0: r"today|yesterday|now" -consistent!(timmy_1, r"today|yesterday|now"); - -// timmy-0.3.0: r"(?P<day>\d{1,2})/(?P<month>\d{1,2})(/(?P<year>\d{4}|\d{2}))?" -consistent!( - timmy_2, - r"(?P<day>\d{1,2})/(?P<month>\d{1,2})(/(?P<year>\d{4}|\d{2}))?" -); - -// timmy-0.3.0: r"(?P<n>\d+) (days?|ds?)(?P<ago>( ago)?)" -consistent!(timmy_3, r"(?P<n>\d+) (days?|ds?)(?P<ago>( ago)?)"); - -// timmy-0.3.0: r"(?P<hr>\d{2}):(?P<mins>\d{2})" -consistent!(timmy_4, r"(?P<hr>\d{2}):(?P<mins>\d{2})"); - -// tinfo-0.5.0: r"^(\d+): \d+ windows \(.*\) \[\d+x\d+\]( \(attached\))?" -consistent!( - tinfo_0, - r"^(\d+): \d+ windows \(.*\) \[\d+x\d+\]( \(attached\))?" 
-); - -// tinfo-0.5.0: r"^(\d+):(\d+): (.*) \((\d+) panes\) \[(\d+)x(\d+)\]" -consistent!(tinfo_1, r"^(\d+):(\d+): (.*) \((\d+) panes\) \[(\d+)x(\d+)\]"); - -// timespan-0.0.4: r"(?:\\\{start\\\}|\\\{end\\\})" -consistent!(timespan_0, r"(?:\\\{start\\\}|\\\{end\\\})"); - -// timespan-0.0.4: r"(.*)\s+-\s+(.*)" -consistent!(timespan_1, r"(.*)\s+-\s+(.*)"); - -// timespan-0.0.4: r"(.*)\s+(\w+)$" -consistent!(timespan_2, r"(.*)\s+(\w+)$"); - -// timespan-0.0.4: r"(.*)\s+(\w+)$" -consistent!(timespan_3, r"(.*)\s+(\w+)$"); - -// timespan-0.0.4: r"(.*)\s+-\s+(.*)" -consistent!(timespan_4, r"(.*)\s+-\s+(.*)"); - -// titlecase-0.10.0: r"[[:lower:]]" -consistent!(titlecase_0, r"[[:lower:]]"); - -// tight-0.1.3: r"^\d+ (day|week|month|year)s?$" -consistent!(tight_0, r"^\d+ (day|week|month|year)s?$"); - -// tight-0.1.3: r"^\d+ (day|week|month|year)s?$" -consistent!(tight_1, r"^\d+ (day|week|month|year)s?$"); - -// yaml-0.2.1: r"^[-+]?(0|[1-9][0-9_]*)$" -consistent!(yaml_0, r"^[-+]?(0|[1-9][0-9_]*)$"); - -// yaml-0.2.1: r"^([-+]?)0o?([0-7_]+)$" -consistent!(yaml_1, r"^([-+]?)0o?([0-7_]+)$"); - -// yaml-0.2.1: r"^([-+]?)0x([0-9a-fA-F_]+)$" -consistent!(yaml_2, r"^([-+]?)0x([0-9a-fA-F_]+)$"); - -// yaml-0.2.1: r"^([-+]?)0b([0-1_]+)$" -consistent!(yaml_3, r"^([-+]?)0b([0-1_]+)$"); - -// yaml-0.2.1: r"^([-+]?)(\.[0-9]+|[0-9]+(\.[0-9]*)?([eE][-+]?[0-9]+)?)$" -consistent!( - yaml_4, - r"^([-+]?)(\.[0-9]+|[0-9]+(\.[0-9]*)?([eE][-+]?[0-9]+)?)$" -); - -// yaml-0.2.1: r"^[+]?(\.inf|\.Inf|\.INF)$" -consistent!(yaml_5, r"^[+]?(\.inf|\.Inf|\.INF)$"); - -// yaml-0.2.1: r"^-(\.inf|\.Inf|\.INF)$" -consistent!(yaml_6, r"^-(\.inf|\.Inf|\.INF)$"); - -// yaml-0.2.1: r"^(\.nan|\.NaN|\.NAN)$" -consistent!(yaml_7, r"^(\.nan|\.NaN|\.NAN)$"); - -// yaml-0.2.1: r"^(null|Null|NULL|~)$" -consistent!(yaml_8, r"^(null|Null|NULL|~)$"); - -// yaml-0.2.1: r"^(true|True|TRUE|yes|Yes|YES)$" -consistent!(yaml_9, r"^(true|True|TRUE|yes|Yes|YES)$"); - -// yaml-0.2.1: r"^(false|False|FALSE|no|No|NO)$" -consistent!(yaml_10, r"^(false|False|FALSE|no|No|NO)$"); - -// kefia-0.1.0: r"(?m)^(\S+)/(\S+) (\S+)(?: \((.*)\))?$" -consistent!(kefia_0, r"(?m)^(\S+)/(\S+) (\S+)(?: \((.*)\))?$"); - -// risp-0.7.0: "^(\\s+|;.*?(\n|$))+" -consistent!(risp_0, "^(\\s+|;.*?(\n|$))+"); - -// risp-0.7.0: "^\".*?\"" -consistent!(risp_1, "^\".*?\""); - -// risp-0.7.0: r"^[^\s\{\}()\[\]]+" -consistent!(risp_2, r"^[^\s\{\}()\[\]]+"); - -// risp-0.7.0: r"^-?\d+" -consistent!(risp_3, r"^-?\d+"); - -// ripgrep-0.8.1: "^([0-9]+)([KMG])?$" -consistent!(ripgrep_0, "^([0-9]+)([KMG])?$"); - -// riquid-0.0.1: r"^\w+" -consistent!(riquid_0, r"^\w+"); - -// riquid-0.0.1: r"^\d+" -consistent!(riquid_1, r"^\d+"); - -// recursive_disassembler-2.1.2: r"\A(0x)?([a-fA-F0-9]+)\z" -consistent!(recursive_disassembler_0, r"\A(0x)?([a-fA-F0-9]+)\z"); - -// remake-0.1.0: r"^[a-zA-Z_][a-zA-Z0-9_]*" -consistent!(remake_0, r"^[a-zA-Z_][a-zA-Z0-9_]*"); - -// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)" -consistent!(regex_decode_0, r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)"); - -// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)" -consistent!(regex_decode_1, r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)"); - -// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)" -consistent!(regex_decode_2, r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)"); - -// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)" -consistent!(regex_decode_3, r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)"); - -// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)" 
-consistent!(regex_decode_4, r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)"); - -// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)" -consistent!(regex_decode_5, r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)"); - -// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{2})\)" -consistent!(regex_decode_6, r"'(?P<title>[^']+)'\s+\((?P<year>\d{2})\)"); - -// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)" -consistent!(regex_decode_7, r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)"); - -// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)" -consistent!(regex_decode_8, r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)"); - -// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})?\)" -consistent!(regex_decode_9, r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})?\)"); - -// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})?\)" -consistent!(regex_decode_10, r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})?\)"); - -// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})?\)" -consistent!(regex_decode_11, r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})?\)"); - -// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})?\)" -consistent!(regex_decode_12, r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})?\)"); - -// regex-decode-0.1.0: r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})?\)" -consistent!(regex_decode_13, r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})?\)"); - -// regex-cache-0.2.0: "[0-9]{3}-[0-9]{3}-[0-9]{4}" -consistent!(regex_cache_0, "[0-9]{3}-[0-9]{3}-[0-9]{4}"); - -// regex-cache-0.2.0: r"^\d+$" -consistent!(regex_cache_1, r"^\d+$"); - -// regex-cache-0.2.0: r"^[a-z]+$" -consistent!(regex_cache_2, r"^[a-z]+$"); - -// regex-cache-0.2.0: r"^\d+$" -consistent!(regex_cache_3, r"^\d+$"); - -// regex-cache-0.2.0: r"^\d+$" -consistent!(regex_cache_4, r"^\d+$"); - -// regex_dfa-0.5.0: r"\d{4}-\d{2}-\d{2}" -consistent!(regex_dfa_0, r"\d{4}-\d{2}-\d{2}"); - -// reaper-2.0.0: r"^[0-9\p{L} _\\.]{3,16}$" -consistent!(reaper_0, r"^[0-9\p{L} _\\.]{3,16}$"); - -// retdec-0.1.0: r"^attachment; filename=(.+)$" -consistent!(retdec_0, r"^attachment; filename=(.+)$"); - -// renvsubst-0.1.2: r"(\\)(?P<head>\$[0-9A-Za-z_{])" -consistent!(renvsubst_0, r"(\\)(?P<head>\$[0-9A-Za-z_{])"); - -// renvsubst-0.1.2: r"\$([[:word:]]+)" -consistent!(renvsubst_1, r"\$([[:word:]]+)"); - -// renvsubst-0.1.2: r"\$\{([[:word:]]+)\}" -consistent!(renvsubst_2, r"\$\{([[:word:]]+)\}"); - -// rexpect-0.3.0: r"'[a-z]+'" -consistent!(rexpect_0, r"'[a-z]+'"); - -// rexpect-0.3.0: r"^\d{4}-\d{2}-\d{2}$" -consistent!(rexpect_1, r"^\d{4}-\d{2}-\d{2}$"); - -// rexpect-0.3.0: r"-\d{2}-" -consistent!(rexpect_2, r"-\d{2}-"); - -// luther-0.1.0: "^a(b|c)c*$" -consistent!(luther_0, "^a(b|c)c*$"); - -// little_boxes-1.6.0: r"(\x9B|\x1B\[)[0-?]*[ -/]*[@-~]" -consistent!(little_boxes_0, r"(\x9B|\x1B\[)[0-?]*[ -/]*[@-~]"); - -// libimagentrytag-0.8.0: "^[a-zA-Z]([a-zA-Z0-9_-]*)$" -consistent!(libimagentrytag_0, "^[a-zA-Z]([a-zA-Z0-9_-]*)$"); - -// libimaginteraction-0.8.0: r"^[Yy](\n?)$" -consistent!(libimaginteraction_0, r"^[Yy](\n?)$"); - -// libimaginteraction-0.8.0: r"^[Nn](\n?)$" -consistent!(libimaginteraction_1, r"^[Nn](\n?)$"); - -// libimagutil-0.8.0: "^(?P<KEY>([^=]*))=(.*)$" -consistent!(libimagutil_0, "^(?P<KEY>([^=]*))=(.*)$"); - -// libimagutil-0.8.0: "(.*)=(\"(?P<QVALUE>([^\"]*))\"|(?P<VALUE>(.*)))$" -consistent!(libimagutil_1, "(.*)=(\"(?P<QVALUE>([^\"]*))\"|(?P<VALUE>(.*)))$"); - -// linux_ip-0.1.0: r"\s+" -consistent!(linux_ip_0, r"\s+"); - -// linux_ip-0.1.0: r"\s*[\n\r]+\s*" -consistent!(linux_ip_1, 
r"\s*[\n\r]+\s*"); - -// linux_ip-0.1.0: r"^([0-9a-fA-F\.:/]+)\s+dev\s+([a-z0-9\.]+)\s*(.*)$" -consistent!(linux_ip_2, r"^([0-9a-fA-F\.:/]+)\s+dev\s+([a-z0-9\.]+)\s*(.*)$"); - -// linux_ip-0.1.0: r"^([0-9a-fA-F\.:/]+|default)\s+via\s+([a-z0-9\.:]+)\s+dev\s+([a-z0-9\.]+)\s*(.*)$" -consistent!(linux_ip_3, r"^([0-9a-fA-F\.:/]+|default)\s+via\s+([a-z0-9\.:]+)\s+dev\s+([a-z0-9\.]+)\s*(.*)$"); - -// linux_ip-0.1.0: r"^(blackhole)\s+([0-9a-fA-F\.:/]+)$" -consistent!(linux_ip_4, r"^(blackhole)\s+([0-9a-fA-F\.:/]+)$"); - -// linux_ip-0.1.0: r"^(unreachable)\s+([0-9a-fA-F\.:/]+)\s+dev\s+([a-z0-9\.]+)\s+(.*)$" -consistent!( - linux_ip_5, - r"^(unreachable)\s+([0-9a-fA-F\.:/]+)\s+dev\s+([a-z0-9\.]+)\s+(.*)$" -); - -// linux_ip-0.1.0: r"\s*[\n\r]+\s*" -consistent!(linux_ip_6, r"\s*[\n\r]+\s*"); - -// linux_ip-0.1.0: r"^\d+:\s+([a-zA-Z0-9\.-]+)(@\S+)*:\s+(.*)$" -consistent!(linux_ip_7, r"^\d+:\s+([a-zA-Z0-9\.-]+)(@\S+)*:\s+(.*)$"); - -// linux_ip-0.1.0: r"\s*link/ether\s+([a-f0-9:]+)\s+.*" -consistent!(linux_ip_8, r"\s*link/ether\s+([a-f0-9:]+)\s+.*"); - -// linux_ip-0.1.0: r"\s*inet[6]*\s+([0-9a-f:\./]+)\s+.*" -consistent!(linux_ip_9, r"\s*inet[6]*\s+([0-9a-f:\./]+)\s+.*"); - -// linky-0.1.4: r"[^\w -]" -consistent!(linky_0, r"[^\w -]"); - -// linky-0.1.4: r"^(.*):(\d+): [^ ]* ([^ ]*)$" -consistent!(linky_1, r"^(.*):(\d+): [^ ]* ([^ ]*)$"); - -// limonite-0.2.1: r"^(\d{4}-\d{2}-\d{2})-(\d{3})-(.+)$" -consistent!(limonite_0, r"^(\d{4}-\d{2}-\d{2})-(\d{3})-(.+)$"); - -// process-queue-0.1.1: r"^[a-zA-Z]+$" -consistent!(process_queue_0, r"^[a-zA-Z]+$"); - -// pronghorn-0.1.2: r"^\{([a-zA-Z_]+)\}$" -consistent!(pronghorn_0, r"^\{([a-zA-Z_]+)\}$"); - -// protocol-ftp-client-0.1.1: "(?m:^(\\d{3}) (.+)\r$)" -consistent!(protocol_ftp_client_0, "(?m:^(\\d{3}) (.+)\r$)"); - -// protocol-ftp-client-0.1.1: "\"(.+)\"" -consistent!(protocol_ftp_client_1, "\"(.+)\""); - -// protocol-ftp-client-0.1.1: "(\\w+) [Tt]ype: (\\w+)" -consistent!(protocol_ftp_client_2, "(\\w+) [Tt]ype: (\\w+)"); - -// protocol-ftp-client-0.1.1: "(?m:^(\\d{3})-.+\r$)" -consistent!(protocol_ftp_client_3, "(?m:^(\\d{3})-.+\r$)"); - -// protocol-ftp-client-0.1.1: "Entering Passive Mode \\((\\d+),(\\d+),(\\d+),(\\d+),(\\d+),(\\d+)\\)" -consistent!( - protocol_ftp_client_4, - "Entering Passive Mode \\((\\d+),(\\d+),(\\d+),(\\d+),(\\d+),(\\d+)\\)" -); - -// protocol-ftp-client-0.1.1: "(?m:^(.+)\r$)" -consistent!(protocol_ftp_client_5, "(?m:^(.+)\r$)"); - -// protocol-ftp-client-0.1.1: "^([d-])(?:[rwx-]{3}){3} +\\d+ +\\w+ +\\w+ +(\\d+) +(.+) +(.+)$" -consistent!( - protocol_ftp_client_6, - "^([d-])(?:[rwx-]{3}){3} +\\d+ +\\w+ +\\w+ +(\\d+) +(.+) +(.+)$" -); - -// article-date-extractor-0.1.1: r"([\./\-_]{0,1}(19|20)\d{2})[\./\-_]{0,1}(([0-3]{0,1}[0-9][\./\-_])|(\w{3,5}[\./\-_]))([0-3]{0,1}[0-9][\./\-]{0,1})" -consistent!(article_date_extractor_0, r"([\./\-_]{0,1}(19|20)\d{2})[\./\-_]{0,1}(([0-3]{0,1}[0-9][\./\-_])|(\w{3,5}[\./\-_]))([0-3]{0,1}[0-9][\./\-]{0,1})"); - -// article-date-extractor-0.1.1: r"(?i)publishdate|pubdate|timestamp|article_date|articledate|date" -consistent!( - article_date_extractor_1, - r"(?i)publishdate|pubdate|timestamp|article_date|articledate|date" -); - -// arthas_plugin-0.1.1: r"type\((.*)\)" -consistent!(arthas_plugin_0, r"type\((.*)\)"); - -// arthas_plugin-0.1.1: r"Vec<(.*)>" -consistent!(arthas_plugin_1, r"Vec<(.*)>"); - -// arthas_plugin-0.1.1: r"Option<(.*)>" -consistent!(arthas_plugin_2, r"Option<(.*)>"); - -// arthas_plugin-0.1.1: r"HashMap<[a-z0-9A-Z]+, *(.*)>" -consistent!(arthas_plugin_3, r"HashMap<[a-z0-9A-Z]+, 
*(.*)>"); - -// arthas_derive-0.1.0: "Vec *< *(.*) *>" -consistent!(arthas_derive_0, "Vec *< *(.*) *>"); - -// arthas_derive-0.1.0: r"Option *< *(.*) *>" -consistent!(arthas_derive_1, r"Option *< *(.*) *>"); - -// arthas_derive-0.1.0: r"HashMap *< *[a-z0-9A-Z]+ *, *(.*) *>" -consistent!(arthas_derive_2, r"HashMap *< *[a-z0-9A-Z]+ *, *(.*) *>"); - -// arpabet-0.2.0: r"^([\w\-\(\)\.']+)\s+([^\s].*)\s*$" -consistent!(arpabet_0, r"^([\w\-\(\)\.']+)\s+([^\s].*)\s*$"); - -// arpabet-0.2.0: r"^;;;\s+" -consistent!(arpabet_1, r"^;;;\s+"); - -// glossy_codegen-0.2.0: r"/\*.*?\*/|//.*" -consistent!(glossy_codegen_0, r"/\*.*?\*/|//.*"); - -// glossy_codegen-0.2.0: "^\\s*#\\s*include\\s+<([:print:]+)>\\s*$" -consistent!(glossy_codegen_1, "^\\s*#\\s*include\\s+<([:print:]+)>\\s*$"); - -// glossy_codegen-0.2.0: "^\\s*#\\s*include\\s+\"([:print:]+)\"\\s*$" -consistent!(glossy_codegen_2, "^\\s*#\\s*include\\s+\"([:print:]+)\"\\s*$"); - -// glossy_codegen-0.2.0: r"^\s*#\s*version\s+(\d+)" -consistent!(glossy_codegen_3, r"^\s*#\s*version\s+(\d+)"); - -// glossy_codegen-0.2.0: r"^\s*$" -consistent!(glossy_codegen_4, r"^\s*$"); - -// gluster-1.0.1: r"(?P<addr>via \S+)" -consistent!(gluster_0, r"(?P<addr>via \S+)"); - -// gluster-1.0.1: r"(?P<src>src \S+)" -consistent!(gluster_1, r"(?P<src>src \S+)"); - -// gl_helpers-0.1.7: r"(.*)\[\d+\]" -consistent!(gl_helpers_0, r"(.*)\[\d+\]"); - -// gl_helpers-0.1.7: r"(\d+).(\d+)" -consistent!(gl_helpers_1, r"(\d+).(\d+)"); - -// glr-parser-0.0.1: r"(?P<c>[\\\.\+\*\?\(\)\|\[\]\{\}\^\$])" -consistent!(glr_parser_0, r"(?P<c>[\\\.\+\*\?\(\)\|\[\]\{\}\^\$])"); - -// glr-parser-0.0.1: r"^\w+$" -consistent!(glr_parser_1, r"^\w+$"); - -// glr-parser-0.0.1: "'[^']+'" -consistent!(glr_parser_2, "'[^']+'"); - -// hoodlum-0.5.0: r"(?m)//.*" -consistent!(hoodlum_0, r"(?m)//.*"); - -// form-checker-0.2.2: r"^1\d{10}$" -consistent!(form_checker_0, r"^1\d{10}$"); - -// form-checker-0.2.2: r"(?i)^[\w.%+-]+@(?:[A-Z0-9-]+\.)+[A-Z]{2,4}$" -consistent!(form_checker_1, r"(?i)^[\w.%+-]+@(?:[A-Z0-9-]+\.)+[A-Z]{2,4}$"); - -// wikibase-0.2.0: r"(?P<user_agent>[a-zA-Z0-9-_]+/[0-9\.]+)" -consistent!(wikibase_0, r"(?P<user_agent>[a-zA-Z0-9-_]+/[0-9\.]+)"); - -// wifiscanner-0.3.6: r"Cell [0-9]{2,} - Address:" -consistent!(wifiscanner_0, r"Cell [0-9]{2,} - Address:"); - -// wifiscanner-0.3.6: r"([0-9a-zA-Z]{1}[0-9a-zA-Z]{1}[:]{1}){5}[0-9a-zA-Z]{1}[0-9a-zA-Z]{1}" -consistent!( - wifiscanner_1, - r"([0-9a-zA-Z]{1}[0-9a-zA-Z]{1}[:]{1}){5}[0-9a-zA-Z]{1}[0-9a-zA-Z]{1}" -); - -// wifiscanner-0.3.6: r"Signal level=(\d+)/100" -consistent!(wifiscanner_2, r"Signal level=(\d+)/100"); - -// bbcode-1.0.2: r"(?s)\[b\](.*?)\[/b\]" -consistent!(bbcode_0, r"(?s)\[b\](.*?)\[/b\]"); - -// bbcode-1.0.2: r"(?s)\[i\](.*?)\[/i\]" -consistent!(bbcode_1, r"(?s)\[i\](.*?)\[/i\]"); - -// bbcode-1.0.2: r"(?s)\[u\](.*?)\[/u\]" -consistent!(bbcode_2, r"(?s)\[u\](.*?)\[/u\]"); - -// bbcode-1.0.2: r"(?s)\[s\](.*?)\[/s\]" -consistent!(bbcode_3, r"(?s)\[s\](.*?)\[/s\]"); - -// bbcode-1.0.2: r"(?s)\[size=(\d+)](.*?)\[/size\]" -consistent!(bbcode_4, r"(?s)\[size=(\d+)](.*?)\[/size\]"); - -// bbcode-1.0.2: r"(?s)\[color=(.+)](.*?)\[/color\]" -consistent!(bbcode_5, r"(?s)\[color=(.+)](.*?)\[/color\]"); - -// bbcode-1.0.2: r"(?s)\[center\](.*?)\[/center\]" -consistent!(bbcode_6, r"(?s)\[center\](.*?)\[/center\]"); - -// bbcode-1.0.2: r"(?s)\[left\](.*?)\[/left\]" -consistent!(bbcode_7, r"(?s)\[left\](.*?)\[/left\]"); - -// bbcode-1.0.2: r"(?s)\[right\](.*?)\[/right\]" -consistent!(bbcode_8, r"(?s)\[right\](.*?)\[/right\]"); - -// 
bbcode-1.0.2: r"(?s)\[table\](.*?)\[/table\]" -consistent!(bbcode_9, r"(?s)\[table\](.*?)\[/table\]"); - -// bbcode-1.0.2: r"(?s)\[td\](.*?)\[/td\]" -consistent!(bbcode_10, r"(?s)\[td\](.*?)\[/td\]"); - -// bbcode-1.0.2: r"(?s)\[tr\](.*?)\[/tr\]" -consistent!(bbcode_11, r"(?s)\[tr\](.*?)\[/tr\]"); - -// bbcode-1.0.2: r"(?s)\[th\](.*?)\[/th\]" -consistent!(bbcode_12, r"(?s)\[th\](.*?)\[/th\]"); - -// bbcode-1.0.2: r"(?s)\[url\](.*?)\[/url\]" -consistent!(bbcode_13, r"(?s)\[url\](.*?)\[/url\]"); - -// bbcode-1.0.2: r"(?s)\[url=(.+)\](.*?)\[/url\]" -consistent!(bbcode_14, r"(?s)\[url=(.+)\](.*?)\[/url\]"); - -// bbcode-1.0.2: r"(?s)\[quote\](.*?)\[/quote\]" -consistent!(bbcode_15, r"(?s)\[quote\](.*?)\[/quote\]"); - -// bbcode-1.0.2: r"(?s)\[quote=(.+)\](.*?)\[/quote\]" -consistent!(bbcode_16, r"(?s)\[quote=(.+)\](.*?)\[/quote\]"); - -// bbcode-1.0.2: r"(?s)\[img=(\d+)x(\d+)(\b.*)?\](.*?)\[/img\]" -consistent!(bbcode_17, r"(?s)\[img=(\d+)x(\d+)(\b.*)?\](.*?)\[/img\]"); - -// bbcode-1.0.2: r"(?s)\[img=(.+)(\b.*)?\](.*?)\[/img\]" -consistent!(bbcode_18, r"(?s)\[img=(.+)(\b.*)?\](.*?)\[/img\]"); - -// bbcode-1.0.2: r"(?s)\[img(\b.*)?\](.*?)\[/img\]" -consistent!(bbcode_19, r"(?s)\[img(\b.*)?\](.*?)\[/img\]"); - -// bbcode-1.0.2: r"(?s)\[ol\](.*?)\[/ol\]" -consistent!(bbcode_20, r"(?s)\[ol\](.*?)\[/ol\]"); - -// bbcode-1.0.2: r"(?s)\[ul\](.*?)\[/ul\]" -consistent!(bbcode_21, r"(?s)\[ul\](.*?)\[/ul\]"); - -// bbcode-1.0.2: r"(?s)\[list\](.*?)\[/list\]" -consistent!(bbcode_22, r"(?s)\[list\](.*?)\[/list\]"); - -// bbcode-1.0.2: r"(?s)\[youtube\](.*?)\[/youtube\]" -consistent!(bbcode_23, r"(?s)\[youtube\](.*?)\[/youtube\]"); - -// bbcode-1.0.2: r"(?s)\[youtube=(\d+)x(\d+)\](.*?)\[/youtube\]" -consistent!(bbcode_24, r"(?s)\[youtube=(\d+)x(\d+)\](.*?)\[/youtube\]"); - -// bbcode-1.0.2: r"(?s)\[li\](.*?)\[/li\]" -consistent!(bbcode_25, r"(?s)\[li\](.*?)\[/li\]"); - -// block-utils-0.5.0: r"loop\d+" -consistent!(block_utils_0, r"loop\d+"); - -// block-utils-0.5.0: r"ram\d+" -consistent!(block_utils_1, r"ram\d+"); - -// block-utils-0.5.0: r"md\d+" -consistent!(block_utils_2, r"md\d+"); - -// kvvliveapi-0.1.0: r"^([1-9]) min$" -consistent!(kvvliveapi_0, r"^([1-9]) min$"); - -// rfc822_sanitizer-0.3.3: r"(\d{2}):(\d{2}):(\d{2})" -consistent!(rfc822_sanitizer_0, r"(\d{2}):(\d{2}):(\d{2})"); - -// rfc822_sanitizer-0.3.3: r"(\d{1,2}):(\d{1,2}):(\d{1,2})" -consistent!(rfc822_sanitizer_1, r"(\d{1,2}):(\d{1,2}):(\d{1,2})"); - -// faker-0.0.4: r"[2-9]" -consistent!(faker_0, r"[2-9]"); - -// faker-0.0.4: r"[1-9]" -consistent!(faker_1, r"[1-9]"); - -// faker-0.0.4: r"[0-9]" -consistent!(faker_2, r"[0-9]"); - -// faker-0.0.4: r"\d{10}" -consistent!(faker_3, r"\d{10}"); - -// faker-0.0.4: r"\d{1}" -consistent!(faker_4, r"\d{1}"); - -// faker-0.0.4: r"^\w+" -consistent!(faker_5, r"^\w+"); - -// faker-0.0.4: r"^\w+" -consistent!(faker_6, r"^\w+"); - -// faker-0.0.4: r"^(\w+\.? ?){2,3}$" -consistent!(faker_7, r"^(\w+\.? 
?){2,3}$"); - -// faker-0.0.4: r"^[A-Z][a-z]+\.?$" -consistent!(faker_8, r"^[A-Z][a-z]+\.?$"); - -// faker-0.0.4: r"^[A-Z][A-Za-z]*\.?$" -consistent!(faker_9, r"^[A-Z][A-Za-z]*\.?$"); - -// faker-0.0.4: r"http://lorempixel.com/100/100/\w+" -consistent!(faker_10, r"http://lorempixel.com/100/100/\w+"); - -// faker-0.0.4: r"http://lorempixel.com/100/100/cats" -consistent!(faker_11, r"http://lorempixel.com/100/100/cats"); - -// fancy-regex-0.1.0: "(?i:ß)" -consistent!(fancy_regex_0, "(?i:ß)"); - -// fancy-regex-0.1.0: "(?i:\\x{0587})" -consistent!(fancy_regex_1, "(?i:\\x{0587})"); - -// fancy-regex-0.1.0: "^\\\\([!-/:-@\\[-`\\{-~aftnrv]|[0-7]{1,3}|x[0-9a-fA-F]{2}|x\\{[0-9a-fA-F]{1,6}\\})" -consistent!(fancy_regex_2, "^\\\\([!-/:-@\\[-`\\{-~aftnrv]|[0-7]{1,3}|x[0-9a-fA-F]{2}|x\\{[0-9a-fA-F]{1,6}\\})"); - -// fancy-prompt-0.1.5: r"/([^/])[^/]+/" -consistent!(fancy_prompt_0, r"/([^/])[^/]+/"); - -// fancy-prompt-0.1.5: r"^([^:]+):.*?(?::([^:]+))?$" -consistent!(fancy_prompt_1, r"^([^:]+):.*?(?::([^:]+))?$"); - -// fanta-0.2.0: r"^(/?__\w+__)/(.*)" -consistent!(fanta_0, r"^(/?__\w+__)/(.*)"); - -// fanta-cli-0.1.1: r"(.)([A-Z])" -consistent!(fanta_cli_0, r"(.)([A-Z])"); - -// fanta-cli-0.1.1: "\\{:[^\\s]+\\}" -consistent!(fanta_cli_1, "\\{:[^\\s]+\\}"); - -// amethyst_tools-0.7.1: "(?P<last>[^\r])\n" -consistent!(amethyst_tools_0, "(?P<last>[^\r])\n"); - -// amigo-0.3.1: r"^-?\d+(\.\d)?" -consistent!(amigo_0, r"^-?\d+(\.\d)?"); - -// amigo-0.3.1: r"^[a-zA-Z_]+[\w-]*[!?_]?" -consistent!(amigo_1, r"^[a-zA-Z_]+[\w-]*[!?_]?"); - -// amigo-0.3.1: r"^\(" -consistent!(amigo_2, r"^\("); - -// amigo-0.3.1: r"^\)" -consistent!(amigo_3, r"^\)"); - -// amigo-0.3.1: r"^\s+" -consistent!(amigo_4, r"^\s+"); - -// ethcore-logger-1.12.0: "\x1b\\[[^m]+m" -consistent!(ethcore_logger_0, "\x1b\\[[^m]+m"); - -// dash2html-1.0.1: r"__.*?__" -consistent!(dash2html_0, r"__.*?__"); - -// dash2html-1.0.1: r"(?i)@(?:time|clipboard|cursor|date)" -consistent!(dash2html_1, r"(?i)@(?:time|clipboard|cursor|date)"); - -// os_type-2.0.0: r"^Microsoft Windows \[Version\s(\d+\.\d+\.\d+)\]$" -consistent!(os_type_0, r"^Microsoft Windows \[Version\s(\d+\.\d+\.\d+)\]$"); - -// os_type-2.0.0: r"ProductName:\s([\w\s]+)\n" -consistent!(os_type_1, r"ProductName:\s([\w\s]+)\n"); - -// os_type-2.0.0: r"ProductVersion:\s(\w+\.\w+\.\w+)" -consistent!(os_type_2, r"ProductVersion:\s(\w+\.\w+\.\w+)"); - -// os_type-2.0.0: r"BuildVersion:\s(\w+)" -consistent!(os_type_3, r"BuildVersion:\s(\w+)"); - -// os_type-2.0.0: r"(\w+) Linux release" -consistent!(os_type_4, r"(\w+) Linux release"); - -// os_type-2.0.0: r"release\s([\w\.]+)" -consistent!(os_type_5, r"release\s([\w\.]+)"); - -// os_type-2.0.0: r"Distributor ID:\s(\w+)" -consistent!(os_type_6, r"Distributor ID:\s(\w+)"); - -// os_type-2.0.0: r"Release:\s([\w\.]+)" -consistent!(os_type_7, r"Release:\s([\w\.]+)"); - -// bindgen-0.37.0: r"typename type\-parameter\-\d+\-\d+::.+" -consistent!(bindgen_0, r"typename type\-parameter\-\d+\-\d+::.+"); - -// imap-0.8.1: "^+(.*)\r\n" -consistent!(imap_0, "^+(.*)\r\n"); - -// image-base64-0.1.0: r"^ffd8ffe0" -consistent!(image_base64_0, r"^ffd8ffe0"); - -// image-base64-0.1.0: r"^89504e47" -consistent!(image_base64_1, r"^89504e47"); - -// image-base64-0.1.0: r"^47494638" -consistent!(image_base64_2, r"^47494638"); - -// json-pointer-0.3.2: "^(/([^/~]|~[01])*)*$" -consistent!(json_pointer_0, "^(/([^/~]|~[01])*)*$"); - -// json-pointer-0.3.2: "^#(/([^/~%]|~[01]|%[0-9a-fA-F]{2})*)*$" -consistent!(json_pointer_1, "^#(/([^/~%]|~[01]|%[0-9a-fA-F]{2})*)*$"); - -// 
mysql_common-0.7.0: r"^5.5.5-(\d{1,2})\.(\d{1,2})\.(\d{1,3})-MariaDB" -consistent!(mysql_common_0, r"^5.5.5-(\d{1,2})\.(\d{1,2})\.(\d{1,3})-MariaDB"); - -// mysql_common-0.7.0: r"^(\d{1,2})\.(\d{1,2})\.(\d{1,3})(.*)" -consistent!(mysql_common_1, r"^(\d{1,2})\.(\d{1,2})\.(\d{1,3})(.*)"); - -// government_id-0.1.0: r"^[0-9]{4}[0-9A-Z]{2}[0-9]{3}$" -consistent!(government_id_0, r"^[0-9]{4}[0-9A-Z]{2}[0-9]{3}$"); - -// ohmers-0.1.1: r"UniqueIndexViolation: (\w+)" -consistent!(ohmers_0, r"UniqueIndexViolation: (\w+)"); - -// eliza-1.0.0: r"(.*) you are (.*)" -consistent!(eliza_0, r"(.*) you are (.*)"); - -// eliza-1.0.0: r"(.*) you are (.*)" -consistent!(eliza_1, r"(.*) you are (.*)"); - -// eliza-1.0.0: r"(.*) you are (.*)" -consistent!(eliza_2, r"(.*) you are (.*)"); - -// chema-0.0.5: "^\\s*\\*" -consistent!(chema_0, "^\\s*\\*"); - -// chema-0.0.5: "^\\s*@(\\w+)\\s+(.*)" -consistent!(chema_1, "^\\s*@(\\w+)\\s+(.*)"); - -// chord3-0.3.0: r"^\s*#" -consistent!(chord3_0, r"^\s*#"); - -// chord3-0.3.0: r"\{(?P<cmd>\w+)(?::?\s*(?P<arg>.*))?\}" -consistent!(chord3_1, r"\{(?P<cmd>\w+)(?::?\s*(?P<arg>.*))?\}"); - -// chord3-0.3.0: r"\{(eot|end_of_tab):?\s*" -consistent!(chord3_2, r"\{(eot|end_of_tab):?\s*"); - -// chord3-0.3.0: r"([^\[]*)(?:\[([^\]]*)\])?" -consistent!(chord3_3, r"([^\[]*)(?:\[([^\]]*)\])?"); - -// checkmail-0.1.1: "^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$" -consistent!(checkmail_0, "^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$"); - -// cntk-0.2.1: r"\b\w\w+\b" -consistent!(cntk_0, r"\b\w\w+\b"); - -// cntk-0.2.1: r"\b\w\w+\b" -consistent!(cntk_1, r"\b\w\w+\b"); - -// cniguru-0.1.0: r"\(id: (\d+)\)" -consistent!(cniguru_0, r"\(id: (\d+)\)"); - -// upm_lib-0.3.0: r"^(\d+)\.(\d+)\.(\d+)(?:-([\dA-Za-z-]+(?:\.[\dA-Za-z-]+)*))?(?:\+([\dA-Za-z-]+(?:\.[\dA-Za-z-]+)*))?$" -consistent!(upm_lib_0, r"^(\d+)\.(\d+)\.(\d+)(?:-([\dA-Za-z-]+(?:\.[\dA-Za-z-]+)*))?(?:\+([\dA-Za-z-]+(?:\.[\dA-Za-z-]+)*))?$"); - -// avro-0.2.1: r"^\s*(\*+(\s+))?" -consistent!(avro_0, r"^\s*(\*+(\s+))?"); - -// avro-0.2.1: r"^\s*(\*+)?" -consistent!(avro_1, r"^\s*(\*+)?"); - -// nomi-0.0.2: "[0-9]+" -consistent!(nomi_0, "[0-9]+"); - -// nodes-0.1.0: "([0-9]+)@(?:nodes|n)?:([^@]+)?" -consistent!(nodes_0, "([0-9]+)@(?:nodes|n)?:([^@]+)?"); - -// not-stakkr-1.0.0: r"(?i)in (\d+) (second|minute|hour|day|week)s?" 
-consistent!(not_stakkr_0, r"(?i)in (\d+) (second|minute|hour|day|week)s?"); - -// notetxt-0.0.1: "^([A-Za-z0-9 -_:]+)\n-+\n" -consistent!(notetxt_0, "^([A-Za-z0-9 -_:]+)\n-+\n"); - -// nail-0.1.0-pre.0: r"^-?[0-9]+(\.[0-9]+)?([eE]-?[0-9]+)?$" -consistent!(nail_0, r"^-?[0-9]+(\.[0-9]+)?([eE]-?[0-9]+)?$"); - -// nail-0.1.0-pre.0: r"^-?[0-9]+$" -consistent!(nail_1, r"^-?[0-9]+$"); - -// askalono-0.2.0: r"[^\w\s\pP]+" -consistent!(askalono_0, r"[^\w\s\pP]+"); - -// askalono-0.2.0: r"(?x)[ \t\p{Zs} \\ / \| \x2044 ]+" -consistent!(askalono_1, r"(?x)[ \t\p{Zs} \\ / \| \x2044 ]+"); - -// askalono-0.2.0: r"\p{Pd}+" -consistent!(askalono_2, r"\p{Pd}+"); - -// askalono-0.2.0: r"\p{Ps}+" -consistent!(askalono_3, r"\p{Ps}+"); - -// askalono-0.2.0: r"\p{Pe}+" -consistent!(askalono_4, r"\p{Pe}+"); - -// askalono-0.2.0: r"\p{Pc}+" -consistent!(askalono_5, r"\p{Pc}+"); - -// askalono-0.2.0: r"[©Ⓒⓒ]" -consistent!(askalono_6, r"[©Ⓒⓒ]"); - -// askalono-0.2.0: r"[\r\n\v\f]" -consistent!(askalono_7, r"[\r\n\v\f]"); - -// askalono-0.2.0: r"\n{3,}" -consistent!(askalono_8, r"\n{3,}"); - -// askalono-0.2.0: r"[^\w\s]+" -consistent!(askalono_9, r"[^\w\s]+"); - -// askalono-0.2.0: r"\s+" -consistent!(askalono_10, r"\s+"); - -// assembunny_plus-0.0.3: r"[^0-9a-zA-Z_]" -consistent!(assembunny_plus_0, r"[^0-9a-zA-Z_]"); - -// assembunny_plus-0.0.3: r"[0-9]" -consistent!(assembunny_plus_1, r"[0-9]"); - -// salt-compressor-0.4.0: r"(?m)^Minion (\S*) did not respond\. No job will be sent\.$" -consistent!( - salt_compressor_0, - r"(?m)^Minion (\S*) did not respond\. No job will be sent\.$" -); - -// sabisabi-0.4.1: r"</?[^>]+?>" -consistent!(sabisabi_0, r"</?[^>]+?>"); - -// sabisabi-0.4.1: r"\([^)]*\)" -consistent!(sabisabi_1, r"\([^)]*\)"); - -// sassers-0.13.5-h28: "@import \"([^\"]*)\";" -consistent!(sassers_0, "@import \"([^\"]*)\";"); - -// shadowsocks-0.6.2: r"[A-Za-z\d-]{1,63}$" -consistent!(shadowsocks_0, r"[A-Za-z\d-]{1,63}$"); - -// shkeleton-0.1.5: "[abc]+" -consistent!(shkeleton_0, "[abc]+"); - -// shellwords-0.1.0: r"([^A-Za-z0-9_\-.,:/@\n])" -consistent!(shellwords_0, r"([^A-Za-z0-9_\-.,:/@\n])"); - -// shellwords-0.1.0: r"\n" -consistent!(shellwords_1, r"\n"); - -// shush-0.1.5: "(?P<num>[0-9]+)(?P<units>[dhms])" -consistent!(shush_0, "(?P<num>[0-9]+)(?P<units>[dhms])"); - -// woothee-0.8.0: r"(?:Chrome|CrMo|CriOS)/([.0-9]+)" -consistent!(woothee_0, r"(?:Chrome|CrMo|CriOS)/([.0-9]+)"); - -// woothee-0.8.0: r"Vivaldi/([.0-9]+)" -consistent!(woothee_1, r"Vivaldi/([.0-9]+)"); - -// woothee-0.8.0: r"Firefox/([.0-9]+)" -consistent!(woothee_2, r"Firefox/([.0-9]+)"); - -// woothee-0.8.0: r"^Mozilla/[.0-9]+ \((?:Mobile|Tablet);(?:.*;)? rv:([.0-9]+)\) Gecko/[.0-9]+ Firefox/[.0-9]+$" -consistent!(woothee_3, r"^Mozilla/[.0-9]+ \((?:Mobile|Tablet);(?:.*;)? 
rv:([.0-9]+)\) Gecko/[.0-9]+ Firefox/[.0-9]+$"); - -// woothee-0.8.0: r"FxiOS/([.0-9]+)" -consistent!(woothee_4, r"FxiOS/([.0-9]+)"); - -// woothee-0.8.0: r"\(([^;)]+);FOMA;" -consistent!(woothee_5, r"\(([^;)]+);FOMA;"); - -// woothee-0.8.0: r"jig browser[^;]+; ([^);]+)" -consistent!(woothee_6, r"jig browser[^;]+; ([^);]+)"); - -// woothee-0.8.0: r"(?i)rss(?:reader|bar|[-_ /;()]|[ +]*/)" -consistent!(woothee_7, r"(?i)rss(?:reader|bar|[-_ /;()]|[ +]*/)"); - -// woothee-0.8.0: r"(?i)(?:bot|crawler|spider)(?:[-_ ./;@()]|$)" -consistent!(woothee_8, r"(?i)(?:bot|crawler|spider)(?:[-_ ./;@()]|$)"); - -// woothee-0.8.0: r"(?i)(?:feed|web) ?parser" -consistent!(woothee_9, r"(?i)(?:feed|web) ?parser"); - -// woothee-0.8.0: r"(?i)watch ?dog" -consistent!(woothee_10, r"(?i)watch ?dog"); - -// woothee-0.8.0: r"Edge/([.0-9]+)" -consistent!(woothee_11, r"Edge/([.0-9]+)"); - -// woothee-0.8.0: r"MSIE ([.0-9]+);" -consistent!(woothee_12, r"MSIE ([.0-9]+);"); - -// woothee-0.8.0: r"Version/([.0-9]+)" -consistent!(woothee_13, r"Version/([.0-9]+)"); - -// woothee-0.8.0: r"Opera[/ ]([.0-9]+)" -consistent!(woothee_14, r"Opera[/ ]([.0-9]+)"); - -// woothee-0.8.0: r"OPR/([.0-9]+)" -consistent!(woothee_15, r"OPR/([.0-9]+)"); - -// woothee-0.8.0: r"Version/([.0-9]+)" -consistent!(woothee_16, r"Version/([.0-9]+)"); - -// woothee-0.8.0: r"(?:SoftBank|Vodafone|J-PHONE)/[.0-9]+/([^ /;()]+)" -consistent!(woothee_17, r"(?:SoftBank|Vodafone|J-PHONE)/[.0-9]+/([^ /;()]+)"); - -// woothee-0.8.0: r"Trident/([.0-9]+);" -consistent!(woothee_18, r"Trident/([.0-9]+);"); - -// woothee-0.8.0: r" rv:([.0-9]+)" -consistent!(woothee_19, r" rv:([.0-9]+)"); - -// woothee-0.8.0: r"IEMobile/([.0-9]+);" -consistent!(woothee_20, r"IEMobile/([.0-9]+);"); - -// woothee-0.8.0: r"(?:WILLCOM|DDIPOCKET);[^/]+/([^ /;()]+)" -consistent!(woothee_21, r"(?:WILLCOM|DDIPOCKET);[^/]+/([^ /;()]+)"); - -// woothee-0.8.0: r"Windows ([ .a-zA-Z0-9]+)[;\\)]" -consistent!(woothee_22, r"Windows ([ .a-zA-Z0-9]+)[;\\)]"); - -// woothee-0.8.0: r"^Phone(?: OS)? ([.0-9]+)" -consistent!(woothee_23, r"^Phone(?: OS)? ([.0-9]+)"); - -// woothee-0.8.0: r"iP(hone;|ad;|od) .*like Mac OS X" -consistent!(woothee_24, r"iP(hone;|ad;|od) .*like Mac OS X"); - -// woothee-0.8.0: r"Version/([.0-9]+)" -consistent!(woothee_25, r"Version/([.0-9]+)"); - -// woothee-0.8.0: r"rv:(\d+\.\d+\.\d+)" -consistent!(woothee_26, r"rv:(\d+\.\d+\.\d+)"); - -// woothee-0.8.0: r"FreeBSD ([^;\)]+);" -consistent!(woothee_27, r"FreeBSD ([^;\)]+);"); - -// woothee-0.8.0: r"CrOS ([^\)]+)\)" -consistent!(woothee_28, r"CrOS ([^\)]+)\)"); - -// woothee-0.8.0: r"Android[- ](\d+\.\d+(?:\.\d+)?)" -consistent!(woothee_29, r"Android[- ](\d+\.\d+(?:\.\d+)?)"); - -// woothee-0.8.0: r"PSP \(PlayStation Portable\); ([.0-9]+)\)" -consistent!(woothee_30, r"PSP \(PlayStation Portable\); ([.0-9]+)\)"); - -// woothee-0.8.0: r"PLAYSTATION 3;? ([.0-9]+)\)" -consistent!(woothee_31, r"PLAYSTATION 3;? ([.0-9]+)\)"); - -// woothee-0.8.0: r"PlayStation Vita ([.0-9]+)\)" -consistent!(woothee_32, r"PlayStation Vita ([.0-9]+)\)"); - -// woothee-0.8.0: r"PlayStation 4 ([.0-9]+)\)" -consistent!(woothee_33, r"PlayStation 4 ([.0-9]+)\)"); - -// woothee-0.8.0: r"BB10(?:.+)Version/([.0-9]+) " -consistent!(woothee_34, r"BB10(?:.+)Version/([.0-9]+) "); - -// woothee-0.8.0: r"BlackBerry(?:\d+)/([.0-9]+) " -consistent!(woothee_35, r"BlackBerry(?:\d+)/([.0-9]+) "); - -// woothee-0.8.0: r"; CPU(?: iPhone)? OS (\d+_\d+(?:_\d+)?) like Mac OS X" -consistent!( - woothee_36, - r"; CPU(?: iPhone)? OS (\d+_\d+(?:_\d+)?) 
like Mac OS X" -); - -// woothee-0.8.0: r"Mac OS X (10[._]\d+(?:[._]\d+)?)(?:\)|;)" -consistent!(woothee_37, r"Mac OS X (10[._]\d+(?:[._]\d+)?)(?:\)|;)"); - -// woothee-0.8.0: r"^(?:Apache-HttpClient/|Jakarta Commons-HttpClient/|Java/)" -consistent!( - woothee_38, - r"^(?:Apache-HttpClient/|Jakarta Commons-HttpClient/|Java/)" -); - -// woothee-0.8.0: r"[- ]HttpClient(/|$)" -consistent!(woothee_39, r"[- ]HttpClient(/|$)"); - -// woothee-0.8.0: r"^(?:PHP|WordPress|CakePHP|PukiWiki|PECL::HTTP)(?:/| |$)" -consistent!( - woothee_40, - r"^(?:PHP|WordPress|CakePHP|PukiWiki|PECL::HTTP)(?:/| |$)" -); - -// woothee-0.8.0: r"(?:PEAR HTTP_Request|HTTP_Request)(?: class|2)" -consistent!(woothee_41, r"(?:PEAR HTTP_Request|HTTP_Request)(?: class|2)"); - -// woothee-0.8.0: r"(?:Rome Client |UnwindFetchor/|ia_archiver |Summify |PostRank/)" -consistent!( - woothee_42, - r"(?:Rome Client |UnwindFetchor/|ia_archiver |Summify |PostRank/)" -); - -// woothee-0.8.0: r"Sleipnir/([.0-9]+)" -consistent!(woothee_43, r"Sleipnir/([.0-9]+)"); - -// word_replace-0.0.3: r"@@[a-z|A-Z|\d]+@@" -consistent!(word_replace_0, r"@@[a-z|A-Z|\d]+@@"); - -// wordcount-0.1.0: r"\w+" -consistent!(wordcount_0, r"\w+"); - -// just-0.3.12: "^([^=]+)=(.*)$" -consistent!(just_0, "^([^=]+)=(.*)$"); - -// emote-0.1.0: r":[a-zA-Z_]+?:" -consistent!(emote_0, r":[a-zA-Z_]+?:"); - -// emojicons-1.0.1: r":([a-zA-Z0-9_+-]+):" -consistent!(emojicons_0, r":([a-zA-Z0-9_+-]+):"); - -// git2_codecommit-0.1.2: r"git-codecommit\.([a-z0-9-]+)\.amazonaws\.com" -consistent!( - git2_codecommit_0, - r"git-codecommit\.([a-z0-9-]+)\.amazonaws\.com" -); - -// git-workarea-3.1.2: r"^submodule\.(?P<name>.*)\.(?P<key>[^=]*)=(?P<value>.*)$" -consistent!( - git_workarea_0, - r"^submodule\.(?P<name>.*)\.(?P<key>[^=]*)=(?P<value>.*)$" -); - -// git-shell-enforce-directory-1.0.0: r"^(?P<command>git-(?:receive|upload)-pack) '(?P<path>.+)'$" -consistent!( - git_shell_enforce_directory_0, - r"^(?P<command>git-(?:receive|upload)-pack) '(?P<path>.+)'$" -); - -// git-journal-1.6.3: r"[ \n]:(.*?):" -consistent!(git_journal_0, r"[ \n]:(.*?):"); - -// git-find-0.3.2: r"^git@(?P<host>[[:alnum:]\._-]+):(?P<path>[[:alnum:]\._\-/]+).git$" -consistent!( - git_find_0, - r"^git@(?P<host>[[:alnum:]\._-]+):(?P<path>[[:alnum:]\._\-/]+).git$" -); - -// gitlab-api-0.6.0: r"private_token=\w{20}" -consistent!(gitlab_api_0, r"private_token=\w{20}"); - -// td-client-0.7.0: "^(http://|https://)" -consistent!(td_client_0, "^(http://|https://)"); - -// karaconv-0.3.0: r"--(?P<type>[a-zA-Z]+)-- (?P<contents>.*)" -consistent!(karaconv_0, r"--(?P<type>[a-zA-Z]+)-- (?P<contents>.*)"); - -// katana-1.0.2: r"(?P<comp>et al\.)(?:\.)" -consistent!(katana_0, r"(?P<comp>et al\.)(?:\.)"); - -// katana-1.0.2: r"\.{3}" -consistent!(katana_1, r"\.{3}"); - -// katana-1.0.2: r"(?P<number>[0-9]+)\.(?P<decimal>[0-9]+)" -consistent!(katana_2, r"(?P<number>[0-9]+)\.(?P<decimal>[0-9]+)"); - -// katana-1.0.2: r"\s\.(?P<nums>[0-9]+)" -consistent!(katana_3, r"\s\.(?P<nums>[0-9]+)"); - -// katana-1.0.2: r"(?:[A-Za-z]\.){2,}" -consistent!(katana_4, r"(?:[A-Za-z]\.){2,}"); - -// katana-1.0.2: r"(?P<init>[A-Z])(?P<point>\.)" -consistent!(katana_5, r"(?P<init>[A-Z])(?P<point>\.)"); - -// katana-1.0.2: r"(?P<title>[A-Z][a-z]{1,3})(\.)" -consistent!(katana_6, r"(?P<title>[A-Z][a-z]{1,3})(\.)"); - -// katana-1.0.2: r"&==&(?P<p>[.!?])" -consistent!(katana_7, r"&==&(?P<p>[.!?])"); - -// katana-1.0.2: r"&\^&(?P<p>[.!?])" -consistent!(katana_8, r"&\^&(?P<p>[.!?])"); - -// katana-1.0.2: r"&\*\*&(?P<p>[.!?])" -consistent!(katana_9, 
r"&\*\*&(?P<p>[.!?])"); - -// katana-1.0.2: r"&=&(?P<p>[.!?])" -consistent!(katana_10, r"&=&(?P<p>[.!?])"); - -// katana-1.0.2: r"&##&(?P<p>[.!?])" -consistent!(katana_11, r"&##&(?P<p>[.!?])"); - -// katana-1.0.2: r"&\$&(?P<p>[.!?])" -consistent!(katana_12, r"&\$&(?P<p>[.!?])"); - -// kailua_syntax-1.1.0: r"@(?:_|\d+(?:/\d+(?:-\d+)?)?)" -consistent!(kailua_syntax_0, r"@(?:_|\d+(?:/\d+(?:-\d+)?)?)"); - -// kailua_syntax-1.1.0: r"<(\d+)>" -consistent!(kailua_syntax_1, r"<(\d+)>"); - -// ftp-3.0.1: r"\((\d+),(\d+),(\d+),(\d+),(\d+),(\d+)\)" -consistent!(ftp_0, r"\((\d+),(\d+),(\d+),(\d+),(\d+),(\d+)\)"); - -// ftp-3.0.1: r"\b(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})\b" -consistent!(ftp_1, r"\b(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})\b"); - -// ftp-3.0.1: r"\s+(\d+)\s*$" -consistent!(ftp_2, r"\s+(\d+)\s*$"); - -// vat-0.1.0: r"<countryCode>(.*?)</countryCode>" -consistent!(vat_0, r"<countryCode>(.*?)</countryCode>"); - -// vat-0.1.0: r"<vatNumber>(.*?)</vatNumber>" -consistent!(vat_1, r"<vatNumber>(.*?)</vatNumber>"); - -// vat-0.1.0: r"<name>(.*?)</name>" -consistent!(vat_2, r"<name>(.*?)</name>"); - -// vat-0.1.0: r"<address>(?s)(.*?)(?-s)</address>" -consistent!(vat_3, r"<address>(?s)(.*?)(?-s)</address>"); - -// vat-0.1.0: r"<valid>(true|false)</valid>" -consistent!(vat_4, r"<valid>(true|false)</valid>"); - -// vat-0.1.0: r"^ATU\d{8}$" -consistent!(vat_5, r"^ATU\d{8}$"); - -// vat-0.1.0: r"^BE0?\d{9, 10}$" -consistent!(vat_6, r"^BE0?\d{9, 10}$"); - -// vat-0.1.0: r"^BG\d{9,10}$" -consistent!(vat_7, r"^BG\d{9,10}$"); - -// vat-0.1.0: r"^HR\d{11}$" -consistent!(vat_8, r"^HR\d{11}$"); - -// vat-0.1.0: r"^CY\d{8}[A-Z]$" -consistent!(vat_9, r"^CY\d{8}[A-Z]$"); - -// vat-0.1.0: r"^CZ\d{8,10}$" -consistent!(vat_10, r"^CZ\d{8,10}$"); - -// vat-0.1.0: r"^DK\d{8}$" -consistent!(vat_11, r"^DK\d{8}$"); - -// vat-0.1.0: r"^EE\d{9}$" -consistent!(vat_12, r"^EE\d{9}$"); - -// vat-0.1.0: r"^FI\d{8}$" -consistent!(vat_13, r"^FI\d{8}$"); - -// vat-0.1.0: r"^FR[A-HJ-NP-Z0-9][A-HJ-NP-Z0-9]\d{9}$" -consistent!(vat_14, r"^FR[A-HJ-NP-Z0-9][A-HJ-NP-Z0-9]\d{9}$"); - -// vat-0.1.0: r"^DE\d{9}$" -consistent!(vat_15, r"^DE\d{9}$"); - -// vat-0.1.0: r"^EL\d{9}$" -consistent!(vat_16, r"^EL\d{9}$"); - -// vat-0.1.0: r"^HU\d{8}$" -consistent!(vat_17, r"^HU\d{8}$"); - -// vat-0.1.0: r"^IE\d[A-Z0-9\+\*]\d{5}[A-Z]{1,2}$" -consistent!(vat_18, r"^IE\d[A-Z0-9\+\*]\d{5}[A-Z]{1,2}$"); - -// vat-0.1.0: r"^IT\d{11}$" -consistent!(vat_19, r"^IT\d{11}$"); - -// vat-0.1.0: r"^LV\d{11}$" -consistent!(vat_20, r"^LV\d{11}$"); - -// vat-0.1.0: r"^LT(\d{9}|\d{12})$" -consistent!(vat_21, r"^LT(\d{9}|\d{12})$"); - -// vat-0.1.0: r"^LU\d{8}$" -consistent!(vat_22, r"^LU\d{8}$"); - -// vat-0.1.0: r"^MT\d{8}$" -consistent!(vat_23, r"^MT\d{8}$"); - -// vat-0.1.0: r"^NL\d{9}B\d{2}$" -consistent!(vat_24, r"^NL\d{9}B\d{2}$"); - -// vat-0.1.0: r"^PL\d{10}$" -consistent!(vat_25, r"^PL\d{10}$"); - -// vat-0.1.0: r"^PT\d{9}$" -consistent!(vat_26, r"^PT\d{9}$"); - -// vat-0.1.0: r"^RO\d{2,10}$" -consistent!(vat_27, r"^RO\d{2,10}$"); - -// vat-0.1.0: r"^SK\d{10}$" -consistent!(vat_28, r"^SK\d{10}$"); - -// vat-0.1.0: r"^SI\d{8}$" -consistent!(vat_29, r"^SI\d{8}$"); - -// vat-0.1.0: r"^ES[A-Z0-9]\d{7}[A-Z0-9]$" -consistent!(vat_30, r"^ES[A-Z0-9]\d{7}[A-Z0-9]$"); - -// vat-0.1.0: r"^SE\d{10}01$" -consistent!(vat_31, r"^SE\d{10}01$"); - -// vat-0.1.0: r"^(GB(GD|HA)\d{3}|GB\d{9}|GB\d{12})$" -consistent!(vat_32, r"^(GB(GD|HA)\d{3}|GB\d{9}|GB\d{12})$"); - -// eve-0.1.1: r"\{\{(.*)\}\}" -consistent!(eve_0, r"\{\{(.*)\}\}"); - -// egc-0.1.2: "^mio" 
-consistent!(egc_0, "^mio"); - -// pew-0.2.3: "" -consistent!(pew_0, ""); - -// pew-0.2.3: "" -consistent!(pew_1, ""); - -// mob-0.4.3: "y" -consistent!(mob_0, "y"); - -// lit-0.2.8: "@([a-z]+)" -consistent!(lit_0, "@([a-z]+)"); - -// lit-0.2.8: "([A-Z-]+):(.*)" -consistent!(lit_1, "([A-Z-]+):(.*)"); - -// lit-0.2.8: "^[a-zA-Z_][a-zA-Z0-9_]*$" -consistent!(lit_2, "^[a-zA-Z_][a-zA-Z0-9_]*$"); - -// avm-1.0.1: r"\d+\.\d+\.\d+" -consistent!(avm_0, r"\d+\.\d+\.\d+"); - -// avm-1.0.1: r"\d+\.\d+\.\d+" -consistent!(avm_1, r"\d+\.\d+\.\d+"); - -// orm-0.2.0: r"^Vec<(.+)>$" -consistent!(orm_0, r"^Vec<(.+)>$"); - -// sgf-0.1.5: r"\\(\r\n|\n\r|\n|\r)" -consistent!(sgf_0, r"\\(\r\n|\n\r|\n|\r)"); - -// sgf-0.1.5: r"\\(.)" -consistent!(sgf_1, r"\\(.)"); - -// sgf-0.1.5: r"\r\n|\n\r|\n|\r" -consistent!(sgf_2, r"\r\n|\n\r|\n|\r"); - -// sgf-0.1.5: r"([\]\\:])" -consistent!(sgf_3, r"([\]\\:])"); - -// dok-0.2.0: "^Bearer realm=\"(.+?)\",service=\"(.+?)\",scope=\"(.+?)\"$" -consistent!( - dok_0, - "^Bearer realm=\"(.+?)\",service=\"(.+?)\",scope=\"(.+?)\"$" -); - -// d20-0.1.0: r"([+-]?\s*\d+[dD]\d+|[+-]?\s*\d+)" -consistent!(d20_0, r"([+-]?\s*\d+[dD]\d+|[+-]?\s*\d+)"); - -// dvb-0.3.0: "E" -consistent!(dvb_0, "E"); - -// dvb-0.3.0: "^F" -consistent!(dvb_1, "^F"); - -// dvb-0.3.0: "^S" -consistent!(dvb_2, "^S"); - -// ger-0.2.0: r"Change-Id: (I[a-f0-9]{40})$" -consistent!(ger_0, r"Change-Id: (I[a-f0-9]{40})$"); - -// ger-0.2.0: r"(refs|ref|fix|fixes|close|closes)\s+([A-Z]{2,5}-[0-9]{1,5})$" -consistent!( - ger_1, - r"(refs|ref|fix|fixes|close|closes)\s+([A-Z]{2,5}-[0-9]{1,5})$" -); - -// n5-0.2.1: r"(\d+)(\.(\d+))?(\.(\d+))?(.*)" -consistent!(n5_0, r"(\d+)(\.(\d+))?(\.(\d+))?(.*)"); - -// po-0.1.4: r"[A-Za-z0-9]" -consistent!(po_0, r"[A-Za-z0-9]"); - -// carnix-0.8.5: "path is (‘|')?([^’'\n]*)(’|')?" -consistent!(carnix_0, "path is (‘|')?([^’'\n]*)(’|')?"); - -// carnix-0.8.5: r"^(\S*) (\d*)\.(\d*)\.(\d*)(-(\S*))?(.*)?" -consistent!(carnix_1, r"^(\S*) (\d*)\.(\d*)\.(\d*)(-(\S*))?(.*)?"); - -// carnix-0.8.5: r"(\d*)\.(\d*)\.(\d*)(-(\S*))?" -consistent!(carnix_2, r"(\d*)\.(\d*)\.(\d*)(-(\S*))?"); - -// carnix-0.8.5: r"(\S*)-(\d*)\.(\d*)\.(\d*)(-(\S*))?" 
-consistent!(carnix_3, r"(\S*)-(\d*)\.(\d*)\.(\d*)(-(\S*))?"); - -// caseless-0.2.1: r"^# CaseFolding-(\d+)\.(\d+)\.(\d+).txt$" -consistent!(caseless_0, r"^# CaseFolding-(\d+)\.(\d+)\.(\d+).txt$"); - -// caseless-0.2.1: r"^([0-9A-F]+); [CF]; ([0-9A-F ]+);" -consistent!(caseless_1, r"^([0-9A-F]+); [CF]; ([0-9A-F ]+);"); - -// cabot-0.2.0: "\r?\n\r?\n" -consistent!(cabot_0, "\r?\n\r?\n"); - -// cabot-0.2.0: "\r?\n" -consistent!(cabot_1, "\r?\n"); - -// card-validate-2.2.1: r"^600" -consistent!(card_validate_0, r"^600"); - -// card-validate-2.2.1: r"^5019" -consistent!(card_validate_1, r"^5019"); - -// card-validate-2.2.1: r"^4" -consistent!(card_validate_2, r"^4"); - -// card-validate-2.2.1: r"^(5[1-5]|2[2-7])" -consistent!(card_validate_3, r"^(5[1-5]|2[2-7])"); - -// card-validate-2.2.1: r"^3[47]" -consistent!(card_validate_4, r"^3[47]"); - -// card-validate-2.2.1: r"^3[0689]" -consistent!(card_validate_5, r"^3[0689]"); - -// card-validate-2.2.1: r"^6([045]|22)" -consistent!(card_validate_6, r"^6([045]|22)"); - -// card-validate-2.2.1: r"^(62|88)" -consistent!(card_validate_7, r"^(62|88)"); - -// card-validate-2.2.1: r"^35" -consistent!(card_validate_8, r"^35"); - -// card-validate-2.2.1: r"^[0-9]+$" -consistent!(card_validate_9, r"^[0-9]+$"); - -// cargo-testify-0.3.0: r"\d{1,} passed.*filtered out" -consistent!(cargo_testify_0, r"\d{1,} passed.*filtered out"); - -// cargo-testify-0.3.0: r"error(:|\[).*" -consistent!(cargo_testify_1, r"error(:|\[).*"); - -// cargo-wix-0.0.5: r"<(.*?)>" -consistent!(cargo_wix_0, r"<(.*?)>"); - -// cargo-wix-0.0.5: r"<(.*?)>" -consistent!(cargo_wix_1, r"<(.*?)>"); - -// cargo-wix-0.0.5: r"<(.*?)>" -consistent!(cargo_wix_2, r"<(.*?)>"); - -// cargo-wix-0.0.5: r"<(.*?)>" -consistent!(cargo_wix_3, r"<(.*?)>"); - -// cargo-incremental-0.1.23: r"(?m)^incremental: re-using (\d+) out of (\d+) modules$" -consistent!( - cargo_incremental_0, - r"(?m)^incremental: re-using (\d+) out of (\d+) modules$" -); - -// cargo-incremental-0.1.23: "(?m)(warning|error): (.*)\n --> ([^:]:\\d+:\\d+)$" -consistent!( - cargo_incremental_1, - "(?m)(warning|error): (.*)\n --> ([^:]:\\d+:\\d+)$" -); - -// cargo-incremental-0.1.23: r"(?m)^test (.*) \.\.\. (\w+)" -consistent!(cargo_incremental_2, r"(?m)^test (.*) \.\.\. 
(\w+)"); - -// cargo-incremental-0.1.23: r"(?m)(\d+) passed; (\d+) failed; (\d+) ignored; \d+ measured" -consistent!( - cargo_incremental_3, - r"(?m)(\d+) passed; (\d+) failed; (\d+) ignored; \d+ measured" -); - -// cargo-testjs-0.1.2: r"^[^-]+-[0-9a-f]+\.js$" -consistent!(cargo_testjs_0, r"^[^-]+-[0-9a-f]+\.js$"); - -// cargo-tarpaulin-0.6.2: r"\s*//" -consistent!(cargo_tarpaulin_0, r"\s*//"); - -// cargo-tarpaulin-0.6.2: r"/\*" -consistent!(cargo_tarpaulin_1, r"/\*"); - -// cargo-tarpaulin-0.6.2: r"\*/" -consistent!(cargo_tarpaulin_2, r"\*/"); - -// cargo-culture-kit-0.1.0: r"^fo" -consistent!(cargo_culture_kit_0, r"^fo"); - -// cargo-screeps-0.1.3: "\\s+" -consistent!(cargo_screeps_0, "\\s+"); - -// cargo-brew-0.1.4: r"`(\S+) v([0-9.]+)" -consistent!(cargo_brew_0, r"`(\S+) v([0-9.]+)"); - -// cargo-release-0.10.2: "^\\[.+\\]" -consistent!(cargo_release_0, "^\\[.+\\]"); - -// cargo-release-0.10.2: "^\\[\\[.+\\]\\]" -consistent!(cargo_release_1, "^\\[\\[.+\\]\\]"); - -// cargo-edit-0.3.0-beta.1: r"^https://github.com/([-_0-9a-zA-Z]+)/([-_0-9a-zA-Z]+)(/|.git)?$" -consistent!( - cargo_edit_0, - r"^https://github.com/([-_0-9a-zA-Z]+)/([-_0-9a-zA-Z]+)(/|.git)?$" -); - -// cargo-edit-0.3.0-beta.1: r"^https://gitlab.com/([-_0-9a-zA-Z]+)/([-_0-9a-zA-Z]+)(/|.git)?$" -consistent!( - cargo_edit_1, - r"^https://gitlab.com/([-_0-9a-zA-Z]+)/([-_0-9a-zA-Z]+)(/|.git)?$" -); - -// cargo-disassemble-0.1.1: ".*" -consistent!(cargo_disassemble_0, ".*"); - -// cargo-demangle-0.1.2: r"(?m)(?P<symbol>_ZN[0-9]+.*E)" -consistent!(cargo_demangle_0, r"(?m)(?P<symbol>_ZN[0-9]+.*E)"); - -// cargo-coverage-annotations-0.1.5: r"^\s*\}(?:\)*;?|\s*else\s*\{)$" -consistent!(cargo_coverage_annotations_0, r"^\s*\}(?:\)*;?|\s*else\s*\{)$"); - -// cargo-urlcrate-1.0.1: "[\u{001b}\u{009b}][\\[()#;?]*(?:[0-9]{1,4}(?:;[0-9]{0,4})*)?[0-9A-PRZcf-nqry=><]" -consistent!(cargo_urlcrate_0, "[\u{001b}\u{009b}][\\[()#;?]*(?:[0-9]{1,4}(?:;[0-9]{0,4})*)?[0-9A-PRZcf-nqry=><]"); - -// cargo-script-0.2.8: r"^\s*\*( |$)" -consistent!(cargo_script_0, r"^\s*\*( |$)"); - -// cargo-script-0.2.8: r"^(\s+)" -consistent!(cargo_script_1, r"^(\s+)"); - -// cargo-script-0.2.8: r"/\*|\*/" -consistent!(cargo_script_2, r"/\*|\*/"); - -// cargo-script-0.2.8: r"^\s*//!" -consistent!(cargo_script_3, r"^\s*//!"); - -// cargo-script-0.2.8: r"^#![^\[].*?(\r\n|\n)" -consistent!(cargo_script_4, r"^#![^\[].*?(\r\n|\n)"); - -// cargo-update-1.5.2: r"cargo-install-update\.exe-v.+" -consistent!(cargo_update_0, r"cargo-install-update\.exe-v.+"); - -// canteen-0.4.1: r"^<(?:(int|uint|str|float|path):)?([\w_][a-zA-Z0-9_]*)>$" -consistent!( - canteen_0, - r"^<(?:(int|uint|str|float|path):)?([\w_][a-zA-Z0-9_]*)>$" -); - -// thruster-cli-0.1.3: r"(.)([A-Z])" -consistent!(thruster_cli_0, r"(.)([A-Z])"); - -// thieves-cant-0.1.0: "([Z]+)$" -consistent!(thieves_cant_0, "([Z]+)$"); - -// codeowners-0.1.3: r"^@\S+/\S+" -consistent!(codeowners_0, r"^@\S+/\S+"); - -// codeowners-0.1.3: r"^@\S+" -consistent!(codeowners_1, r"^@\S+"); - -// codeowners-0.1.3: r"^\S+@\S+" -consistent!(codeowners_2, r"^\S+@\S+"); - -// conserve-0.4.2: r"^b0000 {21} complete 20[-0-9T:+]+\s +\d+s\n$" -consistent!(conserve_0, r"^b0000 {21} complete 20[-0-9T:+]+\s +\d+s\n$"); - -// commodore-0.3.0: r"(?P<greeting>\S+?) (?P<name>\S+?)$" -consistent!(commodore_0, r"(?P<greeting>\S+?) 
(?P<name>\S+?)$"); - -// corollary-0.3.0: r"([ \t]*)```haskell([\s\S]*?)```" -consistent!(corollary_0, r"([ \t]*)```haskell([\s\S]*?)```"); - -// corollary-0.3.0: r"\b((?:a|b|t)\d*)\b" -consistent!(corollary_1, r"\b((?:a|b|t)\d*)\b"); - -// colorizex-0.1.3: "NB" -consistent!(colorizex_0, "NB"); - -// colorstring-0.0.1: r"(?i)\[[a-z0-9_-]+\]" -consistent!(colorstring_0, r"(?i)\[[a-z0-9_-]+\]"); - -// colorstring-0.0.1: r"^(?i)(\[[a-z0-9_-]+\])+" -consistent!(colorstring_1, r"^(?i)(\[[a-z0-9_-]+\])+"); - -// cosmogony-0.3.0: "name:(.+)" -consistent!(cosmogony_0, "name:(.+)"); - -// cobalt-bin-0.12.1: r"(?m:^ {0,3}\[[^\]]+\]:.+$)" -consistent!(cobalt_bin_0, r"(?m:^ {0,3}\[[^\]]+\]:.+$)"); - -// comrak-0.2.12: r"[^\p{L}\p{M}\p{N}\p{Pc} -]" -consistent!(comrak_0, r"[^\p{L}\p{M}\p{N}\p{Pc} -]"); - -// content-blocker-0.2.3: "" -consistent!(content_blocker_0, ""); - -// content-blocker-0.2.3: "(?i)hi" -consistent!(content_blocker_1, "(?i)hi"); - -// content-blocker-0.2.3: "http[s]?://domain.org" -consistent!(content_blocker_2, "http[s]?://domain.org"); - -// content-blocker-0.2.3: "(?i)http[s]?://domain.org" -consistent!(content_blocker_3, "(?i)http[s]?://domain.org"); - -// content-blocker-0.2.3: "http://domain.org" -consistent!(content_blocker_4, "http://domain.org"); - -// content-blocker-0.2.3: "http://domain.org" -consistent!(content_blocker_5, "http://domain.org"); - -// content-blocker-0.2.3: "ad.html" -consistent!(content_blocker_6, "ad.html"); - -// content-blocker-0.2.3: "ad.html" -consistent!(content_blocker_7, "ad.html"); - -// content-blocker-0.2.3: "http://domain.org" -consistent!(content_blocker_8, "http://domain.org"); - -// content-blocker-0.2.3: "http://domain.org/nocookies.sjs" -consistent!(content_blocker_9, "http://domain.org/nocookies.sjs"); - -// content-blocker-0.2.3: "http://domain.org/nocookies.sjs" -consistent!(content_blocker_10, "http://domain.org/nocookies.sjs"); - -// content-blocker-0.2.3: "http://domain.org/hideme.jpg" -consistent!(content_blocker_11, "http://domain.org/hideme.jpg"); - -// content-blocker-0.2.3: "http://domain.org/ok.html" -consistent!(content_blocker_12, "http://domain.org/ok.html"); - -// content-blocker-0.2.3: "http://domain.org/ok.html\\?except_this=1" -consistent!(content_blocker_13, "http://domain.org/ok.html\\?except_this=1"); - -// victoria-dom-0.1.2: "[A-Za-z0-9=]" -consistent!(victoria_dom_0, "[A-Za-z0-9=]"); - -// numbat-1.0.0: r"^nsq://" -consistent!(numbat_0, r"^nsq://"); - -// airkorea-0.1.2: r"[\s\t\r\n]" -consistent!(airkorea_0, r"[\s\t\r\n]"); - -// airkorea-0.1.2: r"([\{\[,])|([\}\]])" -consistent!(airkorea_1, r"([\{\[,])|([\}\]])"); - -// airkorea-0.1.2: r"[^.\d]+$" -consistent!(airkorea_2, r"[^.\d]+$"); - -// rofl-0.0.1: r"\b" -// consistent!(rofl_0, r"\b"); - -// rogcat-0.2.15: r"--------- beginning of.*" -consistent!(rogcat_0, r"--------- beginning of.*"); - -// rogcat-0.2.15: r"a|e|i|o|u" -consistent!(rogcat_1, r"a|e|i|o|u"); - -// rogcat-0.2.15: r"^(\d+)([kMG])$" -consistent!(rogcat_2, r"^(\d+)([kMG])$"); - -// media_filename-0.1.4: "\\.([A-Za-z0-9]{2,4})$" -consistent!(media_filename_0, "\\.([A-Za-z0-9]{2,4})$"); - -// media_filename-0.1.4: "([0-9]{3,4}p|[0-9]{3,4}x[0-9]{3,4})" -consistent!(media_filename_1, "([0-9]{3,4}p|[0-9]{3,4}x[0-9]{3,4})"); - -// media_filename-0.1.4: "(?:^\\[([^]]+)\\]|- ?([^-]+)$)" -consistent!(media_filename_2, "(?:^\\[([^]]+)\\]|- ?([^-]+)$)"); - -// media_filename-0.1.4: "(?:[eE]([0-9]{2,3})|[^0-9A-Za-z]([0-9]{2,3})(?:v[0-9])?[^0-9A-Za-z])" -consistent!( - media_filename_3, - 
"(?:[eE]([0-9]{2,3})|[^0-9A-Za-z]([0-9]{2,3})(?:v[0-9])?[^0-9A-Za-z])" -); - -// media_filename-0.1.4: "[sS]([0-9]{1,2})" -consistent!(media_filename_4, "[sS]([0-9]{1,2})"); - -// media_filename-0.1.4: "((?i)(?:PPV.)?[HP]DTV|(?:HD)?CAM|BRRIP|[^a-z]TS[^a-z]|(?:PPV )?WEB.?DL(?: DVDRip)?|HDRip|DVDRip|CamRip|W[EB]BRip|BluRay|BD|DVD|DvDScr|hdtv)" -consistent!(media_filename_5, "((?i)(?:PPV.)?[HP]DTV|(?:HD)?CAM|BRRIP|[^a-z]TS[^a-z]|(?:PPV )?WEB.?DL(?: DVDRip)?|HDRip|DVDRip|CamRip|W[EB]BRip|BluRay|BD|DVD|DvDScr|hdtv)"); - -// media_filename-0.1.4: "((19[0-9]|20[01])[0-9])" -consistent!(media_filename_6, "((19[0-9]|20[01])[0-9])"); - -// media_filename-0.1.4: "((?i)xvid|x264|h\\.?264)" -consistent!(media_filename_7, "((?i)xvid|x264|h\\.?264)"); - -// media_filename-0.1.4: "((?i)MP3|DD5\\.?1|Dual[- ]Audio|LiNE|DTS|AAC(?:\\.?2\\.0)?|AC3(?:\\.5\\.1)?)" -consistent!(media_filename_8, "((?i)MP3|DD5\\.?1|Dual[- ]Audio|LiNE|DTS|AAC(?:\\.?2\\.0)?|AC3(?:\\.5\\.1)?)"); - -// media_filename-0.1.4: "\\[([0-9A-F]{8})\\]" -consistent!(media_filename_9, "\\[([0-9A-F]{8})\\]"); - -// termimage-0.3.2: r"(\d+)[xX](\d+)" -consistent!(termimage_0, r"(\d+)[xX](\d+)"); - -// teensy-0.1.0: r".*(\d{4}-\d{2}-\d{2}).*" -consistent!(teensy_0, r".*(\d{4}-\d{2}-\d{2}).*"); - -// telescreen-0.1.3: r"<@(.+)>" -consistent!(telescreen_0, r"<@(.+)>"); - -// tempus_fugit-0.4.4: r"^(\d+)" -consistent!(tempus_fugit_0, r"^(\d+)"); - -// fselect-0.4.1: "(\\?|\\.|\\*|\\[|\\]|\\(|\\)|\\^|\\$)" -consistent!(fselect_0, "(\\?|\\.|\\*|\\[|\\]|\\(|\\)|\\^|\\$)"); - -// fselect-0.4.1: "(%|_|\\?|\\.|\\*|\\[|\\]|\\(|\\)|\\^|\\$)" -consistent!(fselect_1, "(%|_|\\?|\\.|\\*|\\[|\\]|\\(|\\)|\\^|\\$)"); - -// fs_eventbridge-0.1.0: r"^([A-Z]+)(?:\s(.+))?\s*" -consistent!(fs_eventbridge_0, r"^([A-Z]+)(?:\s(.+))?\s*"); - -// joseki-0.0.1: r"(\w{1,2})\[(.+?)\]" -consistent!(joseki_0, r"(\w{1,2})\[(.+?)\]"); - -// tweetr-0.2.1: r"(?i)in (\d+) (second|minute|hour|day|week)s?" 
-consistent!(tweetr_0, r"(?i)in (\d+) (second|minute|hour|day|week)s?"); - -// bullet_core-0.1.1: "^(?u:[0-9])+" -consistent!(bullet_core_0, "^(?u:[0-9])+"); - -// bullet_core-0.1.1: "^(?u:[0-9])+(?u:\\.)(?u:[0-9])+" -consistent!(bullet_core_1, "^(?u:[0-9])+(?u:\\.)(?u:[0-9])+"); - -// bullet_core-0.1.1: "^(?u:[A-Za-zª-ªµ-µº-ºÀ-ÖØ-öø-ˁˆ-ˑˠ-ˤˬ-ˬˮ-ˮͰ-ʹͶ-ͷͺ-ͽͿ-ͿΆ-ΆΈ-ΊΌ-ΌΎ-ΡΣ-ϵϷ-ҁҊ-ԯԱ-Ֆՙ-ՙա-ևא-תװ-ײؠ-يٮ-ٯٱ-ۓە-ەۥ-ۦۮ-ۯۺ-ۼۿ-ۿܐ-ܐܒ-ܯݍ-ޥޱ-ޱߊ-ߪߴ-ߵߺ-ߺࠀ-ࠕࠚ-ࠚࠤ-ࠤࠨ-ࠨࡀ-ࡘࢠ-ࢴऄ-हऽ-ऽॐ-ॐक़-ॡॱ-ঀঅ-ঌএ-ঐও-নপ-রল-লশ-হঽ-ঽৎ-ৎড়-ঢ়য়-ৡৰ-ৱਅ-ਊਏ-ਐਓ-ਨਪ-ਰਲ-ਲ਼ਵ-ਸ਼ਸ-ਹਖ਼-ੜਫ਼-ਫ਼ੲ-ੴઅ-ઍએ-ઑઓ-નપ-રલ-ળવ-હઽ-ઽૐ-ૐૠ-ૡૹ-ૹଅ-ଌଏ-ଐଓ-ନପ-ରଲ-ଳଵ-ହଽ-ଽଡ଼-ଢ଼ୟ-ୡୱ-ୱஃ-ஃஅ-ஊஎ-ஐஒ-கங-சஜ-ஜஞ-டண-தந-பம-ஹௐ-ௐఅ-ఌఎ-ఐఒ-నప-హఽ-ఽౘ-ౚౠ-ౡಅ-ಌಎ-ಐಒ-ನಪ-ಳವ-ಹಽ-ಽೞ-ೞೠ-ೡೱ-ೲഅ-ഌഎ-ഐഒ-ഺഽ-ഽൎ-ൎൟ-ൡൺ-ൿඅ-ඖක-නඳ-රල-ලව-ෆก-ะา-ำเ-ๆກ-ຂຄ-ຄງ-ຈຊ-ຊຍ-ຍດ-ທນ-ຟມ-ຣລ-ລວ-ວສ-ຫອ-ະາ-ຳຽ-ຽເ-ໄໆ-ໆໜ-ໟༀ-ༀཀ-ཇཉ-ཬྈ-ྌက-ဪဿ-ဿၐ-ၕၚ-ၝၡ-ၡၥ-ၦၮ-ၰၵ-ႁႎ-ႎႠ-ჅჇ-ჇჍ-Ⴭა-ჺჼ-ቈቊ-ቍቐ-ቖቘ-ቘቚ-ቝበ-ኈኊ-ኍነ-ኰኲ-ኵኸ-ኾዀ-ዀዂ-ዅወ-ዖዘ-ጐጒ-ጕጘ-ፚᎀ-ᎏᎠ-Ᏽᏸ-ᏽᐁ-ᙬᙯ-ᙿᚁ-ᚚᚠ-ᛪᛱ-ᛸᜀ-ᜌᜎ-ᜑᜠ-ᜱᝀ-ᝑᝠ-ᝬᝮ-ᝰក-ឳៗ-ៗៜ-ៜᠠ-ᡷᢀ-ᢨᢪ-ᢪᢰ-ᣵᤀ-ᤞᥐ-ᥭᥰ-ᥴᦀ-ᦫᦰ-ᧉᨀ-ᨖᨠ-ᩔᪧ-ᪧᬅ-ᬳᭅ-ᭋᮃ-ᮠᮮ-ᮯᮺ-ᯥᰀ-ᰣᱍ-ᱏᱚ-ᱽᳩ-ᳬᳮ-ᳱᳵ-ᳶᴀ-ᶿḀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙ-ὙὛ-ὛὝ-ὝὟ-ώᾀ-ᾴᾶ-ᾼι-ιῂ-ῄῆ-ῌῐ-ΐῖ-Ίῠ-Ῥῲ-ῴῶ-ῼⁱ-ⁱⁿ-ⁿₐ-ₜℂ-ℂℇ-ℇℊ-ℓℕ-ℕℙ-ℝℤ-ℤΩ-Ωℨ-ℨK-ℭℯ-ℹℼ-ℿⅅ-ⅉⅎ-ⅎↃ-ↄⰀ-Ⱞⰰ-ⱞⱠ-ⳤⳫ-ⳮⳲ-ⳳⴀ-ⴥⴧ-ⴧⴭ-ⴭⴰ-ⵧⵯ-ⵯⶀ-ⶖⶠ-ⶦⶨ-ⶮⶰ-ⶶⶸ-ⶾⷀ-ⷆⷈ-ⷎⷐ-ⷖⷘ-ⷞⸯ-ⸯ々-〆〱-〵〻-〼ぁ-ゖゝ-ゟァ-ヺー-ヿㄅ-ㄭㄱ-ㆎㆠ-ㆺㇰ-ㇿ㐀-䶵一-鿕ꀀ-ꒌꓐ-ꓽꔀ-ꘌꘐ-ꘟꘪ-ꘫꙀ-ꙮꙿ-ꚝꚠ-ꛥꜗ-ꜟꜢ-ꞈꞋ-ꞭꞰ-ꞷꟷ-ꠁꠃ-ꠅꠇ-ꠊꠌ-ꠢꡀ-ꡳꢂ-ꢳꣲ-ꣷꣻ-ꣻꣽ-ꣽꤊ-ꤥꤰ-ꥆꥠ-ꥼꦄ-ꦲꧏ-ꧏꧠ-ꧤꧦ-ꧯꧺ-ꧾꨀ-ꨨꩀ-ꩂꩄ-ꩋꩠ-ꩶꩺ-ꩺꩾ-ꪯꪱ-ꪱꪵ-ꪶꪹ-ꪽꫀ-ꫀꫂ-ꫂꫛ-ꫝꫠ-ꫪꫲ-ꫴꬁ-ꬆꬉ-ꬎꬑ-ꬖꬠ-ꬦꬨ-ꬮꬰ-ꭚꭜ-ꭥꭰ-ꯢ가-힣ힰ-ퟆퟋ-ퟻ豈-舘並-龎ff-stﬓ-ﬗיִ-יִײַ-ﬨשׁ-זּטּ-לּמּ-מּנּ-סּףּ-פּצּ-ﮱﯓ-ﴽﵐ-ﶏﶒ-ﷇﷰ-ﷻﹰ-ﹴﹶ-ﻼA-Za-zヲ-하-ᅦᅧ-ᅬᅭ-ᅲᅳ-ᅵ𐀀-𐀋𐀍-𐀦𐀨-𐀺𐀼-𐀽𐀿-𐁍𐁐-𐁝𐂀-𐃺𐊀-𐊜𐊠-𐋐𐌀-𐌟𐌰-𐍀𐍂-𐍉𐍐-𐍵𐎀-𐎝𐎠-𐏃𐏈-𐏏𐐀-𐒝𐔀-𐔧𐔰-𐕣𐘀-𐜶𐝀-𐝕𐝠-𐝧𐠀-𐠅𐠈-𐠈𐠊-𐠵𐠷-𐠸𐠼-𐠼𐠿-𐡕𐡠-𐡶𐢀-𐢞𐣠-𐣲𐣴-𐣵𐤀-𐤕𐤠-𐤹𐦀-𐦷𐦾-𐦿𐨀-𐨀𐨐-𐨓𐨕-𐨗𐨙-𐨳𐩠-𐩼𐪀-𐪜𐫀-𐫇𐫉-𐫤𐬀-𐬵𐭀-𐭕𐭠-𐭲𐮀-𐮑𐰀-𐱈𐲀-𐲲𐳀-𐳲𑀃-𑀷𑂃-𑂯𑃐-𑃨𑄃-𑄦𑅐-𑅲𑅶-𑅶𑆃-𑆲𑇁-𑇄𑇚-𑇚𑇜-𑇜𑈀-𑈑𑈓-𑈫𑊀-𑊆𑊈-𑊈𑊊-𑊍𑊏-𑊝𑊟-𑊨𑊰-𑋞𑌅-𑌌𑌏-𑌐𑌓-𑌨𑌪-𑌰𑌲-𑌳𑌵-𑌹𑌽-𑌽𑍐-𑍐𑍝-𑍡𑒀-𑒯𑓄-𑓅𑓇-𑓇𑖀-𑖮𑗘-𑗛𑘀-𑘯𑙄-𑙄𑚀-𑚪𑜀-𑜙𑢠-𑣟𑣿-𑣿𑫀-𑫸𒀀-𒎙𒒀-𒕃𓀀-𓐮𔐀-𔙆𖠀-𖨸𖩀-𖩞𖫐-𖫭𖬀-𖬯𖭀-𖭃𖭣-𖭷𖭽-𖮏𖼀-𖽄𖽐-𖽐𖾓-𖾟𛀀-𛀁𛰀-𛱪𛱰-𛱼𛲀-𛲈𛲐-𛲙𝐀-𝑔𝑖-𝒜𝒞-𝒟𝒢-𝒢𝒥-𝒦𝒩-𝒬𝒮-𝒹𝒻-𝒻𝒽-𝓃𝓅-𝔅𝔇-𝔊𝔍-𝔔𝔖-𝔜𝔞-𝔹𝔻-𝔾𝕀-𝕄𝕆-𝕆𝕊-𝕐𝕒-𝚥𝚨-𝛀𝛂-𝛚𝛜-𝛺𝛼-𝜔𝜖-𝜴𝜶-𝝎𝝐-𝝮𝝰-𝞈𝞊-𝞨𝞪-𝟂𝟄-𝟋𞠀-𞣄𞸀-𞸃𞸅-𞸟𞸡-𞸢𞸤-𞸤𞸧-𞸧𞸩-𞸲𞸴-𞸷𞸹-𞸹𞸻-𞸻𞹂-𞹂𞹇-𞹇𞹉-𞹉𞹋-𞹋𞹍-𞹏𞹑-𞹒𞹔-𞹔𞹗-𞹗𞹙-𞹙𞹛-𞹛𞹝-𞹝𞹟-𞹟𞹡-𞹢𞹤-𞹤𞹧-𞹪𞹬-𞹲𞹴-𞹷𞹹-𞹼𞹾-𞹾𞺀-𞺉𞺋-𞺛𞺡-𞺣𞺥-𞺩𞺫-𞺻𠀀-𪛖𪜀-𫜴𫝀-𫠝𫠠-𬺡丽-𪘀])+" -consistent!(bullet_core_2, 
"^(?u:[A-Za-zª-ªµ-µº-ºÀ-ÖØ-öø-ˁˆ-ˑˠ-ˤˬ-ˬˮ-ˮͰ-ʹͶ-ͷͺ-ͽͿ-ͿΆ-ΆΈ-ΊΌ-ΌΎ-ΡΣ-ϵϷ-ҁҊ-ԯԱ-Ֆՙ-ՙա-ևא-תװ-ײؠ-يٮ-ٯٱ-ۓە-ەۥ-ۦۮ-ۯۺ-ۼۿ-ۿܐ-ܐܒ-ܯݍ-ޥޱ-ޱߊ-ߪߴ-ߵߺ-ߺࠀ-ࠕࠚ-ࠚࠤ-ࠤࠨ-ࠨࡀ-ࡘࢠ-ࢴऄ-हऽ-ऽॐ-ॐक़-ॡॱ-ঀঅ-ঌএ-ঐও-নপ-রল-লশ-হঽ-ঽৎ-ৎড়-ঢ়য়-ৡৰ-ৱਅ-ਊਏ-ਐਓ-ਨਪ-ਰਲ-ਲ਼ਵ-ਸ਼ਸ-ਹਖ਼-ੜਫ਼-ਫ਼ੲ-ੴઅ-ઍએ-ઑઓ-નપ-રલ-ળવ-હઽ-ઽૐ-ૐૠ-ૡૹ-ૹଅ-ଌଏ-ଐଓ-ନପ-ରଲ-ଳଵ-ହଽ-ଽଡ଼-ଢ଼ୟ-ୡୱ-ୱஃ-ஃஅ-ஊஎ-ஐஒ-கங-சஜ-ஜஞ-டண-தந-பம-ஹௐ-ௐఅ-ఌఎ-ఐఒ-నప-హఽ-ఽౘ-ౚౠ-ౡಅ-ಌಎ-ಐಒ-ನಪ-ಳವ-ಹಽ-ಽೞ-ೞೠ-ೡೱ-ೲഅ-ഌഎ-ഐഒ-ഺഽ-ഽൎ-ൎൟ-ൡൺ-ൿඅ-ඖක-නඳ-රල-ලව-ෆก-ะา-ำเ-ๆກ-ຂຄ-ຄງ-ຈຊ-ຊຍ-ຍດ-ທນ-ຟມ-ຣລ-ລວ-ວສ-ຫອ-ະາ-ຳຽ-ຽເ-ໄໆ-ໆໜ-ໟༀ-ༀཀ-ཇཉ-ཬྈ-ྌက-ဪဿ-ဿၐ-ၕၚ-ၝၡ-ၡၥ-ၦၮ-ၰၵ-ႁႎ-ႎႠ-ჅჇ-ჇჍ-Ⴭა-ჺჼ-ቈቊ-ቍቐ-ቖቘ-ቘቚ-ቝበ-ኈኊ-ኍነ-ኰኲ-ኵኸ-ኾዀ-ዀዂ-ዅወ-ዖዘ-ጐጒ-ጕጘ-ፚᎀ-ᎏᎠ-Ᏽᏸ-ᏽᐁ-ᙬᙯ-ᙿᚁ-ᚚᚠ-ᛪᛱ-ᛸᜀ-ᜌᜎ-ᜑᜠ-ᜱᝀ-ᝑᝠ-ᝬᝮ-ᝰក-ឳៗ-ៗៜ-ៜᠠ-ᡷᢀ-ᢨᢪ-ᢪᢰ-ᣵᤀ-ᤞᥐ-ᥭᥰ-ᥴᦀ-ᦫᦰ-ᧉᨀ-ᨖᨠ-ᩔᪧ-ᪧᬅ-ᬳᭅ-ᭋᮃ-ᮠᮮ-ᮯᮺ-ᯥᰀ-ᰣᱍ-ᱏᱚ-ᱽᳩ-ᳬᳮ-ᳱᳵ-ᳶᴀ-ᶿḀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙ-ὙὛ-ὛὝ-ὝὟ-ώᾀ-ᾴᾶ-ᾼι-ιῂ-ῄῆ-ῌῐ-ΐῖ-Ίῠ-Ῥῲ-ῴῶ-ῼⁱ-ⁱⁿ-ⁿₐ-ₜℂ-ℂℇ-ℇℊ-ℓℕ-ℕℙ-ℝℤ-ℤΩ-Ωℨ-ℨK-ℭℯ-ℹℼ-ℿⅅ-ⅉⅎ-ⅎↃ-ↄⰀ-Ⱞⰰ-ⱞⱠ-ⳤⳫ-ⳮⳲ-ⳳⴀ-ⴥⴧ-ⴧⴭ-ⴭⴰ-ⵧⵯ-ⵯⶀ-ⶖⶠ-ⶦⶨ-ⶮⶰ-ⶶⶸ-ⶾⷀ-ⷆⷈ-ⷎⷐ-ⷖⷘ-ⷞⸯ-ⸯ々-〆〱-〵〻-〼ぁ-ゖゝ-ゟァ-ヺー-ヿㄅ-ㄭㄱ-ㆎㆠ-ㆺㇰ-ㇿ㐀-䶵一-鿕ꀀ-ꒌꓐ-ꓽꔀ-ꘌꘐ-ꘟꘪ-ꘫꙀ-ꙮꙿ-ꚝꚠ-ꛥꜗ-ꜟꜢ-ꞈꞋ-ꞭꞰ-ꞷꟷ-ꠁꠃ-ꠅꠇ-ꠊꠌ-ꠢꡀ-ꡳꢂ-ꢳꣲ-ꣷꣻ-ꣻꣽ-ꣽꤊ-ꤥꤰ-ꥆꥠ-ꥼꦄ-ꦲꧏ-ꧏꧠ-ꧤꧦ-ꧯꧺ-ꧾꨀ-ꨨꩀ-ꩂꩄ-ꩋꩠ-ꩶꩺ-ꩺꩾ-ꪯꪱ-ꪱꪵ-ꪶꪹ-ꪽꫀ-ꫀꫂ-ꫂꫛ-ꫝꫠ-ꫪꫲ-ꫴꬁ-ꬆꬉ-ꬎꬑ-ꬖꬠ-ꬦꬨ-ꬮꬰ-ꭚꭜ-ꭥꭰ-ꯢ가-힣ힰ-ퟆퟋ-ퟻ豈-舘並-龎ff-stﬓ-ﬗיִ-יִײַ-ﬨשׁ-זּטּ-לּמּ-מּנּ-סּףּ-פּצּ-ﮱﯓ-ﴽﵐ-ﶏﶒ-ﷇﷰ-ﷻﹰ-ﹴﹶ-ﻼA-Za-zヲ-하-ᅦᅧ-ᅬᅭ-ᅲᅳ-ᅵ𐀀-𐀋𐀍-𐀦𐀨-𐀺𐀼-𐀽𐀿-𐁍𐁐-𐁝𐂀-𐃺𐊀-𐊜𐊠-𐋐𐌀-𐌟𐌰-𐍀𐍂-𐍉𐍐-𐍵𐎀-𐎝𐎠-𐏃𐏈-𐏏𐐀-𐒝𐔀-𐔧𐔰-𐕣𐘀-𐜶𐝀-𐝕𐝠-𐝧𐠀-𐠅𐠈-𐠈𐠊-𐠵𐠷-𐠸𐠼-𐠼𐠿-𐡕𐡠-𐡶𐢀-𐢞𐣠-𐣲𐣴-𐣵𐤀-𐤕𐤠-𐤹𐦀-𐦷𐦾-𐦿𐨀-𐨀𐨐-𐨓𐨕-𐨗𐨙-𐨳𐩠-𐩼𐪀-𐪜𐫀-𐫇𐫉-𐫤𐬀-𐬵𐭀-𐭕𐭠-𐭲𐮀-𐮑𐰀-𐱈𐲀-𐲲𐳀-𐳲𑀃-𑀷𑂃-𑂯𑃐-𑃨𑄃-𑄦𑅐-𑅲𑅶-𑅶𑆃-𑆲𑇁-𑇄𑇚-𑇚𑇜-𑇜𑈀-𑈑𑈓-𑈫𑊀-𑊆𑊈-𑊈𑊊-𑊍𑊏-𑊝𑊟-𑊨𑊰-𑋞𑌅-𑌌𑌏-𑌐𑌓-𑌨𑌪-𑌰𑌲-𑌳𑌵-𑌹𑌽-𑌽𑍐-𑍐𑍝-𑍡𑒀-𑒯𑓄-𑓅𑓇-𑓇𑖀-𑖮𑗘-𑗛𑘀-𑘯𑙄-𑙄𑚀-𑚪𑜀-𑜙𑢠-𑣟𑣿-𑣿𑫀-𑫸𒀀-𒎙𒒀-𒕃𓀀-𓐮𔐀-𔙆𖠀-𖨸𖩀-𖩞𖫐-𖫭𖬀-𖬯𖭀-𖭃𖭣-𖭷𖭽-𖮏𖼀-𖽄𖽐-𖽐𖾓-𖾟𛀀-𛀁𛰀-𛱪𛱰-𛱼𛲀-𛲈𛲐-𛲙𝐀-𝑔𝑖-𝒜𝒞-𝒟𝒢-𝒢𝒥-𝒦𝒩-𝒬𝒮-𝒹𝒻-𝒻𝒽-𝓃𝓅-𝔅𝔇-𝔊𝔍-𝔔𝔖-𝔜𝔞-𝔹𝔻-𝔾𝕀-𝕄𝕆-𝕆𝕊-𝕐𝕒-𝚥𝚨-𝛀𝛂-𝛚𝛜-𝛺𝛼-𝜔𝜖-𝜴𝜶-𝝎𝝐-𝝮𝝰-𝞈𝞊-𝞨𝞪-𝟂𝟄-𝟋𞠀-𞣄𞸀-𞸃𞸅-𞸟𞸡-𞸢𞸤-𞸤𞸧-𞸧𞸩-𞸲𞸴-𞸷𞸹-𞸹𞸻-𞸻𞹂-𞹂𞹇-𞹇𞹉-𞹉𞹋-𞹋𞹍-𞹏𞹑-𞹒𞹔-𞹔𞹗-𞹗𞹙-𞹙𞹛-𞹛𞹝-𞹝𞹟-𞹟𞹡-𞹢𞹤-𞹤𞹧-𞹪𞹬-𞹲𞹴-𞹷𞹹-𞹼𞹾-𞹾𞺀-𞺉𞺋-𞺛𞺡-𞺣𞺥-𞺩𞺫-𞺻𠀀-𪛖𪜀-𫜴𫝀-𫠝𫠠-𬺡丽-𪘀])+"); - -// bullet_core-0.1.1: "^(?u:d/d)((?u:[A-Za-zª-ªµ-µº-ºÀ-ÖØ-öø-ˁˆ-ˑˠ-ˤˬ-ˬˮ-ˮͰ-ʹͶ-ͷͺ-ͽͿ-ͿΆ-ΆΈ-ΊΌ-ΌΎ-ΡΣ-ϵϷ-ҁҊ-ԯԱ-Ֆՙ-ՙա-ևא-תװ-ײؠ-يٮ-ٯٱ-ۓە-ەۥ-ۦۮ-ۯۺ-ۼۿ-ۿܐ-ܐܒ-ܯݍ-ޥޱ-ޱߊ-ߪߴ-ߵߺ-ߺࠀ-ࠕࠚ-ࠚࠤ-ࠤࠨ-ࠨࡀ-ࡘࢠ-ࢴऄ-हऽ-ऽॐ-ॐक़-ॡॱ-ঀঅ-ঌএ-ঐও-নপ-রল-লশ-হঽ-ঽৎ-ৎড়-ঢ়য়-ৡৰ-ৱਅ-ਊਏ-ਐਓ-ਨਪ-ਰਲ-ਲ਼ਵ-ਸ਼ਸ-ਹਖ਼-ੜਫ਼-ਫ਼ੲ-ੴઅ-ઍએ-ઑઓ-નપ-રલ-ળવ-હઽ-ઽૐ-ૐૠ-ૡૹ-ૹଅ-ଌଏ-ଐଓ-ନପ-ରଲ-ଳଵ-ହଽ-ଽଡ଼-ଢ଼ୟ-ୡୱ-ୱஃ-ஃஅ-ஊஎ-ஐஒ-கங-சஜ-ஜஞ-டண-தந-பம-ஹௐ-ௐఅ-ఌఎ-ఐఒ-నప-హఽ-ఽౘ-ౚౠ-ౡಅ-ಌಎ-ಐಒ-ನಪ-ಳವ-ಹಽ-ಽೞ-ೞೠ-ೡೱ-ೲഅ-ഌഎ-ഐഒ-ഺഽ-ഽൎ-ൎൟ-ൡൺ-ൿඅ-ඖක-නඳ-රල-ලව-ෆก-ะา-ำเ-ๆກ-ຂຄ-ຄງ-ຈຊ-ຊຍ-ຍດ-ທນ-ຟມ-ຣລ-ລວ-ວສ-ຫອ-ະາ-ຳຽ-ຽເ-ໄໆ-ໆໜ-ໟༀ-ༀཀ-ཇཉ-ཬྈ-ྌက-ဪဿ-ဿၐ-ၕၚ-ၝၡ-ၡၥ-ၦၮ-ၰၵ-ႁႎ-ႎႠ-ჅჇ-ჇჍ-Ⴭა-ჺჼ-ቈቊ-ቍቐ-ቖቘ-ቘቚ-ቝበ-ኈኊ-ኍነ-ኰኲ-ኵኸ-ኾዀ-ዀዂ-ዅወ-ዖዘ-ጐጒ-ጕጘ-ፚᎀ-ᎏᎠ-Ᏽᏸ-ᏽᐁ-ᙬᙯ-ᙿᚁ-ᚚᚠ-ᛪᛱ-ᛸᜀ-ᜌᜎ-ᜑᜠ-ᜱᝀ-ᝑᝠ-ᝬᝮ-ᝰក-ឳៗ-ៗៜ-ៜᠠ-ᡷᢀ-ᢨᢪ-ᢪᢰ-ᣵᤀ-ᤞᥐ-ᥭᥰ-ᥴᦀ-ᦫᦰ-ᧉᨀ-ᨖᨠ-ᩔᪧ-ᪧᬅ-ᬳᭅ-ᭋᮃ-ᮠᮮ-ᮯᮺ-ᯥᰀ-ᰣᱍ-ᱏᱚ-ᱽᳩ-ᳬᳮ-ᳱᳵ-ᳶᴀ-ᶿḀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙ-ὙὛ-ὛὝ-ὝὟ-ώᾀ-ᾴᾶ-ᾼι-ιῂ-ῄῆ-ῌῐ-ΐῖ-Ίῠ-Ῥῲ-ῴῶ-ῼⁱ-ⁱⁿ-ⁿₐ-ₜℂ-ℂℇ-ℇℊ-ℓℕ-ℕℙ-ℝℤ-ℤΩ-Ωℨ-ℨK-ℭℯ-ℹℼ-ℿⅅ-ⅉⅎ-ⅎↃ-ↄⰀ-Ⱞⰰ-ⱞⱠ-ⳤⳫ-ⳮⳲ-ⳳⴀ-ⴥⴧ-ⴧⴭ-ⴭⴰ-ⵧⵯ-ⵯⶀ-ⶖⶠ-ⶦⶨ-ⶮⶰ-ⶶⶸ-ⶾⷀ-ⷆⷈ-ⷎⷐ-ⷖⷘ-ⷞⸯ-ⸯ々-〆〱-〵〻-〼ぁ-ゖゝ-ゟァ-ヺー-ヿㄅ-ㄭㄱ-ㆎㆠ-ㆺㇰ-ㇿ㐀-䶵一-鿕ꀀ-ꒌꓐ-ꓽꔀ-ꘌꘐ-ꘟꘪ-ꘫꙀ-ꙮꙿ-ꚝꚠ-ꛥꜗ-ꜟꜢ-ꞈꞋ-ꞭꞰ-ꞷꟷ-ꠁꠃ-ꠅꠇ-ꠊꠌ-ꠢꡀ-ꡳꢂ-ꢳꣲ-ꣷꣻ-ꣻꣽ-ꣽꤊ-ꤥꤰ-ꥆꥠ-ꥼꦄ-ꦲꧏ-ꧏꧠ-ꧤꧦ-ꧯꧺ-ꧾꨀ-ꨨꩀ-ꩂꩄ-ꩋꩠ-ꩶꩺ-ꩺꩾ-ꪯꪱ-ꪱꪵ-ꪶꪹ-ꪽꫀ-ꫀꫂ-ꫂꫛ-ꫝꫠ-ꫪꫲ-ꫴꬁ-ꬆꬉ-ꬎꬑ-ꬖꬠ-ꬦꬨ-ꬮꬰ-ꭚꭜ-ꭥꭰ-ꯢ가-힣ힰ-ퟆퟋ-ퟻ豈-舘並-龎ff-stﬓ-ﬗיִ-יִײַ-ﬨשׁ-זּטּ-לּמּ-מּנּ-סּףּ-פּצּ-ﮱﯓ-ﴽﵐ-ﶏﶒ-ﷇﷰ-ﷻﹰ-ﹴﹶ-ﻼA-Za-zヲ-하-ᅦᅧ-ᅬᅭ-ᅲᅳ-ᅵ𐀀-𐀋𐀍-𐀦𐀨-𐀺𐀼-𐀽𐀿-𐁍𐁐-𐁝𐂀-𐃺𐊀-𐊜𐊠-𐋐𐌀-𐌟𐌰-𐍀𐍂-𐍉𐍐-𐍵𐎀-𐎝𐎠-𐏃𐏈-𐏏𐐀-𐒝𐔀-𐔧𐔰-𐕣𐘀-𐜶𐝀-𐝕𐝠-𐝧𐠀-𐠅𐠈-𐠈𐠊-𐠵𐠷-𐠸𐠼-𐠼𐠿-𐡕𐡠-𐡶𐢀-𐢞𐣠-𐣲𐣴-𐣵𐤀-𐤕𐤠-𐤹𐦀-𐦷𐦾-𐦿𐨀-𐨀𐨐-𐨓𐨕-𐨗𐨙-𐨳𐩠-𐩼𐪀-𐪜𐫀-𐫇𐫉-𐫤𐬀-𐬵𐭀-𐭕𐭠-𐭲𐮀-𐮑𐰀-𐱈𐲀-𐲲𐳀-𐳲𑀃-𑀷𑂃-𑂯𑃐-𑃨𑄃-𑄦𑅐-𑅲𑅶-𑅶𑆃-𑆲𑇁-𑇄𑇚-𑇚𑇜-𑇜𑈀-𑈑𑈓-𑈫𑊀-𑊆𑊈-𑊈𑊊-𑊍𑊏-𑊝𑊟-𑊨𑊰-𑋞𑌅-𑌌𑌏-𑌐𑌓-𑌨𑌪-𑌰𑌲-𑌳𑌵-𑌹𑌽-𑌽𑍐-𑍐𑍝-𑍡𑒀-𑒯𑓄-𑓅𑓇-𑓇𑖀-𑖮𑗘-𑗛𑘀-𑘯𑙄-𑙄𑚀-𑚪𑜀-𑜙𑢠-𑣟𑣿-𑣿𑫀-𑫸𒀀-𒎙𒒀-𒕃𓀀-𓐮𔐀-𔙆𖠀-𖨸𖩀-𖩞𖫐-𖫭𖬀-𖬯𖭀-𖭃𖭣-𖭷𖭽-𖮏𖼀-𖽄𖽐-𖽐𖾓-𖾟𛀀-𛀁𛰀-𛱪𛱰-𛱼𛲀-𛲈𛲐-𛲙𝐀-𝑔𝑖-𝒜𝒞-𝒟𝒢-𝒢𝒥-𝒦𝒩-𝒬𝒮-𝒹𝒻-𝒻𝒽-𝓃𝓅-𝔅𝔇-𝔊𝔍-𝔔𝔖-𝔜𝔞-𝔹𝔻-𝔾𝕀-𝕄𝕆-𝕆𝕊-𝕐𝕒-𝚥𝚨-𝛀𝛂-𝛚𝛜-𝛺𝛼-𝜔𝜖-𝜴𝜶-𝝎𝝐-𝝮𝝰-𝞈𝞊-𝞨𝞪-𝟂𝟄-𝟋𞠀-𞣄𞸀-𞸃𞸅-𞸟𞸡-𞸢𞸤-𞸤𞸧-𞸧𞸩-𞸲𞸴-𞸷𞸹-𞸹𞸻-𞸻𞹂-𞹂𞹇-𞹇𞹉-𞹉𞹋-𞹋𞹍-𞹏𞹑-𞹒𞹔-𞹔𞹗-𞹗𞹙-𞹙𞹛-𞹛𞹝-𞹝𞹟-𞹟𞹡-𞹢𞹤-𞹤𞹧-𞹪𞹬-𞹲𞹴-𞹷𞹹-𞹼𞹾-𞹾𞺀-𞺉𞺋-𞺛𞺡-𞺣𞺥-𞺩𞺫-𞺻𠀀-𪛖𪜀-𫜴𫝀-𫠝𫠠-𬺡丽-𪘀])+)" -consistent!(bullet_core_3, 
"^(?u:d/d)((?u:[A-Za-zª-ªµ-µº-ºÀ-ÖØ-öø-ˁˆ-ˑˠ-ˤˬ-ˬˮ-ˮͰ-ʹͶ-ͷͺ-ͽͿ-ͿΆ-ΆΈ-ΊΌ-ΌΎ-ΡΣ-ϵϷ-ҁҊ-ԯԱ-Ֆՙ-ՙա-ևא-תװ-ײؠ-يٮ-ٯٱ-ۓە-ەۥ-ۦۮ-ۯۺ-ۼۿ-ۿܐ-ܐܒ-ܯݍ-ޥޱ-ޱߊ-ߪߴ-ߵߺ-ߺࠀ-ࠕࠚ-ࠚࠤ-ࠤࠨ-ࠨࡀ-ࡘࢠ-ࢴऄ-हऽ-ऽॐ-ॐक़-ॡॱ-ঀঅ-ঌএ-ঐও-নপ-রল-লশ-হঽ-ঽৎ-ৎড়-ঢ়য়-ৡৰ-ৱਅ-ਊਏ-ਐਓ-ਨਪ-ਰਲ-ਲ਼ਵ-ਸ਼ਸ-ਹਖ਼-ੜਫ਼-ਫ਼ੲ-ੴઅ-ઍએ-ઑઓ-નપ-રલ-ળવ-હઽ-ઽૐ-ૐૠ-ૡૹ-ૹଅ-ଌଏ-ଐଓ-ନପ-ରଲ-ଳଵ-ହଽ-ଽଡ଼-ଢ଼ୟ-ୡୱ-ୱஃ-ஃஅ-ஊஎ-ஐஒ-கங-சஜ-ஜஞ-டண-தந-பம-ஹௐ-ௐఅ-ఌఎ-ఐఒ-నప-హఽ-ఽౘ-ౚౠ-ౡಅ-ಌಎ-ಐಒ-ನಪ-ಳವ-ಹಽ-ಽೞ-ೞೠ-ೡೱ-ೲഅ-ഌഎ-ഐഒ-ഺഽ-ഽൎ-ൎൟ-ൡൺ-ൿඅ-ඖක-නඳ-රල-ලව-ෆก-ะา-ำเ-ๆກ-ຂຄ-ຄງ-ຈຊ-ຊຍ-ຍດ-ທນ-ຟມ-ຣລ-ລວ-ວສ-ຫອ-ະາ-ຳຽ-ຽເ-ໄໆ-ໆໜ-ໟༀ-ༀཀ-ཇཉ-ཬྈ-ྌက-ဪဿ-ဿၐ-ၕၚ-ၝၡ-ၡၥ-ၦၮ-ၰၵ-ႁႎ-ႎႠ-ჅჇ-ჇჍ-Ⴭა-ჺჼ-ቈቊ-ቍቐ-ቖቘ-ቘቚ-ቝበ-ኈኊ-ኍነ-ኰኲ-ኵኸ-ኾዀ-ዀዂ-ዅወ-ዖዘ-ጐጒ-ጕጘ-ፚᎀ-ᎏᎠ-Ᏽᏸ-ᏽᐁ-ᙬᙯ-ᙿᚁ-ᚚᚠ-ᛪᛱ-ᛸᜀ-ᜌᜎ-ᜑᜠ-ᜱᝀ-ᝑᝠ-ᝬᝮ-ᝰក-ឳៗ-ៗៜ-ៜᠠ-ᡷᢀ-ᢨᢪ-ᢪᢰ-ᣵᤀ-ᤞᥐ-ᥭᥰ-ᥴᦀ-ᦫᦰ-ᧉᨀ-ᨖᨠ-ᩔᪧ-ᪧᬅ-ᬳᭅ-ᭋᮃ-ᮠᮮ-ᮯᮺ-ᯥᰀ-ᰣᱍ-ᱏᱚ-ᱽᳩ-ᳬᳮ-ᳱᳵ-ᳶᴀ-ᶿḀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙ-ὙὛ-ὛὝ-ὝὟ-ώᾀ-ᾴᾶ-ᾼι-ιῂ-ῄῆ-ῌῐ-ΐῖ-Ίῠ-Ῥῲ-ῴῶ-ῼⁱ-ⁱⁿ-ⁿₐ-ₜℂ-ℂℇ-ℇℊ-ℓℕ-ℕℙ-ℝℤ-ℤΩ-Ωℨ-ℨK-ℭℯ-ℹℼ-ℿⅅ-ⅉⅎ-ⅎↃ-ↄⰀ-Ⱞⰰ-ⱞⱠ-ⳤⳫ-ⳮⳲ-ⳳⴀ-ⴥⴧ-ⴧⴭ-ⴭⴰ-ⵧⵯ-ⵯⶀ-ⶖⶠ-ⶦⶨ-ⶮⶰ-ⶶⶸ-ⶾⷀ-ⷆⷈ-ⷎⷐ-ⷖⷘ-ⷞⸯ-ⸯ々-〆〱-〵〻-〼ぁ-ゖゝ-ゟァ-ヺー-ヿㄅ-ㄭㄱ-ㆎㆠ-ㆺㇰ-ㇿ㐀-䶵一-鿕ꀀ-ꒌꓐ-ꓽꔀ-ꘌꘐ-ꘟꘪ-ꘫꙀ-ꙮꙿ-ꚝꚠ-ꛥꜗ-ꜟꜢ-ꞈꞋ-ꞭꞰ-ꞷꟷ-ꠁꠃ-ꠅꠇ-ꠊꠌ-ꠢꡀ-ꡳꢂ-ꢳꣲ-ꣷꣻ-ꣻꣽ-ꣽꤊ-ꤥꤰ-ꥆꥠ-ꥼꦄ-ꦲꧏ-ꧏꧠ-ꧤꧦ-ꧯꧺ-ꧾꨀ-ꨨꩀ-ꩂꩄ-ꩋꩠ-ꩶꩺ-ꩺꩾ-ꪯꪱ-ꪱꪵ-ꪶꪹ-ꪽꫀ-ꫀꫂ-ꫂꫛ-ꫝꫠ-ꫪꫲ-ꫴꬁ-ꬆꬉ-ꬎꬑ-ꬖꬠ-ꬦꬨ-ꬮꬰ-ꭚꭜ-ꭥꭰ-ꯢ가-힣ힰ-ퟆퟋ-ퟻ豈-舘並-龎ff-stﬓ-ﬗיִ-יִײַ-ﬨשׁ-זּטּ-לּמּ-מּנּ-סּףּ-פּצּ-ﮱﯓ-ﴽﵐ-ﶏﶒ-ﷇﷰ-ﷻﹰ-ﹴﹶ-ﻼA-Za-zヲ-하-ᅦᅧ-ᅬᅭ-ᅲᅳ-ᅵ𐀀-𐀋𐀍-𐀦𐀨-𐀺𐀼-𐀽𐀿-𐁍𐁐-𐁝𐂀-𐃺𐊀-𐊜𐊠-𐋐𐌀-𐌟𐌰-𐍀𐍂-𐍉𐍐-𐍵𐎀-𐎝𐎠-𐏃𐏈-𐏏𐐀-𐒝𐔀-𐔧𐔰-𐕣𐘀-𐜶𐝀-𐝕𐝠-𐝧𐠀-𐠅𐠈-𐠈𐠊-𐠵𐠷-𐠸𐠼-𐠼𐠿-𐡕𐡠-𐡶𐢀-𐢞𐣠-𐣲𐣴-𐣵𐤀-𐤕𐤠-𐤹𐦀-𐦷𐦾-𐦿𐨀-𐨀𐨐-𐨓𐨕-𐨗𐨙-𐨳𐩠-𐩼𐪀-𐪜𐫀-𐫇𐫉-𐫤𐬀-𐬵𐭀-𐭕𐭠-𐭲𐮀-𐮑𐰀-𐱈𐲀-𐲲𐳀-𐳲𑀃-𑀷𑂃-𑂯𑃐-𑃨𑄃-𑄦𑅐-𑅲𑅶-𑅶𑆃-𑆲𑇁-𑇄𑇚-𑇚𑇜-𑇜𑈀-𑈑𑈓-𑈫𑊀-𑊆𑊈-𑊈𑊊-𑊍𑊏-𑊝𑊟-𑊨𑊰-𑋞𑌅-𑌌𑌏-𑌐𑌓-𑌨𑌪-𑌰𑌲-𑌳𑌵-𑌹𑌽-𑌽𑍐-𑍐𑍝-𑍡𑒀-𑒯𑓄-𑓅𑓇-𑓇𑖀-𑖮𑗘-𑗛𑘀-𑘯𑙄-𑙄𑚀-𑚪𑜀-𑜙𑢠-𑣟𑣿-𑣿𑫀-𑫸𒀀-𒎙𒒀-𒕃𓀀-𓐮𔐀-𔙆𖠀-𖨸𖩀-𖩞𖫐-𖫭𖬀-𖬯𖭀-𖭃𖭣-𖭷𖭽-𖮏𖼀-𖽄𖽐-𖽐𖾓-𖾟𛀀-𛀁𛰀-𛱪𛱰-𛱼𛲀-𛲈𛲐-𛲙𝐀-𝑔𝑖-𝒜𝒞-𝒟𝒢-𝒢𝒥-𝒦𝒩-𝒬𝒮-𝒹𝒻-𝒻𝒽-𝓃𝓅-𝔅𝔇-𝔊𝔍-𝔔𝔖-𝔜𝔞-𝔹𝔻-𝔾𝕀-𝕄𝕆-𝕆𝕊-𝕐𝕒-𝚥𝚨-𝛀𝛂-𝛚𝛜-𝛺𝛼-𝜔𝜖-𝜴𝜶-𝝎𝝐-𝝮𝝰-𝞈𝞊-𝞨𝞪-𝟂𝟄-𝟋𞠀-𞣄𞸀-𞸃𞸅-𞸟𞸡-𞸢𞸤-𞸤𞸧-𞸧𞸩-𞸲𞸴-𞸷𞸹-𞸹𞸻-𞸻𞹂-𞹂𞹇-𞹇𞹉-𞹉𞹋-𞹋𞹍-𞹏𞹑-𞹒𞹔-𞹔𞹗-𞹗𞹙-𞹙𞹛-𞹛𞹝-𞹝𞹟-𞹟𞹡-𞹢𞹤-𞹤𞹧-𞹪𞹬-𞹲𞹴-𞹷𞹹-𞹼𞹾-𞹾𞺀-𞺉𞺋-𞺛𞺡-𞺣𞺥-𞺩𞺫-𞺻𠀀-𪛖𪜀-𫜴𫝀-𫠝𫠠-𬺡丽-𪘀])+)"); - -// bullet_core-0.1.1: "^(?u:\\()" -consistent!(bullet_core_4, "^(?u:\\()"); - -// bullet_core-0.1.1: "^(?u:\\))" -consistent!(bullet_core_5, "^(?u:\\))"); - -// bullet_core-0.1.1: "^(?u:\\*)" -consistent!(bullet_core_6, "^(?u:\\*)"); - -// bullet_core-0.1.1: "^(?u:\\+)" -consistent!(bullet_core_7, "^(?u:\\+)"); - -// bullet_core-0.1.1: "^(?u:,)" -consistent!(bullet_core_8, "^(?u:,)"); - -// bullet_core-0.1.1: "^(?u:\\-)" -consistent!(bullet_core_9, "^(?u:\\-)"); - -// bullet_core-0.1.1: "^(?u:/)" -consistent!(bullet_core_10, "^(?u:/)"); - -// bullet_core-0.1.1: "^(?u:\\[)" -consistent!(bullet_core_11, "^(?u:\\[)"); - -// bullet_core-0.1.1: "^(?u:\\])" -consistent!(bullet_core_12, "^(?u:\\])"); - -// bullet_core-0.1.1: "^(?u:\\^)" -consistent!(bullet_core_13, "^(?u:\\^)"); - -// bullet_core-0.1.1: "^(?u:·)" -consistent!(bullet_core_14, "^(?u:·)"); - -// actix-web-0.6.13: "//+" -consistent!(actix_web_0, "//+"); - -// actix-web-0.6.13: "//+" -consistent!(actix_web_1, "//+"); - -// althea_kernel_interface-0.1.0: r"(\S*) .* (\S*) (REACHABLE|STALE|DELAY)" -consistent!( - althea_kernel_interface_0, - r"(\S*) .* (\S*) (REACHABLE|STALE|DELAY)" -); - -// althea_kernel_interface-0.1.0: r"-s (.*) --ip6-dst (.*)/.* bcnt = (.*)" -consistent!( - althea_kernel_interface_1, - r"-s (.*) --ip6-dst (.*)/.* bcnt = (.*)" -); - -// alcibiades-0.3.0: r"\buci(?:\s|$)" -consistent!(alcibiades_0, r"\buci(?:\s|$)"); - -// ruma-identifiers-0.11.0: r"\A[a-z0-9._=-]+\z" -consistent!(ruma_identifiers_0, r"\A[a-z0-9._=-]+\z"); - -// rusqbin-0.2.3: r"/rusqbins/((?i)[A-F0-9]{8}\-[A-F0-9]{4}\-4[A-F0-9]{3}\-[89AB][A-F0-9]{3}\-[A-F0-9]{12})$" -consistent!(rusqbin_0, r"/rusqbins/((?i)[A-F0-9]{8}\-[A-F0-9]{4}\-4[A-F0-9]{3}\-[89AB][A-F0-9]{3}\-[A-F0-9]{12})$"); - -// rusqbin-0.2.3: 
r"/rusqbins/((?i)[A-F0-9]{8}\-[A-F0-9]{4}\-4[A-F0-9]{3}\-[89AB][A-F0-9]{3}\-[A-F0-9]{12})/requests/?$" -consistent!(rusqbin_1, r"/rusqbins/((?i)[A-F0-9]{8}\-[A-F0-9]{4}\-4[A-F0-9]{3}\-[89AB][A-F0-9]{3}\-[A-F0-9]{12})/requests/?$"); - -// rust-install-0.0.4: r"^(nightly|beta|stable)(?:-(\d{4}-\d{2}-\d{2}))?$" -consistent!( - rust_install_0, - r"^(nightly|beta|stable)(?:-(\d{4}-\d{2}-\d{2}))?$" -); - -// rust_inbox-0.0.5: "^+(.*)\r\n" -consistent!(rust_inbox_0, "^+(.*)\r\n"); - -// rust_inbox-0.0.5: r"^\* CAPABILITY (.*)\r\n" -consistent!(rust_inbox_1, r"^\* CAPABILITY (.*)\r\n"); - -// rust_inbox-0.0.5: r"^([a-zA-Z0-9]+) (OK|NO|BAD)(.*)" -consistent!(rust_inbox_2, r"^([a-zA-Z0-9]+) (OK|NO|BAD)(.*)"); - -// rust_inbox-0.0.5: r"^\* (\d+) EXISTS\r\n" -consistent!(rust_inbox_3, r"^\* (\d+) EXISTS\r\n"); - -// rust_inbox-0.0.5: r"^\* (\d+) RECENT\r\n" -consistent!(rust_inbox_4, r"^\* (\d+) RECENT\r\n"); - -// rust_inbox-0.0.5: r"^\* FLAGS (.+)\r\n" -consistent!(rust_inbox_5, r"^\* FLAGS (.+)\r\n"); - -// rust_inbox-0.0.5: r"^\* OK \[UNSEEN (\d+)\](.*)\r\n" -consistent!(rust_inbox_6, r"^\* OK \[UNSEEN (\d+)\](.*)\r\n"); - -// rust_inbox-0.0.5: r"^\* OK \[UIDVALIDITY (\d+)\](.*)\r\n" -consistent!(rust_inbox_7, r"^\* OK \[UIDVALIDITY (\d+)\](.*)\r\n"); - -// rust_inbox-0.0.5: r"^\* OK \[UIDNEXT (\d+)\](.*)\r\n" -consistent!(rust_inbox_8, r"^\* OK \[UIDNEXT (\d+)\](.*)\r\n"); - -// rust_inbox-0.0.5: r"^\* OK \[PERMANENTFLAGS (.+)\](.*)\r\n" -consistent!(rust_inbox_9, r"^\* OK \[PERMANENTFLAGS (.+)\](.*)\r\n"); - -// rustml-0.0.7: r"^[a-z]+ (\d+)$" -consistent!(rustml_0, r"^[a-z]+ (\d+)$"); - -// rustml-0.0.7: r"^[a-z]+ (\d+)$" -consistent!(rustml_1, r"^[a-z]+ (\d+)$"); - -// rustml-0.0.7: r"^[a-z]+ (\d+)$" -consistent!(rustml_2, r"^[a-z]+ (\d+)$"); - -// rustfmt-0.10.0: r"([^\\](\\\\)*)\\[\n\r][[:space:]]*" -consistent!(rustfmt_0, r"([^\\](\\\\)*)\\[\n\r][[:space:]]*"); - -// rustfmt-core-0.4.0: r"(^\s*$)|(^\s*//\s*rustfmt-[^:]+:\s*\S+)" -consistent!(rustfmt_core_0, r"(^\s*$)|(^\s*//\s*rustfmt-[^:]+:\s*\S+)"); - -// rustfmt-core-0.4.0: r"^## `([^`]+)`" -consistent!(rustfmt_core_1, r"^## `([^`]+)`"); - -// rustfmt-core-0.4.0: r"([^\\](\\\\)*)\\[\n\r][[:space:]]*" -consistent!(rustfmt_core_2, r"([^\\](\\\\)*)\\[\n\r][[:space:]]*"); - -// rustfmt-core-0.4.0: r"\s;" -consistent!(rustfmt_core_3, r"\s;"); - -// rust-enum-derive-0.4.0: r"^(0x)?([:digit:]+)$" -consistent!(rust_enum_derive_0, r"^(0x)?([:digit:]+)$"); - -// rust-enum-derive-0.4.0: r"^([:digit:]+)[:space:]*<<[:space:]*([:digit:]+)$" -consistent!( - rust_enum_derive_1, - r"^([:digit:]+)[:space:]*<<[:space:]*([:digit:]+)$" -); - -// rust-enum-derive-0.4.0: r"^[:space:]*([[:alnum:]_]+)([:space:]*=[:space:]*([:graph:]+))?[:space:]*," -consistent!(rust_enum_derive_2, r"^[:space:]*([[:alnum:]_]+)([:space:]*=[:space:]*([:graph:]+))?[:space:]*,"); - -// rust-enum-derive-0.4.0: r"^#define[:space:]+([:graph:]+)[:space:]+([:graph:]+)" -consistent!( - rust_enum_derive_3, - r"^#define[:space:]+([:graph:]+)[:space:]+([:graph:]+)" -); - -// rustsourcebundler-0.2.0: r"^\s*pub mod (.+);$" -consistent!(rustsourcebundler_0, r"^\s*pub mod (.+);$"); - -// rustsourcebundler-0.2.0: r"^\s*pub mod (.+);$" -consistent!(rustsourcebundler_1, r"^\s*pub mod (.+);$"); - -// rustfmt-nightly-0.8.2: r"([^\\](\\\\)*)\\[\n\r][[:space:]]*" -consistent!(rustfmt_nightly_0, r"([^\\](\\\\)*)\\[\n\r][[:space:]]*"); - -// rustfmt-nightly-0.8.2: r"\s;" -consistent!(rustfmt_nightly_1, r"\s;"); - -// rustache-0.1.0: r"(?s)(.*?)([ \t\r\n]*)(\{\{(\{?\S?\s*?[\w\.\s]*.*?\s*?\}?)\}\})([ 
\t\r\n]*)" -consistent!(rustache_0, r"(?s)(.*?)([ \t\r\n]*)(\{\{(\{?\S?\s*?[\w\.\s]*.*?\s*?\}?)\}\})([ \t\r\n]*)"); - -// rustfilt-0.2.0: r"_ZN[\$\._[:alnum:]]*" -consistent!(rustfilt_0, r"_ZN[\$\._[:alnum:]]*"); - -// rustache-lists-0.1.2: r"(?s)(.*?)([ \t\r\n]*)(\{\{(\{?\S?\s*?[\w\.\s]*.*?\s*?\}?)\}\})([ \t\r\n]*)" -consistent!(rustache_lists_0, r"(?s)(.*?)([ \t\r\n]*)(\{\{(\{?\S?\s*?[\w\.\s]*.*?\s*?\}?)\}\})([ \t\r\n]*)"); - -// rural-0.7.3: "(.+)=(.+)" -consistent!(rural_0, "(.+)=(.+)"); - -// rural-0.7.3: "(.*):(.+)" -consistent!(rural_1, "(.*):(.+)"); - -// rural-0.7.3: "(.+):=(.+)" -consistent!(rural_2, "(.+):=(.+)"); - -// rural-0.7.3: "(.*)==(.+)" -consistent!(rural_3, "(.*)==(.+)"); - -// rusoto_credential-0.11.0: r"^\[([^\]]+)\]$" -consistent!(rusoto_credential_0, r"^\[([^\]]+)\]$"); - -// rumblebars-0.3.0: "([:blank:]*)$" -consistent!(rumblebars_0, "([:blank:]*)$"); - -// rumblebars-0.3.0: "(\r?\n)[:blank:]*(\\{\\{~?[#!/](?:\\}?[^}])*\\}\\})[:blank:]*(:?\r?\n)?\\z" -consistent!(rumblebars_1, "(\r?\n)[:blank:]*(\\{\\{~?[#!/](?:\\}?[^}])*\\}\\})[:blank:]*(:?\r?\n)?\\z"); - -// rumblebars-0.3.0: "(\r?\n[:blank:]*)(\\{\\{~?>(?:\\}?[^}])*\\}\\})[:blank:]*(:?\r?\n)?\\z" -consistent!( - rumblebars_2, - "(\r?\n[:blank:]*)(\\{\\{~?>(?:\\}?[^}])*\\}\\})[:blank:]*(:?\r?\n)?\\z" -); - -// rumblebars-0.3.0: "((?:[:blank:]|\r?\n)*)(\r?\n)[:blank:]*$" -consistent!(rumblebars_3, "((?:[:blank:]|\r?\n)*)(\r?\n)[:blank:]*$"); - -// rumblebars-0.3.0: "^([:blank:]*\r?\n)(.*)" -consistent!(rumblebars_4, "^([:blank:]*\r?\n)(.*)"); - -// diesel_cli-1.3.1: r"(?P<stamp>[\d-]*)_hello" -consistent!(diesel_cli_0, r"(?P<stamp>[\d-]*)_hello"); - -// dishub-0.1.1: r"(\d+)s" -consistent!(dishub_0, r"(\d+)s"); - -// spreadsheet_textconv-0.1.0: r"\n" -consistent!(spreadsheet_textconv_0, r"\n"); - -// spreadsheet_textconv-0.1.0: r"\r" -consistent!(spreadsheet_textconv_1, r"\r"); - -// spreadsheet_textconv-0.1.0: r"\t" -consistent!(spreadsheet_textconv_2, r"\t"); - -// split_aud-0.1.0: r"DELAY (-?\d+)ms" -consistent!(split_aud_0, r"DELAY (-?\d+)ms"); - -// split_aud-0.1.0: r"Trim\((\d+), ?(\d+)\)" -consistent!(split_aud_1, r"Trim\((\d+), ?(\d+)\)"); - -// spotrust-0.0.5: r"spotify:[a-z]+:[a-zA-Z0-9]+" -consistent!(spotrust_0, r"spotify:[a-z]+:[a-zA-Z0-9]+"); - -// spaceslugs-0.1.0: r"[^\x00-\x7F]" -consistent!(spaceslugs_0, r"[^\x00-\x7F]"); - -// spaceslugs-0.1.0: r"[']+" -consistent!(spaceslugs_1, r"[']+"); - -// spaceslugs-0.1.0: r"\W+" -consistent!(spaceslugs_2, r"\W+"); - -// spaceslugs-0.1.0: r"[ ]+" -consistent!(spaceslugs_3, r"[ ]+"); - -// space_email_api-0.1.1: "PHPSESSID=([0-9a-f]+)" -consistent!(space_email_api_0, "PHPSESSID=([0-9a-f]+)"); - -// lorikeet-0.7.0: "[^0-9.,]" -consistent!(lorikeet_0, "[^0-9.,]"); - -// claude-0.3.0: r"^(?:\b|(-)?)(\p{Currency_Symbol})?((?:(?:\d{1,3}[\.,])+\d{3})|\d+)(?:[\.,](\d{2}))?\b$" -consistent!(claude_0, r"^(?:\b|(-)?)(\p{Currency_Symbol})?((?:(?:\d{1,3}[\.,])+\d{3})|\d+)(?:[\.,](\d{2}))?\b$"); - -// clam-0.1.6: r"<%=\s*(.+?)\s*%>" -consistent!(clam_0, r"<%=\s*(.+?)\s*%>"); - -// classifier-0.0.3: r"(\s)" -consistent!(classifier_0, r"(\s)"); - -// click-0.3.2: r"(-----BEGIN .*-----\n)((?:(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)*\n)+)(-----END .*-----)" -consistent!(click_0, r"(-----BEGIN .*-----\n)((?:(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)*\n)+)(-----END .*-----)"); - -// click-0.3.2: r"-----BEGIN PRIVATE KEY-----" -consistent!(click_1, r"-----BEGIN PRIVATE KEY-----"); - -// ultrastar-txt-0.1.2: r"#([A-Z3a-z]*):(.*)" 
-consistent!(ultrastar_txt_0, r"#([A-Z3a-z]*):(.*)"); - -// ultrastar-txt-0.1.2: "^-\\s?(-?[0-9]+)\\s*$" -consistent!(ultrastar_txt_1, "^-\\s?(-?[0-9]+)\\s*$"); - -// ultrastar-txt-0.1.2: "^-\\s?(-?[0-9]+)\\s+(-?[0-9]+)" -consistent!(ultrastar_txt_2, "^-\\s?(-?[0-9]+)\\s+(-?[0-9]+)"); - -// ultrastar-txt-0.1.2: "^(.)\\s*(-?[0-9]+)\\s+(-?[0-9]+)\\s+(-?[0-9]+)\\s?(.*)" -consistent!( - ultrastar_txt_3, - "^(.)\\s*(-?[0-9]+)\\s+(-?[0-9]+)\\s+(-?[0-9]+)\\s?(.*)" -); - -// ultrastar-txt-0.1.2: "^P\\s?(-?[0-9]+)" -consistent!(ultrastar_txt_4, "^P\\s?(-?[0-9]+)"); - -// db-accelerate-2.0.0: r"^template\.add($|\..+$)" -consistent!(db_accelerate_0, r"^template\.add($|\..+$)"); - -// db-accelerate-2.0.0: r"^template\.sub($|\..+$)" -consistent!(db_accelerate_1, r"^template\.sub($|\..+$)"); - -// sterling-0.3.0: r"(\d+)([cegps])" -consistent!(sterling_0, r"(\d+)([cegps])"); - -// stache-0.2.0: r"[^\w]" -consistent!(stache_0, r"[^\w]"); - -// strukt-0.1.0: "\"([<>]?)([xcbB\\?hHiIlLqQfdspP]*)\"" -consistent!(strukt_0, "\"([<>]?)([xcbB\\?hHiIlLqQfdspP]*)\""); - -// steamid-ng-0.3.1: r"^STEAM_([0-4]):([0-1]):([0-9]{1,10})$" -consistent!(steamid_ng_0, r"^STEAM_([0-4]):([0-1]):([0-9]{1,10})$"); - -// steamid-ng-0.3.1: r"^\[([AGMPCgcLTIUai]):([0-4]):([0-9]{1,10})(:([0-9]+))?\]$" -consistent!( - steamid_ng_1, - r"^\[([AGMPCgcLTIUai]):([0-4]):([0-9]{1,10})(:([0-9]+))?\]$" -); - -// strscan-0.1.1: r"^\w+" -consistent!(strscan_0, r"^\w+"); - -// strscan-0.1.1: r"^\s+" -consistent!(strscan_1, r"^\s+"); - -// strscan-0.1.1: r"^\w+" -consistent!(strscan_2, r"^\w+"); - -// strscan-0.1.1: r"^\s+" -consistent!(strscan_3, r"^\s+"); - -// strscan-0.1.1: r"^(\w+)\s+" -consistent!(strscan_4, r"^(\w+)\s+"); - -// tk-carbon-0.2.0: r"^([a-zA-Z0-9\.-]+)(?:\s+(\d+))$" -consistent!(tk_carbon_0, r"^([a-zA-Z0-9\.-]+)(?:\s+(\d+))$"); - -// tk-carbon-0.2.0: r"^([a-zA-Z0-9\.-]+)(?:\s+(\d+))$" -consistent!(tk_carbon_1, r"^([a-zA-Z0-9\.-]+)(?:\s+(\d+))$"); - -// evalrs-0.0.10: r"extern\s+crate\s+([a-z0-9_]+)\s*;(\s*//(.+))?" 
-consistent!(evalrs_0, r"extern\s+crate\s+([a-z0-9_]+)\s*;(\s*//(.+))?"); - -// evalrs-0.0.10: r"(?m)^# " -consistent!(evalrs_1, r"(?m)^# "); - -// evalrs-0.0.10: r"(?m)^\s*fn +main *\( *\)" -consistent!(evalrs_2, r"(?m)^\s*fn +main *\( *\)"); - -// evalrs-0.0.10: r"(extern\s+crate\s+[a-z0-9_]+\s*;)" -consistent!(evalrs_3, r"(extern\s+crate\s+[a-z0-9_]+\s*;)"); - -// gate_build-0.5.0: "(.*)_t([0-9]+)" -consistent!(gate_build_0, "(.*)_t([0-9]+)"); - -// rake-0.1.1: r"[^\P{P}-]|\s+-\s+" -consistent!(rake_0, r"[^\P{P}-]|\s+-\s+"); - -// rafy-0.2.1: r"^.*(?:(?:youtu\.be/|v/|vi/|u/w/|embed/)|(?:(?:watch)?\?v(?:i)?=|\&v(?:i)?=))([^#\&\?]*).*" -consistent!(rafy_0, r"^.*(?:(?:youtu\.be/|v/|vi/|u/w/|embed/)|(?:(?:watch)?\?v(?:i)?=|\&v(?:i)?=))([^#\&\?]*).*"); - -// raven-0.2.1: r"^(?P<protocol>.*?)://(?P<public_key>.*?):(?P<secret_key>.*?)@(?P<host>.*?)/(?P<path>.*/)?(?P<project_id>.*)$" -consistent!(raven_0, r"^(?P<protocol>.*?)://(?P<public_key>.*?):(?P<secret_key>.*?)@(?P<host>.*?)/(?P<path>.*/)?(?P<project_id>.*)$"); - -// rargs-0.2.0: r"\{[[:space:]]*[^{}]*[[:space:]]*\}" -consistent!(rargs_0, r"\{[[:space:]]*[^{}]*[[:space:]]*\}"); - -// rargs-0.2.0: r"^\{[[:space:]]*(?P<name>[[:word:]]*)[[:space:]]*\}$" -consistent!(rargs_1, r"^\{[[:space:]]*(?P<name>[[:word:]]*)[[:space:]]*\}$"); - -// rargs-0.2.0: r"^\{[[:space:]]*(?P<num>-?\d+)[[:space:]]*\}$" -consistent!(rargs_2, r"^\{[[:space:]]*(?P<num>-?\d+)[[:space:]]*\}$"); - -// rargs-0.2.0: r"^\{(?P<left>-?\d*)?\.\.(?P<right>-?\d*)?(?::(?P<sep>.*))?\}$" -consistent!( - rargs_3, - r"^\{(?P<left>-?\d*)?\.\.(?P<right>-?\d*)?(?::(?P<sep>.*))?\}$" -); - -// rargs-0.2.0: r"(.*?)[[:space:]]+|(.*?)$" -consistent!(rargs_4, r"(.*?)[[:space:]]+|(.*?)$"); - -// indradb-lib-0.15.0: r"[a-zA-Z0-9]{8}" -consistent!(indradb_lib_0, r"[a-zA-Z0-9]{8}"); - -// fungi-lang-0.1.50: r"::" -consistent!(fungi_lang_0, r"::"); - -// nickel-0.10.1: "/hello/(?P<name>[a-zA-Z]+)" -consistent!(nickel_0, "/hello/(?P<name>[a-zA-Z]+)"); - -// nickel-0.10.1: "/hello/(?P<name>[a-zA-Z]+)" -consistent!(nickel_1, "/hello/(?P<name>[a-zA-Z]+)"); - -// pact_verifier-0.4.0: r"\{(\w+)\}" -consistent!(pact_verifier_0, r"\{(\w+)\}"); - -// pact_matching-0.4.1: "application/.*json" -consistent!(pact_matching_0, "application/.*json"); - -// pact_matching-0.4.1: "application/json.*" -consistent!(pact_matching_1, "application/json.*"); - -// pact_matching-0.4.1: "application/.*xml" -consistent!(pact_matching_2, "application/.*xml"); - -// pangu-0.2.0: "([\"'\\(\\[\\{{<\u{201c}])(\\s*)(.+?)(\\s*)([\"'\\)\\]\\}}>\u{201d}])" -consistent!( - pangu_0, - "([\"'\\(\\[\\{{<\u{201c}])(\\s*)(.+?)(\\s*)([\"'\\)\\]\\}}>\u{201d}])" -); - -// pangu-0.2.0: "([\\(\\[\\{{<\u{201c}]+)(\\s*)(.+?)(\\s*)([\\)\\]\\}}>\u{201d}]+)" -consistent!( - pangu_1, - "([\\(\\[\\{{<\u{201c}]+)(\\s*)(.+?)(\\s*)([\\)\\]\\}}>\u{201d}]+)" -); - -// parser-haskell-0.2.0: r"\{-[\s\S]*?-\}" -consistent!(parser_haskell_0, r"\{-[\s\S]*?-\}"); - -// parser-haskell-0.2.0: r"(?m);+\s*$" -consistent!(parser_haskell_1, r"(?m);+\s*$"); - -// parser-haskell-0.2.0: r"(?m)^#(if|ifn?def|endif|else|include|elif).*" -consistent!(parser_haskell_2, r"(?m)^#(if|ifn?def|endif|else|include|elif).*"); - -// parser-haskell-0.2.0: r"'([^'\\]|\\[A-Z]{1,3}|\\.)'" -consistent!(parser_haskell_3, r"'([^'\\]|\\[A-Z]{1,3}|\\.)'"); - -// parser-haskell-0.2.0: r"forall\s+(.*?)\." 
-consistent!(parser_haskell_4, r"forall\s+(.*?)\."); - -// html2md-0.2.1: "\\s{2,}" -consistent!(html2md_0, "\\s{2,}"); - -// html2md-0.2.1: "\\n{2,}" -consistent!(html2md_1, "\\n{2,}"); - -// html2md-0.2.1: "(?m)(\\S) $" -consistent!(html2md_2, "(?m)(\\S) $"); - -// html2md-0.2.1: "(?m)^[-*] " -consistent!(html2md_3, "(?m)^[-*] "); - -// ovpnfile-0.1.2: r"#.*$" -consistent!(ovpnfile_0, r"#.*$"); - -// ovpnfile-0.1.2: r"^<(\S+)>" -consistent!(ovpnfile_1, r"^<(\S+)>"); - -// ovpnfile-0.1.2: r"^</(\S+)>" -consistent!(ovpnfile_2, r"^</(\S+)>"); - -// screenruster-saver-fractal-0.1.1: r"#([:xdigit:]{2})([:xdigit:]{2})([:xdigit:]{2})" -consistent!( - screenruster_saver_fractal_0, - r"#([:xdigit:]{2})([:xdigit:]{2})([:xdigit:]{2})" -); - -// scarlet-0.2.2: r"rgb\((?: *(\d{1,3}),)(?: *(\d{1,3}),)(?: *(\d{1,3}))\)" -consistent!( - scarlet_0, - r"rgb\((?: *(\d{1,3}),)(?: *(\d{1,3}),)(?: *(\d{1,3}))\)" -); - -// cpp_to_rust_generator-0.2.0: r"^([\w:]+)<(.+)>$" -consistent!(cpp_to_rust_generator_0, r"^([\w:]+)<(.+)>$"); - -// cpp_to_rust_generator-0.2.0: r"^type-parameter-(\d+)-(\d+)$" -consistent!(cpp_to_rust_generator_1, r"^type-parameter-(\d+)-(\d+)$"); - -// cpp_to_rust_generator-0.2.0: r"^([\w~]+)<[^<>]+>$" -consistent!(cpp_to_rust_generator_2, r"^([\w~]+)<[^<>]+>$"); - -// cpp_to_rust_generator-0.2.0: r"(signals|Q_SIGNALS)\s*:" -consistent!(cpp_to_rust_generator_3, r"(signals|Q_SIGNALS)\s*:"); - -// cpp_to_rust_generator-0.2.0: r"(slots|Q_SLOTS)\s*:" -consistent!(cpp_to_rust_generator_4, r"(slots|Q_SLOTS)\s*:"); - -// cpp_to_rust_generator-0.2.0: r"(public|protected|private)\s*:" -consistent!(cpp_to_rust_generator_5, r"(public|protected|private)\s*:"); - -// cpp_to_rust-0.5.3: r"^([\w:]+)<(.+)>$" -consistent!(cpp_to_rust_0, r"^([\w:]+)<(.+)>$"); - -// cpp_to_rust-0.5.3: r"^type-parameter-(\d+)-(\d+)$" -consistent!(cpp_to_rust_1, r"^type-parameter-(\d+)-(\d+)$"); - -// cpp_to_rust-0.5.3: r"^([\w~]+)<[^<>]+>$" -consistent!(cpp_to_rust_2, r"^([\w~]+)<[^<>]+>$"); - -// cpp_to_rust-0.5.3: r"(signals|Q_SIGNALS)\s*:" -consistent!(cpp_to_rust_3, r"(signals|Q_SIGNALS)\s*:"); - -// cpp_to_rust-0.5.3: r"(slots|Q_SLOTS)\s*:" -consistent!(cpp_to_rust_4, r"(slots|Q_SLOTS)\s*:"); - -// cpp_to_rust-0.5.3: r"(public|protected|private)\s*:" -consistent!(cpp_to_rust_5, r"(public|protected|private)\s*:"); - -// fritzbox_logs-0.2.0: "(\\d{2}\\.\\d{2}\\.\\d{2}) (\\d{2}:\\d{2}:\\d{2}) (.*)" -consistent!( - fritzbox_logs_0, - "(\\d{2}\\.\\d{2}\\.\\d{2}) (\\d{2}:\\d{2}:\\d{2}) (.*)" -); - -// fractal-matrix-api-3.29.0: r"mxc://(?P<server>[^/]+)/(?P<media>.+)" -consistent!(fractal_matrix_api_0, r"mxc://(?P<server>[^/]+)/(?P<media>.+)"); - -// smtp2go-0.1.4: r"^api-[a-zA-Z0-9]{32}$" -consistent!(smtp2go_0, r"^api-[a-zA-Z0-9]{32}$"); - -// pusher-0.3.1: r"^[-a-zA-Z0-9_=@,.;]+$" -consistent!(pusher_0, r"^[-a-zA-Z0-9_=@,.;]+$"); - -// pusher-0.3.1: r"\A\d+\.\d+\z" -consistent!(pusher_1, r"\A\d+\.\d+\z"); - -// bakervm-0.9.0: r"^\.(.+?) +?(.+)$" -consistent!(bakervm_0, r"^\.(.+?) +?(.+)$"); - -// bakervm-0.9.0: r"^\.([^\s]+)$" -consistent!(bakervm_1, r"^\.([^\s]+)$"); - -// bakervm-0.9.0: r"^include! +([^\s]+)$" -consistent!(bakervm_2, r"^include! 
+([^\s]+)$"); - -// bakervm-0.9.0: r"^@(\d+)$" -consistent!(bakervm_3, r"^@(\d+)$"); - -// bakervm-0.9.0: r"^true|false$" -consistent!(bakervm_4, r"^true|false$"); - -// bakervm-0.9.0: r"^(-?\d+)?\.[0-9]+$" -consistent!(bakervm_5, r"^(-?\d+)?\.[0-9]+$"); - -// bakervm-0.9.0: r"^(-?\d+)?$" -consistent!(bakervm_6, r"^(-?\d+)?$"); - -// bakervm-0.9.0: r"^#([0-9abcdefABCDEF]{6})$" -consistent!(bakervm_7, r"^#([0-9abcdefABCDEF]{6})$"); - -// bakervm-0.9.0: r"^'(.)'$" -consistent!(bakervm_8, r"^'(.)'$"); - -// bakervm-0.9.0: r"^\$vi\((\d+)\)$" -consistent!(bakervm_9, r"^\$vi\((\d+)\)$"); - -// bakervm-0.9.0: r"^\$key\((\d+)\)$" -consistent!(bakervm_10, r"^\$key\((\d+)\)$"); - -// banana-0.0.2: "(?P<type>[A-Z^']+) (?P<route>[^']+) HTTP/(?P<http>[^']+)" -consistent!( - banana_0, - "(?P<type>[A-Z^']+) (?P<route>[^']+) HTTP/(?P<http>[^']+)" -); - -// serial-key-2.0.0: r"[A-F0-9]{8}" -consistent!(serial_key_0, r"[A-F0-9]{8}"); - -// serde-hjson-0.8.1: "[\\\\\"\x00-\x1f\x7f-\u{9f}\u{00ad}\u{0600}-\u{0604}\u{070f}\u{17b4}\u{17b5}\u{200c}-\u{200f}\u{2028}-\u{202f}\u{2060}-\u{206f}\u{feff}\u{fff0}-\u{ffff}]" -consistent!(serde_hjson_0, "[\\\\\"\x00-\x1f\x7f-\u{9f}\u{00ad}\u{0600}-\u{0604}\u{070f}\u{17b4}\u{17b5}\u{200c}-\u{200f}\u{2028}-\u{202f}\u{2060}-\u{206f}\u{feff}\u{fff0}-\u{ffff}]"); - -// serde-hjson-0.8.1: "[\x00-\x1f\x7f-\u{9f}\u{00ad}\u{0600}-\u{0604}\u{070f}\u{17b4}\u{17b5}\u{200c}-\u{200f}\u{2028}-\u{202f}\u{2060}-\u{206f}\u{feff}\u{fff0}-\u{ffff}]" -consistent!(serde_hjson_1, "[\x00-\x1f\x7f-\u{9f}\u{00ad}\u{0600}-\u{0604}\u{070f}\u{17b4}\u{17b5}\u{200c}-\u{200f}\u{2028}-\u{202f}\u{2060}-\u{206f}\u{feff}\u{fff0}-\u{ffff}]"); - -// serde-hjson-0.8.1: "'''|[\x00-\x09\x0b\x0c\x0e-\x1f\x7f-\u{9f}\u{00ad}\u{0600}-\u{0604}\u{070f}\u{17b4}\u{17b5}\u{200c}-\u{200f}\u{2028}-\u{202f}\u{2060}-\u{206f}\u{feff}\u{fff0}-\u{ffff}]" -consistent!(serde_hjson_2, "'''|[\x00-\x09\x0b\x0c\x0e-\x1f\x7f-\u{9f}\u{00ad}\u{0600}-\u{0604}\u{070f}\u{17b4}\u{17b5}\u{200c}-\u{200f}\u{2028}-\u{202f}\u{2060}-\u{206f}\u{feff}\u{fff0}-\u{ffff}]"); - -// serde-odbc-0.1.0: r"/todos/(?P<id>\d+)" -consistent!(serde_odbc_0, r"/todos/(?P<id>\d+)"); - -// sentry-0.6.0: r"^(?:_<)?([a-zA-Z0-9_]+?)(?:\.\.|::)" -consistent!(sentry_0, r"^(?:_<)?([a-zA-Z0-9_]+?)(?:\.\.|::)"); - -// sentiment-0.1.1: r"[^a-zA-Z0 -]+" -consistent!(sentiment_0, r"[^a-zA-Z0 -]+"); - -// sentiment-0.1.1: r" {2,}" -consistent!(sentiment_1, r" {2,}"); - -// verilog-0.0.1: r"(?m)//.*" -consistent!(verilog_0, r"(?m)//.*"); - -// verex-0.2.2: "(?P<robot>C3PO)" -consistent!(verex_0, "(?P<robot>C3PO)"); - -// handlebars-0.32.4: ">|<|\"|&" -consistent!(handlebars_0, ">|<|\"|&"); - -// haikunator-0.1.2: r"^\w+-\w+-[0123456789]{4}$" -consistent!(haikunator_0, r"^\w+-\w+-[0123456789]{4}$"); - -// haikunator-0.1.2: r"^\w+@\w+@[0123456789]{4}$" -consistent!(haikunator_1, r"^\w+@\w+@[0123456789]{4}$"); - -// haikunator-0.1.2: r"^\w+-\w+-[0123456789abcdef]{4}$" -consistent!(haikunator_2, r"^\w+-\w+-[0123456789abcdef]{4}$"); - -// haikunator-0.1.2: r"^\w+-\w+-[0123456789忠犬ハチ公]{10}$" -consistent!(haikunator_3, r"^\w+-\w+-[0123456789忠犬ハチ公]{10}$"); - -// haikunator-0.1.2: r"^\w+-\w+$" -consistent!(haikunator_4, r"^\w+-\w+$"); - -// haikunator-0.1.2: r"^\w+-\w+-[foo]{4}$" -consistent!(haikunator_5, r"^\w+-\w+-[foo]{4}$"); - -// haikunator-0.1.2: r"^\w+-\w+-[0123456789忠犬ハチ公]{5}$" -consistent!(haikunator_6, r"^\w+-\w+-[0123456789忠犬ハチ公]{5}$"); - -// bobbin-cli-0.8.3: r"(.*)" -consistent!(bobbin_cli_0, r"(.*)"); - -// bobbin-cli-0.8.3: r"rustc (.*)" -consistent!(bobbin_cli_1, 
r"rustc (.*)"); - -// bobbin-cli-0.8.3: r"cargo (.*)" -consistent!(bobbin_cli_2, r"cargo (.*)"); - -// bobbin-cli-0.8.3: r"xargo (.*)\n" -consistent!(bobbin_cli_3, r"xargo (.*)\n"); - -// bobbin-cli-0.8.3: r"Open On-Chip Debugger (.*)" -consistent!(bobbin_cli_4, r"Open On-Chip Debugger (.*)"); - -// bobbin-cli-0.8.3: r"arm-none-eabi-gcc \(GNU Tools for ARM Embedded Processors[^\)]*\) (.*)" -consistent!( - bobbin_cli_5, - r"arm-none-eabi-gcc \(GNU Tools for ARM Embedded Processors[^\)]*\) (.*)" -); - -// bobbin-cli-0.8.3: r"(?m).*\nBasic Open Source SAM-BA Application \(BOSSA\) Version (.*)\n" -consistent!( - bobbin_cli_6, - r"(?m).*\nBasic Open Source SAM-BA Application \(BOSSA\) Version (.*)\n" -); - -// bobbin-cli-0.8.3: r"(?m)SEGGER J-Link Commander (.*)\n" -consistent!(bobbin_cli_7, r"(?m)SEGGER J-Link Commander (.*)\n"); - -// bobbin-cli-0.8.3: r"(?m)Teensy Loader, Command Line, Version (.*)\n" -consistent!(bobbin_cli_8, r"(?m)Teensy Loader, Command Line, Version (.*)\n"); - -// bobbin-cli-0.8.3: r"dfu-util (.*)\n" -consistent!(bobbin_cli_9, r"dfu-util (.*)\n"); - -// borsholder-0.9.1: r"^/static/[\w.]+$" -consistent!(borsholder_0, r"^/static/[\w.]+$"); - -// borsholder-0.9.1: r"^/timeline/([0-9]+)$" -consistent!(borsholder_1, r"^/timeline/([0-9]+)$"); - -// fblog-1.0.1: "\u{001B}\\[[\\d;]*[^\\d;]" -consistent!(fblog_0, "\u{001B}\\[[\\d;]*[^\\d;]"); - -// fblog-1.0.1: "\u{001B}\\[[\\d;]*[^\\d;]" -consistent!(fblog_1, "\u{001B}\\[[\\d;]*[^\\d;]"); - -// toml-query-0.6.0: r"^\[\d+\]$" -consistent!(toml_query_0, r"^\[\d+\]$"); - -// todo-txt-1.1.0: r" (?P<key>[^\s]+):(?P<value>[^\s^/]+)" -consistent!(todo_txt_0, r" (?P<key>[^\s]+):(?P<value>[^\s^/]+)"); - -// findr-0.1.5: r"\band\b" -consistent!(findr_0, r"\band\b"); - -// findr-0.1.5: r"\bor\b" -consistent!(findr_1, r"\bor\b"); - -// findr-0.1.5: r"\bnot\b" -consistent!(findr_2, r"\bnot\b"); - -// file-sniffer-3.0.1: r".*?\.(a|la|lo|o|ll|keter|bc|dyn_o|out|d|rlib|crate|min\.js|hi|dyn_hi|S|jsexe|webapp|js\.externs|ibc|toc|aux|fdb_latexmk|fls|egg-info|whl|js_a|js_hi|jld|ji|js_o|so.*|dump-.*|vmb|crx|orig|elmo|elmi|pyc|mod|p_hi|p_o|prof|tix)$" -consistent!(file_sniffer_0, r".*?\.(a|la|lo|o|ll|keter|bc|dyn_o|out|d|rlib|crate|min\.js|hi|dyn_hi|S|jsexe|webapp|js\.externs|ibc|toc|aux|fdb_latexmk|fls|egg-info|whl|js_a|js_hi|jld|ji|js_o|so.*|dump-.*|vmb|crx|orig|elmo|elmi|pyc|mod|p_hi|p_o|prof|tix)$"); - -// file-sniffer-3.0.1: r".*?\.(stats|conf|h|cache.*|dat|pc|info)$" -consistent!(file_sniffer_1, r".*?\.(stats|conf|h|cache.*|dat|pc|info)$"); - -// file-sniffer-3.0.1: r".*?\.(exe|a|la|o|ll|keter|bc|dyn_o|out|d|rlib|crate|min\.js|hi|dyn_hi|jsexe|webapp|js\.externs|ibc|toc|aux|fdb_latexmk|fls|egg-info|whl|js_a|js_hi|jld|ji|js_o|so.*|dump-.*|vmb|crx|orig|elmo|elmi|pyc|mod|p_hi|p_o|prof|tix)$" -consistent!(file_sniffer_2, r".*?\.(exe|a|la|o|ll|keter|bc|dyn_o|out|d|rlib|crate|min\.js|hi|dyn_hi|jsexe|webapp|js\.externs|ibc|toc|aux|fdb_latexmk|fls|egg-info|whl|js_a|js_hi|jld|ji|js_o|so.*|dump-.*|vmb|crx|orig|elmo|elmi|pyc|mod|p_hi|p_o|prof|tix)$"); - -// file-sniffer-3.0.1: r".*?\.(stats|conf|h|cache.*)$" -consistent!(file_sniffer_3, r".*?\.(stats|conf|h|cache.*)$"); - -// file-sniffer-3.0.1: r"(\.git|\.pijul|_darcs|\.hg)$" -consistent!(file_sniffer_4, r"(\.git|\.pijul|_darcs|\.hg)$"); - -// file_logger-0.1.0: "test" -consistent!(file_logger_0, "test"); - -// file_scanner-0.2.0: r"foo" -consistent!(file_scanner_0, r"foo"); - -// file_scanner-0.2.0: r"a+b" -consistent!(file_scanner_1, r"a+b"); - -// file_scanner-0.2.0: r"a[ab]*b" 
-consistent!(file_scanner_2, r"a[ab]*b"); - -// file_scanner-0.2.0: r"\s+" -consistent!(file_scanner_3, r"\s+"); - -// file_scanner-0.2.0: r"\s+" -consistent!(file_scanner_4, r"\s+"); - -// cellsplit-0.2.1: r"^\s*([^\s]+) %cellsplit<\d+>$" -consistent!(cellsplit_0, r"^\s*([^\s]+) %cellsplit<\d+>$"); - -// cellsplit-0.2.1: r"^\s*([^\s]+) %cellsplit<\d+>$" -consistent!(cellsplit_1, r"^\s*([^\s]+) %cellsplit<\d+>$"); - -// aterm-0.20.0: r"^[+\-]?[0-9]+" -consistent!(aterm_0, r"^[+\-]?[0-9]+"); - -// aterm-0.20.0: r"^[+\-]?[0-9]+\.[0-9]*([eE][+\-]?[0-9]+)?" -consistent!(aterm_1, r"^[+\-]?[0-9]+\.[0-9]*([eE][+\-]?[0-9]+)?"); - -// atarashii_imap-0.3.0: r"^[*] OK" -consistent!(atarashii_imap_0, r"^[*] OK"); - -// atarashii_imap-0.3.0: r"FLAGS\s\((.+)\)" -consistent!(atarashii_imap_1, r"FLAGS\s\((.+)\)"); - -// atarashii_imap-0.3.0: r"\[PERMANENTFLAGS\s\((.+)\)\]" -consistent!(atarashii_imap_2, r"\[PERMANENTFLAGS\s\((.+)\)\]"); - -// atarashii_imap-0.3.0: r"\[UIDVALIDITY\s(\d+)\]" -consistent!(atarashii_imap_3, r"\[UIDVALIDITY\s(\d+)\]"); - -// atarashii_imap-0.3.0: r"(\d+)\sEXISTS" -consistent!(atarashii_imap_4, r"(\d+)\sEXISTS"); - -// atarashii_imap-0.3.0: r"(\d+)\sRECENT" -consistent!(atarashii_imap_5, r"(\d+)\sRECENT"); - -// atarashii_imap-0.3.0: r"\[UNSEEN\s(\d+)\]" -consistent!(atarashii_imap_6, r"\[UNSEEN\s(\d+)\]"); - -// atarashii_imap-0.3.0: r"\[UIDNEXT\s(\d+)\]" -consistent!(atarashii_imap_7, r"\[UIDNEXT\s(\d+)\]"); - -// editorconfig-1.0.0: r"\\(\{|\})" -consistent!(editorconfig_0, r"\\(\{|\})"); - -// editorconfig-1.0.0: r"(^|[^\\])\\\|" -consistent!(editorconfig_1, r"(^|[^\\])\\\|"); - -// editorconfig-1.0.0: r"\[([^\]]*)$" -consistent!(editorconfig_2, r"\[([^\]]*)$"); - -// editorconfig-1.0.0: r"\[(.*/.*)\]" -consistent!(editorconfig_3, r"\[(.*/.*)\]"); - -// editorconfig-1.0.0: r"\{(-?\d+\\\.\\\.-?\d+)\}" -consistent!(editorconfig_4, r"\{(-?\d+\\\.\\\.-?\d+)\}"); - -// editorconfig-1.0.0: r"\{([^,]+)\}" -consistent!(editorconfig_5, r"\{([^,]+)\}"); - -// editorconfig-1.0.0: r"\{(([^\}].*)?(,|\|)(.*[^\\])?)\}" -consistent!(editorconfig_6, r"\{(([^\}].*)?(,|\|)(.*[^\\])?)\}"); - -// editorconfig-1.0.0: r"^/" -consistent!(editorconfig_7, r"^/"); - -// editorconfig-1.0.0: r"(^|[^\\])(\{|\})" -consistent!(editorconfig_8, r"(^|[^\\])(\{|\})"); - -// edmunge-1.0.0: "^#!.*\n" -consistent!(edmunge_0, "^#!.*\n"); - -// unicode_names2_macros-0.2.0: r"\\N\{(.*?)(?:\}|$)" -consistent!(unicode_names2_macros_0, r"\\N\{(.*?)(?:\}|$)"); - -// unidiff-0.2.1: r"^--- (?P<filename>[^\t\n]+)(?:\t(?P<timestamp>[^\n]+))?" -consistent!( - unidiff_0, - r"^--- (?P<filename>[^\t\n]+)(?:\t(?P<timestamp>[^\n]+))?" -); - -// unidiff-0.2.1: r"^\+\+\+ (?P<filename>[^\t\n]+)(?:\t(?P<timestamp>[^\n]+))?" -consistent!( - unidiff_1, - r"^\+\+\+ (?P<filename>[^\t\n]+)(?:\t(?P<timestamp>[^\n]+))?" -); - -// unidiff-0.2.1: r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)" -consistent!(unidiff_2, r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)"); - -// unidiff-0.2.1: r"^(?P<line_type>[- \n\+\\]?)(?P<value>.*)" -consistent!(unidiff_3, r"^(?P<line_type>[- \n\+\\]?)(?P<value>.*)"); - -// slippy-map-tiles-0.13.1: "/?(?P<zoom>[0-9]?[0-9])/(?P<x>[0-9]{1,10})/(?P<y>[0-9]{1,10})(\\.[a-zA-Z]{3,4})?$" -consistent!(slippy_map_tiles_0, "/?(?P<zoom>[0-9]?[0-9])/(?P<x>[0-9]{1,10})/(?P<y>[0-9]{1,10})(\\.[a-zA-Z]{3,4})?$"); - -// slippy-map-tiles-0.13.1: r"^(?P<minlon>-?[0-9]{1,3}(\.[0-9]{1,10})?) (?P<minlat>-?[0-9]{1,3}(\.[0-9]{1,10})?) (?P<maxlon>-?[0-9]{1,3}(\.[0-9]{1,10})?) 
(?P<maxlat>-?[0-9]{1,3}(\.[0-9]{1,10})?)$" -consistent!(slippy_map_tiles_1, r"^(?P<minlon>-?[0-9]{1,3}(\.[0-9]{1,10})?) (?P<minlat>-?[0-9]{1,3}(\.[0-9]{1,10})?) (?P<maxlon>-?[0-9]{1,3}(\.[0-9]{1,10})?) (?P<maxlat>-?[0-9]{1,3}(\.[0-9]{1,10})?)$"); - -// slippy-map-tiles-0.13.1: r"^(?P<minlon>-?[0-9]{1,3}(\.[0-9]{1,10})?),(?P<minlat>-?[0-9]{1,3}(\.[0-9]{1,10})?),(?P<maxlon>-?[0-9]{1,3}(\.[0-9]{1,10})?),(?P<maxlat>-?[0-9]{1,3}(\.[0-9]{1,10})?)$" -consistent!(slippy_map_tiles_2, r"^(?P<minlon>-?[0-9]{1,3}(\.[0-9]{1,10})?),(?P<minlat>-?[0-9]{1,3}(\.[0-9]{1,10})?),(?P<maxlon>-?[0-9]{1,3}(\.[0-9]{1,10})?),(?P<maxlat>-?[0-9]{1,3}(\.[0-9]{1,10})?)$"); - -// sonos-0.1.2: r"^https?://(.+?):1400/xml" -consistent!(sonos_0, r"^https?://(.+?):1400/xml"); - -// validator_derive-0.7.0: r"^[a-z]{2}$" -consistent!(validator_derive_0, r"^[a-z]{2}$"); - -// validator_derive-0.7.0: r"[a-z]{2}" -consistent!(validator_derive_1, r"[a-z]{2}"); - -// validator_derive-0.7.0: r"[a-z]{2}" -consistent!(validator_derive_2, r"[a-z]{2}"); - -// nginx-config-0.8.0: r"one of \d+ options" -consistent!(nginx_config_0, r"one of \d+ options"); - -// waltz-0.4.0: r"[\s,]" -consistent!(waltz_0, r"[\s,]"); - -// warheadhateus-0.2.1: r"^aws_access_key_id = (.*)" -consistent!(warheadhateus_0, r"^aws_access_key_id = (.*)"); - -// warheadhateus-0.2.1: r"^aws_secret_access_key = (.*)" -consistent!(warheadhateus_1, r"^aws_secret_access_key = (.*)"); - -// warheadhateus-0.2.1: r"^aws_access_key_id = (.*)" -consistent!(warheadhateus_2, r"^aws_access_key_id = (.*)"); - -// warheadhateus-0.2.1: r"^aws_secret_access_key = (.*)" -consistent!(warheadhateus_3, r"^aws_secret_access_key = (.*)"); - -// jieba-rs-0.2.2: r"([\u{4E00}-\u{9FD5}a-zA-Z0-9+#&\._%]+)" -consistent!(jieba_rs_0, r"([\u{4E00}-\u{9FD5}a-zA-Z0-9+#&\._%]+)"); - -// jieba-rs-0.2.2: r"(\r\n|\s)" -consistent!(jieba_rs_1, r"(\r\n|\s)"); - -// jieba-rs-0.2.2: "([\u{4E00}-\u{9FD5}]+)" -consistent!(jieba_rs_2, "([\u{4E00}-\u{9FD5}]+)"); - -// jieba-rs-0.2.2: r"[^a-zA-Z0-9+#\n]" -consistent!(jieba_rs_3, r"[^a-zA-Z0-9+#\n]"); - -// jieba-rs-0.2.2: r"([\u{4E00}-\u{9FD5}]+)" -consistent!(jieba_rs_4, r"([\u{4E00}-\u{9FD5}]+)"); - -// jieba-rs-0.2.2: r"([a-zA-Z0-9]+(?:.\d+)?%?)" -consistent!(jieba_rs_5, r"([a-zA-Z0-9]+(?:.\d+)?%?)"); - -// lalrpop-0.15.2: r"Span\([0-9 ,]*\)" -consistent!(lalrpop_0, r"Span\([0-9 ,]*\)"); - -// lalrpop-snap-0.15.2: r"Span\([0-9 ,]*\)" -consistent!(lalrpop_snap_0, r"Span\([0-9 ,]*\)"); - -// nlp-tokenize-0.1.0: r"[\S]+" -consistent!(nlp_tokenize_0, r"[\S]+"); - -// kbgpg-0.1.2: "[[:xdigit:]][70]" -consistent!(kbgpg_0, "[[:xdigit:]][70]"); - -// cdbd-0.1.1: r"^((?P<address>.*):)?(?P<port>\d+)$" -consistent!(cdbd_0, r"^((?P<address>.*):)?(?P<port>\d+)$"); - -// mbutiles-0.1.1: r"[\w\s=+-/]+\((\{(.|\n)*\})\);?" 
-consistent!(mbutiles_0, r"[\w\s=+-/]+\((\{(.|\n)*\})\);?"); - -// extrahop-0.2.5: r"^-\d+(?:ms|s|m|h|d|w|y)?$" -consistent!(extrahop_0, r"^-\d+(?:ms|s|m|h|d|w|y)?$"); - -// pippin-0.1.0: "^((?:.*)-)?ss(0|[1-9][0-9]*)\\.pip$" -consistent!(pippin_0, "^((?:.*)-)?ss(0|[1-9][0-9]*)\\.pip$"); - -// pippin-0.1.0: "^((?:.*)-)?ss(0|[1-9][0-9]*)-cl(0|[1-9][0-9]*)\\.piplog$" -consistent!( - pippin_1, - "^((?:.*)-)?ss(0|[1-9][0-9]*)-cl(0|[1-9][0-9]*)\\.piplog$" -); - -// pippin-0.1.0: "^((?:.*)-)?ss(0|[1-9][0-9]*)\\.pip$" -consistent!(pippin_2, "^((?:.*)-)?ss(0|[1-9][0-9]*)\\.pip$"); - -// pippin-0.1.0: "^((?:.*)-)?ss(0|[1-9][0-9]*)-cl(0|[1-9][0-9]*)\\.piplog$" -consistent!( - pippin_3, - "^((?:.*)-)?ss(0|[1-9][0-9]*)-cl(0|[1-9][0-9]*)\\.piplog$" -); - -// pippin-0.1.0: "^.*pn(0|[1-9][0-9]*)(-ss(0|[1-9][0-9]*)(\\.pip|-cl(0|[1-9][0-9]*)\\.piplog))?$" -consistent!(pippin_4, "^.*pn(0|[1-9][0-9]*)(-ss(0|[1-9][0-9]*)(\\.pip|-cl(0|[1-9][0-9]*)\\.piplog))?$"); - -// pippin-0.1.0: "^(.*)-ss(?:0|[1-9][0-9]*)(?:\\.pip|-cl(?:0|[1-9][0-9]*)\\.piplog)$" -consistent!( - pippin_5, - "^(.*)-ss(?:0|[1-9][0-9]*)(?:\\.pip|-cl(?:0|[1-9][0-9]*)\\.piplog)$" -); - -// pinyin-0.3.0: r"(?i)[āáǎàēéěèōóǒòīíǐìūúǔùüǘǚǜńň]" -consistent!( - pinyin_0, - r"(?i)[āáǎàēéěèōóǒòīíǐìūúǔùüǘǚǜńň]" -); - -// pinyin-0.3.0: r"([aeoiuvnm])([0-4])$" -consistent!(pinyin_1, r"([aeoiuvnm])([0-4])$"); - -// duration-parser-0.2.0: r"(?P<value>\d+)(?P<units>[a-z])" -consistent!(duration_parser_0, r"(?P<value>\d+)(?P<units>[a-z])"); - -// dutree-0.2.7: r"^\d+\D?$" -consistent!(dutree_0, r"^\d+\D?$"); - -// djangohashers-0.3.0: r"^[A-Za-z0-9]*$" -consistent!(djangohashers_0, r"^[A-Za-z0-9]*$"); - -// rtag-0.3.5: r"^[A-Z][A-Z0-9]{2,}$" -consistent!(rtag_0, r"^[A-Z][A-Z0-9]{2,}$"); - -// rtag-0.3.5: r"^http://www\.emusic\.com" -consistent!(rtag_1, r"^http://www\.emusic\.com"); - -// rtag-0.3.5: r"^[A-Z][A-Z0-9]{2,}" -consistent!(rtag_2, r"^[A-Z][A-Z0-9]{2,}"); - -// rtag-0.3.5: r"(^[\x{0}|\x{feff}|\x{fffe}]*|[\x{0}|\x{feff}|\x{fffe}]*$)" -consistent!( - rtag_3, - r"(^[\x{0}|\x{feff}|\x{fffe}]*|[\x{0}|\x{feff}|\x{fffe}]*$)" -); - -// rtow-0.1.0: r"(\d+)[xX](\d+)" -consistent!(rtow_0, r"(\d+)[xX](\d+)"); - -// pleingres-sql-plugin-0.1.0: r"\$([a-zA-Z0-9_]+)" -consistent!(pleingres_sql_plugin_0, r"\$([a-zA-Z0-9_]+)"); - -// dono-2.0.0: "[\\n]+" -consistent!(dono_0, "[\\n]+"); - -// dono-2.0.0: "(?m)^\\n" -consistent!(dono_1, "(?m)^\\n"); - -// dono-2.0.0: "(?m)^\\n" -consistent!(dono_2, "(?m)^\\n"); - -// ssb-common-0.3.0: r"^[0-9A-Za-z\+/]{43}=\.ed25519$" -consistent!(ssb_common_0, r"^[0-9A-Za-z\+/]{43}=\.ed25519$"); - -// ssb-common-0.3.0: r"^[0-9A-Za-z\+/]{86}==\.ed25519$" -consistent!(ssb_common_1, r"^[0-9A-Za-z\+/]{86}==\.ed25519$"); - -// ssb-common-0.3.0: r"^[0-9A-Za-z\+/]{43}=\.sha256$" -consistent!(ssb_common_2, r"^[0-9A-Za-z\+/]{43}=\.sha256$"); - -// mozversion-0.1.3: r"^(?P<major>\d+)\.(?P<minor>\d+)(?:\.(?P<patch>\d+))?(?:(?P<pre0>[a-z]+)(?P<pre1>\d*))?$" -consistent!(mozversion_0, r"^(?P<major>\d+)\.(?P<minor>\d+)(?:\.(?P<patch>\d+))?(?:(?P<pre0>[a-z]+)(?P<pre1>\d*))?$"); - -// monger-0.5.6: r"^(\d+)\.(\d+)$" -consistent!(monger_0, r"^(\d+)\.(\d+)$"); - -// mongo_rub-0.0.2: r"^[rv]2\.6" -consistent!(mongo_rub_0, r"^[rv]2\.6"); - -// flow-0.3.5: "body value" -consistent!(flow_0, "body value"); - -// flow-0.3.5: "start marker" -consistent!(flow_1, "start marker"); - -// flow-0.3.5: "end marker" -consistent!(flow_2, "end marker"); - -// flow-0.3.5: "body value" -consistent!(flow_3, "body value"); - -// vobsub-0.2.3: "^([A-Za-z/ ]+): (.*)" 
-consistent!(vobsub_0, "^([A-Za-z/ ]+): (.*)"); - -// voidmap-1.1.2: r"#([^\s=]+)*" -consistent!(voidmap_0, r"#([^\s=]+)*"); - -// voidmap-1.1.2: r"#(\S+)*" -consistent!(voidmap_1, r"#(\S+)*"); - -// voidmap-1.1.2: r"#prio=(\d+)" -consistent!(voidmap_2, r"#prio=(\d+)"); - -// voidmap-1.1.2: r"\[(\S+)\]" -consistent!(voidmap_3, r"\[(\S+)\]"); - -// voidmap-1.1.2: r"#limit=(\d+)" -consistent!(voidmap_4, r"#limit=(\d+)"); - -// voidmap-1.1.2: r"#tagged=(\S+)" -consistent!(voidmap_5, r"#tagged=(\S+)"); - -// voidmap-1.1.2: r"#rev\b" -consistent!(voidmap_6, r"#rev\b"); - -// voidmap-1.1.2: r"#done\b" -consistent!(voidmap_7, r"#done\b"); - -// voidmap-1.1.2: r"#open\b" -consistent!(voidmap_8, r"#open\b"); - -// voidmap-1.1.2: r"#since=(\S+)" -consistent!(voidmap_9, r"#since=(\S+)"); - -// voidmap-1.1.2: r"#until=(\S+)" -consistent!(voidmap_10, r"#until=(\S+)"); - -// voidmap-1.1.2: r"#plot=(\S+)" -consistent!(voidmap_11, r"#plot=(\S+)"); - -// voidmap-1.1.2: r"#n=(\d+)" -consistent!(voidmap_12, r"#n=(\d+)"); - -// voidmap-1.1.2: r"(\S+)" -consistent!(voidmap_13, r"(\S+)"); - -// voidmap-1.1.2: r"(?P<y>\d+)y" -consistent!(voidmap_14, r"(?P<y>\d+)y"); - -// voidmap-1.1.2: r"(?P<m>\d+)m" -consistent!(voidmap_15, r"(?P<m>\d+)m"); - -// voidmap-1.1.2: r"(?P<w>\d+)w" -consistent!(voidmap_16, r"(?P<w>\d+)w"); - -// voidmap-1.1.2: r"(?P<d>\d+)d" -consistent!(voidmap_17, r"(?P<d>\d+)d"); - -// voidmap-1.1.2: r"(?P<h>\d+)h" -consistent!(voidmap_18, r"(?P<h>\d+)h"); - -// voidmap-1.1.2: r"C-(.)" -consistent!(voidmap_19, r"C-(.)"); - -// qt_generator-0.2.0: r"^\.\./qt[^/]+/" -consistent!(qt_generator_0, r"^\.\./qt[^/]+/"); - -// qt_generator-0.2.0: "(href|src)=\"([^\"]*)\"" -consistent!(qt_generator_1, "(href|src)=\"([^\"]*)\""); - -// kryptos-0.6.1: r"[01]{5}" -consistent!(kryptos_0, r"[01]{5}"); - -// cifar_10_loader-0.2.0: "data_batch_[1-5].bin" -consistent!(cifar_10_loader_0, "data_batch_[1-5].bin"); - -// cifar_10_loader-0.2.0: "test_batch.bin" -consistent!(cifar_10_loader_1, "test_batch.bin"); - -// circadian-0.6.0: r"^\d+.\d+s$" -consistent!(circadian_0, r"^\d+.\d+s$"); - -// circadian-0.6.0: r"^\d+:\d+$" -consistent!(circadian_1, r"^\d+:\d+$"); - -// circadian-0.6.0: r"^\d+:\d+m$" -consistent!(circadian_2, r"^\d+:\d+m$"); - -// cicada-0.8.1: r"!!" 
-consistent!(cicada_0, r"!!"); - -// cicada-0.8.1: r"^([^`]*)`([^`]+)`(.*)$" -consistent!(cicada_1, r"^([^`]*)`([^`]+)`(.*)$"); - -// cicada-0.8.1: r"\*+" -consistent!(cicada_2, r"\*+"); - -// cicada-0.8.1: r"([^\$]*)\$\{?([A-Za-z0-9\?\$_]+)\}?(.*)" -consistent!(cicada_3, r"([^\$]*)\$\{?([A-Za-z0-9\?\$_]+)\}?(.*)"); - -// cicada-0.8.1: r"^ *alias +([a-zA-Z0-9_\.-]+)=(.*)$" -consistent!(cicada_4, r"^ *alias +([a-zA-Z0-9_\.-]+)=(.*)$"); - -// vterm-sys-0.1.0: r"hi" -consistent!(vterm_sys_0, r"hi"); - -// skim-0.5.0: r".*?\t" -consistent!(skim_0, r".*?\t"); - -// skim-0.5.0: r".*?[\t ]" -consistent!(skim_1, r".*?[\t ]"); - -// skim-0.5.0: r"(\{-?[0-9.,q]*?})" -consistent!(skim_2, r"(\{-?[0-9.,q]*?})"); - -// skim-0.5.0: r"[ \t\n]+" -consistent!(skim_3, r"[ \t\n]+"); - -// skim-0.5.0: r"[ \t\n]+" -consistent!(skim_4, r"[ \t\n]+"); - -// skim-0.5.0: r"([^ |]+( +\| +[^ |]*)+)|( +)" -consistent!(skim_5, r"([^ |]+( +\| +[^ |]*)+)|( +)"); - -// skim-0.5.0: r" +\| +" -consistent!(skim_6, r" +\| +"); - -// skim-0.5.0: r"^(?P<left>-?\d+)?(?P<sep>\.\.)?(?P<right>-?\d+)?$" -consistent!(skim_7, r"^(?P<left>-?\d+)?(?P<sep>\.\.)?(?P<right>-?\d+)?$"); - -// skim-0.5.0: "," -consistent!(skim_8, ","); - -// skim-0.5.0: ".*?," -consistent!(skim_9, ".*?,"); - -// skim-0.5.0: ".*?," -consistent!(skim_10, ".*?,"); - -// skim-0.5.0: "," -consistent!(skim_11, ","); - -// skim-0.5.0: r"\x1B\[(?:([0-9]+;[0-9]+[Hf])|([0-9]+[ABCD])|(s|u|2J|K)|([0-9;]*m)|(=[0-9]+[hI]))" -consistent!(skim_12, r"\x1B\[(?:([0-9]+;[0-9]+[Hf])|([0-9]+[ABCD])|(s|u|2J|K)|([0-9;]*m)|(=[0-9]+[hI]))"); - -// egg-mode-text-1.14.7: r"[-_./]\z" -consistent!(egg_mode_text_0, r"[-_./]\z"); - -// java-properties-1.1.1: "^[ \t\r\n\x0c]*[#!]" -consistent!(java_properties_0, "^[ \t\r\n\x0c]*[#!]"); - -// java-properties-1.1.1: r"^[ \t\x0c]*[#!][^\r\n]*$" -consistent!(java_properties_1, r"^[ \t\x0c]*[#!][^\r\n]*$"); - -// java-properties-1.1.1: r"^([ \t\x0c]*[:=][ \t\x0c]*|[ \t\x0c]+)$" -consistent!(java_properties_2, r"^([ \t\x0c]*[:=][ \t\x0c]*|[ \t\x0c]+)$"); - -// ipaddress-0.1.2: r":.+\." -consistent!(ipaddress_0, r":.+\."); - -// ipaddress-0.1.2: r"\." 
-consistent!(ipaddress_1, r"\."); - -// ipaddress-0.1.2: r":" -consistent!(ipaddress_2, r":"); - -// iptables-0.2.2: r"v(\d+)\.(\d+)\.(\d+)" -consistent!(iptables_0, r"v(\d+)\.(\d+)\.(\d+)"); - -// rsure-0.8.1: r"^([^-]+)-(.*)\.dat\.gz$" -consistent!(rsure_0, r"^([^-]+)-(.*)\.dat\.gz$"); - -// rs-jsonpath-0.1.0: "^(.*?)(<=|<|==|>=|>)(.*?)$" -consistent!(rs_jsonpath_0, "^(.*?)(<=|<|==|>=|>)(.*?)$"); - -// oatie-0.3.0: r"(\n|^)(\w+):([\n\w\W]+?)(\n(?:\w)|(\n\]))" -consistent!(oatie_0, r"(\n|^)(\w+):([\n\w\W]+?)(\n(?:\w)|(\n\]))"); - -// weld-0.2.0: "#.*$" -consistent!(weld_0, "#.*$"); - -// weld-0.2.0: r"^[A-Za-z$_][A-Za-z0-9$_]*$" -consistent!(weld_1, r"^[A-Za-z$_][A-Za-z0-9$_]*$"); - -// weld-0.2.0: r"^[0-9]+[cC]$" -consistent!(weld_2, r"^[0-9]+[cC]$"); - -// weld-0.2.0: r"^0b[0-1]+[cC]$" -consistent!(weld_3, r"^0b[0-1]+[cC]$"); - -// weld-0.2.0: r"^0x[0-9a-fA-F]+[cC]$" -consistent!(weld_4, r"^0x[0-9a-fA-F]+[cC]$"); - -// weld-0.2.0: r"^[0-9]+$" -consistent!(weld_5, r"^[0-9]+$"); - -// weld-0.2.0: r"^0b[0-1]+$" -consistent!(weld_6, r"^0b[0-1]+$"); - -// weld-0.2.0: r"^0x[0-9a-fA-F]+$" -consistent!(weld_7, r"^0x[0-9a-fA-F]+$"); - -// weld-0.2.0: r"^[0-9]+[lL]$" -consistent!(weld_8, r"^[0-9]+[lL]$"); - -// weld-0.2.0: r"^0b[0-1]+[lL]$" -consistent!(weld_9, r"^0b[0-1]+[lL]$"); - -// weld-0.2.0: r"^0x[0-9a-fA-F]+[lL]$" -consistent!(weld_10, r"^0x[0-9a-fA-F]+[lL]$"); - -// webgl_generator-0.1.0: "([(, ])enum\\b" -consistent!(webgl_generator_0, "([(, ])enum\\b"); - -// webgl_generator-0.1.0: "\\bAcquireResourcesCallback\\b" -consistent!(webgl_generator_1, "\\bAcquireResourcesCallback\\b"); - -// weave-0.2.0: r"^(\d+)(,(\d+))?([acd]).*$" -consistent!(weave_0, r"^(\d+)(,(\d+))?([acd]).*$"); - -// wemo-0.0.12: r"<BinaryState>(\d)(\|-?\d+)*</BinaryState>" -consistent!(wemo_0, r"<BinaryState>(\d)(\|-?\d+)*</BinaryState>"); - -// webscale-0.9.4: r"(http[s]?://[^\s]+)" -consistent!(webscale_0, r"(http[s]?://[^\s]+)"); - -// svgrep-1.1.0: r"^\d+.*$" -consistent!(svgrep_0, r"^\d+.*$"); - -// ignore-0.4.2: r"^[\pL\pN]+$" -consistent!(ignore_0, r"^[\pL\pN]+$"); - -// ommui_string_patterns-0.1.2: r"^([A-Za-z][0-9A-Za-z_]*)?$" -consistent!(ommui_string_patterns_0, r"^([A-Za-z][0-9A-Za-z_]*)?$"); - -// ommui_string_patterns-0.1.2: r"^(\S+(?:.*\S)?)?$" -consistent!(ommui_string_patterns_1, r"^(\S+(?:.*\S)?)?$"); - -// opcua-types-0.3.0: "^(?P<min>[0-9]{1,10})(:(?P<max>[0-9]{1,10}))?$" -consistent!(opcua_types_0, "^(?P<min>[0-9]{1,10})(:(?P<max>[0-9]{1,10}))?$"); - -// opcua-types-0.3.0: r"^(ns=(?P<ns>[0-9]+);)?(?P<t>[isgb])=(?P<v>.+)$" -consistent!(opcua_types_1, r"^(ns=(?P<ns>[0-9]+);)?(?P<t>[isgb])=(?P<v>.+)$"); - -// open_read_later-1.1.1: r"^(.+?)\s*:\s*(.+)$" -consistent!(open_read_later_0, r"^(.+?)\s*:\s*(.+)$"); - -// youtube-downloader-0.1.0: r"^.*(?:(?:youtu\.be/|v/|vi/|u/w/|embed/)|(?:(?:watch)?\?v(?:i)?=|\&v(?:i)?=))([^#\&\?]*).*" -consistent!(youtube_downloader_0, r"^.*(?:(?:youtu\.be/|v/|vi/|u/w/|embed/)|(?:(?:watch)?\?v(?:i)?=|\&v(?:i)?=))([^#\&\?]*).*"); - -// yobot-0.1.1: "." -consistent!(yobot_0, "."); - -// yobot-0.1.1: r"." -consistent!(yobot_1, r"."); - -// yobot-0.1.1: r".+" -consistent!(yobot_2, r".+"); - -// yobot-0.1.1: r"." 
-consistent!(yobot_3, r"."); - -// ubiquity-0.1.5: r"foo" -consistent!(ubiquity_0, r"foo"); - -// ubiquity-0.1.5: r"/target/" -consistent!(ubiquity_1, r"/target/"); - -// ubiquity-0.1.5: r".DS_Store" -consistent!(ubiquity_2, r".DS_Store"); - -// qasm-1.0.0: r"//.*" -consistent!(qasm_0, r"//.*"); - -// drill-0.3.5: r"\{\{ *([a-z\._]+) *\}\}" -consistent!(drill_0, r"\{\{ *([a-z\._]+) *\}\}"); - -// queryst-2.0.0: r"^([^\]\[]+)" -consistent!(queryst_0, r"^([^\]\[]+)"); - -// queryst-2.0.0: r"(\[[^\]\[]*\])" -consistent!(queryst_1, r"(\[[^\]\[]*\])"); - -// qui-vive-0.1.0: r"^/(\w+)$" -consistent!(qui_vive_0, r"^/(\w+)$"); - -// qui-vive-0.1.0: r"^/key$" -consistent!(qui_vive_1, r"^/key$"); - -// qui-vive-0.1.0: r"^/key/(\w+)$" -consistent!(qui_vive_2, r"^/key/(\w+)$"); - -// qui-vive-0.1.0: r"^/url$" -consistent!(qui_vive_3, r"^/url$"); - -// qui-vive-0.1.0: r"^/url/(\w+)$" -consistent!(qui_vive_4, r"^/url/(\w+)$"); - -// qui-vive-0.1.0: r"^/inv$" -consistent!(qui_vive_5, r"^/inv$"); - -// qui-vive-0.1.0: r"^/inv/(\w+)$" -consistent!(qui_vive_6, r"^/inv/(\w+)$"); - -// subdiff-0.1.0: r"\b" -// consistent!(subdiff_0, r"\b"); - -// substudy-0.4.5: r"^(\d+)/(\d+)$" -consistent!(substudy_0, r"^(\d+)/(\d+)$"); - -// substudy-0.4.5: r"\s+" -consistent!(substudy_1, r"\s+"); - -// substudy-0.4.5: r"<[a-z/][^>]*>" -consistent!(substudy_2, r"<[a-z/][^>]*>"); - -// substudy-0.4.5: r"(\([^)]*\)|♪[^♪]*♪|[A-Z]{2,} ?:)" -consistent!(substudy_3, r"(\([^)]*\)|♪[^♪]*♪|[A-Z]{2,} ?:)"); - -// substudy-0.4.5: r"\s+" -consistent!(substudy_4, r"\s+"); - -// isbnid-0.1.3: r"^(\d(-| )?){9}(x|X|\d|(\d(-| )?){3}\d)$" -consistent!(isbnid_0, r"^(\d(-| )?){9}(x|X|\d|(\d(-| )?){3}\d)$"); - -// isbnid-0.1.3: r"[^0-9X]" -consistent!(isbnid_1, r"[^0-9X]"); - -// ispc-0.3.5: r"Intel\(r\) SPMD Program Compiler \(ispc\), (\d+\.\d+\.\d+)" -consistent!( - ispc_0, - r"Intel\(r\) SPMD Program Compiler \(ispc\), (\d+\.\d+\.\d+)" -); diff --git a/collector/compile-benchmarks/regex-1.5.5/tests/crazy.rs b/collector/compile-benchmarks/regex-1.5.5/tests/crazy.rs deleted file mode 100644 index 293ac1ae7..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/tests/crazy.rs +++ /dev/null @@ -1,459 +0,0 @@ -mat!(ascii_literal, r"a", "a", Some((0, 1))); - -// Some crazy expressions from regular-expressions.info. 
-mat!( - match_ranges, - r"(?-u)\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b", - "num: 255", - Some((5, 8)) -); -mat!( - match_ranges_not, - r"(?-u)\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b", - "num: 256", - None -); -mat!(match_float1, r"[-+]?[0-9]*\.?[0-9]+", "0.1", Some((0, 3))); -mat!(match_float2, r"[-+]?[0-9]*\.?[0-9]+", "0.1.2", Some((0, 3))); -mat!(match_float3, r"[-+]?[0-9]*\.?[0-9]+", "a1.2", Some((1, 4))); -mat!(match_float4, r"^[-+]?[0-9]*\.?[0-9]+$", "1.a", None); -mat!( - match_email, - r"(?i-u)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b", - "mine is jam.slam@gmail.com ", - Some((8, 26)) -); -mat!( - match_email_not, - r"(?i-u)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b", - "mine is jam.slam@gmail ", - None -); -mat!( - match_email_big, - r"[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?", - "mine is jam.slam@gmail.com ", - Some((8, 26)) -); -mat!( - match_date1, - r"(?-u)^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$", - "1900-01-01", - Some((0, 10)) -); -mat!( - match_date2, - r"(?-u)^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$", - "1900-00-01", - None -); -mat!( - match_date3, - r"(?-u)^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$", - "1900-13-01", - None -); - -// Do some crazy dancing with the start/end assertions. -matiter!(match_start_end_empty, r"^$", "", (0, 0)); -matiter!(match_start_end_empty_many_1, r"^$^$^$", "", (0, 0)); -matiter!(match_start_end_empty_many_2, r"^^^$$$", "", (0, 0)); -matiter!(match_start_end_empty_rev, r"$^", "", (0, 0)); -matiter!( - match_start_end_empty_rep, - r"(?:^$)*", - "a\nb\nc", - (0, 0), - (1, 1), - (2, 2), - (3, 3), - (4, 4), - (5, 5) -); -matiter!( - match_start_end_empty_rep_rev, - r"(?:$^)*", - "a\nb\nc", - (0, 0), - (1, 1), - (2, 2), - (3, 3), - (4, 4), - (5, 5) -); - -// Test negated character classes. -mat!(negclass_letters, r"[^ac]", "acx", Some((2, 3))); -mat!(negclass_letter_comma, r"[^a,]", "a,x", Some((2, 3))); -mat!(negclass_letter_space, r"[^a[:space:]]", "a x", Some((2, 3))); -mat!(negclass_comma, r"[^,]", ",,x", Some((2, 3))); -mat!(negclass_space, r"[^[:space:]]", " a", Some((1, 2))); -mat!(negclass_space_comma, r"[^,[:space:]]", ", a", Some((2, 3))); -mat!(negclass_comma_space, r"[^[:space:],]", " ,a", Some((2, 3))); -mat!(negclass_ascii, r"[^[:alpha:]Z]", "A1", Some((1, 2))); - -// Test that repeated empty expressions don't loop forever. -mat!(lazy_many_many, r"((?:.*)*?)=", "a=b", Some((0, 2))); -mat!(lazy_many_optional, r"((?:.?)*?)=", "a=b", Some((0, 2))); -mat!(lazy_one_many_many, r"((?:.*)+?)=", "a=b", Some((0, 2))); -mat!(lazy_one_many_optional, r"((?:.?)+?)=", "a=b", Some((0, 2))); -mat!(lazy_range_min_many, r"((?:.*){1,}?)=", "a=b", Some((0, 2))); -mat!(lazy_range_many, r"((?:.*){1,2}?)=", "a=b", Some((0, 2))); -mat!(greedy_many_many, r"((?:.*)*)=", "a=b", Some((0, 2))); -mat!(greedy_many_optional, r"((?:.?)*)=", "a=b", Some((0, 2))); -mat!(greedy_one_many_many, r"((?:.*)+)=", "a=b", Some((0, 2))); -mat!(greedy_one_many_optional, r"((?:.?)+)=", "a=b", Some((0, 2))); -mat!(greedy_range_min_many, r"((?:.*){1,})=", "a=b", Some((0, 2))); -mat!(greedy_range_many, r"((?:.*){1,2})=", "a=b", Some((0, 2))); - -// Test that we handle various flavors of empty expressions. 
-matiter!(match_empty1, r"", "", (0, 0)); -matiter!(match_empty2, r"", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty3, r"()", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty4, r"()*", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty5, r"()+", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty6, r"()?", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty7, r"()()", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty8, r"()+|z", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty9, r"z|()+", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty10, r"()+|b", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty11, r"b|()+", "abc", (0, 0), (1, 2), (3, 3)); -matiter!(match_empty12, r"|b", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty13, r"b|", "abc", (0, 0), (1, 2), (3, 3)); -matiter!(match_empty14, r"|z", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty15, r"z|", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty16, r"|", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty17, r"||", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty18, r"||z", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty19, r"(?:)|b", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty20, r"b|(?:)", "abc", (0, 0), (1, 2), (3, 3)); -matiter!(match_empty21, r"(?:|)", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty22, r"(?:|)|z", "abc", (0, 0), (1, 1), (2, 2), (3, 3)); -matiter!(match_empty23, r"a(?:)|b", "abc", (0, 1), (1, 2)); - -// Test that the DFA can handle pathological cases. -// (This should result in the DFA's cache being flushed too frequently, which -// should cause it to quit and fall back to the NFA algorithm.) -#[test] -fn dfa_handles_pathological_case() { - fn ones_and_zeroes(count: usize) -> String { - use rand::rngs::SmallRng; - use rand::{Rng, SeedableRng}; - - let mut rng = SmallRng::from_entropy(); - let mut s = String::new(); - for _ in 0..count { - if rng.gen() { - s.push('1'); - } else { - s.push('0'); - } - } - s - } - - let re = regex!(r"[01]*1[01]{20}$"); - let text = { - let mut pieces = ones_and_zeroes(100_000); - pieces.push('1'); - pieces.push_str(&ones_and_zeroes(20)); - pieces - }; - assert!(re.is_match(text!(&*text))); -} - -#[test] -fn nest_limit_makes_it_parse() { - use regex::RegexBuilder; - - RegexBuilder::new( - r#"(?-u) - 2(?: - [45]\d{3}| - 7(?: - 1[0-267]| - 2[0-289]| - 3[0-29]| - 4[01]| - 5[1-3]| - 6[013]| - 7[0178]| - 91 - )| - 8(?: - 0[125]| - [139][1-6]| - 2[0157-9]| - 41| - 6[1-35]| - 7[1-5]| - 8[1-8]| - 90 - )| - 9(?: - 0[0-2]| - 1[0-4]| - 2[568]| - 3[3-6]| - 5[5-7]| - 6[0167]| - 7[15]| - 8[0146-9] - ) - )\d{4}| - 3(?: - 12?[5-7]\d{2}| - 0(?: - 2(?: - [025-79]\d| - [348]\d{1,2} - )| - 3(?: - [2-4]\d| - [56]\d? - ) - )| - 2(?: - 1\d{2}| - 2(?: - [12]\d| - [35]\d{1,2}| - 4\d? 
- ) - )| - 3(?: - 1\d{2}| - 2(?: - [2356]\d| - 4\d{1,2} - ) - )| - 4(?: - 1\d{2}| - 2(?: - 2\d{1,2}| - [47]| - 5\d{2} - ) - )| - 5(?: - 1\d{2}| - 29 - )| - [67]1\d{2}| - 8(?: - 1\d{2}| - 2(?: - 2\d{2}| - 3| - 4\d - ) - ) - )\d{3}| - 4(?: - 0(?: - 2(?: - [09]\d| - 7 - )| - 33\d{2} - )| - 1\d{3}| - 2(?: - 1\d{2}| - 2(?: - [25]\d?| - [348]\d| - [67]\d{1,2} - ) - )| - 3(?: - 1\d{2}(?: - \d{2} - )?| - 2(?: - [045]\d| - [236-9]\d{1,2} - )| - 32\d{2} - )| - 4(?: - [18]\d{2}| - 2(?: - [2-46]\d{2}| - 3 - )| - 5[25]\d{2} - )| - 5(?: - 1\d{2}| - 2(?: - 3\d| - 5 - ) - )| - 6(?: - [18]\d{2}| - 2(?: - 3(?: - \d{2} - )?| - [46]\d{1,2}| - 5\d{2}| - 7\d - )| - 5(?: - 3\d?| - 4\d| - [57]\d{1,2}| - 6\d{2}| - 8 - ) - )| - 71\d{2}| - 8(?: - [18]\d{2}| - 23\d{2}| - 54\d{2} - )| - 9(?: - [18]\d{2}| - 2[2-5]\d{2}| - 53\d{1,2} - ) - )\d{3}| - 5(?: - 02[03489]\d{2}| - 1\d{2}| - 2(?: - 1\d{2}| - 2(?: - 2(?: - \d{2} - )?| - [457]\d{2} - ) - )| - 3(?: - 1\d{2}| - 2(?: - [37](?: - \d{2} - )?| - [569]\d{2} - ) - )| - 4(?: - 1\d{2}| - 2[46]\d{2} - )| - 5(?: - 1\d{2}| - 26\d{1,2} - )| - 6(?: - [18]\d{2}| - 2| - 53\d{2} - )| - 7(?: - 1| - 24 - )\d{2}| - 8(?: - 1| - 26 - )\d{2}| - 91\d{2} - )\d{3}| - 6(?: - 0(?: - 1\d{2}| - 2(?: - 3\d{2}| - 4\d{1,2} - ) - )| - 2(?: - 2[2-5]\d{2}| - 5(?: - [3-5]\d{2}| - 7 - )| - 8\d{2} - )| - 3(?: - 1| - 2[3478] - )\d{2}| - 4(?: - 1| - 2[34] - )\d{2}| - 5(?: - 1| - 2[47] - )\d{2}| - 6(?: - [18]\d{2}| - 6(?: - 2(?: - 2\d| - [34]\d{2} - )| - 5(?: - [24]\d{2}| - 3\d| - 5\d{1,2} - ) - ) - )| - 72[2-5]\d{2}| - 8(?: - 1\d{2}| - 2[2-5]\d{2} - )| - 9(?: - 1\d{2}| - 2[2-6]\d{2} - ) - )\d{3}| - 7(?: - (?: - 02| - [3-589]1| - 6[12]| - 72[24] - )\d{2}| - 21\d{3}| - 32 - )\d{3}| - 8(?: - (?: - 4[12]| - [5-7]2| - 1\d? - )| - (?: - 0| - 3[12]| - [5-7]1| - 217 - )\d - )\d{4}| - 9(?: - [35]1| - (?: - [024]2| - 81 - )\d| - (?: - 1| - [24]1 - )\d{2} - )\d{3} - "#, - ) - .build() - .unwrap(); -} diff --git a/collector/compile-benchmarks/regex-1.5.5/tests/flags.rs b/collector/compile-benchmarks/regex-1.5.5/tests/flags.rs deleted file mode 100644 index c33b82d43..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/tests/flags.rs +++ /dev/null @@ -1,31 +0,0 @@ -mat!(match_flag_case, "(?-u)(?i)abc", "ABC", Some((0, 3))); -mat!(match_flag_weird_case, "(?-u)(?i)a(?-i)bc", "Abc", Some((0, 3))); -mat!(match_flag_weird_case_not, "(?-u)(?i)a(?-i)bc", "ABC", None); -mat!(match_flag_case_dotnl, "(?-u)(?is)a(?u:.)", "A\n", Some((0, 2))); -mat!( - match_flag_case_dotnl_toggle, - "(?-u)(?is)a(?u:.)(?-is)a(?u:.)", - "A\nab", - Some((0, 4)) -); -mat!( - match_flag_case_dotnl_toggle_not, - "(?-u)(?is)a(?u:.)(?-is)a(?u:.)", - "A\na\n", - None -); -mat!( - match_flag_case_dotnl_toggle_ok, - "(?-u)(?is)a(?u:.)(?-is:a(?u:.))?", - "A\na\n", - Some((0, 2)) -); -mat!( - match_flag_multi, - r"(?-u)(?m)(?:^\d+$\n?)+", - "123\n456\n789", - Some((0, 11)) -); -mat!(match_flag_ungreedy, "(?U)a+", "aa", Some((0, 1))); -mat!(match_flag_ungreedy_greedy, "(?U)a+?", "aa", Some((0, 2))); -mat!(match_flag_ungreedy_noop, "(?U)(?-U)a+", "aa", Some((0, 2))); diff --git a/collector/compile-benchmarks/regex-1.5.5/tests/fowler.rs b/collector/compile-benchmarks/regex-1.5.5/tests/fowler.rs deleted file mode 100644 index 7f56a758d..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/tests/fowler.rs +++ /dev/null @@ -1,1588 +0,0 @@ -// DO NOT EDIT. Automatically generated by 'scripts/regex-match-tests.py' -// on 2019-09-02 11:07:37.849994. 
- -// Tests from basic.dat -mat!(match_basic_3, r"abracadabra$", r"abracadabracadabra", Some((7, 18))); -mat!(match_basic_4, r"a...b", r"abababbb", Some((2, 7))); -mat!(match_basic_5, r"XXXXXX", r"..XXXXXX", Some((2, 8))); -mat!(match_basic_6, r"\)", r"()", Some((1, 2))); -mat!(match_basic_7, r"a]", r"a]a", Some((0, 2))); -mat!(match_basic_9, r"\}", r"}", Some((0, 1))); -mat!(match_basic_10, r"\]", r"]", Some((0, 1))); -mat!(match_basic_12, r"]", r"]", Some((0, 1))); -mat!(match_basic_15, r"^a", r"ax", Some((0, 1))); -mat!(match_basic_16, r"\^a", r"a^a", Some((1, 3))); -mat!(match_basic_17, r"a\^", r"a^", Some((0, 2))); -mat!(match_basic_18, r"a$", r"aa", Some((1, 2))); -mat!(match_basic_19, r"a\$", r"a$", Some((0, 2))); -mat!(match_basic_20, r"^$", r"", Some((0, 0))); -mat!(match_basic_21, r"$^", r"", Some((0, 0))); -mat!(match_basic_22, r"a($)", r"aa", Some((1, 2)), Some((2, 2))); -mat!(match_basic_23, r"a*(^a)", r"aa", Some((0, 1)), Some((0, 1))); -mat!(match_basic_24, r"(..)*(...)*", r"a", Some((0, 0))); -mat!(match_basic_25, r"(..)*(...)*", r"abcd", Some((0, 4)), Some((2, 4))); -mat!( - match_basic_26, - r"(ab|a)(bc|c)", - r"abc", - Some((0, 3)), - Some((0, 2)), - Some((2, 3)) -); -mat!(match_basic_27, r"(ab)c|abc", r"abc", Some((0, 3)), Some((0, 2))); -mat!(match_basic_28, r"a{0}b", r"ab", Some((1, 2))); -mat!( - match_basic_29, - r"(a*)(b?)(b+)b{3}", - r"aaabbbbbbb", - Some((0, 10)), - Some((0, 3)), - Some((3, 4)), - Some((4, 7)) -); -mat!( - match_basic_30, - r"(a*)(b{0,1})(b{1,})b{3}", - r"aaabbbbbbb", - Some((0, 10)), - Some((0, 3)), - Some((3, 4)), - Some((4, 7)) -); -mat!( - match_basic_32, - r"((a|a)|a)", - r"a", - Some((0, 1)), - Some((0, 1)), - Some((0, 1)) -); -mat!( - match_basic_33, - r"(a*)(a|aa)", - r"aaaa", - Some((0, 4)), - Some((0, 3)), - Some((3, 4)) -); -mat!(match_basic_34, r"a*(a.|aa)", r"aaaa", Some((0, 4)), Some((2, 4))); -mat!( - match_basic_35, - r"a(b)|c(d)|a(e)f", - r"aef", - Some((0, 3)), - None, - None, - Some((1, 2)) -); -mat!(match_basic_36, r"(a|b)?.*", r"b", Some((0, 1)), Some((0, 1))); -mat!(match_basic_37, r"(a|b)c|a(b|c)", r"ac", Some((0, 2)), Some((0, 1))); -mat!( - match_basic_38, - r"(a|b)c|a(b|c)", - r"ab", - Some((0, 2)), - None, - Some((1, 2)) -); -mat!(match_basic_39, r"(a|b)*c|(a|ab)*c", r"abc", Some((0, 3)), Some((1, 2))); -mat!(match_basic_40, r"(a|b)*c|(a|ab)*c", r"xc", Some((1, 2))); -mat!( - match_basic_41, - r"(.a|.b).*|.*(.a|.b)", - r"xa", - Some((0, 2)), - Some((0, 2)) -); -mat!(match_basic_42, r"a?(ab|ba)ab", r"abab", Some((0, 4)), Some((0, 2))); -mat!(match_basic_43, r"a?(ac{0}b|ba)ab", r"abab", Some((0, 4)), Some((0, 2))); -mat!(match_basic_44, r"ab|abab", r"abbabab", Some((0, 2))); -mat!(match_basic_45, r"aba|bab|bba", r"baaabbbaba", Some((5, 8))); -mat!(match_basic_46, r"aba|bab", r"baaabbbaba", Some((6, 9))); -mat!( - match_basic_47, - r"(aa|aaa)*|(a|aaaaa)", - r"aa", - Some((0, 2)), - Some((0, 2)) -); -mat!( - match_basic_48, - r"(a.|.a.)*|(a|.a...)", - r"aa", - Some((0, 2)), - Some((0, 2)) -); -mat!(match_basic_49, r"ab|a", r"xabc", Some((1, 3))); -mat!(match_basic_50, r"ab|a", r"xxabc", Some((2, 4))); -mat!( - match_basic_51, - r"(?i)(?-u)(Ab|cD)*", - r"aBcD", - Some((0, 4)), - Some((2, 4)) -); -mat!(match_basic_52, r"[^-]", r"--a", Some((2, 3))); -mat!(match_basic_53, r"[a-]*", r"--a", Some((0, 3))); -mat!(match_basic_54, r"[a-m-]*", r"--amoma--", Some((0, 4))); -mat!( - match_basic_55, - r":::1:::0:|:::1:1:0:", - r":::0:::1:::1:::0:", - Some((8, 17)) -); -mat!( - match_basic_56, - r":::1:::0:|:::1:1:1:", - 
r":::0:::1:::1:::0:", - Some((8, 17)) -); -mat!(match_basic_57, r"[[:upper:]]", r"A", Some((0, 1))); -mat!(match_basic_58, r"[[:lower:]]+", r"`az{", Some((1, 3))); -mat!(match_basic_59, r"[[:upper:]]+", r"@AZ[", Some((1, 3))); -mat!( - match_basic_65, - r" -", - r" -", - Some((0, 1)) -); -mat!( - match_basic_66, - r" -", - r" -", - Some((0, 1)) -); -mat!( - match_basic_67, - r"[^a]", - r" -", - Some((0, 1)) -); -mat!( - match_basic_68, - r" -a", - r" -a", - Some((0, 2)) -); -mat!( - match_basic_69, - r"(a)(b)(c)", - r"abc", - Some((0, 3)), - Some((0, 1)), - Some((1, 2)), - Some((2, 3)) -); -mat!(match_basic_70, r"xxx", r"xxx", Some((0, 3))); -mat!( - match_basic_71, - r"(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)", - r"feb 6,", - Some((0, 6)) -); -mat!( - match_basic_72, - r"(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)", - r"2/7", - Some((0, 3)) -); -mat!( - match_basic_73, - r"(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)", - r"feb 1,Feb 6", - Some((5, 11)) -); -mat!( - match_basic_74, - r"((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))", - r"x", - Some((0, 1)), - Some((0, 1)), - Some((0, 1)) -); -mat!( - match_basic_75, - r"((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))*", - r"xx", - Some((0, 2)), - Some((1, 2)), - Some((1, 2)) -); -mat!( - match_basic_76, - r"a?(ab|ba)*", - r"ababababababababababababababababababababababababababababababababababababababababa", - Some((0, 81)), - Some((79, 81)) -); -mat!( - match_basic_77, - r"abaa|abbaa|abbbaa|abbbbaa", - r"ababbabbbabbbabbbbabbbbaa", - Some((18, 25)) -); -mat!( - match_basic_78, - r"abaa|abbaa|abbbaa|abbbbaa", - r"ababbabbbabbbabbbbabaa", - Some((18, 22)) -); -mat!( - match_basic_79, - r"aaac|aabc|abac|abbc|baac|babc|bbac|bbbc", - r"baaabbbabac", - Some((7, 11)) -); -mat!(match_basic_80, r".*", r"", Some((0, 2))); -mat!( - match_basic_81, - r"aaaa|bbbb|cccc|ddddd|eeeeee|fffffff|gggg|hhhh|iiiii|jjjjj|kkkkk|llll", - r"XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa", - Some((53, 57)) -); -mat!(match_basic_83, r"a*a*a*a*a*b", r"aaaaaaaaab", Some((0, 10))); -mat!(match_basic_84, r"^", r"", Some((0, 0))); -mat!(match_basic_85, r"$", r"", Some((0, 0))); -mat!(match_basic_86, r"^$", r"", Some((0, 0))); -mat!(match_basic_87, r"^a$", r"a", Some((0, 1))); -mat!(match_basic_88, r"abc", r"abc", Some((0, 3))); -mat!(match_basic_89, r"abc", r"xabcy", Some((1, 4))); -mat!(match_basic_90, r"abc", r"ababc", Some((2, 5))); -mat!(match_basic_91, r"ab*c", r"abc", Some((0, 3))); -mat!(match_basic_92, r"ab*bc", r"abc", Some((0, 3))); -mat!(match_basic_93, r"ab*bc", r"abbc", Some((0, 4))); -mat!(match_basic_94, r"ab*bc", r"abbbbc", Some((0, 6))); -mat!(match_basic_95, r"ab+bc", r"abbc", Some((0, 4))); -mat!(match_basic_96, r"ab+bc", r"abbbbc", Some((0, 6))); -mat!(match_basic_97, r"ab?bc", r"abbc", Some((0, 4))); -mat!(match_basic_98, r"ab?bc", r"abc", Some((0, 3))); -mat!(match_basic_99, r"ab?c", r"abc", Some((0, 3))); -mat!(match_basic_100, r"^abc$", r"abc", Some((0, 3))); -mat!(match_basic_101, r"^abc", r"abcc", Some((0, 3))); -mat!(match_basic_102, r"abc$", r"aabc", Some((1, 4))); -mat!(match_basic_103, r"^", r"abc", Some((0, 0))); -mat!(match_basic_104, r"$", r"abc", Some((3, 3))); -mat!(match_basic_105, r"a.c", r"abc", Some((0, 3))); -mat!(match_basic_106, r"a.c", r"axc", Some((0, 3))); -mat!(match_basic_107, r"a.*c", r"axyzc", Some((0, 5))); -mat!(match_basic_108, r"a[bc]d", r"abd", Some((0, 3))); -mat!(match_basic_109, r"a[b-d]e", r"ace", Some((0, 3))); 
-mat!(match_basic_110, r"a[b-d]", r"aac", Some((1, 3))); -mat!(match_basic_111, r"a[-b]", r"a-", Some((0, 2))); -mat!(match_basic_112, r"a[b-]", r"a-", Some((0, 2))); -mat!(match_basic_113, r"a]", r"a]", Some((0, 2))); -mat!(match_basic_114, r"a[]]b", r"a]b", Some((0, 3))); -mat!(match_basic_115, r"a[^bc]d", r"aed", Some((0, 3))); -mat!(match_basic_116, r"a[^-b]c", r"adc", Some((0, 3))); -mat!(match_basic_117, r"a[^]b]c", r"adc", Some((0, 3))); -mat!(match_basic_118, r"ab|cd", r"abc", Some((0, 2))); -mat!(match_basic_119, r"ab|cd", r"abcd", Some((0, 2))); -mat!(match_basic_120, r"a\(b", r"a(b", Some((0, 3))); -mat!(match_basic_121, r"a\(*b", r"ab", Some((0, 2))); -mat!(match_basic_122, r"a\(*b", r"a((b", Some((0, 4))); -mat!( - match_basic_123, - r"((a))", - r"abc", - Some((0, 1)), - Some((0, 1)), - Some((0, 1)) -); -mat!( - match_basic_124, - r"(a)b(c)", - r"abc", - Some((0, 3)), - Some((0, 1)), - Some((2, 3)) -); -mat!(match_basic_125, r"a+b+c", r"aabbabc", Some((4, 7))); -mat!(match_basic_126, r"a*", r"aaa", Some((0, 3))); -mat!(match_basic_128, r"(a*)*", r"-", Some((0, 0)), None); -mat!(match_basic_129, r"(a*)+", r"-", Some((0, 0)), Some((0, 0))); -mat!(match_basic_131, r"(a*|b)*", r"-", Some((0, 0)), None); -mat!(match_basic_132, r"(a+|b)*", r"ab", Some((0, 2)), Some((1, 2))); -mat!(match_basic_133, r"(a+|b)+", r"ab", Some((0, 2)), Some((1, 2))); -mat!(match_basic_134, r"(a+|b)?", r"ab", Some((0, 1)), Some((0, 1))); -mat!(match_basic_135, r"[^ab]*", r"cde", Some((0, 3))); -mat!(match_basic_137, r"(^)*", r"-", Some((0, 0)), None); -mat!(match_basic_138, r"a*", r"", Some((0, 0))); -mat!(match_basic_139, r"([abc])*d", r"abbbcd", Some((0, 6)), Some((4, 5))); -mat!(match_basic_140, r"([abc])*bcd", r"abcd", Some((0, 4)), Some((0, 1))); -mat!(match_basic_141, r"a|b|c|d|e", r"e", Some((0, 1))); -mat!(match_basic_142, r"(a|b|c|d|e)f", r"ef", Some((0, 2)), Some((0, 1))); -mat!(match_basic_144, r"((a*|b))*", r"-", Some((0, 0)), None, None); -mat!(match_basic_145, r"abcd*efg", r"abcdefg", Some((0, 7))); -mat!(match_basic_146, r"ab*", r"xabyabbbz", Some((1, 3))); -mat!(match_basic_147, r"ab*", r"xayabbbz", Some((1, 2))); -mat!(match_basic_148, r"(ab|cd)e", r"abcde", Some((2, 5)), Some((2, 4))); -mat!(match_basic_149, r"[abhgefdc]ij", r"hij", Some((0, 3))); -mat!(match_basic_150, r"(a|b)c*d", r"abcd", Some((1, 4)), Some((1, 2))); -mat!(match_basic_151, r"(ab|ab*)bc", r"abc", Some((0, 3)), Some((0, 1))); -mat!(match_basic_152, r"a([bc]*)c*", r"abc", Some((0, 3)), Some((1, 3))); -mat!( - match_basic_153, - r"a([bc]*)(c*d)", - r"abcd", - Some((0, 4)), - Some((1, 3)), - Some((3, 4)) -); -mat!( - match_basic_154, - r"a([bc]+)(c*d)", - r"abcd", - Some((0, 4)), - Some((1, 3)), - Some((3, 4)) -); -mat!( - match_basic_155, - r"a([bc]*)(c+d)", - r"abcd", - Some((0, 4)), - Some((1, 2)), - Some((2, 4)) -); -mat!(match_basic_156, r"a[bcd]*dcdcde", r"adcdcde", Some((0, 7))); -mat!(match_basic_157, r"(ab|a)b*c", r"abc", Some((0, 3)), Some((0, 2))); -mat!( - match_basic_158, - r"((a)(b)c)(d)", - r"abcd", - Some((0, 4)), - Some((0, 3)), - Some((0, 1)), - Some((1, 2)), - Some((3, 4)) -); -mat!(match_basic_159, r"[A-Za-z_][A-Za-z0-9_]*", r"alpha", Some((0, 5))); -mat!(match_basic_160, r"^a(bc+|b[eh])g|.h$", r"abh", Some((1, 3))); -mat!( - match_basic_161, - r"(bc+d$|ef*g.|h?i(j|k))", - r"effgz", - Some((0, 5)), - Some((0, 5)) -); -mat!( - match_basic_162, - r"(bc+d$|ef*g.|h?i(j|k))", - r"ij", - Some((0, 2)), - Some((0, 2)), - Some((1, 2)) -); -mat!( - match_basic_163, - r"(bc+d$|ef*g.|h?i(j|k))", - r"reffgz", - 
Some((1, 6)), - Some((1, 6)) -); -mat!( - match_basic_164, - r"(((((((((a)))))))))", - r"a", - Some((0, 1)), - Some((0, 1)), - Some((0, 1)), - Some((0, 1)), - Some((0, 1)), - Some((0, 1)), - Some((0, 1)), - Some((0, 1)), - Some((0, 1)), - Some((0, 1)) -); -mat!( - match_basic_165, - r"multiple words", - r"multiple words yeah", - Some((0, 14)) -); -mat!( - match_basic_166, - r"(.*)c(.*)", - r"abcde", - Some((0, 5)), - Some((0, 2)), - Some((3, 5)) -); -mat!(match_basic_167, r"abcd", r"abcd", Some((0, 4))); -mat!(match_basic_168, r"a(bc)d", r"abcd", Some((0, 4)), Some((1, 3))); -mat!(match_basic_169, r"a[-]?c", r"ac", Some((0, 3))); -mat!( - match_basic_170, - r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - r"Muammar Qaddafi", - Some((0, 15)), - None, - Some((10, 12)) -); -mat!( - match_basic_171, - r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - r"Mo'ammar Gadhafi", - Some((0, 16)), - None, - Some((11, 13)) -); -mat!( - match_basic_172, - r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - r"Muammar Kaddafi", - Some((0, 15)), - None, - Some((10, 12)) -); -mat!( - match_basic_173, - r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - r"Muammar Qadhafi", - Some((0, 15)), - None, - Some((10, 12)) -); -mat!( - match_basic_174, - r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - r"Muammar Gadafi", - Some((0, 14)), - None, - Some((10, 11)) -); -mat!( - match_basic_175, - r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - r"Mu'ammar Qadafi", - Some((0, 15)), - None, - Some((11, 12)) -); -mat!( - match_basic_176, - r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - r"Moamar Gaddafi", - Some((0, 14)), - None, - Some((9, 11)) -); -mat!( - match_basic_177, - r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - r"Mu'ammar Qadhdhafi", - Some((0, 18)), - None, - Some((13, 15)) -); -mat!( - match_basic_178, - r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - r"Muammar Khaddafi", - Some((0, 16)), - None, - Some((11, 13)) -); -mat!( - match_basic_179, - r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - r"Muammar Ghaddafy", - Some((0, 16)), - None, - Some((11, 13)) -); -mat!( - match_basic_180, - r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - r"Muammar Ghadafi", - Some((0, 15)), - None, - Some((11, 12)) -); -mat!( - match_basic_181, - r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - r"Muammar Ghaddafi", - Some((0, 16)), - None, - Some((11, 13)) -); -mat!( - match_basic_182, - r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - r"Muamar Kaddafi", - Some((0, 14)), - None, - Some((9, 11)) -); -mat!( - match_basic_183, - r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - r"Muammar Quathafi", - Some((0, 16)), - None, - Some((11, 13)) -); -mat!( - match_basic_184, - r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - r"Muammar Gheddafi", - Some((0, 16)), - None, - Some((11, 13)) -); -mat!( - match_basic_185, - r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - r"Moammar Khadafy", - Some((0, 15)), - None, - Some((11, 12)) -); -mat!( - match_basic_186, - r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", - r"Moammar Qudhafi", - Some((0, 15)), - None, - Some((10, 12)) -); -mat!(match_basic_187, r"a+(b|c)*d+", r"aabcdd", Some((0, 6)), Some((3, 4))); 
-mat!(match_basic_188, r"^.+$", r"vivi", Some((0, 4))); -mat!(match_basic_189, r"^(.+)$", r"vivi", Some((0, 4)), Some((0, 4))); -mat!( - match_basic_190, - r"^([^!.]+).att.com!(.+)$", - r"gryphon.att.com!eby", - Some((0, 19)), - Some((0, 7)), - Some((16, 19)) -); -mat!( - match_basic_191, - r"^([^!]+!)?([^!]+)$", - r"bas", - Some((0, 3)), - None, - Some((0, 3)) -); -mat!( - match_basic_192, - r"^([^!]+!)?([^!]+)$", - r"bar!bas", - Some((0, 7)), - Some((0, 4)), - Some((4, 7)) -); -mat!( - match_basic_193, - r"^([^!]+!)?([^!]+)$", - r"foo!bas", - Some((0, 7)), - Some((0, 4)), - Some((4, 7)) -); -mat!( - match_basic_194, - r"^.+!([^!]+!)([^!]+)$", - r"foo!bar!bas", - Some((0, 11)), - Some((4, 8)), - Some((8, 11)) -); -mat!( - match_basic_195, - r"((foo)|(bar))!bas", - r"bar!bas", - Some((0, 7)), - Some((0, 3)), - None, - Some((0, 3)) -); -mat!( - match_basic_196, - r"((foo)|(bar))!bas", - r"foo!bar!bas", - Some((4, 11)), - Some((4, 7)), - None, - Some((4, 7)) -); -mat!( - match_basic_197, - r"((foo)|(bar))!bas", - r"foo!bas", - Some((0, 7)), - Some((0, 3)), - Some((0, 3)) -); -mat!( - match_basic_198, - r"((foo)|bar)!bas", - r"bar!bas", - Some((0, 7)), - Some((0, 3)) -); -mat!( - match_basic_199, - r"((foo)|bar)!bas", - r"foo!bar!bas", - Some((4, 11)), - Some((4, 7)) -); -mat!( - match_basic_200, - r"((foo)|bar)!bas", - r"foo!bas", - Some((0, 7)), - Some((0, 3)), - Some((0, 3)) -); -mat!( - match_basic_201, - r"(foo|(bar))!bas", - r"bar!bas", - Some((0, 7)), - Some((0, 3)), - Some((0, 3)) -); -mat!( - match_basic_202, - r"(foo|(bar))!bas", - r"foo!bar!bas", - Some((4, 11)), - Some((4, 7)), - Some((4, 7)) -); -mat!( - match_basic_203, - r"(foo|(bar))!bas", - r"foo!bas", - Some((0, 7)), - Some((0, 3)) -); -mat!( - match_basic_204, - r"(foo|bar)!bas", - r"bar!bas", - Some((0, 7)), - Some((0, 3)) -); -mat!( - match_basic_205, - r"(foo|bar)!bas", - r"foo!bar!bas", - Some((4, 11)), - Some((4, 7)) -); -mat!( - match_basic_206, - r"(foo|bar)!bas", - r"foo!bas", - Some((0, 7)), - Some((0, 3)) -); -mat!( - match_basic_207, - r"^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", - r"foo!bar!bas", - Some((0, 11)), - Some((0, 11)), - None, - None, - Some((4, 8)), - Some((8, 11)) -); -mat!( - match_basic_208, - r"^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", - r"bas", - Some((0, 3)), - None, - Some((0, 3)) -); -mat!( - match_basic_209, - r"^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", - r"bar!bas", - Some((0, 7)), - Some((0, 4)), - Some((4, 7)) -); -mat!( - match_basic_210, - r"^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", - r"foo!bar!bas", - Some((0, 11)), - None, - None, - Some((4, 8)), - Some((8, 11)) -); -mat!( - match_basic_211, - r"^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", - r"foo!bas", - Some((0, 7)), - Some((0, 4)), - Some((4, 7)) -); -mat!( - match_basic_212, - r"^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", - r"bas", - Some((0, 3)), - Some((0, 3)), - None, - Some((0, 3)) -); -mat!( - match_basic_213, - r"^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", - r"bar!bas", - Some((0, 7)), - Some((0, 7)), - Some((0, 4)), - Some((4, 7)) -); -mat!( - match_basic_214, - r"^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", - r"foo!bar!bas", - Some((0, 11)), - Some((0, 11)), - None, - None, - Some((4, 8)), - Some((8, 11)) -); -mat!( - match_basic_215, - r"^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", - r"foo!bas", - Some((0, 7)), - Some((0, 7)), - Some((0, 4)), - Some((4, 7)) -); -mat!(match_basic_216, r".*(/XXX).*", r"/XXX", Some((0, 4)), Some((0, 4))); -mat!(match_basic_217, r".*(\\XXX).*", r"\XXX", Some((0, 4)), Some((0, 4))); -mat!(match_basic_218, 
r"\\XXX", r"\XXX", Some((0, 4))); -mat!(match_basic_219, r".*(/000).*", r"/000", Some((0, 4)), Some((0, 4))); -mat!(match_basic_220, r".*(\\000).*", r"\000", Some((0, 4)), Some((0, 4))); -mat!(match_basic_221, r"\\000", r"\000", Some((0, 4))); - -// Tests from nullsubexpr.dat -mat!(match_nullsubexpr_3, r"(a*)*", r"a", Some((0, 1)), Some((0, 1))); -mat!(match_nullsubexpr_5, r"(a*)*", r"x", Some((0, 0)), None); -mat!(match_nullsubexpr_6, r"(a*)*", r"aaaaaa", Some((0, 6)), Some((0, 6))); -mat!(match_nullsubexpr_7, r"(a*)*", r"aaaaaax", Some((0, 6)), Some((0, 6))); -mat!(match_nullsubexpr_8, r"(a*)+", r"a", Some((0, 1)), Some((0, 1))); -mat!(match_nullsubexpr_9, r"(a*)+", r"x", Some((0, 0)), Some((0, 0))); -mat!(match_nullsubexpr_10, r"(a*)+", r"aaaaaa", Some((0, 6)), Some((0, 6))); -mat!(match_nullsubexpr_11, r"(a*)+", r"aaaaaax", Some((0, 6)), Some((0, 6))); -mat!(match_nullsubexpr_12, r"(a+)*", r"a", Some((0, 1)), Some((0, 1))); -mat!(match_nullsubexpr_13, r"(a+)*", r"x", Some((0, 0))); -mat!(match_nullsubexpr_14, r"(a+)*", r"aaaaaa", Some((0, 6)), Some((0, 6))); -mat!(match_nullsubexpr_15, r"(a+)*", r"aaaaaax", Some((0, 6)), Some((0, 6))); -mat!(match_nullsubexpr_16, r"(a+)+", r"a", Some((0, 1)), Some((0, 1))); -mat!(match_nullsubexpr_17, r"(a+)+", r"x", None); -mat!(match_nullsubexpr_18, r"(a+)+", r"aaaaaa", Some((0, 6)), Some((0, 6))); -mat!(match_nullsubexpr_19, r"(a+)+", r"aaaaaax", Some((0, 6)), Some((0, 6))); -mat!(match_nullsubexpr_21, r"([a]*)*", r"a", Some((0, 1)), Some((0, 1))); -mat!(match_nullsubexpr_23, r"([a]*)*", r"x", Some((0, 0)), None); -mat!(match_nullsubexpr_24, r"([a]*)*", r"aaaaaa", Some((0, 6)), Some((0, 6))); -mat!(match_nullsubexpr_25, r"([a]*)*", r"aaaaaax", Some((0, 6)), Some((0, 6))); -mat!(match_nullsubexpr_26, r"([a]*)+", r"a", Some((0, 1)), Some((0, 1))); -mat!(match_nullsubexpr_27, r"([a]*)+", r"x", Some((0, 0)), Some((0, 0))); -mat!(match_nullsubexpr_28, r"([a]*)+", r"aaaaaa", Some((0, 6)), Some((0, 6))); -mat!(match_nullsubexpr_29, r"([a]*)+", r"aaaaaax", Some((0, 6)), Some((0, 6))); -mat!(match_nullsubexpr_30, r"([^b]*)*", r"a", Some((0, 1)), Some((0, 1))); -mat!(match_nullsubexpr_32, r"([^b]*)*", r"b", Some((0, 0)), None); -mat!(match_nullsubexpr_33, r"([^b]*)*", r"aaaaaa", Some((0, 6)), Some((0, 6))); -mat!( - match_nullsubexpr_34, - r"([^b]*)*", - r"aaaaaab", - Some((0, 6)), - Some((0, 6)) -); -mat!(match_nullsubexpr_35, r"([ab]*)*", r"a", Some((0, 1)), Some((0, 1))); -mat!(match_nullsubexpr_36, r"([ab]*)*", r"aaaaaa", Some((0, 6)), Some((0, 6))); -mat!(match_nullsubexpr_37, r"([ab]*)*", r"ababab", Some((0, 6)), Some((0, 6))); -mat!(match_nullsubexpr_38, r"([ab]*)*", r"bababa", Some((0, 6)), Some((0, 6))); -mat!(match_nullsubexpr_39, r"([ab]*)*", r"b", Some((0, 1)), Some((0, 1))); -mat!(match_nullsubexpr_40, r"([ab]*)*", r"bbbbbb", Some((0, 6)), Some((0, 6))); -mat!( - match_nullsubexpr_41, - r"([ab]*)*", - r"aaaabcde", - Some((0, 5)), - Some((0, 5)) -); -mat!(match_nullsubexpr_42, r"([^a]*)*", r"b", Some((0, 1)), Some((0, 1))); -mat!(match_nullsubexpr_43, r"([^a]*)*", r"bbbbbb", Some((0, 6)), Some((0, 6))); -mat!(match_nullsubexpr_45, r"([^a]*)*", r"aaaaaa", Some((0, 0)), None); -mat!( - match_nullsubexpr_46, - r"([^ab]*)*", - r"ccccxx", - Some((0, 6)), - Some((0, 6)) -); -mat!(match_nullsubexpr_48, r"([^ab]*)*", r"ababab", Some((0, 0)), None); -mat!( - match_nullsubexpr_50, - r"((z)+|a)*", - r"zabcde", - Some((0, 2)), - Some((1, 2)) -); -mat!( - match_nullsubexpr_69, - r"(a*)*(x)", - r"x", - Some((0, 1)), - None, - Some((0, 1)) -); -mat!( - 
match_nullsubexpr_70, - r"(a*)*(x)", - r"ax", - Some((0, 2)), - Some((0, 1)), - Some((1, 2)) -); -mat!( - match_nullsubexpr_71, - r"(a*)*(x)", - r"axa", - Some((0, 2)), - Some((0, 1)), - Some((1, 2)) -); -mat!( - match_nullsubexpr_73, - r"(a*)+(x)", - r"x", - Some((0, 1)), - Some((0, 0)), - Some((0, 1)) -); -mat!( - match_nullsubexpr_74, - r"(a*)+(x)", - r"ax", - Some((0, 2)), - Some((0, 1)), - Some((1, 2)) -); -mat!( - match_nullsubexpr_75, - r"(a*)+(x)", - r"axa", - Some((0, 2)), - Some((0, 1)), - Some((1, 2)) -); -mat!( - match_nullsubexpr_77, - r"(a*){2}(x)", - r"x", - Some((0, 1)), - Some((0, 0)), - Some((0, 1)) -); -mat!( - match_nullsubexpr_78, - r"(a*){2}(x)", - r"ax", - Some((0, 2)), - Some((1, 1)), - Some((1, 2)) -); -mat!( - match_nullsubexpr_79, - r"(a*){2}(x)", - r"axa", - Some((0, 2)), - Some((1, 1)), - Some((1, 2)) -); - -// Tests from repetition.dat -mat!(match_repetition_10, r"((..)|(.))", r"", None); -mat!(match_repetition_11, r"((..)|(.))((..)|(.))", r"", None); -mat!(match_repetition_12, r"((..)|(.))((..)|(.))((..)|(.))", r"", None); -mat!(match_repetition_14, r"((..)|(.)){1}", r"", None); -mat!(match_repetition_15, r"((..)|(.)){2}", r"", None); -mat!(match_repetition_16, r"((..)|(.)){3}", r"", None); -mat!(match_repetition_18, r"((..)|(.))*", r"", Some((0, 0))); -mat!( - match_repetition_20, - r"((..)|(.))", - r"a", - Some((0, 1)), - Some((0, 1)), - None, - Some((0, 1)) -); -mat!(match_repetition_21, r"((..)|(.))((..)|(.))", r"a", None); -mat!(match_repetition_22, r"((..)|(.))((..)|(.))((..)|(.))", r"a", None); -mat!( - match_repetition_24, - r"((..)|(.)){1}", - r"a", - Some((0, 1)), - Some((0, 1)), - None, - Some((0, 1)) -); -mat!(match_repetition_25, r"((..)|(.)){2}", r"a", None); -mat!(match_repetition_26, r"((..)|(.)){3}", r"a", None); -mat!( - match_repetition_28, - r"((..)|(.))*", - r"a", - Some((0, 1)), - Some((0, 1)), - None, - Some((0, 1)) -); -mat!( - match_repetition_30, - r"((..)|(.))", - r"aa", - Some((0, 2)), - Some((0, 2)), - Some((0, 2)), - None -); -mat!( - match_repetition_31, - r"((..)|(.))((..)|(.))", - r"aa", - Some((0, 2)), - Some((0, 1)), - None, - Some((0, 1)), - Some((1, 2)), - None, - Some((1, 2)) -); -mat!(match_repetition_32, r"((..)|(.))((..)|(.))((..)|(.))", r"aa", None); -mat!( - match_repetition_34, - r"((..)|(.)){1}", - r"aa", - Some((0, 2)), - Some((0, 2)), - Some((0, 2)), - None -); -mat!( - match_repetition_35, - r"((..)|(.)){2}", - r"aa", - Some((0, 2)), - Some((1, 2)), - None, - Some((1, 2)) -); -mat!(match_repetition_36, r"((..)|(.)){3}", r"aa", None); -mat!( - match_repetition_38, - r"((..)|(.))*", - r"aa", - Some((0, 2)), - Some((0, 2)), - Some((0, 2)), - None -); -mat!( - match_repetition_40, - r"((..)|(.))", - r"aaa", - Some((0, 2)), - Some((0, 2)), - Some((0, 2)), - None -); -mat!( - match_repetition_41, - r"((..)|(.))((..)|(.))", - r"aaa", - Some((0, 3)), - Some((0, 2)), - Some((0, 2)), - None, - Some((2, 3)), - None, - Some((2, 3)) -); -mat!( - match_repetition_42, - r"((..)|(.))((..)|(.))((..)|(.))", - r"aaa", - Some((0, 3)), - Some((0, 1)), - None, - Some((0, 1)), - Some((1, 2)), - None, - Some((1, 2)), - Some((2, 3)), - None, - Some((2, 3)) -); -mat!( - match_repetition_44, - r"((..)|(.)){1}", - r"aaa", - Some((0, 2)), - Some((0, 2)), - Some((0, 2)), - None -); -mat!( - match_repetition_46, - r"((..)|(.)){2}", - r"aaa", - Some((0, 3)), - Some((2, 3)), - Some((0, 2)), - Some((2, 3)) -); -mat!( - match_repetition_47, - r"((..)|(.)){3}", - r"aaa", - Some((0, 3)), - Some((2, 3)), - None, - Some((2, 3)) -); -mat!( - 
match_repetition_50, - r"((..)|(.))*", - r"aaa", - Some((0, 3)), - Some((2, 3)), - Some((0, 2)), - Some((2, 3)) -); -mat!( - match_repetition_52, - r"((..)|(.))", - r"aaaa", - Some((0, 2)), - Some((0, 2)), - Some((0, 2)), - None -); -mat!( - match_repetition_53, - r"((..)|(.))((..)|(.))", - r"aaaa", - Some((0, 4)), - Some((0, 2)), - Some((0, 2)), - None, - Some((2, 4)), - Some((2, 4)), - None -); -mat!( - match_repetition_54, - r"((..)|(.))((..)|(.))((..)|(.))", - r"aaaa", - Some((0, 4)), - Some((0, 2)), - Some((0, 2)), - None, - Some((2, 3)), - None, - Some((2, 3)), - Some((3, 4)), - None, - Some((3, 4)) -); -mat!( - match_repetition_56, - r"((..)|(.)){1}", - r"aaaa", - Some((0, 2)), - Some((0, 2)), - Some((0, 2)), - None -); -mat!( - match_repetition_57, - r"((..)|(.)){2}", - r"aaaa", - Some((0, 4)), - Some((2, 4)), - Some((2, 4)), - None -); -mat!( - match_repetition_59, - r"((..)|(.)){3}", - r"aaaa", - Some((0, 4)), - Some((3, 4)), - Some((0, 2)), - Some((3, 4)) -); -mat!( - match_repetition_61, - r"((..)|(.))*", - r"aaaa", - Some((0, 4)), - Some((2, 4)), - Some((2, 4)), - None -); -mat!( - match_repetition_63, - r"((..)|(.))", - r"aaaaa", - Some((0, 2)), - Some((0, 2)), - Some((0, 2)), - None -); -mat!( - match_repetition_64, - r"((..)|(.))((..)|(.))", - r"aaaaa", - Some((0, 4)), - Some((0, 2)), - Some((0, 2)), - None, - Some((2, 4)), - Some((2, 4)), - None -); -mat!( - match_repetition_65, - r"((..)|(.))((..)|(.))((..)|(.))", - r"aaaaa", - Some((0, 5)), - Some((0, 2)), - Some((0, 2)), - None, - Some((2, 4)), - Some((2, 4)), - None, - Some((4, 5)), - None, - Some((4, 5)) -); -mat!( - match_repetition_67, - r"((..)|(.)){1}", - r"aaaaa", - Some((0, 2)), - Some((0, 2)), - Some((0, 2)), - None -); -mat!( - match_repetition_68, - r"((..)|(.)){2}", - r"aaaaa", - Some((0, 4)), - Some((2, 4)), - Some((2, 4)), - None -); -mat!( - match_repetition_70, - r"((..)|(.)){3}", - r"aaaaa", - Some((0, 5)), - Some((4, 5)), - Some((2, 4)), - Some((4, 5)) -); -mat!( - match_repetition_73, - r"((..)|(.))*", - r"aaaaa", - Some((0, 5)), - Some((4, 5)), - Some((2, 4)), - Some((4, 5)) -); -mat!( - match_repetition_75, - r"((..)|(.))", - r"aaaaaa", - Some((0, 2)), - Some((0, 2)), - Some((0, 2)), - None -); -mat!( - match_repetition_76, - r"((..)|(.))((..)|(.))", - r"aaaaaa", - Some((0, 4)), - Some((0, 2)), - Some((0, 2)), - None, - Some((2, 4)), - Some((2, 4)), - None -); -mat!( - match_repetition_77, - r"((..)|(.))((..)|(.))((..)|(.))", - r"aaaaaa", - Some((0, 6)), - Some((0, 2)), - Some((0, 2)), - None, - Some((2, 4)), - Some((2, 4)), - None, - Some((4, 6)), - Some((4, 6)), - None -); -mat!( - match_repetition_79, - r"((..)|(.)){1}", - r"aaaaaa", - Some((0, 2)), - Some((0, 2)), - Some((0, 2)), - None -); -mat!( - match_repetition_80, - r"((..)|(.)){2}", - r"aaaaaa", - Some((0, 4)), - Some((2, 4)), - Some((2, 4)), - None -); -mat!( - match_repetition_81, - r"((..)|(.)){3}", - r"aaaaaa", - Some((0, 6)), - Some((4, 6)), - Some((4, 6)), - None -); -mat!( - match_repetition_83, - r"((..)|(.))*", - r"aaaaaa", - Some((0, 6)), - Some((4, 6)), - Some((4, 6)), - None -); -mat!( - match_repetition_90, - r"X(.?){0,}Y", - r"X1234567Y", - Some((0, 9)), - Some((7, 8)) -); -mat!( - match_repetition_91, - r"X(.?){1,}Y", - r"X1234567Y", - Some((0, 9)), - Some((7, 8)) -); -mat!( - match_repetition_92, - r"X(.?){2,}Y", - r"X1234567Y", - Some((0, 9)), - Some((7, 8)) -); -mat!( - match_repetition_93, - r"X(.?){3,}Y", - r"X1234567Y", - Some((0, 9)), - Some((7, 8)) -); -mat!( - match_repetition_94, - r"X(.?){4,}Y", - r"X1234567Y", - 
Some((0, 9)), - Some((7, 8)) -); -mat!( - match_repetition_95, - r"X(.?){5,}Y", - r"X1234567Y", - Some((0, 9)), - Some((7, 8)) -); -mat!( - match_repetition_96, - r"X(.?){6,}Y", - r"X1234567Y", - Some((0, 9)), - Some((7, 8)) -); -mat!( - match_repetition_97, - r"X(.?){7,}Y", - r"X1234567Y", - Some((0, 9)), - Some((7, 8)) -); -mat!( - match_repetition_98, - r"X(.?){8,}Y", - r"X1234567Y", - Some((0, 9)), - Some((8, 8)) -); -mat!( - match_repetition_100, - r"X(.?){0,8}Y", - r"X1234567Y", - Some((0, 9)), - Some((8, 8)) -); -mat!( - match_repetition_102, - r"X(.?){1,8}Y", - r"X1234567Y", - Some((0, 9)), - Some((8, 8)) -); -mat!( - match_repetition_104, - r"X(.?){2,8}Y", - r"X1234567Y", - Some((0, 9)), - Some((8, 8)) -); -mat!( - match_repetition_106, - r"X(.?){3,8}Y", - r"X1234567Y", - Some((0, 9)), - Some((8, 8)) -); -mat!( - match_repetition_108, - r"X(.?){4,8}Y", - r"X1234567Y", - Some((0, 9)), - Some((8, 8)) -); -mat!( - match_repetition_110, - r"X(.?){5,8}Y", - r"X1234567Y", - Some((0, 9)), - Some((8, 8)) -); -mat!( - match_repetition_112, - r"X(.?){6,8}Y", - r"X1234567Y", - Some((0, 9)), - Some((8, 8)) -); -mat!( - match_repetition_114, - r"X(.?){7,8}Y", - r"X1234567Y", - Some((0, 9)), - Some((8, 8)) -); -mat!( - match_repetition_115, - r"X(.?){8,8}Y", - r"X1234567Y", - Some((0, 9)), - Some((8, 8)) -); -mat!( - match_repetition_126, - r"(a|ab|c|bcd){0,}(d*)", - r"ababcd", - Some((0, 1)), - Some((0, 1)), - Some((1, 1)) -); -mat!( - match_repetition_127, - r"(a|ab|c|bcd){1,}(d*)", - r"ababcd", - Some((0, 1)), - Some((0, 1)), - Some((1, 1)) -); -mat!( - match_repetition_128, - r"(a|ab|c|bcd){2,}(d*)", - r"ababcd", - Some((0, 6)), - Some((3, 6)), - Some((6, 6)) -); -mat!( - match_repetition_129, - r"(a|ab|c|bcd){3,}(d*)", - r"ababcd", - Some((0, 6)), - Some((3, 6)), - Some((6, 6)) -); -mat!(match_repetition_130, r"(a|ab|c|bcd){4,}(d*)", r"ababcd", None); -mat!( - match_repetition_131, - r"(a|ab|c|bcd){0,10}(d*)", - r"ababcd", - Some((0, 1)), - Some((0, 1)), - Some((1, 1)) -); -mat!( - match_repetition_132, - r"(a|ab|c|bcd){1,10}(d*)", - r"ababcd", - Some((0, 1)), - Some((0, 1)), - Some((1, 1)) -); -mat!( - match_repetition_133, - r"(a|ab|c|bcd){2,10}(d*)", - r"ababcd", - Some((0, 6)), - Some((3, 6)), - Some((6, 6)) -); -mat!( - match_repetition_134, - r"(a|ab|c|bcd){3,10}(d*)", - r"ababcd", - Some((0, 6)), - Some((3, 6)), - Some((6, 6)) -); -mat!(match_repetition_135, r"(a|ab|c|bcd){4,10}(d*)", r"ababcd", None); -mat!( - match_repetition_136, - r"(a|ab|c|bcd)*(d*)", - r"ababcd", - Some((0, 1)), - Some((0, 1)), - Some((1, 1)) -); -mat!( - match_repetition_137, - r"(a|ab|c|bcd)+(d*)", - r"ababcd", - Some((0, 1)), - Some((0, 1)), - Some((1, 1)) -); -mat!( - match_repetition_143, - r"(ab|a|c|bcd){0,}(d*)", - r"ababcd", - Some((0, 6)), - Some((4, 5)), - Some((5, 6)) -); -mat!( - match_repetition_145, - r"(ab|a|c|bcd){1,}(d*)", - r"ababcd", - Some((0, 6)), - Some((4, 5)), - Some((5, 6)) -); -mat!( - match_repetition_147, - r"(ab|a|c|bcd){2,}(d*)", - r"ababcd", - Some((0, 6)), - Some((4, 5)), - Some((5, 6)) -); -mat!( - match_repetition_149, - r"(ab|a|c|bcd){3,}(d*)", - r"ababcd", - Some((0, 6)), - Some((4, 5)), - Some((5, 6)) -); -mat!(match_repetition_150, r"(ab|a|c|bcd){4,}(d*)", r"ababcd", None); -mat!( - match_repetition_152, - r"(ab|a|c|bcd){0,10}(d*)", - r"ababcd", - Some((0, 6)), - Some((4, 5)), - Some((5, 6)) -); -mat!( - match_repetition_154, - r"(ab|a|c|bcd){1,10}(d*)", - r"ababcd", - Some((0, 6)), - Some((4, 5)), - Some((5, 6)) -); -mat!( - match_repetition_156, - 
r"(ab|a|c|bcd){2,10}(d*)", - r"ababcd", - Some((0, 6)), - Some((4, 5)), - Some((5, 6)) -); -mat!( - match_repetition_158, - r"(ab|a|c|bcd){3,10}(d*)", - r"ababcd", - Some((0, 6)), - Some((4, 5)), - Some((5, 6)) -); -mat!(match_repetition_159, r"(ab|a|c|bcd){4,10}(d*)", r"ababcd", None); -mat!( - match_repetition_161, - r"(ab|a|c|bcd)*(d*)", - r"ababcd", - Some((0, 6)), - Some((4, 5)), - Some((5, 6)) -); -mat!( - match_repetition_163, - r"(ab|a|c|bcd)+(d*)", - r"ababcd", - Some((0, 6)), - Some((4, 5)), - Some((5, 6)) -); diff --git a/collector/compile-benchmarks/regex-1.5.5/tests/macros.rs b/collector/compile-benchmarks/regex-1.5.5/tests/macros.rs deleted file mode 100644 index e70e9489f..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/tests/macros.rs +++ /dev/null @@ -1,160 +0,0 @@ -// Convenience macros. - -macro_rules! findall { - ($re:expr, $text:expr) => {{ - $re.find_iter(text!($text)) - .map(|m| (m.start(), m.end())).collect::<Vec<_>>() - }} -} - -// Macros for automatically producing tests. - -macro_rules! ismatch { - ($name:ident, $re:expr, $text:expr, $ismatch:expr) => { - #[test] - fn $name() { - let re = regex!($re); - assert_eq!($ismatch, re.is_match(text!($text))); - } - }; -} - -macro_rules! mat( - ($name:ident, $re:expr, $text:expr, $($loc:tt)+) => ( - #[test] - fn $name() { - let text = text!($text); - let expected: Vec<Option<_>> = vec![$($loc)+]; - let r = regex!($re); - let got: Vec<Option<_>> = match r.captures(text) { - Some(c) => { - assert!(r.is_match(text)); - assert!(r.shortest_match(text).is_some()); - r.capture_names() - .enumerate() - .map(|(i, _)| c.get(i).map(|m| (m.start(), m.end()))) - .collect() - } - None => vec![None], - }; - // The test set sometimes leave out capture groups, so truncate - // actual capture groups to match test set. - let mut sgot = &got[..]; - if sgot.len() > expected.len() { - sgot = &sgot[0..expected.len()] - } - if expected != sgot { - panic!("For RE '{}' against '{:?}', \ - expected '{:?}' but got '{:?}'", - $re, text, expected, sgot); - } - } - ); -); - -macro_rules! matiter( - ($name:ident, $re:expr, $text:expr) => ( - #[test] - fn $name() { - let text = text!($text); - let expected: Vec<(usize, usize)> = vec![]; - let r = regex!($re); - let got: Vec<_> = - r.find_iter(text).map(|m| (m.start(), m.end())).collect(); - if expected != got { - panic!("For RE '{}' against '{:?}', \ - expected '{:?}' but got '{:?}'", - $re, text, expected, got); - } - let captures_got: Vec<_> = - r.captures_iter(text) - .map(|c| c.get(0).unwrap()) - .map(|m| (m.start(), m.end())) - .collect(); - if captures_got != got { - panic!("For RE '{}' against '{:?}', \ - got '{:?}' using find_iter but got '{:?}' \ - using captures_iter", - $re, text, got, captures_got); - } - } - ); - ($name:ident, $re:expr, $text:expr, $($loc:tt)+) => ( - #[test] - fn $name() { - let text = text!($text); - let expected: Vec<_> = vec![$($loc)+]; - let r = regex!($re); - let got: Vec<_> = - r.find_iter(text).map(|m| (m.start(), m.end())).collect(); - if expected != got { - panic!("For RE '{}' against '{:?}', \ - expected '{:?}' but got '{:?}'", - $re, text, expected, got); - } - let captures_got: Vec<_> = - r.captures_iter(text) - .map(|c| c.get(0).unwrap()) - .map(|m| (m.start(), m.end())) - .collect(); - if captures_got != got { - panic!("For RE '{}' against '{:?}', \ - got '{:?}' using find_iter but got '{:?}' \ - using captures_iter", - $re, text, got, captures_got); - } - } - ); -); - -macro_rules! 
matset { - ($name:ident, $res:expr, $text:expr, $($match_index:expr),*) => { - #[test] - fn $name() { - let text = text!($text); - let set = regex_set!($res); - assert!(set.is_match(text)); - let expected = vec![$($match_index),*]; - let matches = set.matches(text); - assert!(matches.matched_any()); - let got: Vec<_> = matches.into_iter().collect(); - assert_eq!(expected, got); - } - } -} - -macro_rules! nomatset { - ($name:ident, $res:expr, $text:expr) => { - #[test] - fn $name() { - let text = text!($text); - let set = regex_set!($res); - assert!(!set.is_match(text)); - let matches = set.matches(text); - assert!(!matches.matched_any()); - assert_eq!(0, matches.into_iter().count()); - } - } -} - -macro_rules! split { - ($name:ident, $re:expr, $text:expr, $expected:expr) => { - #[test] - fn $name() { - let re = regex!($re); - let splitted: Vec<_> = re.split(t!($text)).collect(); - assert_eq!($expected, &*splitted); - } - } -} - -macro_rules! splitn { - ($name:ident, $re:expr, $text:expr, $limit:expr, $expected:expr) => { - #[test] - fn $name() { - let re = regex!($re); - let splitted: Vec<_> = re.splitn(t!($text), $limit).collect(); - assert_eq!($expected, &*splitted); - } - } -} diff --git a/collector/compile-benchmarks/regex-1.5.5/tests/macros_bytes.rs b/collector/compile-benchmarks/regex-1.5.5/tests/macros_bytes.rs deleted file mode 100644 index 3d6c8c3ac..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/tests/macros_bytes.rs +++ /dev/null @@ -1,39 +0,0 @@ -// Macros for use in writing tests generic over &str/&[u8]. -macro_rules! text { ($text:expr) => { $text.as_bytes() } } -macro_rules! t { ($re:expr) => { text!($re) } } -macro_rules! match_text { ($text:expr) => { $text.as_bytes() } } -macro_rules! use_ { ($($path: tt)*) => { use regex::bytes::$($path)*; } } -macro_rules! empty_vec { () => { <Vec<&[u8]>>::new() } } -macro_rules! bytes { ($text:expr) => { $text } } - -macro_rules! no_expand { - ($text:expr) => {{ - use regex::bytes::NoExpand; - NoExpand(text!($text)) - }} -} - -macro_rules! show { - ($text:expr) => {{ - use std::ascii::escape_default; - let mut s = vec![]; - for &b in bytes!($text) { - s.extend(escape_default(b)); - } - String::from_utf8(s).unwrap() - }} -} - -macro_rules! expand { - ($name:ident, $re:expr, $text:expr, $expand:expr, $expected:expr) => { - #[test] - fn $name() { - let re = regex!($re); - let cap = re.captures(t!($text)).unwrap(); - - let mut got = vec![]; - cap.expand(t!($expand), &mut got); - assert_eq!(show!(t!($expected)), show!(&*got)); - } - } -} diff --git a/collector/compile-benchmarks/regex-1.5.5/tests/macros_str.rs b/collector/compile-benchmarks/regex-1.5.5/tests/macros_str.rs deleted file mode 100644 index 7b7eb110c..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/tests/macros_str.rs +++ /dev/null @@ -1,38 +0,0 @@ -// Macros for use in writing tests generic over &str/&[u8]. -macro_rules! text { ($text:expr) => { $text } } -macro_rules! t { ($text:expr) => { text!($text) } } -macro_rules! match_text { ($text:expr) => { $text.as_str() } } -macro_rules! use_ { ($($path: tt)*) => { use regex::$($path)*; } } -macro_rules! empty_vec { () => { <Vec<&str>>::new() } } -macro_rules! bytes { ($text:expr) => { std::str::from_utf8($text.as_ref()).unwrap() } } - -macro_rules! no_expand { - ($text:expr) => {{ - use regex::NoExpand; - NoExpand(text!($text)) - }} -} - -macro_rules! show { ($text:expr) => { $text } } - -// N.B. The expansion API for &str and &[u8] APIs differs slightly for now, -// but they should be unified in 1.0. 
Then we can move this macro back into -// tests/api.rs where it is used. ---AG -macro_rules! expand { - ($name:ident, $re:expr, $text:expr, $expand:expr, $expected:expr) => { - #[test] - fn $name() { - let re = regex!($re); - let cap = re.captures(t!($text)).unwrap(); - - let mut got = String::new(); - cap.expand(t!($expand), &mut got); - assert_eq!(show!(t!($expected)), show!(&*got)); - } - } -} - -#[cfg(feature = "pattern")] -macro_rules! searcher_expr { ($e:expr) => ($e) } -#[cfg(not(feature = "pattern"))] -macro_rules! searcher_expr { ($e:expr) => ({}) } diff --git a/collector/compile-benchmarks/regex-1.5.5/tests/misc.rs b/collector/compile-benchmarks/regex-1.5.5/tests/misc.rs deleted file mode 100644 index 314811e25..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/tests/misc.rs +++ /dev/null @@ -1,4 +0,0 @@ -mat!(prefix_literal_match, r"^abc", r"abc", Some((0, 3))); -mat!(prefix_literal_nomatch, r"^abc", r"zabc", None); -mat!(one_literal_edge, r"abc", r"xxxxxab", None); -matiter!(terminates, r"a$", r"a", (0, 1)); diff --git a/collector/compile-benchmarks/regex-1.5.5/tests/multiline.rs b/collector/compile-benchmarks/regex-1.5.5/tests/multiline.rs deleted file mode 100644 index 62ee47b62..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/tests/multiline.rs +++ /dev/null @@ -1,144 +0,0 @@ -matiter!( - match_multi_1, - r"(?m)^[a-z]+$", - "abc\ndef\nxyz", - (0, 3), - (4, 7), - (8, 11) -); -matiter!(match_multi_2, r"(?m)^$", "abc\ndef\nxyz"); -matiter!(match_multi_3, r"(?m)^", "abc\ndef\nxyz", (0, 0), (4, 4), (8, 8)); -matiter!(match_multi_4, r"(?m)$", "abc\ndef\nxyz", (3, 3), (7, 7), (11, 11)); -matiter!( - match_multi_5, - r"(?m)^[a-z]", - "abc\ndef\nxyz", - (0, 1), - (4, 5), - (8, 9) -); -matiter!(match_multi_6, r"(?m)[a-z]^", "abc\ndef\nxyz"); -matiter!( - match_multi_7, - r"(?m)[a-z]$", - "abc\ndef\nxyz", - (2, 3), - (6, 7), - (10, 11) -); -matiter!(match_multi_8, r"(?m)$[a-z]", "abc\ndef\nxyz"); -matiter!(match_multi_9, r"(?m)^$", "", (0, 0)); - -matiter!( - match_multi_rep_1, - r"(?m)(?:^$)*", - "a\nb\nc", - (0, 0), - (1, 1), - (2, 2), - (3, 3), - (4, 4), - (5, 5) -); -matiter!( - match_multi_rep_2, - r"(?m)(?:^|a)+", - "a\naaa\n", - (0, 0), - (2, 2), - (3, 5), - (6, 6) -); -matiter!( - match_multi_rep_3, - r"(?m)(?:^|a)*", - "a\naaa\n", - (0, 1), - (2, 5), - (6, 6) -); -matiter!( - match_multi_rep_4, - r"(?m)(?:^[a-z])+", - "abc\ndef\nxyz", - (0, 1), - (4, 5), - (8, 9) -); -matiter!( - match_multi_rep_5, - r"(?m)(?:^[a-z]{3}\n?)+", - "abc\ndef\nxyz", - (0, 11) -); -matiter!( - match_multi_rep_6, - r"(?m)(?:^[a-z]{3}\n?)*", - "abc\ndef\nxyz", - (0, 11) -); -matiter!( - match_multi_rep_7, - r"(?m)(?:\n?[a-z]{3}$)+", - "abc\ndef\nxyz", - (0, 11) -); -matiter!( - match_multi_rep_8, - r"(?m)(?:\n?[a-z]{3}$)*", - "abc\ndef\nxyz", - (0, 11) -); -matiter!( - match_multi_rep_9, - r"(?m)^*", - "\naa\n", - (0, 0), - (1, 1), - (2, 2), - (3, 3), - (4, 4) -); -matiter!(match_multi_rep_10, r"(?m)^+", "\naa\n", (0, 0), (1, 1), (4, 4)); -matiter!( - match_multi_rep_11, - r"(?m)$*", - "\naa\n", - (0, 0), - (1, 1), - (2, 2), - (3, 3), - (4, 4) -); -matiter!(match_multi_rep_12, r"(?m)$+", "\naa\n", (0, 0), (3, 3), (4, 4)); -matiter!(match_multi_rep_13, r"(?m)(?:$\n)+", "\n\naaa\n\n", (0, 2), (5, 7)); -matiter!( - match_multi_rep_14, - r"(?m)(?:$\n)*", - "\n\naaa\n\n", - (0, 2), - (3, 3), - (4, 4), - (5, 7) -); -matiter!(match_multi_rep_15, r"(?m)(?:$\n^)+", "\n\naaa\n\n", (0, 2), (5, 7)); -matiter!( - match_multi_rep_16, - r"(?m)(?:^|$)+", - "\n\naaa\n\n", - (0, 0), - (1, 1), - (2, 2), 
- (5, 5), - (6, 6), - (7, 7) -); -matiter!( - match_multi_rep_17, - r"(?m)(?:$\n)*", - "\n\naaa\n\n", - (0, 2), - (3, 3), - (4, 4), - (5, 7) -); diff --git a/collector/compile-benchmarks/regex-1.5.5/tests/noparse.rs b/collector/compile-benchmarks/regex-1.5.5/tests/noparse.rs deleted file mode 100644 index 8ded1dce7..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/tests/noparse.rs +++ /dev/null @@ -1,45 +0,0 @@ -macro_rules! noparse( - ($name:ident, $re:expr) => ( - #[test] - fn $name() { - let re = $re; - match regex_new!(re) { - Err(_) => {}, - Ok(_) => panic!("Regex '{}' should cause a parse error.", re), - } - } - ); -); - -noparse!(fail_no_repeat_arg, "*"); -noparse!(fail_incomplete_escape, "\\"); -noparse!(fail_class_incomplete, "[A-"); -noparse!(fail_class_not_closed, "[A"); -noparse!(fail_class_no_begin, r"[\A]"); -noparse!(fail_class_no_end, r"[\z]"); -noparse!(fail_class_no_boundary, r"[\b]"); -noparse!(fail_open_paren, "("); -noparse!(fail_close_paren, ")"); -noparse!(fail_invalid_range, "[a-Z]"); -noparse!(fail_empty_capture_name, "(?P<>a)"); -noparse!(fail_bad_capture_name, "(?P<na-me>)"); -noparse!(fail_bad_flag, "(?a)a"); -noparse!(fail_too_big, "a{10000000}"); -noparse!(fail_counted_no_close, "a{1001"); -noparse!(fail_counted_decreasing, "a{2,1}"); -noparse!(fail_counted_nonnegative, "a{-1,1}"); -noparse!(fail_unfinished_cap, "(?"); -noparse!(fail_unfinished_escape, "\\"); -noparse!(fail_octal_digit, r"\8"); -noparse!(fail_hex_digit, r"\xG0"); -noparse!(fail_hex_short, r"\xF"); -noparse!(fail_hex_long_digits, r"\x{fffg}"); -noparse!(fail_flag_bad, "(?a)"); -noparse!(fail_flag_empty, "(?)"); -noparse!(fail_double_neg, "(?-i-i)"); -noparse!(fail_neg_empty, "(?i-)"); -noparse!(fail_dupe_named, "(?P<a>.)(?P<a>.)"); -noparse!(fail_range_end_no_class, "[a-[:lower:]]"); -noparse!(fail_range_end_no_begin, r"[a-\A]"); -noparse!(fail_range_end_no_end, r"[a-\z]"); -noparse!(fail_range_end_no_boundary, r"[a-\b]"); diff --git a/collector/compile-benchmarks/regex-1.5.5/tests/regression.rs b/collector/compile-benchmarks/regex-1.5.5/tests/regression.rs deleted file mode 100644 index 44b90832b..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/tests/regression.rs +++ /dev/null @@ -1,219 +0,0 @@ -// See: https://github.com/rust-lang/regex/issues/48 -#[test] -fn invalid_regexes_no_crash() { - assert!(regex_new!("(*)").is_err()); - assert!(regex_new!("(?:?)").is_err()); - assert!(regex_new!("(?)").is_err()); - assert!(regex_new!("*").is_err()); -} - -// See: https://github.com/rust-lang/regex/issues/98 -#[test] -fn regression_many_repeat_stack_overflow() { - let re = regex!("^.{1,2500}"); - assert_eq!(vec![(0, 1)], findall!(re, "a")); -} - -// See: https://github.com/rust-lang/regex/issues/555 -#[test] -fn regression_invalid_repetition_expr() { - assert!(regex_new!("(?m){1,1}").is_err()); -} - -// See: https://github.com/rust-lang/regex/issues/527 -#[test] -fn regression_invalid_flags_expression() { - assert!(regex_new!("(((?x)))").is_ok()); -} - -// See: https://github.com/rust-lang/regex/issues/75 -mat!(regression_unsorted_binary_search_1, r"(?i-u)[a_]+", "A_", Some((0, 2))); -mat!(regression_unsorted_binary_search_2, r"(?i-u)[A_]+", "a_", Some((0, 2))); - -// See: https://github.com/rust-lang/regex/issues/99 -#[cfg(feature = "unicode-case")] -mat!(regression_negated_char_class_1, r"(?i)[^x]", "x", None); -#[cfg(feature = "unicode-case")] -mat!(regression_negated_char_class_2, r"(?i)[^x]", "X", None); - -// See: https://github.com/rust-lang/regex/issues/101 
-mat!(regression_ascii_word_underscore, r"[[:word:]]", "_", Some((0, 1))); - -// See: https://github.com/rust-lang/regex/issues/129 -#[test] -fn regression_captures_rep() { - let re = regex!(r"([a-f]){2}(?P<foo>[x-z])"); - let caps = re.captures(text!("abx")).unwrap(); - assert_eq!(match_text!(caps.name("foo").unwrap()), text!("x")); -} - -// See: https://github.com/rust-lang/regex/issues/153 -mat!(regression_alt_in_alt1, r"ab?|$", "az", Some((0, 1))); -mat!(regression_alt_in_alt2, r"^(.*?)(\n|\r\n?|$)", "ab\rcd", Some((0, 3))); - -// See: https://github.com/rust-lang/regex/issues/169 -mat!(regression_leftmost_first_prefix, r"z*azb", "azb", Some((0, 3))); - -// See: https://github.com/rust-lang/regex/issues/76 -#[cfg(all(feature = "unicode-case", feature = "unicode-gencat"))] -mat!(uni_case_lower_nocase_flag, r"(?i)\p{Ll}+", "ΛΘΓΔα", Some((0, 10))); - -// See: https://github.com/rust-lang/regex/issues/191 -mat!(many_alternates, r"1|2|3|4|5|6|7|8|9|10|int", "int", Some((0, 3))); - -// burntsushi was bad and didn't create an issue for this bug. -mat!(anchored_prefix1, r"^a[[:^space:]]", "a ", None); -mat!(anchored_prefix2, r"^a[[:^space:]]", "foo boo a ", None); -mat!(anchored_prefix3, r"^-[a-z]", "r-f", None); - -// See: https://github.com/rust-lang/regex/issues/204 -#[cfg(feature = "unicode-perl")] -split!( - split_on_word_boundary, - r"\b", - r"Should this (work?)", - &[ - t!(""), - t!("Should"), - t!(" "), - t!("this"), - t!(" ("), - t!("work"), - t!("?)") - ] -); -#[cfg(feature = "unicode-perl")] -matiter!( - word_boundary_dfa, - r"\b", - "a b c", - (0, 0), - (1, 1), - (2, 2), - (3, 3), - (4, 4), - (5, 5) -); - -// See: https://github.com/rust-lang/regex/issues/268 -matiter!(partial_anchor, r"^a|b", "ba", (0, 1)); - -// See: https://github.com/rust-lang/regex/issues/280 -ismatch!(partial_anchor_alternate_begin, r"^a|z", "yyyyya", false); -ismatch!(partial_anchor_alternate_end, r"a$|z", "ayyyyy", false); - -// See: https://github.com/rust-lang/regex/issues/289 -mat!(lits_unambiguous1, r"(ABC|CDA|BC)X", "CDAX", Some((0, 4))); - -// See: https://github.com/rust-lang/regex/issues/291 -mat!( - lits_unambiguous2, - r"((IMG|CAM|MG|MB2)_|(DSCN|CIMG))(?P<n>[0-9]+)$", - "CIMG2341", - Some((0, 8)), - Some((0, 4)), - None, - Some((0, 4)), - Some((4, 8)) -); - -// See: https://github.com/rust-lang/regex/issues/271 -mat!(endl_or_wb, r"(?m:$)|(?-u:\b)", "\u{6084e}", Some((4, 4))); -mat!(zero_or_end, r"(?i-u:\x00)|$", "\u{e682f}", Some((4, 4))); -mat!(y_or_endl, r"(?i-u:y)|(?m:$)", "\u{b4331}", Some((4, 4))); -#[cfg(feature = "unicode-perl")] -mat!(wb_start_x, r"(?u:\b)^(?-u:X)", "X", Some((0, 1))); - -// See: https://github.com/rust-lang/regex/issues/321 -ismatch!(strange_anchor_non_complete_prefix, r"a^{2}", "", false); -ismatch!(strange_anchor_non_complete_suffix, r"${2}a", "", false); - -// See: https://github.com/BurntSushi/ripgrep/issues/1203 -ismatch!(reverse_suffix1, r"[0-4][0-4][0-4]000", "153.230000", true); -ismatch!(reverse_suffix2, r"[0-9][0-9][0-9]000", "153.230000\n", true); -matiter!(reverse_suffix3, r"[0-9][0-9][0-9]000", "153.230000\n", (4, 10)); - -// See: https://github.com/rust-lang/regex/issues/334 -// See: https://github.com/rust-lang/regex/issues/557 -mat!( - captures_after_dfa_premature_end1, - r"a(b*(X|$))?", - "abcbX", - Some((0, 1)), - None, - None -); -mat!( - captures_after_dfa_premature_end2, - r"a(bc*(X|$))?", - "abcbX", - Some((0, 1)), - None, - None -); -mat!(captures_after_dfa_premature_end3, r"(aa$)?", "aaz", Some((0, 0))); - -// See: 
https://github.com/rust-lang/regex/issues/437 -ismatch!( - literal_panic, - r"typename type\-parameter\-[0-9]+\-[0-9]+::.+", - "test", - false -); - -// See: https://github.com/rust-lang/regex/issues/533 -ismatch!( - blank_matches_nothing_between_space_and_tab, - r"[[:blank:]]", - "\u{a}\u{b}\u{c}\u{d}\u{e}\u{f}\ - \u{10}\u{11}\u{12}\u{13}\u{14}\u{15}\u{16}\u{17}\ - \u{18}\u{19}\u{1a}\u{1b}\u{1c}\u{1d}\u{1e}\u{1f}", - false -); - -ismatch!( - inverted_blank_matches_everything_between_space_and_tab, - r"^[[:^blank:]]+$", - "\u{a}\u{b}\u{c}\u{d}\u{e}\u{f}\ - \u{10}\u{11}\u{12}\u{13}\u{14}\u{15}\u{16}\u{17}\ - \u{18}\u{19}\u{1a}\u{1b}\u{1c}\u{1d}\u{1e}\u{1f}", - true -); - -// Tests that our Aho-Corasick optimization works correctly. It only -// kicks in when we have >32 literals. By "works correctly," we mean that -// leftmost-first match semantics are properly respected. That is, samwise -// should match, not sam. -mat!( - ahocorasick1, - "samwise|sam|a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|\ - A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z", - "samwise", - Some((0, 7)) -); - -// See: https://github.com/BurntSushi/ripgrep/issues/1247 -#[test] -#[cfg(feature = "unicode-perl")] -fn regression_nfa_stops1() { - let re = ::regex::bytes::Regex::new(r"\bs(?:[ab])").unwrap(); - assert_eq!(0, re.find_iter(b"s\xE4").count()); -} - -// See: https://github.com/rust-lang/regex/issues/640 -#[cfg(feature = "unicode-case")] -matiter!( - flags_are_unset, - r"((?i)foo)|Bar", - "foo Foo bar Bar", - (0, 3), - (4, 7), - (12, 15) -); - -// See: https://github.com/rust-lang/regex/issues/659 -// -// Note that 'Ј' is not 'j', but cyrillic Je -// https://en.wikipedia.org/wiki/Je_(Cyrillic) -ismatch!(empty_group_match, r"()Ј01", "zЈ01", true); -matiter!(empty_group_find, r"()Ј01", "zЈ01", (1, 5)); diff --git a/collector/compile-benchmarks/regex-1.5.5/tests/regression_fuzz.rs b/collector/compile-benchmarks/regex-1.5.5/tests/regression_fuzz.rs deleted file mode 100644 index 4e76704d2..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/tests/regression_fuzz.rs +++ /dev/null @@ -1,31 +0,0 @@ -// These tests are only run for the "default" test target because some of them -// can take quite a long time. Some of them take long enough that it's not -// practical to run them in debug mode. :-/ - -// See: https://oss-fuzz.com/testcase-detail/5673225499181056 -// -// Ignored by default since it takes too long in debug mode (almost a minute). -#[test] -#[ignore] -fn fuzz1() { - regex!(r"1}{55}{0}*{1}{55}{55}{5}*{1}{55}+{56}|;**"); -} - -// See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=26505 -// See: https://github.com/rust-lang/regex/issues/722 -#[test] -fn empty_any_errors_no_panic() { - assert!(regex_new!(r"\P{any}").is_err()); -} - -// This tests that a very large regex errors during compilation instead of -// using gratuitous amounts of memory. The specific problem is that the -// compiler wasn't accounting for the memory used by Unicode character classes -// correctly. -// -// See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=33579 -#[test] -fn big_regex_fails_to_compile() { - let pat = "[\u{0}\u{e}\u{2}\\w~~>[l\t\u{0}]p?<]{971158}"; - assert!(regex_new!(pat).is_err()); -} diff --git a/collector/compile-benchmarks/regex-1.5.5/tests/replace.rs b/collector/compile-benchmarks/regex-1.5.5/tests/replace.rs deleted file mode 100644 index 1dc610635..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/tests/replace.rs +++ /dev/null @@ -1,230 +0,0 @@ -macro_rules! 
replace( - ($name:ident, $which:ident, $re:expr, - $search:expr, $replace:expr, $result:expr) => ( - #[test] - fn $name() { - let re = regex!($re); - assert_eq!(re.$which(text!($search), $replace), text!($result)); - } - ); -); - -replace!(first, replace, r"[0-9]", "age: 26", t!("Z"), "age: Z6"); -replace!(plus, replace, r"[0-9]+", "age: 26", t!("Z"), "age: Z"); -replace!(all, replace_all, r"[0-9]", "age: 26", t!("Z"), "age: ZZ"); -replace!( - groups, - replace, - r"(?-u)(\S+)\s+(\S+)", - "w1 w2", - t!("$2 $1"), - "w2 w1" -); -replace!( - double_dollar, - replace, - r"(?-u)(\S+)\s+(\S+)", - "w1 w2", - t!("$2 $$1"), - "w2 $1" -); -// replace!(adjacent_index, replace, -// r"([^aeiouy])ies$", "skies", t!("$1y"), "sky"); -replace!( - named, - replace_all, - r"(?-u)(?P<first>\S+)\s+(?P<last>\S+)(?P<space>\s*)", - "w1 w2 w3 w4", - t!("$last $first$space"), - "w2 w1 w4 w3" -); -replace!( - trim, - replace_all, - "^[ \t]+|[ \t]+$", - " \t trim me\t \t", - t!(""), - "trim me" -); -replace!(number_hypen, replace, r"(.)(.)", "ab", t!("$1-$2"), "a-b"); -// replace!(number_underscore, replace, r"(.)(.)", "ab", t!("$1_$2"), "a_b"); -replace!( - simple_expand, - replace_all, - r"(?-u)(\w) (\w)", - "a b", - t!("$2 $1"), - "b a" -); -replace!( - literal_dollar1, - replace_all, - r"(?-u)(\w+) (\w+)", - "a b", - t!("$$1"), - "$1" -); -replace!( - literal_dollar2, - replace_all, - r"(?-u)(\w+) (\w+)", - "a b", - t!("$2 $$c $1"), - "b $c a" -); -replace!( - no_expand1, - replace, - r"(?-u)(\S+)\s+(\S+)", - "w1 w2", - no_expand!("$2 $1"), - "$2 $1" -); -replace!( - no_expand2, - replace, - r"(?-u)(\S+)\s+(\S+)", - "w1 w2", - no_expand!("$$1"), - "$$1" -); -use_!(Captures); -replace!( - closure_returning_reference, - replace, - r"([0-9]+)", - "age: 26", - |captures: &Captures<'_>| { - match_text!(captures.get(1).unwrap())[0..1].to_owned() - }, - "age: 2" -); -replace!( - closure_returning_value, - replace, - r"[0-9]+", - "age: 26", - |_captures: &Captures<'_>| t!("Z").to_owned(), - "age: Z" -); - -// See https://github.com/rust-lang/regex/issues/314 -replace!( - match_at_start_replace_with_empty, - replace_all, - r"foo", - "foobar", - t!(""), - "bar" -); - -// See https://github.com/rust-lang/regex/issues/393 -replace!(single_empty_match, replace, r"^", "bar", t!("foo"), "foobar"); - -// See https://github.com/rust-lang/regex/issues/399 -replace!( - capture_longest_possible_name, - replace_all, - r"(.)", - "b", - t!("${1}a $1a"), - "ba " -); - -replace!( - impl_string, - replace, - r"[0-9]", - "age: 26", - t!("Z".to_string()), - "age: Z6" -); -replace!( - impl_string_ref, - replace, - r"[0-9]", - "age: 26", - t!(&"Z".to_string()), - "age: Z6" -); -replace!( - impl_cow_str_borrowed, - replace, - r"[0-9]", - "age: 26", - t!(std::borrow::Cow::<'_, str>::Borrowed("Z")), - "age: Z6" -); -replace!( - impl_cow_str_borrowed_ref, - replace, - r"[0-9]", - "age: 26", - t!(&std::borrow::Cow::<'_, str>::Borrowed("Z")), - "age: Z6" -); -replace!( - impl_cow_str_owned, - replace, - r"[0-9]", - "age: 26", - t!(std::borrow::Cow::<'_, str>::Owned("Z".to_string())), - "age: Z6" -); -replace!( - impl_cow_str_owned_ref, - replace, - r"[0-9]", - "age: 26", - t!(&std::borrow::Cow::<'_, str>::Owned("Z".to_string())), - "age: Z6" -); - -replace!( - impl_vec_u8, - replace, - r"[0-9]", - "age: 26", - bytes!(vec![b'Z']), - "age: Z6" -); -replace!( - impl_vec_u8_ref, - replace, - r"[0-9]", - "age: 26", - bytes!(&vec![b'Z']), - "age: Z6" -); -replace!( - impl_cow_slice_borrowed, - replace, - r"[0-9]", - "age: 26", - 
bytes!(std::borrow::Cow::<'_, [u8]>::Borrowed(&[b'Z'])), - "age: Z6" -); -replace!( - impl_cow_slice_borrowed_ref, - replace, - r"[0-9]", - "age: 26", - bytes!(&std::borrow::Cow::<'_, [u8]>::Borrowed(&[b'Z'])), - "age: Z6" -); -replace!( - impl_cow_slice_owned, - replace, - r"[0-9]", - "age: 26", - bytes!(std::borrow::Cow::<'_, [u8]>::Owned(vec![b'Z'])), - "age: Z6" -); -replace!( - impl_cow_slice_owned_ref, - replace, - r"[0-9]", - "age: 26", - bytes!(&std::borrow::Cow::<'_, [u8]>::Owned(vec![b'Z'])), - "age: Z6" -); diff --git a/collector/compile-benchmarks/regex-1.5.5/tests/searcher.rs b/collector/compile-benchmarks/regex-1.5.5/tests/searcher.rs deleted file mode 100644 index 3779f54c3..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/tests/searcher.rs +++ /dev/null @@ -1,95 +0,0 @@ -macro_rules! searcher { - ($name:ident, $re:expr, $haystack:expr) => ( - searcher!($name, $re, $haystack, vec vec![]); - ); - ($name:ident, $re:expr, $haystack:expr, $($steps:expr,)*) => ( - searcher!($name, $re, $haystack, vec vec![$($steps),*]); - ); - ($name:ident, $re:expr, $haystack:expr, $($steps:expr),*) => ( - searcher!($name, $re, $haystack, vec vec![$($steps),*]); - ); - ($name:ident, $re:expr, $haystack:expr, vec $expect_steps:expr) => ( - #[test] - #[allow(unused_imports)] - fn $name() { - searcher_expr! {{ - use std::str::pattern::{Pattern, Searcher}; - use std::str::pattern::SearchStep::{Match, Reject, Done}; - let re = regex!($re); - let mut se = re.into_searcher($haystack); - let mut got_steps = vec![]; - loop { - match se.next() { - Done => break, - step => { got_steps.push(step); } - } - } - assert_eq!(got_steps, $expect_steps); - }} - } - ); -} - -searcher!(searcher_empty_regex_empty_haystack, r"", "", Match(0, 0)); -searcher!( - searcher_empty_regex, - r"", - "ab", - Match(0, 0), - Reject(0, 1), - Match(1, 1), - Reject(1, 2), - Match(2, 2) -); -searcher!(searcher_empty_haystack, r"\d", ""); -searcher!(searcher_one_match, r"\d", "5", Match(0, 1)); -searcher!(searcher_no_match, r"\d", "a", Reject(0, 1)); -searcher!( - searcher_two_adjacent_matches, - r"\d", - "56", - Match(0, 1), - Match(1, 2) -); -searcher!( - searcher_two_non_adjacent_matches, - r"\d", - "5a6", - Match(0, 1), - Reject(1, 2), - Match(2, 3) -); -searcher!(searcher_reject_first, r"\d", "a6", Reject(0, 1), Match(1, 2)); -searcher!( - searcher_one_zero_length_matches, - r"\d*", - "a1b2", - Match(0, 0), // ^ - Reject(0, 1), // a - Match(1, 2), // a1 - Reject(2, 3), // a1b - Match(3, 4), // a1b2 -); -searcher!( - searcher_many_zero_length_matches, - r"\d*", - "a1bbb2", - Match(0, 0), // ^ - Reject(0, 1), // a - Match(1, 2), // a1 - Reject(2, 3), // a1b - Match(3, 3), // a1bb - Reject(3, 4), // a1bb - Match(4, 4), // a1bbb - Reject(4, 5), // a1bbb - Match(5, 6), // a1bbba -); -searcher!( - searcher_unicode, - r".+?", - "Ⅰ1Ⅱ2", - Match(0, 3), - Match(3, 4), - Match(4, 7), - Match(7, 8) -); diff --git a/collector/compile-benchmarks/regex-1.5.5/tests/set.rs b/collector/compile-benchmarks/regex-1.5.5/tests/set.rs deleted file mode 100644 index 37fcf8700..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/tests/set.rs +++ /dev/null @@ -1,67 +0,0 @@ -matset!(set1, &["a", "a"], "a", 0, 1); -matset!(set2, &["a", "a"], "ba", 0, 1); -matset!(set3, &["a", "b"], "a", 0); -matset!(set4, &["a", "b"], "b", 1); -matset!(set5, &["a|b", "b|a"], "b", 0, 1); -matset!(set6, &["foo", "oo"], "foo", 0, 1); -matset!(set7, &["^foo", "bar$"], "foo", 0); -matset!(set8, &["^foo", "bar$"], "foo bar", 0, 1); -matset!(set9, &["^foo", "bar$"], 
"bar", 1); -matset!(set10, &[r"[a-z]+$", "foo"], "01234 foo", 0, 1); -matset!(set11, &[r"[a-z]+$", "foo"], "foo 01234", 1); -matset!(set12, &[r".*?", "a"], "zzzzzza", 0, 1); -matset!(set13, &[r".*", "a"], "zzzzzza", 0, 1); -matset!(set14, &[r".*", "a"], "zzzzzz", 0); -matset!(set15, &[r"(?-u)\ba\b"], "hello a bye", 0); -matset!(set16, &["a"], "a", 0); -matset!(set17, &[".*a"], "a", 0); -matset!(set18, &["a", "β"], "β", 1); - -// regexes that match the empty string -matset!(setempty1, &["", "a"], "abc", 0, 1); -matset!(setempty2, &["", "b"], "abc", 0, 1); -matset!(setempty3, &["", "z"], "abc", 0); -matset!(setempty4, &["a", ""], "abc", 0, 1); -matset!(setempty5, &["b", ""], "abc", 0, 1); -matset!(setempty6, &["z", ""], "abc", 1); -matset!(setempty7, &["b", "(?:)"], "abc", 0, 1); -matset!(setempty8, &["(?:)", "b"], "abc", 0, 1); -matset!(setempty9, &["c(?:)", "b"], "abc", 0, 1); - -nomatset!(nset1, &["a", "a"], "b"); -nomatset!(nset2, &["^foo", "bar$"], "bar foo"); -nomatset!( - nset3, - { - let xs: &[&str] = &[]; - xs - }, - "a" -); -nomatset!(nset4, &[r"^rooted$", r"\.log$"], "notrooted"); - -// See: https://github.com/rust-lang/regex/issues/187 -#[test] -fn regression_subsequent_matches() { - let set = regex_set!(&["ab", "b"]); - let text = text!("ba"); - assert!(set.matches(text).matched(1)); - assert!(set.matches(text).matched(1)); -} - -#[test] -fn get_set_patterns() { - let set = regex_set!(&["a", "b"]); - assert_eq!(vec!["a", "b"], set.patterns()); -} - -#[test] -fn len_and_empty() { - let empty = regex_set!(&[""; 0]); - assert_eq!(empty.len(), 0); - assert!(empty.is_empty()); - - let not_empty = regex_set!(&["ab", "b"]); - assert_eq!(not_empty.len(), 2); - assert!(!not_empty.is_empty()); -} diff --git a/collector/compile-benchmarks/regex-1.5.5/tests/shortest_match.rs b/collector/compile-benchmarks/regex-1.5.5/tests/shortest_match.rs deleted file mode 100644 index f8b4fed15..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/tests/shortest_match.rs +++ /dev/null @@ -1,14 +0,0 @@ -macro_rules! shortmat { - ($name:ident, $re:expr, $text:expr, $shortest_match:expr) => { - #[test] - fn $name() { - let text = text!($text); - let re = regex!($re); - assert_eq!($shortest_match, re.shortest_match(text)); - } - }; -} - -shortmat!(t01, r"a+", r"aa", Some(1)); -// Test that the reverse suffix optimization gets it right. -shortmat!(t02, r".*(?:abcd)+", r"abcdabcd", Some(4)); diff --git a/collector/compile-benchmarks/regex-1.5.5/tests/suffix_reverse.rs b/collector/compile-benchmarks/regex-1.5.5/tests/suffix_reverse.rs deleted file mode 100644 index 774c9e85f..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/tests/suffix_reverse.rs +++ /dev/null @@ -1,6 +0,0 @@ -mat!(t01, r".*abcd", r"abcd", Some((0, 4))); -mat!(t02, r".*(?:abcd)+", r"abcd", Some((0, 4))); -mat!(t03, r".*(?:abcd)+", r"abcdabcd", Some((0, 8))); -mat!(t04, r".*(?:abcd)+", r"abcdxabcd", Some((0, 9))); -mat!(t05, r".*x(?:abcd)+", r"abcdxabcd", Some((0, 9))); -mat!(t06, r"[^abcd]*x(?:abcd)+", r"abcdxabcd", Some((4, 9))); diff --git a/collector/compile-benchmarks/regex-1.5.5/tests/test_backtrack.rs b/collector/compile-benchmarks/regex-1.5.5/tests/test_backtrack.rs deleted file mode 100644 index fb934e2d8..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/tests/test_backtrack.rs +++ /dev/null @@ -1,56 +0,0 @@ -#![cfg_attr(feature = "pattern", feature(pattern))] - -macro_rules! 
regex_new { - ($re:expr) => {{ - use regex::internal::ExecBuilder; - ExecBuilder::new($re) - .bounded_backtracking() - .build() - .map(|e| e.into_regex()) - }}; -} - -macro_rules! regex { - ($re:expr) => { - regex_new!($re).unwrap() - }; -} - -macro_rules! regex_set_new { - ($re:expr) => {{ - use regex::internal::ExecBuilder; - ExecBuilder::new_many($re) - .bounded_backtracking() - .build() - .map(|e| e.into_regex_set()) - }}; -} - -macro_rules! regex_set { - ($res:expr) => { - regex_set_new!($res).unwrap() - }; -} - -// Must come before other module definitions. -include!("macros_str.rs"); -include!("macros.rs"); - -mod api; -mod api_str; -mod crazy; -mod flags; -mod fowler; -mod multiline; -mod noparse; -mod regression; -mod replace; -mod searcher; -mod set; -mod suffix_reverse; -#[cfg(feature = "unicode")] -mod unicode; -#[cfg(feature = "unicode-perl")] -mod word_boundary; -#[cfg(feature = "unicode-perl")] -mod word_boundary_unicode; diff --git a/collector/compile-benchmarks/regex-1.5.5/tests/test_backtrack_bytes.rs b/collector/compile-benchmarks/regex-1.5.5/tests/test_backtrack_bytes.rs deleted file mode 100644 index a59426c94..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/tests/test_backtrack_bytes.rs +++ /dev/null @@ -1,55 +0,0 @@ -macro_rules! regex_new { - ($re:expr) => {{ - use regex::internal::ExecBuilder; - ExecBuilder::new($re) - .bounded_backtracking() - .only_utf8(false) - .build() - .map(|e| e.into_byte_regex()) - }}; -} - -macro_rules! regex { - ($re:expr) => { - regex_new!($re).unwrap() - }; -} - -macro_rules! regex_set_new { - ($re:expr) => {{ - use regex::internal::ExecBuilder; - ExecBuilder::new_many($re) - .bounded_backtracking() - .only_utf8(false) - .build() - .map(|e| e.into_byte_regex_set()) - }}; -} - -macro_rules! regex_set { - ($res:expr) => { - regex_set_new!($res).unwrap() - }; -} - -// Must come before other module definitions. -include!("macros_bytes.rs"); -include!("macros.rs"); - -mod api; -mod bytes; -mod crazy; -mod flags; -mod fowler; -mod multiline; -mod noparse; -mod regression; -mod replace; -mod set; -mod suffix_reverse; -#[cfg(feature = "unicode")] -mod unicode; -#[cfg(feature = "unicode-perl")] -mod word_boundary; -#[cfg(feature = "unicode-perl")] -mod word_boundary_ascii; diff --git a/collector/compile-benchmarks/regex-1.5.5/tests/test_backtrack_utf8bytes.rs b/collector/compile-benchmarks/regex-1.5.5/tests/test_backtrack_utf8bytes.rs deleted file mode 100644 index 6d308e9e1..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/tests/test_backtrack_utf8bytes.rs +++ /dev/null @@ -1,58 +0,0 @@ -#![cfg_attr(feature = "pattern", feature(pattern))] - -macro_rules! regex_new { - ($re:expr) => {{ - use regex::internal::ExecBuilder; - ExecBuilder::new($re) - .bounded_backtracking() - .bytes(true) - .build() - .map(|e| e.into_regex()) - }}; -} - -macro_rules! regex { - ($re:expr) => { - regex_new!($re).unwrap() - }; -} - -macro_rules! regex_set_new { - ($re:expr) => {{ - use regex::internal::ExecBuilder; - ExecBuilder::new_many($re) - .bounded_backtracking() - .bytes(true) - .build() - .map(|e| e.into_regex_set()) - }}; -} - -macro_rules! regex_set { - ($res:expr) => { - regex_set_new!($res).unwrap() - }; -} - -// Must come before other module definitions. 
-include!("macros_str.rs"); -include!("macros.rs"); - -mod api; -mod api_str; -mod crazy; -mod flags; -mod fowler; -mod multiline; -mod noparse; -mod regression; -mod replace; -mod searcher; -mod set; -mod suffix_reverse; -#[cfg(feature = "unicode")] -mod unicode; -#[cfg(feature = "unicode-perl")] -mod word_boundary; -#[cfg(feature = "unicode-perl")] -mod word_boundary_unicode; diff --git a/collector/compile-benchmarks/regex-1.5.5/tests/test_crates_regex.rs b/collector/compile-benchmarks/regex-1.5.5/tests/test_crates_regex.rs deleted file mode 100644 index a68160472..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/tests/test_crates_regex.rs +++ /dev/null @@ -1,54 +0,0 @@ -/* - * This test is a minimal version of <rofl_0> and <subdiff_0> - * - * Once this bug gets fixed, uncomment rofl_0 and subdiff_0 - * (in `tests/crates_regex.rs`). -#[test] -fn word_boundary_backtracking_default_mismatch() { - use regex::internal::ExecBuilder; - - let backtrack_re = ExecBuilder::new(r"\b") - .bounded_backtracking() - .build() - .map(|exec| exec.into_regex()) - .map_err(|err| format!("{}", err)) - .unwrap(); - - let default_re = ExecBuilder::new(r"\b") - .build() - .map(|exec| exec.into_regex()) - .map_err(|err| format!("{}", err)) - .unwrap(); - - let input = "䅅\\u{a0}"; - - let fi1 = backtrack_re.find_iter(input); - let fi2 = default_re.find_iter(input); - for (m1, m2) in fi1.zip(fi2) { - assert_eq!(m1, m2); - } -} -*/ - -mod consistent; - -mod crates_regex { - - macro_rules! consistent { - ($test_name:ident, $regex_src:expr) => { - #[test] - fn $test_name() { - use super::consistent::backends_are_consistent; - - if option_env!("RUST_REGEX_RANDOM_TEST").is_some() { - match backends_are_consistent($regex_src) { - Ok(_) => {} - Err(err) => panic!("{}", err), - } - } - } - }; - } - - include!("crates_regex.rs"); -} diff --git a/collector/compile-benchmarks/regex-1.5.5/tests/test_default.rs b/collector/compile-benchmarks/regex-1.5.5/tests/test_default.rs deleted file mode 100644 index be627f7a6..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/tests/test_default.rs +++ /dev/null @@ -1,222 +0,0 @@ -#![cfg_attr(feature = "pattern", feature(pattern))] - -use regex; - -// Due to macro scoping rules, this definition only applies for the modules -// defined below. Effectively, it allows us to use the same tests for both -// native and dynamic regexes. -// -// This is also used to test the various matching engines. This one exercises -// the normal code path which automatically chooses the engine based on the -// regex and the input. Other dynamic tests explicitly set the engine to use. -macro_rules! regex_new { - ($re:expr) => {{ - use regex::Regex; - Regex::new($re) - }}; -} - -macro_rules! regex { - ($re:expr) => { - regex_new!($re).unwrap() - }; -} - -macro_rules! regex_set_new { - ($re:expr) => {{ - use regex::RegexSet; - RegexSet::new($re) - }}; -} - -macro_rules! regex_set { - ($res:expr) => { - regex_set_new!($res).unwrap() - }; -} - -// Must come before other module definitions. 
-include!("macros_str.rs"); -include!("macros.rs"); - -mod api; -mod api_str; -mod crazy; -mod flags; -mod fowler; -mod misc; -mod multiline; -mod noparse; -mod regression; -mod regression_fuzz; -mod replace; -mod searcher; -mod set; -mod shortest_match; -mod suffix_reverse; -#[cfg(feature = "unicode")] -mod unicode; -#[cfg(feature = "unicode-perl")] -mod word_boundary; -#[cfg(feature = "unicode-perl")] -mod word_boundary_unicode; - -#[test] -fn disallow_non_utf8() { - assert!(regex::Regex::new(r"(?-u)\xFF").is_err()); - assert!(regex::Regex::new(r"(?-u).").is_err()); - assert!(regex::Regex::new(r"(?-u)[\xFF]").is_err()); - assert!(regex::Regex::new(r"(?-u)☃").is_err()); -} - -#[test] -fn disallow_octal() { - assert!(regex::Regex::new(r"\0").is_err()); -} - -#[test] -fn allow_octal() { - assert!(regex::RegexBuilder::new(r"\0").octal(true).build().is_ok()); -} - -#[test] -fn oibits() { - use regex::bytes; - use regex::{Regex, RegexBuilder, RegexSet, RegexSetBuilder}; - use std::panic::{RefUnwindSafe, UnwindSafe}; - - fn assert_send<T: Send>() {} - fn assert_sync<T: Sync>() {} - fn assert_unwind_safe<T: UnwindSafe>() {} - fn assert_ref_unwind_safe<T: RefUnwindSafe>() {} - - assert_send::<Regex>(); - assert_sync::<Regex>(); - assert_unwind_safe::<Regex>(); - assert_ref_unwind_safe::<Regex>(); - assert_send::<RegexBuilder>(); - assert_sync::<RegexBuilder>(); - assert_unwind_safe::<RegexBuilder>(); - assert_ref_unwind_safe::<RegexBuilder>(); - - assert_send::<bytes::Regex>(); - assert_sync::<bytes::Regex>(); - assert_unwind_safe::<bytes::Regex>(); - assert_ref_unwind_safe::<bytes::Regex>(); - assert_send::<bytes::RegexBuilder>(); - assert_sync::<bytes::RegexBuilder>(); - assert_unwind_safe::<bytes::RegexBuilder>(); - assert_ref_unwind_safe::<bytes::RegexBuilder>(); - - assert_send::<RegexSet>(); - assert_sync::<RegexSet>(); - assert_unwind_safe::<RegexSet>(); - assert_ref_unwind_safe::<RegexSet>(); - assert_send::<RegexSetBuilder>(); - assert_sync::<RegexSetBuilder>(); - assert_unwind_safe::<RegexSetBuilder>(); - assert_ref_unwind_safe::<RegexSetBuilder>(); - - assert_send::<bytes::RegexSet>(); - assert_sync::<bytes::RegexSet>(); - assert_unwind_safe::<bytes::RegexSet>(); - assert_ref_unwind_safe::<bytes::RegexSet>(); - assert_send::<bytes::RegexSetBuilder>(); - assert_sync::<bytes::RegexSetBuilder>(); - assert_unwind_safe::<bytes::RegexSetBuilder>(); - assert_ref_unwind_safe::<bytes::RegexSetBuilder>(); -} - -// See: https://github.com/rust-lang/regex/issues/568 -#[test] -fn oibits_regression() { - use regex::Regex; - use std::panic; - - let _ = panic::catch_unwind(|| Regex::new("a").unwrap()); -} - -// See: https://github.com/rust-lang/regex/issues/750 -#[test] -#[cfg(target_pointer_width = "64")] -fn regex_is_reasonably_small() { - use std::mem::size_of; - - use regex::bytes; - use regex::{Regex, RegexSet}; - - assert_eq!(16, size_of::<Regex>()); - assert_eq!(16, size_of::<RegexSet>()); - assert_eq!(16, size_of::<bytes::Regex>()); - assert_eq!(16, size_of::<bytes::RegexSet>()); -} - -// See: https://github.com/rust-lang/regex/security/advisories/GHSA-m5pq-gvj9-9vr8 -// See: CVE-2022-24713 -// -// We test that our regex compiler will correctly return a "too big" error when -// we try to use a very large repetition on an *empty* sub-expression. -// -// At the time this test was written, the regex compiler does not represent -// empty sub-expressions with any bytecode instructions. 
In effect, it's an -// "optimization" to leave them out, since they would otherwise correspond -// to an unconditional JUMP in the regex bytecode (i.e., an unconditional -// epsilon transition in the NFA graph). Therefore, an empty sub-expression -// represents an interesting case for the compiler's size limits. Since it -// doesn't actually contribute any additional memory to the compiled regex -// instructions, the size limit machinery never detects it. Instead, it just -// dumbly tries to compile the empty sub-expression N times, where N is the -// repetition size. -// -// When N is very large, this will cause the compiler to essentially spin and -// do nothing for a decently large amount of time. It causes the regex to take -// quite a bit of time to compile, despite the concrete syntax of the regex -// being quite small. -// -// The degree to which this is actually a problem is somewhat of a judgment -// call. Some regexes simply take a long time to compile. But in general, you -// should be able to reasonably control this by setting lower or higher size -// limits on the compiled object size. But this mitigation doesn't work at all -// for this case. -// -// This particular test is somewhat narrow. It merely checks that regex -// compilation will, at some point, return a "too big" error. Before the -// fix landed, this test would eventually fail because the regex would be -// successfully compiled (after enough time elapsed). So while this test -// doesn't check that we exit in a reasonable amount of time, it does at least -// check that we are properly returning an error at some point. -#[test] -fn big_empty_regex_fails() { - use regex::Regex; - - let result = Regex::new("(?:){4294967295}"); - assert!(result.is_err()); -} - -// Below is a "billion laughs" variant of the previous test case. -#[test] -fn big_empty_reps_chain_regex_fails() { - use regex::Regex; - - let result = Regex::new("(?:){64}{64}{64}{64}{64}{64}"); - assert!(result.is_err()); -} - -// Below is another situation where a zero-length sub-expression can be -// introduced. -#[test] -fn big_zero_reps_regex_fails() { - use regex::Regex; - - let result = Regex::new(r"x{0}{4294967295}"); - assert!(result.is_err()); -} - -// Testing another case for completeness. -#[test] -fn empty_alt_regex_fails() { - use regex::Regex; - - let result = Regex::new(r"(?:|){4294967295}"); - assert!(result.is_err()); -} diff --git a/collector/compile-benchmarks/regex-1.5.5/tests/test_default_bytes.rs b/collector/compile-benchmarks/regex-1.5.5/tests/test_default_bytes.rs deleted file mode 100644 index f200596ba..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/tests/test_default_bytes.rs +++ /dev/null @@ -1,75 +0,0 @@ -macro_rules! regex_new { - ($re:expr) => {{ - use regex::bytes::Regex; - Regex::new($re) - }}; -} - -macro_rules! regex_set_new { - ($res:expr) => {{ - use regex::bytes::RegexSet; - RegexSet::new($res) - }}; -} - -macro_rules! regex { - ($re:expr) => { - regex_new!($re).unwrap() - }; -} - -macro_rules! regex_set { - ($res:expr) => { - regex_set_new!($res).unwrap() - }; -} - -// Must come before other module definitions. -include!("macros_bytes.rs"); -include!("macros.rs"); - -// A silly wrapper to make it possible to write and match raw bytes. -struct R<'a>(&'a [u8]); -impl<'a> R<'a> { - fn as_bytes(&self) -> &'a [u8] { - self.0 - } -} - -// See: https://github.com/rust-lang/regex/issues/321 -// -// These tests are here because they do not have the same behavior in every -// regex engine. 
-mat!(invalid_utf8_nfa1, r".", R(b"\xD4\xC2\x65\x2B\x0E\xFE"), Some((2, 3))); -mat!(invalid_utf8_nfa2, r"${2}ä", R(b"\xD4\xC2\x65\x2B\x0E\xFE"), None); -mat!( - invalid_utf8_nfa3, - r".", - R(b"\x0A\xDB\x82\x6E\x33\x01\xDD\x33\xCD"), - Some((1, 3)) -); -mat!( - invalid_utf8_nfa4, - r"${2}ä", - R(b"\x0A\xDB\x82\x6E\x33\x01\xDD\x33\xCD"), - None -); - -mod api; -mod bytes; -mod crazy; -mod flags; -mod fowler; -mod multiline; -mod noparse; -mod regression; -mod replace; -mod set; -mod shortest_match; -mod suffix_reverse; -#[cfg(feature = "unicode")] -mod unicode; -#[cfg(feature = "unicode-perl")] -mod word_boundary; -#[cfg(feature = "unicode-perl")] -mod word_boundary_unicode; diff --git a/collector/compile-benchmarks/regex-1.5.5/tests/test_nfa.rs b/collector/compile-benchmarks/regex-1.5.5/tests/test_nfa.rs deleted file mode 100644 index e5a67d180..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/tests/test_nfa.rs +++ /dev/null @@ -1,50 +0,0 @@ -#![cfg_attr(feature = "pattern", feature(pattern))] - -macro_rules! regex_new { - ($re:expr) => {{ - use regex::internal::ExecBuilder; - ExecBuilder::new($re).nfa().build().map(|e| e.into_regex()) - }}; -} - -macro_rules! regex { - ($re:expr) => { - regex_new!($re).unwrap() - }; -} - -macro_rules! regex_set_new { - ($re:expr) => {{ - use regex::internal::ExecBuilder; - ExecBuilder::new_many($re).nfa().build().map(|e| e.into_regex_set()) - }}; -} - -macro_rules! regex_set { - ($res:expr) => { - regex_set_new!($res).unwrap() - }; -} - -// Must come before other module definitions. -include!("macros_str.rs"); -include!("macros.rs"); - -mod api; -mod api_str; -mod crazy; -mod flags; -mod fowler; -mod multiline; -mod noparse; -mod regression; -mod replace; -mod searcher; -mod set; -mod suffix_reverse; -#[cfg(feature = "unicode")] -mod unicode; -#[cfg(feature = "unicode-perl")] -mod word_boundary; -#[cfg(feature = "unicode-perl")] -mod word_boundary_unicode; diff --git a/collector/compile-benchmarks/regex-1.5.5/tests/test_nfa_bytes.rs b/collector/compile-benchmarks/regex-1.5.5/tests/test_nfa_bytes.rs deleted file mode 100644 index 0a10e032a..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/tests/test_nfa_bytes.rs +++ /dev/null @@ -1,55 +0,0 @@ -macro_rules! regex_new { - ($re:expr) => {{ - use regex::internal::ExecBuilder; - ExecBuilder::new($re) - .nfa() - .only_utf8(false) - .build() - .map(|e| e.into_byte_regex()) - }}; -} - -macro_rules! regex { - ($re:expr) => { - regex_new!($re).unwrap() - }; -} - -macro_rules! regex_set_new { - ($re:expr) => {{ - use regex::internal::ExecBuilder; - ExecBuilder::new_many($re) - .nfa() - .only_utf8(false) - .build() - .map(|e| e.into_byte_regex_set()) - }}; -} - -macro_rules! regex_set { - ($res:expr) => { - regex_set_new!($res).unwrap() - }; -} - -// Must come before other module definitions. 
-include!("macros_bytes.rs"); -include!("macros.rs"); - -mod api; -mod bytes; -mod crazy; -mod flags; -mod fowler; -mod multiline; -mod noparse; -mod regression; -mod replace; -mod set; -mod suffix_reverse; -#[cfg(feature = "unicode")] -mod unicode; -#[cfg(feature = "unicode-perl")] -mod word_boundary; -#[cfg(feature = "unicode-perl")] -mod word_boundary_unicode; diff --git a/collector/compile-benchmarks/regex-1.5.5/tests/test_nfa_utf8bytes.rs b/collector/compile-benchmarks/regex-1.5.5/tests/test_nfa_utf8bytes.rs deleted file mode 100644 index 36a572b5f..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/tests/test_nfa_utf8bytes.rs +++ /dev/null @@ -1,54 +0,0 @@ -#![cfg_attr(feature = "pattern", feature(pattern))] - -macro_rules! regex_new { - ($re:expr) => {{ - use regex::internal::ExecBuilder; - ExecBuilder::new($re).nfa().bytes(true).build().map(|e| e.into_regex()) - }}; -} - -macro_rules! regex { - ($re:expr) => { - regex_new!($re).unwrap() - }; -} - -macro_rules! regex_set_new { - ($re:expr) => {{ - use regex::internal::ExecBuilder; - ExecBuilder::new_many($re) - .nfa() - .bytes(true) - .build() - .map(|e| e.into_regex_set()) - }}; -} - -macro_rules! regex_set { - ($res:expr) => { - regex_set_new!($res).unwrap() - }; -} - -// Must come before other module definitions. -include!("macros_str.rs"); -include!("macros.rs"); - -mod api; -mod api_str; -mod crazy; -mod flags; -mod fowler; -mod multiline; -mod noparse; -mod regression; -mod replace; -mod searcher; -mod set; -mod suffix_reverse; -#[cfg(feature = "unicode")] -mod unicode; -#[cfg(feature = "unicode-perl")] -mod word_boundary; -#[cfg(feature = "unicode-perl")] -mod word_boundary_unicode; diff --git a/collector/compile-benchmarks/regex-1.5.5/tests/unicode.rs b/collector/compile-benchmarks/regex-1.5.5/tests/unicode.rs deleted file mode 100644 index 9f1cd0c01..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/tests/unicode.rs +++ /dev/null @@ -1,234 +0,0 @@ -mat!(uni_literal, r"☃", "☃", Some((0, 3))); -mat!(uni_literal_plus, r"☃+", "☃", Some((0, 3))); -mat!(uni_literal_casei_plus, r"(?i)☃+", "☃", Some((0, 3))); -mat!(uni_class_plus, r"[☃Ⅰ]+", "☃", Some((0, 3))); -mat!(uni_one, r"\pN", "Ⅰ", Some((0, 3))); -mat!(uni_mixed, r"\pN+", "Ⅰ1Ⅱ2", Some((0, 8))); -mat!(uni_not, r"\PN+", "abⅠ", Some((0, 2))); -mat!(uni_not_class, r"[\PN]+", "abⅠ", Some((0, 2))); -mat!(uni_not_class_neg, r"[^\PN]+", "abⅠ", Some((2, 5))); -mat!(uni_case, r"(?i)Δ", "δ", Some((0, 2))); -mat!(uni_case_upper, r"\p{Lu}+", "ΛΘΓΔα", Some((0, 8))); -mat!(uni_case_upper_nocase_flag, r"(?i)\p{Lu}+", "ΛΘΓΔα", Some((0, 10))); -mat!(uni_case_upper_nocase, r"\p{L}+", "ΛΘΓΔα", Some((0, 10))); -mat!(uni_case_lower, r"\p{Ll}+", "ΛΘΓΔα", Some((8, 10))); - -// Test the Unicode friendliness of Perl character classes. -mat!(uni_perl_w, r"\w+", "dδd", Some((0, 4))); -mat!(uni_perl_w_not, r"\w+", "⥡", None); -mat!(uni_perl_w_neg, r"\W+", "⥡", Some((0, 3))); -mat!(uni_perl_d, r"\d+", "1२३9", Some((0, 8))); -mat!(uni_perl_d_not, r"\d+", "Ⅱ", None); -mat!(uni_perl_d_neg, r"\D+", "Ⅱ", Some((0, 3))); -mat!(uni_perl_s, r"\s+", " ", Some((0, 3))); -mat!(uni_perl_s_not, r"\s+", "☃", None); -mat!(uni_perl_s_neg, r"\S+", "☃", Some((0, 3))); - -// And do the same for word boundaries. -mat!(uni_boundary_none, r"\d\b", "6δ", None); -mat!(uni_boundary_ogham, r"\d\b", "6 ", Some((0, 1))); -mat!(uni_not_boundary_none, r"\d\B", "6δ", Some((0, 1))); -mat!(uni_not_boundary_ogham, r"\d\B", "6 ", None); - -// Test general categories. -// -// We should test more, but there's a lot. 
Write a script to generate more of -// these tests. -mat!(uni_class_gencat_cased_letter, r"\p{Cased_Letter}", "A", Some((0, 3))); -mat!( - uni_class_gencat_close_punctuation, - r"\p{Close_Punctuation}", - "❯", - Some((0, 3)) -); -mat!( - uni_class_gencat_connector_punctuation, - r"\p{Connector_Punctuation}", - "⁀", - Some((0, 3)) -); -mat!(uni_class_gencat_control, r"\p{Control}", "\u{9f}", Some((0, 2))); -mat!( - uni_class_gencat_currency_symbol, - r"\p{Currency_Symbol}", - "£", - Some((0, 3)) -); -mat!( - uni_class_gencat_dash_punctuation, - r"\p{Dash_Punctuation}", - "〰", - Some((0, 3)) -); -mat!(uni_class_gencat_decimal_numer, r"\p{Decimal_Number}", "𑓙", Some((0, 4))); -mat!( - uni_class_gencat_enclosing_mark, - r"\p{Enclosing_Mark}", - "\u{A672}", - Some((0, 3)) -); -mat!( - uni_class_gencat_final_punctuation, - r"\p{Final_Punctuation}", - "⸡", - Some((0, 3)) -); -mat!(uni_class_gencat_format, r"\p{Format}", "\u{E007F}", Some((0, 4))); -// See: https://github.com/rust-lang/regex/issues/719 -mat!(uni_class_gencat_format_abbrev1, r"\p{cf}", "\u{E007F}", Some((0, 4))); -mat!(uni_class_gencat_format_abbrev2, r"\p{gc=cf}", "\u{E007F}", Some((0, 4))); -mat!( - uni_class_gencat_initial_punctuation, - r"\p{Initial_Punctuation}", - "⸜", - Some((0, 3)) -); -mat!(uni_class_gencat_letter, r"\p{Letter}", "Έ", Some((0, 2))); -mat!(uni_class_gencat_letter_number, r"\p{Letter_Number}", "ↂ", Some((0, 3))); -mat!( - uni_class_gencat_line_separator, - r"\p{Line_Separator}", - "\u{2028}", - Some((0, 3)) -); -mat!( - uni_class_gencat_lowercase_letter, - r"\p{Lowercase_Letter}", - "ϛ", - Some((0, 2)) -); -mat!(uni_class_gencat_mark, r"\p{Mark}", "\u{E01EF}", Some((0, 4))); -mat!(uni_class_gencat_math, r"\p{Math}", "⋿", Some((0, 3))); -mat!( - uni_class_gencat_modifier_letter, - r"\p{Modifier_Letter}", - "𖭃", - Some((0, 4)) -); -mat!( - uni_class_gencat_modifier_symbol, - r"\p{Modifier_Symbol}", - "🏿", - Some((0, 4)) -); -mat!( - uni_class_gencat_nonspacing_mark, - r"\p{Nonspacing_Mark}", - "\u{1E94A}", - Some((0, 4)) -); -mat!(uni_class_gencat_number, r"\p{Number}", "⓿", Some((0, 3))); -mat!( - uni_class_gencat_open_punctuation, - r"\p{Open_Punctuation}", - "⦅", - Some((0, 3)) -); -mat!(uni_class_gencat_other, r"\p{Other}", "\u{bc9}", Some((0, 3))); -mat!(uni_class_gencat_other_letter, r"\p{Other_Letter}", "ꓷ", Some((0, 3))); -mat!(uni_class_gencat_other_number, r"\p{Other_Number}", "㉏", Some((0, 3))); -mat!( - uni_class_gencat_other_punctuation, - r"\p{Other_Punctuation}", - "𞥞", - Some((0, 4)) -); -mat!(uni_class_gencat_other_symbol, r"\p{Other_Symbol}", "⅌", Some((0, 3))); -mat!( - uni_class_gencat_paragraph_separator, - r"\p{Paragraph_Separator}", - "\u{2029}", - Some((0, 3)) -); -mat!( - uni_class_gencat_private_use, - r"\p{Private_Use}", - "\u{10FFFD}", - Some((0, 4)) -); -mat!(uni_class_gencat_punctuation, r"\p{Punctuation}", "𑁍", Some((0, 4))); -mat!(uni_class_gencat_separator, r"\p{Separator}", "\u{3000}", Some((0, 3))); -mat!( - uni_class_gencat_space_separator, - r"\p{Space_Separator}", - "\u{205F}", - Some((0, 3)) -); -mat!( - uni_class_gencat_spacing_mark, - r"\p{Spacing_Mark}", - "\u{16F7E}", - Some((0, 4)) -); -mat!(uni_class_gencat_symbol, r"\p{Symbol}", "⯈", Some((0, 3))); -mat!( - uni_class_gencat_titlecase_letter, - r"\p{Titlecase_Letter}", - "ῼ", - Some((0, 3)) -); -mat!( - uni_class_gencat_unassigned, - r"\p{Unassigned}", - "\u{10FFFF}", - Some((0, 4)) -); -mat!( - uni_class_gencat_uppercase_letter, - r"\p{Uppercase_Letter}", - "Ꝋ", - Some((0, 3)) -); - -// Test a smattering of 
properties. -mat!(uni_class_prop_emoji1, r"\p{Emoji}", "\u{23E9}", Some((0, 3))); -mat!(uni_class_prop_emoji2, r"\p{emoji}", "\u{1F21A}", Some((0, 4))); -mat!( - uni_class_prop_picto1, - r"\p{extendedpictographic}", - "\u{1FA6E}", - Some((0, 4)) -); -mat!( - uni_class_prop_picto2, - r"\p{extendedpictographic}", - "\u{1FFFD}", - Some((0, 4)) -); - -// grapheme_cluster_break -mat!( - uni_class_gcb_prepend, - r"\p{grapheme_cluster_break=prepend}", - "\u{11D46}", - Some((0, 4)) -); -mat!( - uni_class_gcb_ri1, - r"\p{gcb=regional_indicator}", - "\u{1F1E6}", - Some((0, 4)) -); -mat!(uni_class_gcb_ri2, r"\p{gcb=ri}", "\u{1F1E7}", Some((0, 4))); -mat!( - uni_class_gcb_ri3, - r"\p{gcb=regionalindicator}", - "\u{1F1FF}", - Some((0, 4)) -); -mat!(uni_class_gcb_lvt, r"\p{gcb=lvt}", "\u{C989}", Some((0, 3))); -mat!(uni_class_gcb_zwj, r"\p{gcb=zwj}", "\u{200D}", Some((0, 3))); - -// word_break -mat!(uni_class_wb1, r"\p{word_break=Hebrew_Letter}", "\u{FB46}", Some((0, 3))); -mat!(uni_class_wb2, r"\p{wb=hebrewletter}", "\u{FB46}", Some((0, 3))); -mat!(uni_class_wb3, r"\p{wb=ExtendNumLet}", "\u{FF3F}", Some((0, 3))); -mat!(uni_class_wb4, r"\p{wb=WSegSpace}", "\u{3000}", Some((0, 3))); -mat!(uni_class_wb5, r"\p{wb=numeric}", "\u{1E950}", Some((0, 4))); - -// sentence_break -mat!(uni_class_sb1, r"\p{sentence_break=Lower}", "\u{0469}", Some((0, 2))); -mat!(uni_class_sb2, r"\p{sb=lower}", "\u{0469}", Some((0, 2))); -mat!(uni_class_sb3, r"\p{sb=Close}", "\u{FF60}", Some((0, 3))); -mat!(uni_class_sb4, r"\p{sb=Close}", "\u{1F677}", Some((0, 4))); -mat!(uni_class_sb5, r"\p{sb=SContinue}", "\u{FF64}", Some((0, 3))); diff --git a/collector/compile-benchmarks/regex-1.5.5/tests/word_boundary.rs b/collector/compile-benchmarks/regex-1.5.5/tests/word_boundary.rs deleted file mode 100644 index 7fe97a297..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/tests/word_boundary.rs +++ /dev/null @@ -1,89 +0,0 @@ -// Many of these are cribbed from RE2's test suite. 
- -matiter!(wb1, r"\b", ""); -matiter!(wb2, r"\b", "a", (0, 0), (1, 1)); -matiter!(wb3, r"\b", "ab", (0, 0), (2, 2)); -matiter!(wb4, r"^\b", "ab", (0, 0)); -matiter!(wb5, r"\b$", "ab", (2, 2)); -matiter!(wb6, r"^\b$", "ab"); -matiter!(wb7, r"\bbar\b", "nobar bar foo bar", (6, 9), (14, 17)); -matiter!(wb8, r"a\b", "faoa x", (3, 4)); -matiter!(wb9, r"\bbar", "bar x", (0, 3)); -matiter!(wb10, r"\bbar", "foo\nbar x", (4, 7)); -matiter!(wb11, r"bar\b", "foobar", (3, 6)); -matiter!(wb12, r"bar\b", "foobar\nxxx", (3, 6)); -matiter!(wb13, r"(foo|bar|[A-Z])\b", "foo", (0, 3)); -matiter!(wb14, r"(foo|bar|[A-Z])\b", "foo\n", (0, 3)); -matiter!(wb15, r"\b(foo|bar|[A-Z])", "foo", (0, 3)); -matiter!(wb16, r"\b(foo|bar|[A-Z])\b", "X", (0, 1)); -matiter!(wb17, r"\b(foo|bar|[A-Z])\b", "XY"); -matiter!(wb18, r"\b(foo|bar|[A-Z])\b", "bar", (0, 3)); -matiter!(wb19, r"\b(foo|bar|[A-Z])\b", "foo", (0, 3)); -matiter!(wb20, r"\b(foo|bar|[A-Z])\b", "foo\n", (0, 3)); -matiter!(wb21, r"\b(foo|bar|[A-Z])\b", "ffoo bbar N x", (10, 11)); -matiter!(wb22, r"\b(fo|foo)\b", "fo", (0, 2)); -matiter!(wb23, r"\b(fo|foo)\b", "foo", (0, 3)); -matiter!(wb24, r"\b\b", ""); -matiter!(wb25, r"\b\b", "a", (0, 0), (1, 1)); -matiter!(wb26, r"\b$", ""); -matiter!(wb27, r"\b$", "x", (1, 1)); -matiter!(wb28, r"\b$", "y x", (3, 3)); -matiter!(wb29, r"\b.$", "x", (0, 1)); -matiter!(wb30, r"^\b(fo|foo)\b", "fo", (0, 2)); -matiter!(wb31, r"^\b(fo|foo)\b", "foo", (0, 3)); -matiter!(wb32, r"^\b$", ""); -matiter!(wb33, r"^\b$", "x"); -matiter!(wb34, r"^\b.$", "x", (0, 1)); -matiter!(wb35, r"^\b.\b$", "x", (0, 1)); -matiter!(wb36, r"^^^^^\b$$$$$", ""); -matiter!(wb37, r"^^^^^\b.$$$$$", "x", (0, 1)); -matiter!(wb38, r"^^^^^\b$$$$$", "x"); -matiter!(wb39, r"^^^^^\b\b\b.\b\b\b$$$$$", "x", (0, 1)); -matiter!(wb40, r"\b.+\b", "$$abc$$", (2, 5)); -matiter!(wb41, r"\b", "a b c", (0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)); - -matiter!(nb1, r"\Bfoo\B", "n foo xfoox that", (7, 10)); -matiter!(nb2, r"a\B", "faoa x", (1, 2)); -matiter!(nb3, r"\Bbar", "bar x"); -matiter!(nb4, r"\Bbar", "foo\nbar x"); -matiter!(nb5, r"bar\B", "foobar"); -matiter!(nb6, r"bar\B", "foobar\nxxx"); -matiter!(nb7, r"(foo|bar|[A-Z])\B", "foox", (0, 3)); -matiter!(nb8, r"(foo|bar|[A-Z])\B", "foo\n"); -matiter!(nb9, r"\B", "", (0, 0)); -matiter!(nb10, r"\B", "x"); -matiter!(nb11, r"\B(foo|bar|[A-Z])", "foo"); -matiter!(nb12, r"\B(foo|bar|[A-Z])\B", "xXy", (1, 2)); -matiter!(nb13, r"\B(foo|bar|[A-Z])\B", "XY"); -matiter!(nb14, r"\B(foo|bar|[A-Z])\B", "XYZ", (1, 2)); -matiter!(nb15, r"\B(foo|bar|[A-Z])\B", "abara", (1, 4)); -matiter!(nb16, r"\B(foo|bar|[A-Z])\B", "xfoo_", (1, 4)); -matiter!(nb17, r"\B(foo|bar|[A-Z])\B", "xfoo\n"); -matiter!(nb18, r"\B(foo|bar|[A-Z])\B", "foo bar vNX", (9, 10)); -matiter!(nb19, r"\B(fo|foo)\B", "xfoo", (1, 3)); -matiter!(nb20, r"\B(foo|fo)\B", "xfooo", (1, 4)); -matiter!(nb21, r"\B\B", "", (0, 0)); -matiter!(nb22, r"\B\B", "x"); -matiter!(nb23, r"\B$", "", (0, 0)); -matiter!(nb24, r"\B$", "x"); -matiter!(nb25, r"\B$", "y x"); -matiter!(nb26, r"\B.$", "x"); -matiter!(nb27, r"^\B(fo|foo)\B", "fo"); -matiter!(nb28, r"^\B(fo|foo)\B", "foo"); -matiter!(nb29, r"^\B", "", (0, 0)); -matiter!(nb30, r"^\B", "x"); -matiter!(nb31, r"^\B\B", "", (0, 0)); -matiter!(nb32, r"^\B\B", "x"); -matiter!(nb33, r"^\B$", "", (0, 0)); -matiter!(nb34, r"^\B$", "x"); -matiter!(nb35, r"^\B.$", "x"); -matiter!(nb36, r"^\B.\B$", "x"); -matiter!(nb37, r"^^^^^\B$$$$$", "", (0, 0)); -matiter!(nb38, r"^^^^^\B.$$$$$", "x"); -matiter!(nb39, r"^^^^^\B$$$$$", "x"); - -// These work for 
both Unicode and ASCII because all matches are reported as -// byte offsets, and « and » do not correspond to word boundaries at either -// the character or byte level. -matiter!(unicode1, r"\bx\b", "«x", (2, 3)); -matiter!(unicode2, r"\bx\b", "x»", (0, 1)); diff --git a/collector/compile-benchmarks/regex-1.5.5/tests/word_boundary_ascii.rs b/collector/compile-benchmarks/regex-1.5.5/tests/word_boundary_ascii.rs deleted file mode 100644 index 5a3cf1166..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/tests/word_boundary_ascii.rs +++ /dev/null @@ -1,9 +0,0 @@ -// ASCII word boundaries are completely oblivious to Unicode characters. -// For Unicode word boundaries, the tests are precisely inverted. -matiter!(ascii1, r"(?-u:\b)x(?-u:\b)", "áxβ", (2, 3)); -matiter!(ascii2, r"(?-u:\B)x(?-u:\B)", "áxβ"); -matiter!(ascii3, r"(?-u:\B)", "0\u{7EF5E}", (2, 2), (3, 3), (4, 4), (5, 5)); - -// We still get Unicode word boundaries by default in byte regexes. -matiter!(unicode1, r"\bx\b", "áxβ"); -matiter!(unicode2, r"\Bx\B", "áxβ", (2, 3)); diff --git a/collector/compile-benchmarks/regex-1.5.5/tests/word_boundary_unicode.rs b/collector/compile-benchmarks/regex-1.5.5/tests/word_boundary_unicode.rs deleted file mode 100644 index c41355ffc..000000000 --- a/collector/compile-benchmarks/regex-1.5.5/tests/word_boundary_unicode.rs +++ /dev/null @@ -1,6 +0,0 @@ -// Unicode word boundaries know about Unicode characters. -// For ASCII word boundaries, the tests are precisely inverted. -matiter!(unicode1, r"\bx\b", "áxβ"); -matiter!(unicode2, r"\Bx\B", "áxβ", (2, 3)); - -matiter!(ascii1, r"(?-u:\b)x(?-u:\b)", "áxβ", (2, 3));